cognite-extractor-utils 6.4.1__tar.gz → 7.0.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (28)
  1. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/PKG-INFO +1 -3
  2. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/__init__.py +1 -1
  3. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/_inner_util.py +1 -1
  4. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/base.py +13 -11
  5. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/configtools/elements.py +2 -2
  6. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/configtools/loaders.py +11 -6
  7. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/metrics.py +7 -8
  8. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/statestore.py +86 -80
  9. cognite_extractor_utils-7.0.0/cognite/extractorutils/threading.py +90 -0
  10. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/_base.py +9 -7
  11. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/assets.py +32 -30
  12. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/events.py +32 -30
  13. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/files.py +106 -85
  14. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/raw.py +17 -17
  15. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/time_series.py +117 -111
  16. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader_extractor.py +4 -4
  17. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/util.py +41 -36
  18. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/pyproject.toml +3 -4
  19. cognite_extractor_utils-6.4.1/cognite/extractorutils/middleware.py +0 -36
  20. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/LICENSE +0 -0
  21. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/README.md +0 -0
  22. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/configtools/__init__.py +0 -0
  23. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/configtools/_util.py +0 -0
  24. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/exceptions.py +0 -0
  25. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/py.typed +0 -0
  26. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/__init__.py +0 -0
  27. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader/_metrics.py +0 -0
  28. {cognite_extractor_utils-6.4.1 → cognite_extractor_utils-7.0.0}/cognite/extractorutils/uploader_types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-extractor-utils
3
- Version: 6.4.1
3
+ Version: 7.0.0
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Home-page: https://github.com/cognitedata/python-extractor-utils
6
6
  License: Apache-2.0
@@ -21,8 +21,6 @@ Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
21
21
  Requires-Dist: cognite-sdk (>=7,<8)
22
22
  Requires-Dist: dacite (>=1.6.0,<2.0.0)
23
23
  Requires-Dist: decorator (>=5.1.1,<6.0.0)
24
- Requires-Dist: jq (>=1.3.0,<2.0.0) ; sys_platform == "darwin"
25
- Requires-Dist: jq (>=1.3.0,<2.0.0) ; sys_platform == "linux"
26
24
  Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
27
25
  Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
28
26
  Requires-Dist: psutil (>=5.7.0,<6.0.0)
@@ -16,5 +16,5 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "6.4.1"
19
+ __version__ = "7.0.0"
20
20
  from .base import Extractor
@@ -34,7 +34,7 @@ class _DecimalEncoder(json.JSONEncoder):
34
34
 
35
35
  class _DecimalDecoder(json.JSONDecoder):
36
36
  def __init__(self, *args: Any, **kwargs: Any) -> None:
37
- json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs)
37
+ json.JSONDecoder.__init__(self, *args, object_hook=self.object_hook, **kwargs)
38
38
 
39
39
  def object_hook(self, obj_dict: Dict[str, str]) -> Union[Dict[str, str], Decimal]:
40
40
  if obj_dict.get("type") == "decimal_encoded":
@@ -17,7 +17,7 @@ import os
17
17
  import sys
18
18
  from dataclasses import is_dataclass
19
19
  from enum import Enum
20
- from threading import Event, Thread
20
+ from threading import Thread
21
21
  from types import TracebackType
22
22
  from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
23
23
 
@@ -29,7 +29,7 @@ from cognite.extractorutils.configtools import BaseConfig, ConfigResolver, State
29
29
  from cognite.extractorutils.exceptions import InvalidConfigError
30
30
  from cognite.extractorutils.metrics import BaseMetrics
31
31
  from cognite.extractorutils.statestore import AbstractStateStore, LocalStateStore, NoStateStore
32
- from cognite.extractorutils.util import set_event_on_interrupt
32
+ from cognite.extractorutils.threading import CancellationToken
33
33
 
34
34
 
35
35
  class ReloadConfigAction(Enum):
@@ -77,11 +77,13 @@ class Extractor(Generic[CustomConfigClass]):
77
77
  name: str,
78
78
  description: str,
79
79
  version: Optional[str] = None,
80
- run_handle: Optional[Callable[[CogniteClient, AbstractStateStore, CustomConfigClass, Event], None]] = None,
80
+ run_handle: Optional[
81
+ Callable[[CogniteClient, AbstractStateStore, CustomConfigClass, CancellationToken], None]
82
+ ] = None,
81
83
  config_class: Type[CustomConfigClass],
82
84
  metrics: Optional[BaseMetrics] = None,
83
85
  use_default_state_store: bool = True,
84
- cancellation_token: Event = Event(),
86
+ cancellation_token: Optional[CancellationToken] = None,
85
87
  config_file_path: Optional[str] = None,
86
88
  continuous_extractor: bool = False,
87
89
  heartbeat_waiting_time: int = 600,
@@ -95,7 +97,7 @@ class Extractor(Generic[CustomConfigClass]):
95
97
  self.config_class = config_class
96
98
  self.use_default_state_store = use_default_state_store
97
99
  self.version = version or "unknown"
98
- self.cancellation_token = cancellation_token
100
+ self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
99
101
  self.config_file_path = config_file_path
100
102
  self.continuous_extractor = continuous_extractor
101
103
  self.heartbeat_waiting_time = heartbeat_waiting_time
@@ -136,7 +138,7 @@ class Extractor(Generic[CustomConfigClass]):
136
138
  Extractor._config_singleton = self.config # type: ignore
137
139
 
138
140
  def config_refresher() -> None:
139
- while not self.cancellation_token.is_set():
141
+ while not self.cancellation_token.is_cancelled:
140
142
  self.cancellation_token.wait(self.reload_config_interval)
141
143
  if self.config_resolver.has_changed:
142
144
  self._reload_config()
@@ -158,7 +160,7 @@ class Extractor(Generic[CustomConfigClass]):
158
160
 
159
161
  elif self.reload_config_action == ReloadConfigAction.SHUTDOWN:
160
162
  self.logger.info("Shutting down, expecting to be restarted")
161
- self.cancellation_token.set()
163
+ self.cancellation_token.cancel()
162
164
 
163
165
  elif self.reload_config_action == ReloadConfigAction.CALLBACK:
164
166
  self.logger.info("Loading in new config file")
@@ -265,7 +267,7 @@ class Extractor(Generic[CustomConfigClass]):
265
267
  self.logger.info(f"Loaded {'remote' if self.config_resolver.is_remote else 'local'} config file")
266
268
 
267
269
  if self.handle_interrupts:
268
- set_event_on_interrupt(self.cancellation_token)
270
+ self.cancellation_token.cancel_on_interrupt()
269
271
 
270
272
  self.cognite_client = self.config.cognite.get_cognite_client(self.name)
271
273
  self._load_state_store()
@@ -279,10 +281,10 @@ class Extractor(Generic[CustomConfigClass]):
279
281
  pass
280
282
 
281
283
  def heartbeat_loop() -> None:
282
- while not self.cancellation_token.is_set():
284
+ while not self.cancellation_token.is_cancelled:
283
285
  self.cancellation_token.wait(self.heartbeat_waiting_time)
284
286
 
285
- if not self.cancellation_token.is_set():
287
+ if not self.cancellation_token.is_cancelled:
286
288
  self.logger.info("Reporting new heartbeat")
287
289
  try:
288
290
  self.cognite_client.extraction_pipelines.runs.create(
@@ -329,7 +331,7 @@ class Extractor(Generic[CustomConfigClass]):
329
331
  Returns:
330
332
  True if the extractor shut down cleanly, False if the extractor was shut down due to an unhandled error
331
333
  """
332
- self.cancellation_token.set()
334
+ self.cancellation_token.cancel()
333
335
 
334
336
  if self.state_store:
335
337
  self.state_store.synchronize()
@@ -18,7 +18,6 @@ from dataclasses import dataclass, field
18
18
  from datetime import timedelta
19
19
  from enum import Enum
20
20
  from logging.handlers import TimedRotatingFileHandler
21
- from threading import Event
22
21
  from time import sleep
23
22
  from typing import Any, Dict, List, Optional, Tuple, Union
24
23
  from urllib.parse import urljoin
@@ -46,6 +45,7 @@ from cognite.extractorutils.statestore import (
46
45
  NoStateStore,
47
46
  RawStateStore,
48
47
  )
48
+ from cognite.extractorutils.threading import CancellationToken
49
49
  from cognite.extractorutils.util import EitherId
50
50
 
51
51
  _logger = logging.getLogger(__name__)
@@ -481,7 +481,7 @@ class MetricsConfig:
481
481
  cognite: Optional[_CogniteMetricsConfig]
482
482
  server: Optional[_PromServerConfig]
483
483
 
484
- def start_pushers(self, cdf_client: CogniteClient, cancellation_token: Event = Event()) -> None:
484
+ def start_pushers(self, cdf_client: CogniteClient, cancellation_token: Optional[CancellationToken] = None) -> None:
485
485
  self._pushers: List[AbstractMetricsPusher] = []
486
486
  self._clear_on_stop: Dict[PrometheusPusher, int] = {}
487
487
 
@@ -20,6 +20,7 @@ import re
20
20
  import sys
21
21
  from enum import Enum
22
22
  from hashlib import sha256
23
+ from pathlib import Path
23
24
  from typing import Any, Callable, Dict, Generic, Iterable, Optional, TextIO, Type, TypeVar, Union
24
25
 
25
26
  import dacite
@@ -111,7 +112,7 @@ class KeyVaultLoader:
111
112
  try:
112
113
  return self.client.get_secret(node.value).value # type: ignore # _init_client guarantees not None
113
114
  except (ResourceNotFoundError, ServiceRequestError, HttpResponseError) as e:
114
- raise InvalidConfigError(str(e))
115
+ raise InvalidConfigError(str(e)) from e
115
116
 
116
117
 
117
118
  class _EnvLoader(yaml.SafeLoader):
@@ -188,11 +189,15 @@ def _load_yaml(
188
189
 
189
190
  try:
190
191
  config = dacite.from_dict(
191
- data=config_dict, data_class=config_type, config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig])
192
+ data=config_dict,
193
+ data_class=config_type,
194
+ config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig, Path]),
192
195
  )
193
196
  except dacite.UnexpectedDataError as e:
194
197
  unknowns = [f'"{k.replace("_", "-") if case_style == "hyphen" else k}"' for k in e.keys]
195
- raise InvalidConfigError(f"Unknown config parameter{'s' if len(unknowns) > 1 else ''} {', '.join(unknowns)}")
198
+ raise InvalidConfigError(
199
+ f"Unknown config parameter{'s' if len(unknowns) > 1 else ''} {', '.join(unknowns)}"
200
+ ) from e
196
201
 
197
202
  except (dacite.WrongTypeError, dacite.MissingValueError, dacite.UnionMatchError) as e:
198
203
  if e.field_path:
@@ -212,11 +217,11 @@ def _load_yaml(
212
217
 
213
218
  raise InvalidConfigError(
214
219
  f'Wrong type for field "{path}" - got "{e.value}" of type {got_type} instead of {need_type}'
215
- )
216
- raise InvalidConfigError(f'Missing mandatory field "{path}"')
220
+ ) from e
221
+ raise InvalidConfigError(f'Missing mandatory field "{path}"') from e
217
222
 
218
223
  except dacite.ForwardReferenceError as e:
219
- raise ValueError(f"Invalid config class: {str(e)}")
224
+ raise ValueError(f"Invalid config class: {str(e)}") from e
220
225
 
221
226
  config._file_hash = sha256(json.dumps(config_dict).encode("utf-8")).hexdigest()
222
227
 
@@ -41,7 +41,6 @@ import logging
41
41
  import os
42
42
  import threading
43
43
  from abc import ABC, abstractmethod
44
- from threading import Event
45
44
  from time import sleep
46
45
  from types import TracebackType
47
46
  from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
@@ -55,6 +54,7 @@ from prometheus_client.exposition import basic_auth_handler, delete_from_gateway
55
54
  from cognite.client import CogniteClient
56
55
  from cognite.client.data_classes import Asset, Datapoints, DatapointsArray, TimeSeries
57
56
  from cognite.client.exceptions import CogniteDuplicatedError
57
+ from cognite.extractorutils.threading import CancellationToken
58
58
  from cognite.extractorutils.util import EitherId
59
59
 
60
60
  from .util import ensure_time_series
@@ -179,14 +179,14 @@ class AbstractMetricsPusher(ABC):
179
179
  self,
180
180
  push_interval: Optional[int] = None,
181
181
  thread_name: Optional[str] = None,
182
- cancellation_token: Event = Event(),
182
+ cancellation_token: Optional[CancellationToken] = None,
183
183
  ):
184
184
  self.push_interval = push_interval
185
185
  self.thread_name = thread_name
186
186
 
187
187
  self.thread: Optional[threading.Thread] = None
188
188
  self.thread_name = thread_name
189
- self.cancellation_token = cancellation_token
189
+ self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
190
190
 
191
191
  self.logger = logging.getLogger(__name__)
192
192
 
@@ -201,7 +201,7 @@ class AbstractMetricsPusher(ABC):
201
201
  """
202
202
  Run push loop.
203
203
  """
204
- while not self.cancellation_token.is_set():
204
+ while not self.cancellation_token.is_cancelled:
205
205
  self._push_to_server()
206
206
  self.cancellation_token.wait(self.push_interval)
207
207
 
@@ -210,7 +210,6 @@ class AbstractMetricsPusher(ABC):
210
210
  Starts a thread that pushes the default registry to the configured gateway at certain intervals.
211
211
 
212
212
  """
213
- self.cancellation_token.clear()
214
213
  self.thread = threading.Thread(target=self._run, daemon=True, name=self.thread_name)
215
214
  self.thread.start()
216
215
 
@@ -220,7 +219,7 @@ class AbstractMetricsPusher(ABC):
220
219
  """
221
220
  # Make sure everything is pushed
222
221
  self._push_to_server()
223
- self.cancellation_token.set()
222
+ self.cancellation_token.cancel()
224
223
 
225
224
  def __enter__(self) -> "AbstractMetricsPusher":
226
225
  """
@@ -268,7 +267,7 @@ class PrometheusPusher(AbstractMetricsPusher):
268
267
  username: Optional[str] = None,
269
268
  password: Optional[str] = None,
270
269
  thread_name: Optional[str] = None,
271
- cancellation_token: Event = Event(),
270
+ cancellation_token: Optional[CancellationToken] = None,
272
271
  ):
273
272
  super(PrometheusPusher, self).__init__(push_interval, thread_name, cancellation_token)
274
273
 
@@ -344,7 +343,7 @@ class CognitePusher(AbstractMetricsPusher):
344
343
  asset: Optional[Asset] = None,
345
344
  data_set: Optional[EitherId] = None,
346
345
  thread_name: Optional[str] = None,
347
- cancellation_token: Event = Event(),
346
+ cancellation_token: Optional[CancellationToken] = None,
348
347
  ):
349
348
  super(CognitePusher, self).__init__(push_interval, thread_name, cancellation_token)
350
349
 
@@ -92,18 +92,17 @@ from abc import ABC, abstractmethod
92
92
  from types import TracebackType
93
93
  from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
94
94
 
95
- from requests.exceptions import ConnectionError
96
-
97
95
  from cognite.client import CogniteClient
98
- from cognite.client.exceptions import CogniteAPIError, CogniteException
96
+ from cognite.client.exceptions import CogniteAPIError
97
+ from cognite.extractorutils.threading import CancellationToken
99
98
  from cognite.extractorutils.uploader import DataPointList
100
99
 
101
100
  from ._inner_util import _DecimalDecoder, _DecimalEncoder, _resolve_log_level
102
- from .util import retry
101
+ from .util import cognite_exceptions, retry
103
102
 
104
103
  RETRY_BACKOFF_FACTOR = 1.5
105
- RETRY_MAX_DELAY = 15
106
- RETRY_DELAY = 5
104
+ RETRY_MAX_DELAY = 60
105
+ RETRY_DELAY = 1
107
106
  RETRIES = 10
108
107
 
109
108
 
@@ -124,7 +123,7 @@ class AbstractStateStore(ABC):
124
123
  save_interval: Optional[int] = None,
125
124
  trigger_log_level: str = "DEBUG",
126
125
  thread_name: Optional[str] = None,
127
- cancellation_token: threading.Event = threading.Event(),
126
+ cancellation_token: Optional[CancellationToken] = None,
128
127
  ):
129
128
  self._initialized = False
130
129
  self._local_state: Dict[str, Dict[str, Any]] = {}
@@ -135,7 +134,7 @@ class AbstractStateStore(ABC):
135
134
 
136
135
  self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
137
136
  self.lock = threading.RLock()
138
- self.cancellation_token: threading.Event = cancellation_token
137
+ self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
139
138
 
140
139
  self._deleted: List[str] = []
141
140
 
@@ -145,7 +144,6 @@ class AbstractStateStore(ABC):
145
144
  This calls the synchronize method every save_interval seconds.
146
145
  """
147
146
  if self.save_interval is not None:
148
- self.cancellation_token.clear()
149
147
  self.thread.start()
150
148
 
151
149
  def stop(self, ensure_synchronize: bool = True) -> None:
@@ -155,7 +153,7 @@ class AbstractStateStore(ABC):
155
153
  Args:
156
154
  ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
157
155
  """
158
- self.cancellation_token.set()
156
+ self.cancellation_token.cancel()
159
157
  if ensure_synchronize:
160
158
  self.synchronize()
161
159
 
@@ -337,7 +335,7 @@ class RawStateStore(AbstractStateStore):
337
335
  save_interval: Optional[int] = None,
338
336
  trigger_log_level: str = "DEBUG",
339
337
  thread_name: Optional[str] = None,
340
- cancellation_token: threading.Event = threading.Event(),
338
+ cancellation_token: Optional[CancellationToken] = None,
341
339
  ):
342
340
  super().__init__(save_interval, trigger_log_level, thread_name, cancellation_token)
343
341
 
@@ -347,79 +345,87 @@ class RawStateStore(AbstractStateStore):
347
345
 
348
346
  self._ensure_table()
349
347
 
350
- @retry(
351
- exceptions=(CogniteException, ConnectionError),
352
- tries=RETRIES,
353
- delay=RETRY_DELAY,
354
- max_delay=RETRY_MAX_DELAY,
355
- backoff=RETRY_BACKOFF_FACTOR,
356
- )
357
348
  def _ensure_table(self) -> None:
358
- try:
359
- self._cdf_client.raw.databases.create(self.database)
360
- except CogniteAPIError as e:
361
- if not e.code == 400:
362
- raise e
363
- try:
364
- self._cdf_client.raw.tables.create(self.database, self.table)
365
- except CogniteAPIError as e:
366
- if not e.code == 400:
367
- raise e
368
-
369
- def initialize(self, force: bool = False) -> None:
370
- self._initialize_implementation(force)
371
-
372
- @retry(
373
- exceptions=(CogniteException, ConnectionError),
374
- tries=RETRIES,
375
- delay=RETRY_DELAY,
376
- max_delay=RETRY_MAX_DELAY,
377
- backoff=RETRY_BACKOFF_FACTOR,
378
- )
379
- def _initialize_implementation(self, force: bool = False) -> None:
380
- """
381
- Get all known states.
382
-
383
- Args:
384
- force: Enable re-initialization, ie overwrite when called multiple times
385
- """
386
- if self._initialized and not force:
387
- return
388
-
389
- # ignore type since list _is_ optional, sdk types are wrong
390
- rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None) # type: ignore
349
+ @retry(
350
+ exceptions=cognite_exceptions(),
351
+ cancellation_token=self.cancellation_token,
352
+ tries=RETRIES,
353
+ delay=RETRY_DELAY,
354
+ max_delay=RETRY_MAX_DELAY,
355
+ backoff=RETRY_BACKOFF_FACTOR,
356
+ )
357
+ def impl() -> None:
358
+ try:
359
+ self._cdf_client.raw.databases.create(self.database)
360
+ except CogniteAPIError as e:
361
+ if not e.code == 400:
362
+ raise e
363
+ try:
364
+ self._cdf_client.raw.tables.create(self.database, self.table)
365
+ except CogniteAPIError as e:
366
+ if not e.code == 400:
367
+ raise e
391
368
 
392
- with self.lock:
393
- self._local_state.clear()
394
- for row in rows:
395
- if row.key is None or row.columns is None:
396
- self.logger.warning(f"None encountered in row: {str(row)}")
397
- # should never happen, but type from sdk is optional
398
- continue
399
- self._local_state[row.key] = row.columns
369
+ impl()
400
370
 
401
- self._initialized = True
371
+ def initialize(self, force: bool = False) -> None:
372
+ @retry(
373
+ exceptions=cognite_exceptions(),
374
+ cancellation_token=self.cancellation_token,
375
+ tries=RETRIES,
376
+ delay=RETRY_DELAY,
377
+ max_delay=RETRY_MAX_DELAY,
378
+ backoff=RETRY_BACKOFF_FACTOR,
379
+ )
380
+ def impl() -> None:
381
+ """
382
+ Get all known states.
383
+
384
+ Args:
385
+ force: Enable re-initialization, ie overwrite when called multiple times
386
+ """
387
+ if self._initialized and not force:
388
+ return
389
+
390
+ # ignore type since list _is_ optional, sdk types are wrong
391
+ rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None) # type: ignore
392
+
393
+ with self.lock:
394
+ self._local_state.clear()
395
+ for row in rows:
396
+ if row.key is None or row.columns is None:
397
+ self.logger.warning(f"None encountered in row: {str(row)}")
398
+ # should never happen, but type from sdk is optional
399
+ continue
400
+ self._local_state[row.key] = row.columns
401
+
402
+ self._initialized = True
403
+
404
+ impl()
402
405
 
403
406
  def synchronize(self) -> None:
404
- self._synchronize_implementation()
405
-
406
- @retry(
407
- exceptions=(CogniteException, ConnectionError),
408
- tries=RETRIES,
409
- delay=RETRY_DELAY,
410
- max_delay=RETRY_MAX_DELAY,
411
- backoff=RETRY_BACKOFF_FACTOR,
412
- )
413
- def _synchronize_implementation(self) -> None:
414
- """
415
- Upload local state store to CDF
416
- """
417
- self._cdf_client.raw.rows.insert(db_name=self.database, table_name=self.table, row=self._local_state)
418
- # Create a copy of deleted to facilitate testing (mock library stores list, and as it changes, the assertions
419
- # fail)
420
- self._cdf_client.raw.rows.delete(db_name=self.database, table_name=self.table, key=[k for k in self._deleted])
421
- with self.lock:
422
- self._deleted.clear()
407
+ @retry(
408
+ exceptions=cognite_exceptions(),
409
+ cancellation_token=self.cancellation_token,
410
+ tries=RETRIES,
411
+ delay=RETRY_DELAY,
412
+ max_delay=RETRY_MAX_DELAY,
413
+ backoff=RETRY_BACKOFF_FACTOR,
414
+ )
415
+ def impl() -> None:
416
+ """
417
+ Upload local state store to CDF
418
+ """
419
+ self._cdf_client.raw.rows.insert(db_name=self.database, table_name=self.table, row=self._local_state)
420
+ # Create a copy of deleted to facilitate testing (mock library stores list, and as it changes, the
421
+ # assertions fail)
422
+ self._cdf_client.raw.rows.delete(
423
+ db_name=self.database, table_name=self.table, key=[k for k in self._deleted]
424
+ )
425
+ with self.lock:
426
+ self._deleted.clear()
427
+
428
+ impl()
423
429
 
424
430
  def __enter__(self) -> "RawStateStore":
425
431
  """
@@ -464,7 +470,7 @@ class LocalStateStore(AbstractStateStore):
464
470
  save_interval: Optional[int] = None,
465
471
  trigger_log_level: str = "DEBUG",
466
472
  thread_name: Optional[str] = None,
467
- cancellation_token: threading.Event = threading.Event(),
473
+ cancellation_token: Optional[CancellationToken] = None,
468
474
  ):
469
475
  super().__init__(save_interval, trigger_log_level, thread_name, cancellation_token)
470
476
 
@@ -0,0 +1,90 @@
1
+ import logging
2
+ import signal
3
+ from threading import Condition
4
+ from typing import Any, Optional
5
+
6
+
7
+ class CancellationToken:
8
+ """
9
+ Abstraction for a hierarchical cancellation token.
10
+
11
+ Using this you can create hierarchies of cancellation tokens, to cancel a part of the extractor
12
+ without cancelling the whole process. Use ``create_child_token`` to create a token that will be
13
+ cancelled if the parent is cancelled, but can be canceled alone without affecting the parent token.
14
+ """
15
+
16
+ def __init__(self, condition: Optional[Condition] = None) -> None:
17
+ self._cv: Condition = condition or Condition()
18
+ self._is_cancelled_int: bool = False
19
+ self._parent: Optional["CancellationToken"] = None
20
+
21
+ def __repr__(self) -> str:
22
+ cls = self.__class__
23
+ status = "cancelled" if self.is_cancelled else "not cancelled"
24
+ return f"<{cls.__module__}.{cls.__qualname__} at {id(self):#x}: {status}>"
25
+
26
+ @property
27
+ def is_cancelled(self) -> bool:
28
+ """
29
+ ``True`` if the token has been cancelled, or if some parent token has been cancelled.
30
+ """
31
+ return self._is_cancelled_int or self._parent is not None and self._parent.is_cancelled
32
+
33
+ def is_set(self) -> bool:
34
+ """
35
+ Deprecated, use ``is_cancelled`` instead.
36
+
37
+ ``True`` if the token has been cancelled, or if some parent token has been cancelled.
38
+ """
39
+ return self.is_cancelled
40
+
41
+ def cancel(self) -> None:
42
+ """
43
+ Cancel the token, notifying any waiting threads.
44
+ """
45
+ # No point in cancelling if a parent token is already canceled.
46
+ if self.is_cancelled:
47
+ return
48
+
49
+ with self._cv:
50
+ self._is_cancelled_int = True
51
+ self._cv.notify_all()
52
+
53
+ def set(self) -> None:
54
+ """
55
+ Deprecated, use ``cancel`` instead. This will be removed in the next major release.
56
+
57
+ Cancel the token, notifying any waiting threads.
58
+ """
59
+ self.cancel()
60
+
61
+ def wait(self, timeout: Optional[float] = None) -> bool:
62
+ while not self.is_cancelled:
63
+ with self._cv:
64
+ did_not_time_out = self._cv.wait(timeout)
65
+ if not did_not_time_out:
66
+ return False
67
+ return True
68
+
69
+ def create_child_token(self) -> "CancellationToken":
70
+ child = CancellationToken(self._cv)
71
+ child._parent = self
72
+ return child
73
+
74
+ def cancel_on_interrupt(self) -> None:
75
+ """
76
+ Register an interrupt handler to capture SIGINT (Ctrl-C) and cancel this token,
77
+ instead of throwing a KeyboardInterrupt exception.
78
+ """
79
+
80
+ def sigint_handler(sig_num: int, frame: Any) -> None:
81
+ logger = logging.getLogger(__name__)
82
+ logger.warning("Interrupt signal received, stopping extractor gracefully")
83
+ self.cancel()
84
+ logger.info("Waiting for threads to complete. Send another interrupt to force quit.")
85
+ signal.signal(signal.SIGINT, signal.default_int_handler)
86
+
87
+ try:
88
+ signal.signal(signal.SIGINT, sigint_handler)
89
+ except ValueError as e:
90
+ logging.getLogger(__name__).warning(f"Could not register handler for interrupt signals: {str(e)}")
@@ -22,6 +22,7 @@ from arrow import Arrow
22
22
 
23
23
  from cognite.client import CogniteClient
24
24
  from cognite.extractorutils._inner_util import _resolve_log_level
25
+ from cognite.extractorutils.threading import CancellationToken
25
26
 
26
27
 
27
28
  class AbstractUploadQueue(ABC):
@@ -47,7 +48,7 @@ class AbstractUploadQueue(ABC):
47
48
  max_upload_interval: Optional[int] = None,
48
49
  trigger_log_level: str = "DEBUG",
49
50
  thread_name: Optional[str] = None,
50
- cancellation_token: threading.Event = threading.Event(),
51
+ cancellation_token: Optional[CancellationToken] = None,
51
52
  ):
52
53
  self.cdf_client = cdf_client
53
54
 
@@ -59,7 +60,9 @@ class AbstractUploadQueue(ABC):
59
60
 
60
61
  self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
61
62
  self.lock = threading.RLock()
62
- self.cancellation_token: threading.Event = cancellation_token
63
+ self.cancellation_token: CancellationToken = (
64
+ cancellation_token.create_child_token() if cancellation_token else CancellationToken()
65
+ )
63
66
 
64
67
  self.max_upload_interval = max_upload_interval
65
68
 
@@ -117,7 +120,6 @@ class AbstractUploadQueue(ABC):
117
120
  seconds.
118
121
  """
119
122
  if self.max_upload_interval is not None:
120
- self.cancellation_token.clear()
121
123
  self.thread.start()
122
124
 
123
125
  def stop(self, ensure_upload: bool = True) -> None:
@@ -128,7 +130,7 @@ class AbstractUploadQueue(ABC):
128
130
  ensure_upload (bool): (Optional). Call upload one last time after shutting down thread to ensure empty
129
131
  upload queue.
130
132
  """
131
- self.cancellation_token.set()
133
+ self.cancellation_token.cancel()
132
134
  if ensure_upload:
133
135
  self.upload()
134
136
 
@@ -148,7 +150,7 @@ class TimestampedObject:
148
150
  created: Arrow
149
151
 
150
152
 
151
- RETRY_BACKOFF_FACTOR = 1.5
152
- RETRY_MAX_DELAY = 15
153
- RETRY_DELAY = 5
153
+ RETRY_BACKOFF_FACTOR = 2
154
+ RETRY_MAX_DELAY = 60
155
+ RETRY_DELAY = 1
154
156
  RETRIES = 10