cognite-extractor-utils 6.4.1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +13 -11
- cognite/extractorutils/configtools/elements.py +2 -2
- cognite/extractorutils/configtools/loaders.py +11 -6
- cognite/extractorutils/metrics.py +7 -8
- cognite/extractorutils/statestore.py +86 -80
- cognite/extractorutils/threading.py +90 -0
- cognite/extractorutils/uploader/_base.py +9 -7
- cognite/extractorutils/uploader/assets.py +32 -30
- cognite/extractorutils/uploader/events.py +32 -30
- cognite/extractorutils/uploader/files.py +106 -85
- cognite/extractorutils/uploader/raw.py +17 -17
- cognite/extractorutils/uploader/time_series.py +117 -111
- cognite/extractorutils/uploader_extractor.py +4 -4
- cognite/extractorutils/util.py +41 -36
- {cognite_extractor_utils-6.4.1.dist-info → cognite_extractor_utils-7.0.0.dist-info}/METADATA +1 -3
- cognite_extractor_utils-7.0.0.dist-info/RECORD +27 -0
- cognite/extractorutils/middleware.py +0 -36
- cognite_extractor_utils-6.4.1.dist-info/RECORD +0 -27
- {cognite_extractor_utils-6.4.1.dist-info → cognite_extractor_utils-7.0.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-6.4.1.dist-info → cognite_extractor_utils-7.0.0.dist-info}/WHEEL +0 -0
|
@@ -34,7 +34,7 @@ class _DecimalEncoder(json.JSONEncoder):
|
|
|
34
34
|
|
|
35
35
|
class _DecimalDecoder(json.JSONDecoder):
|
|
36
36
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
37
|
-
json.JSONDecoder.__init__(self, object_hook=self.object_hook,
|
|
37
|
+
json.JSONDecoder.__init__(self, *args, object_hook=self.object_hook, **kwargs)
|
|
38
38
|
|
|
39
39
|
def object_hook(self, obj_dict: Dict[str, str]) -> Union[Dict[str, str], Decimal]:
|
|
40
40
|
if obj_dict.get("type") == "decimal_encoded":
|
cognite/extractorutils/base.py
CHANGED
|
@@ -17,7 +17,7 @@ import os
|
|
|
17
17
|
import sys
|
|
18
18
|
from dataclasses import is_dataclass
|
|
19
19
|
from enum import Enum
|
|
20
|
-
from threading import
|
|
20
|
+
from threading import Thread
|
|
21
21
|
from types import TracebackType
|
|
22
22
|
from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
|
|
23
23
|
|
|
@@ -29,7 +29,7 @@ from cognite.extractorutils.configtools import BaseConfig, ConfigResolver, State
|
|
|
29
29
|
from cognite.extractorutils.exceptions import InvalidConfigError
|
|
30
30
|
from cognite.extractorutils.metrics import BaseMetrics
|
|
31
31
|
from cognite.extractorutils.statestore import AbstractStateStore, LocalStateStore, NoStateStore
|
|
32
|
-
from cognite.extractorutils.
|
|
32
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class ReloadConfigAction(Enum):
|
|
@@ -77,11 +77,13 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
77
77
|
name: str,
|
|
78
78
|
description: str,
|
|
79
79
|
version: Optional[str] = None,
|
|
80
|
-
run_handle: Optional[
|
|
80
|
+
run_handle: Optional[
|
|
81
|
+
Callable[[CogniteClient, AbstractStateStore, CustomConfigClass, CancellationToken], None]
|
|
82
|
+
] = None,
|
|
81
83
|
config_class: Type[CustomConfigClass],
|
|
82
84
|
metrics: Optional[BaseMetrics] = None,
|
|
83
85
|
use_default_state_store: bool = True,
|
|
84
|
-
cancellation_token:
|
|
86
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
85
87
|
config_file_path: Optional[str] = None,
|
|
86
88
|
continuous_extractor: bool = False,
|
|
87
89
|
heartbeat_waiting_time: int = 600,
|
|
@@ -95,7 +97,7 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
95
97
|
self.config_class = config_class
|
|
96
98
|
self.use_default_state_store = use_default_state_store
|
|
97
99
|
self.version = version or "unknown"
|
|
98
|
-
self.cancellation_token = cancellation_token
|
|
100
|
+
self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
|
|
99
101
|
self.config_file_path = config_file_path
|
|
100
102
|
self.continuous_extractor = continuous_extractor
|
|
101
103
|
self.heartbeat_waiting_time = heartbeat_waiting_time
|
|
@@ -136,7 +138,7 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
136
138
|
Extractor._config_singleton = self.config # type: ignore
|
|
137
139
|
|
|
138
140
|
def config_refresher() -> None:
|
|
139
|
-
while not self.cancellation_token.
|
|
141
|
+
while not self.cancellation_token.is_cancelled:
|
|
140
142
|
self.cancellation_token.wait(self.reload_config_interval)
|
|
141
143
|
if self.config_resolver.has_changed:
|
|
142
144
|
self._reload_config()
|
|
@@ -158,7 +160,7 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
158
160
|
|
|
159
161
|
elif self.reload_config_action == ReloadConfigAction.SHUTDOWN:
|
|
160
162
|
self.logger.info("Shutting down, expecting to be restarted")
|
|
161
|
-
self.cancellation_token.
|
|
163
|
+
self.cancellation_token.cancel()
|
|
162
164
|
|
|
163
165
|
elif self.reload_config_action == ReloadConfigAction.CALLBACK:
|
|
164
166
|
self.logger.info("Loading in new config file")
|
|
@@ -265,7 +267,7 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
265
267
|
self.logger.info(f"Loaded {'remote' if self.config_resolver.is_remote else 'local'} config file")
|
|
266
268
|
|
|
267
269
|
if self.handle_interrupts:
|
|
268
|
-
|
|
270
|
+
self.cancellation_token.cancel_on_interrupt()
|
|
269
271
|
|
|
270
272
|
self.cognite_client = self.config.cognite.get_cognite_client(self.name)
|
|
271
273
|
self._load_state_store()
|
|
@@ -279,10 +281,10 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
279
281
|
pass
|
|
280
282
|
|
|
281
283
|
def heartbeat_loop() -> None:
|
|
282
|
-
while not self.cancellation_token.
|
|
284
|
+
while not self.cancellation_token.is_cancelled:
|
|
283
285
|
self.cancellation_token.wait(self.heartbeat_waiting_time)
|
|
284
286
|
|
|
285
|
-
if not self.cancellation_token.
|
|
287
|
+
if not self.cancellation_token.is_cancelled:
|
|
286
288
|
self.logger.info("Reporting new heartbeat")
|
|
287
289
|
try:
|
|
288
290
|
self.cognite_client.extraction_pipelines.runs.create(
|
|
@@ -329,7 +331,7 @@ class Extractor(Generic[CustomConfigClass]):
|
|
|
329
331
|
Returns:
|
|
330
332
|
True if the extractor shut down cleanly, False if the extractor was shut down due to an unhandled error
|
|
331
333
|
"""
|
|
332
|
-
self.cancellation_token.
|
|
334
|
+
self.cancellation_token.cancel()
|
|
333
335
|
|
|
334
336
|
if self.state_store:
|
|
335
337
|
self.state_store.synchronize()
|
|
@@ -18,7 +18,6 @@ from dataclasses import dataclass, field
|
|
|
18
18
|
from datetime import timedelta
|
|
19
19
|
from enum import Enum
|
|
20
20
|
from logging.handlers import TimedRotatingFileHandler
|
|
21
|
-
from threading import Event
|
|
22
21
|
from time import sleep
|
|
23
22
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
24
23
|
from urllib.parse import urljoin
|
|
@@ -46,6 +45,7 @@ from cognite.extractorutils.statestore import (
|
|
|
46
45
|
NoStateStore,
|
|
47
46
|
RawStateStore,
|
|
48
47
|
)
|
|
48
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
49
49
|
from cognite.extractorutils.util import EitherId
|
|
50
50
|
|
|
51
51
|
_logger = logging.getLogger(__name__)
|
|
@@ -481,7 +481,7 @@ class MetricsConfig:
|
|
|
481
481
|
cognite: Optional[_CogniteMetricsConfig]
|
|
482
482
|
server: Optional[_PromServerConfig]
|
|
483
483
|
|
|
484
|
-
def start_pushers(self, cdf_client: CogniteClient, cancellation_token:
|
|
484
|
+
def start_pushers(self, cdf_client: CogniteClient, cancellation_token: Optional[CancellationToken] = None) -> None:
|
|
485
485
|
self._pushers: List[AbstractMetricsPusher] = []
|
|
486
486
|
self._clear_on_stop: Dict[PrometheusPusher, int] = {}
|
|
487
487
|
|
|
@@ -20,6 +20,7 @@ import re
|
|
|
20
20
|
import sys
|
|
21
21
|
from enum import Enum
|
|
22
22
|
from hashlib import sha256
|
|
23
|
+
from pathlib import Path
|
|
23
24
|
from typing import Any, Callable, Dict, Generic, Iterable, Optional, TextIO, Type, TypeVar, Union
|
|
24
25
|
|
|
25
26
|
import dacite
|
|
@@ -111,7 +112,7 @@ class KeyVaultLoader:
|
|
|
111
112
|
try:
|
|
112
113
|
return self.client.get_secret(node.value).value # type: ignore # _init_client guarantees not None
|
|
113
114
|
except (ResourceNotFoundError, ServiceRequestError, HttpResponseError) as e:
|
|
114
|
-
raise InvalidConfigError(str(e))
|
|
115
|
+
raise InvalidConfigError(str(e)) from e
|
|
115
116
|
|
|
116
117
|
|
|
117
118
|
class _EnvLoader(yaml.SafeLoader):
|
|
@@ -188,11 +189,15 @@ def _load_yaml(
|
|
|
188
189
|
|
|
189
190
|
try:
|
|
190
191
|
config = dacite.from_dict(
|
|
191
|
-
data=config_dict,
|
|
192
|
+
data=config_dict,
|
|
193
|
+
data_class=config_type,
|
|
194
|
+
config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig, Path]),
|
|
192
195
|
)
|
|
193
196
|
except dacite.UnexpectedDataError as e:
|
|
194
197
|
unknowns = [f'"{k.replace("_", "-") if case_style == "hyphen" else k}"' for k in e.keys]
|
|
195
|
-
raise InvalidConfigError(
|
|
198
|
+
raise InvalidConfigError(
|
|
199
|
+
f"Unknown config parameter{'s' if len(unknowns) > 1 else ''} {', '.join(unknowns)}"
|
|
200
|
+
) from e
|
|
196
201
|
|
|
197
202
|
except (dacite.WrongTypeError, dacite.MissingValueError, dacite.UnionMatchError) as e:
|
|
198
203
|
if e.field_path:
|
|
@@ -212,11 +217,11 @@ def _load_yaml(
|
|
|
212
217
|
|
|
213
218
|
raise InvalidConfigError(
|
|
214
219
|
f'Wrong type for field "{path}" - got "{e.value}" of type {got_type} instead of {need_type}'
|
|
215
|
-
)
|
|
216
|
-
raise InvalidConfigError(f'Missing mandatory field "{path}"')
|
|
220
|
+
) from e
|
|
221
|
+
raise InvalidConfigError(f'Missing mandatory field "{path}"') from e
|
|
217
222
|
|
|
218
223
|
except dacite.ForwardReferenceError as e:
|
|
219
|
-
raise ValueError(f"Invalid config class: {str(e)}")
|
|
224
|
+
raise ValueError(f"Invalid config class: {str(e)}") from e
|
|
220
225
|
|
|
221
226
|
config._file_hash = sha256(json.dumps(config_dict).encode("utf-8")).hexdigest()
|
|
222
227
|
|
|
@@ -41,7 +41,6 @@ import logging
|
|
|
41
41
|
import os
|
|
42
42
|
import threading
|
|
43
43
|
from abc import ABC, abstractmethod
|
|
44
|
-
from threading import Event
|
|
45
44
|
from time import sleep
|
|
46
45
|
from types import TracebackType
|
|
47
46
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
|
|
@@ -55,6 +54,7 @@ from prometheus_client.exposition import basic_auth_handler, delete_from_gateway
|
|
|
55
54
|
from cognite.client import CogniteClient
|
|
56
55
|
from cognite.client.data_classes import Asset, Datapoints, DatapointsArray, TimeSeries
|
|
57
56
|
from cognite.client.exceptions import CogniteDuplicatedError
|
|
57
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
58
58
|
from cognite.extractorutils.util import EitherId
|
|
59
59
|
|
|
60
60
|
from .util import ensure_time_series
|
|
@@ -179,14 +179,14 @@ class AbstractMetricsPusher(ABC):
|
|
|
179
179
|
self,
|
|
180
180
|
push_interval: Optional[int] = None,
|
|
181
181
|
thread_name: Optional[str] = None,
|
|
182
|
-
cancellation_token:
|
|
182
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
183
183
|
):
|
|
184
184
|
self.push_interval = push_interval
|
|
185
185
|
self.thread_name = thread_name
|
|
186
186
|
|
|
187
187
|
self.thread: Optional[threading.Thread] = None
|
|
188
188
|
self.thread_name = thread_name
|
|
189
|
-
self.cancellation_token = cancellation_token
|
|
189
|
+
self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
|
|
190
190
|
|
|
191
191
|
self.logger = logging.getLogger(__name__)
|
|
192
192
|
|
|
@@ -201,7 +201,7 @@ class AbstractMetricsPusher(ABC):
|
|
|
201
201
|
"""
|
|
202
202
|
Run push loop.
|
|
203
203
|
"""
|
|
204
|
-
while not self.cancellation_token.
|
|
204
|
+
while not self.cancellation_token.is_cancelled:
|
|
205
205
|
self._push_to_server()
|
|
206
206
|
self.cancellation_token.wait(self.push_interval)
|
|
207
207
|
|
|
@@ -210,7 +210,6 @@ class AbstractMetricsPusher(ABC):
|
|
|
210
210
|
Starts a thread that pushes the default registry to the configured gateway at certain intervals.
|
|
211
211
|
|
|
212
212
|
"""
|
|
213
|
-
self.cancellation_token.clear()
|
|
214
213
|
self.thread = threading.Thread(target=self._run, daemon=True, name=self.thread_name)
|
|
215
214
|
self.thread.start()
|
|
216
215
|
|
|
@@ -220,7 +219,7 @@ class AbstractMetricsPusher(ABC):
|
|
|
220
219
|
"""
|
|
221
220
|
# Make sure everything is pushed
|
|
222
221
|
self._push_to_server()
|
|
223
|
-
self.cancellation_token.
|
|
222
|
+
self.cancellation_token.cancel()
|
|
224
223
|
|
|
225
224
|
def __enter__(self) -> "AbstractMetricsPusher":
|
|
226
225
|
"""
|
|
@@ -268,7 +267,7 @@ class PrometheusPusher(AbstractMetricsPusher):
|
|
|
268
267
|
username: Optional[str] = None,
|
|
269
268
|
password: Optional[str] = None,
|
|
270
269
|
thread_name: Optional[str] = None,
|
|
271
|
-
cancellation_token:
|
|
270
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
272
271
|
):
|
|
273
272
|
super(PrometheusPusher, self).__init__(push_interval, thread_name, cancellation_token)
|
|
274
273
|
|
|
@@ -344,7 +343,7 @@ class CognitePusher(AbstractMetricsPusher):
|
|
|
344
343
|
asset: Optional[Asset] = None,
|
|
345
344
|
data_set: Optional[EitherId] = None,
|
|
346
345
|
thread_name: Optional[str] = None,
|
|
347
|
-
cancellation_token:
|
|
346
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
348
347
|
):
|
|
349
348
|
super(CognitePusher, self).__init__(push_interval, thread_name, cancellation_token)
|
|
350
349
|
|
|
@@ -92,18 +92,17 @@ from abc import ABC, abstractmethod
|
|
|
92
92
|
from types import TracebackType
|
|
93
93
|
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
|
|
94
94
|
|
|
95
|
-
from requests.exceptions import ConnectionError
|
|
96
|
-
|
|
97
95
|
from cognite.client import CogniteClient
|
|
98
|
-
from cognite.client.exceptions import CogniteAPIError
|
|
96
|
+
from cognite.client.exceptions import CogniteAPIError
|
|
97
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
99
98
|
from cognite.extractorutils.uploader import DataPointList
|
|
100
99
|
|
|
101
100
|
from ._inner_util import _DecimalDecoder, _DecimalEncoder, _resolve_log_level
|
|
102
|
-
from .util import retry
|
|
101
|
+
from .util import cognite_exceptions, retry
|
|
103
102
|
|
|
104
103
|
RETRY_BACKOFF_FACTOR = 1.5
|
|
105
|
-
RETRY_MAX_DELAY =
|
|
106
|
-
RETRY_DELAY =
|
|
104
|
+
RETRY_MAX_DELAY = 60
|
|
105
|
+
RETRY_DELAY = 1
|
|
107
106
|
RETRIES = 10
|
|
108
107
|
|
|
109
108
|
|
|
@@ -124,7 +123,7 @@ class AbstractStateStore(ABC):
|
|
|
124
123
|
save_interval: Optional[int] = None,
|
|
125
124
|
trigger_log_level: str = "DEBUG",
|
|
126
125
|
thread_name: Optional[str] = None,
|
|
127
|
-
cancellation_token:
|
|
126
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
128
127
|
):
|
|
129
128
|
self._initialized = False
|
|
130
129
|
self._local_state: Dict[str, Dict[str, Any]] = {}
|
|
@@ -135,7 +134,7 @@ class AbstractStateStore(ABC):
|
|
|
135
134
|
|
|
136
135
|
self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
|
|
137
136
|
self.lock = threading.RLock()
|
|
138
|
-
self.cancellation_token
|
|
137
|
+
self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
|
|
139
138
|
|
|
140
139
|
self._deleted: List[str] = []
|
|
141
140
|
|
|
@@ -145,7 +144,6 @@ class AbstractStateStore(ABC):
|
|
|
145
144
|
This calls the synchronize method every save_interval seconds.
|
|
146
145
|
"""
|
|
147
146
|
if self.save_interval is not None:
|
|
148
|
-
self.cancellation_token.clear()
|
|
149
147
|
self.thread.start()
|
|
150
148
|
|
|
151
149
|
def stop(self, ensure_synchronize: bool = True) -> None:
|
|
@@ -155,7 +153,7 @@ class AbstractStateStore(ABC):
|
|
|
155
153
|
Args:
|
|
156
154
|
ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
|
|
157
155
|
"""
|
|
158
|
-
self.cancellation_token.
|
|
156
|
+
self.cancellation_token.cancel()
|
|
159
157
|
if ensure_synchronize:
|
|
160
158
|
self.synchronize()
|
|
161
159
|
|
|
@@ -337,7 +335,7 @@ class RawStateStore(AbstractStateStore):
|
|
|
337
335
|
save_interval: Optional[int] = None,
|
|
338
336
|
trigger_log_level: str = "DEBUG",
|
|
339
337
|
thread_name: Optional[str] = None,
|
|
340
|
-
cancellation_token:
|
|
338
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
341
339
|
):
|
|
342
340
|
super().__init__(save_interval, trigger_log_level, thread_name, cancellation_token)
|
|
343
341
|
|
|
@@ -347,79 +345,87 @@ class RawStateStore(AbstractStateStore):
|
|
|
347
345
|
|
|
348
346
|
self._ensure_table()
|
|
349
347
|
|
|
350
|
-
@retry(
|
|
351
|
-
exceptions=(CogniteException, ConnectionError),
|
|
352
|
-
tries=RETRIES,
|
|
353
|
-
delay=RETRY_DELAY,
|
|
354
|
-
max_delay=RETRY_MAX_DELAY,
|
|
355
|
-
backoff=RETRY_BACKOFF_FACTOR,
|
|
356
|
-
)
|
|
357
348
|
def _ensure_table(self) -> None:
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
backoff=RETRY_BACKOFF_FACTOR,
|
|
378
|
-
)
|
|
379
|
-
def _initialize_implementation(self, force: bool = False) -> None:
|
|
380
|
-
"""
|
|
381
|
-
Get all known states.
|
|
382
|
-
|
|
383
|
-
Args:
|
|
384
|
-
force: Enable re-initialization, ie overwrite when called multiple times
|
|
385
|
-
"""
|
|
386
|
-
if self._initialized and not force:
|
|
387
|
-
return
|
|
388
|
-
|
|
389
|
-
# ignore type since list _is_ optional, sdk types are wrong
|
|
390
|
-
rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None) # type: ignore
|
|
349
|
+
@retry(
|
|
350
|
+
exceptions=cognite_exceptions(),
|
|
351
|
+
cancellation_token=self.cancellation_token,
|
|
352
|
+
tries=RETRIES,
|
|
353
|
+
delay=RETRY_DELAY,
|
|
354
|
+
max_delay=RETRY_MAX_DELAY,
|
|
355
|
+
backoff=RETRY_BACKOFF_FACTOR,
|
|
356
|
+
)
|
|
357
|
+
def impl() -> None:
|
|
358
|
+
try:
|
|
359
|
+
self._cdf_client.raw.databases.create(self.database)
|
|
360
|
+
except CogniteAPIError as e:
|
|
361
|
+
if not e.code == 400:
|
|
362
|
+
raise e
|
|
363
|
+
try:
|
|
364
|
+
self._cdf_client.raw.tables.create(self.database, self.table)
|
|
365
|
+
except CogniteAPIError as e:
|
|
366
|
+
if not e.code == 400:
|
|
367
|
+
raise e
|
|
391
368
|
|
|
392
|
-
|
|
393
|
-
self._local_state.clear()
|
|
394
|
-
for row in rows:
|
|
395
|
-
if row.key is None or row.columns is None:
|
|
396
|
-
self.logger.warning(f"None encountered in row: {str(row)}")
|
|
397
|
-
# should never happen, but type from sdk is optional
|
|
398
|
-
continue
|
|
399
|
-
self._local_state[row.key] = row.columns
|
|
369
|
+
impl()
|
|
400
370
|
|
|
401
|
-
|
|
371
|
+
def initialize(self, force: bool = False) -> None:
|
|
372
|
+
@retry(
|
|
373
|
+
exceptions=cognite_exceptions(),
|
|
374
|
+
cancellation_token=self.cancellation_token,
|
|
375
|
+
tries=RETRIES,
|
|
376
|
+
delay=RETRY_DELAY,
|
|
377
|
+
max_delay=RETRY_MAX_DELAY,
|
|
378
|
+
backoff=RETRY_BACKOFF_FACTOR,
|
|
379
|
+
)
|
|
380
|
+
def impl() -> None:
|
|
381
|
+
"""
|
|
382
|
+
Get all known states.
|
|
383
|
+
|
|
384
|
+
Args:
|
|
385
|
+
force: Enable re-initialization, ie overwrite when called multiple times
|
|
386
|
+
"""
|
|
387
|
+
if self._initialized and not force:
|
|
388
|
+
return
|
|
389
|
+
|
|
390
|
+
# ignore type since list _is_ optional, sdk types are wrong
|
|
391
|
+
rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None) # type: ignore
|
|
392
|
+
|
|
393
|
+
with self.lock:
|
|
394
|
+
self._local_state.clear()
|
|
395
|
+
for row in rows:
|
|
396
|
+
if row.key is None or row.columns is None:
|
|
397
|
+
self.logger.warning(f"None encountered in row: {str(row)}")
|
|
398
|
+
# should never happen, but type from sdk is optional
|
|
399
|
+
continue
|
|
400
|
+
self._local_state[row.key] = row.columns
|
|
401
|
+
|
|
402
|
+
self._initialized = True
|
|
403
|
+
|
|
404
|
+
impl()
|
|
402
405
|
|
|
403
406
|
def synchronize(self) -> None:
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
self.
|
|
407
|
+
@retry(
|
|
408
|
+
exceptions=cognite_exceptions(),
|
|
409
|
+
cancellation_token=self.cancellation_token,
|
|
410
|
+
tries=RETRIES,
|
|
411
|
+
delay=RETRY_DELAY,
|
|
412
|
+
max_delay=RETRY_MAX_DELAY,
|
|
413
|
+
backoff=RETRY_BACKOFF_FACTOR,
|
|
414
|
+
)
|
|
415
|
+
def impl() -> None:
|
|
416
|
+
"""
|
|
417
|
+
Upload local state store to CDF
|
|
418
|
+
"""
|
|
419
|
+
self._cdf_client.raw.rows.insert(db_name=self.database, table_name=self.table, row=self._local_state)
|
|
420
|
+
# Create a copy of deleted to facilitate testing (mock library stores list, and as it changes, the
|
|
421
|
+
# assertions fail)
|
|
422
|
+
self._cdf_client.raw.rows.delete(
|
|
423
|
+
db_name=self.database, table_name=self.table, key=[k for k in self._deleted]
|
|
424
|
+
)
|
|
425
|
+
with self.lock:
|
|
426
|
+
self._deleted.clear()
|
|
427
|
+
|
|
428
|
+
impl()
|
|
423
429
|
|
|
424
430
|
def __enter__(self) -> "RawStateStore":
|
|
425
431
|
"""
|
|
@@ -464,7 +470,7 @@ class LocalStateStore(AbstractStateStore):
|
|
|
464
470
|
save_interval: Optional[int] = None,
|
|
465
471
|
trigger_log_level: str = "DEBUG",
|
|
466
472
|
thread_name: Optional[str] = None,
|
|
467
|
-
cancellation_token:
|
|
473
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
468
474
|
):
|
|
469
475
|
super().__init__(save_interval, trigger_log_level, thread_name, cancellation_token)
|
|
470
476
|
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import signal
|
|
3
|
+
from threading import Condition
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CancellationToken:
    """
    Abstraction for a hierarchical cancellation token.

    Using this you can create hierarchies of cancellation tokens, to cancel a part of the extractor
    without cancelling the whole process. Use ``create_child_token`` to create a token that will be
    cancelled if the parent is cancelled, but can be cancelled alone without affecting the parent token.

    Args:
        condition: Condition variable used to block and wake waiting threads. A new one is created
            when omitted. Child tokens reuse their parent's condition, so a single ``notify_all``
            reaches every waiter in the hierarchy.
    """

    def __init__(self, condition: Optional[Condition] = None) -> None:
        self._cv: Condition = condition or Condition()
        # Only records whether *this* token was cancelled; ancestors are consulted in is_cancelled.
        self._is_cancelled_int: bool = False
        self._parent: Optional["CancellationToken"] = None

    def __repr__(self) -> str:
        cls = self.__class__
        status = "cancelled" if self.is_cancelled else "not cancelled"
        return f"<{cls.__module__}.{cls.__qualname__} at {id(self):#x}: {status}>"

    @property
    def is_cancelled(self) -> bool:
        """
        ``True`` if the token has been cancelled, or if some parent token has been cancelled.
        """
        return self._is_cancelled_int or self._parent is not None and self._parent.is_cancelled

    def is_set(self) -> bool:
        """
        Deprecated, use ``is_cancelled`` instead.

        ``True`` if the token has been cancelled, or if some parent token has been cancelled.
        """
        return self.is_cancelled

    def cancel(self) -> None:
        """
        Cancel the token, notifying any waiting threads.

        Does nothing if this token, or any of its parents, is already cancelled.
        """
        # The check and the state change happen under the same lock that ``wait`` holds while it
        # evaluates its predicate, so a waiter can never miss the notification.
        with self._cv:
            # No point in cancelling if a parent token is already cancelled.
            if self.is_cancelled:
                return

            self._is_cancelled_int = True
            self._cv.notify_all()

    def set(self) -> None:
        """
        Deprecated, use ``cancel`` instead. This will be removed in the next major release.

        Cancel the token, notifying any waiting threads.
        """
        self.cancel()

    def wait(self, timeout: Optional[float] = None) -> bool:
        """
        Block until the token (or one of its parents) is cancelled, or until the timeout expires.

        Args:
            timeout: Maximum number of seconds to wait. ``None`` waits until cancellation.

        Returns:
            ``True`` if the token is cancelled, ``False`` if the wait timed out first.
        """
        with self._cv:
            # wait_for tests the predicate while holding the lock and tracks one overall deadline.
            # Wake-ups caused by unrelated tokens sharing this condition therefore neither lose a
            # cancellation nor restart the timeout.
            return self._cv.wait_for(lambda: self.is_cancelled, timeout)

    def create_child_token(self) -> "CancellationToken":
        """
        Create a token that is cancelled whenever this token is cancelled, but which can also be
        cancelled on its own without affecting this token.

        Returns:
            The new child token, sharing this token's condition variable.
        """
        child = CancellationToken(self._cv)
        child._parent = self
        return child

    def cancel_on_interrupt(self) -> None:
        """
        Register an interrupt handler to capture SIGINT (Ctrl-C) and cancel this token,
        instead of throwing a KeyboardInterrupt exception.

        If the handler cannot be registered (``signal.signal`` raises ``ValueError``), a warning
        is logged and the token is left without an interrupt handler.
        """

        def sigint_handler(sig_num: int, frame: Any) -> None:
            logger = logging.getLogger(__name__)
            logger.warning("Interrupt signal received, stopping extractor gracefully")
            self.cancel()
            logger.info("Waiting for threads to complete. Send another interrupt to force quit.")
            # Restore the default handler so that a second interrupt raises KeyboardInterrupt.
            signal.signal(signal.SIGINT, signal.default_int_handler)

        try:
            signal.signal(signal.SIGINT, sigint_handler)
        except ValueError as e:
            logging.getLogger(__name__).warning(f"Could not register handler for interrupt signals: {str(e)}")
|
|
@@ -22,6 +22,7 @@ from arrow import Arrow
|
|
|
22
22
|
|
|
23
23
|
from cognite.client import CogniteClient
|
|
24
24
|
from cognite.extractorutils._inner_util import _resolve_log_level
|
|
25
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
class AbstractUploadQueue(ABC):
|
|
@@ -47,7 +48,7 @@ class AbstractUploadQueue(ABC):
|
|
|
47
48
|
max_upload_interval: Optional[int] = None,
|
|
48
49
|
trigger_log_level: str = "DEBUG",
|
|
49
50
|
thread_name: Optional[str] = None,
|
|
50
|
-
cancellation_token:
|
|
51
|
+
cancellation_token: Optional[CancellationToken] = None,
|
|
51
52
|
):
|
|
52
53
|
self.cdf_client = cdf_client
|
|
53
54
|
|
|
@@ -59,7 +60,9 @@ class AbstractUploadQueue(ABC):
|
|
|
59
60
|
|
|
60
61
|
self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
|
|
61
62
|
self.lock = threading.RLock()
|
|
62
|
-
self.cancellation_token:
|
|
63
|
+
self.cancellation_token: CancellationToken = (
|
|
64
|
+
cancellation_token.create_child_token() if cancellation_token else CancellationToken()
|
|
65
|
+
)
|
|
63
66
|
|
|
64
67
|
self.max_upload_interval = max_upload_interval
|
|
65
68
|
|
|
@@ -117,7 +120,6 @@ class AbstractUploadQueue(ABC):
|
|
|
117
120
|
seconds.
|
|
118
121
|
"""
|
|
119
122
|
if self.max_upload_interval is not None:
|
|
120
|
-
self.cancellation_token.clear()
|
|
121
123
|
self.thread.start()
|
|
122
124
|
|
|
123
125
|
def stop(self, ensure_upload: bool = True) -> None:
|
|
@@ -128,7 +130,7 @@ class AbstractUploadQueue(ABC):
|
|
|
128
130
|
ensure_upload (bool): (Optional). Call upload one last time after shutting down thread to ensure empty
|
|
129
131
|
upload queue.
|
|
130
132
|
"""
|
|
131
|
-
self.cancellation_token.
|
|
133
|
+
self.cancellation_token.cancel()
|
|
132
134
|
if ensure_upload:
|
|
133
135
|
self.upload()
|
|
134
136
|
|
|
@@ -148,7 +150,7 @@ class TimestampedObject:
|
|
|
148
150
|
created: Arrow
|
|
149
151
|
|
|
150
152
|
|
|
151
|
-
RETRY_BACKOFF_FACTOR =
|
|
152
|
-
RETRY_MAX_DELAY =
|
|
153
|
-
RETRY_DELAY =
|
|
153
|
+
RETRY_BACKOFF_FACTOR = 2
|
|
154
|
+
RETRY_MAX_DELAY = 60
|
|
155
|
+
RETRY_DELAY = 1
|
|
154
156
|
RETRIES = 10
|