cognite-extractor-utils 6.4.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +13 -11
- cognite/extractorutils/configtools/elements.py +2 -2
- cognite/extractorutils/configtools/loaders.py +11 -6
- cognite/extractorutils/metrics.py +7 -8
- cognite/extractorutils/statestore.py +86 -80
- cognite/extractorutils/threading.py +90 -0
- cognite/extractorutils/uploader/_base.py +9 -7
- cognite/extractorutils/uploader/assets.py +32 -30
- cognite/extractorutils/uploader/events.py +32 -30
- cognite/extractorutils/uploader/files.py +118 -86
- cognite/extractorutils/uploader/raw.py +17 -17
- cognite/extractorutils/uploader/time_series.py +117 -111
- cognite/extractorutils/uploader_extractor.py +4 -4
- cognite/extractorutils/util.py +41 -36
- {cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/METADATA +1 -3
- cognite_extractor_utils-7.0.0.dist-info/RECORD +27 -0
- cognite/extractorutils/middleware.py +0 -36
- cognite_extractor_utils-6.4.0.dist-info/RECORD +0 -27
- {cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/WHEEL +0 -0
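The diffs below all revolve around one breaking change: 7.0.0 replaces `threading.Event`-based stop signalling with the new `cognite.extractorutils.threading.CancellationToken` class, and the `retry` helpers in `util.py` become cancellation-aware. A minimal sketch of the new token API, based only on the calls visible in these diffs (`CancellationToken()`, `is_cancelled`, `cancel()`) and assuming no other behaviour:

    from cognite.extractorutils.threading import CancellationToken

    token = CancellationToken()

    def worker(token: CancellationToken) -> None:
        # Run until another thread (or a signal handler) cancels the token
        while not token.is_cancelled:
            ...  # do one unit of work

    # On shutdown, request cancellation; workers observe it via is_cancelled
    token.cancel()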
cognite/extractorutils/uploader/time_series.py
CHANGED
@@ -13,13 +13,10 @@
 # limitations under the License.

 import math
-import threading
 from datetime import datetime
 from types import TracebackType
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

-from requests import ConnectionError
-
 from cognite.client import CogniteClient
 from cognite.client.data_classes import (
     Sequence,
@@ -27,7 +24,8 @@ from cognite.client.data_classes import (
     SequenceRows,
     TimeSeries,
 )
-from cognite.client.exceptions import
+from cognite.client.exceptions import CogniteDuplicatedError, CogniteNotFoundError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -44,7 +42,7 @@ from cognite.extractorutils.uploader._metrics import (
     TIMESERIES_UPLOADER_POINTS_WRITTEN,
     TIMESERIES_UPLOADER_QUEUE_SIZE,
 )
-from cognite.extractorutils.util import EitherId, retry
+from cognite.extractorutils.util import EitherId, cognite_exceptions, retry

 MIN_DATAPOINT_TIMESTAMP = -2208988800000
 MAX_DATAPOINT_STRING_LENGTH = 255
@@ -108,7 +106,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         thread_name: Optional[str] = None,
         create_missing: Union[Callable[[str, DataPointList], TimeSeries], bool] = False,
         data_set_id: Optional[int] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -169,7 +167,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return True

     def add_to_upload_queue(
-        self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: DataPointList =
+        self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: Optional[DataPointList] = None
     ) -> None:
         """
         Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -180,6 +178,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             external_id: External ID of time series. Either this or external_id must be set.
             datapoints: List of data points to add
         """
+        datapoints = datapoints or []
         old_len = len(datapoints)
         datapoints = list(filter(self._is_datapoint_valid, datapoints))

@@ -207,11 +206,82 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(upload_this: List[Dict], retries: int = 5) -> List[Dict]:
+            if len(upload_this) == 0:
+                return upload_this
+
+            try:
+                self.cdf_client.time_series.data.insert_multiple(upload_this)
+
+            except CogniteNotFoundError as ex:
+                if not retries:
+                    raise ex
+
+                if not self.create_missing:
+                    self.logger.error("Could not upload data points to %s: %s", str(ex.not_found), str(ex))
+
+                # Get IDs of time series that exists, but failed because of the non-existing time series
+                retry_these = [EitherId(**id_dict) for id_dict in ex.failed if id_dict not in ex.not_found]
+
+                if self.create_missing:
+                    # Get the time series that can be created
+                    create_these_ids = set(
+                        [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
+                    )
+                    datapoints_lists: Dict[str, DataPointList] = {
+                        ts_dict["externalId"]: ts_dict["datapoints"]
+                        for ts_dict in upload_this
+                        if ts_dict["externalId"] in create_these_ids
+                    }
+
+                    self.logger.info(f"Creating {len(create_these_ids)} time series")
+                    to_create: List[TimeSeries] = [
+                        self.missing_factory(external_id, datapoints_lists[external_id])
+                        for external_id in create_these_ids
+                    ]
+                    if self.data_set_id is not None:
+                        for ts in to_create:
+                            if ts.data_set_id is None:
+                                ts.data_set_id = self.data_set_id
+                    self.cdf_client.time_series.create(to_create)
+
+                    retry_these.extend([EitherId(external_id=i) for i in create_these_ids])
+
+                    if len(ex.not_found) != len(create_these_ids):
+                        missing = [id_dict for id_dict in ex.not_found if id_dict.get("externalId") not in retry_these]
+                        missing_num = len(ex.not_found) - len(create_these_ids)
+                        self.logger.error(
+                            f"{missing_num} time series not found, and could not be created automatically:\n"
+                            + str(missing)
+                            + "\nData will be dropped"
+                        )
+
+                # Remove entries with non-existing time series from upload queue
+                upload_this = [
+                    entry
+                    for entry in upload_this
+                    if EitherId(id=entry.get("id"), external_id=entry.get("externalId")) in retry_these
+                ]
+
+                # Upload remaining
+                _upload_batch(upload_this, retries - 1)
+
+            return upload_this
+
         if len(self.upload_queue) == 0:
             return

         with self.lock:
-            upload_this =
+            upload_this = _upload_batch(
                 [
                     {either_id.type(): either_id.content(), "datapoints": datapoints}
                     for either_id, datapoints in self.upload_queue.items()
@@ -219,7 +289,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                 ]
             )

-            for
+            for _either_id, datapoints in self.upload_queue.items():
                 self.points_written.inc(len(datapoints))

             try:
@@ -232,72 +302,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)

-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self, upload_this: List[Dict], retries: int = 5) -> List[Dict]:
-        if len(upload_this) == 0:
-            return upload_this
-
-        try:
-            self.cdf_client.time_series.data.insert_multiple(upload_this)
-
-        except CogniteNotFoundError as ex:
-            if not retries:
-                raise ex
-
-            if not self.create_missing:
-                self.logger.error("Could not upload data points to %s: %s", str(ex.not_found), str(ex))
-
-            # Get IDs of time series that exists, but failed because of the non-existing time series
-            retry_these = [EitherId(**id_dict) for id_dict in ex.failed if id_dict not in ex.not_found]
-
-            if self.create_missing:
-                # Get the time series that can be created
-                create_these_ids = set([id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict])
-                datapoints_lists: Dict[str, DataPointList] = {
-                    ts_dict["externalId"]: ts_dict["datapoints"]
-                    for ts_dict in upload_this
-                    if ts_dict["externalId"] in create_these_ids
-                }
-
-                self.logger.info(f"Creating {len(create_these_ids)} time series")
-                to_create: List[TimeSeries] = [
-                    self.missing_factory(external_id, datapoints_lists[external_id]) for external_id in create_these_ids
-                ]
-                if self.data_set_id is not None:
-                    for ts in to_create:
-                        if ts.data_set_id is None:
-                            ts.data_set_id = self.data_set_id
-                self.cdf_client.time_series.create(to_create)
-
-                retry_these.extend([EitherId(external_id=i) for i in create_these_ids])
-
-                if len(ex.not_found) != len(create_these_ids):
-                    missing = [id_dict for id_dict in ex.not_found if id_dict.get("externalId") not in retry_these]
-                    missing_num = len(ex.not_found) - len(create_these_ids)
-                    self.logger.error(
-                        f"{missing_num} time series not found, and could not be created automatically:\n"
-                        + str(missing)
-                        + "\nData will be dropped"
-                    )
-
-            # Remove entries with non-existing time series from upload queue
-            upload_this = [
-                entry
-                for entry in upload_this
-                if EitherId(id=entry.get("id"), external_id=entry.get("externalId")) in retry_these
-            ]
-
-            # Upload remaining
-            self._upload_batch(upload_this, retries - 1)
-
-        return upload_this
-
     def __enter__(self) -> "TimeSeriesUploadQueue":
         """
         Wraps around start method, for use as context manager
@@ -341,7 +345,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         create_missing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         """
         Args:
@@ -504,6 +508,42 @@ class SequenceUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_single(either_id: EitherId, upload_this: SequenceData) -> SequenceData:
+            self.logger.debug("Writing {} rows to sequence {}".format(len(upload_this.values), either_id))
+
+            try:
+                self.cdf_client.sequences.data.insert(
+                    id=either_id.internal_id,  # type: ignore
+                    external_id=either_id.external_id,  # type: ignore
+                    rows=upload_this,
+                    column_external_ids=None,
+                )
+            except CogniteNotFoundError as ex:
+                if self.create_missing:
+                    # Create missing sequence
+                    self._create_or_update(either_id)
+
+                    # Retry
+                    self.cdf_client.sequences.data.insert(
+                        id=either_id.internal_id,  # type: ignore
+                        external_id=either_id.external_id,  # type: ignore
+                        rows=upload_this,
+                        column_external_ids=None,
+                    )
+                else:
+                    raise ex
+
+            return upload_this
+
         if len(self.upload_queue) == 0:
             return

@@ -514,7 +554,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

             for either_id, upload_this in self.upload_queue.items():
                 _labels = str(either_id.content())
-
+                _upload_single(either_id, upload_this)
                 self.points_written.inc()

             try:
@@ -527,40 +567,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
             self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)

-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_single(self, either_id: EitherId, upload_this: SequenceData) -> SequenceData:
-        self.logger.debug("Writing {} rows to sequence {}".format(len(upload_this.values), either_id))
-
-        try:
-            self.cdf_client.sequences.data.insert(
-                id=either_id.internal_id,  # type: ignore
-                external_id=either_id.external_id,  # type: ignore
-                rows=upload_this,
-                column_external_ids=None,
-            )
-        except CogniteNotFoundError as ex:
-            if self.create_missing:
-                # Create missing sequence
-                self._create_or_update(either_id)
-
-                # Retry
-                self.cdf_client.sequences.data.insert(
-                    id=either_id.internal_id,  # type: ignore
-                    external_id=either_id.external_id,  # type: ignore
-                    rows=upload_this,
-                    column_external_ids=None,
-                )
-            else:
-                raise ex
-
-        return upload_this
-
     def _create_or_update(self, either_id: EitherId) -> None:
         """
         Create or update sequence, based on provided metadata and column definitions
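In both `TimeSeriesUploadQueue.upload` and `SequenceUploadQueue.upload`, the retried helper moves from a method decorated at class-definition time into a closure defined inside `upload()`, so the `@retry(...)` call can reference the instance's `cancellation_token` and the new `cognite_exceptions()` filter. A simplified sketch of that pattern (illustrative names, not the actual implementation):

    from typing import List, Optional

    from cognite.extractorutils.threading import CancellationToken
    from cognite.extractorutils.util import cognite_exceptions, retry

    class ExampleQueue:
        def __init__(self, cancellation_token: Optional[CancellationToken] = None) -> None:
            self.cancellation_token = cancellation_token or CancellationToken()

        def upload(self) -> None:
            # The decorator needs self.cancellation_token, which only exists per
            # instance, so the retried function is created here rather than at
            # class definition time.
            @retry(
                exceptions=cognite_exceptions(),
                cancellation_token=self.cancellation_token,
                tries=10,
            )
            def _upload_batch(batch: List[dict]) -> None:
                ...  # call the Cognite SDK here

            _upload_batch([])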
cognite/extractorutils/uploader_extractor.py
CHANGED
@@ -15,7 +15,6 @@
 """
 A module containing a slightly more advanced base extractor class, sorting a generic output into upload queues.
 """
-import threading
 from dataclasses import dataclass
 from types import TracebackType
 from typing import Any, Callable, Iterable, List, Optional, Type, TypeVar
@@ -27,6 +26,7 @@ from cognite.extractorutils.base import Extractor
 from cognite.extractorutils.configtools import BaseConfig, TimeIntervalConfig
 from cognite.extractorutils.metrics import BaseMetrics
 from cognite.extractorutils.statestore import AbstractStateStore
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader import EventUploadQueue, RawUploadQueue, TimeSeriesUploadQueue
 from cognite.extractorutils.uploader_types import CdfTypes, Event, InsertDatapoints, RawRow

@@ -78,17 +78,17 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         description: str,
         version: Optional[str] = None,
         run_handle: Optional[
-            Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass,
+            Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, CancellationToken], None]
         ] = None,
         config_class: Type[UploaderExtractorConfigClass],
         metrics: Optional[BaseMetrics] = None,
         use_default_state_store: bool = True,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
         config_file_path: Optional[str] = None,
         continuous_extractor: bool = False,
         heartbeat_waiting_time: int = 600,
         handle_interrupts: bool = True,
-        middleware: List[Callable[[dict], dict]] =
+        middleware: Optional[List[Callable[[dict], dict]]] = None,
     ):
         super(UploaderExtractor, self).__init__(
             name=name,
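The `run_handle` callback type changes accordingly: its last parameter is now a `CancellationToken` rather than a `threading.Event`. A hypothetical handle matching the new signature (names are illustrative):

    from cognite.client import CogniteClient
    from cognite.extractorutils.statestore import AbstractStateStore
    from cognite.extractorutils.threading import CancellationToken

    def run(
        client: CogniteClient,
        states: AbstractStateStore,
        config,  # your UploaderExtractorConfig subclass
        token: CancellationToken,
    ) -> None:
        # Extract until the extractor is asked to shut down
        while not token.is_cancelled:
            ...  # read from the source and push to the upload queues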
cognite/extractorutils/util.py
CHANGED
@@ -18,10 +18,8 @@ extractors.
 """
 import logging
 import random
-import signal
-import threading
 from functools import partial, wraps
-from threading import
+from threading import Thread
 from time import time
 from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Type, TypeVar, Union

@@ -29,7 +27,8 @@ from decorator import decorator

 from cognite.client import CogniteClient
 from cognite.client.data_classes import Asset, ExtractionPipelineRun, TimeSeries
-from cognite.client.exceptions import CogniteNotFoundError
+from cognite.client.exceptions import CogniteAPIError, CogniteException, CogniteNotFoundError
+from cognite.extractorutils.threading import CancellationToken


 def _ensure(endpoint: Any, items: Iterable[Any]) -> None:
@@ -73,27 +72,6 @@ def ensure_assets(cdf_client: CogniteClient, assets: Iterable[Asset]) -> None:
     _ensure(cdf_client.assets, assets)


-def set_event_on_interrupt(stop_event: Event) -> None:
-    """
-    Set given event on SIGINT (Ctrl-C) instead of throwing a KeyboardInterrupt exception.
-
-    Args:
-        stop_event: Event to set
-    """
-
-    def sigint_handler(sig_num: int, frame: Any) -> None:
-        logger = logging.getLogger(__name__)
-        logger.warning("Interrupt signal received, stopping extractor gracefully")
-        stop_event.set()
-        logger.info("Waiting for threads to complete. Send another interrupt to force quit.")
-        signal.signal(signal.SIGINT, signal.default_int_handler)
-
-    try:
-        signal.signal(signal.SIGINT, sigint_handler)
-    except ValueError as e:
-        logging.getLogger(__name__).warning(f"Could not register handler for interrupt signals: {str(e)}")
-
-
 class EitherId:
     """
     Class representing an ID in CDF, which can either be an external or internal ID. An EitherId can only hold one ID
@@ -220,7 +198,7 @@ def add_extraction_pipeline(
     # TODO 1. Consider refactoring this decorator to share methods with the Extractor context manager in .base.py
     # as they serve a similar purpose

-    cancellation_token:
+    cancellation_token: CancellationToken = CancellationToken()

     _logger = logging.getLogger(__name__)

@@ -254,7 +232,7 @@ def add_extraction_pipeline(
     )

     def heartbeat_loop() -> None:
-        while not cancellation_token.
+        while not cancellation_token.is_cancelled:
             cognite_client.extraction_pipelines.runs.create(
                 ExtractionPipelineRun(extpipe_external_id=extraction_pipeline_ext_id, status="seen")
             )
@@ -279,7 +257,7 @@ def add_extraction_pipeline(
                 _report_success()
                 _logger.info("Extraction ran successfully")
             finally:
-                cancellation_token.
+                cancellation_token.cancel()
                 if heartbeat_thread:
                     heartbeat_thread.join()

@@ -290,7 +268,7 @@ def add_extraction_pipeline(
     return decorator_ext_pip


-def throttled_loop(target_time: int, cancellation_token:
+def throttled_loop(target_time: int, cancellation_token: CancellationToken) -> Generator[None, None, None]:
     """
     A loop generator that automatically sleeps until each iteration has taken the desired amount of time. Useful for
     when you want to avoid overloading a source system with requests.
@@ -312,7 +290,7 @@ def throttled_loop(target_time: int, cancellation_token: Event) -> Generator[Non
     """
     logger = logging.getLogger(__name__)

-    while not cancellation_token.
+    while not cancellation_token.is_cancelled:
         start_time = time()
         yield
         iteration_time = time() - start_time
@@ -329,7 +307,7 @@ _T2 = TypeVar("_T2")

 def _retry_internal(
     f: Callable[..., _T2],
-    cancellation_token:
+    cancellation_token: CancellationToken,
     exceptions: Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Exception], bool]]],
     tries: int,
     delay: float,
@@ -339,7 +317,7 @@ def _retry_internal(
 ) -> _T2:
     logger = logging.getLogger(__name__)

-    while tries and not cancellation_token.
+    while tries and not cancellation_token.is_cancelled:
         try:
             return f()

@@ -380,7 +358,7 @@ def _retry_internal(


 def retry(
-    cancellation_token:
+    cancellation_token: Optional[CancellationToken] = None,
     exceptions: Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]] = (Exception,),
     tries: int = 10,
     delay: float = 1,
@@ -415,7 +393,7 @@ def retry(

         return _retry_internal(
             partial(f, *args, **kwargs),
-            cancellation_token,
+            cancellation_token or CancellationToken(),
             exceptions,
             tries,
             delay,
@@ -428,7 +406,7 @@ def retry(


 def requests_exceptions(
-    status_codes: List[int] =
+    status_codes: Optional[List[int]] = None,
 ) -> Dict[Type[Exception], Callable[[Any], bool]]:
     """
     Retry exceptions from using the ``requests`` library. This will retry all connection and HTTP errors matching
@@ -443,6 +421,7 @@ def requests_exceptions(
             ...

     """
+    status_codes = status_codes or [408, 425, 429, 500, 502, 503, 504]
     # types ignored, since they are not installed as we don't depend on the package
     from requests.exceptions import HTTPError, RequestException  # type: ignore

@@ -461,7 +440,7 @@ def requests_exceptions(


 def httpx_exceptions(
-    status_codes: List[int] =
+    status_codes: Optional[List[int]] = None,
 ) -> Dict[Type[Exception], Callable[[Any], bool]]:
     """
     Retry exceptions from using the ``httpx`` library. This will retry all connection and HTTP errors matching
@@ -476,6 +455,7 @@ def httpx_exceptions(
             ...

     """
+    status_codes = status_codes or [408, 425, 429, 500, 502, 503, 504]
     # types ignored, since they are not installed as we don't depend on the package
     from httpx import HTTPError, HTTPStatusError  # type: ignore

@@ -491,3 +471,28 @@ def httpx_exceptions(
             return True

     return {HTTPError: handle_http_errors}
+
+
+def cognite_exceptions(
+    status_codes: Optional[List[int]] = None,
+) -> Dict[Type[Exception], Callable[[Any], bool]]:
+    """
+    Retry exceptions from using the Cognite SDK. This will retry all connection and HTTP errors matching
+    the given status codes.
+
+    Example:
+
+    .. code-block:: python
+
+        @retry(exceptions = cognite_exceptions())
+        def my_function() -> None:
+            ...
+    """
+    status_codes = status_codes or [408, 425, 429, 500, 502, 503, 504]
+
+    def handle_cognite_errors(exception: CogniteException) -> bool:
+        if isinstance(exception, CogniteAPIError):
+            return exception.code in status_codes
+        return True
+
+    return {CogniteException: handle_cognite_errors}
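Taken together, the new `cognite_exceptions()` helper and the cancellation-aware `retry` and `throttled_loop` combine as in the following usage sketch, assuming the default transient status codes (408, 425, 429 and 5xx) are appropriate for the workload:

    from cognite.extractorutils.threading import CancellationToken
    from cognite.extractorutils.util import cognite_exceptions, retry, throttled_loop

    token = CancellationToken()

    @retry(exceptions=cognite_exceptions(), cancellation_token=token, tries=5, delay=1)
    def upload_batch() -> None:
        ...  # a CDF call that may raise CogniteAPIError

    # Aim for one iteration per 60 seconds, and stop as soon as the token is cancelled
    for _ in throttled_loop(60, token):
        upload_batch()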
{cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version:
+Version: 7.0.0
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -21,8 +21,6 @@ Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
 Requires-Dist: cognite-sdk (>=7,<8)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
-Requires-Dist: jq (>=1.3.0,<2.0.0) ; sys_platform == "darwin"
-Requires-Dist: jq (>=1.3.0,<2.0.0) ; sys_platform == "linux"
 Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
 Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
 Requires-Dist: psutil (>=5.7.0,<6.0.0)
cognite_extractor_utils-7.0.0.dist-info/RECORD
ADDED
@@ -0,0 +1,27 @@
+cognite/extractorutils/__init__.py,sha256=7RFWajf36G6iOaLnzGuuL3rDlIkt5L1GnsrgApS5-_8,739
+cognite/extractorutils/_inner_util.py,sha256=gmz6aqS7jDNsg8z4RHgJjMFohDLOMiaU4gMWBhg3xcE,1558
+cognite/extractorutils/base.py,sha256=o5oFzGBaszv7-HWMVG2ATmZwZfzi8enyrYIKwUagyCQ,16148
+cognite/extractorutils/configtools/__init__.py,sha256=fj9kH8DdisNi9mI8cKm2sz50vnzeOkJQErIGB3mTYRo,2861
+cognite/extractorutils/configtools/_util.py,sha256=SZycZm_py9v9WZbDiDQbgS6_PiLtu-TtwuuH7tG2YCI,4739
+cognite/extractorutils/configtools/elements.py,sha256=CM9ZVAUgy7DnbTGI_kgnNclioEkaUqv0Lt_a2zoS8Yc,20680
+cognite/extractorutils/configtools/loaders.py,sha256=Q062K-AFJLn9MjJaURar_Le79edGUZvvpgQdg9wm51I,15082
+cognite/extractorutils/exceptions.py,sha256=PERRmySUfJRM2Ta8cFvADTe-KUdXsoMLKdk4140AOHI,1061
+cognite/extractorutils/metrics.py,sha256=IzYevOH19N_Nhi2XfhcuGgsGfmF7SexfcHcTQyZmH1o,15635
+cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognite/extractorutils/statestore.py,sha256=iJdVJQihjU2ljfS46FNEKdD5PSurTzaAH09iGbDYJro,18598
+cognite/extractorutils/threading.py,sha256=C77KgNZolI8E_xbv-G-BkJwLEALkkvZs5UoNx4hJiFg,3249
+cognite/extractorutils/uploader/__init__.py,sha256=W22u6QHA4cR0j78LN5LTL5YGbfC-uTApagTyP5ab7uQ,3110
+cognite/extractorutils/uploader/_base.py,sha256=-aFfoMSBGd9YUUMHL3ZQpLIuNMA7TNklWCEjPA18ER8,5282
+cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
+cognite/extractorutils/uploader/assets.py,sha256=2E90N1kxsaA6Ah4h0_r_dTVhDYY_68ItRWrHYkkltJw,5628
+cognite/extractorutils/uploader/events.py,sha256=NZP2tMoU_rh_rb-EZiUBsOT5KdNABHN4c9Oddk0OsdE,5680
+cognite/extractorutils/uploader/files.py,sha256=2BKwdgj2rlJqJWU-13ktCINdktI6IoKkeU2nAWUKOus,13176
+cognite/extractorutils/uploader/raw.py,sha256=wFjF90PFTjmByOWx_Y4_YfDJ2w2jl0EQJ2Tjx2MP2PM,6738
+cognite/extractorutils/uploader/time_series.py,sha256=VptUq129MY0t8yw4rxeL0kOhz2dMibz4XdyvfhfYGj8,26840
+cognite/extractorutils/uploader_extractor.py,sha256=E-mpVvbPg_Tk90U4S9JybV0duptJ2SXE88HB6npE3zI,7732
+cognite/extractorutils/uploader_types.py,sha256=5MKT14DUnTFVD5Nx4Zvnfp2SfaICuTKLWAFAYaZishk,1045
+cognite/extractorutils/util.py,sha256=PFxVkDfpAEXTONYC1U-iO5DiaE3snn81qVUUT2O8xAM,17025
+cognite_extractor_utils-7.0.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+cognite_extractor_utils-7.0.0.dist-info/METADATA,sha256=lSHI8N78-cWlmraTSSiHovCTL2P-4olyjpWLlwTwTD0,5437
+cognite_extractor_utils-7.0.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+cognite_extractor_utils-7.0.0.dist-info/RECORD,,
cognite/extractorutils/middleware.py
DELETED
@@ -1,36 +0,0 @@
-"""
-This module is deprecated and will be removed in a future version
-"""
-
-
-from sys import platform
-from typing import Any, Union
-
-from cognite.client.data_classes import Row
-
-
-class JQMiddleware:
-    def __init__(self, jq_rules: str) -> None:
-        if platform == "win32":
-            raise Exception("Windows platform doesn't support jq bindings for Python yet")
-        import jq  # type: ignore
-
-        self._jq = jq.compile(jq_rules)
-
-    def __call__(self, data: Union[Row, dict]) -> Union[Row, dict]:
-        if not isinstance(data, (Row, dict)):
-            raise ValueError(f"type {type(data).__name__} is not currently supported")
-
-        if isinstance(data, Row):
-            data.columns = self._jq.input(data.columns).first()
-            self._raise_for_non_dict(data.columns)
-
-        if isinstance(data, dict):
-            data = self._jq.input(data).first()
-            self._raise_for_non_dict(data)
-
-        return data
-
-    def _raise_for_non_dict(self, data: Any) -> None:
-        if not isinstance(data, dict):
-            raise ValueError("output of jq middleware must be a dict")
cognite_extractor_utils-6.4.0.dist-info/RECORD
DELETED
@@ -1,27 +0,0 @@
-cognite/extractorutils/__init__.py,sha256=kPofZ4EKmTEmGYHabSV5nihFfcJguswS0gTKtpaFGQs,739
-cognite/extractorutils/_inner_util.py,sha256=jCPLg-FfTpyLAHkhoKaKObNwbqm19d2Z11to_DYk5EU,1558
-cognite/extractorutils/base.py,sha256=8kzePjVlCIQVftDQ1t_4join9qhs-VOlB6eTpeX9zGk,16018
-cognite/extractorutils/configtools/__init__.py,sha256=fj9kH8DdisNi9mI8cKm2sz50vnzeOkJQErIGB3mTYRo,2861
-cognite/extractorutils/configtools/_util.py,sha256=SZycZm_py9v9WZbDiDQbgS6_PiLtu-TtwuuH7tG2YCI,4739
-cognite/extractorutils/configtools/elements.py,sha256=OgNuLF6iOBnPsANwMsSRrJ_0KwBngD_ZKIgE4NZfOOc,20626
-cognite/extractorutils/configtools/loaders.py,sha256=s4tHuNgBhk6TnrDDifIYQX8CickXVSqVCol9E62DPIs,14969
-cognite/extractorutils/exceptions.py,sha256=PERRmySUfJRM2Ta8cFvADTe-KUdXsoMLKdk4140AOHI,1061
-cognite/extractorutils/metrics.py,sha256=PFrOO0yDRGaFiJmNLXModKY7CW1gvHDBtPaoK-ETCxw,15508
-cognite/extractorutils/middleware.py,sha256=d5bnKEOmC49QmoBK7OyM9krOGDYX8sXxM00wRWKhmHg,1108
-cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cognite/extractorutils/statestore.py,sha256=wC619be0L0EKmn_Fn-ccT3-D2_wt4Mb9eAr_oqrDdis,18272
-cognite/extractorutils/uploader/__init__.py,sha256=W22u6QHA4cR0j78LN5LTL5YGbfC-uTApagTyP5ab7uQ,3110
-cognite/extractorutils/uploader/_base.py,sha256=ed9F-qFmlIylTg8UtuxfykYUfEsG_R0PrfufXYEPcjI,5169
-cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
-cognite/extractorutils/uploader/assets.py,sha256=Gqu1VJqfFcIQphsfqciDFghL_zlNhBmHU8hPfaKSyFw,5441
-cognite/extractorutils/uploader/events.py,sha256=dzqdDHRKsSUiBWqpcwJX9BpTiUGLb6abneqk1k_QmLA,5502
-cognite/extractorutils/uploader/files.py,sha256=u1tJDT8cCD9W9EegQWqOYevsj3BfOdFfyiO3DSWHjsc,11784
-cognite/extractorutils/uploader/raw.py,sha256=9kvHtJoZVLD-V9xcnNEkucwvaa8DPMqsXjvCWMGiw_g,6732
-cognite/extractorutils/uploader/time_series.py,sha256=ZPkz9NVCmz2vE7SkRm0-LjYi-ZKCRZ_Qe-YR1EVtGms,26315
-cognite/extractorutils/uploader_extractor.py,sha256=K8cH8CIDiFNB0fXNdUymE_MKh1GYfqxHHcRoH07CFew,7673
-cognite/extractorutils/uploader_types.py,sha256=5MKT14DUnTFVD5Nx4Zvnfp2SfaICuTKLWAFAYaZishk,1045
-cognite/extractorutils/util.py,sha256=N63Mb_hxfZOcGg7oZOGKnunjj-Lkjib1EK5azEKDeT0,16835
-cognite_extractor_utils-6.4.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
-cognite_extractor_utils-6.4.0.dist-info/METADATA,sha256=Z5UZiVA4dM4Nk_L1cJPvY2oQDC-Kn0yt0IZPLxCZwUM,5560
-cognite_extractor_utils-6.4.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-cognite_extractor_utils-6.4.0.dist-info/RECORD,,
{cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/LICENSE
RENAMED
File without changes
{cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/WHEEL
RENAMED
File without changes