cognite-extractor-utils 5.0.1__py3-none-any.whl → 5.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-extractor-utils has been flagged as potentially problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +4 -3
- cognite/extractorutils/base.py +22 -24
- cognite/extractorutils/configtools/__init__.py +3 -3
- cognite/extractorutils/configtools/_util.py +18 -12
- cognite/extractorutils/configtools/elements.py +45 -18
- cognite/extractorutils/configtools/loaders.py +21 -13
- cognite/extractorutils/metrics.py +18 -10
- cognite/extractorutils/middleware.py +9 -4
- cognite/extractorutils/statestore.py +19 -17
- cognite/extractorutils/uploader/_base.py +2 -9
- cognite/extractorutils/uploader/events.py +25 -7
- cognite/extractorutils/uploader/files.py +17 -12
- cognite/extractorutils/uploader/raw.py +10 -7
- cognite/extractorutils/uploader/time_series.py +87 -63
- cognite/extractorutils/uploader_extractor.py +8 -9
- cognite/extractorutils/util.py +39 -22
- {cognite_extractor_utils-5.0.1.dist-info → cognite_extractor_utils-5.2.0.dist-info}/METADATA +1 -2
- cognite_extractor_utils-5.2.0.dist-info/RECORD +26 -0
- cognite_extractor_utils-5.0.1.dist-info/RECORD +0 -26
- {cognite_extractor_utils-5.0.1.dist-info → cognite_extractor_utils-5.2.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-5.0.1.dist-info → cognite_extractor_utils-5.2.0.dist-info}/WHEEL +0 -0
cognite/extractorutils/uploader/time_series.py CHANGED
@@ -15,14 +15,15 @@
 import math
 import threading
 from datetime import datetime
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from types import TracebackType
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 import arrow
+from requests import ConnectionError
+
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Sequence, SequenceData, TimeSeries
 from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError, CogniteNotFoundError
-from requests import ConnectionError
-
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -48,9 +49,7 @@ MAX_DATAPOINT_STRING_LENGTH = 255
 MAX_DATAPOINT_VALUE = 1e100
 MIN_DATAPOINT_VALUE = -1e100
 
-DataPoint = Union[
-    Dict[str, Union[int, float, str, datetime]], Tuple[Union[int, float, datetime], Union[int, float, str]]
-]
+DataPoint = Tuple[Union[int, float, datetime], Union[int, float, str]]
DataPointList = List[DataPoint]
 
 
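This hunk narrows the DataPoint alias from a dict-or-tuple union to the tuple form only (the hunk below shows dict input is still tolerated at runtime for compatibility). A minimal sketch of datapoints that satisfy the new alias; the values are illustrative:

    from datetime import datetime, timezone

    # Each point is a (timestamp, value) tuple: timestamps as epoch milliseconds
    # or datetime, values as numbers or strings (strings are capped at
    # MAX_DATAPOINT_STRING_LENGTH).
    points = [
        (1609459200000, 21.3),
        (datetime(2021, 1, 1, tzinfo=timezone.utc), "OPEN"),
    ]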
@@ -66,7 +65,7 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
         A TimeSeries object with external_id set, and the is_string automatically detected
     """
     is_string = (
-        isinstance(datapoints[0].get("value"), str)
+        isinstance(datapoints[0].get("value"), str)  # type: ignore  # input might be dict to keep compatibility
         if isinstance(datapoints[0], dict)
         else isinstance(datapoints[0][1], str)
     )
@@ -118,6 +117,8 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             cancellation_token,
         )
 
+        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
+
         if isinstance(create_missing, bool):
             self.create_missing = create_missing
             self.missing_factory = default_time_series_factory
@@ -125,7 +126,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             self.create_missing = True
             self.missing_factory = create_missing
 
-        self.upload_queue: Dict[EitherId, DataPointList] = dict()
+        self.upload_queue: Dict[EitherId, DataPointList] = {}
 
         self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
         self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
@@ -134,19 +135,23 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self.latency_zero_point = arrow.utcnow()
         self.data_set_id = data_set_id
 
-    def _verify_datapoint_time(self, time: Union[int, float, datetime]) -> bool:
+    def _verify_datapoint_time(self, time: Union[int, float, datetime, str]) -> bool:
         if isinstance(time, int) or isinstance(time, float):
             return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
+        elif isinstance(time, str):
+            return False
         else:
             return time.timestamp() * 1000.0 >= MIN_DATAPOINT_TIMESTAMP
 
-    def _verify_datapoint_value(self, value: Union[int, float, str]) -> bool:
+    def _verify_datapoint_value(self, value: Union[int, float, datetime, str]) -> bool:
         if isinstance(value, float):
             return not (
                 math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
             )
         elif isinstance(value, str):
             return len(value) <= MAX_DATAPOINT_STRING_LENGTH
+        elif isinstance(value, datetime):
+            return False
         else:
             return True
 
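The widened signatures let these helpers reject malformed input explicitly instead of failing on it: a string is never a valid timestamp, and a datetime is never a valid value. A sketch of the resulting behaviour, assuming `queue` is a constructed TimeSeriesUploadQueue (these are private helpers, called here purely for illustration):

    import math
    from datetime import datetime

    queue._verify_datapoint_time("2021-01-01")           # False: str timestamps rejected
    queue._verify_datapoint_time(math.nan)               # False: NaN rejected
    queue._verify_datapoint_value(datetime(2021, 1, 1))  # False: datetime values rejected
    queue._verify_datapoint_value(21.3)                  # True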
@@ -154,14 +159,16 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self,
         dp: DataPoint,
     ) -> bool:
-        if isinstance(dp, Dict):
+        if isinstance(dp, dict):
             return self._verify_datapoint_time(dp["timestamp"]) and self._verify_datapoint_value(dp["value"])
-        elif isinstance(dp, Tuple):
+        elif isinstance(dp, tuple):
             return self._verify_datapoint_time(dp[0]) and self._verify_datapoint_value(dp[1])
         else:
             return True
 
-    def add_to_upload_queue(self, id: int = None, external_id: str = None, datapoints: DataPointList = []) -> None:
+    def add_to_upload_queue(
+        self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: DataPointList = []
+    ) -> None:
         """
         Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
         specified in the __init__.
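The bare `*` in the new signature makes id, external_id and datapoints keyword-only. A usage sketch, assuming `client` is a configured CogniteClient:

    queue = TimeSeriesUploadQueue(cdf_client=client, create_missing=True)

    # a positional call such as queue.add_to_upload_queue("sensor-42", points)
    # now raises TypeError; the arguments must be named:
    queue.add_to_upload_queue(external_id="sensor-42", datapoints=[(1609459200000, 21.3)])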
@@ -236,7 +243,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_batch(self, upload_this: List[Dict], retries=5) -> List[Dict]:
+    def _upload_batch(self, upload_this: List[Dict], retries: int = 5) -> List[Dict]:
         if len(upload_this) == 0:
             return upload_this
 
@@ -276,8 +283,9 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
 
             if len(ex.not_found) != len(create_these_ids):
                 missing = [id_dict for id_dict in ex.not_found if id_dict.get("externalId") not in retry_these]
+                missing_num = len(ex.not_found) - len(create_these_ids)
                 self.logger.error(
-                    f"{len(ex.not_found) - len(create_these_ids)} time series not found, and could not be created automatically:\n"
+                    f"{missing_num} time series not found, and could not be created automatically:\n"
                     + str(missing)
                     + "\nData will be dropped"
                 )
@@ -304,7 +312,9 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
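The newly typed `__exit__` keeps the queue usable as a context manager exactly as before: `__enter__` starts the upload thread and `__exit__` wraps `stop()`. A sketch, again assuming a configured `client`:

    with TimeSeriesUploadQueue(cdf_client=client, max_upload_interval=30) as queue:
        queue.add_to_upload_queue(external_id="sensor-42", datapoints=[(1609459200000, 21.3)])
    # leaving the block stops the upload thread and flushes any remaining points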
@@ -334,14 +344,14 @@ class SequenceUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        create_missing=False,
+        create_missing: bool = False,
         cancellation_token: threading.Event = threading.Event(),
     ):
         """
         Args:
             cdf_client: Cognite Data Fusion client to use
-            post_upload_function: A function that will be called after each upload. The function will be given one
-                A list of the events that were uploaded.
+            post_upload_function: A function that will be called after each upload. The function will be given one
+                argument: A list of the events that were uploaded.
             max_queue_size: Maximum size of upload queue. Defaults to no max size.
             max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
                 methods).
@@ -360,15 +370,15 @@ class SequenceUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue: Dict[EitherId, SequenceData] = dict()
-        self.sequence_metadata: Dict[EitherId, Dict[str, Union[str, int, float]]] = dict()
-        self.sequence_asset_external_ids: Dict[EitherId, str] = dict()
-        self.sequence_dataset_external_ids: Dict[EitherId, str] = dict()
-        self.sequence_names: Dict[EitherId, str] = dict()
-        self.sequence_descriptions: Dict[EitherId, str] = dict()
-        self.column_definitions: Dict[EitherId, List[Dict[str, str]]] = dict()
-        self.asset_ids: Dict[str, int] = dict()
-        self.dataset_ids: Dict[str, int] = dict()
+        self.upload_queue: Dict[EitherId, SequenceData] = {}
+        self.sequence_metadata: Dict[EitherId, Dict[str, Union[str, int, float]]] = {}
+        self.sequence_asset_external_ids: Dict[EitherId, str] = {}
+        self.sequence_dataset_external_ids: Dict[EitherId, str] = {}
+        self.sequence_names: Dict[EitherId, str] = {}
+        self.sequence_descriptions: Dict[EitherId, str] = {}
+        self.column_definitions: Dict[EitherId, List[Dict[str, str]]] = {}
+        self.asset_ids: Dict[str, int] = {}
+        self.dataset_ids: Dict[str, int] = {}
         self.create_missing = create_missing
 
         self.points_queued = SEQUENCES_UPLOADER_POINTS_QUEUED
@@ -380,12 +390,12 @@ class SequenceUploadQueue(AbstractUploadQueue):
     def set_sequence_metadata(
         self,
         metadata: Dict[str, Union[str, int, float]],
-        id: int = None,
-        external_id: str = None,
-        asset_external_id: str = None,
-        dataset_external_id: str = None,
-        name: str = None,
-        description: str = None,
+        id: Optional[int] = None,
+        external_id: Optional[str] = None,
+        asset_external_id: Optional[str] = None,
+        dataset_external_id: Optional[str] = None,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
     ) -> None:
         """
         Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
@@ -404,13 +414,17 @@ class SequenceUploadQueue(AbstractUploadQueue):
         """
         either_id = EitherId(id=id, external_id=external_id)
         self.sequence_metadata[either_id] = metadata
-        self.sequence_asset_external_ids[either_id] = asset_external_id
-        self.sequence_dataset_external_ids[either_id] = dataset_external_id
-        self.sequence_names[either_id] = name
-        self.sequence_descriptions[either_id] = description
+        if asset_external_id:
+            self.sequence_asset_external_ids[either_id] = asset_external_id
+        if dataset_external_id:
+            self.sequence_dataset_external_ids[either_id] = dataset_external_id
+        if name:
+            self.sequence_names[either_id] = name
+        if description:
+            self.sequence_descriptions[either_id] = description
 
     def set_sequence_column_definition(
-        self, col_def: List[Dict[str, str]], id: int = None, external_id: str = None
+        self, col_def: List[Dict[str, str]], id: Optional[int] = None, external_id: Optional[str] = None
     ) -> None:
         """
         Set sequence column definition
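With the guards above, set_sequence_metadata only caches the fields that were actually provided; omitted or None fields no longer leave None entries in the per-sequence maps. A sketch, assuming a configured `client`:

    seq_queue = SequenceUploadQueue(cdf_client=client, create_missing=True)
    seq_queue.set_sequence_metadata(
        metadata={"source": "plc-7"},
        external_id="seq-1",
        name="PLC 7 readings",
        # description / asset_external_id / dataset_external_id omitted:
        # nothing is cached for them
    )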
@@ -434,8 +448,8 @@ class SequenceUploadQueue(AbstractUploadQueue):
             SequenceData,
         ],
         column_external_ids: Optional[List[dict]] = None,
-        id: int = None,
-        external_id: str = None,
+        id: Optional[int] = None,
+        external_id: Optional[str] = None,
     ) -> None:
         """
         Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
@@ -462,13 +476,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
         elif isinstance(rows, dict):
             rows = [{"rowNumber": row_number, "values": values} for row_number, values in rows.items()]
 
-            rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids)
+            rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids)  # type: ignore
 
         elif isinstance(rows, list):
             if isinstance(rows[0], tuple) or isinstance(rows[0], list):
                 rows = [{"rowNumber": row_number, "values": values} for row_number, values in rows]
 
-            rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids)
+            rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids)  # type: ignore
         else:
             raise TypeError("Unsupported type for sequence rows: {}".format(type(rows)))
 
@@ -476,8 +490,8 @@ class SequenceUploadQueue(AbstractUploadQueue):
         seq = self.upload_queue.get(either_id)
         if seq is not None:
             # Update sequence
-            seq.values.extend(rows.values)
-            seq.row_numbers.extend(rows.row_numbers)
+            seq.values.extend(rows.values)  # type: ignore  # type is list, mypy is wrong
+            seq.row_numbers.extend(rows.row_numbers)  # type: ignore
 
             self.upload_queue[either_id] = seq
         else:
@@ -517,7 +531,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         self.queue_size.set(self.upload_queue_size)
 
     @retry(
-        exceptions=CogniteAPIError,
+        exceptions=[CogniteAPIError],
         tries=RETRIES,
         delay=RETRY_DELAY,
         max_delay=RETRY_MAX_DELAY,
@@ -528,7 +542,10 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
         try:
             self.cdf_client.sequences.data.insert(
-                id=either_id.internal_id, external_id=either_id.external_id, rows=upload_this, column_external_ids=None
+                id=either_id.internal_id,  # type: ignore
+                external_id=either_id.external_id,  # type: ignore
+                rows=upload_this,
+                column_external_ids=None,
             )
         except CogniteNotFoundError as ex:
             if self.create_missing:
@@ -537,8 +554,8 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
                 # Retry
                 self.cdf_client.sequences.data.insert(
-                    id=either_id.internal_id,
-                    external_id=either_id.external_id,
+                    id=either_id.internal_id,  # type: ignore
+                    external_id=either_id.external_id,  # type: ignore
                     rows=upload_this,
                     column_external_ids=None,
                 )
@@ -556,24 +573,29 @@ class SequenceUploadQueue(AbstractUploadQueue):
         """
 
         column_def = self.column_definitions.get(either_id)
+        if column_def is None:
+            self.logger.error(f"Can't create sequence {str(either_id)}, no column definitions provided")
 
         try:
             seq = self.cdf_client.sequences.create(
                 Sequence(
-                    id=either_id.internal_id,
-                    external_id=either_id.external_id,
-                    name=self.sequence_names.get(either_id, None),
-                    description=self.sequence_descriptions.get(either_id, None),
-                    metadata=self.sequence_metadata.get(either_id, None),
-                    asset_id=self.asset_ids.get(self.sequence_asset_external_ids.get(either_id, None), None),
-                    data_set_id=self.dataset_ids.get(self.sequence_dataset_external_ids.get(either_id, None), None),
-                    columns=column_def,
+                    id=either_id.internal_id,  # type: ignore  # these are optional, the SDK types are wrong
+                    external_id=either_id.external_id,  # type: ignore
+                    name=self.sequence_names.get(either_id, None),  # type: ignore
+                    description=self.sequence_descriptions.get(either_id, None),  # type: ignore
+                    metadata=self.sequence_metadata.get(either_id, None),  # type: ignore
+                    asset_id=self.asset_ids.get(self.sequence_asset_external_ids.get(either_id, None), None),  # type: ignore
+                    data_set_id=self.dataset_ids.get(self.sequence_dataset_external_ids.get(either_id, None), None),  # type: ignore
+                    columns=column_def,  # type: ignore  # We already checked for None, mypy is wrong
                 )
             )
 
         except CogniteDuplicatedError:
             self.logger.info("Sequnce already exist: {}".format(either_id))
-            seq = self.cdf_client.sequences.retrieve(id=either_id.internal_id, external_id=either_id.external_id)
+            seq = self.cdf_client.sequences.retrieve(
+                id=either_id.internal_id,  # type: ignore  # these are optional, the SDK types are wrong
+                external_id=either_id.external_id,  # type: ignore
+            )
 
             # Update definition of cached sequence
             cseq = self.upload_queue[either_id]
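The new None check above only logs before creation proceeds, so in practice a column definition should be registered before the queue has to create a missing sequence. A sketch continuing the seq_queue example; the column dict keys ("externalId", "valueType") follow the CDF sequence column format and are an assumption of this sketch:

    seq_queue.set_sequence_column_definition(
        col_def=[{"externalId": "temperature", "valueType": "DOUBLE"}],
        external_id="seq-1",
    )
    # rows may be given as a {row_number: values} dict,
    # per the isinstance(rows, dict) branch shown earlier
    seq_queue.add_to_upload_queue(rows={1: [21.3], 2: [21.4]}, external_id="seq-1")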
@@ -584,7 +606,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         Resolve id of assets if specified, for use in sequence creation
         """
         assets = set(self.sequence_asset_external_ids.values())
-        assets.discard(None)
+        assets.discard(None)  # type: ignore  # safeguard, remove Nones if any
 
         if len(assets) > 0:
             try:
@@ -596,14 +618,14 @@ class SequenceUploadQueue(AbstractUploadQueue):
             }
         except Exception as e:
             self.logger.error("Error in resolving asset id: %s", str(e))
-            self.asset_ids = dict()
+            self.asset_ids = {}
 
     def _resolve_dataset_ids(self) -> None:
         """
         Resolve id of datasets if specified, for use in sequence creation
         """
         datasets = set(self.sequence_dataset_external_ids.values())
-        datasets.discard(None)
+        datasets.discard(None)  # type: ignore  # safeguard, remove Nones if any
 
         if len(datasets) > 0:
             try:
@@ -615,7 +637,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
             }
         except Exception as e:
             self.logger.error("Error in resolving dataset id: %s", str(e))
-            self.dataset_ids = dict()
+            self.dataset_ids = {}
 
     def __enter__(self) -> "SequenceUploadQueue":
         """
@@ -627,7 +649,9 @@ class SequenceUploadQueue(AbstractUploadQueue):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
        """
        Wraps around stop method, for use as context manager
 
cognite/extractorutils/uploader_extractor.py CHANGED
@@ -15,14 +15,14 @@
 """
 A module containing a slightly more advanced base extractor class, sorting a generic output into upload queues.
 """
-from threading import Event
+import threading
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Callable, Iterable, List, Optional, Type, TypeVar
+from typing import Any, Callable, Iterable, List, Optional, Type, TypeVar
 
-from cognite.client import CogniteClient
 from more_itertools import peekable
 
+from cognite.client import CogniteClient
 from cognite.extractorutils.base import Extractor
 from cognite.extractorutils.configtools import BaseConfig, TimeIntervalConfig
 from cognite.extractorutils.metrics import BaseMetrics
@@ -78,12 +78,12 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         description: str,
         version: Optional[str] = None,
         run_handle: Optional[
-            Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, Event], None]
+            Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, threading.Event], None]
         ] = None,
         config_class: Type[UploaderExtractorConfigClass],
         metrics: Optional[BaseMetrics] = None,
         use_default_state_store: bool = True,
-        cancellation_token: Event = Event(),
+        cancellation_token: threading.Event = threading.Event(),
         config_file_path: Optional[str] = None,
         continuous_extractor: bool = False,
         heartbeat_waiting_time: int = 600,
@@ -107,10 +107,9 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         self.middleware = middleware if isinstance(middleware, list) else []
 
     def handle_output(self, output: CdfTypes) -> None:
-        if not isinstance(output, Iterable):
-            output = [output]
+        list_output = [output] if not isinstance(output, Iterable) else output
+        peekable_output = peekable(list_output)
 
-        peekable_output = peekable(output)
         peek = peekable_output.peek(None)
 
         if peek is None:
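The rewritten handle_output normalizes a single item into a list before wrapping it in peekable, so both call shapes below reach the same code path. A sketch, assuming `extractor` is a constructed UploaderExtractor:

    from cognite.client.data_classes import Event

    extractor.handle_output(Event(external_id="evt-1"))                             # single item
    extractor.handle_output([Event(external_id="evt-1"), Event(external_id="evt-2")])  # iterable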
@@ -133,7 +132,7 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         else:
             raise ValueError(f"Unexpected type: {type(peek)}")
 
-    def _apply_middleware(self, item):
+    def _apply_middleware(self, item: Any) -> Any:
         for mw in self.middleware:
             item = mw(item)
         return item
cognite/extractorutils/util.py CHANGED
@@ -23,12 +23,13 @@ import threading
 from functools import partial, wraps
 from threading import Event, Thread
 from time import time
-from typing import Any, Generator, Iterable, Optional, Tuple, Type, Union
+from typing import Any, Callable, Generator, Iterable, Optional, Tuple, Type, TypeVar, Union
+
+from decorator import decorator
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Asset, ExtractionPipelineRun, TimeSeries
 from cognite.client.exceptions import CogniteNotFoundError
-from decorator import decorator
 
 
 def _ensure(endpoint: Any, items: Iterable[Any]) -> None:
@@ -80,7 +81,7 @@ def set_event_on_interrupt(stop_event: Event) -> None:
         stop_event: Event to set
     """
 
-    def sigint_handler(sig_num, frame):
+    def sigint_handler(sig_num: int, frame: Any) -> None:
         logger = logging.getLogger(__name__)
         logger.warning("Interrupt signal received, stopping extractor gracefully")
         stop_event.set()
@@ -106,7 +107,7 @@ class EitherId:
         TypeError: If none of both of id types are set.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[int, str, None]):
         internal_id = kwargs.get("id")
         external_id = kwargs.get("externalId") or kwargs.get("external_id")
 
@@ -116,8 +117,14 @@ class EitherId:
         if internal_id is not None and external_id is not None:
             raise TypeError("Only one of id and external_id can be set")
 
-        self.internal_id = internal_id
-        self.external_id = external_id
+        if internal_id is not None and not isinstance(internal_id, int):
+            raise TypeError("Internal IDs must be integers")
+
+        if external_id is not None and not isinstance(external_id, str):
+            raise TypeError("Internal IDs must be integers")
+
+        self.internal_id: Optional[int] = internal_id
+        self.external_id: Optional[str] = external_id
 
    def type(self) -> str:
        """
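The two new isinstance checks make EitherId fail fast on mistyped IDs at construction time. Note that the second error message still reads "Internal IDs must be integers" even though it fires for a non-string external id. A sketch:

    EitherId(external_id="sensor-42")  # ok
    EitherId(id=117)                   # ok
    EitherId(id="117")                 # TypeError: Internal IDs must be integers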
@@ -135,7 +142,7 @@ class EitherId:
         Returns:
             The ID
         """
-        return self.internal_id or self.external_id
+        return self.internal_id or self.external_id  # type: ignore  # checked to be not None in init
 
     def __eq__(self, other: Any) -> bool:
         """
@@ -180,12 +187,15 @@ class EitherId:
         return self.__str__()
 
 
+_T1 = TypeVar("_T1")
+
+
 def add_extraction_pipeline(
     extraction_pipeline_ext_id: str,
     cognite_client: CogniteClient,
     heartbeat_waiting_time: int = 600,
     added_message: str = "",
-):
+) -> Callable[[Callable[..., _T1]], Callable[..., _T1]]:
     """
     This is to be used as a decorator for extractor functions to add extraction pipeline information
 
@@ -214,9 +224,9 @@ def add_extraction_pipeline(
 
     _logger = logging.getLogger(__name__)
 
-    def decorator_ext_pip(input_function):
+    def decorator_ext_pip(input_function: Callable[..., _T1]) -> Callable[..., _T1]:
         @wraps(input_function)
-        def wrapper_ext_pip(*args, **kwargs):
+        def wrapper_ext_pip(*args: Any, **kwargs: Any) -> _T1:
             ##############################
             # Setup Extraction Pipelines #
             ##############################
@@ -256,6 +266,7 @@ def add_extraction_pipeline(
             ##############################
             _logger.info(f"Starting to run function: {input_function.__name__}")
 
+            heartbeat_thread: Optional[Thread] = None
             try:
                 heartbeat_thread = Thread(target=heartbeat_loop, name="HeartbeatLoop", daemon=True)
                 heartbeat_thread.start()
@@ -266,10 +277,11 @@ def add_extraction_pipeline(
                 raise e
             else:
                 _report_success()
-                _logger.info(f"Extraction ran successfully")
+                _logger.info("Extraction ran successfully")
             finally:
                 cancellation_token.set()
-                heartbeat_thread.join()
+                if heartbeat_thread:
+                    heartbeat_thread.join()
 
             return output
 
@@ -312,25 +324,28 @@ def throttled_loop(target_time: int, cancellation_token: Event) -> Generator[Non
         cancellation_token.wait(target_time - iteration_time)
 
 
+_T2 = TypeVar("_T2")
+
+
 def _retry_internal(
-    f,
+    f: Callable[..., _T2],
     cancellation_token: threading.Event = threading.Event(),
-    exceptions: Iterable[Type[Exception]] = Exception,
+    exceptions: Iterable[Type[Exception]] = [Exception],
     tries: int = -1,
     delay: float = 0,
     max_delay: Optional[float] = None,
     backoff: float = 1,
     jitter: Union[float, Tuple[float, float]] = 0,
-):
+) -> _T2:
     logger = logging.getLogger(__name__)
 
     while tries and not cancellation_token.is_set():
         try:
             return f()
-        except exceptions as e:
+        except exceptions as e:  # type: ignore  # Exception is an exception type, smh mypy
             tries -= 1
             if not tries:
-                raise
+                raise e
 
             if logger is not None:
                 logger.warning("%s, retrying in %s seconds...", e, delay)
@@ -346,16 +361,18 @@ def _retry_internal(
         if max_delay is not None:
             delay = min(delay, max_delay)
 
+    return None  # type: ignore  # unreachable, we will have raised an exception before this
+
 
 def retry(
     cancellation_token: threading.Event = threading.Event(),
-    exceptions: Iterable[Type[Exception]] = Exception,
+    exceptions: Iterable[Type[Exception]] = [Exception],
     tries: int = -1,
     delay: float = 0,
     max_delay: Optional[float] = None,
     backoff: float = 1,
     jitter: Union[float, Tuple[float, float]] = 0,
-):
+) -> Callable[[Callable[..., _T2]], Callable[..., _T2]]:
     """
     Returns a retry decorator.
 
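The `exceptions` parameter now takes an iterable of exception types rather than a bare exception class, matching the `exceptions=[CogniteAPIError]` call sites above. A signature-level usage sketch; the names fetch, ConnectionError and TimeoutError are chosen for illustration:

    from cognite.extractorutils.util import retry

    @retry(exceptions=[ConnectionError, TimeoutError], tries=5, delay=1, backoff=2)
    def fetch() -> str:
        ...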
@@ -375,9 +392,9 @@ def retry(
     """
 
     @decorator
-    def retry_decorator(f, *fargs, **fkwargs):
-        args = fargs if fargs else list()
-        kwargs = fkwargs if fkwargs else dict()
+    def retry_decorator(f: Callable[..., _T2], *fargs: Any, **fkwargs: Any) -> _T2:
+        args = fargs if fargs else []
+        kwargs = fkwargs if fkwargs else {}
 
         return _retry_internal(
             partial(f, *args, **kwargs),
{cognite_extractor_utils-5.0.1.dist-info → cognite_extractor_utils-5.2.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 5.0.1
+Version: 5.2.0
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -121,4 +121,3 @@ Each public method, class and module should have docstrings. Docstrings are writ
 style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings). Please include unit and/or
 integration tests for submitted code, and remember to update the [changelog](./CHANGELOG.md).
 
-
cognite_extractor_utils-5.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,26 @@
+cognite/extractorutils/__init__.py,sha256=YpHnq2cOlBjlfQGnnajBJHlP4R0tz1RhKjFUhN3Rp9s,739
+cognite/extractorutils/_inner_util.py,sha256=jCPLg-FfTpyLAHkhoKaKObNwbqm19d2Z11to_DYk5EU,1558
+cognite/extractorutils/base.py,sha256=ydDV0oUJH6W2S150P-PZxotnZGAwVGzKiKcpcCZdB0E,15974
+cognite/extractorutils/configtools/__init__.py,sha256=fj9kH8DdisNi9mI8cKm2sz50vnzeOkJQErIGB3mTYRo,2861
+cognite/extractorutils/configtools/_util.py,sha256=SZycZm_py9v9WZbDiDQbgS6_PiLtu-TtwuuH7tG2YCI,4739
+cognite/extractorutils/configtools/elements.py,sha256=BAfFrUrDwGPoyEMQeYySWRrqeIM_MrmLMyGPe-hrg0A,19910
+cognite/extractorutils/configtools/loaders.py,sha256=02eK2dp7zsIU7ZIsMY6o7H_VfaiQ312QzCGqTuAypbU,9545
+cognite/extractorutils/exceptions.py,sha256=PERRmySUfJRM2Ta8cFvADTe-KUdXsoMLKdk4140AOHI,1061
+cognite/extractorutils/metrics.py,sha256=HYW5oQIKd4Zk0ahIuRbxON9Y6LXJ7QODya_SWogNj0M,14929
+cognite/extractorutils/middleware.py,sha256=d5bnKEOmC49QmoBK7OyM9krOGDYX8sXxM00wRWKhmHg,1108
+cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognite/extractorutils/statestore.py,sha256=F54IEgWUbAfeOWRPWiVEqQDUuqb_WaFdM0bzYNE-Sis,17829
+cognite/extractorutils/uploader/__init__.py,sha256=h9EAktLxNZjmZunFcK6Tngu6K7tQKwYTmNOkB4meCA8,3054
+cognite/extractorutils/uploader/_base.py,sha256=1MVzMEyuuYNlCAG7Hv1kPCzUOfVXhO2fA_sZYSGBLyY,4983
+cognite/extractorutils/uploader/_metrics.py,sha256=cF6EaixueFSSRw97O0HK3kEM7fX8AvNAUVz5xDyr4Yk,3865
+cognite/extractorutils/uploader/events.py,sha256=RmBQcHpImGJG3D7F0Ccb5aK7Ecy4mkF6GYk-UTLjp38,6118
+cognite/extractorutils/uploader/files.py,sha256=de6nny4yQiaSSEol8gE0KzYZ9srTKwjOTWVJztCnZH8,11674
+cognite/extractorutils/uploader/raw.py,sha256=B6B0B659Zfvhy9Tp_QrGi1wkwJw94Z6am5_YmFZFxvQ,7021
+cognite/extractorutils/uploader/time_series.py,sha256=gsMqwGYFS4wx3LF62URUMLx4PoCZb7MS3RWeWBQe2AM,26579
+cognite/extractorutils/uploader_extractor.py,sha256=cAseJZXBmE6feyHJNMGxXBfGbdGBmOyBTeU1ZC5Krmg,7476
+cognite/extractorutils/uploader_types.py,sha256=nztoOqdszm4BEDcCybdApub0zN3KEfwLgTDeP69Xdlk,1146
+cognite/extractorutils/util.py,sha256=6y-2ZWviLmPYSbEWu0_I8Oeuwz-4yK9EhE7FM1g_GX8,14284
+cognite_extractor_utils-5.2.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+cognite_extractor_utils-5.2.0.dist-info/METADATA,sha256=X0ig0LlZzWhFLMQenXn44XdKD-_YcMnT7-BakiNQFEk,5406
+cognite_extractor_utils-5.2.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+cognite_extractor_utils-5.2.0.dist-info/RECORD,,
cognite_extractor_utils-5.0.1.dist-info/RECORD DELETED
@@ -1,26 +0,0 @@
-cognite/extractorutils/__init__.py,sha256=VsW0H6dGKBgnovfeKtFM9fsrkvp_XV6kslSkrpiIvmA,739
-cognite/extractorutils/_inner_util.py,sha256=idg6EZzcDIwCYJJVSvyyEq8M2QUyLvW4soJS1XTlFv4,1431
-cognite/extractorutils/base.py,sha256=_c7zF1_PDIwIA8lqvf0Du5vJyITYissOptBncvKY7Dg,15990
-cognite/extractorutils/configtools/__init__.py,sha256=qYBtB-wVwXV6mcdRe8xRclhF9AMADlaJaAJzTNuWT48,2861
-cognite/extractorutils/configtools/_util.py,sha256=XYNKaCD_mMgvFBxvLE_KwgvfTkgHZEzs1Wj7gL0WkpY,4294
-cognite/extractorutils/configtools/elements.py,sha256=2uT0_EeJaRbxGWBXWDWfkA8qYvpu8O8X-S2_maEeUpc,18506
-cognite/extractorutils/configtools/loaders.py,sha256=Q_aeaT-TxqRMuurLasTzbSsIq0I1cnnhcs3Zzw3-FNk,9188
-cognite/extractorutils/exceptions.py,sha256=PERRmySUfJRM2Ta8cFvADTe-KUdXsoMLKdk4140AOHI,1061
-cognite/extractorutils/metrics.py,sha256=QTIe6LIMZ8u77lImB1jXgvTvMf2mMOVPESeAZ1hA3dg,14604
-cognite/extractorutils/middleware.py,sha256=XelqIxmWBfpoDUt0cKvAXyDCYnjIFyKNHVJU4CYjXes,1005
-cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cognite/extractorutils/statestore.py,sha256=5nFkzPbrOYwlGOKyElwX-JJ1Zb9hhzJfWXyWeD-mlMs,17449
-cognite/extractorutils/uploader/__init__.py,sha256=h9EAktLxNZjmZunFcK6Tngu6K7tQKwYTmNOkB4meCA8,3054
-cognite/extractorutils/uploader/_base.py,sha256=K0pOIt-O_yIOJcpGZctm24kUsZgrvxG0L_YGw1DqHCQ,5240
-cognite/extractorutils/uploader/_metrics.py,sha256=cF6EaixueFSSRw97O0HK3kEM7fX8AvNAUVz5xDyr4Yk,3865
-cognite/extractorutils/uploader/events.py,sha256=xpxrcIpGwnKgXhfCJCovtWkl3QvHI9XZkhO6TAJBHXE,5292
-cognite/extractorutils/uploader/files.py,sha256=Mh5MxswyGjrteLd22yopWMPD036DGXc6_tibHYEnMU0,11351
-cognite/extractorutils/uploader/raw.py,sha256=wdVeyD4DoqY0eIk0fvWnAlfIXGbtG5EgWetOClbQGZU,6888
-cognite/extractorutils/uploader/time_series.py,sha256=qmamsSAJFUwX9adFgejEV_cGmjEhbTuiucIVIovj8U4,25071
-cognite/extractorutils/uploader_extractor.py,sha256=TwiRAqgVRheA-KKnxloBZvD1tZjuDGKVGuGbtvPtj8I,7404
-cognite/extractorutils/uploader_types.py,sha256=nztoOqdszm4BEDcCybdApub0zN3KEfwLgTDeP69Xdlk,1146
-cognite/extractorutils/util.py,sha256=NtqR6EGUDZmGjrfy8lnge83ITh1HzX4u_JtqGqNQb-4,13352
-cognite_extractor_utils-5.0.1.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
-cognite_extractor_utils-5.0.1.dist-info/METADATA,sha256=HSvv1Zqkui1FNH0dpMtneo7rFybL6s5I3CtxRKaJZuE,5407
-cognite_extractor_utils-5.0.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-cognite_extractor_utils-5.0.1.dist-info/RECORD,,