cognite-extractor-utils 7.5.4__py3-none-any.whl → 7.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-extractor-utils has been flagged as potentially problematic.
- cognite/extractorutils/__init__.py +3 -1
- cognite/extractorutils/_inner_util.py +14 -3
- cognite/extractorutils/base.py +14 -15
- cognite/extractorutils/configtools/__init__.py +25 -0
- cognite/extractorutils/configtools/_util.py +7 -9
- cognite/extractorutils/configtools/elements.py +58 -49
- cognite/extractorutils/configtools/loaders.py +29 -26
- cognite/extractorutils/configtools/validators.py +2 -3
- cognite/extractorutils/exceptions.py +1 -4
- cognite/extractorutils/metrics.py +18 -18
- cognite/extractorutils/statestore/_base.py +3 -4
- cognite/extractorutils/statestore/hashing.py +24 -24
- cognite/extractorutils/statestore/watermark.py +17 -14
- cognite/extractorutils/threading.py +4 -4
- cognite/extractorutils/unstable/configuration/exceptions.py +24 -0
- cognite/extractorutils/unstable/configuration/loaders.py +18 -7
- cognite/extractorutils/unstable/configuration/models.py +25 -3
- cognite/extractorutils/unstable/core/_dto.py +10 -0
- cognite/extractorutils/unstable/core/base.py +179 -29
- cognite/extractorutils/unstable/core/errors.py +72 -0
- cognite/extractorutils/unstable/core/restart_policy.py +29 -0
- cognite/extractorutils/unstable/core/runtime.py +170 -26
- cognite/extractorutils/unstable/core/tasks.py +2 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +4 -4
- cognite/extractorutils/uploader/__init__.py +14 -0
- cognite/extractorutils/uploader/_base.py +8 -8
- cognite/extractorutils/uploader/assets.py +15 -9
- cognite/extractorutils/uploader/data_modeling.py +13 -13
- cognite/extractorutils/uploader/events.py +9 -9
- cognite/extractorutils/uploader/files.py +153 -46
- cognite/extractorutils/uploader/raw.py +10 -10
- cognite/extractorutils/uploader/time_series.py +56 -58
- cognite/extractorutils/uploader/upload_failure_handler.py +64 -0
- cognite/extractorutils/uploader_extractor.py +11 -11
- cognite/extractorutils/uploader_types.py +4 -12
- cognite/extractorutils/util.py +21 -23
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/METADATA +4 -3
- cognite_extractor_utils-7.5.6.dist-info/RECORD +49 -0
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/WHEEL +1 -1
- cognite/extractorutils/unstable/core/__main__.py +0 -31
- cognite_extractor_utils-7.5.4.dist-info/RECORD +0 -46
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/LICENSE +0 -0
cognite/extractorutils/uploader/time_series.py
CHANGED

@@ -15,7 +15,7 @@
 import math
 from datetime import datetime
 from types import TracebackType
-from typing import Any, Callable,
+from typing import Any, Callable, Type

 from cognite.client import CogniteClient
 from cognite.client.data_classes import (
@@ -50,13 +50,13 @@ MAX_DATAPOINT_STRING_LENGTH = 255
 MAX_DATAPOINT_VALUE = 1e100
 MIN_DATAPOINT_VALUE = -1e100

-TimeStamp =
+TimeStamp = int | datetime

-DataPointWithoutStatus =
-FullStatusCode =
-DataPointWithStatus =
-DataPoint =
-DataPointList =
+DataPointWithoutStatus = tuple[TimeStamp, float] | tuple[TimeStamp, str] | tuple[TimeStamp, int]
+FullStatusCode = StatusCode | int
+DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp, str, FullStatusCode]
+DataPoint = DataPointWithoutStatus | DataPointWithStatus
+DataPointList = list[DataPoint]


 def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
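The aliases above replace the old typing-module unions with PEP 604 syntax. A minimal sketch of values that satisfy the new aliases (timestamps, values and the status code are illustrative):

from datetime import datetime, timezone

# Without status: (timestamp, value); timestamps may be epoch-ms integers or datetimes.
plain_point = (1_700_000_000_000, 42.5)
dated_point = (datetime(2024, 1, 1, tzinfo=timezone.utc), "a string value")

# With status: (timestamp, value, status), where status is a StatusCode member or a raw int.
status_point = (1_700_000_000_000, 42.5, 0)

# DataPointList is simply a list of either form.
points = [plain_point, dated_point, status_point]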
@@ -103,14 +103,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
-        create_missing:
-        data_set_id:
-        cancellation_token:
+        thread_name: str | None = None,
+        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+        data_set_id: int | None = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -132,14 +132,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             self.create_missing = True
             self.missing_factory = create_missing

-        self.upload_queue:
+        self.upload_queue: dict[EitherId, DataPointList] = {}

         self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
         self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
         self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
         self.data_set_id = data_set_id

-    def _verify_datapoint_time(self, time:
+    def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
         if isinstance(time, int) or isinstance(time, float):
             return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
         elif isinstance(time, str):
@@ -147,7 +147,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         else:
             return time.timestamp() * 1000.0 >= MIN_DATAPOINT_TIMESTAMP

-    def _verify_datapoint_value(self, value:
+    def _verify_datapoint_value(self, value: int | float | datetime | str) -> bool:
         if isinstance(value, float):
             return not (
                 math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
@@ -171,7 +171,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return True

     def add_to_upload_queue(
-        self, *, id:
+        self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
     ) -> None:
         """
         Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -180,7 +180,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         Args:
             id: Internal ID of time series. Either this or external_id must be set.
             external_id: External ID of time series. Either this or external_id must be set.
-            datapoints:
+            datapoints: list of data points to add
         """
         datapoints = datapoints or []
         old_len = len(datapoints)
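The hunks above only tighten annotations; queue behaviour is unchanged. A hedged sketch of typical use under the new signatures (client setup, external ID and values are illustrative):

from cognite.client import CogniteClient
from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

client = CogniteClient()  # assumes credentials are configured elsewhere

# create_missing may be a bool or a TimeSeries factory callable, per the annotation above.
with TimeSeriesUploadQueue(client, max_queue_size=5000, create_missing=True) as queue:
    # id/external_id are keyword-only; datapoints is a DataPointList.
    queue.add_to_upload_queue(
        external_id="my-sensor-temperature",
        datapoints=[(1_700_000_000_000, 21.3), (1_700_000_060_000, 21.4)],
    )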
@@ -219,7 +219,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_batch(upload_this:
+        def _upload_batch(upload_this: list[dict], retries: int = 5) -> list[dict]:
             if len(upload_this) == 0:
                 return upload_this

@@ -241,14 +241,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                     create_these_ids = set(
                         [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
                     )
-                    datapoints_lists:
+                    datapoints_lists: dict[str, DataPointList] = {
                         ts_dict["externalId"]: ts_dict["datapoints"]
                         for ts_dict in upload_this
                         if ts_dict["externalId"] in create_these_ids
                     }

                     self.logger.info(f"Creating {len(create_these_ids)} time series")
-                    to_create:
+                    to_create: list[TimeSeries] = [
                         self.missing_factory(external_id, datapoints_lists[external_id])
                         for external_id in create_these_ids
                     ]
@@ -317,7 +317,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return self

     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
@@ -343,13 +343,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[Any]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         create_missing: bool = False,
-        cancellation_token:
+        cancellation_token: CancellationToken | None = None,
     ):
         """
         Args:
@@ -374,15 +374,15 @@ class SequenceUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue:
-        self.sequence_metadata:
-        self.sequence_asset_external_ids:
-        self.sequence_dataset_external_ids:
-        self.sequence_names:
-        self.sequence_descriptions:
-        self.column_definitions:
-        self.asset_ids:
-        self.dataset_ids:
+        self.upload_queue: dict[EitherId, SequenceRows] = {}
+        self.sequence_metadata: dict[EitherId, dict[str, str | int | float]] = {}
+        self.sequence_asset_external_ids: dict[EitherId, str] = {}
+        self.sequence_dataset_external_ids: dict[EitherId, str] = {}
+        self.sequence_names: dict[EitherId, str] = {}
+        self.sequence_descriptions: dict[EitherId, str] = {}
+        self.column_definitions: dict[EitherId, list[dict[str, str]]] = {}
+        self.asset_ids: dict[str, int] = {}
+        self.dataset_ids: dict[str, int] = {}
         self.create_missing = create_missing

         self.points_queued = SEQUENCES_UPLOADER_POINTS_QUEUED
@@ -391,13 +391,13 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def set_sequence_metadata(
         self,
-        metadata:
-        id:
-        external_id:
-        asset_external_id:
-        dataset_external_id:
-        name:
-        description:
+        metadata: dict[str, str | int | float],
+        id: int | None = None,
+        external_id: str | None = None,
+        asset_external_id: str | None = None,
+        dataset_external_id: str | None = None,
+        name: str | None = None,
+        description: str | None = None,
     ) -> None:
         """
         Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
@@ -426,7 +426,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
             self.sequence_descriptions[either_id] = description

     def set_sequence_column_definition(
-        self, col_def:
+        self, col_def: list[dict[str, str]], id: int | None = None, external_id: str | None = None
     ) -> None:
         """
         Set sequence column definition
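A small sketch of how the two setters above might be called, assuming seq_queue is a SequenceUploadQueue instance; the external ID, metadata and column dictionaries are illustrative, and the exact column keys should be checked against the Cognite SDK:

seq_queue.set_sequence_metadata(
    metadata={"source": "plc-17", "revision": 3},
    external_id="pump-17-curve",
    name="Pump curve",
    description="Head vs. flow for pump 17",
)
seq_queue.set_sequence_column_definition(
    col_def=[{"externalId": "flow"}, {"externalId": "head"}],
    external_id="pump-17-curve",
)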
@@ -443,16 +443,14 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def add_to_upload_queue(
         self,
-        rows:
-
-
-
-
-
-
-
-        id: Optional[int] = None,
-        external_id: Optional[str] = None,
+        rows: dict[int, list[int | float | str]]
+        | list[tuple[int, int | float | str]]
+        | list[dict[str, Any]]
+        | SequenceData
+        | SequenceRows,
+        column_external_ids: list[dict] | None = None,
+        id: int | None = None,
+        external_id: str | None = None,
     ) -> None:
         """
         Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
@@ -461,7 +459,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         Args:
             rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
                 objects, a dictionary of rowNumber: data, or a SequenceData object.
-            column_external_ids:
+            column_external_ids: list of external id for the columns of the sequence
             id: Sequence internal ID
                 Use if external_id is None
             external_id: Sequence external ID
@@ -477,7 +475,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
             # Already in the desired format
             pass
         elif isinstance(rows, (dict, list)):
-            rows_raw:
+            rows_raw: list[dict[str, Any]]
             if isinstance(rows, dict):
                 rows_raw = [{"rowNumber": row_number, "values": values} for row_number, values in rows.items()]
             elif isinstance(rows, list) and rows and isinstance(rows[0], (tuple, list)):
@@ -658,7 +656,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         return self

     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
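The widened rows annotation mirrors the row shapes SequencesAPI.insert accepts. A sketch of two accepted forms, again assuming seq_queue is a SequenceUploadQueue (external ID and values are illustrative):

# A dict of row number -> list of values...
seq_queue.add_to_upload_queue(rows={1: [10.0, 2.5], 2: [12.0, 2.9]}, external_id="pump-17-curve")

# ...or a list of {"rowNumber": ..., "values": ...} dicts, matching the normalisation in the hunk above.
seq_queue.add_to_upload_queue(
    rows=[{"rowNumber": 3, "values": [14.0, 3.1]}],
    external_id="pump-17-curve",
)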
cognite/extractorutils/uploader/upload_failure_handler.py
ADDED

@@ -0,0 +1,64 @@
+from datetime import datetime
+from typing import Iterator, List
+
+import jsonlines
+
+
+class FileErrorMapping:
+    def __init__(self, file_name: str, error_reason: str) -> None:
+        self.file_name = file_name
+        self.error_reason = error_reason
+
+    def __iter__(self) -> Iterator[List[str]]:
+        return iter([[self.file_name, self.error_reason]])
+
+
+class FileFailureManager:
+    MAX_QUEUE_SIZE = 500
+    START_TIME_KEY = "start_time"
+    FILE_REASON_MAP_KEY = "file_error_reason_map"
+
+    def __init__(self, start_time: str | None = None, path_to_file: str | None = None) -> None:
+        self.failure_logs: dict[str, str] = {}
+
+        self.path_to_failure_log: str = self._pre_process_file_extension(path_to_file)
+        self.start_time = start_time or str(datetime.now())
+        self._initialize_failure_logs()
+
+    def _pre_process_file_extension(self, path_to_file: str | None) -> str:
+        if path_to_file and not path_to_file.endswith(".jsonl"):
+            return path_to_file + ".jsonl"
+        return str(path_to_file)
+
+    def _initialize_failure_logs(self) -> None:
+        self.failure_logs = {}
+
+    def __len__(self) -> int:
+        return len(self.failure_logs)
+
+    def clear(self) -> None:
+        self.failure_logs.clear()
+        self._initialize_failure_logs()
+
+    def add(self, file_name: str, error_reason: str) -> None:
+        error_file_object = FileErrorMapping(file_name=file_name, error_reason=error_reason)
+        error_file_dict = dict(error_file_object)
+
+        self.failure_logs.update(error_file_dict)
+
+        if len(self) >= self.MAX_QUEUE_SIZE:
+            self.write_to_file()
+
+    def write_to_file(self) -> None:
+        if len(self) == 0:
+            return
+
+        dict_to_write = {
+            self.START_TIME_KEY: self.start_time,
+            self.FILE_REASON_MAP_KEY: self.failure_logs,
+        }
+
+        with jsonlines.open(self.path_to_failure_log, mode="a") as writer:
+            writer.write(dict_to_write)
+
+        self.clear()
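The new module gives file uploads a small failure journal: failures are collected in memory and appended to a JSON Lines file, either when write_to_file() is called or automatically once MAX_QUEUE_SIZE (500) entries have accumulated. A minimal sketch of how it might be used (path and file names are illustrative):

from cognite.extractorutils.uploader.upload_failure_handler import FileFailureManager

failure_manager = FileFailureManager(path_to_file="failed_uploads")  # ".jsonl" is appended automatically

failure_manager.add(file_name="report_2024.pdf", error_reason="File too large")
failure_manager.add(file_name="sensor_dump.csv", error_reason="Rejected MIME type")

failure_manager.write_to_file()  # flushes the queued entries and clears the in-memory map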
cognite/extractorutils/uploader_extractor.py
CHANGED

@@ -15,9 +15,10 @@
 """
 A module containing a slightly more advanced base extractor class, sorting a generic output into upload queues.
 """
+
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Any, Callable, Iterable,
+from typing import Any, Callable, Iterable, Type, TypeVar

 from more_itertools import peekable

@@ -41,10 +42,11 @@ class QueueConfigClass:

 @dataclass
 class UploaderExtractorConfig(BaseConfig):
-    queues:
+    queues: QueueConfigClass | None


 UploaderExtractorConfigClass = TypeVar("UploaderExtractorConfigClass", bound=UploaderExtractorConfig)
+RunHandle = Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, CancellationToken], None]


 class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
@@ -76,19 +78,17 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         *,
         name: str,
         description: str,
-        version:
-        run_handle:
-            Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, CancellationToken], None]
-        ] = None,
+        version: str | None = None,
+        run_handle: RunHandle | None = None,
         config_class: Type[UploaderExtractorConfigClass],
-        metrics:
+        metrics: BaseMetrics | None = None,
         use_default_state_store: bool = True,
-        cancellation_token:
-        config_file_path:
+        cancellation_token: CancellationToken | None = None,
+        config_file_path: str | None = None,
         continuous_extractor: bool = False,
         heartbeat_waiting_time: int = 600,
         handle_interrupts: bool = True,
-        middleware:
+        middleware: list[Callable[[dict], dict]] | None = None,
     ):
         super(UploaderExtractor, self).__init__(
             name=name,
@@ -170,7 +170,7 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         return self

     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> bool:
         self.event_queue.__exit__(exc_type, exc_val, exc_tb)
         self.raw_queue.__exit__(exc_type, exc_val, exc_tb)
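The new RunHandle alias names the callback signature an UploaderExtractor accepts. A hedged sketch of a conforming handle (the handle body is illustrative, and the import paths are assumptions based on the package layout above):

from cognite.client import CogniteClient
from cognite.extractorutils.statestore import AbstractStateStore
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader_extractor import UploaderExtractor, UploaderExtractorConfig


def handle(
    client: CogniteClient, states: AbstractStateStore, config: UploaderExtractorConfig, token: CancellationToken
) -> None:
    # Extraction work goes here; the signature matches the RunHandle alias above.
    ...


extractor = UploaderExtractor(
    name="my-uploader-extractor",
    description="Example extractor using a run handle",
    version="1.0.0",
    run_handle=handle,
    config_class=UploaderExtractorConfig,
)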
cognite/extractorutils/uploader_types.py
CHANGED

@@ -1,27 +1,19 @@
-import sys
-from typing import Iterable, List, Optional, Union
+from typing import Iterable, TypeAlias

 from cognite.client.data_classes import Event as _Event
 from cognite.client.data_classes import Row as _Row
-
-if sys.version_info >= (3, 10):
-    from typing import TypeAlias
-else:
-    from typing_extensions import TypeAlias
-
-
 from cognite.extractorutils.uploader.time_series import DataPoint


 class InsertDatapoints:
-    def __init__(self, *, id:
+    def __init__(self, *, id: int | None = None, external_id: str | None = None, datapoints: list[DataPoint]):
         self.id = id
         self.external_id = external_id
         self.datapoints = datapoints


 class RawRow:
-    def __init__(self, db_name: str, table_name: str, row:
+    def __init__(self, db_name: str, table_name: str, row: _Row | Iterable[_Row]):
         self.db_name = db_name
         self.table_name = table_name
         if isinstance(row, Iterable):
@@ -32,4 +24,4 @@ class RawRow:

 Event: TypeAlias = _Event

-CdfTypes =
+CdfTypes = Event | Iterable[Event] | RawRow | Iterable[RawRow] | InsertDatapoints | Iterable[InsertDatapoints]
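CdfTypes is the union an UploaderExtractor run handle can hand back for routing into the upload queues. A small sketch of values covered by the union (IDs and values are illustrative):

from cognite.client.data_classes import Row
from cognite.extractorutils.uploader_types import Event, InsertDatapoints, RawRow

an_event = Event(external_id="pump-17-trip", description="Pump tripped")
a_raw_row = RawRow(db_name="extractor-db", table_name="readings", row=Row("key-1", {"value": 42}))
some_datapoints = InsertDatapoints(external_id="my-sensor", datapoints=[(1_700_000_000_000, 21.3)])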
cognite/extractorutils/util.py
CHANGED

@@ -25,7 +25,7 @@ from functools import partial, wraps
 from io import RawIOBase
 from threading import Thread
 from time import time
-from typing import Any, Callable,
+from typing import Any, Callable, Generator, Iterable, Type, TypeVar

 from decorator import decorator

@@ -89,7 +89,7 @@ class EitherId:
         TypeError: If none of both of id types are set.
     """

-    def __init__(self, **kwargs:
+    def __init__(self, **kwargs: int | str | None):
        internal_id = kwargs.get("id")
        external_id = kwargs.get("externalId") or kwargs.get("external_id")

@@ -105,8 +105,8 @@ class EitherId:
        if external_id is not None and not isinstance(external_id, str):
            raise TypeError("External IDs must be strings")

-        self.internal_id:
-        self.external_id:
+        self.internal_id: int | None = internal_id
+        self.external_id: str | None = external_id

    def type(self) -> str:
        """
@@ -117,7 +117,7 @@ class EitherId:
        """
        return "id" if self.internal_id is not None else "externalId"

-    def content(self) ->
+    def content(self) -> int | str:
        """
        Get the value of the ID

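EitherId wraps exactly one of an internal or an external ID; the two accessors annotated above report which one is set and its value. A brief sketch:

from cognite.extractorutils.util import EitherId

either_id = EitherId(external_id="my-sensor")
either_id.type()     # "externalId"
either_id.content()  # "my-sensor"

EitherId(id=123).content()  # 123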
@@ -249,7 +249,7 @@ def add_extraction_pipeline(
            ##############################
            _logger.info(f"Starting to run function: {input_function.__name__}")

-            heartbeat_thread:
+            heartbeat_thread: Thread | None = None
            try:
                heartbeat_thread = Thread(target=heartbeat_loop, name="HeartbeatLoop", daemon=True)
                heartbeat_thread.start()
@@ -313,12 +313,12 @@ _T2 = TypeVar("_T2")
 def _retry_internal(
     f: Callable[..., _T2],
     cancellation_token: CancellationToken,
-    exceptions:
+    exceptions: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Exception], bool]],
     tries: int,
     delay: float,
-    max_delay:
+    max_delay: float | None,
     backoff: float,
-    jitter:
+    jitter: float | tuple[float, float],
 ) -> _T2:
     logger = logging.getLogger(__name__)

@@ -366,13 +366,13 @@ def _retry_internal(


 def retry(
-    cancellation_token:
-    exceptions:
+    cancellation_token: CancellationToken | None = None,
+    exceptions: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Any], bool]] = (Exception,),
     tries: int = 10,
     delay: float = 1,
-    max_delay:
+    max_delay: float | None = 60,
     backoff: float = 2,
-    jitter:
+    jitter: float | tuple[float, float] = (0, 2),
 ) -> Callable[[Callable[..., _T2]], Callable[..., _T2]]:
     """
     Returns a retry decorator.
@@ -414,8 +414,8 @@ def retry(


 def requests_exceptions(
-    status_codes:
-) ->
+    status_codes: list[int] | None = None,
+) -> dict[Type[Exception], Callable[[Any], bool]]:
     """
     Retry exceptions from using the ``requests`` library. This will retry all connection and HTTP errors matching
     the given status codes.
@@ -448,8 +448,8 @@ def requests_exceptions(


 def httpx_exceptions(
-    status_codes:
-) ->
+    status_codes: list[int] | None = None,
+) -> dict[Type[Exception], Callable[[Any], bool]]:
     """
     Retry exceptions from using the ``httpx`` library. This will retry all connection and HTTP errors matching
     the given status codes.
@@ -482,8 +482,8 @@ def httpx_exceptions(


 def cognite_exceptions(
-    status_codes:
-) ->
+    status_codes: list[int] | None = None,
+) -> dict[Type[Exception], Callable[[Any], bool]]:
     """
     Retry exceptions from using the Cognite SDK. This will retry all connection and HTTP errors matching
     the given status codes.
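Combined with the retry decorator above, the helper returns a mapping from exception types to predicates, so only errors with the listed status codes are retried. A hedged sketch (status codes and the wrapped function are illustrative):

from cognite.extractorutils.util import cognite_exceptions, retry

@retry(exceptions=cognite_exceptions(status_codes=[408, 429, 500, 502, 503]), tries=5, delay=1, backoff=2)
def upload_batch() -> None:
    # Cognite SDK errors with one of the listed status codes trigger a retry with exponential backoff.
    ...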
@@ -569,9 +569,7 @@ def truncate_byte_len(item: str, ln: int) -> str:


 class BufferedReadWithLength(io.BufferedReader):
-    def __init__(
-        self, raw: RawIOBase, buffer_size: int, len: int, on_close: Optional[Callable[[], None]] = None
-    ) -> None:
+    def __init__(self, raw: RawIOBase, buffer_size: int, len: int, on_close: Callable[[], None] | None = None) -> None:
         super().__init__(raw, buffer_size)
         # Do not remove even if it appears to be unused. :P
         # Requests uses this to add the content-length header, which is necessary for writing to files in azure clusters
@@ -588,7 +586,7 @@ def iterable_to_stream(
     iterator: Iterable[bytes],
     file_size_bytes: int,
     buffer_size: int = io.DEFAULT_BUFFER_SIZE,
-    on_close:
+    on_close: Callable[[], None] | None = None,
 ) -> BufferedReadWithLength:
     class ChunkIteratorStream(io.RawIOBase):
         def __init__(self) -> None:
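iterable_to_stream wraps a bytes iterator in a file-like object that also reports its length, which, per the comment above, requests uses for the Content-Length header. A minimal sketch (chunks are illustrative):

from cognite.extractorutils.util import iterable_to_stream

chunks = [b"first chunk, ", b"second chunk"]
stream = iterable_to_stream(iter(chunks), file_size_bytes=sum(len(c) for c in chunks))
data = stream.read()  # reads the concatenated chunks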
{cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: cognite-extractor-utils
-Version: 7.5.4
+Version: 7.5.6
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -19,10 +19,11 @@ Requires-Dist: arrow (>=1.0.0,<2.0.0)
 Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
 Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
 Requires-Dist: cognite-sdk (>=7.59.0,<8.0.0)
-Requires-Dist: croniter (>=
+Requires-Dist: croniter (>=6.0.0,<7.0.0)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
 Requires-Dist: httpx (>=0.27.0,<0.28.0)
+Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
 Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
 Requires-Dist: orjson (>=3.10.3,<4.0.0)
 Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
cognite_extractor_utils-7.5.6.dist-info/RECORD
ADDED

@@ -0,0 +1,49 @@
+cognite/extractorutils/__init__.py,sha256=uWu9rc8gh485FrNT-ocKmSQwkxPzFo7L9ac103j7E-s,764
+cognite/extractorutils/_inner_util.py,sha256=cdoz9Sl3Wt1IsxiCZlcd913_hKrTCxDRrM_L-Zn1_F8,1800
+cognite/extractorutils/base.py,sha256=pV3xy0Dzt8q9I5DvI-TvmRZXMmSTk8Kk-d0jZWa_ua8,16333
+cognite/extractorutils/configtools/__init__.py,sha256=llNMzHu4yCWx5Kjm8G9IN5Pij8OUaVT_VZuZ2r3JtAA,3616
+cognite/extractorutils/configtools/_util.py,sha256=uXpR8YnEkfeZOuaZGjRRk_wgC5AGOEKNWMYfV50atsc,4746
+cognite/extractorutils/configtools/elements.py,sha256=ti3PFmwHyiFJFXNEzObRY6IxQo18LABSsYafPxuoYSU,26590
+cognite/extractorutils/configtools/loaders.py,sha256=w8NoZcZJZbEctvkTq8aG_UH2x2gct_fpb2KenksmVaQ,18294
+cognite/extractorutils/configtools/validators.py,sha256=xug3GOMIO4NOdyyvXtYlpKyq9wuDtGf7-xqIefD5bIo,1016
+cognite/extractorutils/exceptions.py,sha256=NDmiElg1cmGMwIl82kpCDF37UcAFNnfDK9NxUn_u2rk,1149
+cognite/extractorutils/metrics.py,sha256=-sUBaZ7lNrcdxuQcsh7rU-CwMNTqlT3DiMRyn5CxPTQ,15422
+cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognite/extractorutils/statestore/__init__.py,sha256=hV3r11FUXkH6-60Ct6zLSROMNVrEeiE3Shmkf28Q-co,359
+cognite/extractorutils/statestore/_base.py,sha256=mWdFk4EZl886V6uXRj4O2sv2_ANJ3Sigmgeql-XEsmc,2675
+cognite/extractorutils/statestore/hashing.py,sha256=Le6PUpLYV7kTKgO2nc5BKCEf-3LTXoGzEVzLtw8tkn0,8011
+cognite/extractorutils/statestore/watermark.py,sha256=U_cA0XlqkgMML-ZeEl13KE8KjQHsId5t7mMHibRhUyA,16713
+cognite/extractorutils/threading.py,sha256=RN9oEXO6N2RqYKThFoDqzSeo593hkzTVePK1KSVOu3A,3586
+cognite/extractorutils/unstable/__init__.py,sha256=L6nqJHjylpk67CE-PbXJyb_TBI4yjhEYEz9J9WShDfM,341
+cognite/extractorutils/unstable/configuration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognite/extractorutils/unstable/configuration/exceptions.py,sha256=-cziC11IbUP308ldbAYoQn4x2SNCIxYanN2eIV1n9To,654
+cognite/extractorutils/unstable/configuration/loaders.py,sha256=iMlCx6abKaDHx5-nOQSRtf-creqJPv1QrnbapCaIZkA,3689
+cognite/extractorutils/unstable/configuration/models.py,sha256=jFlA5eEeNRq39KEwAjZV9UkbV2juVUHANNeXq0VtqL4,8210
+cognite/extractorutils/unstable/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognite/extractorutils/unstable/core/_dto.py,sha256=tvvy39cvf-QT28GWz5FpqxQ5vAVk0t69JoPPhpWlweY,1293
+cognite/extractorutils/unstable/core/_messaging.py,sha256=D9rOW8fijryXffbm90d8VTf2vy5FmwVGU-H0O-cn-EI,68
+cognite/extractorutils/unstable/core/base.py,sha256=QljO7Zpn5RSTEI9PHIavhKWdr4Hp-Ni5tdmsQ_ocOLk,12190
+cognite/extractorutils/unstable/core/errors.py,sha256=D8QAaqwJec62ZbhBNC0flmKjw_EdHLKGn8npqtPQhZE,1706
+cognite/extractorutils/unstable/core/restart_policy.py,sha256=SodG2Gs9Es05yk3EbAAWY_sbSoBUmhTRrUMBR4BSQbQ,622
+cognite/extractorutils/unstable/core/runtime.py,sha256=sb8ouTCZqvzpns_8UpVwPd4nGnfinf7vsVvOk23jksQ,11834
+cognite/extractorutils/unstable/core/tasks.py,sha256=K3R40sNSqYJ1Oc0UMTUDF4lY_WaZ7HokvZ5kctDsjGQ,585
+cognite/extractorutils/unstable/scheduling/__init__.py,sha256=L90_rCZNHvti-PInne0r7W9edIkifctELjiaxEoQiSc,67
+cognite/extractorutils/unstable/scheduling/_scheduler.py,sha256=tzu3-olhBU8uFDYj-Q6mEJUVBVin8wSGJONJVrNP3NE,3694
+cognite/extractorutils/unstable/scheduling/_schedules.py,sha256=y0NVeXYZOFcAyzBgAe8jqK0W-SZL5m99UwXAacGzqIw,677
+cognite/extractorutils/uploader/__init__.py,sha256=MgyvZojwLE-oUCZ0VALISd2rUCqShlyozxhzAKX5uj4,3396
+cognite/extractorutils/uploader/_base.py,sha256=JPr5Dp25XYzwN4MJ2ddd-xhPg5kVV3jASNecD8sAaKs,5273
+cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
+cognite/extractorutils/uploader/assets.py,sha256=SDX48xjqIT4tbQ9HtaIgQT8bw61XHJGic5ofZJeK7UE,5692
+cognite/extractorutils/uploader/data_modeling.py,sha256=Vd9eDWE-KPICChtxcKZdFcH3mSbavD8s1627wXxF_SI,3593
+cognite/extractorutils/uploader/events.py,sha256=qo1rVhk3eUfcbNLauZfvBohQ2aFRazbyGuMFcU-UyQ8,5640
+cognite/extractorutils/uploader/files.py,sha256=3VH8lsZmPL4TI3r_mIzTf8T2YmYc3kAtyBeo_4g9zP0,26610
+cognite/extractorutils/uploader/raw.py,sha256=VMYfeZN8XAHfZ77AuGcL85bIWvhaO7-Whx_marnGAmQ,6692
+cognite/extractorutils/uploader/time_series.py,sha256=yBN7ppD5hg0CgUIw7WvhhAPyOj0gbIWG4_-ifPaAuOE,26575
+cognite/extractorutils/uploader/upload_failure_handler.py,sha256=Oj3xDK_qlGQdEOzswE-6ti7tDAQXR0Rvee3lg6KBg3s,2000
+cognite/extractorutils/uploader_extractor.py,sha256=X71M_7JcGMwC3kHMETmTF8cdjSQwZaNmIGlT-mBs3Pk,7687
+cognite/extractorutils/uploader_types.py,sha256=eLKFQJT53zpn9_3-SDUtgHUMASGdK7c85HWrLWEF-JE,865
+cognite/extractorutils/util.py,sha256=TL3fkHlvPqWjdyr4yorq5LNJbPxJSom69HKyeQM92xE,21042
+cognite_extractor_utils-7.5.6.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+cognite_extractor_utils-7.5.6.dist-info/METADATA,sha256=A1Sc24JpE_1afcPP8Dor7_f6KJb6NQ5dSY6_12Zswfk,5691
+cognite_extractor_utils-7.5.6.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+cognite_extractor_utils-7.5.6.dist-info/RECORD,,
cognite/extractorutils/unstable/core/__main__.py
REMOVED

@@ -1,31 +0,0 @@
-"""
-Example of how you would build an extractor with the new base class
-"""
-
-from cognite.extractorutils.unstable.configuration.models import ExtractorConfig
-
-from .base import Extractor
-from .runtime import Runtime
-
-
-class MyConfig(ExtractorConfig):
-    parameter_one: int
-    parameter_two: str
-
-
-class MyExtractor(Extractor[MyConfig]):
-    NAME = "Test extractor"
-    EXTERNAL_ID = "test-extractor"
-    DESCRIPTION = "Test of the new runtime"
-    VERSION = "1.0.0"
-    CONFIG_TYPE = MyConfig
-
-    def run(self) -> None:
-        self.logger.info("Started!")
-        if not self.cancellation_token.wait(10):
-            raise ValueError("Oops")
-
-
-if __name__ == "__main__":
-    runtime = Runtime(MyExtractor)
-    runtime.run()