cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-extractor-utils has been flagged as potentially problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +7 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +58 -49
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +29 -6
- cognite/extractorutils/uploader_types.py +15 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
cognite/extractorutils/uploader/time_series.py

@@ -1,3 +1,6 @@
+"""
+Upload queue for time series and sequences.
+"""
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -81,7 +84,7 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
 
 class TimeSeriesUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for time series
+    Upload queue for time series.
 
     Args:
        cdf_client: Cognite Data Fusion client to use
@@ -141,7 +144,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self.data_set_id = data_set_id
 
     def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
-        if isinstance(time, int
+        if isinstance(time, int | float):
             return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
         elif isinstance(time, str):
             return False
@@ -155,10 +158,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             )
         elif isinstance(value, str):
             return len(value) <= MAX_DATAPOINT_STRING_LENGTH
-
-            return False
-        else:
-            return True
+        return not isinstance(value, datetime)
 
     def _is_datapoint_valid(
         self,
@@ -172,11 +172,16 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return True
 
     def add_to_upload_queue(
-        self,
+        self,
+        *,
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
+        datapoints: DataPointList | None = None,
     ) -> None:
         """
-        Add data points to upload queue.
-
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
 
         Args:
             id: Internal ID of time series. Either this or external_id must be set.
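Editorial note: the new signature above makes `id`, `external_id` and `datapoints` keyword-only. A minimal sketch of the 7.6.0 call style, assuming an already-configured CogniteClient (`client`); the external ID and datapoint values are invented:

```python
from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

queue = TimeSeriesUploadQueue(cdf_client=client, create_missing=True)

# id, external_id and datapoints are passed by keyword
queue.add_to_upload_queue(
    external_id="my-timeseries",  # invented external ID
    datapoints=[(1700000000000, 1.0), (1700000060000, 2.0)],  # (timestamp ms, value) pairs
)
queue.upload()  # triggers an upload of the queue and clears it afterwards
```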
@@ -209,7 +214,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
 
     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
         """
 
     @retry(
@@ -239,9 +244,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
 
         if self.create_missing:
             # Get the time series that can be created
-            create_these_ids =
-                [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
-            )
+            create_these_ids = {id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict}
             datapoints_lists: dict[str, DataPointList] = {
                 ts_dict["externalId"]: ts_dict["datapoints"]
                 for ts_dict in upload_this
@@ -294,7 +297,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             ]
         )
 
-        for
+        for datapoints in self.upload_queue.values():
            self.points_written.inc(len(datapoints))
 
        try:
@@ -309,7 +312,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
 
    def __enter__(self) -> "TimeSeriesUploadQueue":
        """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
        Returns:
            self
@@ -321,7 +324,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
        Args:
            exc_type: Exception type
@@ -332,7 +335,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
 
    def __len__(self) -> int:
        """
-        The size of the upload queue
+        The size of the upload queue.
 
        Returns:
            Number of data points in queue
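Since `__enter__` and `__exit__` wrap the start and stop methods, the queue is typically driven as a context manager. A short usage sketch, again assuming a configured CogniteClient (`client`); the interval and IDs are illustrative:

```python
from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

# start() runs on entry and stop() on exit, so the uploader thread triggers an
# upload every max_upload_interval seconds while the block is active.
with TimeSeriesUploadQueue(cdf_client=client, max_upload_interval=60, create_missing=True) as queue:
    queue.add_to_upload_queue(
        external_id="my-timeseries",
        datapoints=[(1700000000000, 1.0)],
    )
# any datapoints still in the queue are uploaded when the block exits
```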
@@ -341,6 +344,21 @@
 
 
 class SequenceUploadQueue(AbstractUploadQueue):
+    """
+    Upload queue for sequences.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one
+            argument: A list of the events that were uploaded.
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing sequences if possible (ie, if external id is used).
+    """
+
     def __init__(
         self,
         cdf_client: CogniteClient,
@@ -352,19 +370,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
         create_missing: bool = False,
         cancellation_token: CancellationToken | None = None,
     ):
-        """
-        Args:
-            cdf_client: Cognite Data Fusion client to use
-            post_upload_function: A function that will be called after each upload. The function will be given one
-                argument: A list of the events that were uploaded.
-            max_queue_size: Maximum size of upload queue. Defaults to no max size.
-            max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-                methods).
-            trigger_log_level: Log level to log upload triggers to.
-            thread_name: Thread name of uploader thread.
-            create_missing: Create missing sequences if possible (ie, if external id is used)
-        """
-
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
@@ -393,7 +398,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
     def set_sequence_metadata(
         self,
         metadata: dict[str, str | int | float],
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
         external_id: str | None = None,
         asset_external_id: str | None = None,
         dataset_external_id: str | None = None,
@@ -401,8 +406,10 @@ class SequenceUploadQueue(AbstractUploadQueue):
         description: str | None = None,
     ) -> None:
         """
-        Set sequence metadata.
-
+        Set sequence metadata.
+
+        Metadata will be cached until the sequence is created. The metadata will be updated if the sequence already
+        exists.
 
         Args:
             metadata: Sequence metadata
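The `# noqa: A002` comments added throughout this release silence flake8-builtins/Ruff rule A002, which flags function arguments that shadow a Python builtin; the `id` parameters in these signatures shadow the built-in `id()`. A generic illustration of what the rule complains about (not code from this package):

```python
def set_metadata(id: int | None = None) -> None:  # noqa: A002 - argument shadows the builtin id()
    # Within this body, "id" refers to the parameter; the builtin id() is not reachable by that name.
    print(id)
```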
@@ -427,10 +434,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
        self.sequence_descriptions[either_id] = description
 
    def set_sequence_column_definition(
-        self,
+        self,
+        col_def: list[dict[str, str]],
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
    ) -> None:
        """
-        Set sequence column definition
+        Set sequence column definition.
 
        Args:
            col_def: Sequence column definition
@@ -450,12 +460,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
        | SequenceData
        | SequenceRows,
        column_external_ids: list[dict] | None = None,
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
        external_id: str | None = None,
    ) -> None:
        """
-        Add sequence rows to upload queue.
-
+        Add sequence rows to upload queue.
+
+        Mirrors implementation of SequenceApi.insert. Inserted rows will be cached until uploaded.
 
        Args:
            rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
@@ -466,7 +477,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
            external_id: Sequence external ID
                Us if id is None
        """
-
        if len(rows) == 0:
            pass
 
@@ -509,7 +519,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
    def upload(self) -> None:
        """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
        """
 
    @retry(
@@ -571,15 +581,14 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
    def _create_or_update(self, either_id: EitherId) -> None:
        """
-        Create or update sequence, based on provided metadata and column definitions
+        Create or update sequence, based on provided metadata and column definitions.
 
        Args:
            either_id: Id/External Id of sequence to be updated
        """
-
        column_def = self.column_definitions.get(either_id)
        if column_def is None:
-            self.logger.error(f"Can't create sequence {
+            self.logger.error(f"Can't create sequence {either_id!s}, no column definitions provided")
 
        try:
            seq = self.cdf_client.sequences.create(
@@ -596,7 +605,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
            )
 
        except CogniteDuplicatedError:
-            self.logger.info(f"
+            self.logger.info(f"Sequence already exist: {either_id}")
            seq = self.cdf_client.sequences.retrieve(  # type: ignore [assignment]
                id=either_id.internal_id,
                external_id=either_id.external_id,
@@ -608,7 +617,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
    def _resolve_asset_ids(self) -> None:
        """
-        Resolve id of assets if specified, for use in sequence creation
+        Resolve id of assets if specified, for use in sequence creation.
        """
        assets = set(self.sequence_asset_external_ids.values())
        assets.discard(None)  # type: ignore  # safeguard, remove Nones if any
@@ -628,7 +637,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
    def _resolve_dataset_ids(self) -> None:
        """
-        Resolve id of datasets if specified, for use in sequence creation
+        Resolve id of datasets if specified, for use in sequence creation.
        """
        datasets = set(self.sequence_dataset_external_ids.values())
        datasets.discard(None)  # type: ignore  # safeguard, remove Nones if any
@@ -648,7 +657,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
    def __enter__(self) -> "SequenceUploadQueue":
        """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
        Returns:
            self
@@ -660,7 +669,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
        Args:
            exc_type: Exception type
@@ -671,7 +680,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
    def __len__(self) -> int:
        """
-        The size of the upload queue
+        The size of the upload queue.
 
        Returns:
            Number of data points in queue
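Taken together, the SequenceUploadQueue changes keep the existing workflow: register a column definition and metadata for a sequence, queue rows, and upload. A rough usage sketch based on the method names and docstrings in this diff; the column definition format, the row values, and the use of `add_to_upload_queue` for rows are assumptions, not verified against the release:

```python
from cognite.extractorutils.uploader.time_series import SequenceUploadQueue

with SequenceUploadQueue(cdf_client=client, create_missing=True) as queue:
    # Cached until the sequence is created; updated if it already exists
    queue.set_sequence_column_definition(
        col_def=[{"externalId": "pressure", "valueType": "DOUBLE"}],  # assumed format
        external_id="my-sequence",
    )
    queue.set_sequence_metadata(metadata={"source": "example"}, external_id="my-sequence")

    # Rows as (row number, values) tuples, mirroring the sequences insert API
    queue.add_to_upload_queue(rows=[(1, [42.0])], external_id="my-sequence")
```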
cognite/extractorutils/uploader/upload_failure_handler.py

@@ -1,19 +1,34 @@
+"""
+This module provides a mechanism to handle file upload failures by logging details to a newline delimited JSON file.
+"""
+
 from collections.abc import Iterator
-from datetime import datetime
+from datetime import datetime, timezone
 
 import jsonlines
 
 
 class FileErrorMapping:
+    """
+    A class to represent a mapping of file name to its error reason.
+    """
+
     def __init__(self, file_name: str, error_reason: str) -> None:
         self.file_name = file_name
         self.error_reason = error_reason
 
     def __iter__(self) -> Iterator[list[str]]:
+        """
+        Returns an single-item iterator containing the file name and error reason.
+        """
         return iter([[self.file_name, self.error_reason]])
 
 
 class FileFailureManager:
+    """
+    A class to manage file upload failures by logging them to a newline delimited JSON file.
+    """
+
     MAX_QUEUE_SIZE = 500
     START_TIME_KEY = "start_time"
     FILE_REASON_MAP_KEY = "file_error_reason_map"
@@ -22,7 +37,7 @@ class FileFailureManager:
         self.failure_logs: dict[str, str] = {}
 
         self.path_to_failure_log: str = self._pre_process_file_extension(path_to_file)
-        self.start_time = start_time or str(datetime.now())
+        self.start_time = start_time or str(datetime.now(tz=timezone.utc))
         self._initialize_failure_logs()
 
     def _pre_process_file_extension(self, path_to_file: str | None) -> str:
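The start-time change swaps a naive local timestamp for a timezone-aware UTC one, which changes the rendered string. A quick illustration (the printed values are examples only):

```python
from datetime import datetime, timezone

print(str(datetime.now()))                 # e.g. "2025-05-01 12:34:56.789012"        (naive, local time)
print(str(datetime.now(tz=timezone.utc)))  # e.g. "2025-05-01 10:34:56.789012+00:00"  (aware, UTC)
```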
@@ -34,13 +49,28 @@ class FileFailureManager:
         self.failure_logs = {}
 
     def __len__(self) -> int:
+        """
+        Returns the number of failure logs currently stored.
+        """
         return len(self.failure_logs)
 
     def clear(self) -> None:
+        """
+        Clears the queue of failure logs.
+        """
         self.failure_logs.clear()
         self._initialize_failure_logs()
 
     def add(self, file_name: str, error_reason: str) -> None:
+        """
+        Adds a file name and its error reason to the failure logs.
+
+        If the number of logs exceeds the maximum queue size, it writes the logs to a file.
+
+        Args:
+            file_name: The name of the file that failed to upload.
+            error_reason: The reason for the failure.
+        """
         error_file_object = FileErrorMapping(file_name=file_name, error_reason=error_reason)
         error_file_dict = dict(error_file_object)
 
@@ -50,6 +80,9 @@ class FileFailureManager:
             self.write_to_file()
 
     def write_to_file(self) -> None:
+        """
+        Flushes the current failure logs to a newline delimited JSON file and clears the queue.
+        """
         if len(self) == 0:
             return
 
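The failure log itself is newline delimited JSON written with the jsonlines package. A minimal sketch of what one flushed record could look like; the field names follow the START_TIME_KEY and FILE_REASON_MAP_KEY constants above, while the file name and values are invented:

```python
import jsonlines

# Each flush appends one JSON object on its own line (NDJSON).
with jsonlines.open("failure_log.jsonl", mode="a") as writer:
    writer.write(
        {
            "start_time": "2025-05-01 10:34:56+00:00",
            "file_error_reason_map": {"report.pdf": "File too large"},
        }
    )
```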
cognite/extractorutils/uploader_extractor.py

@@ -1,3 +1,9 @@
+"""
+DEPRECATED. Use the normal base class and instantiate the upload queues manually.
+
+A module containing a version of the Extractor class with pre-defined upload queues.
+"""
+
 # Copyright 2022 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,12 +18,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""
-A module containing a slightly more advanced base extractor class, sorting a generic output into upload queues.
-"""
-
 from collections.abc import Callable, Iterable
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from types import TracebackType
 from typing import Any, TypeVar
 
@@ -35,14 +37,22 @@ from cognite.extractorutils.uploader_types import CdfTypes, Event, InsertDatapoi
 
 @dataclass
 class QueueConfigClass:
+    """
+    Configuration for several upload queues.
+    """
+
     event_size: int = 10_000
     raw_size: int = 50_000
     timeseries_size: int = 1_000_000
-    upload_interval: TimeIntervalConfig = TimeIntervalConfig("1m")
+    upload_interval: TimeIntervalConfig = field(default_factory=lambda: TimeIntervalConfig("1m"))
 
 
 @dataclass
 class UploaderExtractorConfig(BaseConfig):
+    """
+    Base configuration for the UploaderExtractor.
+    """
+
     queues: QueueConfigClass | None
 
 
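The `QueueConfigClass` change replaces a shared class-level default instance with a `default_factory`, so each config object gets its own `TimeIntervalConfig` rather than all instances referencing one object. A generic illustration of the pattern, independent of this package:

```python
from dataclasses import dataclass, field


@dataclass
class Settings:
    # A bare mutable default such as `tags: list[str] = []` is rejected by dataclasses;
    # default_factory builds a fresh value for every instance instead of sharing one.
    tags: list[str] = field(default_factory=list)


a, b = Settings(), Settings()
a.tags.append("x")
print(b.tags)  # [] - b has its own list, unaffected by a
```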
@@ -108,6 +118,13 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         self.middleware = middleware if isinstance(middleware, list) else []
 
     def handle_output(self, output: CdfTypes) -> None:
+        """
+        Handle the output of the extractor and sort it into appropriate upload queues.
+
+        Args:
+            output: The output from the extractor, which can be an Event, RawRow, InsertDatapoints, or an iterable of
+                these types.
+        """
         list_output = [output] if not isinstance(output, Iterable) else output
         peekable_output = peekable(list_output)
 
@@ -145,6 +162,9 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         return item
 
     def __enter__(self) -> "UploaderExtractor":
+        """
+        Initializes the upload queues and returns the extractor instance.
+        """
         super().__enter__()
 
         queue_config = self.config.queues if self.config.queues else QueueConfigClass()
@@ -173,6 +193,9 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
     def __exit__(
         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> bool:
+        """
+        Waits for the upload queues and exits the extractor context.
+        """
         self.event_queue.__exit__(exc_type, exc_val, exc_tb)
         self.raw_queue.__exit__(exc_type, exc_val, exc_tb)
         self.time_series_queue.__exit__(exc_type, exc_val, exc_tb)
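With the new docstring, `handle_output` remains the single entry point that sorts extractor output into the event, RAW, and time series queues. A rough sketch of feeding it from inside a running UploaderExtractor (`extractor` is assumed to be such an instance); the database, table, and external IDs are placeholders:

```python
from cognite.client.data_classes import Row
from cognite.extractorutils.uploader_types import InsertDatapoints, RawRow

# Any supported type, or an iterable mixing them, is accepted and routed to the
# matching upload queue.
extractor.handle_output(
    [
        RawRow("my_db", "my_table", Row("row-1", {"value": 42})),
        InsertDatapoints(external_id="my-timeseries", datapoints=[(1700000000000, 1.0)]),
    ]
)
```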
cognite/extractorutils/uploader_types.py

@@ -1,3 +1,9 @@
+"""
+DEPRECATED: This module is deprecated and will be removed in a future release.
+
+These types are used in the UploaderExtractor, as well as the REST and MQTT extensions for the extractorutils library.
+"""
+
 from collections.abc import Iterable
 from typing import TypeAlias
 
@@ -7,13 +13,21 @@ from cognite.extractorutils.uploader.time_series import DataPoint
 
 
 class InsertDatapoints:
-
+    """
+    A class representing a batch of datapoints to be inserted into a time series.
+    """
+
+    def __init__(self, *, id: int | None = None, external_id: str | None = None, datapoints: list[DataPoint]):  # noqa: A002
         self.id = id
         self.external_id = external_id
         self.datapoints = datapoints
 
 
 class RawRow:
+    """
+    A class representing a row of data to be inserted into a RAW table.
+    """
+
     def __init__(self, db_name: str, table_name: str, row: _Row | Iterable[_Row]):
         self.db_name = db_name
         self.table_name = table_name
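Note that `InsertDatapoints` takes `id` and `external_id` as keyword-only arguments (the `*` marker in the new signature). A minimal construction sketch with invented values:

```python
from cognite.extractorutils.uploader_types import InsertDatapoints

# Identifiers are passed by keyword in 7.6.0
dps = InsertDatapoints(external_id="my-timeseries", datapoints=[(1700000000000, 1.0)])

# Positional identifiers raise TypeError with the keyword-only signature:
# InsertDatapoints(None, "my-timeseries", [(1700000000000, 1.0)])
```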