cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +213 -35
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +90 -19
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +119 -36
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +7 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +58 -49
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +29 -6
- cognite/extractorutils/uploader_types.py +15 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.13.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0

cognite/extractorutils/uploader/files.py

@@ -1,3 +1,7 @@
+"""
+Upload queue for files.
+"""
+
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -88,6 +92,9 @@ class ChunkedStream(RawIOBase, BinaryIO):
         self._current_chunk_size = -1

     def tell(self) -> int:
+        """
+        Get the current position of the stream.
+        """
         return self._pos

     # RawIOBase is (stupidly) incompatible with BinaryIO
@@ -97,12 +104,21 @@ class ChunkedStream(RawIOBase, BinaryIO):
     # required to satisfy mypy.
     # This may be solvable by changing the typing in the python SDK to use typing.Protocol.
     def writelines(self, __lines: Any) -> None:
+        """
+        Not supported for ChunkedStream.
+        """
         raise NotImplementedError()

     def write(self, __b: Any) -> int:
+        """
+        Not supported for ChunkedStream.
+        """
         raise NotImplementedError()

     def __enter__(self) -> "ChunkedStream":
+        """
+        Wraps around the inner stream's ``__enter__`` method, for use as context manager.
+        """
         return super().__enter__()

     def __exit__(
@@ -111,14 +127,23 @@ class ChunkedStream(RawIOBase, BinaryIO):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> None:
+        """
+        Wraps around the inner stream's ``__exit__`` method, for use as context manager.
+        """
         return super().__exit__(exc_type, exc_val, exc_tb)

     @property
     def chunk_count(self) -> int:
+        """
+        Number of chunks in the stream.
+        """
         return ceil(self._stream_length / self._max_chunk_size)

     @property
     def len(self) -> int:
+        """
+        Length of the current chunk, in bytes.
+        """
         return len(self)

     @property
@@ -129,12 +154,21 @@ class ChunkedStream(RawIOBase, BinaryIO):
         return self._chunk_index

     def __len__(self) -> int:
+        """
+        Length of the current chunk, in bytes.
+        """
         return self._current_chunk_size

     def readable(self) -> bool:
+        """
+        Check if the stream is readable. Always True for ChunkedStream.
+        """
         return True

     def read(self, size: int = -1) -> bytes:
+        """
+        Read bytes from the current chunk.
+        """
         if size < 0:
             size = self._current_chunk_size - self._pos

@@ -162,6 +196,10 @@ class ChunkedStream(RawIOBase, BinaryIO):


 class IOByteStream(SyncByteStream):
+    """
+    Wraps a BinaryIO stream to be used as a httpx SyncByteStream.
+    """
+
     CHUNK_SIZE = 65_536

     def __init__(self, stream: BinaryIO) -> None:
@@ -169,6 +207,9 @@ class IOByteStream(SyncByteStream):
         self._is_stream_consumed = False

     def __iter__(self) -> Iterator[bytes]:
+        """
+        Iterate over the stream, yielding chunks of data.
+        """
         if self._is_stream_consumed:
             raise StreamConsumed()
         chunk = self._stream.read(self.CHUNK_SIZE)
@@ -179,7 +220,7 @@

 class IOFileUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for files using BinaryIO
+    Upload queue for files using BinaryIO.

     Note that if the upload fails, the stream needs to be restarted, so
     the enqueued callback needs to produce a new IO object for each call.
@@ -264,6 +305,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
         _QUEUES += 1

     def initialize_failure_logging(self) -> None:
+        """
+        Initialize the failure logging manager if a path is provided in the constructor.
+        """
         self._file_failure_manager: FileFailureManager | None = (
             FileFailureManager(path_to_file=self.failure_logging_path)
             if self.failure_logging_path is not None
@@ -271,14 +315,23 @@ class IOFileUploadQueue(AbstractUploadQueue):
         )

     def get_failure_logger(self) -> FileFailureManager | None:
+        """
+        Get the failure logger for this upload queue, if it exists.
+        """
         return self._file_failure_manager

     def add_entry_failure_logger(self, file_name: str, error: Exception) -> None:
+        """
+        Add an entry to the failure logger if it exists.
+        """
         if self._file_failure_manager is not None:
             error_reason = str(error)
             self._file_failure_manager.add(file_name=file_name, error_reason=error_reason)

     def flush_failure_logger(self) -> None:
+        """
+        Flush the failure logger if it exists, writing all failures to the file.
+        """
         if self._file_failure_manager is not None:
             self.logger.info("Flushing failure logs")
             self._file_failure_manager.write_to_file()
@@ -313,14 +366,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
         )
         if any_unchaged:
             update = FileMetadataUpdate(external_id=file_meta.external_id)
-
+            need_update = False
             if file_meta.source:
-
+                need_update = True
                 update.source.set(file_meta.source)
             if file_meta.directory:
-
+                need_update = True
                 update.directory.set(file_meta.directory)
-            if
+            if need_update:
                 self.cdf_client.files.update(update)

         return file_meta_response, url
@@ -373,7 +426,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
     def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         chunks = ChunkedStream(file, self.max_file_chunk_size, size)
         self.logger.debug(
-            f"File {file_meta.external_id} is larger than 5GiB ({size})
+            f"File {file_meta.external_id} is larger than 5GiB ({size}), uploading in {chunks.chunk_count} chunks"
         )

         returned_file_metadata = self._create_multi_part(file_meta, chunks)
@@ -428,13 +481,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
         extra_retries: tuple[type[Exception], ...] | dict[type[Exception], Callable[[Any], bool]] | None = None,
     ) -> None:
         """
-        Add file to upload queue.
-
+        Add file to upload queue.
+
+        The file will start uploading immediately. If the size of the queue is larger than the specified max size, this
+        call will block until it's completed the upload.

         Args:
             file_meta: File metadata-object
-
-                If none, the file object will still be created, but no data is uploaded
+            read_file: Callable that returns a BinaryIO stream to read the file from.
             extra_retries: Exception types that might be raised by ``read_file`` that should be retried
         """
         retries = cognite_exceptions()
@@ -568,7 +622,7 @@ class IOFileUploadQueue(AbstractUploadQueue):

     def upload(self, fail_on_errors: bool = True, timeout: float | None = None) -> None:
         """
-        Wait for all uploads to finish
+        Wait for all uploads to finish.
         """
         for future in self.upload_queue:
             future.result(timeout=timeout)
@@ -581,7 +635,7 @@ class IOFileUploadQueue(AbstractUploadQueue):

     def __enter__(self) -> "IOFileUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.

         Returns:
             self
@@ -598,7 +652,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         exc_tb: TracebackType | None,
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.

         Args:
             exc_type: Exception type
@@ -610,7 +664,7 @@ class IOFileUploadQueue(AbstractUploadQueue):

     def __len__(self) -> int:
         """
-        The size of the upload queue
+        The size of the upload queue.

         Returns:
             Number of events in queue
@@ -620,7 +674,7 @@ class IOFileUploadQueue(AbstractUploadQueue):

 class FileUploadQueue(IOFileUploadQueue):
     """
-    Upload queue for files
+    Upload queue for files.

     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -661,8 +715,10 @@ class FileUploadQueue(IOFileUploadQueue):
         file_name: str | PathLike,
     ) -> None:
         """
-        Add file to upload queue.
-
+        Add file to upload queue.
+
+        The file will start uploading immediately. If the size of the queue is larger than the specified max size, this
+        call will block until it's completed the upload.

         Args:
             file_meta: File metadata-object
@@ -678,7 +734,7 @@ class FileUploadQueue(IOFileUploadQueue):

 class BytesUploadQueue(IOFileUploadQueue):
     """
-    Upload queue for bytes
+    Upload queue for bytes.

     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -714,11 +770,14 @@ class BytesUploadQueue(IOFileUploadQueue):

     def add_to_upload_queue(self, content: bytes, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         """
-        Add
-
+        Add file to upload queue.
+
+        The file will start uploading immediately. If the size of the queue is larger than the specified max size, this
+        call will block until it's completed the upload.
+
         Args:
             content: bytes object to upload
-
+            file_meta: File metadata-object
         """

         def get_byte_io() -> BinaryIO:
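
The new docstrings in files.py make the queues' blocking behaviour explicit: an added file starts uploading immediately, and the call blocks once the queue has reached its maximum size. A minimal sketch of that call pattern for the bytes-based queue, assuming BytesUploadQueue is importable from cognite.extractorutils.uploader, that max_queue_size is accepted by its constructor, and that a CogniteClient is already configured:

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader import BytesUploadQueue

client = CogniteClient()  # project/credentials assumed to be configured elsewhere

# __enter__ wraps start() and __exit__ wraps stop(), as the docstrings above describe
with BytesUploadQueue(cdf_client=client, max_queue_size=4) as queue:
    # Starts uploading immediately; blocks if the queue is already at max size
    queue.add_to_upload_queue(
        content=b"hello world",
        file_meta=FileMetadata(external_id="example-file", name="example.txt"),
    )
# Leaving the block waits for the remaining uploads to finish
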
cognite/extractorutils/uploader/raw.py

@@ -1,3 +1,7 @@
+"""
+Upload queue for RAW.
+"""
+
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -41,7 +45,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry

 class RawUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for RAW
+    Upload queue for RAW.

     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -84,8 +88,9 @@ class RawUploadQueue(AbstractUploadQueue):

     def add_to_upload_queue(self, database: str, table: str, raw_row: Row) -> None:
         """
-        Adds a row to the upload queue.
-
+        Adds a row to the upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.

         Args:
             database: The database to upload the Raw object to
@@ -109,7 +114,7 @@ class RawUploadQueue(AbstractUploadQueue):

     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
         """

         @retry(
@@ -154,7 +159,7 @@ class RawUploadQueue(AbstractUploadQueue):

     def __enter__(self) -> "RawUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.

         Returns:
             self
@@ -166,7 +171,7 @@ class RawUploadQueue(AbstractUploadQueue):
         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.

         Args:
             exc_type: Exception type
@@ -177,7 +182,7 @@ class RawUploadQueue(AbstractUploadQueue):

     def __len__(self) -> int:
         """
-        The size of the upload queue
+        The size of the upload queue.

         Returns:
             Number of elements in queue
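
raw.py only gains a module docstring, docstring punctuation fixes, and the note that the queue uploads itself once it passes the threshold given in __init__. A short usage sketch under the same assumptions as above (class re-exported from cognite.extractorutils.uploader, max_queue_size accepted by the constructor, client configured externally):

from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.extractorutils.uploader import RawUploadQueue

client = CogniteClient()  # configuration assumed to come from the environment

with RawUploadQueue(cdf_client=client, max_queue_size=1000) as queue:
    # Rows are buffered; an upload is triggered automatically once the queue
    # grows past the threshold passed to __init__
    queue.add_to_upload_queue("my_db", "my_table", Row("row-1", {"value": 42}))
# stop() runs on exit and flushes whatever is left in the queue
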
cognite/extractorutils/uploader/time_series.py

@@ -1,3 +1,6 @@
+"""
+Upload queue for time series and sequences.
+"""
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -81,7 +84,7 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->

 class TimeSeriesUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for time series
+    Upload queue for time series.

     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -141,7 +144,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self.data_set_id = data_set_id

     def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
-        if isinstance(time, int
+        if isinstance(time, int | float):
             return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
         elif isinstance(time, str):
             return False
@@ -155,10 +158,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             )
         elif isinstance(value, str):
             return len(value) <= MAX_DATAPOINT_STRING_LENGTH
-
-            return False
-        else:
-            return True
+        return not isinstance(value, datetime)

     def _is_datapoint_valid(
         self,
@@ -172,11 +172,16 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return True

     def add_to_upload_queue(
-        self,
+        self,
+        *,
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
+        datapoints: DataPointList | None = None,
     ) -> None:
         """
-        Add data points to upload queue.
-
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.

         Args:
             id: Internal ID of time series. Either this or external_id must be set.
@@ -209,7 +214,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
         """

         @retry(
@@ -239,9 +244,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

         if self.create_missing:
             # Get the time series that can be created
-            create_these_ids =
-                [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
-            )
+            create_these_ids = {id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict}
             datapoints_lists: dict[str, DataPointList] = {
                 ts_dict["externalId"]: ts_dict["datapoints"]
                 for ts_dict in upload_this
@@ -294,7 +297,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             ]
         )

-        for
+        for datapoints in self.upload_queue.values():
             self.points_written.inc(len(datapoints))

         try:
@@ -309,7 +312,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

     def __enter__(self) -> "TimeSeriesUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.

         Returns:
             self
@@ -321,7 +324,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.

         Args:
             exc_type: Exception type
@@ -332,7 +335,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

     def __len__(self) -> int:
         """
-        The size of the upload queue
+        The size of the upload queue.

         Returns:
             Number of data points in queue
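
The add_to_upload_queue signature change above is the one behavioural break in this file: id, external_id and datapoints become keyword-only (with a noqa marker for the id shadowing warning), so positional calls that worked in 7.5.x will no longer work. A sketch of the 7.6.0 call style, assuming the class is exposed from cognite.extractorutils.uploader and a configured CogniteClient is available:

from cognite.client import CogniteClient
from cognite.extractorutils.uploader import TimeSeriesUploadQueue

client = CogniteClient()  # credentials/config assumed to be handled elsewhere

with TimeSeriesUploadQueue(cdf_client=client, create_missing=True) as queue:
    # From 7.6.0 the identifiers and the datapoints must be passed by keyword
    queue.add_to_upload_queue(
        external_id="my-timeseries",
        datapoints=[(1_700_000_000_000, 1.23), (1_700_000_060_000, 4.56)],  # (ms since epoch, value)
    )
# Exiting the block stops the uploader thread and flushes the queue
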
@@ -341,6 +344,21 @@


 class SequenceUploadQueue(AbstractUploadQueue):
+    """
+    Upload queue for sequences.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one
+            argument: A list of the events that were uploaded.
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing sequences if possible (ie, if external id is used).
+    """
+
     def __init__(
         self,
         cdf_client: CogniteClient,
@@ -352,19 +370,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
         create_missing: bool = False,
         cancellation_token: CancellationToken | None = None,
     ):
-        """
-        Args:
-            cdf_client: Cognite Data Fusion client to use
-            post_upload_function: A function that will be called after each upload. The function will be given one
-                argument: A list of the events that were uploaded.
-            max_queue_size: Maximum size of upload queue. Defaults to no max size.
-            max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-                methods).
-            trigger_log_level: Log level to log upload triggers to.
-            thread_name: Thread name of uploader thread.
-            create_missing: Create missing sequences if possible (ie, if external id is used)
-        """
-
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
@@ -393,7 +398,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
     def set_sequence_metadata(
         self,
         metadata: dict[str, str | int | float],
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
         external_id: str | None = None,
         asset_external_id: str | None = None,
         dataset_external_id: str | None = None,
@@ -401,8 +406,10 @@ class SequenceUploadQueue(AbstractUploadQueue):
         description: str | None = None,
     ) -> None:
         """
-        Set sequence metadata.
-
+        Set sequence metadata.
+
+        Metadata will be cached until the sequence is created. The metadata will be updated if the sequence already
+        exists.

         Args:
             metadata: Sequence metadata
@@ -427,10 +434,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
         self.sequence_descriptions[either_id] = description

     def set_sequence_column_definition(
-        self,
+        self,
+        col_def: list[dict[str, str]],
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
     ) -> None:
         """
-        Set sequence column definition
+        Set sequence column definition.

         Args:
             col_def: Sequence column definition
@@ -450,12 +460,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
         | SequenceData
         | SequenceRows,
         column_external_ids: list[dict] | None = None,
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
         external_id: str | None = None,
     ) -> None:
         """
-        Add sequence rows to upload queue.
-
+        Add sequence rows to upload queue.
+
+        Mirrors implementation of SequenceApi.insert. Inserted rows will be cached until uploaded.

         Args:
             rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
@@ -466,7 +477,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
             external_id: Sequence external ID
                 Us if id is None
         """
-
         if len(rows) == 0:
             pass

@@ -509,7 +519,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
         """

         @retry(
@@ -571,15 +581,14 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def _create_or_update(self, either_id: EitherId) -> None:
         """
-        Create or update sequence, based on provided metadata and column definitions
+        Create or update sequence, based on provided metadata and column definitions.

         Args:
             either_id: Id/External Id of sequence to be updated
         """
-
         column_def = self.column_definitions.get(either_id)
         if column_def is None:
-            self.logger.error(f"Can't create sequence {
+            self.logger.error(f"Can't create sequence {either_id!s}, no column definitions provided")

         try:
             seq = self.cdf_client.sequences.create(
@@ -596,7 +605,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
             )

         except CogniteDuplicatedError:
-            self.logger.info(f"
+            self.logger.info(f"Sequence already exist: {either_id}")
             seq = self.cdf_client.sequences.retrieve(  # type: ignore [assignment]
                 id=either_id.internal_id,
                 external_id=either_id.external_id,
@@ -608,7 +617,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def _resolve_asset_ids(self) -> None:
         """
-        Resolve id of assets if specified, for use in sequence creation
+        Resolve id of assets if specified, for use in sequence creation.
         """
         assets = set(self.sequence_asset_external_ids.values())
         assets.discard(None)  # type: ignore # safeguard, remove Nones if any
@@ -628,7 +637,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def _resolve_dataset_ids(self) -> None:
         """
-        Resolve id of datasets if specified, for use in sequence creation
+        Resolve id of datasets if specified, for use in sequence creation.
         """
         datasets = set(self.sequence_dataset_external_ids.values())
         datasets.discard(None)  # type: ignore # safeguard, remove Nones if any
@@ -648,7 +657,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def __enter__(self) -> "SequenceUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.

         Returns:
             self
@@ -660,7 +669,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.

         Args:
             exc_type: Exception type
@@ -671,7 +680,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def __len__(self) -> int:
         """
-        The size of the upload queue
+        The size of the upload queue.

         Returns:
             Number of data points in queue
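
For the sequence queue, the constructor documentation moves from __init__ to the class body and the id parameters gain noqa markers, so existing callers should be unaffected. A sketch of the metadata-then-rows flow these docstrings describe; the set_* names and argument shapes are taken from the hunks above, while the import path, the column-definition keys and the name of the row-adding method (assumed to be add_to_upload_queue, matching the other queues) are assumptions:

from cognite.client import CogniteClient
from cognite.extractorutils.uploader import SequenceUploadQueue

client = CogniteClient()  # configuration assumed

with SequenceUploadQueue(cdf_client=client, create_missing=True) as queue:
    # Column definitions and metadata are cached until the sequence is created
    queue.set_sequence_column_definition(
        col_def=[{"externalId": "pressure", "valueType": "DOUBLE"}],  # key names assumed
        external_id="my-sequence",
    )
    queue.set_sequence_metadata(
        metadata={"source": "example"},
        external_id="my-sequence",
    )
    # Rows mirror SequencesAPI.insert and are cached until uploaded
    queue.add_to_upload_queue(  # method name assumed, matching the other queues
        rows=[(1, [42.0])],
        external_id="my-sequence",
    )
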