cognite-extractor-utils 7.5.4__py3-none-any.whl → 7.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite/extractorutils/__init__.py +3 -1
- cognite/extractorutils/_inner_util.py +14 -3
- cognite/extractorutils/base.py +14 -15
- cognite/extractorutils/configtools/__init__.py +25 -0
- cognite/extractorutils/configtools/_util.py +7 -9
- cognite/extractorutils/configtools/elements.py +58 -49
- cognite/extractorutils/configtools/loaders.py +29 -26
- cognite/extractorutils/configtools/validators.py +2 -3
- cognite/extractorutils/exceptions.py +1 -4
- cognite/extractorutils/metrics.py +18 -18
- cognite/extractorutils/statestore/_base.py +3 -4
- cognite/extractorutils/statestore/hashing.py +24 -24
- cognite/extractorutils/statestore/watermark.py +17 -14
- cognite/extractorutils/threading.py +4 -4
- cognite/extractorutils/unstable/configuration/exceptions.py +24 -0
- cognite/extractorutils/unstable/configuration/loaders.py +18 -7
- cognite/extractorutils/unstable/configuration/models.py +25 -3
- cognite/extractorutils/unstable/core/_dto.py +10 -0
- cognite/extractorutils/unstable/core/base.py +179 -29
- cognite/extractorutils/unstable/core/errors.py +72 -0
- cognite/extractorutils/unstable/core/restart_policy.py +29 -0
- cognite/extractorutils/unstable/core/runtime.py +170 -26
- cognite/extractorutils/unstable/core/tasks.py +2 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +4 -4
- cognite/extractorutils/uploader/__init__.py +14 -0
- cognite/extractorutils/uploader/_base.py +8 -8
- cognite/extractorutils/uploader/assets.py +15 -9
- cognite/extractorutils/uploader/data_modeling.py +13 -13
- cognite/extractorutils/uploader/events.py +9 -9
- cognite/extractorutils/uploader/files.py +127 -31
- cognite/extractorutils/uploader/raw.py +10 -10
- cognite/extractorutils/uploader/time_series.py +56 -58
- cognite/extractorutils/uploader/upload_failure_handler.py +64 -0
- cognite/extractorutils/uploader_extractor.py +11 -11
- cognite/extractorutils/uploader_types.py +4 -12
- cognite/extractorutils/util.py +21 -23
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.5.dist-info}/METADATA +3 -2
- cognite_extractor_utils-7.5.5.dist-info/RECORD +49 -0
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.5.dist-info}/WHEEL +1 -1
- cognite/extractorutils/unstable/core/__main__.py +0 -31
- cognite_extractor_utils-7.5.4.dist-info/RECORD +0 -46
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.5.dist-info}/LICENSE +0 -0
cognite/extractorutils/uploader/files.py

```diff
@@ -18,7 +18,16 @@ from io import BytesIO, RawIOBase
 from math import ceil
 from os import PathLike
 from types import TracebackType
-from typing import
+from typing import (
+    Any,
+    BinaryIO,
+    Callable,
+    Iterator,
+    List,
+    Optional,
+    Type,
+    Union,
+)
 from urllib.parse import ParseResult, urlparse
 
 from httpx import URL, Client, Headers, Request, StreamConsumed, SyncByteStream
@@ -27,7 +36,9 @@ from requests.utils import super_len
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata, FileMetadataUpdate
 from cognite.client.data_classes.data_modeling import NodeId
-from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import (
+    CogniteExtractorFileApply,
+)
 from cognite.client.utils._identifier import IdentifierSequence
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
@@ -42,6 +53,7 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
+from cognite.extractorutils.uploader.upload_failure_handler import FileFailureManager
 from cognite.extractorutils.util import cognite_exceptions, retry
 
 _QUEUES: int = 0
@@ -54,6 +66,7 @@ _MAX_FILE_CHUNK_SIZE = 4 * 1024 * 1024 * 1000
 
 _CDF_ALPHA_VERSION_HEADER = {"cdf-version": "alpha"}
 
+
 FileMetadataOrCogniteExtractorFile = Union[FileMetadata, CogniteExtractorFileApply]
 
 
@@ -97,7 +110,10 @@ class ChunkedStream(RawIOBase, BinaryIO):
         return super().__enter__()
 
     def __exit__(
-        self,
+        self,
+        exc_type: Type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
     ) -> None:
         return super().__exit__(exc_type, exc_val, exc_tb)
 
@@ -186,13 +202,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
         cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: Optional[int] = None,
+        failure_logging_path: None | str = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -208,6 +225,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
         if self.threshold <= 0:
             raise ValueError("Max queue size must be positive for file upload queues")
 
+        self.failure_logging_path = failure_logging_path or None
+        self.initialize_failure_logging()
+
         self.upload_queue: List[Future] = []
         self.errors: List[Exception] = []
 
@@ -235,10 +255,31 @@ class IOFileUploadQueue(AbstractUploadQueue):
         global _QUEUES, _QUEUES_LOCK
         with _QUEUES_LOCK:
             self._pool = ThreadPoolExecutor(
-                max_workers=self.parallelism,
+                max_workers=self.parallelism,
+                thread_name_prefix=f"FileUploadQueue-{_QUEUES}",
             )
             _QUEUES += 1
 
+    def initialize_failure_logging(self) -> None:
+        self._file_failure_manager: FileFailureManager | None = (
+            FileFailureManager(path_to_file=self.failure_logging_path)
+            if self.failure_logging_path is not None
+            else None
+        )
+
+    def get_failure_logger(self) -> FileFailureManager | None:
+        return self._file_failure_manager
+
+    def add_entry_failure_logger(self, file_name: str, error: Exception) -> None:
+        if self._file_failure_manager is not None:
+            error_reason = str(error)
+            self._file_failure_manager.add(file_name=file_name, error_reason=error_reason)
+
+    def flush_failure_logger(self) -> None:
+        if self._file_failure_manager is not None:
+            self.logger.info("Flushing failure logs")
+            self._file_failure_manager.write_to_file()
+
     def _remove_done_from_queue(self) -> None:
         while not self.cancellation_token.is_cancelled:
             with self.lock:
@@ -251,7 +292,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             node = instance_result.nodes[0]
             return node.as_id()
 
-    def
+    def _upload_only_metadata(
         self, file_meta: FileMetadataOrCogniteExtractorFile
     ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
         if isinstance(file_meta, CogniteExtractorFileApply):
@@ -281,11 +322,51 @@ class IOFileUploadQueue(AbstractUploadQueue):
 
         return file_meta_response, url
 
+    def _upload_empty_file(
+        self,
+        file_meta: FileMetadataOrCogniteExtractorFile,
+    ) -> None:
+        file_meta_response, url = self._upload_only_metadata(file_meta)
+
+        self._upload_only_file_reference(file_meta, url)
+
     def _upload_bytes(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
-        file_meta, url = self.
+        file_meta, url = self._upload_only_metadata(file_meta)
         resp = self._httpx_client.send(self._get_file_upload_request(url, file, size, file_meta.mime_type))
         resp.raise_for_status()
 
+    def _prepare_request_data_for_empty_file(self, url_str: str) -> Request:
+        FILE_SIZE = 0  # this path is only entered for an empty file
+        EMPTY_CONTENT = ""
+
+        url = URL(url_str)
+        base_url = URL(self.cdf_client.config.base_url)
+
+        if url.host == base_url.host:
+            upload_url = url
+        else:
+            parsed_url: ParseResult = urlparse(url_str)
+            parsed_base_url: ParseResult = urlparse(self.cdf_client.config.base_url)
+            replaced_upload_url = parsed_url._replace(netloc=parsed_base_url.netloc).geturl()
+            upload_url = URL(replaced_upload_url)
+
+        headers = Headers(self._httpx_client.headers)
+        headers.update(
+            {
+                "Accept": "*/*",
+                "Content-Length": str(FILE_SIZE),
+                "Host": upload_url.netloc.decode("ascii"),
+                "x-cdp-app": self.cdf_client._config.client_name,
+            }
+        )
+
+        return Request(method="PUT", url=upload_url, headers=headers, content=EMPTY_CONTENT)
+
+    def _upload_only_file_reference(self, file_meta: FileMetadataOrCogniteExtractorFile, url_str: str) -> None:
+        request_data = self._prepare_request_data_for_empty_file(url_str)
+        resp = self._httpx_client.send(request_data)
+        resp.raise_for_status()
+
     def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         chunks = ChunkedStream(file, self.max_file_chunk_size, size)
         self.logger.debug(
@@ -329,7 +410,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         res = self.cdf_client.files._post(
             url_path="/files/initmultipartupload",
             json=file_meta.dump(camel_case=True),
-            params={
+            params={
+                "overwrite": self.overwrite_existing,
+                "parts": chunks.chunk_count,
+            },
         )
         res.raise_for_status()
         return res.json()
@@ -338,9 +422,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self,
         file_meta: FileMetadataOrCogniteExtractorFile,
         read_file: Callable[[], BinaryIO],
-        extra_retries:
-            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
-        ] = None,
+        extra_retries: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Any], bool]] | None = None,
     ) -> None:
         """
         Add file to upload queue. The file will start uploading immedeately. If the size of the queue is larger than
@@ -366,12 +448,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def upload_file(
+        def upload_file(
+            read_file: Callable[[], BinaryIO],
+            file_meta: FileMetadataOrCogniteExtractorFile,
+        ) -> None:
             with read_file() as file:
                 size = super_len(file)
                 if size == 0:
-
-                    file_meta, _ = self._upload_empty(file_meta)
+                    self._upload_empty_file(file_meta)
                 elif size >= self.max_single_chunk_file_size:
                     # The minimum chunk size is 4000MiB.
                     self._upload_multipart(size, file, file_meta)
@@ -388,12 +472,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             except Exception as e:
                 self.logger.error("Error in upload callback: %s", str(e))
 
-        def wrapped_upload(
+        def wrapped_upload(
+            read_file: Callable[[], BinaryIO],
+            file_meta: FileMetadataOrCogniteExtractorFile,
+        ) -> None:
             try:
                 upload_file(read_file, file_meta)
 
             except Exception as e:
-                self.logger.exception(
+                self.logger.exception(
+                    f"Unexpected error while uploading file: {file_meta.external_id} {file_meta.name}"
+                )
+                self.add_entry_failure_logger(file_name=str(file_meta.name), error=e)
                 self.errors.append(e)
 
             finally:
@@ -416,7 +506,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             self.queue_size.set(self.upload_queue_size)
 
     def _get_file_upload_request(
-        self, url_str: str, stream: BinaryIO, size: int, mime_type:
+        self, url_str: str, stream: BinaryIO, size: int, mime_type: str | None = None
     ) -> Request:
         url = URL(url_str)
         base_url = URL(self.cdf_client.config.base_url)
@@ -460,7 +550,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         resp_json = res.json()["items"][0]
         return FileMetadata.load(resp_json), resp_json["uploadUrl"]
 
-    def upload(self, fail_on_errors: bool = True, timeout:
+    def upload(self, fail_on_errors: bool = True, timeout: float | None = None) -> None:
         """
         Wait for all uploads to finish
         """
@@ -470,6 +560,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self.queue_size.set(self.upload_queue_size)
         if fail_on_errors and self.errors:
             # There might be more errors, but we can only have one as the cause, so pick the first
+            self.flush_failure_logger()
             raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]
 
     def __enter__(self) -> "IOFileUploadQueue":
@@ -485,7 +576,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         return self
 
     def __exit__(
-        self,
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
     ) -> None:
         """
         Wraps around stop method, for use as context manager
@@ -524,13 +618,13 @@ class FileUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -544,7 +638,9 @@ class FileUploadQueue(IOFileUploadQueue):
         )
 
     def add_to_upload_queue(
-        self,
+        self,
+        file_meta: FileMetadataOrCogniteExtractorFile,
+        file_name: Union[str, PathLike],
     ) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -579,12 +675,12 @@ class BytesUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
        trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: CancellationToken | None = None,
     ) -> None:
         super().__init__(
             cdf_client,
```
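The main functional addition in `uploader/files.py` is the optional upload-failure log: `IOFileUploadQueue` now takes a `failure_logging_path`, records failed uploads through the new `FileFailureManager`, and flushes that log before `upload()` raises. Below is a minimal sketch of how this might be wired up, assuming a configured `CogniteClient`; the external ID, file content, and log path are hypothetical.

```python
from io import BytesIO

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import IOFileUploadQueue

client = CogniteClient()  # assumes credentials are already configured

queue = IOFileUploadQueue(
    cdf_client=client,
    max_queue_size=10,  # must be positive for file upload queues
    failure_logging_path="failed_uploads.log",  # hypothetical path; enables the FileFailureManager
)

with queue:
    # read_file is a zero-argument callable returning a binary stream
    queue.add_to_upload_queue(
        file_meta=FileMetadata(external_id="example-file", name="example.txt"),
        read_file=lambda: BytesIO(b"example content"),
    )
# Leaving the context manager waits for the uploads; if any failed, the queue
# flushes the failure log before raising a RuntimeError.
```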
cognite/extractorutils/uploader/raw.py

```diff
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from types import TracebackType
-from typing import Any, Callable,
+from typing import Any, Callable, Type
 
 import arrow
 from arrow import Arrow
@@ -56,12 +56,12 @@ class RawUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[Any]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
-        cancellation_token:
+        thread_name: str | None = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -73,7 +73,7 @@ class RawUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue:
+        self.upload_queue: dict[str, dict[str, list[TimestampedObject]]] = {}
 
         # It is a hack since Prometheus client registers metrics on object creation, so object has to be created once
         self.rows_queued = RAW_UPLOADER_ROWS_QUEUED
@@ -119,7 +119,7 @@ class RawUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_batch(database: str, table: str, patch:
+        def _upload_batch(database: str, table: str, patch: list[Row]) -> None:
             # Upload
             self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
 
@@ -133,7 +133,7 @@ class RawUploadQueue(AbstractUploadQueue):
 
             # Deduplicate
             # In case of duplicate keys, the first key is preserved, and the last value is preserved.
-            patch:
+            patch: dict[str, Row] = {r.payload.key: r.payload for r in rows}
             self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))
 
             _upload_batch(database=database, table=table, patch=list(patch.values()))
@@ -162,7 +162,7 @@ class RawUploadQueue(AbstractUploadQueue):
         return self
 
     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
```
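The `raw.py` hunks are typing clean-ups (restored annotations and `X | None` unions) rather than behaviour changes, but they also show the per-table deduplication rule: when the same row key is queued twice, the last value wins. A small sketch of that behaviour, assuming a configured `CogniteClient` and hypothetical database, table, and row names:

```python
from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.extractorutils.uploader.raw import RawUploadQueue

client = CogniteClient()  # assumes credentials are already configured

with RawUploadQueue(cdf_client=client, max_queue_size=1000) as queue:
    # Two rows with the same key in the same database/table (hypothetical names):
    # on upload the queue deduplicates, keeping the value queued last.
    queue.add_to_upload_queue("my_db", "my_table", Row(key="row-1", columns={"value": 1}))
    queue.add_to_upload_queue("my_db", "my_table", Row(key="row-1", columns={"value": 2}))
# On exit the queue uploads, and "row-1" is inserted once with {"value": 2}.
```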
cognite/extractorutils/uploader/time_series.py

```diff
@@ -15,7 +15,7 @@
 import math
 from datetime import datetime
 from types import TracebackType
-from typing import Any, Callable,
+from typing import Any, Callable, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import (
@@ -50,13 +50,13 @@ MAX_DATAPOINT_STRING_LENGTH = 255
 MAX_DATAPOINT_VALUE = 1e100
 MIN_DATAPOINT_VALUE = -1e100
 
-TimeStamp =
+TimeStamp = int | datetime
 
-DataPointWithoutStatus =
-FullStatusCode =
-DataPointWithStatus =
-DataPoint =
-DataPointList =
+DataPointWithoutStatus = tuple[TimeStamp, float] | tuple[TimeStamp, str] | tuple[TimeStamp, int]
+FullStatusCode = StatusCode | int
+DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp, str, FullStatusCode]
+DataPoint = DataPointWithoutStatus | DataPointWithStatus
+DataPointList = list[DataPoint]
 
 
 def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
@@ -103,14 +103,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
-        create_missing:
-        data_set_id:
-        cancellation_token:
+        thread_name: str | None = None,
+        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+        data_set_id: int | None = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -132,14 +132,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             self.create_missing = True
             self.missing_factory = create_missing
 
-        self.upload_queue:
+        self.upload_queue: dict[EitherId, DataPointList] = {}
 
         self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
         self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
         self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
         self.data_set_id = data_set_id
 
-    def _verify_datapoint_time(self, time:
+    def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
         if isinstance(time, int) or isinstance(time, float):
             return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
         elif isinstance(time, str):
@@ -147,7 +147,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         else:
             return time.timestamp() * 1000.0 >= MIN_DATAPOINT_TIMESTAMP
 
-    def _verify_datapoint_value(self, value:
+    def _verify_datapoint_value(self, value: int | float | datetime | str) -> bool:
         if isinstance(value, float):
             return not (
                 math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
@@ -171,7 +171,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             return True
 
     def add_to_upload_queue(
-        self, *, id:
+        self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
     ) -> None:
         """
         Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -180,7 +180,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         Args:
             id: Internal ID of time series. Either this or external_id must be set.
             external_id: External ID of time series. Either this or external_id must be set.
-            datapoints:
+            datapoints: list of data points to add
         """
         datapoints = datapoints or []
         old_len = len(datapoints)
@@ -219,7 +219,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_batch(upload_this:
+        def _upload_batch(upload_this: list[dict], retries: int = 5) -> list[dict]:
             if len(upload_this) == 0:
                 return upload_this
 
@@ -241,14 +241,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                 create_these_ids = set(
                     [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
                 )
-                datapoints_lists:
+                datapoints_lists: dict[str, DataPointList] = {
                     ts_dict["externalId"]: ts_dict["datapoints"]
                     for ts_dict in upload_this
                     if ts_dict["externalId"] in create_these_ids
                 }
 
                 self.logger.info(f"Creating {len(create_these_ids)} time series")
-                to_create:
+                to_create: list[TimeSeries] = [
                     self.missing_factory(external_id, datapoints_lists[external_id])
                     for external_id in create_these_ids
                 ]
@@ -317,7 +317,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return self
 
     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
@@ -343,13 +343,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[Any]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         create_missing: bool = False,
-        cancellation_token:
+        cancellation_token: CancellationToken | None = None,
     ):
         """
         Args:
@@ -374,15 +374,15 @@ class SequenceUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue:
-        self.sequence_metadata:
-        self.sequence_asset_external_ids:
-        self.sequence_dataset_external_ids:
-        self.sequence_names:
-        self.sequence_descriptions:
-        self.column_definitions:
-        self.asset_ids:
-        self.dataset_ids:
+        self.upload_queue: dict[EitherId, SequenceRows] = {}
+        self.sequence_metadata: dict[EitherId, dict[str, str | int | float]] = {}
+        self.sequence_asset_external_ids: dict[EitherId, str] = {}
+        self.sequence_dataset_external_ids: dict[EitherId, str] = {}
+        self.sequence_names: dict[EitherId, str] = {}
+        self.sequence_descriptions: dict[EitherId, str] = {}
+        self.column_definitions: dict[EitherId, list[dict[str, str]]] = {}
+        self.asset_ids: dict[str, int] = {}
+        self.dataset_ids: dict[str, int] = {}
         self.create_missing = create_missing
 
         self.points_queued = SEQUENCES_UPLOADER_POINTS_QUEUED
@@ -391,13 +391,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
     def set_sequence_metadata(
         self,
-        metadata:
-        id:
-        external_id:
-        asset_external_id:
-        dataset_external_id:
-        name:
-        description:
+        metadata: dict[str, str | int | float],
+        id: int | None = None,
+        external_id: str | None = None,
+        asset_external_id: str | None = None,
+        dataset_external_id: str | None = None,
+        name: str | None = None,
+        description: str | None = None,
     ) -> None:
         """
         Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
@@ -426,7 +426,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
             self.sequence_descriptions[either_id] = description
 
     def set_sequence_column_definition(
-        self, col_def:
+        self, col_def: list[dict[str, str]], id: int | None = None, external_id: str | None = None
     ) -> None:
         """
         Set sequence column definition
@@ -443,16 +443,14 @@ class SequenceUploadQueue(AbstractUploadQueue):
 
     def add_to_upload_queue(
         self,
-        rows:
-
-
-
-
-
-
-
-        id: Optional[int] = None,
-        external_id: Optional[str] = None,
+        rows: dict[int, list[int | float | str]]
+        | list[tuple[int, int | float | str]]
+        | list[dict[str, Any]]
+        | SequenceData
+        | SequenceRows,
+        column_external_ids: list[dict] | None = None,
+        id: int | None = None,
+        external_id: str | None = None,
     ) -> None:
         """
         Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
@@ -461,7 +459,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         Args:
             rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
                 objects, a dictionary of rowNumber: data, or a SequenceData object.
-            column_external_ids:
+            column_external_ids: list of external id for the columns of the sequence
            id: Sequence internal ID
                Use if external_id is None
            external_id: Sequence external ID
@@ -477,7 +475,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
             # Already in the desired format
             pass
         elif isinstance(rows, (dict, list)):
-            rows_raw:
+            rows_raw: list[dict[str, Any]]
             if isinstance(rows, dict):
                 rows_raw = [{"rowNumber": row_number, "values": values} for row_number, values in rows.items()]
             elif isinstance(rows, list) and rows and isinstance(rows[0], (tuple, list)):
@@ -658,7 +656,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         return self
 
     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
```
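The `time_series.py` hunks likewise restore explicit annotations, and they spell out the datapoint aliases, including the status-code variants (`FullStatusCode = StatusCode | int`, `DataPointWithStatus`). A short sketch of the accepted datapoint shapes, assuming a configured `CogniteClient` and a hypothetical time series external ID:

```python
from datetime import datetime, timezone

from cognite.client import CogniteClient
from cognite.client.data_classes import StatusCode
from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

client = CogniteClient()  # assumes credentials are already configured

with TimeSeriesUploadQueue(cdf_client=client, max_queue_size=10_000, create_missing=True) as queue:
    queue.add_to_upload_queue(
        external_id="example-ts",  # hypothetical time series
        datapoints=[
            (datetime.now(timezone.utc), 42.0),          # DataPointWithoutStatus
            (1_700_000_000_000, 43.5, StatusCode.Good),  # DataPointWithStatus
        ],
    )
# Points are uploaded when the queue grows past max_queue_size or on exit.
```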