cognite-extractor-utils 7.5.4__py3-none-any.whl → 7.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +3 -1
- cognite/extractorutils/_inner_util.py +14 -3
- cognite/extractorutils/base.py +14 -15
- cognite/extractorutils/configtools/__init__.py +25 -0
- cognite/extractorutils/configtools/_util.py +7 -9
- cognite/extractorutils/configtools/elements.py +58 -49
- cognite/extractorutils/configtools/loaders.py +29 -26
- cognite/extractorutils/configtools/validators.py +2 -3
- cognite/extractorutils/exceptions.py +1 -4
- cognite/extractorutils/metrics.py +18 -18
- cognite/extractorutils/statestore/_base.py +3 -4
- cognite/extractorutils/statestore/hashing.py +24 -24
- cognite/extractorutils/statestore/watermark.py +17 -14
- cognite/extractorutils/threading.py +4 -4
- cognite/extractorutils/unstable/configuration/exceptions.py +24 -0
- cognite/extractorutils/unstable/configuration/loaders.py +18 -7
- cognite/extractorutils/unstable/configuration/models.py +25 -3
- cognite/extractorutils/unstable/core/_dto.py +10 -0
- cognite/extractorutils/unstable/core/base.py +179 -29
- cognite/extractorutils/unstable/core/errors.py +72 -0
- cognite/extractorutils/unstable/core/restart_policy.py +29 -0
- cognite/extractorutils/unstable/core/runtime.py +170 -26
- cognite/extractorutils/unstable/core/tasks.py +2 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +4 -4
- cognite/extractorutils/uploader/__init__.py +14 -0
- cognite/extractorutils/uploader/_base.py +8 -8
- cognite/extractorutils/uploader/assets.py +15 -9
- cognite/extractorutils/uploader/data_modeling.py +13 -13
- cognite/extractorutils/uploader/events.py +9 -9
- cognite/extractorutils/uploader/files.py +153 -46
- cognite/extractorutils/uploader/raw.py +10 -10
- cognite/extractorutils/uploader/time_series.py +56 -58
- cognite/extractorutils/uploader/upload_failure_handler.py +64 -0
- cognite/extractorutils/uploader_extractor.py +11 -11
- cognite/extractorutils/uploader_types.py +4 -12
- cognite/extractorutils/util.py +21 -23
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/METADATA +4 -3
- cognite_extractor_utils-7.5.6.dist-info/RECORD +49 -0
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/WHEEL +1 -1
- cognite/extractorutils/unstable/core/__main__.py +0 -31
- cognite_extractor_utils-7.5.4.dist-info/RECORD +0 -46
- {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/LICENSE +0 -0
--- a/cognite/extractorutils/uploader/files.py
+++ b/cognite/extractorutils/uploader/files.py
@@ -18,7 +18,16 @@ from io import BytesIO, RawIOBase
 from math import ceil
 from os import PathLike
 from types import TracebackType
-from typing import
+from typing import (
+    Any,
+    BinaryIO,
+    Callable,
+    Iterator,
+    List,
+    Optional,
+    Type,
+    Union,
+)
 from urllib.parse import ParseResult, urlparse
 
 from httpx import URL, Client, Headers, Request, StreamConsumed, SyncByteStream
@@ -27,7 +36,9 @@ from requests.utils import super_len
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata, FileMetadataUpdate
 from cognite.client.data_classes.data_modeling import NodeId
-from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import (
+    CogniteExtractorFileApply,
+)
 from cognite.client.utils._identifier import IdentifierSequence
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
@@ -42,6 +53,7 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
+from cognite.extractorutils.uploader.upload_failure_handler import FileFailureManager
 from cognite.extractorutils.util import cognite_exceptions, retry
 
 _QUEUES: int = 0
@@ -54,6 +66,7 @@ _MAX_FILE_CHUNK_SIZE = 4 * 1024 * 1024 * 1000
 
 _CDF_ALPHA_VERSION_HEADER = {"cdf-version": "alpha"}
 
+
 FileMetadataOrCogniteExtractorFile = Union[FileMetadata, CogniteExtractorFileApply]
 
 
@@ -97,7 +110,10 @@ class ChunkedStream(RawIOBase, BinaryIO):
         return super().__enter__()
 
     def __exit__(
-        self,
+        self,
+        exc_type: Type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
     ) -> None:
         return super().__exit__(exc_type, exc_val, exc_tb)
 
@@ -181,18 +197,22 @@ class IOFileUploadQueue(AbstractUploadQueue):
         thread_name: Thread name of uploader thread.
         max_parallelism: Maximum number of parallel uploads. If nothing is given, the parallelism will be capped by the
             max_workers of the cognite client.
+        ssl_verify: Either a string (path to a CA bundle) or a bool (false to turn off completely, true to use standard
+            CA bundle)
     """
 
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
         cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: Optional[int] = None,
+        failure_logging_path: None | str = None,
+        ssl_verify: bool | str = True,
     ):
         # Super sets post_upload and threshold
         super().__init__(
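The two new constructor parameters tie into the hunks that follow: `ssl_verify` is forwarded to the underlying httpx `Client`, and `failure_logging_path` switches on the new `FileFailureManager`. A minimal sketch of opting in, assuming an already-configured `CogniteClient` named `client` (both paths are hypothetical):

```python
from cognite.extractorutils.uploader.files import IOFileUploadQueue

queue = IOFileUploadQueue(
    cdf_client=client,
    failure_logging_path="failed_uploads.log",  # hypothetical path; enables the FileFailureManager
    ssl_verify="/etc/ssl/certs/corp-ca.pem",    # path to a CA bundle, or True/False
)
```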
@@ -208,6 +228,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
         if self.threshold <= 0:
             raise ValueError("Max queue size must be positive for file upload queues")
 
+        self.failure_logging_path = failure_logging_path or None
+        self.initialize_failure_logging()
+
         self.upload_queue: List[Future] = []
         self.errors: List[Exception] = []
 
@@ -230,15 +253,40 @@ class IOFileUploadQueue(AbstractUploadQueue):
 
         self._full_queue = threading.Condition()
 
-        self._httpx_client = Client(
+        self._httpx_client = Client(
+            follow_redirects=True,
+            timeout=cdf_client.config.file_transfer_timeout,
+            verify=ssl_verify,
+        )
 
         global _QUEUES, _QUEUES_LOCK
         with _QUEUES_LOCK:
             self._pool = ThreadPoolExecutor(
-                max_workers=self.parallelism,
+                max_workers=self.parallelism,
+                thread_name_prefix=f"FileUploadQueue-{_QUEUES}",
             )
             _QUEUES += 1
 
+    def initialize_failure_logging(self) -> None:
+        self._file_failure_manager: FileFailureManager | None = (
+            FileFailureManager(path_to_file=self.failure_logging_path)
+            if self.failure_logging_path is not None
+            else None
+        )
+
+    def get_failure_logger(self) -> FileFailureManager | None:
+        return self._file_failure_manager
+
+    def add_entry_failure_logger(self, file_name: str, error: Exception) -> None:
+        if self._file_failure_manager is not None:
+            error_reason = str(error)
+            self._file_failure_manager.add(file_name=file_name, error_reason=error_reason)
+
+    def flush_failure_logger(self) -> None:
+        if self._file_failure_manager is not None:
+            self.logger.info("Flushing failure logs")
+            self._file_failure_manager.write_to_file()
+
     def _remove_done_from_queue(self) -> None:
         while not self.cancellation_token.is_cancelled:
             with self.lock:
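Taken together, the four methods above are the whole failure-logging surface: the manager only exists when a `failure_logging_path` was given, and every hook degrades to a no-op otherwise. A sketch of the flow, assuming `queue` was constructed with failure logging enabled (the file name and error are illustrative):

```python
failure_logger = queue.get_failure_logger()  # FileFailureManager | None

# Record a failed file; the error is stored as str(error)
queue.add_entry_failure_logger(
    file_name="report.pdf",
    error=RuntimeError("simulated upload failure"),
)

# Persist recorded entries via FileFailureManager.write_to_file()
queue.flush_failure_logger()
```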
@@ -251,7 +299,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             node = instance_result.nodes[0]
             return node.as_id()
 
-    def
+    def _upload_only_metadata(
         self, file_meta: FileMetadataOrCogniteExtractorFile
     ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
         if isinstance(file_meta, CogniteExtractorFileApply):
@@ -281,11 +329,51 @@ class IOFileUploadQueue(AbstractUploadQueue):
 
         return file_meta_response, url
 
+    def _upload_empty_file(
+        self,
+        file_meta: FileMetadataOrCogniteExtractorFile,
+    ) -> None:
+        file_meta_response, url = self._upload_only_metadata(file_meta)
+
+        self._upload_only_file_reference(file_meta, url)
+
     def _upload_bytes(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
-        file_meta, url = self.
+        file_meta, url = self._upload_only_metadata(file_meta)
         resp = self._httpx_client.send(self._get_file_upload_request(url, file, size, file_meta.mime_type))
         resp.raise_for_status()
 
+    def _prepare_request_data_for_empty_file(self, url_str: str) -> Request:
+        FILE_SIZE = 0  # this path is only entered for an empty file
+        EMPTY_CONTENT = ""
+
+        url = URL(url_str)
+        base_url = URL(self.cdf_client.config.base_url)
+
+        if url.host == base_url.host:
+            upload_url = url
+        else:
+            parsed_url: ParseResult = urlparse(url_str)
+            parsed_base_url: ParseResult = urlparse(self.cdf_client.config.base_url)
+            replaced_upload_url = parsed_url._replace(netloc=parsed_base_url.netloc).geturl()
+            upload_url = URL(replaced_upload_url)
+
+        headers = Headers(self._httpx_client.headers)
+        headers.update(
+            {
+                "Accept": "*/*",
+                "Content-Length": str(FILE_SIZE),
+                "Host": upload_url.netloc.decode("ascii"),
+                "x-cdp-app": self.cdf_client._config.client_name,
+            }
+        )
+
+        return Request(method="PUT", url=upload_url, headers=headers, content=EMPTY_CONTENT)
+
+    def _upload_only_file_reference(self, file_meta: FileMetadataOrCogniteExtractorFile, url_str: str) -> None:
+        request_data = self._prepare_request_data_for_empty_file(url_str)
+        resp = self._httpx_client.send(request_data)
+        resp.raise_for_status()
+
     def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         chunks = ChunkedStream(file, self.max_file_chunk_size, size)
         self.logger.debug(
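`_prepare_request_data_for_empty_file` reuses the netloc swap already present in `_get_file_upload_request`: when the signed upload URL points at a different host than the project's base URL, only the host portion is replaced while path and query (including any signature) are preserved. A standalone stdlib illustration of that swap, with both URLs hypothetical:

```python
from urllib.parse import urlparse

upload_url = "https://upload.vendor.example/files/123?sig=abc"  # hypothetical signed URL
base_url = "https://api.cognitedata.com"                        # hypothetical project base URL

swapped = urlparse(upload_url)._replace(netloc=urlparse(base_url).netloc).geturl()
print(swapped)  # https://api.cognitedata.com/files/123?sig=abc
```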
@@ -329,7 +417,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         res = self.cdf_client.files._post(
             url_path="/files/initmultipartupload",
             json=file_meta.dump(camel_case=True),
-            params={
+            params={
+                "overwrite": self.overwrite_existing,
+                "parts": chunks.chunk_count,
+            },
         )
         res.raise_for_status()
         return res.json()
@@ -338,9 +429,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self,
         file_meta: FileMetadataOrCogniteExtractorFile,
         read_file: Callable[[], BinaryIO],
-        extra_retries:
-            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
-        ] = None,
+        extra_retries: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Any], bool]] | None = None,
     ) -> None:
         """
         Add file to upload queue. The file will start uploading immedeately. If the size of the queue is larger than
@@ -366,12 +455,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def upload_file(
+        def upload_file(
+            read_file: Callable[[], BinaryIO],
+            file_meta: FileMetadataOrCogniteExtractorFile,
+        ) -> None:
             with read_file() as file:
                 size = super_len(file)
                 if size == 0:
-
-                    file_meta, _ = self._upload_empty(file_meta)
+                    self._upload_empty_file(file_meta)
                 elif size >= self.max_single_chunk_file_size:
                     # The minimum chunk size is 4000MiB.
                     self._upload_multipart(size, file, file_meta)
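`upload_file` now dispatches three ways on the size reported by `super_len`: zero bytes take the new empty-file path, sizes at or above `max_single_chunk_file_size` go multipart, and everything else is a single upload. Given `_MAX_FILE_CHUNK_SIZE = 4 * 1024 * 1024 * 1000` (4000 MiB) from the top of the module, the part count for a multipart upload follows from ceiling division; a small sketch, assuming `ChunkedStream.chunk_count` uses the same arithmetic:

```python
from math import ceil

MAX_FILE_CHUNK_SIZE = 4 * 1024 * 1024 * 1000  # 4_194_304_000 bytes, i.e. 4000 MiB

def parts_needed(size: int) -> int:
    # Number of chunks a file of `size` bytes is split into
    return ceil(size / MAX_FILE_CHUNK_SIZE)

assert parts_needed(MAX_FILE_CHUNK_SIZE) == 1
assert parts_needed(MAX_FILE_CHUNK_SIZE + 1) == 2
```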
@@ -388,12 +479,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             except Exception as e:
                 self.logger.error("Error in upload callback: %s", str(e))
 
-        def wrapped_upload(
+        def wrapped_upload(
+            read_file: Callable[[], BinaryIO],
+            file_meta: FileMetadataOrCogniteExtractorFile,
+        ) -> None:
             try:
                 upload_file(read_file, file_meta)
 
             except Exception as e:
-                self.logger.exception(
+                self.logger.exception(
+                    f"Unexpected error while uploading file: {file_meta.external_id} {file_meta.name}"
+                )
+                self.add_entry_failure_logger(file_name=str(file_meta.name), error=e)
                 self.errors.append(e)
 
             finally:
@@ -416,7 +513,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             self.queue_size.set(self.upload_queue_size)
 
     def _get_file_upload_request(
-        self, url_str: str, stream: BinaryIO, size: int, mime_type:
+        self, url_str: str, stream: BinaryIO, size: int, mime_type: str | None = None
     ) -> Request:
         url = URL(url_str)
         base_url = URL(self.cdf_client.config.base_url)
@@ -460,7 +557,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         resp_json = res.json()["items"][0]
         return FileMetadata.load(resp_json), resp_json["uploadUrl"]
 
-    def upload(self, fail_on_errors: bool = True, timeout:
+    def upload(self, fail_on_errors: bool = True, timeout: float | None = None) -> None:
         """
         Wait for all uploads to finish
         """
@@ -470,6 +567,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             self.queue_size.set(self.upload_queue_size)
         if fail_on_errors and self.errors:
             # There might be more errors, but we can only have one as the cause, so pick the first
+            self.flush_failure_logger()
             raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]
 
     def __enter__(self) -> "IOFileUploadQueue":
@@ -485,7 +583,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         return self
 
     def __exit__(
-        self,
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
     ) -> None:
         """
         Wraps around stop method, for use as context manager
@@ -524,27 +625,31 @@ class FileUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: CancellationToken | None = None,
+        ssl_verify: bool | str = True,
     ):
         # Super sets post_upload and threshold
         super().__init__(
-            cdf_client,
-            post_upload_function,
-            max_queue_size,
-            trigger_log_level,
-            thread_name,
-            overwrite_existing,
-            cancellation_token,
+            cdf_client=cdf_client,
+            post_upload_function=post_upload_function,
+            max_queue_size=max_queue_size,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            overwrite_existing=overwrite_existing,
+            cancellation_token=cancellation_token,
+            ssl_verify=ssl_verify,
         )
 
     def add_to_upload_queue(
-        self,
+        self,
+        file_meta: FileMetadataOrCogniteExtractorFile,
+        file_name: Union[str, PathLike],
     ) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
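With the call above rewritten to keyword arguments, `FileUploadQueue` now forwards `ssl_verify` to `IOFileUploadQueue` unchanged. A minimal usage sketch, assuming a configured `CogniteClient` named `client` (the metadata and path are hypothetical); the context-manager protocol shown earlier starts the uploader and waits for outstanding uploads on exit:

```python
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import FileUploadQueue

with FileUploadQueue(cdf_client=client, max_queue_size=10, ssl_verify=True) as queue:
    queue.add_to_upload_queue(
        file_meta=FileMetadata(external_id="my-file", name="report.pdf"),  # hypothetical
        file_name="/data/report.pdf",                                      # hypothetical
    )
```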
@@ -579,21 +684,23 @@ class BytesUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
        cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: CancellationToken | None = None,
+        ssl_verify: bool | str = True,
     ) -> None:
         super().__init__(
-            cdf_client,
-            post_upload_function,
-            max_queue_size,
-            trigger_log_level,
-            thread_name,
-            overwrite_existing,
-            cancellation_token,
+            cdf_client=cdf_client,
+            post_upload_function=post_upload_function,
+            max_queue_size=max_queue_size,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            overwrite_existing=overwrite_existing,
+            cancellation_token=cancellation_token,
+            ssl_verify=ssl_verify,
         )
 
     def add_to_upload_queue(self, content: bytes, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
--- a/cognite/extractorutils/uploader/raw.py
+++ b/cognite/extractorutils/uploader/raw.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from types import TracebackType
-from typing import Any, Callable,
+from typing import Any, Callable, Type
 
 import arrow
 from arrow import Arrow
@@ -56,12 +56,12 @@ class RawUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function:
-        max_queue_size:
-        max_upload_interval:
+        post_upload_function: Callable[[list[Any]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name:
-        cancellation_token:
+        thread_name: str | None = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -73,7 +73,7 @@ class RawUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue:
+        self.upload_queue: dict[str, dict[str, list[TimestampedObject]]] = {}
 
         # It is a hack since Prometheus client registers metrics on object creation, so object has to be created once
         self.rows_queued = RAW_UPLOADER_ROWS_QUEUED
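The new annotation spells out the queue's nesting: rows are grouped by RAW database name, then by table name, each bucket holding a list of timestamped rows. A toy illustration of that layout, with plain strings standing in for `TimestampedObject` (names hypothetical):

```python
upload_queue: dict[str, dict[str, list[str]]] = {}

# Queue one row under mydb/mytable, creating both nesting levels on demand
upload_queue.setdefault("mydb", {}).setdefault("mytable", []).append("row-1")
print(upload_queue)  # {'mydb': {'mytable': ['row-1']}}
```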
@@ -119,7 +119,7 @@ class RawUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_batch(database: str, table: str, patch:
+        def _upload_batch(database: str, table: str, patch: list[Row]) -> None:
             # Upload
             self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
 
@@ -133,7 +133,7 @@ class RawUploadQueue(AbstractUploadQueue):
 
         # Deduplicate
         # In case of duplicate keys, the first key is preserved, and the last value is preserved.
-        patch:
+        patch: dict[str, Row] = {r.payload.key: r.payload for r in rows}
         self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))
 
         _upload_batch(database=database, table=table, patch=list(patch.values()))
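The comprehension implements exactly the rule stated in the comment: keying the dict on the row key keeps the first key's position (dict insertion order) but the last value written, and the length difference feeds the duplicates metric. A standalone illustration with tuples standing in for rows:

```python
rows = [("a", 1), ("b", 2), ("a", 3)]  # two rows share the key "a"

patch = {key: value for key, value in rows}
print(patch)                   # {'a': 3, 'b': 2}: first position, last value wins
print(len(rows) - len(patch))  # 1, the amount the duplicates metric is incremented by
```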
@@ -162,7 +162,7 @@ class RawUploadQueue(AbstractUploadQueue):
         return self
 
     def __exit__(
-        self, exc_type:
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager