cognite-extractor-utils 7.5.3__py3-none-any.whl → 7.5.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of cognite-extractor-utils might be problematic.

Files changed (42)
  1. cognite/extractorutils/__init__.py +3 -1
  2. cognite/extractorutils/_inner_util.py +14 -3
  3. cognite/extractorutils/base.py +14 -15
  4. cognite/extractorutils/configtools/__init__.py +25 -0
  5. cognite/extractorutils/configtools/_util.py +7 -9
  6. cognite/extractorutils/configtools/elements.py +58 -49
  7. cognite/extractorutils/configtools/loaders.py +29 -26
  8. cognite/extractorutils/configtools/validators.py +2 -3
  9. cognite/extractorutils/exceptions.py +1 -4
  10. cognite/extractorutils/metrics.py +18 -18
  11. cognite/extractorutils/statestore/_base.py +3 -4
  12. cognite/extractorutils/statestore/hashing.py +24 -24
  13. cognite/extractorutils/statestore/watermark.py +17 -14
  14. cognite/extractorutils/threading.py +4 -4
  15. cognite/extractorutils/unstable/configuration/exceptions.py +24 -0
  16. cognite/extractorutils/unstable/configuration/loaders.py +18 -7
  17. cognite/extractorutils/unstable/configuration/models.py +25 -3
  18. cognite/extractorutils/unstable/core/_dto.py +10 -0
  19. cognite/extractorutils/unstable/core/base.py +179 -29
  20. cognite/extractorutils/unstable/core/errors.py +72 -0
  21. cognite/extractorutils/unstable/core/restart_policy.py +29 -0
  22. cognite/extractorutils/unstable/core/runtime.py +170 -26
  23. cognite/extractorutils/unstable/core/tasks.py +2 -0
  24. cognite/extractorutils/unstable/scheduling/_scheduler.py +4 -4
  25. cognite/extractorutils/uploader/__init__.py +14 -0
  26. cognite/extractorutils/uploader/_base.py +8 -8
  27. cognite/extractorutils/uploader/assets.py +15 -9
  28. cognite/extractorutils/uploader/data_modeling.py +13 -13
  29. cognite/extractorutils/uploader/events.py +9 -9
  30. cognite/extractorutils/uploader/files.py +144 -38
  31. cognite/extractorutils/uploader/raw.py +10 -10
  32. cognite/extractorutils/uploader/time_series.py +56 -58
  33. cognite/extractorutils/uploader/upload_failure_handler.py +64 -0
  34. cognite/extractorutils/uploader_extractor.py +11 -11
  35. cognite/extractorutils/uploader_types.py +4 -12
  36. cognite/extractorutils/util.py +21 -23
  37. {cognite_extractor_utils-7.5.3.dist-info → cognite_extractor_utils-7.5.5.dist-info}/METADATA +3 -2
  38. cognite_extractor_utils-7.5.5.dist-info/RECORD +49 -0
  39. {cognite_extractor_utils-7.5.3.dist-info → cognite_extractor_utils-7.5.5.dist-info}/WHEEL +1 -1
  40. cognite/extractorutils/unstable/core/__main__.py +0 -31
  41. cognite_extractor_utils-7.5.3.dist-info/RECORD +0 -46
  42. {cognite_extractor_utils-7.5.3.dist-info → cognite_extractor_utils-7.5.5.dist-info}/LICENSE +0 -0
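
The most visible addition in 7.5.5 is failure logging for file uploads: a new upload_failure_handler module and a failure_logging_path parameter on the file upload queue (see the files.py diff below). A minimal sketch of how the new parameter might be used, based on the signatures visible in the diff; the log path and client setup are illustrative assumptions, not official documentation:

# Hedged usage sketch (not from the package): wiring the new failure log into a queue.
# The failure_logging_path parameter and the flush-on-error behaviour are taken from the
# IOFileUploadQueue changes below; the file path is made up.
from cognite.client import CogniteClient
from cognite.extractorutils.uploader.files import IOFileUploadQueue

client = CogniteClient()  # assumes credentials are configured elsewhere

queue = IOFileUploadQueue(
    cdf_client=client,
    failure_logging_path="failed_uploads.log",  # new in 7.5.5
)

# ... queue files as before ...

# upload() now calls flush_failure_logger() before raising, so the names and
# error reasons of failed files end up in the configured log file.
queue.upload(fail_on_errors=True)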
cognite/extractorutils/uploader/files.py
@@ -18,16 +18,27 @@ from io import BytesIO, RawIOBase
 from math import ceil
 from os import PathLike
 from types import TracebackType
-from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
+from typing import (
+    Any,
+    BinaryIO,
+    Callable,
+    Iterator,
+    List,
+    Optional,
+    Type,
+    Union,
+)
 from urllib.parse import ParseResult, urlparse

 from httpx import URL, Client, Headers, Request, StreamConsumed, SyncByteStream
 from requests.utils import super_len

 from cognite.client import CogniteClient
-from cognite.client.data_classes import FileMetadata
+from cognite.client.data_classes import FileMetadata, FileMetadataUpdate
 from cognite.client.data_classes.data_modeling import NodeId
-from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorFileApply
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import (
+    CogniteExtractorFileApply,
+)
 from cognite.client.utils._identifier import IdentifierSequence
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
@@ -42,6 +53,7 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
+from cognite.extractorutils.uploader.upload_failure_handler import FileFailureManager
 from cognite.extractorutils.util import cognite_exceptions, retry

 _QUEUES: int = 0
@@ -54,6 +66,7 @@ _MAX_FILE_CHUNK_SIZE = 4 * 1024 * 1024 * 1000

 _CDF_ALPHA_VERSION_HEADER = {"cdf-version": "alpha"}

+
 FileMetadataOrCogniteExtractorFile = Union[FileMetadata, CogniteExtractorFileApply]


@@ -97,7 +110,10 @@ class ChunkedStream(RawIOBase, BinaryIO):
         return super().__enter__()

     def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self,
+        exc_type: Type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
     ) -> None:
         return super().__exit__(exc_type, exc_val, exc_tb)

@@ -186,13 +202,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[FileMetadataOrCogniteExtractorFile]], None]] = None,
-        max_queue_size: Optional[int] = None,
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name: Optional[str] = None,
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
         cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: Optional[int] = None,
+        failure_logging_path: None | str = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -208,6 +225,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
         if self.threshold <= 0:
             raise ValueError("Max queue size must be positive for file upload queues")

+        self.failure_logging_path = failure_logging_path or None
+        self.initialize_failure_logging()
+
         self.upload_queue: List[Future] = []
         self.errors: List[Exception] = []

@@ -235,10 +255,31 @@ class IOFileUploadQueue(AbstractUploadQueue):
         global _QUEUES, _QUEUES_LOCK
         with _QUEUES_LOCK:
             self._pool = ThreadPoolExecutor(
-                max_workers=self.parallelism, thread_name_prefix=f"FileUploadQueue-{_QUEUES}"
+                max_workers=self.parallelism,
+                thread_name_prefix=f"FileUploadQueue-{_QUEUES}",
             )
             _QUEUES += 1

+    def initialize_failure_logging(self) -> None:
+        self._file_failure_manager: FileFailureManager | None = (
+            FileFailureManager(path_to_file=self.failure_logging_path)
+            if self.failure_logging_path is not None
+            else None
+        )
+
+    def get_failure_logger(self) -> FileFailureManager | None:
+        return self._file_failure_manager
+
+    def add_entry_failure_logger(self, file_name: str, error: Exception) -> None:
+        if self._file_failure_manager is not None:
+            error_reason = str(error)
+            self._file_failure_manager.add(file_name=file_name, error_reason=error_reason)
+
+    def flush_failure_logger(self) -> None:
+        if self._file_failure_manager is not None:
+            self.logger.info("Flushing failure logs")
+            self._file_failure_manager.write_to_file()
+
     def _remove_done_from_queue(self) -> None:
         while not self.cancellation_token.is_cancelled:
             with self.lock:
@@ -251,7 +292,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         node = instance_result.nodes[0]
         return node.as_id()

-    def _upload_empty(
+    def _upload_only_metadata(
         self, file_meta: FileMetadataOrCogniteExtractorFile
     ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
         if isinstance(file_meta, CogniteExtractorFileApply):
@@ -261,21 +302,71 @@ class IOFileUploadQueue(AbstractUploadQueue):
         file_meta_response, url = self.cdf_client.files.create(
             file_metadata=file_meta, overwrite=self.overwrite_existing
         )
-        # trigger update after creation (upsert =P)
-        basic_attributes = set(["externalId", "name"])
-        attr = set(file_meta.dump().keys())
-        diff = attr - basic_attributes

-        if len(diff) >= 1 and "externalId" in attr:
-            file_meta_response = self.cdf_client.files.update(file_meta)
+        # The files API for whatever reason doesn't update directory or source when you overwrite,
+        # so we need to update those later.
+        any_unchaged = (
+            file_meta_response.directory != file_meta.directory or file_meta_response.source != file_meta.source
+        )
+        if any_unchaged:
+            update = FileMetadataUpdate(external_id=file_meta.external_id)
+            any = False
+            if file_meta.source:
+                any = True
+                update.source.set(file_meta.source)
+            if file_meta.directory:
+                any = True
+                update.directory.set(file_meta.directory)
+            if any:
+                self.cdf_client.files.update(update)

         return file_meta_response, url

+    def _upload_empty_file(
+        self,
+        file_meta: FileMetadataOrCogniteExtractorFile,
+    ) -> None:
+        file_meta_response, url = self._upload_only_metadata(file_meta)
+
+        self._upload_only_file_reference(file_meta, url)
+
     def _upload_bytes(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
-        file_meta, url = self._upload_empty(file_meta)
+        file_meta, url = self._upload_only_metadata(file_meta)
         resp = self._httpx_client.send(self._get_file_upload_request(url, file, size, file_meta.mime_type))
         resp.raise_for_status()

+    def _prepare_request_data_for_empty_file(self, url_str: str) -> Request:
+        FILE_SIZE = 0  # this path is only entered for an empty file
+        EMPTY_CONTENT = ""
+
+        url = URL(url_str)
+        base_url = URL(self.cdf_client.config.base_url)
+
+        if url.host == base_url.host:
+            upload_url = url
+        else:
+            parsed_url: ParseResult = urlparse(url_str)
+            parsed_base_url: ParseResult = urlparse(self.cdf_client.config.base_url)
+            replaced_upload_url = parsed_url._replace(netloc=parsed_base_url.netloc).geturl()
+            upload_url = URL(replaced_upload_url)
+
+        headers = Headers(self._httpx_client.headers)
+        headers.update(
+            {
+                "Accept": "*/*",
+                "Content-Length": str(FILE_SIZE),
+                "Host": upload_url.netloc.decode("ascii"),
+                "x-cdp-app": self.cdf_client._config.client_name,
+            }
+        )
+
+        return Request(method="PUT", url=upload_url, headers=headers, content=EMPTY_CONTENT)
+
+    def _upload_only_file_reference(self, file_meta: FileMetadataOrCogniteExtractorFile, url_str: str) -> None:
+        request_data = self._prepare_request_data_for_empty_file(url_str)
+        resp = self._httpx_client.send(request_data)
+        resp.raise_for_status()
+
     def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         chunks = ChunkedStream(file, self.max_file_chunk_size, size)
         self.logger.debug(
@@ -319,7 +410,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         res = self.cdf_client.files._post(
             url_path="/files/initmultipartupload",
             json=file_meta.dump(camel_case=True),
-            params={"overwrite": self.overwrite_existing, "parts": chunks.chunk_count},
+            params={
+                "overwrite": self.overwrite_existing,
+                "parts": chunks.chunk_count,
+            },
         )
         res.raise_for_status()
         return res.json()
@@ -328,9 +422,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self,
         file_meta: FileMetadataOrCogniteExtractorFile,
         read_file: Callable[[], BinaryIO],
-        extra_retries: Optional[
-            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
-        ] = None,
+        extra_retries: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Any], bool]] | None = None,
     ) -> None:
         """
         Add file to upload queue. The file will start uploading immedeately. If the size of the queue is larger than
@@ -356,12 +448,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadataOrCogniteExtractorFile) -> None:
+        def upload_file(
+            read_file: Callable[[], BinaryIO],
+            file_meta: FileMetadataOrCogniteExtractorFile,
+        ) -> None:
             with read_file() as file:
                 size = super_len(file)
                 if size == 0:
-                    # upload just the file metadata witout data
-                    file_meta, _ = self._upload_empty(file_meta)
+                    self._upload_empty_file(file_meta)
                 elif size >= self.max_single_chunk_file_size:
                     # The minimum chunk size is 4000MiB.
                     self._upload_multipart(size, file, file_meta)
@@ -378,12 +472,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
                 except Exception as e:
                     self.logger.error("Error in upload callback: %s", str(e))

-        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadataOrCogniteExtractorFile) -> None:
+        def wrapped_upload(
+            read_file: Callable[[], BinaryIO],
+            file_meta: FileMetadataOrCogniteExtractorFile,
+        ) -> None:
             try:
                 upload_file(read_file, file_meta)

             except Exception as e:
-                self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
+                self.logger.exception(
+                    f"Unexpected error while uploading file: {file_meta.external_id} {file_meta.name}"
+                )
+                self.add_entry_failure_logger(file_name=str(file_meta.name), error=e)
                 self.errors.append(e)

             finally:
@@ -406,7 +506,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             self.queue_size.set(self.upload_queue_size)

     def _get_file_upload_request(
-        self, url_str: str, stream: BinaryIO, size: int, mime_type: Optional[str] = None
+        self, url_str: str, stream: BinaryIO, size: int, mime_type: str | None = None
     ) -> Request:
         url = URL(url_str)
         base_url = URL(self.cdf_client.config.base_url)
@@ -450,7 +550,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         resp_json = res.json()["items"][0]
         return FileMetadata.load(resp_json), resp_json["uploadUrl"]

-    def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
+    def upload(self, fail_on_errors: bool = True, timeout: float | None = None) -> None:
         """
         Wait for all uploads to finish
         """
@@ -460,6 +560,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self.queue_size.set(self.upload_queue_size)
         if fail_on_errors and self.errors:
             # There might be more errors, but we can only have one as the cause, so pick the first
+            self.flush_failure_logger()
             raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]

     def __enter__(self) -> "IOFileUploadQueue":
@@ -475,7 +576,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         return self

     def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
     ) -> None:
         """
         Wraps around stop method, for use as context manager
@@ -514,13 +618,13 @@ class FileUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[FileMetadataOrCogniteExtractorFile]], None]] = None,
-        max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name: Optional[str] = None,
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
-        cancellation_token: Optional[CancellationToken] = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -534,7 +638,9 @@ class FileUploadQueue(IOFileUploadQueue):
         )

     def add_to_upload_queue(
-        self, file_meta: FileMetadataOrCogniteExtractorFile, file_name: Union[str, PathLike]
+        self,
+        file_meta: FileMetadataOrCogniteExtractorFile,
+        file_name: Union[str, PathLike],
     ) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -569,12 +675,12 @@ class BytesUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[FileMetadataOrCogniteExtractorFile]], None]] = None,
-        max_queue_size: Optional[int] = None,
+        post_upload_function: Callable[[list[FileMetadataOrCogniteExtractorFile]], None] | None = None,
+        max_queue_size: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name: Optional[str] = None,
+        thread_name: str | None = None,
         overwrite_existing: bool = False,
-        cancellation_token: Optional[CancellationToken] = None,
+        cancellation_token: CancellationToken | None = None,
     ) -> None:
         super().__init__(
             cdf_client,
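
The hunks above also reshape the add-to-queue path (the retry-decorated upload_file and wrapped_upload closures). A short sketch of feeding a file through that path via a read_file callable; the method name add_io_to_upload_queue is an assumption from the library's public API and is not shown in the hunk itself, and the metadata values are made up:

# Hedged sketch: queueing one file through IOFileUploadQueue. The method name
# add_io_to_upload_queue is assumed (the hunk above only shows its parameter list);
# external_id, name and the local path are examples.
from pathlib import Path

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import IOFileUploadQueue

queue = IOFileUploadQueue(cdf_client=CogniteClient(), overwrite_existing=True)

meta = FileMetadata(external_id="report-2024", name="report.pdf")

# read_file is called lazily inside the retry-decorated upload_file closure,
# so the file handle is reopened on every retry attempt.
queue.add_io_to_upload_queue(
    file_meta=meta,
    read_file=lambda: Path("report.pdf").open("rb"),
)
queue.upload()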
cognite/extractorutils/uploader/raw.py
@@ -13,7 +13,7 @@
 # limitations under the License.

 from types import TracebackType
-from typing import Any, Callable, Dict, List, Optional, Type
+from typing import Any, Callable, Type

 import arrow
 from arrow import Arrow
@@ -56,12 +56,12 @@ class RawUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[Any]], None]] = None,
-        max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
+        post_upload_function: Callable[[list[Any]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name: Optional[str] = None,
-        cancellation_token: Optional[CancellationToken] = None,
+        thread_name: str | None = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -73,7 +73,7 @@ class RawUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue: Dict[str, Dict[str, List[TimestampedObject]]] = {}
+        self.upload_queue: dict[str, dict[str, list[TimestampedObject]]] = {}

         # It is a hack since Prometheus client registers metrics on object creation, so object has to be created once
         self.rows_queued = RAW_UPLOADER_ROWS_QUEUED
@@ -119,7 +119,7 @@ class RawUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_batch(database: str, table: str, patch: List[Row]) -> None:
+        def _upload_batch(database: str, table: str, patch: list[Row]) -> None:
             # Upload
             self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)

@@ -133,7 +133,7 @@ class RawUploadQueue(AbstractUploadQueue):

                     # Deduplicate
                     # In case of duplicate keys, the first key is preserved, and the last value is preserved.
-                    patch: Dict[str, Row] = {r.payload.key: r.payload for r in rows}
+                    patch: dict[str, Row] = {r.payload.key: r.payload for r in rows}
                     self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))

                     _upload_batch(database=database, table=table, patch=list(patch.values()))
@@ -162,7 +162,7 @@ class RawUploadQueue(AbstractUploadQueue):
         return self

     def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
cognite/extractorutils/uploader/time_series.py
@@ -15,7 +15,7 @@
 import math
 from datetime import datetime
 from types import TracebackType
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Type

 from cognite.client import CogniteClient
 from cognite.client.data_classes import (
@@ -50,13 +50,13 @@ MAX_DATAPOINT_STRING_LENGTH = 255
 MAX_DATAPOINT_VALUE = 1e100
 MIN_DATAPOINT_VALUE = -1e100

-TimeStamp = Union[int, datetime]
+TimeStamp = int | datetime

-DataPointWithoutStatus = Union[Tuple[TimeStamp, float], Tuple[TimeStamp, str], Tuple[TimeStamp, int]]
-FullStatusCode = Union[StatusCode, int]
-DataPointWithStatus = Union[Tuple[TimeStamp, float, FullStatusCode], Tuple[TimeStamp, str, FullStatusCode]]
-DataPoint = Union[DataPointWithoutStatus, DataPointWithStatus]
-DataPointList = List[DataPoint]
+DataPointWithoutStatus = tuple[TimeStamp, float] | tuple[TimeStamp, str] | tuple[TimeStamp, int]
+FullStatusCode = StatusCode | int
+DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp, str, FullStatusCode]
+DataPoint = DataPointWithoutStatus | DataPointWithStatus
+DataPointList = list[DataPoint]


 def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
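
The reworked aliases above describe the accepted datapoint shapes: a (timestamp, value) pair, or a (timestamp, value, status) triple where the status is a StatusCode or a plain int. A hedged illustration of values that satisfy these aliases, using the keyword-only add_to_upload_queue signature shown further down; the external ID is a made-up example:

# Illustrative only: datapoints matching the DataPoint aliases above.
# StatusCode is assumed to be importable from cognite.client.data_classes,
# as referenced by the FullStatusCode alias; "my-sensor" is a made-up ID.
from datetime import datetime, timezone

from cognite.client import CogniteClient
from cognite.client.data_classes import StatusCode
from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

queue = TimeSeriesUploadQueue(cdf_client=CogniteClient(), create_missing=True)

now = datetime.now(timezone.utc)
datapoints = [
    (now, 21.5),                   # (TimeStamp, float), no status
    (1_700_000_000_000, "open"),   # epoch-ms timestamp with a string value
    (now, 22.0, StatusCode.Good),  # (TimeStamp, float, FullStatusCode)
]

queue.add_to_upload_queue(external_id="my-sensor", datapoints=datapoints)
queue.upload()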
@@ -103,14 +103,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[Dict[str, Union[str, DataPointList]]]], None]] = None,
-        max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name: Optional[str] = None,
-        create_missing: Union[Callable[[str, DataPointList], TimeSeries], bool] = False,
-        data_set_id: Optional[int] = None,
-        cancellation_token: Optional[CancellationToken] = None,
+        thread_name: str | None = None,
+        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+        data_set_id: int | None = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -132,14 +132,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             self.create_missing = True
             self.missing_factory = create_missing

-        self.upload_queue: Dict[EitherId, DataPointList] = {}
+        self.upload_queue: dict[EitherId, DataPointList] = {}

         self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
         self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
         self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
         self.data_set_id = data_set_id

-    def _verify_datapoint_time(self, time: Union[int, float, datetime, str]) -> bool:
+    def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
         if isinstance(time, int) or isinstance(time, float):
             return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
         elif isinstance(time, str):
@@ -147,7 +147,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         else:
             return time.timestamp() * 1000.0 >= MIN_DATAPOINT_TIMESTAMP

-    def _verify_datapoint_value(self, value: Union[int, float, datetime, str]) -> bool:
+    def _verify_datapoint_value(self, value: int | float | datetime | str) -> bool:
         if isinstance(value, float):
             return not (
                 math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
@@ -171,7 +171,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return True

     def add_to_upload_queue(
-        self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: Optional[DataPointList] = None
+        self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
     ) -> None:
         """
         Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -180,7 +180,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         Args:
             id: Internal ID of time series. Either this or external_id must be set.
             external_id: External ID of time series. Either this or external_id must be set.
-            datapoints: List of data points to add
+            datapoints: list of data points to add
         """
         datapoints = datapoints or []
         old_len = len(datapoints)
@@ -219,7 +219,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_batch(upload_this: List[Dict], retries: int = 5) -> List[Dict]:
+        def _upload_batch(upload_this: list[dict], retries: int = 5) -> list[dict]:
            if len(upload_this) == 0:
                return upload_this

@@ -241,14 +241,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                create_these_ids = set(
                    [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
                )
-                datapoints_lists: Dict[str, DataPointList] = {
+                datapoints_lists: dict[str, DataPointList] = {
                    ts_dict["externalId"]: ts_dict["datapoints"]
                    for ts_dict in upload_this
                    if ts_dict["externalId"] in create_these_ids
                }

                self.logger.info(f"Creating {len(create_these_ids)} time series")
-                to_create: List[TimeSeries] = [
+                to_create: list[TimeSeries] = [
                    self.missing_factory(external_id, datapoints_lists[external_id])
                    for external_id in create_these_ids
                ]
@@ -317,7 +317,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         return self

     def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager
@@ -343,13 +343,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[Any]], None]] = None,
-        max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
+        post_upload_function: Callable[[list[Any]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
-        thread_name: Optional[str] = None,
+        thread_name: str | None = None,
         create_missing: bool = False,
-        cancellation_token: Optional[CancellationToken] = None,
+        cancellation_token: CancellationToken | None = None,
     ):
         """
         Args:
@@ -374,15 +374,15 @@ class SequenceUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue: Dict[EitherId, SequenceRows] = {}
-        self.sequence_metadata: Dict[EitherId, Dict[str, Union[str, int, float]]] = {}
-        self.sequence_asset_external_ids: Dict[EitherId, str] = {}
-        self.sequence_dataset_external_ids: Dict[EitherId, str] = {}
-        self.sequence_names: Dict[EitherId, str] = {}
-        self.sequence_descriptions: Dict[EitherId, str] = {}
-        self.column_definitions: Dict[EitherId, List[Dict[str, str]]] = {}
-        self.asset_ids: Dict[str, int] = {}
-        self.dataset_ids: Dict[str, int] = {}
+        self.upload_queue: dict[EitherId, SequenceRows] = {}
+        self.sequence_metadata: dict[EitherId, dict[str, str | int | float]] = {}
+        self.sequence_asset_external_ids: dict[EitherId, str] = {}
+        self.sequence_dataset_external_ids: dict[EitherId, str] = {}
+        self.sequence_names: dict[EitherId, str] = {}
+        self.sequence_descriptions: dict[EitherId, str] = {}
+        self.column_definitions: dict[EitherId, list[dict[str, str]]] = {}
+        self.asset_ids: dict[str, int] = {}
+        self.dataset_ids: dict[str, int] = {}
         self.create_missing = create_missing

         self.points_queued = SEQUENCES_UPLOADER_POINTS_QUEUED
@@ -391,13 +391,13 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def set_sequence_metadata(
         self,
-        metadata: Dict[str, Union[str, int, float]],
-        id: Optional[int] = None,
-        external_id: Optional[str] = None,
-        asset_external_id: Optional[str] = None,
-        dataset_external_id: Optional[str] = None,
-        name: Optional[str] = None,
-        description: Optional[str] = None,
+        metadata: dict[str, str | int | float],
+        id: int | None = None,
+        external_id: str | None = None,
+        asset_external_id: str | None = None,
+        dataset_external_id: str | None = None,
+        name: str | None = None,
+        description: str | None = None,
     ) -> None:
         """
         Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
@@ -426,7 +426,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
            self.sequence_descriptions[either_id] = description

     def set_sequence_column_definition(
-        self, col_def: List[Dict[str, str]], id: Optional[int] = None, external_id: Optional[str] = None
+        self, col_def: list[dict[str, str]], id: int | None = None, external_id: str | None = None
     ) -> None:
         """
         Set sequence column definition
@@ -443,16 +443,14 @@ class SequenceUploadQueue(AbstractUploadQueue):

     def add_to_upload_queue(
         self,
-        rows: Union[
-            Dict[int, List[Union[int, float, str]]],
-            List[Tuple[int, Union[int, float, str]]],
-            List[Dict[str, Any]],
-            SequenceData,
-            SequenceRows,
-        ],
-        column_external_ids: Optional[List[dict]] = None,
-        id: Optional[int] = None,
-        external_id: Optional[str] = None,
+        rows: dict[int, list[int | float | str]]
+        | list[tuple[int, int | float | str]]
+        | list[dict[str, Any]]
+        | SequenceData
+        | SequenceRows,
+        column_external_ids: list[dict] | None = None,
+        id: int | None = None,
+        external_id: str | None = None,
     ) -> None:
         """
         Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
@@ -461,7 +459,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         Args:
             rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
                 objects, a dictionary of rowNumber: data, or a SequenceData object.
-            column_external_ids: List of external id for the columns of the sequence
+            column_external_ids: list of external id for the columns of the sequence
             id: Sequence internal ID
                 Use if external_id is None
             external_id: Sequence external ID
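
The docstring above lists the accepted rows shapes. A hedged sketch of the same payload expressed in three of those shapes, using the set_sequence_column_definition and add_to_upload_queue signatures from these hunks; the sequence and column external IDs (and the DOUBLE value type) are illustrative assumptions:

# Illustrative only: equivalent ways of queueing sequence rows.
from cognite.client import CogniteClient
from cognite.extractorutils.uploader.time_series import SequenceUploadQueue

queue = SequenceUploadQueue(cdf_client=CogniteClient(), create_missing=True)

queue.set_sequence_column_definition(
    col_def=[{"externalId": "pressure", "valueType": "DOUBLE"}],  # assumed CDF column value type
    external_id="my-sequence",
)

# dict of rowNumber -> values
queue.add_to_upload_queue(rows={1: [101.3], 2: [99.8]}, external_id="my-sequence")
# list of (rowNumber, value) tuples
queue.add_to_upload_queue(rows=[(3, 100.1), (4, 100.4)], external_id="my-sequence")
# list of {"rowNumber": ..., "values": [...]} objects
queue.add_to_upload_queue(
    rows=[{"rowNumber": 5, "values": [100.9]}],
    external_id="my-sequence",
)
queue.upload()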
@@ -477,7 +475,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
            # Already in the desired format
            pass
        elif isinstance(rows, (dict, list)):
-            rows_raw: List[Dict[str, Any]]
+            rows_raw: list[dict[str, Any]]
            if isinstance(rows, dict):
                rows_raw = [{"rowNumber": row_number, "values": values} for row_number, values in rows.items()]
            elif isinstance(rows, list) and rows and isinstance(rows[0], (tuple, list)):
@@ -658,7 +656,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         return self

     def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
         Wraps around stop method, for use as context manager