cognite-extractor-utils 7.5.4__py3-none-any.whl → 7.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-extractor-utils might be problematic.

Files changed (42)
  1. cognite/extractorutils/__init__.py +3 -1
  2. cognite/extractorutils/_inner_util.py +14 -3
  3. cognite/extractorutils/base.py +14 -15
  4. cognite/extractorutils/configtools/__init__.py +25 -0
  5. cognite/extractorutils/configtools/_util.py +7 -9
  6. cognite/extractorutils/configtools/elements.py +58 -49
  7. cognite/extractorutils/configtools/loaders.py +29 -26
  8. cognite/extractorutils/configtools/validators.py +2 -3
  9. cognite/extractorutils/exceptions.py +1 -4
  10. cognite/extractorutils/metrics.py +18 -18
  11. cognite/extractorutils/statestore/_base.py +3 -4
  12. cognite/extractorutils/statestore/hashing.py +24 -24
  13. cognite/extractorutils/statestore/watermark.py +17 -14
  14. cognite/extractorutils/threading.py +4 -4
  15. cognite/extractorutils/unstable/configuration/exceptions.py +24 -0
  16. cognite/extractorutils/unstable/configuration/loaders.py +18 -7
  17. cognite/extractorutils/unstable/configuration/models.py +25 -3
  18. cognite/extractorutils/unstable/core/_dto.py +10 -0
  19. cognite/extractorutils/unstable/core/base.py +179 -29
  20. cognite/extractorutils/unstable/core/errors.py +72 -0
  21. cognite/extractorutils/unstable/core/restart_policy.py +29 -0
  22. cognite/extractorutils/unstable/core/runtime.py +170 -26
  23. cognite/extractorutils/unstable/core/tasks.py +2 -0
  24. cognite/extractorutils/unstable/scheduling/_scheduler.py +4 -4
  25. cognite/extractorutils/uploader/__init__.py +14 -0
  26. cognite/extractorutils/uploader/_base.py +8 -8
  27. cognite/extractorutils/uploader/assets.py +15 -9
  28. cognite/extractorutils/uploader/data_modeling.py +13 -13
  29. cognite/extractorutils/uploader/events.py +9 -9
  30. cognite/extractorutils/uploader/files.py +153 -46
  31. cognite/extractorutils/uploader/raw.py +10 -10
  32. cognite/extractorutils/uploader/time_series.py +56 -58
  33. cognite/extractorutils/uploader/upload_failure_handler.py +64 -0
  34. cognite/extractorutils/uploader_extractor.py +11 -11
  35. cognite/extractorutils/uploader_types.py +4 -12
  36. cognite/extractorutils/util.py +21 -23
  37. {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/METADATA +4 -3
  38. cognite_extractor_utils-7.5.6.dist-info/RECORD +49 -0
  39. {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/WHEEL +1 -1
  40. cognite/extractorutils/unstable/core/__main__.py +0 -31
  41. cognite_extractor_utils-7.5.4.dist-info/RECORD +0 -46
  42. {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/LICENSE +0 -0
@@ -15,7 +15,7 @@
  import math
  from datetime import datetime
  from types import TracebackType
- from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
+ from typing import Any, Callable, Type
 
  from cognite.client import CogniteClient
  from cognite.client.data_classes import (
@@ -50,13 +50,13 @@ MAX_DATAPOINT_STRING_LENGTH = 255
  MAX_DATAPOINT_VALUE = 1e100
  MIN_DATAPOINT_VALUE = -1e100
 
- TimeStamp = Union[int, datetime]
+ TimeStamp = int | datetime
 
- DataPointWithoutStatus = Union[Tuple[TimeStamp, float], Tuple[TimeStamp, str], Tuple[TimeStamp, int]]
- FullStatusCode = Union[StatusCode, int]
- DataPointWithStatus = Union[Tuple[TimeStamp, float, FullStatusCode], Tuple[TimeStamp, str, FullStatusCode]]
- DataPoint = Union[DataPointWithoutStatus, DataPointWithStatus]
- DataPointList = List[DataPoint]
+ DataPointWithoutStatus = tuple[TimeStamp, float] | tuple[TimeStamp, str] | tuple[TimeStamp, int]
+ FullStatusCode = StatusCode | int
+ DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp, str, FullStatusCode]
+ DataPoint = DataPointWithoutStatus | DataPointWithStatus
+ DataPointList = list[DataPoint]
 
 
  def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
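
Note (not part of the diff): under the new PEP 604-style aliases above, a datapoint is still a plain tuple. A minimal sketch with illustrative values only:

from datetime import datetime

from cognite.extractorutils.uploader.time_series import DataPointList

points: DataPointList = [
    (datetime.now(), 21.3),         # (TimeStamp, float) with a datetime timestamp
    (1_700_000_000_000, "OPEN"),    # (TimeStamp, str) with an epoch-millisecond timestamp
    (1_700_000_000_000, 21.3, 0),   # with an integer status code (FullStatusCode = StatusCode | int)
]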
@@ -103,14 +103,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  def __init__(
  self,
  cdf_client: CogniteClient,
- post_upload_function: Optional[Callable[[List[Dict[str, Union[str, DataPointList]]]], None]] = None,
- max_queue_size: Optional[int] = None,
- max_upload_interval: Optional[int] = None,
+ post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+ max_queue_size: int | None = None,
+ max_upload_interval: int | None = None,
  trigger_log_level: str = "DEBUG",
- thread_name: Optional[str] = None,
- create_missing: Union[Callable[[str, DataPointList], TimeSeries], bool] = False,
- data_set_id: Optional[int] = None,
- cancellation_token: Optional[CancellationToken] = None,
+ thread_name: str | None = None,
+ create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+ data_set_id: int | None = None,
+ cancellation_token: CancellationToken | None = None,
  ):
  # Super sets post_upload and threshold
  super().__init__(
@@ -132,14 +132,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  self.create_missing = True
  self.missing_factory = create_missing
 
- self.upload_queue: Dict[EitherId, DataPointList] = {}
+ self.upload_queue: dict[EitherId, DataPointList] = {}
 
  self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
  self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
  self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
  self.data_set_id = data_set_id
 
- def _verify_datapoint_time(self, time: Union[int, float, datetime, str]) -> bool:
+ def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
  if isinstance(time, int) or isinstance(time, float):
  return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
  elif isinstance(time, str):
@@ -147,7 +147,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  else:
  return time.timestamp() * 1000.0 >= MIN_DATAPOINT_TIMESTAMP
 
- def _verify_datapoint_value(self, value: Union[int, float, datetime, str]) -> bool:
+ def _verify_datapoint_value(self, value: int | float | datetime | str) -> bool:
  if isinstance(value, float):
  return not (
  math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
@@ -171,7 +171,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  return True
 
  def add_to_upload_queue(
- self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: Optional[DataPointList] = None
+ self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
  ) -> None:
  """
  Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -180,7 +180,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  Args:
  id: Internal ID of time series. Either this or external_id must be set.
  external_id: External ID of time series. Either this or external_id must be set.
- datapoints: List of data points to add
+ datapoints: list of data points to add
  """
  datapoints = datapoints or []
  old_len = len(datapoints)
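
Note (not part of the diff): a hedged usage sketch of the keyword-only add_to_upload_queue API shown above; the client configuration and the external ID are assumed/hypothetical:

from cognite.client import CogniteClient

from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

client = CogniteClient()  # assumes credentials are already configured

# create_missing=True lets the queue create time series that do not exist yet
with TimeSeriesUploadQueue(cdf_client=client, max_upload_interval=30, create_missing=True) as queue:
    queue.add_to_upload_queue(
        external_id="my-timeseries",             # hypothetical external ID
        datapoints=[(1_700_000_000_000, 21.3)],  # (epoch-ms timestamp, value)
    )
# __exit__ wraps the stop method (see the __exit__ hunk below), flushing remaining points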
@@ -219,7 +219,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  max_delay=RETRY_MAX_DELAY,
  backoff=RETRY_BACKOFF_FACTOR,
  )
- def _upload_batch(upload_this: List[Dict], retries: int = 5) -> List[Dict]:
+ def _upload_batch(upload_this: list[dict], retries: int = 5) -> list[dict]:
  if len(upload_this) == 0:
  return upload_this
 
@@ -241,14 +241,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  create_these_ids = set(
  [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
  )
- datapoints_lists: Dict[str, DataPointList] = {
+ datapoints_lists: dict[str, DataPointList] = {
  ts_dict["externalId"]: ts_dict["datapoints"]
  for ts_dict in upload_this
  if ts_dict["externalId"] in create_these_ids
  }
 
  self.logger.info(f"Creating {len(create_these_ids)} time series")
- to_create: List[TimeSeries] = [
+ to_create: list[TimeSeries] = [
  self.missing_factory(external_id, datapoints_lists[external_id])
  for external_id in create_these_ids
  ]
@@ -317,7 +317,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  return self
 
  def __exit__(
- self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+ self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
  ) -> None:
  """
  Wraps around stop method, for use as context manager
@@ -343,13 +343,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
  def __init__(
  self,
  cdf_client: CogniteClient,
- post_upload_function: Optional[Callable[[List[Any]], None]] = None,
- max_queue_size: Optional[int] = None,
- max_upload_interval: Optional[int] = None,
+ post_upload_function: Callable[[list[Any]], None] | None = None,
+ max_queue_size: int | None = None,
+ max_upload_interval: int | None = None,
  trigger_log_level: str = "DEBUG",
- thread_name: Optional[str] = None,
+ thread_name: str | None = None,
  create_missing: bool = False,
- cancellation_token: Optional[CancellationToken] = None,
+ cancellation_token: CancellationToken | None = None,
  ):
  """
  Args:
@@ -374,15 +374,15 @@ class SequenceUploadQueue(AbstractUploadQueue):
  thread_name,
  cancellation_token,
  )
- self.upload_queue: Dict[EitherId, SequenceRows] = {}
- self.sequence_metadata: Dict[EitherId, Dict[str, Union[str, int, float]]] = {}
- self.sequence_asset_external_ids: Dict[EitherId, str] = {}
- self.sequence_dataset_external_ids: Dict[EitherId, str] = {}
- self.sequence_names: Dict[EitherId, str] = {}
- self.sequence_descriptions: Dict[EitherId, str] = {}
- self.column_definitions: Dict[EitherId, List[Dict[str, str]]] = {}
- self.asset_ids: Dict[str, int] = {}
- self.dataset_ids: Dict[str, int] = {}
+ self.upload_queue: dict[EitherId, SequenceRows] = {}
+ self.sequence_metadata: dict[EitherId, dict[str, str | int | float]] = {}
+ self.sequence_asset_external_ids: dict[EitherId, str] = {}
+ self.sequence_dataset_external_ids: dict[EitherId, str] = {}
+ self.sequence_names: dict[EitherId, str] = {}
+ self.sequence_descriptions: dict[EitherId, str] = {}
+ self.column_definitions: dict[EitherId, list[dict[str, str]]] = {}
+ self.asset_ids: dict[str, int] = {}
+ self.dataset_ids: dict[str, int] = {}
  self.create_missing = create_missing
 
  self.points_queued = SEQUENCES_UPLOADER_POINTS_QUEUED
@@ -391,13 +391,13 @@
 
  def set_sequence_metadata(
  self,
- metadata: Dict[str, Union[str, int, float]],
- id: Optional[int] = None,
- external_id: Optional[str] = None,
- asset_external_id: Optional[str] = None,
- dataset_external_id: Optional[str] = None,
- name: Optional[str] = None,
- description: Optional[str] = None,
+ metadata: dict[str, str | int | float],
+ id: int | None = None,
+ external_id: str | None = None,
+ asset_external_id: str | None = None,
+ dataset_external_id: str | None = None,
+ name: str | None = None,
+ description: str | None = None,
  ) -> None:
  """
  Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
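
Note (not part of the diff): a sketch of how set_sequence_metadata might be called after this change; identifiers and metadata values are hypothetical:

from cognite.client import CogniteClient

from cognite.extractorutils.uploader.time_series import SequenceUploadQueue

client = CogniteClient()  # assumes credentials are already configured

queue = SequenceUploadQueue(cdf_client=client, create_missing=True)
queue.set_sequence_metadata(
    metadata={"source": "plc-17", "revision": 3},  # hypothetical metadata values
    external_id="pump-curve-seq",                  # hypothetical sequence external ID
    name="Pump curve",
    description="Example sequence created by an extractor",
)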
@@ -426,7 +426,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
  self.sequence_descriptions[either_id] = description
 
  def set_sequence_column_definition(
- self, col_def: List[Dict[str, str]], id: Optional[int] = None, external_id: Optional[str] = None
+ self, col_def: list[dict[str, str]], id: int | None = None, external_id: str | None = None
  ) -> None:
  """
  Set sequence column definition
@@ -443,16 +443,14 @@
 
  def add_to_upload_queue(
  self,
- rows: Union[
- Dict[int, List[Union[int, float, str]]],
- List[Tuple[int, Union[int, float, str]]],
- List[Dict[str, Any]],
- SequenceData,
- SequenceRows,
- ],
- column_external_ids: Optional[List[dict]] = None,
- id: Optional[int] = None,
- external_id: Optional[str] = None,
+ rows: dict[int, list[int | float | str]]
+ | list[tuple[int, int | float | str]]
+ | list[dict[str, Any]]
+ | SequenceData
+ | SequenceRows,
+ column_external_ids: list[dict] | None = None,
+ id: int | None = None,
+ external_id: str | None = None,
  ) -> None:
  """
  Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
@@ -461,7 +459,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
  Args:
  rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
  objects, a dictionary of rowNumber: data, or a SequenceData object.
- column_external_ids: List of external id for the columns of the sequence
+ column_external_ids: list of external id for the columns of the sequence
  id: Sequence internal ID
  Use if external_id is None
  external_id: Sequence external ID
@@ -477,7 +475,7 @@
  # Already in the desired format
  pass
  elif isinstance(rows, (dict, list)):
- rows_raw: List[Dict[str, Any]]
+ rows_raw: list[dict[str, Any]]
  if isinstance(rows, dict):
  rows_raw = [{"rowNumber": row_number, "values": values} for row_number, values in rows.items()]
  elif isinstance(rows, list) and rows and isinstance(rows[0], (tuple, list)):
@@ -658,7 +656,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
  return self
 
  def __exit__(
- self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+ self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
  ) -> None:
  """
  Wraps around stop method, for use as context manager
@@ -0,0 +1,64 @@
+ from datetime import datetime
+ from typing import Iterator, List
+
+ import jsonlines
+
+
+ class FileErrorMapping:
+ def __init__(self, file_name: str, error_reason: str) -> None:
+ self.file_name = file_name
+ self.error_reason = error_reason
+
+ def __iter__(self) -> Iterator[List[str]]:
+ return iter([[self.file_name, self.error_reason]])
+
+
+ class FileFailureManager:
+ MAX_QUEUE_SIZE = 500
+ START_TIME_KEY = "start_time"
+ FILE_REASON_MAP_KEY = "file_error_reason_map"
+
+ def __init__(self, start_time: str | None = None, path_to_file: str | None = None) -> None:
+ self.failure_logs: dict[str, str] = {}
+
+ self.path_to_failure_log: str = self._pre_process_file_extension(path_to_file)
+ self.start_time = start_time or str(datetime.now())
+ self._initialize_failure_logs()
+
+ def _pre_process_file_extension(self, path_to_file: str | None) -> str:
+ if path_to_file and not path_to_file.endswith(".jsonl"):
+ return path_to_file + ".jsonl"
+ return str(path_to_file)
+
+ def _initialize_failure_logs(self) -> None:
+ self.failure_logs = {}
+
+ def __len__(self) -> int:
+ return len(self.failure_logs)
+
+ def clear(self) -> None:
+ self.failure_logs.clear()
+ self._initialize_failure_logs()
+
+ def add(self, file_name: str, error_reason: str) -> None:
+ error_file_object = FileErrorMapping(file_name=file_name, error_reason=error_reason)
+ error_file_dict = dict(error_file_object)
+
+ self.failure_logs.update(error_file_dict)
+
+ if len(self) >= self.MAX_QUEUE_SIZE:
+ self.write_to_file()
+
+ def write_to_file(self) -> None:
+ if len(self) == 0:
+ return
+
+ dict_to_write = {
+ self.START_TIME_KEY: self.start_time,
+ self.FILE_REASON_MAP_KEY: self.failure_logs,
+ }
+
+ with jsonlines.open(self.path_to_failure_log, mode="a") as writer:
+ writer.write(dict_to_write)
+
+ self.clear()
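
Note (not part of the diff): the hunk above adds the new upload_failure_handler.py module; the jsonlines dependency it imports is added in the METADATA diff further down. A minimal usage sketch, with file names and error reasons that are purely hypothetical:

from cognite.extractorutils.uploader.upload_failure_handler import FileFailureManager

manager = FileFailureManager(path_to_file="upload_failures")  # ".jsonl" is appended automatically

manager.add(file_name="report-2024.pdf", error_reason="File exceeded maximum size")
manager.add(file_name="sensor-dump.csv", error_reason="Upload request timed out")

# Entries flush automatically once MAX_QUEUE_SIZE (500) is reached, or explicitly;
# each flush appends one JSON line and clears the in-memory map
manager.write_to_file()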
@@ -15,9 +15,10 @@
  """
  A module containing a slightly more advanced base extractor class, sorting a generic output into upload queues.
  """
+
  from dataclasses import dataclass
  from types import TracebackType
- from typing import Any, Callable, Iterable, List, Optional, Type, TypeVar
+ from typing import Any, Callable, Iterable, Type, TypeVar
 
  from more_itertools import peekable
 
@@ -41,10 +42,11 @@ class QueueConfigClass:
 
  @dataclass
  class UploaderExtractorConfig(BaseConfig):
- queues: Optional[QueueConfigClass]
+ queues: QueueConfigClass | None
 
 
  UploaderExtractorConfigClass = TypeVar("UploaderExtractorConfigClass", bound=UploaderExtractorConfig)
+ RunHandle = Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, CancellationToken], None]
 
 
  class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
@@ -76,19 +78,17 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
  *,
  name: str,
  description: str,
- version: Optional[str] = None,
- run_handle: Optional[
- Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, CancellationToken], None]
- ] = None,
+ version: str | None = None,
+ run_handle: RunHandle | None = None,
  config_class: Type[UploaderExtractorConfigClass],
- metrics: Optional[BaseMetrics] = None,
+ metrics: BaseMetrics | None = None,
  use_default_state_store: bool = True,
- cancellation_token: Optional[CancellationToken] = None,
- config_file_path: Optional[str] = None,
+ cancellation_token: CancellationToken | None = None,
+ config_file_path: str | None = None,
  continuous_extractor: bool = False,
  heartbeat_waiting_time: int = 600,
  handle_interrupts: bool = True,
- middleware: Optional[List[Callable[[dict], dict]]] = None,
+ middleware: list[Callable[[dict], dict]] | None = None,
  ):
  super(UploaderExtractor, self).__init__(
  name=name,
@@ -170,7 +170,7 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
  return self
 
  def __exit__(
- self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+ self, exc_type: Type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
  ) -> bool:
  self.event_queue.__exit__(exc_type, exc_val, exc_tb)
  self.raw_queue.__exit__(exc_type, exc_val, exc_tb)
@@ -1,27 +1,19 @@
- import sys
- from typing import Iterable, List, Optional, Union
+ from typing import Iterable, TypeAlias
 
  from cognite.client.data_classes import Event as _Event
  from cognite.client.data_classes import Row as _Row
-
- if sys.version_info >= (3, 10):
- from typing import TypeAlias
- else:
- from typing_extensions import TypeAlias
-
-
  from cognite.extractorutils.uploader.time_series import DataPoint
 
 
  class InsertDatapoints:
- def __init__(self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: List[DataPoint]):
+ def __init__(self, *, id: int | None = None, external_id: str | None = None, datapoints: list[DataPoint]):
  self.id = id
  self.external_id = external_id
  self.datapoints = datapoints
 
 
  class RawRow:
- def __init__(self, db_name: str, table_name: str, row: Union[_Row, Iterable[_Row]]):
+ def __init__(self, db_name: str, table_name: str, row: _Row | Iterable[_Row]):
  self.db_name = db_name
  self.table_name = table_name
  if isinstance(row, Iterable):
@@ -32,4 +24,4 @@ class RawRow:
 
  Event: TypeAlias = _Event
 
- CdfTypes = Union[Event, Iterable[Event], RawRow, Iterable[RawRow], InsertDatapoints, Iterable[InsertDatapoints]]
+ CdfTypes = Event | Iterable[Event] | RawRow | Iterable[RawRow] | InsertDatapoints | Iterable[InsertDatapoints]
@@ -25,7 +25,7 @@ from functools import partial, wraps
  from io import RawIOBase
  from threading import Thread
  from time import time
- from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Type, TypeVar, Union
+ from typing import Any, Callable, Generator, Iterable, Type, TypeVar
 
  from decorator import decorator
 
@@ -89,7 +89,7 @@ class EitherId:
  TypeError: If none of both of id types are set.
  """
 
- def __init__(self, **kwargs: Union[int, str, None]):
+ def __init__(self, **kwargs: int | str | None):
  internal_id = kwargs.get("id")
  external_id = kwargs.get("externalId") or kwargs.get("external_id")
 
@@ -105,8 +105,8 @@ class EitherId:
  if external_id is not None and not isinstance(external_id, str):
  raise TypeError("External IDs must be strings")
 
- self.internal_id: Optional[int] = internal_id
- self.external_id: Optional[str] = external_id
+ self.internal_id: int | None = internal_id
+ self.external_id: str | None = external_id
 
  def type(self) -> str:
  """
@@ -117,7 +117,7 @@ class EitherId:
  """
  return "id" if self.internal_id is not None else "externalId"
 
- def content(self) -> Union[int, str]:
+ def content(self) -> int | str:
  """
  Get the value of the ID
 
@@ -249,7 +249,7 @@ def add_extraction_pipeline(
  ##############################
  _logger.info(f"Starting to run function: {input_function.__name__}")
 
- heartbeat_thread: Optional[Thread] = None
+ heartbeat_thread: Thread | None = None
  try:
  heartbeat_thread = Thread(target=heartbeat_loop, name="HeartbeatLoop", daemon=True)
  heartbeat_thread.start()
@@ -313,12 +313,12 @@ _T2 = TypeVar("_T2")
  def _retry_internal(
  f: Callable[..., _T2],
  cancellation_token: CancellationToken,
- exceptions: Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Exception], bool]]],
+ exceptions: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Exception], bool]],
  tries: int,
  delay: float,
- max_delay: Optional[float],
+ max_delay: float | None,
  backoff: float,
- jitter: Union[float, Tuple[float, float]],
+ jitter: float | tuple[float, float],
  ) -> _T2:
  logger = logging.getLogger(__name__)
 
@@ -366,13 +366,13 @@ def _retry_internal(
 
 
  def retry(
- cancellation_token: Optional[CancellationToken] = None,
- exceptions: Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]] = (Exception,),
+ cancellation_token: CancellationToken | None = None,
+ exceptions: tuple[Type[Exception], ...] | dict[Type[Exception], Callable[[Any], bool]] = (Exception,),
  tries: int = 10,
  delay: float = 1,
- max_delay: Optional[float] = 60,
+ max_delay: float | None = 60,
  backoff: float = 2,
- jitter: Union[float, Tuple[float, float]] = (0, 2),
+ jitter: float | tuple[float, float] = (0, 2),
  ) -> Callable[[Callable[..., _T2]], Callable[..., _T2]]:
  """
  Returns a retry decorator.
@@ -414,8 +414,8 @@ def retry(
 
 
  def requests_exceptions(
- status_codes: Optional[List[int]] = None,
- ) -> Dict[Type[Exception], Callable[[Any], bool]]:
+ status_codes: list[int] | None = None,
+ ) -> dict[Type[Exception], Callable[[Any], bool]]:
  """
  Retry exceptions from using the ``requests`` library. This will retry all connection and HTTP errors matching
  the given status codes.
@@ -448,8 +448,8 @@
 
 
  def httpx_exceptions(
- status_codes: Optional[List[int]] = None,
- ) -> Dict[Type[Exception], Callable[[Any], bool]]:
+ status_codes: list[int] | None = None,
+ ) -> dict[Type[Exception], Callable[[Any], bool]]:
  """
  Retry exceptions from using the ``httpx`` library. This will retry all connection and HTTP errors matching
  the given status codes.
@@ -482,8 +482,8 @@
 
 
  def cognite_exceptions(
- status_codes: Optional[List[int]] = None,
- ) -> Dict[Type[Exception], Callable[[Any], bool]]:
+ status_codes: list[int] | None = None,
+ ) -> dict[Type[Exception], Callable[[Any], bool]]:
  """
  Retry exceptions from using the Cognite SDK. This will retry all connection and HTTP errors matching
  the given status codes.
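
Note (not part of the diff): the retry decorator and the *_exceptions helpers keep their call shape after this type-annotation modernization; a hedged usage sketch with purely illustrative parameters:

from cognite.extractorutils.util import cognite_exceptions, retry

@retry(
    exceptions=cognite_exceptions(status_codes=[408, 429, 502, 503]),  # illustrative status codes
    tries=5,
    delay=1,
    backoff=2,
)
def upload_batch() -> None:
    ...  # hypothetical operation that may raise a retryable Cognite SDK error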
@@ -569,9 +569,7 @@ def truncate_byte_len(item: str, ln: int) -> str:
 
 
  class BufferedReadWithLength(io.BufferedReader):
- def __init__(
- self, raw: RawIOBase, buffer_size: int, len: int, on_close: Optional[Callable[[], None]] = None
- ) -> None:
+ def __init__(self, raw: RawIOBase, buffer_size: int, len: int, on_close: Callable[[], None] | None = None) -> None:
  super().__init__(raw, buffer_size)
  # Do not remove even if it appears to be unused. :P
  # Requests uses this to add the content-length header, which is necessary for writing to files in azure clusters
@@ -588,7 +586,7 @@ def iterable_to_stream(
  iterator: Iterable[bytes],
  file_size_bytes: int,
  buffer_size: int = io.DEFAULT_BUFFER_SIZE,
- on_close: Optional[Callable[[], None]] = None,
+ on_close: Callable[[], None] | None = None,
  ) -> BufferedReadWithLength:
  class ChunkIteratorStream(io.RawIOBase):
  def __init__(self) -> None:
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: cognite-extractor-utils
- Version: 7.5.4
+ Version: 7.5.6
  Summary: Utilities for easier development of extractors for CDF
  Home-page: https://github.com/cognitedata/python-extractor-utils
  License: Apache-2.0
@@ -19,10 +19,11 @@ Requires-Dist: arrow (>=1.0.0,<2.0.0)
  Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
  Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
  Requires-Dist: cognite-sdk (>=7.59.0,<8.0.0)
- Requires-Dist: croniter (>=5.0.0,<6.0.0)
+ Requires-Dist: croniter (>=6.0.0,<7.0.0)
  Requires-Dist: dacite (>=1.6.0,<2.0.0)
  Requires-Dist: decorator (>=5.1.1,<6.0.0)
  Requires-Dist: httpx (>=0.27.0,<0.28.0)
+ Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
  Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
  Requires-Dist: orjson (>=3.10.3,<4.0.0)
  Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
@@ -0,0 +1,49 @@
+ cognite/extractorutils/__init__.py,sha256=uWu9rc8gh485FrNT-ocKmSQwkxPzFo7L9ac103j7E-s,764
+ cognite/extractorutils/_inner_util.py,sha256=cdoz9Sl3Wt1IsxiCZlcd913_hKrTCxDRrM_L-Zn1_F8,1800
+ cognite/extractorutils/base.py,sha256=pV3xy0Dzt8q9I5DvI-TvmRZXMmSTk8Kk-d0jZWa_ua8,16333
+ cognite/extractorutils/configtools/__init__.py,sha256=llNMzHu4yCWx5Kjm8G9IN5Pij8OUaVT_VZuZ2r3JtAA,3616
+ cognite/extractorutils/configtools/_util.py,sha256=uXpR8YnEkfeZOuaZGjRRk_wgC5AGOEKNWMYfV50atsc,4746
+ cognite/extractorutils/configtools/elements.py,sha256=ti3PFmwHyiFJFXNEzObRY6IxQo18LABSsYafPxuoYSU,26590
+ cognite/extractorutils/configtools/loaders.py,sha256=w8NoZcZJZbEctvkTq8aG_UH2x2gct_fpb2KenksmVaQ,18294
+ cognite/extractorutils/configtools/validators.py,sha256=xug3GOMIO4NOdyyvXtYlpKyq9wuDtGf7-xqIefD5bIo,1016
+ cognite/extractorutils/exceptions.py,sha256=NDmiElg1cmGMwIl82kpCDF37UcAFNnfDK9NxUn_u2rk,1149
+ cognite/extractorutils/metrics.py,sha256=-sUBaZ7lNrcdxuQcsh7rU-CwMNTqlT3DiMRyn5CxPTQ,15422
+ cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cognite/extractorutils/statestore/__init__.py,sha256=hV3r11FUXkH6-60Ct6zLSROMNVrEeiE3Shmkf28Q-co,359
+ cognite/extractorutils/statestore/_base.py,sha256=mWdFk4EZl886V6uXRj4O2sv2_ANJ3Sigmgeql-XEsmc,2675
+ cognite/extractorutils/statestore/hashing.py,sha256=Le6PUpLYV7kTKgO2nc5BKCEf-3LTXoGzEVzLtw8tkn0,8011
+ cognite/extractorutils/statestore/watermark.py,sha256=U_cA0XlqkgMML-ZeEl13KE8KjQHsId5t7mMHibRhUyA,16713
+ cognite/extractorutils/threading.py,sha256=RN9oEXO6N2RqYKThFoDqzSeo593hkzTVePK1KSVOu3A,3586
+ cognite/extractorutils/unstable/__init__.py,sha256=L6nqJHjylpk67CE-PbXJyb_TBI4yjhEYEz9J9WShDfM,341
+ cognite/extractorutils/unstable/configuration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cognite/extractorutils/unstable/configuration/exceptions.py,sha256=-cziC11IbUP308ldbAYoQn4x2SNCIxYanN2eIV1n9To,654
+ cognite/extractorutils/unstable/configuration/loaders.py,sha256=iMlCx6abKaDHx5-nOQSRtf-creqJPv1QrnbapCaIZkA,3689
+ cognite/extractorutils/unstable/configuration/models.py,sha256=jFlA5eEeNRq39KEwAjZV9UkbV2juVUHANNeXq0VtqL4,8210
+ cognite/extractorutils/unstable/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cognite/extractorutils/unstable/core/_dto.py,sha256=tvvy39cvf-QT28GWz5FpqxQ5vAVk0t69JoPPhpWlweY,1293
+ cognite/extractorutils/unstable/core/_messaging.py,sha256=D9rOW8fijryXffbm90d8VTf2vy5FmwVGU-H0O-cn-EI,68
+ cognite/extractorutils/unstable/core/base.py,sha256=QljO7Zpn5RSTEI9PHIavhKWdr4Hp-Ni5tdmsQ_ocOLk,12190
+ cognite/extractorutils/unstable/core/errors.py,sha256=D8QAaqwJec62ZbhBNC0flmKjw_EdHLKGn8npqtPQhZE,1706
+ cognite/extractorutils/unstable/core/restart_policy.py,sha256=SodG2Gs9Es05yk3EbAAWY_sbSoBUmhTRrUMBR4BSQbQ,622
+ cognite/extractorutils/unstable/core/runtime.py,sha256=sb8ouTCZqvzpns_8UpVwPd4nGnfinf7vsVvOk23jksQ,11834
+ cognite/extractorutils/unstable/core/tasks.py,sha256=K3R40sNSqYJ1Oc0UMTUDF4lY_WaZ7HokvZ5kctDsjGQ,585
+ cognite/extractorutils/unstable/scheduling/__init__.py,sha256=L90_rCZNHvti-PInne0r7W9edIkifctELjiaxEoQiSc,67
+ cognite/extractorutils/unstable/scheduling/_scheduler.py,sha256=tzu3-olhBU8uFDYj-Q6mEJUVBVin8wSGJONJVrNP3NE,3694
+ cognite/extractorutils/unstable/scheduling/_schedules.py,sha256=y0NVeXYZOFcAyzBgAe8jqK0W-SZL5m99UwXAacGzqIw,677
+ cognite/extractorutils/uploader/__init__.py,sha256=MgyvZojwLE-oUCZ0VALISd2rUCqShlyozxhzAKX5uj4,3396
+ cognite/extractorutils/uploader/_base.py,sha256=JPr5Dp25XYzwN4MJ2ddd-xhPg5kVV3jASNecD8sAaKs,5273
+ cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
+ cognite/extractorutils/uploader/assets.py,sha256=SDX48xjqIT4tbQ9HtaIgQT8bw61XHJGic5ofZJeK7UE,5692
+ cognite/extractorutils/uploader/data_modeling.py,sha256=Vd9eDWE-KPICChtxcKZdFcH3mSbavD8s1627wXxF_SI,3593
+ cognite/extractorutils/uploader/events.py,sha256=qo1rVhk3eUfcbNLauZfvBohQ2aFRazbyGuMFcU-UyQ8,5640
+ cognite/extractorutils/uploader/files.py,sha256=3VH8lsZmPL4TI3r_mIzTf8T2YmYc3kAtyBeo_4g9zP0,26610
+ cognite/extractorutils/uploader/raw.py,sha256=VMYfeZN8XAHfZ77AuGcL85bIWvhaO7-Whx_marnGAmQ,6692
+ cognite/extractorutils/uploader/time_series.py,sha256=yBN7ppD5hg0CgUIw7WvhhAPyOj0gbIWG4_-ifPaAuOE,26575
+ cognite/extractorutils/uploader/upload_failure_handler.py,sha256=Oj3xDK_qlGQdEOzswE-6ti7tDAQXR0Rvee3lg6KBg3s,2000
+ cognite/extractorutils/uploader_extractor.py,sha256=X71M_7JcGMwC3kHMETmTF8cdjSQwZaNmIGlT-mBs3Pk,7687
+ cognite/extractorutils/uploader_types.py,sha256=eLKFQJT53zpn9_3-SDUtgHUMASGdK7c85HWrLWEF-JE,865
+ cognite/extractorutils/util.py,sha256=TL3fkHlvPqWjdyr4yorq5LNJbPxJSom69HKyeQM92xE,21042
+ cognite_extractor_utils-7.5.6.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+ cognite_extractor_utils-7.5.6.dist-info/METADATA,sha256=A1Sc24JpE_1afcPP8Dor7_f6KJb6NQ5dSY6_12Zswfk,5691
+ cognite_extractor_utils-7.5.6.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+ cognite_extractor_utils-7.5.6.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.9.1
+ Generator: poetry-core 2.0.0
  Root-Is-Purelib: true
  Tag: py3-none-any
@@ -1,31 +0,0 @@
- """
- Example of how you would build an extractor with the new base class
- """
-
- from cognite.extractorutils.unstable.configuration.models import ExtractorConfig
-
- from .base import Extractor
- from .runtime import Runtime
-
-
- class MyConfig(ExtractorConfig):
- parameter_one: int
- parameter_two: str
-
-
- class MyExtractor(Extractor[MyConfig]):
- NAME = "Test extractor"
- EXTERNAL_ID = "test-extractor"
- DESCRIPTION = "Test of the new runtime"
- VERSION = "1.0.0"
- CONFIG_TYPE = MyConfig
-
- def run(self) -> None:
- self.logger.info("Started!")
- if not self.cancellation_token.wait(10):
- raise ValueError("Oops")
-
-
- if __name__ == "__main__":
- runtime = Runtime(MyExtractor)
- runtime.run()