cognite-extractor-utils 5.0.1__py3-none-any.whl → 5.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cognite-extractor-utils might be problematic.

@@ -15,14 +15,15 @@
  import math
  import threading
  from datetime import datetime
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+ from types import TracebackType
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

  import arrow
+ from requests import ConnectionError
+
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Sequence, SequenceData, TimeSeries
  from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError, CogniteNotFoundError
- from requests import ConnectionError
-
  from cognite.extractorutils.uploader._base import (
  RETRIES,
  RETRY_BACKOFF_FACTOR,
@@ -48,9 +49,7 @@ MAX_DATAPOINT_STRING_LENGTH = 255
  MAX_DATAPOINT_VALUE = 1e100
  MIN_DATAPOINT_VALUE = -1e100

- DataPoint = Union[
- Dict[str, Union[int, float, str, datetime]], Tuple[Union[int, float, datetime], Union[int, float, str]]
- ]
+ DataPoint = Tuple[Union[int, float, datetime], Union[int, float, str]]
  DataPointList = List[DataPoint]


@@ -66,7 +65,7 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
  A TimeSeries object with external_id set, and the is_string automatically detected
  """
  is_string = (
- isinstance(datapoints[0].get("value"), str)
+ isinstance(datapoints[0].get("value"), str) # type: ignore # input might be dict to keep compatibility
  if isinstance(datapoints[0], dict)
  else isinstance(datapoints[0][1], str)
  )
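In 5.2.0 the `DataPoint` alias is narrowed to `(timestamp, value)` tuples, while the runtime code above still tolerates dict-shaped points for backwards compatibility. A minimal sketch of datapoints that satisfy the new alias (the values are illustrative):

```python
from datetime import datetime, timezone
from typing import List, Tuple, Union

# Mirrors the new alias from this release: (timestamp, value) tuples only
DataPoint = Tuple[Union[int, float, datetime], Union[int, float, str]]
DataPointList = List[DataPoint]

datapoints: DataPointList = [
    (1672531200000, 21.5),                                # epoch milliseconds, numeric value
    (datetime(2023, 1, 1, tzinfo=timezone.utc), "OPEN"),  # datetime timestamp, string value
]
```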
@@ -118,6 +117,8 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  cancellation_token,
  )

+ self.missing_factory: Callable[[str, DataPointList], TimeSeries]
+
  if isinstance(create_missing, bool):
  self.create_missing = create_missing
  self.missing_factory = default_time_series_factory
@@ -125,7 +126,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  self.create_missing = True
  self.missing_factory = create_missing

- self.upload_queue: Dict[EitherId, DataPointList] = dict()
+ self.upload_queue: Dict[EitherId, DataPointList] = {}

  self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
  self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
@@ -134,19 +135,23 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  self.latency_zero_point = arrow.utcnow()
  self.data_set_id = data_set_id

- def _verify_datapoint_time(self, time: Union[int, float, datetime]) -> bool:
+ def _verify_datapoint_time(self, time: Union[int, float, datetime, str]) -> bool:
  if isinstance(time, int) or isinstance(time, float):
  return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
+ elif isinstance(time, str):
+ return False
  else:
  return time.timestamp() * 1000.0 >= MIN_DATAPOINT_TIMESTAMP

- def _verify_datapoint_value(self, value: Union[int, float, str]) -> bool:
+ def _verify_datapoint_value(self, value: Union[int, float, datetime, str]) -> bool:
  if isinstance(value, float):
  return not (
  math.isnan(value) or math.isinf(value) or value > MAX_DATAPOINT_VALUE or value < MIN_DATAPOINT_VALUE
  )
  elif isinstance(value, str):
  return len(value) <= MAX_DATAPOINT_STRING_LENGTH
+ elif isinstance(value, datetime):
+ return False
  else:
  return True

@@ -154,14 +159,16 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  self,
  dp: DataPoint,
  ) -> bool:
- if isinstance(dp, Dict):
+ if isinstance(dp, dict):
  return self._verify_datapoint_time(dp["timestamp"]) and self._verify_datapoint_value(dp["value"])
- elif isinstance(dp, Tuple):
+ elif isinstance(dp, tuple):
  return self._verify_datapoint_time(dp[0]) and self._verify_datapoint_value(dp[1])
  else:
  return True

- def add_to_upload_queue(self, *, id: int = None, external_id: str = None, datapoints: DataPointList = []) -> None:
+ def add_to_upload_queue(
+ self, *, id: Optional[int] = None, external_id: Optional[str] = None, datapoints: DataPointList = []
+ ) -> None:
  """
  Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
  specified in the __init__.
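A minimal usage sketch of the updated keyword-only signature. The constructor arguments follow the SequenceUploadQueue constructor shown further down in this diff and may differ slightly; the external id and values are illustrative:

```python
from cognite.client import CogniteClient
from cognite.extractorutils.uploader.time_series import TimeSeriesUploadQueue

client = CogniteClient()  # assumes CDF credentials are configured elsewhere

# create_missing=True lets the queue create unknown time series via default_time_series_factory
with TimeSeriesUploadQueue(cdf_client=client, max_upload_interval=10, create_missing=True) as queue:
    # id / external_id / datapoints are keyword-only; both ids are Optional, but one must be given
    queue.add_to_upload_queue(
        external_id="my-sensor",
        datapoints=[(1672531200000, 21.5), (1672531260000, 21.7)],
    )
```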
@@ -236,7 +243,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  max_delay=RETRY_MAX_DELAY,
  backoff=RETRY_BACKOFF_FACTOR,
  )
- def _upload_batch(self, upload_this: List[Dict], retries=5) -> List[Dict]:
+ def _upload_batch(self, upload_this: List[Dict], retries: int = 5) -> List[Dict]:
  if len(upload_this) == 0:
  return upload_this

@@ -276,8 +283,9 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

  if len(ex.not_found) != len(create_these_ids):
  missing = [id_dict for id_dict in ex.not_found if id_dict.get("externalId") not in retry_these]
+ missing_num = len(ex.not_found) - len(create_these_ids)
  self.logger.error(
- f"{len(ex.not_found) - len(create_these_ids)} time series not found, and could not be created automatically:\n"
+ f"{missing_num} time series not found, and could not be created automatically:\n"
  + str(missing)
  + "\nData will be dropped"
  )
@@ -304,7 +312,9 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
  self.start()
  return self

- def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+ def __exit__(
+ self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+ ) -> None:
  """
  Wraps around stop method, for use as context manager

@@ -334,14 +344,14 @@ class SequenceUploadQueue(AbstractUploadQueue):
  max_upload_interval: Optional[int] = None,
  trigger_log_level: str = "DEBUG",
  thread_name: Optional[str] = None,
- create_missing=False,
+ create_missing: bool = False,
  cancellation_token: threading.Event = threading.Event(),
  ):
  """
  Args:
  cdf_client: Cognite Data Fusion client to use
- post_upload_function: A function that will be called after each upload. The function will be given one argument:
- A list of the events that were uploaded.
+ post_upload_function: A function that will be called after each upload. The function will be given one
+ argument: A list of the events that were uploaded.
  max_queue_size: Maximum size of upload queue. Defaults to no max size.
  max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
  methods).
@@ -360,15 +370,15 @@ class SequenceUploadQueue(AbstractUploadQueue):
  thread_name,
  cancellation_token,
  )
- self.upload_queue: Dict[EitherId, SequenceData] = dict()
- self.sequence_metadata: Dict[EitherId, Dict[str, Union[str, int, float]]] = dict()
- self.sequence_asset_external_ids: Dict[EitherId, str] = dict()
- self.sequence_dataset_external_ids: Dict[EitherId, str] = dict()
- self.sequence_names: Dict[EitherId, str] = dict()
- self.sequence_descriptions: Dict[EitherId, str] = dict()
- self.column_definitions: Dict[EitherId, List[Dict[str, str]]] = dict()
- self.asset_ids: Dict[str, int] = dict()
- self.dataset_ids: Dict[str, int] = dict()
+ self.upload_queue: Dict[EitherId, SequenceData] = {}
+ self.sequence_metadata: Dict[EitherId, Dict[str, Union[str, int, float]]] = {}
+ self.sequence_asset_external_ids: Dict[EitherId, str] = {}
+ self.sequence_dataset_external_ids: Dict[EitherId, str] = {}
+ self.sequence_names: Dict[EitherId, str] = {}
+ self.sequence_descriptions: Dict[EitherId, str] = {}
+ self.column_definitions: Dict[EitherId, List[Dict[str, str]]] = {}
+ self.asset_ids: Dict[str, int] = {}
+ self.dataset_ids: Dict[str, int] = {}
  self.create_missing = create_missing

  self.points_queued = SEQUENCES_UPLOADER_POINTS_QUEUED
@@ -380,12 +390,12 @@ class SequenceUploadQueue(AbstractUploadQueue):
  def set_sequence_metadata(
  self,
  metadata: Dict[str, Union[str, int, float]],
- id: int = None,
- external_id: str = None,
- asset_external_id: str = None,
- dataset_external_id: str = None,
- name: str = None,
- description: str = None,
+ id: Optional[int] = None,
+ external_id: Optional[str] = None,
+ asset_external_id: Optional[str] = None,
+ dataset_external_id: Optional[str] = None,
+ name: Optional[str] = None,
+ description: Optional[str] = None,
  ) -> None:
  """
  Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
@@ -404,13 +414,17 @@ class SequenceUploadQueue(AbstractUploadQueue):
  """
  either_id = EitherId(id=id, external_id=external_id)
  self.sequence_metadata[either_id] = metadata
- self.sequence_asset_external_ids[either_id] = asset_external_id
- self.sequence_dataset_external_ids[either_id] = dataset_external_id
- self.sequence_names[either_id] = name
- self.sequence_descriptions[either_id] = description
+ if asset_external_id:
+ self.sequence_asset_external_ids[either_id] = asset_external_id
+ if dataset_external_id:
+ self.sequence_dataset_external_ids[either_id] = dataset_external_id
+ if name:
+ self.sequence_names[either_id] = name
+ if description:
+ self.sequence_descriptions[either_id] = description

  def set_sequence_column_definition(
- self, col_def: List[Dict[str, str]], id: int = None, external_id: str = None
+ self, col_def: List[Dict[str, str]], id: Optional[int] = None, external_id: Optional[str] = None
  ) -> None:
  """
  Set sequence column definition
@@ -434,8 +448,8 @@ class SequenceUploadQueue(AbstractUploadQueue):
  SequenceData,
  ],
  column_external_ids: Optional[List[dict]] = None,
- id: int = None,
- external_id: str = None,
+ id: Optional[int] = None,
+ external_id: Optional[str] = None,
  ) -> None:
  """
  Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
@@ -462,13 +476,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
  elif isinstance(rows, dict):
  rows = [{"rowNumber": row_number, "values": values} for row_number, values in rows.items()]

- rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids)
+ rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids) # type: ignore

  elif isinstance(rows, list):
  if isinstance(rows[0], tuple) or isinstance(rows[0], list):
  rows = [{"rowNumber": row_number, "values": values} for row_number, values in rows]

- rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids)
+ rows = SequenceData(id=id, external_id=id, rows=rows, columns=column_external_ids) # type: ignore
  else:
  raise TypeError("Unsupported type for sequence rows: {}".format(type(rows)))

@@ -476,8 +490,8 @@ class SequenceUploadQueue(AbstractUploadQueue):
  seq = self.upload_queue.get(either_id)
  if seq is not None:
  # Update sequence
- seq.values.extend(rows.values)
- seq.row_numbers.extend(rows.row_numbers)
+ seq.values.extend(rows.values) # type: ignore # type is list, mypy is wrong
+ seq.row_numbers.extend(rows.row_numbers) # type: ignore

  self.upload_queue[either_id] = seq
  else:
@@ -517,7 +531,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
  self.queue_size.set(self.upload_queue_size)

  @retry(
- exceptions=CogniteAPIError,
+ exceptions=[CogniteAPIError],
  tries=RETRIES,
  delay=RETRY_DELAY,
  max_delay=RETRY_MAX_DELAY,
@@ -528,7 +542,10 @@ class SequenceUploadQueue(AbstractUploadQueue):

  try:
  self.cdf_client.sequences.data.insert(
- id=either_id.internal_id, external_id=either_id.external_id, rows=upload_this, column_external_ids=None
+ id=either_id.internal_id, # type: ignore
+ external_id=either_id.external_id, # type: ignore
+ rows=upload_this,
+ column_external_ids=None,
  )
  except CogniteNotFoundError as ex:
  if self.create_missing:
@@ -537,8 +554,8 @@ class SequenceUploadQueue(AbstractUploadQueue):

  # Retry
  self.cdf_client.sequences.data.insert(
- id=either_id.internal_id,
- external_id=either_id.external_id,
+ id=either_id.internal_id, # type: ignore
+ external_id=either_id.external_id, # type: ignore
  rows=upload_this,
  column_external_ids=None,
  )
@@ -556,24 +573,29 @@ class SequenceUploadQueue(AbstractUploadQueue):
  """

  column_def = self.column_definitions.get(either_id)
+ if column_def is None:
+ self.logger.error(f"Can't create sequence {str(either_id)}, no column definitions provided")

  try:
  seq = self.cdf_client.sequences.create(
  Sequence(
- id=either_id.internal_id,
- external_id=either_id.external_id,
- name=self.sequence_names.get(either_id, None),
- description=self.sequence_descriptions.get(either_id, None),
- metadata=self.sequence_metadata.get(either_id, None),
- asset_id=self.asset_ids.get(self.sequence_asset_external_ids.get(either_id, None), None),
- data_set_id=self.dataset_ids.get(self.sequence_dataset_external_ids.get(either_id, None), None),
- columns=column_def,
+ id=either_id.internal_id, # type: ignore # these are optional, the SDK types are wrong
+ external_id=either_id.external_id, # type: ignore
+ name=self.sequence_names.get(either_id, None), # type: ignore
+ description=self.sequence_descriptions.get(either_id, None), # type: ignore
+ metadata=self.sequence_metadata.get(either_id, None), # type: ignore
+ asset_id=self.asset_ids.get(self.sequence_asset_external_ids.get(either_id, None), None), # type: ignore
+ data_set_id=self.dataset_ids.get(self.sequence_dataset_external_ids.get(either_id, None), None), # type: ignore
+ columns=column_def, # type: ignore # We already checked for None, mypy is wrong
  )
  )

  except CogniteDuplicatedError:
  self.logger.info("Sequnce already exist: {}".format(either_id))
- seq = self.cdf_client.sequences.retrieve(id=either_id.internal_id, external_id=either_id.external_id)
+ seq = self.cdf_client.sequences.retrieve(
+ id=either_id.internal_id, # type: ignore # these are optional, the SDK types are wrong
+ external_id=either_id.external_id, # type: ignore
+ )

  # Update definition of cached sequence
  cseq = self.upload_queue[either_id]
@@ -584,7 +606,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
  Resolve id of assets if specified, for use in sequence creation
  """
  assets = set(self.sequence_asset_external_ids.values())
- assets.discard(None)
+ assets.discard(None) # type: ignore # safeguard, remove Nones if any

  if len(assets) > 0:
  try:
@@ -596,14 +618,14 @@ class SequenceUploadQueue(AbstractUploadQueue):
  }
  except Exception as e:
  self.logger.error("Error in resolving asset id: %s", str(e))
- self.asset_ids = dict()
+ self.asset_ids = {}

  def _resolve_dataset_ids(self) -> None:
  """
  Resolve id of datasets if specified, for use in sequence creation
  """
  datasets = set(self.sequence_dataset_external_ids.values())
- datasets.discard(None)
+ datasets.discard(None) # type: ignore # safeguard, remove Nones if any

  if len(datasets) > 0:
  try:
@@ -615,7 +637,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
  }
  except Exception as e:
  self.logger.error("Error in resolving dataset id: %s", str(e))
- self.dataset_ids = dict()
+ self.dataset_ids = {}

  def __enter__(self) -> "SequenceUploadQueue":
  """
@@ -627,7 +649,9 @@ class SequenceUploadQueue(AbstractUploadQueue):
  self.start()
  return self

- def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+ def __exit__(
+ self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+ ) -> None:
  """
  Wraps around stop method, for use as context manager

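With the new behaviour, optional sequence metadata is only cached when actually provided, and a missing column definition is logged before creation is attempted. A minimal usage sketch; the external ids, column spec keys and values are illustrative, not taken from the package:

```python
from cognite.client import CogniteClient
from cognite.extractorutils.uploader.time_series import SequenceUploadQueue

client = CogniteClient()  # assumes CDF credentials are configured elsewhere

with SequenceUploadQueue(cdf_client=client, create_missing=True) as queue:
    # Column definitions must be set for the queue to be able to create the sequence
    queue.set_sequence_column_definition(
        col_def=[{"externalId": "temperature", "valueType": "DOUBLE"}],
        external_id="my-sequence",
    )
    # Optional fields (name, description, asset/dataset external ids) are only cached when given
    queue.set_sequence_metadata(
        metadata={"source": "example"},
        external_id="my-sequence",
        name="My sequence",
    )
    # Rows mirror SequencesAPI.insert; here a {row_number: values} dict
    queue.add_to_upload_queue(
        rows={0: [20.5], 1: [21.0]},
        external_id="my-sequence",
    )
```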
@@ -15,14 +15,14 @@
  """
  A module containing a slightly more advanced base extractor class, sorting a generic output into upload queues.
  """
-
+ import threading
  from dataclasses import dataclass
  from types import TracebackType
- from typing import Callable, Iterable, List, Optional, Type, TypeVar
+ from typing import Any, Callable, Iterable, List, Optional, Type, TypeVar

- from cognite.client import CogniteClient
  from more_itertools import peekable

+ from cognite.client import CogniteClient
  from cognite.extractorutils.base import Extractor
  from cognite.extractorutils.configtools import BaseConfig, TimeIntervalConfig
  from cognite.extractorutils.metrics import BaseMetrics
@@ -78,12 +78,12 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
  description: str,
  version: Optional[str] = None,
  run_handle: Optional[
- Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, Event], None]
+ Callable[[CogniteClient, AbstractStateStore, UploaderExtractorConfigClass, threading.Event], None]
  ] = None,
  config_class: Type[UploaderExtractorConfigClass],
  metrics: Optional[BaseMetrics] = None,
  use_default_state_store: bool = True,
- cancellation_token: Event = Event(),
+ cancellation_token: threading.Event = threading.Event(),
  config_file_path: Optional[str] = None,
  continuous_extractor: bool = False,
  heartbeat_waiting_time: int = 600,
@@ -107,10 +107,9 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
  self.middleware = middleware if isinstance(middleware, list) else []

  def handle_output(self, output: CdfTypes) -> None:
- if not isinstance(output, Iterable):
- output = [output]
+ list_output = [output] if not isinstance(output, Iterable) else output
+ peekable_output = peekable(list_output)

- peekable_output = peekable(output)
  peek = peekable_output.peek(None)

  if peek is None:
@@ -133,7 +132,7 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
  else:
  raise ValueError(f"Unexpected type: {type(peek)}")

- def _apply_middleware(self, item):
+ def _apply_middleware(self, item: Any) -> Any:
  for mw in self.middleware:
  item = mw(item)
  return item
@@ -23,12 +23,13 @@ import threading
  from functools import partial, wraps
  from threading import Event, Thread
  from time import time
- from typing import Any, Dict, Generator, Iterable, Optional, Tuple, Type, Union
+ from typing import Any, Callable, Generator, Iterable, Optional, Tuple, Type, TypeVar, Union
+
+ from decorator import decorator

  from cognite.client import CogniteClient
  from cognite.client.data_classes import Asset, ExtractionPipelineRun, TimeSeries
  from cognite.client.exceptions import CogniteNotFoundError
- from decorator import decorator


  def _ensure(endpoint: Any, items: Iterable[Any]) -> None:
@@ -80,7 +81,7 @@ def set_event_on_interrupt(stop_event: Event) -> None:
  stop_event: Event to set
  """

- def sigint_handler(sig, frame):
+ def sigint_handler(sig_num: int, frame: Any) -> None:
  logger = logging.getLogger(__name__)
  logger.warning("Interrupt signal received, stopping extractor gracefully")
  stop_event.set()
@@ -106,7 +107,7 @@ class EitherId:
  TypeError: If none of both of id types are set.
  """

- def __init__(self, **kwargs):
+ def __init__(self, **kwargs: Union[int, str, None]):
  internal_id = kwargs.get("id")
  external_id = kwargs.get("externalId") or kwargs.get("external_id")

@@ -116,8 +117,14 @@ class EitherId:
  if internal_id is not None and external_id is not None:
  raise TypeError("Only one of id and external_id can be set")

- self.internal_id = internal_id
- self.external_id = external_id
+ if internal_id is not None and not isinstance(internal_id, int):
+ raise TypeError("Internal IDs must be integers")
+
+ if external_id is not None and not isinstance(external_id, str):
+ raise TypeError("Internal IDs must be integers")
+
+ self.internal_id: Optional[int] = internal_id
+ self.external_id: Optional[str] = external_id

  def type(self) -> str:
  """
@@ -135,7 +142,7 @@ class EitherId:
  Returns:
  The ID
  """
- return self.internal_id or self.external_id
+ return self.internal_id or self.external_id # type: ignore # checked to be not None in init

  def __eq__(self, other: Any) -> bool:
  """
@@ -180,12 +187,15 @@ class EitherId:
  return self.__str__()


+ _T1 = TypeVar("_T1")
+
+
  def add_extraction_pipeline(
  extraction_pipeline_ext_id: str,
  cognite_client: CogniteClient,
  heartbeat_waiting_time: int = 600,
  added_message: str = "",
- ):
+ ) -> Callable[[Callable[..., _T1]], Callable[..., _T1]]:
  """
  This is to be used as a decorator for extractor functions to add extraction pipeline information

@@ -214,9 +224,9 @@ def add_extraction_pipeline(

  _logger = logging.getLogger(__name__)

- def decorator_ext_pip(input_function):
+ def decorator_ext_pip(input_function: Callable[..., _T1]) -> Callable[..., _T1]:
  @wraps(input_function)
- def wrapper_ext_pip(*args, **kwargs):
+ def wrapper_ext_pip(*args: Any, **kwargs: Any) -> _T1:
  ##############################
  # Setup Extraction Pipelines #
  ##############################
@@ -256,6 +266,7 @@ def add_extraction_pipeline(
  ##############################
  _logger.info(f"Starting to run function: {input_function.__name__}")

+ heartbeat_thread: Optional[Thread] = None
  try:
  heartbeat_thread = Thread(target=heartbeat_loop, name="HeartbeatLoop", daemon=True)
  heartbeat_thread.start()
@@ -266,10 +277,11 @@ def add_extraction_pipeline(
  raise e
  else:
  _report_success()
- _logger.info(f"Extraction ran successfully")
+ _logger.info("Extraction ran successfully")
  finally:
  cancellation_token.set()
- heartbeat_thread.join()
+ if heartbeat_thread:
+ heartbeat_thread.join()

  return output

@@ -312,25 +324,28 @@ def throttled_loop(target_time: int, cancellation_token: Event) -> Generator[Non
  cancellation_token.wait(target_time - iteration_time)


+ _T2 = TypeVar("_T2")
+
+
  def _retry_internal(
- f,
+ f: Callable[..., _T2],
  cancellation_token: threading.Event = threading.Event(),
- exceptions: Iterable[Type[Exception]] = Exception,
+ exceptions: Iterable[Type[Exception]] = [Exception],
  tries: int = -1,
  delay: float = 0,
  max_delay: Optional[float] = None,
  backoff: float = 1,
  jitter: Union[float, Tuple[float, float]] = 0,
- ):
+ ) -> _T2:
  logger = logging.getLogger(__name__)

  while tries and not cancellation_token.is_set():
  try:
  return f()
- except exceptions as e:
+ except exceptions as e: # type: ignore # Exception is an exception type, smh mypy
  tries -= 1
  if not tries:
- raise
+ raise e

  if logger is not None:
  logger.warning("%s, retrying in %s seconds...", e, delay)
@@ -346,16 +361,18 @@ def _retry_internal(
  if max_delay is not None:
  delay = min(delay, max_delay)

+ return None # type: ignore # unreachable, we will have raised an exception before this
+

  def retry(
  cancellation_token: threading.Event = threading.Event(),
- exceptions: Iterable[Type[Exception]] = Exception,
+ exceptions: Iterable[Type[Exception]] = [Exception],
  tries: int = -1,
  delay: float = 0,
  max_delay: Optional[float] = None,
  backoff: float = 1,
  jitter: Union[float, Tuple[float, float]] = 0,
- ):
+ ) -> Callable[[Callable[..., _T2]], Callable[..., _T2]]:
  """
  Returns a retry decorator.

@@ -375,9 +392,9 @@ def retry(
  """

  @decorator
- def retry_decorator(f, *fargs, **fkwargs):
- args = fargs if fargs else list()
- kwargs = fkwargs if fkwargs else dict()
+ def retry_decorator(f: Callable[..., _T2], *fargs: Any, **fkwargs: Any) -> _T2:
+ args = fargs if fargs else []
+ kwargs = fkwargs if fkwargs else {}

  return _retry_internal(
  partial(f, *args, **kwargs),
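The `retry` helper now declares its decorator return type and takes `exceptions` as an iterable of exception classes rather than a bare class, matching the `@retry(exceptions=[CogniteAPIError], ...)` call site earlier in this diff. A minimal usage sketch; the wrapped function and URL are illustrative:

```python
import requests

from cognite.extractorutils.util import retry

# exceptions is now given as a list of exception types
@retry(exceptions=[requests.ConnectionError], tries=5, delay=1, backoff=2, max_delay=30)
def fetch_health(url: str) -> int:
    # Retried with exponential backoff if the connection fails
    return requests.get(url, timeout=10).status_code

status = fetch_health("https://example.com/health")
```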
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cognite-extractor-utils
- Version: 5.0.1
+ Version: 5.2.0
  Summary: Utilities for easier development of extractors for CDF
  Home-page: https://github.com/cognitedata/python-extractor-utils
  License: Apache-2.0
@@ -121,4 +121,3 @@ Each public method, class and module should have docstrings. Docstrings are writ
  style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings). Please include unit and/or
  integration tests for submitted code, and remember to update the [changelog](./CHANGELOG.md).

-
@@ -0,0 +1,26 @@
+ cognite/extractorutils/__init__.py,sha256=YpHnq2cOlBjlfQGnnajBJHlP4R0tz1RhKjFUhN3Rp9s,739
+ cognite/extractorutils/_inner_util.py,sha256=jCPLg-FfTpyLAHkhoKaKObNwbqm19d2Z11to_DYk5EU,1558
+ cognite/extractorutils/base.py,sha256=ydDV0oUJH6W2S150P-PZxotnZGAwVGzKiKcpcCZdB0E,15974
+ cognite/extractorutils/configtools/__init__.py,sha256=fj9kH8DdisNi9mI8cKm2sz50vnzeOkJQErIGB3mTYRo,2861
+ cognite/extractorutils/configtools/_util.py,sha256=SZycZm_py9v9WZbDiDQbgS6_PiLtu-TtwuuH7tG2YCI,4739
+ cognite/extractorutils/configtools/elements.py,sha256=BAfFrUrDwGPoyEMQeYySWRrqeIM_MrmLMyGPe-hrg0A,19910
+ cognite/extractorutils/configtools/loaders.py,sha256=02eK2dp7zsIU7ZIsMY6o7H_VfaiQ312QzCGqTuAypbU,9545
+ cognite/extractorutils/exceptions.py,sha256=PERRmySUfJRM2Ta8cFvADTe-KUdXsoMLKdk4140AOHI,1061
+ cognite/extractorutils/metrics.py,sha256=HYW5oQIKd4Zk0ahIuRbxON9Y6LXJ7QODya_SWogNj0M,14929
+ cognite/extractorutils/middleware.py,sha256=d5bnKEOmC49QmoBK7OyM9krOGDYX8sXxM00wRWKhmHg,1108
+ cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cognite/extractorutils/statestore.py,sha256=F54IEgWUbAfeOWRPWiVEqQDUuqb_WaFdM0bzYNE-Sis,17829
+ cognite/extractorutils/uploader/__init__.py,sha256=h9EAktLxNZjmZunFcK6Tngu6K7tQKwYTmNOkB4meCA8,3054
+ cognite/extractorutils/uploader/_base.py,sha256=1MVzMEyuuYNlCAG7Hv1kPCzUOfVXhO2fA_sZYSGBLyY,4983
+ cognite/extractorutils/uploader/_metrics.py,sha256=cF6EaixueFSSRw97O0HK3kEM7fX8AvNAUVz5xDyr4Yk,3865
+ cognite/extractorutils/uploader/events.py,sha256=RmBQcHpImGJG3D7F0Ccb5aK7Ecy4mkF6GYk-UTLjp38,6118
+ cognite/extractorutils/uploader/files.py,sha256=de6nny4yQiaSSEol8gE0KzYZ9srTKwjOTWVJztCnZH8,11674
+ cognite/extractorutils/uploader/raw.py,sha256=B6B0B659Zfvhy9Tp_QrGi1wkwJw94Z6am5_YmFZFxvQ,7021
+ cognite/extractorutils/uploader/time_series.py,sha256=gsMqwGYFS4wx3LF62URUMLx4PoCZb7MS3RWeWBQe2AM,26579
+ cognite/extractorutils/uploader_extractor.py,sha256=cAseJZXBmE6feyHJNMGxXBfGbdGBmOyBTeU1ZC5Krmg,7476
+ cognite/extractorutils/uploader_types.py,sha256=nztoOqdszm4BEDcCybdApub0zN3KEfwLgTDeP69Xdlk,1146
+ cognite/extractorutils/util.py,sha256=6y-2ZWviLmPYSbEWu0_I8Oeuwz-4yK9EhE7FM1g_GX8,14284
+ cognite_extractor_utils-5.2.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+ cognite_extractor_utils-5.2.0.dist-info/METADATA,sha256=X0ig0LlZzWhFLMQenXn44XdKD-_YcMnT7-BakiNQFEk,5406
+ cognite_extractor_utils-5.2.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+ cognite_extractor_utils-5.2.0.dist-info/RECORD,,
@@ -1,26 +0,0 @@
- cognite/extractorutils/__init__.py,sha256=VsW0H6dGKBgnovfeKtFM9fsrkvp_XV6kslSkrpiIvmA,739
- cognite/extractorutils/_inner_util.py,sha256=idg6EZzcDIwCYJJVSvyyEq8M2QUyLvW4soJS1XTlFv4,1431
- cognite/extractorutils/base.py,sha256=_c7zF1_PDIwIA8lqvf0Du5vJyITYissOptBncvKY7Dg,15990
- cognite/extractorutils/configtools/__init__.py,sha256=qYBtB-wVwXV6mcdRe8xRclhF9AMADlaJaAJzTNuWT48,2861
- cognite/extractorutils/configtools/_util.py,sha256=XYNKaCD_mMgvFBxvLE_KwgvfTkgHZEzs1Wj7gL0WkpY,4294
- cognite/extractorutils/configtools/elements.py,sha256=2uT0_EeJaRbxGWBXWDWfkA8qYvpu8O8X-S2_maEeUpc,18506
- cognite/extractorutils/configtools/loaders.py,sha256=Q_aeaT-TxqRMuurLasTzbSsIq0I1cnnhcs3Zzw3-FNk,9188
- cognite/extractorutils/exceptions.py,sha256=PERRmySUfJRM2Ta8cFvADTe-KUdXsoMLKdk4140AOHI,1061
- cognite/extractorutils/metrics.py,sha256=QTIe6LIMZ8u77lImB1jXgvTvMf2mMOVPESeAZ1hA3dg,14604
- cognite/extractorutils/middleware.py,sha256=XelqIxmWBfpoDUt0cKvAXyDCYnjIFyKNHVJU4CYjXes,1005
- cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cognite/extractorutils/statestore.py,sha256=5nFkzPbrOYwlGOKyElwX-JJ1Zb9hhzJfWXyWeD-mlMs,17449
- cognite/extractorutils/uploader/__init__.py,sha256=h9EAktLxNZjmZunFcK6Tngu6K7tQKwYTmNOkB4meCA8,3054
- cognite/extractorutils/uploader/_base.py,sha256=K0pOIt-O_yIOJcpGZctm24kUsZgrvxG0L_YGw1DqHCQ,5240
- cognite/extractorutils/uploader/_metrics.py,sha256=cF6EaixueFSSRw97O0HK3kEM7fX8AvNAUVz5xDyr4Yk,3865
- cognite/extractorutils/uploader/events.py,sha256=xpxrcIpGwnKgXhfCJCovtWkl3QvHI9XZkhO6TAJBHXE,5292
- cognite/extractorutils/uploader/files.py,sha256=Mh5MxswyGjrteLd22yopWMPD036DGXc6_tibHYEnMU0,11351
- cognite/extractorutils/uploader/raw.py,sha256=wdVeyD4DoqY0eIk0fvWnAlfIXGbtG5EgWetOClbQGZU,6888
- cognite/extractorutils/uploader/time_series.py,sha256=qmamsSAJFUwX9adFgejEV_cGmjEhbTuiucIVIovj8U4,25071
- cognite/extractorutils/uploader_extractor.py,sha256=TwiRAqgVRheA-KKnxloBZvD1tZjuDGKVGuGbtvPtj8I,7404
- cognite/extractorutils/uploader_types.py,sha256=nztoOqdszm4BEDcCybdApub0zN3KEfwLgTDeP69Xdlk,1146
- cognite/extractorutils/util.py,sha256=NtqR6EGUDZmGjrfy8lnge83ITh1HzX4u_JtqGqNQb-4,13352
- cognite_extractor_utils-5.0.1.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
- cognite_extractor_utils-5.0.1.dist-info/METADATA,sha256=HSvv1Zqkui1FNH0dpMtneo7rFybL6s5I3CtxRKaJZuE,5407
- cognite_extractor_utils-5.0.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
- cognite_extractor_utils-5.0.1.dist-info/RECORD,,