cognite-extractor-utils 7.6.0__py3-none-any.whl → 7.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (38)
  1. cognite/examples/unstable/extractors/simple_extractor/config/config.yaml +3 -0
  2. cognite/examples/unstable/extractors/simple_extractor/config/connection_config.yaml +10 -0
  3. cognite/examples/unstable/extractors/simple_extractor/main.py +81 -0
  4. cognite/extractorutils/__init__.py +1 -1
  5. cognite/extractorutils/_inner_util.py +2 -2
  6. cognite/extractorutils/base.py +1 -1
  7. cognite/extractorutils/configtools/elements.py +4 -2
  8. cognite/extractorutils/configtools/loaders.py +3 -3
  9. cognite/extractorutils/exceptions.py +1 -1
  10. cognite/extractorutils/metrics.py +8 -6
  11. cognite/extractorutils/statestore/watermark.py +6 -3
  12. cognite/extractorutils/threading.py +2 -2
  13. cognite/extractorutils/unstable/configuration/exceptions.py +28 -1
  14. cognite/extractorutils/unstable/configuration/models.py +157 -32
  15. cognite/extractorutils/unstable/core/_dto.py +80 -7
  16. cognite/extractorutils/unstable/core/base.py +175 -106
  17. cognite/extractorutils/unstable/core/checkin_worker.py +428 -0
  18. cognite/extractorutils/unstable/core/errors.py +2 -2
  19. cognite/extractorutils/unstable/core/logger.py +49 -0
  20. cognite/extractorutils/unstable/core/runtime.py +200 -31
  21. cognite/extractorutils/unstable/core/tasks.py +2 -2
  22. cognite/extractorutils/uploader/__init__.py +2 -0
  23. cognite/extractorutils/uploader/_base.py +1 -1
  24. cognite/extractorutils/uploader/assets.py +1 -1
  25. cognite/extractorutils/uploader/data_modeling.py +1 -1
  26. cognite/extractorutils/uploader/events.py +1 -1
  27. cognite/extractorutils/uploader/files.py +4 -4
  28. cognite/extractorutils/uploader/raw.py +1 -1
  29. cognite/extractorutils/uploader/time_series.py +319 -52
  30. cognite/extractorutils/uploader_extractor.py +20 -5
  31. cognite/extractorutils/uploader_types.py +13 -2
  32. cognite/extractorutils/util.py +8 -6
  33. {cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/METADATA +3 -2
  34. cognite_extractor_utils-7.8.0.dist-info/RECORD +55 -0
  35. cognite_extractor_utils-7.8.0.dist-info/entry_points.txt +2 -0
  36. cognite_extractor_utils-7.6.0.dist-info/RECORD +0 -50
  37. {cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/WHEEL +0 -0
  38. {cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/licenses/LICENSE +0 -0

cognite/extractorutils/uploader/time_series.py

@@ -19,7 +19,7 @@ import math
 from collections.abc import Callable
 from datetime import datetime
 from types import TracebackType
-from typing import Any
+from typing import Any, Generic, Literal, TypedDict, TypeVar
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import (
@@ -29,6 +29,9 @@ from cognite.client.data_classes import (
     StatusCode,
     TimeSeries,
 )
+from cognite.client.data_classes.data_modeling import NodeId
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorTimeSeriesApply
+from cognite.client.data_classes.data_modeling.instances import DirectRelationReference
 from cognite.client.exceptions import CogniteDuplicatedError, CogniteNotFoundError
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
@@ -62,6 +65,18 @@ DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp,
 DataPoint = DataPointWithoutStatus | DataPointWithStatus
 DataPointList = list[DataPoint]
 
+TQueue = TypeVar("TQueue", bound="BaseTimeSeriesUploadQueue")
+IdType = TypeVar("IdType", EitherId, NodeId)
+
+
+class CdmDatapointsPayload(TypedDict):
+    """
+    Represents a payload for CDF datapoints, linking them to a specific instance.
+    """
+
+    instanceId: NodeId
+    datapoints: DataPointList
+
 
 def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
     """
@@ -82,9 +97,9 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
     return TimeSeries(external_id=external_id, is_string=is_string)
 
 
-class TimeSeriesUploadQueue(AbstractUploadQueue):
+class BaseTimeSeriesUploadQueue(AbstractUploadQueue, Generic[IdType]):
     """
-    Upload queue for time series.
+    Abstract base upload queue for time series.
 
     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -96,12 +111,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             methods).
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
-        create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
-            (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
-            external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
-        data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
-            If a custom timeseries creation method is set in create_missing, this is used as fallback if
-            that method does not set data set id on its own.
     """
 
     def __init__(
@@ -112,10 +121,8 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         max_upload_interval: int | None = None,
         trigger_log_level: str = "DEBUG",
         thread_name: str | None = None,
-        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
-        data_set_id: int | None = None,
         cancellation_token: CancellationToken | None = None,
-    ):
+    ) -> None:
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
@@ -127,21 +134,11 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
             cancellation_token,
         )
 
-        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
-
-        if isinstance(create_missing, bool):
-            self.create_missing = create_missing
-            self.missing_factory = default_time_series_factory
-        else:
-            self.create_missing = True
-            self.missing_factory = create_missing
-
-        self.upload_queue: dict[EitherId, DataPointList] = {}
+        self.upload_queue: dict[IdType, DataPointList] = {}
 
         self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
         self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
         self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
-        self.data_set_id = data_set_id
 
     def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
         if isinstance(time, int | float):
@@ -171,6 +168,109 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         else:
             return True
 
+    def _sanitize_datapoints(self, datapoints: DataPointList | None) -> DataPointList:
+        datapoints = datapoints or []
+        old_len = len(datapoints)
+        datapoints = list(filter(self._is_datapoint_valid, datapoints))
+
+        new_len = len(datapoints)
+
+        if old_len > new_len:
+            diff = old_len - new_len
+            self.logger.warning(f"Discarding {diff} datapoints due to bad timestamp or value")
+            TIMESERIES_UPLOADER_POINTS_DISCARDED.inc(diff)
+
+        return datapoints
+
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager.
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
+
+    def __len__(self) -> int:
+        """
+        The size of the upload queue.
+
+        Returns:
+            Number of data points in queue
+        """
+        return self.upload_queue_size
+
+    def __enter__(self: TQueue) -> TQueue:
+        """
+        Wraps around start method, for use as context manager.
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+
+class TimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[EitherId]):
+    """
+    Upload queue for time series.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+            datapoints upload in the Cognite SDK).
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
+            (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
+            external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
+        data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
+            If a custom timeseries creation method is set in create_missing, this is used as fallback if
+            that method does not set data set id on its own.
+    """
+
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: str | None = None,
+        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+        data_set_id: int | None = None,
+        cancellation_token: CancellationToken | None = None,
+    ) -> None:
+        # Super sets post_upload and threshold
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
+
+        if isinstance(create_missing, bool):
+            self.create_missing = create_missing
+            self.missing_factory = default_time_series_factory
+        else:
+            self.create_missing = True
+            self.missing_factory = create_missing
+
+        self.data_set_id = data_set_id
+
     def add_to_upload_queue(
         self,
         *,
@@ -188,16 +288,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            external_id: External ID of time series. Either this or external_id must be set.
            datapoints: list of data points to add
        """
-        datapoints = datapoints or []
-        old_len = len(datapoints)
-        datapoints = list(filter(self._is_datapoint_valid, datapoints))
-
-        new_len = len(datapoints)
-
-        if old_len > new_len:
-            diff = old_len - new_len
-            self.logger.warning(f"Discarding {diff} datapoints due to bad timestamp or value")
-            TIMESERIES_UPLOADER_POINTS_DISCARDED.inc(diff)
+        datapoints = self._sanitize_datapoints(datapoints)
 
         either_id = EitherId(id=id, external_id=external_id)
 
@@ -310,37 +401,213 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
         self.upload_queue_size = 0
         self.queue_size.set(self.upload_queue_size)
 
-    def __enter__(self) -> "TimeSeriesUploadQueue":
+
+class CDMTimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[NodeId]):
+    """
+    Upload queue for CDM time series.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+            datapoints upload in the Cognite SDK).
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+    """
+
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: str | None = None,
+        create_missing: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply] | bool = False,
+        cancellation_token: CancellationToken | None = None,
+        source: DirectRelationReference | None = None,
+    ) -> None:
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.missing_factory: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply]
+        self.source = source
+
+        if isinstance(create_missing, bool):
+            self.create_missing = create_missing
+            self.missing_factory = self.default_cdm_time_series_factory
+        else:
+            self.create_missing = True
+            self.missing_factory = create_missing
+
+    def default_cdm_time_series_factory(
+        self, instance_id: NodeId, datapoints: DataPointList
+    ) -> CogniteExtractorTimeSeriesApply:
         """
-        Wraps around start method, for use as context manager.
+        Default CDM time series factory used when create_missing in a CDMTimeSeriesUploadQueue is given as a boolean.
 
+        Args:
+            instance_id: Instance ID of time series to create
+            datapoints: The list of datapoints that were tried to be inserted
+            source: The source of the time series, used for creating the DirectRelationReference
         Returns:
-            self
+            A CogniteExtractorTimeSeriesApply object with instance_id set, and the is_string automatically detected
         """
-        self.start()
-        return self
+        is_string = (
+            isinstance(datapoints[0].get("value"), str)
+            if isinstance(datapoints[0], dict)
+            else isinstance(datapoints[0][1], str)
+        )
 
-    def __exit__(
-        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
+        time_series_type: Literal["numeric", "string"] = "string" if is_string else "numeric"
+
+        return CogniteExtractorTimeSeriesApply(
+            space=instance_id.space,
+            external_id=instance_id.external_id,
+            is_step=False,
+            time_series_type=time_series_type,
+            source=self.source,
+        )
+
+    def add_to_upload_queue(
+        self,
+        *,
+        instance_id: NodeId,
+        datapoints: DataPointList | None = None,
     ) -> None:
         """
-        Wraps around stop method, for use as context manager.
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the __init__.
 
         Args:
-            exc_type: Exception type
-            exc_val: Exception value
-            exc_tb: Traceback
+            instance_id: The identifier for the time series to which the datapoints belong.
+            datapoints: list of data points to add
         """
-        self.stop()
+        datapoints = self._sanitize_datapoints(datapoints)
 
-    def __len__(self) -> int:
-        """
-        The size of the upload queue.
+        with self.lock:
+            if instance_id not in self.upload_queue:
+                self.upload_queue[instance_id] = []
 
-        Returns:
-            Number of data points in queue
+            self.upload_queue[instance_id].extend(datapoints)
+            self.points_queued.inc(len(datapoints))
+            self.upload_queue_size += len(datapoints)
+            self.queue_size.set(self.upload_queue_size)
+
+            self._check_triggers()
+
+    def upload(self) -> None:
+        """
+        Trigger an upload of the queue, clears queue afterwards.
         """
-        return self.upload_queue_size
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(upload_this: list[CdmDatapointsPayload], retries: int = 5) -> list[CdmDatapointsPayload]:
+            if len(upload_this) == 0:
+                return upload_this
+
+            try:
+                self.cdf_client.time_series.data.insert_multiple(upload_this)  # type: ignore[arg-type]
+            except CogniteNotFoundError as ex:
+                if not retries:
+                    raise ex
+
+                if not self.create_missing:
+                    self.logger.error("Could not upload data points to %s: %s", str(ex.not_found), str(ex))
+
+                # Get IDs of time series that exists, but failed because of the non-existing time series
+                retry_these = [
+                    NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                    for id_dict in ex.failed
+                    if id_dict not in ex.not_found
+                ]
+
+                if self.create_missing:
+                    # Get the time series that can be created
+                    create_these_ids = {
+                        NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                        for id_dict in ex.not_found
+                    }
+                    self.logger.info(f"Creating {len(create_these_ids)} time series")
+
+                    datapoints_lists: dict[NodeId, DataPointList] = {
+                        ts_dict["instanceId"]: ts_dict["datapoints"]
+                        for ts_dict in upload_this
+                        if ts_dict["instanceId"] in create_these_ids
+                    }
+
+                    to_create: list[CogniteExtractorTimeSeriesApply] = [
+                        self.missing_factory(instance_id, datapoints_lists[instance_id])
+                        for instance_id in create_these_ids
+                    ]
+
+                    instance_result = self.cdf_client.data_modeling.instances.apply(to_create)
+                    retry_these.extend([node.as_id() for node in instance_result.nodes])
+
+                    if len(ex.not_found) != len(create_these_ids):
+                        missing = [
+                            id_dict
+                            for id_dict in ex.not_found
+                            if NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                            not in retry_these
+                        ]
+                        missing_num = len(ex.not_found) - len(create_these_ids)
+                        self.logger.error(
+                            f"{missing_num} time series not found, and could not be created automatically: "
+                            + str(missing)
+                            + " Data will be dropped"
+                        )
+
+                # Remove entries with non-existing time series from upload queue
+                upload_this = [entry for entry in upload_this if entry["instanceId"] in retry_these]
+
+                # Upload remaining
+                _upload_batch(upload_this, retries - 1)
+
+            return upload_this
+
+        if len(self.upload_queue) == 0:
+            return
+
+        with self.lock:
+            upload_this = _upload_batch(
+                [
+                    {"instanceId": instance_id, "datapoints": list(datapoints)}
+                    for instance_id, datapoints in self.upload_queue.items()
+                    if len(datapoints) > 0
+                ]
+            )
+
+            for datapoints in self.upload_queue.values():
+                self.points_written.inc(len(datapoints))
+
+            try:
+                self._post_upload(upload_this)
+            except Exception as e:
+                self.logger.error("Error in upload callback: %s", str(e))
+
+            self.upload_queue.clear()
+            self.logger.info(f"Uploaded {self.upload_queue_size} datapoints")
+            self.upload_queue_size = 0
+            self.queue_size.set(self.upload_queue_size)
 
 
 class SequenceUploadQueue(AbstractUploadQueue):
@@ -369,7 +636,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
         thread_name: str | None = None,
         create_missing: bool = False,
         cancellation_token: CancellationToken | None = None,
-    ):
+    ) -> None:
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
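
The time_series.py changes above split the old TimeSeriesUploadQueue into a generic BaseTimeSeriesUploadQueue and add a CDM variant keyed on NodeId. The sketch below is not taken from the package; the client setup, space, and external ID are placeholders, but it shows how the new CDMTimeSeriesUploadQueue could be used.

```python
# Minimal usage sketch of the new CDMTimeSeriesUploadQueue (not from the package itself).
# The client setup, space and external ID are placeholders.
from datetime import datetime, timezone

from cognite.client import CogniteClient
from cognite.client.data_classes.data_modeling import NodeId
from cognite.extractorutils.uploader import CDMTimeSeriesUploadQueue

client = CogniteClient()  # assumes a default client configuration is already set up

# Datapoints use the same shapes as the classic queue: (timestamp, value) tuples or dicts.
points = [
    (datetime.now(tz=timezone.utc), 42.0),
    (datetime.now(tz=timezone.utc), 43.5),
]

# With create_missing=True, a CogniteNotFoundError during upload triggers creation of the
# missing CogniteExtractorTimeSeries instances via default_cdm_time_series_factory.
with CDMTimeSeriesUploadQueue(client, max_upload_interval=30, create_missing=True) as queue:
    queue.add_to_upload_queue(
        instance_id=NodeId("my-space", "my-time-series"),  # hypothetical instance
        datapoints=points,
    )
# Leaving the context manager stops the upload thread and should flush any remaining points.
```

As with the classic queue, entering the context manager starts the background upload thread and exiting it stops the queue.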

cognite/extractorutils/uploader_extractor.py

@@ -31,8 +31,13 @@ from cognite.extractorutils.configtools import BaseConfig, TimeIntervalConfig
 from cognite.extractorutils.metrics import BaseMetrics
 from cognite.extractorutils.statestore import AbstractStateStore
 from cognite.extractorutils.threading import CancellationToken
-from cognite.extractorutils.uploader import EventUploadQueue, RawUploadQueue, TimeSeriesUploadQueue
-from cognite.extractorutils.uploader_types import CdfTypes, Event, InsertDatapoints, RawRow
+from cognite.extractorutils.uploader import (
+    CDMTimeSeriesUploadQueue,
+    EventUploadQueue,
+    RawUploadQueue,
+    TimeSeriesUploadQueue,
+)
+from cognite.extractorutils.uploader_types import CdfTypes, Event, InsertCDMDatapoints, InsertDatapoints, RawRow
 
 
 @dataclass
@@ -100,7 +105,7 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         heartbeat_waiting_time: int = 600,
         handle_interrupts: bool = True,
         middleware: list[Callable[[dict], dict]] | None = None,
-    ):
+    ) -> None:
         super().__init__(
             name=name,
             description=description,
@@ -153,10 +158,14 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
                     self.time_series_queue.add_to_upload_queue(
                         id=dp.id, external_id=dp.external_id, datapoints=dp.datapoints
                     )
+        elif isinstance(peek, InsertCDMDatapoints):
+            for dp in peekable_output:
+                if isinstance(dp, InsertCDMDatapoints):
+                    self.cdm_time_series_queue.add_to_upload_queue(instance_id=dp.instance_id, datapoints=dp.datapoints)
         else:
            raise ValueError(f"Unexpected type: {type(peek)}")
 
-    def _apply_middleware(self, item: Any) -> Any:
+    def _apply_middleware(self, item: Any) -> Any:  # noqa: ANN401
        for mw in self.middleware:
            item = mw(item)
        return item
@@ -187,7 +196,12 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
             trigger_log_level="INFO",
             create_missing=True,
         ).__enter__()
-
+        self.cdm_time_series_queue = CDMTimeSeriesUploadQueue(
+            self.cognite_client,
+            max_queue_size=queue_config.timeseries_size,
+            max_upload_interval=queue_config.upload_interval.seconds,
+            trigger_log_level="INFO",
+        ).__enter__()
         return self
 
     def __exit__(
@@ -199,4 +213,5 @@ class UploaderExtractor(Extractor[UploaderExtractorConfigClass]):
         self.event_queue.__exit__(exc_type, exc_val, exc_tb)
         self.raw_queue.__exit__(exc_type, exc_val, exc_tb)
         self.time_series_queue.__exit__(exc_type, exc_val, exc_tb)
+        self.cdm_time_series_queue.__exit__(exc_type, exc_val, exc_tb)
         return super().__exit__(exc_type, exc_val, exc_tb)
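
The uploader_extractor.py changes wire a CDMTimeSeriesUploadQueue into UploaderExtractor and teach its output dispatch to recognise the new InsertCDMDatapoints batches (defined in the uploader_types.py diff below). The following is a rough, hypothetical sketch of handler output that would exercise this routing; the handler name and all concrete values are invented, and how the output reaches the extractor's dispatch is not shown in this diff.

```python
# Hypothetical output from an UploaderExtractor-based handler. Batches of the new
# InsertCDMDatapoints type are dispatched to cdm_time_series_queue by the logic shown above
# (the extractor peeks at the first element to pick a queue, so a handler should yield one
# kind of item at a time). All concrete values here are placeholders.
from collections.abc import Iterator

from cognite.client.data_classes.data_modeling import NodeId
from cognite.extractorutils.uploader_types import InsertCDMDatapoints


def poll_source() -> Iterator[InsertCDMDatapoints]:
    # One batch per CDM time series instance read from the source system.
    yield InsertCDMDatapoints(
        instance_id=NodeId("my-space", "pump-42-temperature"),
        datapoints=[(1700000000000, 21.3), (1700000060000, 21.5)],
    )
    yield InsertCDMDatapoints(
        instance_id=NodeId("my-space", "pump-42-pressure"),
        datapoints=[(1700000000000, 3.1)],
    )
```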

cognite/extractorutils/uploader_types.py

@@ -9,6 +9,7 @@ from typing import TypeAlias
 
 from cognite.client.data_classes import Event as _Event
 from cognite.client.data_classes import Row as _Row
+from cognite.client.data_classes.data_modeling import NodeId
 from cognite.extractorutils.uploader.time_series import DataPoint
 
 
@@ -17,18 +18,28 @@ class InsertDatapoints:
     A class representing a batch of datapoints to be inserted into a time series.
     """
 
-    def __init__(self, *, id: int | None = None, external_id: str | None = None, datapoints: list[DataPoint]):  # noqa: A002
+    def __init__(self, *, id: int | None = None, external_id: str | None = None, datapoints: list[DataPoint]) -> None:  # noqa: A002
         self.id = id
         self.external_id = external_id
         self.datapoints = datapoints
 
 
+class InsertCDMDatapoints:
+    """
+    A class representing a batch of datapoints to be inserted into a cdm time series.
+    """
+
+    def __init__(self, *, instance_id: NodeId, datapoints: list[DataPoint]) -> None:
+        self.instance_id = instance_id
+        self.datapoints = datapoints
+
+
 class RawRow:
     """
     A class representing a row of data to be inserted into a RAW table.
     """
 
-    def __init__(self, db_name: str, table_name: str, row: _Row | Iterable[_Row]):
+    def __init__(self, db_name: str, table_name: str, row: _Row | Iterable[_Row]) -> None:
         self.db_name = db_name
         self.table_name = table_name
         if isinstance(row, Iterable):

cognite/extractorutils/util.py

@@ -30,12 +30,14 @@ from typing import Any, TypeVar
 from decorator import decorator
 
 from cognite.client import CogniteClient
+from cognite.client._api.assets import AssetsAPI
+from cognite.client._api.time_series import TimeSeriesAPI
 from cognite.client.data_classes import Asset, ExtractionPipelineRun, TimeSeries
 from cognite.client.exceptions import CogniteAPIError, CogniteException, CogniteFileUploadError, CogniteNotFoundError
 from cognite.extractorutils.threading import CancellationToken
 
 
-def _ensure(endpoint: Any, items: Iterable[Any]) -> None:
+def _ensure(endpoint: TimeSeriesAPI | AssetsAPI, items: Iterable[Any]) -> None:
     try:
         external_ids = [ts.external_id for ts in items]
 
@@ -90,7 +92,7 @@ class EitherId:
         TypeError: If none of both of id types are set.
     """
 
-    def __init__(self, **kwargs: int | str | None):
+    def __init__(self, **kwargs: int | str | None) -> None:
         internal_id = kwargs.get("id")
         external_id = kwargs.get("externalId") or kwargs.get("external_id")
 
@@ -127,7 +129,7 @@ class EitherId:
         """
         return self.internal_id or self.external_id  # type: ignore  # checked to be not None in init
 
-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         """
         Compare with another object. Only returns true if other is an EitherId with the same type and content.
@@ -210,7 +212,7 @@ def add_extraction_pipeline(
 
     def decorator_ext_pip(input_function: Callable[..., _T1]) -> Callable[..., _T1]:
         @wraps(input_function)
-        def wrapper_ext_pip(*args: Any, **kwargs: Any) -> _T1:
+        def wrapper_ext_pip(*args: Any, **kwargs: Any) -> _T1:  # noqa: ANN401
            ##############################
            # Setup Extraction Pipelines #
            ##############################
@@ -397,7 +399,7 @@ def retry(
     """
 
     @decorator
-    def retry_decorator(f: Callable[..., _T2], *fargs: Any, **fkwargs: Any) -> _T2:
+    def retry_decorator(f: Callable[..., _T2], *fargs: Any, **fkwargs: Any) -> _T2:  # noqa: ANN401
         args = fargs if fargs else []
         kwargs = fkwargs if fkwargs else {}
 
@@ -657,7 +659,7 @@ def iterable_to_stream(
         def readable(self) -> bool:
             return True
 
-        def readinto(self, buffer: Any) -> int | None:
+        def readinto(self, buffer: "WritableBuffer") -> int | None:  # type: ignore[name-defined]  # noqa: F821
            try:
                # Bytes to return
                ln = len(buffer)

{cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cognite-extractor-utils
-Version: 7.6.0
+Version: 7.8.0
 Summary: Utilities for easier development of extractors for CDF
 Project-URL: repository, https://github.com/cognitedata/python-extractor-utils
 Author-email: Mathias Lohne <mathias.lohne@cognite.com>
@@ -12,7 +12,7 @@ Requires-Python: >=3.10
 Requires-Dist: arrow>=1.0.0
 Requires-Dist: azure-identity>=1.14.0
 Requires-Dist: azure-keyvault-secrets>=4.7.0
-Requires-Dist: cognite-sdk>=7.59.0
+Requires-Dist: cognite-sdk>=7.75.2
 Requires-Dist: croniter>=6.0.0
 Requires-Dist: dacite<1.9.0,>=1.6.0
 Requires-Dist: decorator>=5.1.1
@@ -26,6 +26,7 @@ Requires-Dist: pydantic>=2.8.2
 Requires-Dist: pyhumps>=3.8.0
 Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: pyyaml<7,>=5.3.0
+Requires-Dist: simple-winservice>=0.1.0; sys_platform == 'win32'
 Requires-Dist: typing-extensions<5,>=3.7.4
 Provides-Extra: experimental
 Requires-Dist: cognite-sdk-experimental; extra == 'experimental'