cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)
  1. cognite/extractorutils/__init__.py +1 -1
  2. cognite/extractorutils/_inner_util.py +1 -1
  3. cognite/extractorutils/base.py +120 -40
  4. cognite/extractorutils/configtools/__init__.py +4 -5
  5. cognite/extractorutils/configtools/_util.py +3 -2
  6. cognite/extractorutils/configtools/elements.py +206 -33
  7. cognite/extractorutils/configtools/loaders.py +68 -16
  8. cognite/extractorutils/configtools/validators.py +5 -1
  9. cognite/extractorutils/exceptions.py +11 -2
  10. cognite/extractorutils/metrics.py +17 -12
  11. cognite/extractorutils/statestore/__init__.py +77 -3
  12. cognite/extractorutils/statestore/_base.py +7 -3
  13. cognite/extractorutils/statestore/hashing.py +129 -15
  14. cognite/extractorutils/statestore/watermark.py +77 -87
  15. cognite/extractorutils/threading.py +30 -4
  16. cognite/extractorutils/unstable/__init__.py +5 -5
  17. cognite/extractorutils/unstable/configuration/__init__.py +3 -0
  18. cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
  19. cognite/extractorutils/unstable/configuration/loaders.py +78 -13
  20. cognite/extractorutils/unstable/configuration/models.py +121 -7
  21. cognite/extractorutils/unstable/core/__init__.py +5 -0
  22. cognite/extractorutils/unstable/core/_dto.py +5 -3
  23. cognite/extractorutils/unstable/core/base.py +113 -4
  24. cognite/extractorutils/unstable/core/errors.py +41 -0
  25. cognite/extractorutils/unstable/core/logger.py +149 -0
  26. cognite/extractorutils/unstable/core/restart_policy.py +16 -2
  27. cognite/extractorutils/unstable/core/runtime.py +44 -6
  28. cognite/extractorutils/unstable/core/tasks.py +53 -1
  29. cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
  30. cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
  31. cognite/extractorutils/uploader/__init__.py +9 -5
  32. cognite/extractorutils/uploader/_base.py +4 -5
  33. cognite/extractorutils/uploader/assets.py +13 -8
  34. cognite/extractorutils/uploader/data_modeling.py +37 -2
  35. cognite/extractorutils/uploader/events.py +14 -9
  36. cognite/extractorutils/uploader/files.py +80 -21
  37. cognite/extractorutils/uploader/raw.py +12 -7
  38. cognite/extractorutils/uploader/time_series.py +370 -94
  39. cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
  40. cognite/extractorutils/uploader_extractor.py +47 -9
  41. cognite/extractorutils/uploader_types.py +26 -1
  42. cognite/extractorutils/util.py +76 -23
  43. {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/METADATA +1 -1
  44. cognite_extractor_utils-7.7.0.dist-info/RECORD +50 -0
  45. cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
  46. {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/WHEEL +0 -0
  47. {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/licenses/LICENSE +0 -0
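
The time_series.py diff below (item 38 above) introduces CDMTimeSeriesUploadQueue, an upload queue keyed on data-modeling NodeId rather than internal/external id. A minimal usage sketch based only on the signatures visible in the diff; the client setup, space, external id and upload interval are illustrative assumptions, not taken from the package documentation:

    from cognite.client import CogniteClient
    from cognite.client.data_classes.data_modeling import NodeId

    from cognite.extractorutils.uploader.time_series import CDMTimeSeriesUploadQueue

    client = CogniteClient()  # assumes credentials are configured elsewhere

    # create_missing=True asks the queue to create missing CDM time series on upload
    queue = CDMTimeSeriesUploadQueue(cdf_client=client, max_upload_interval=30, create_missing=True)

    with queue:  # __enter__/__exit__ wrap start()/stop(), per BaseTimeSeriesUploadQueue in the diff
        queue.add_to_upload_queue(
            instance_id=NodeId("my-space", "my-timeseries"),  # hypothetical instance id
            datapoints=[(1700000000000, 1.23), (1700000060000, 4.56)],  # (ms timestamp, value)
        )
    # leaving the context stops the queue; queued datapoints are flushed by the queue's upload logic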
cognite/extractorutils/uploader/time_series.py

@@ -1,3 +1,6 @@
+ """
+ Upload queue for time series and sequences.
+ """
  # Copyright 2023 Cognite AS
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,7 +19,7 @@ import math
  from collections.abc import Callable
  from datetime import datetime
  from types import TracebackType
- from typing import Any
+ from typing import Any, Generic, Literal, TypedDict, TypeVar

  from cognite.client import CogniteClient
  from cognite.client.data_classes import (
@@ -26,6 +29,9 @@ from cognite.client.data_classes import (
      StatusCode,
      TimeSeries,
  )
+ from cognite.client.data_classes.data_modeling import NodeId
+ from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorTimeSeriesApply
+ from cognite.client.data_classes.data_modeling.instances import DirectRelationReference
  from cognite.client.exceptions import CogniteDuplicatedError, CogniteNotFoundError
  from cognite.extractorutils.threading import CancellationToken
  from cognite.extractorutils.uploader._base import (
@@ -59,6 +65,18 @@ DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp,
  DataPoint = DataPointWithoutStatus | DataPointWithStatus
  DataPointList = list[DataPoint]

+ TQueue = TypeVar("TQueue", bound="BaseTimeSeriesUploadQueue")
+ IdType = TypeVar("IdType", EitherId, NodeId)
+
+
+ class CdmDatapointsPayload(TypedDict):
+     """
+     Represents a payload for CDF datapoints, linking them to a specific instance.
+     """
+
+     instanceId: NodeId
+     datapoints: DataPointList
+

  def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
      """
@@ -79,9 +97,9 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
      return TimeSeries(external_id=external_id, is_string=is_string)


- class TimeSeriesUploadQueue(AbstractUploadQueue):
+ class BaseTimeSeriesUploadQueue(AbstractUploadQueue, Generic[IdType]):
      """
-     Upload queue for time series
+     Abstract base upload queue for time series.

      Args:
          cdf_client: Cognite Data Fusion client to use
@@ -93,12 +111,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
              methods).
          trigger_log_level: Log level to log upload triggers to.
          thread_name: Thread name of uploader thread.
-         create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
-             (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
-             external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
-         data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
-             If a custom timeseries creation method is set in create_missing, this is used as fallback if
-             that method does not set data set id on its own.
      """

      def __init__(
@@ -109,8 +121,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
          max_upload_interval: int | None = None,
          trigger_log_level: str = "DEBUG",
          thread_name: str | None = None,
-         create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
-         data_set_id: int | None = None,
          cancellation_token: CancellationToken | None = None,
      ):
          # Super sets post_upload and threshold
@@ -124,24 +134,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
              cancellation_token,
          )

-         self.missing_factory: Callable[[str, DataPointList], TimeSeries]
-
-         if isinstance(create_missing, bool):
-             self.create_missing = create_missing
-             self.missing_factory = default_time_series_factory
-         else:
-             self.create_missing = True
-             self.missing_factory = create_missing
-
-         self.upload_queue: dict[EitherId, DataPointList] = {}
+         self.upload_queue: dict[IdType, DataPointList] = {}

          self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
          self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
          self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
-         self.data_set_id = data_set_id

      def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
-         if isinstance(time, int) or isinstance(time, float):
+         if isinstance(time, int | float):
              return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
          elif isinstance(time, str):
              return False
@@ -155,10 +155,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
          )
          elif isinstance(value, str):
              return len(value) <= MAX_DATAPOINT_STRING_LENGTH
-         elif isinstance(value, datetime):
-             return False
-         else:
-             return True
+         return not isinstance(value, datetime)

      def _is_datapoint_valid(
          self,
@@ -171,18 +168,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
          else:
              return True

-     def add_to_upload_queue(
-         self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
-     ) -> None:
-         """
-         Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
-         specified in the __init__.
-
-         Args:
-             id: Internal ID of time series. Either this or external_id must be set.
-             external_id: External ID of time series. Either this or external_id must be set.
-             datapoints: list of data points to add
-         """
+     def _sanitize_datapoints(self, datapoints: DataPointList | None) -> DataPointList:
          datapoints = datapoints or []
          old_len = len(datapoints)
          datapoints = list(filter(self._is_datapoint_valid, datapoints))
@@ -194,6 +180,116 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
              self.logger.warning(f"Discarding {diff} datapoints due to bad timestamp or value")
              TIMESERIES_UPLOADER_POINTS_DISCARDED.inc(diff)

+         return datapoints
+
+     def __exit__(
+         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
+     ) -> None:
+         """
+         Wraps around stop method, for use as context manager.
+
+         Args:
+             exc_type: Exception type
+             exc_val: Exception value
+             exc_tb: Traceback
+         """
+         self.stop()
+
+     def __len__(self) -> int:
+         """
+         The size of the upload queue.
+
+         Returns:
+             Number of data points in queue
+         """
+         return self.upload_queue_size
+
+     def __enter__(self: TQueue) -> TQueue:
+         """
+         Wraps around start method, for use as context manager.
+
+         Returns:
+             self
+         """
+         self.start()
+         return self
+
+
+ class TimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[EitherId]):
+     """
+     Upload queue for time series.
+
+     Args:
+         cdf_client: Cognite Data Fusion client to use
+         post_upload_function: A function that will be called after each upload. The function will be given one argument:
+             A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+             datapoints upload in the Cognite SDK).
+         max_queue_size: Maximum size of upload queue. Defaults to no max size.
+         max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+             methods).
+         trigger_log_level: Log level to log upload triggers to.
+         thread_name: Thread name of uploader thread.
+         create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
+             (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
+             external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
+         data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
+             If a custom timeseries creation method is set in create_missing, this is used as fallback if
+             that method does not set data set id on its own.
+     """
+
+     def __init__(
+         self,
+         cdf_client: CogniteClient,
+         post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+         max_queue_size: int | None = None,
+         max_upload_interval: int | None = None,
+         trigger_log_level: str = "DEBUG",
+         thread_name: str | None = None,
+         create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+         data_set_id: int | None = None,
+         cancellation_token: CancellationToken | None = None,
+     ):
+         # Super sets post_upload and threshold
+         super().__init__(
+             cdf_client,
+             post_upload_function,
+             max_queue_size,
+             max_upload_interval,
+             trigger_log_level,
+             thread_name,
+             cancellation_token,
+         )
+
+         self.missing_factory: Callable[[str, DataPointList], TimeSeries]
+
+         if isinstance(create_missing, bool):
+             self.create_missing = create_missing
+             self.missing_factory = default_time_series_factory
+         else:
+             self.create_missing = True
+             self.missing_factory = create_missing
+
+         self.data_set_id = data_set_id
+
+     def add_to_upload_queue(
+         self,
+         *,
+         id: int | None = None,  # noqa: A002
+         external_id: str | None = None,
+         datapoints: DataPointList | None = None,
+     ) -> None:
+         """
+         Add data points to upload queue.
+
+         The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
+
+         Args:
+             id: Internal ID of time series. Either this or external_id must be set.
+             external_id: External ID of time series. Either this or external_id must be set.
+             datapoints: list of data points to add
+         """
+         datapoints = self._sanitize_datapoints(datapoints)
+
          either_id = EitherId(id=id, external_id=external_id)

          with self.lock:
@@ -209,7 +305,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

      def upload(self) -> None:
          """
-         Trigger an upload of the queue, clears queue afterwards
+         Trigger an upload of the queue, clears queue afterwards.
          """

          @retry(
@@ -239,9 +335,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

                  if self.create_missing:
                      # Get the time series that can be created
-                     create_these_ids = set(
-                         [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
-                     )
+                     create_these_ids = {id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict}
                      datapoints_lists: dict[str, DataPointList] = {
                          ts_dict["externalId"]: ts_dict["datapoints"]
                          for ts_dict in upload_this
@@ -294,7 +388,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                  ]
              )

-         for _either_id, datapoints in self.upload_queue.items():
+         for datapoints in self.upload_queue.values():
              self.points_written.inc(len(datapoints))

          try:
@@ -307,40 +401,231 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
          self.upload_queue_size = 0
          self.queue_size.set(self.upload_queue_size)

-     def __enter__(self) -> "TimeSeriesUploadQueue":
+
+ class CDMTimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[NodeId]):
+     """
+     Upload queue for CDM time series.
+
+     Args:
+         cdf_client: Cognite Data Fusion client to use
+         post_upload_function: A function that will be called after each upload. The function will be given one argument:
+             A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+             datapoints upload in the Cognite SDK).
+         max_queue_size: Maximum size of upload queue. Defaults to no max size.
+         max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+             methods).
+         trigger_log_level: Log level to log upload triggers to.
+         thread_name: Thread name of uploader thread.
+     """
+
+     def __init__(
+         self,
+         cdf_client: CogniteClient,
+         post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+         max_queue_size: int | None = None,
+         max_upload_interval: int | None = None,
+         trigger_log_level: str = "DEBUG",
+         thread_name: str | None = None,
+         create_missing: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply] | bool = False,
+         cancellation_token: CancellationToken | None = None,
+         source: DirectRelationReference | None = None,
+     ):
+         super().__init__(
+             cdf_client,
+             post_upload_function,
+             max_queue_size,
+             max_upload_interval,
+             trigger_log_level,
+             thread_name,
+             cancellation_token,
+         )
+
+         self.missing_factory: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply]
+         self.source = source
+
+         if isinstance(create_missing, bool):
+             self.create_missing = create_missing
+             self.missing_factory = self.default_cdm_time_series_factory
+         else:
+             self.create_missing = True
+             self.missing_factory = create_missing
+
+     def default_cdm_time_series_factory(
+         self, instance_id: NodeId, datapoints: DataPointList
+     ) -> CogniteExtractorTimeSeriesApply:
          """
-         Wraps around start method, for use as context manager
+         Default CDM time series factory used when create_missing in a CDMTimeSeriesUploadQueue is given as a boolean.

+         Args:
+             instance_id: Instance ID of time series to create
+             datapoints: The list of datapoints that were tried to be inserted
+             source: The source of the time series, used for creating the DirectRelationReference
          Returns:
-             self
+             A CogniteExtractorTimeSeriesApply object with instance_id set, and the is_string automatically detected
          """
-         self.start()
-         return self
+         is_string = (
+             isinstance(datapoints[0].get("value"), str)
+             if isinstance(datapoints[0], dict)
+             else isinstance(datapoints[0][1], str)
+         )

-     def __exit__(
-         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
+         time_series_type: Literal["numeric", "string"] = "string" if is_string else "numeric"
+
+         return CogniteExtractorTimeSeriesApply(
+             space=instance_id.space,
+             external_id=instance_id.external_id,
+             is_step=False,
+             time_series_type=time_series_type,
+             source=self.source,
+         )
+
+     def add_to_upload_queue(
+         self,
+         *,
+         instance_id: NodeId,
+         datapoints: DataPointList | None = None,
      ) -> None:
          """
-         Wraps around stop method, for use as context manager
+         Add data points to upload queue.
+
+         The queue will be uploaded if the queue size is larger than the threshold specified in the __init__.

          Args:
-             exc_type: Exception type
-             exc_val: Exception value
-             exc_tb: Traceback
+             instance_id: The identifier for the time series to which the datapoints belong.
+             datapoints: list of data points to add
          """
-         self.stop()
+         datapoints = self._sanitize_datapoints(datapoints)

-     def __len__(self) -> int:
-         """
-         The size of the upload queue
+         with self.lock:
+             if instance_id not in self.upload_queue:
+                 self.upload_queue[instance_id] = []

-         Returns:
-             Number of data points in queue
+             self.upload_queue[instance_id].extend(datapoints)
+             self.points_queued.inc(len(datapoints))
+             self.upload_queue_size += len(datapoints)
+             self.queue_size.set(self.upload_queue_size)
+
+             self._check_triggers()
+
+     def upload(self) -> None:
          """
-         return self.upload_queue_size
+         Trigger an upload of the queue, clears queue afterwards.
+         """
+
+         @retry(
+             exceptions=cognite_exceptions(),
+             cancellation_token=self.cancellation_token,
+             tries=RETRIES,
+             delay=RETRY_DELAY,
+             max_delay=RETRY_MAX_DELAY,
+             backoff=RETRY_BACKOFF_FACTOR,
+         )
+         def _upload_batch(upload_this: list[CdmDatapointsPayload], retries: int = 5) -> list[CdmDatapointsPayload]:
+             if len(upload_this) == 0:
+                 return upload_this
+
+             try:
+                 self.cdf_client.time_series.data.insert_multiple(upload_this)  # type: ignore[arg-type]
+             except CogniteNotFoundError as ex:
+                 if not retries:
+                     raise ex
+
+                 if not self.create_missing:
+                     self.logger.error("Could not upload data points to %s: %s", str(ex.not_found), str(ex))
+
+                 # Get IDs of time series that exists, but failed because of the non-existing time series
+                 retry_these = [
+                     NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                     for id_dict in ex.failed
+                     if id_dict not in ex.not_found
+                 ]
+
+                 if self.create_missing:
+                     # Get the time series that can be created
+                     create_these_ids = {
+                         NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                         for id_dict in ex.not_found
+                     }
+                     self.logger.info(f"Creating {len(create_these_ids)} time series")
+
+                     datapoints_lists: dict[NodeId, DataPointList] = {
+                         ts_dict["instanceId"]: ts_dict["datapoints"]
+                         for ts_dict in upload_this
+                         if ts_dict["instanceId"] in create_these_ids
+                     }
+
+                     to_create: list[CogniteExtractorTimeSeriesApply] = [
+                         self.missing_factory(instance_id, datapoints_lists[instance_id])
+                         for instance_id in create_these_ids
+                     ]
+
+                     instance_result = self.cdf_client.data_modeling.instances.apply(to_create)
+                     retry_these.extend([node.as_id() for node in instance_result.nodes])
+
+                     if len(ex.not_found) != len(create_these_ids):
+                         missing = [
+                             id_dict
+                             for id_dict in ex.not_found
+                             if NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                             not in retry_these
+                         ]
+                         missing_num = len(ex.not_found) - len(create_these_ids)
+                         self.logger.error(
+                             f"{missing_num} time series not found, and could not be created automatically: "
+                             + str(missing)
+                             + " Data will be dropped"
+                         )
+
+                 # Remove entries with non-existing time series from upload queue
+                 upload_this = [entry for entry in upload_this if entry["instanceId"] in retry_these]
+
+                 # Upload remaining
+                 _upload_batch(upload_this, retries - 1)
+
+             return upload_this
+
+         if len(self.upload_queue) == 0:
+             return
+
+         with self.lock:
+             upload_this = _upload_batch(
+                 [
+                     {"instanceId": instance_id, "datapoints": list(datapoints)}
+                     for instance_id, datapoints in self.upload_queue.items()
+                     if len(datapoints) > 0
+                 ]
+             )
+
+             for datapoints in self.upload_queue.values():
+                 self.points_written.inc(len(datapoints))
+
+             try:
+                 self._post_upload(upload_this)
+             except Exception as e:
+                 self.logger.error("Error in upload callback: %s", str(e))
+
+             self.upload_queue.clear()
+             self.logger.info(f"Uploaded {self.upload_queue_size} datapoints")
+             self.upload_queue_size = 0
+             self.queue_size.set(self.upload_queue_size)


  class SequenceUploadQueue(AbstractUploadQueue):
+     """
+     Upload queue for sequences.
+
+     Args:
+         cdf_client: Cognite Data Fusion client to use
+         post_upload_function: A function that will be called after each upload. The function will be given one
+             argument: A list of the events that were uploaded.
+         max_queue_size: Maximum size of upload queue. Defaults to no max size.
+         max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+             methods).
+         trigger_log_level: Log level to log upload triggers to.
+         thread_name: Thread name of uploader thread.
+         create_missing: Create missing sequences if possible (ie, if external id is used).
+     """
+
      def __init__(
          self,
          cdf_client: CogniteClient,
@@ -352,19 +637,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
          create_missing: bool = False,
          cancellation_token: CancellationToken | None = None,
      ):
-         """
-         Args:
-             cdf_client: Cognite Data Fusion client to use
-             post_upload_function: A function that will be called after each upload. The function will be given one
-                 argument: A list of the events that were uploaded.
-             max_queue_size: Maximum size of upload queue. Defaults to no max size.
-             max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-                 methods).
-             trigger_log_level: Log level to log upload triggers to.
-             thread_name: Thread name of uploader thread.
-             create_missing: Create missing sequences if possible (ie, if external id is used)
-         """
-
          # Super sets post_upload and threshold
          super().__init__(
              cdf_client,
@@ -393,7 +665,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
      def set_sequence_metadata(
          self,
          metadata: dict[str, str | int | float],
-         id: int | None = None,
+         id: int | None = None,  # noqa: A002
          external_id: str | None = None,
          asset_external_id: str | None = None,
          dataset_external_id: str | None = None,
@@ -401,8 +673,10 @@ class SequenceUploadQueue(AbstractUploadQueue):
          description: str | None = None,
      ) -> None:
          """
-         Set sequence metadata. Metadata will be cached until the sequence is created. The metadata will be updated
-         if the sequence already exists
+         Set sequence metadata.
+
+         Metadata will be cached until the sequence is created. The metadata will be updated if the sequence already
+         exists.

          Args:
              metadata: Sequence metadata
@@ -427,10 +701,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
          self.sequence_descriptions[either_id] = description

      def set_sequence_column_definition(
-         self, col_def: list[dict[str, str]], id: int | None = None, external_id: str | None = None
+         self,
+         col_def: list[dict[str, str]],
+         id: int | None = None,  # noqa: A002
+         external_id: str | None = None,
      ) -> None:
          """
-         Set sequence column definition
+         Set sequence column definition.

          Args:
              col_def: Sequence column definition
@@ -450,12 +727,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
          | SequenceData
          | SequenceRows,
          column_external_ids: list[dict] | None = None,
-         id: int | None = None,
+         id: int | None = None,  # noqa: A002
          external_id: str | None = None,
      ) -> None:
          """
-         Add sequence rows to upload queue. Mirrors implementation of SequenceApi.insert. Inserted rows will be
-         cached until uploaded
+         Add sequence rows to upload queue.
+
+         Mirrors implementation of SequenceApi.insert. Inserted rows will be cached until uploaded.

          Args:
              rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]
@@ -466,7 +744,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
              external_id: Sequence external ID
                  Us if id is None
          """
-
          if len(rows) == 0:
              pass

@@ -509,7 +786,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

      def upload(self) -> None:
          """
-         Trigger an upload of the queue, clears queue afterwards
+         Trigger an upload of the queue, clears queue afterwards.
          """

          @retry(
@@ -571,15 +848,14 @@ class SequenceUploadQueue(AbstractUploadQueue):

      def _create_or_update(self, either_id: EitherId) -> None:
          """
-         Create or update sequence, based on provided metadata and column definitions
+         Create or update sequence, based on provided metadata and column definitions.

          Args:
              either_id: Id/External Id of sequence to be updated
          """
-
          column_def = self.column_definitions.get(either_id)
          if column_def is None:
-             self.logger.error(f"Can't create sequence {str(either_id)}, no column definitions provided")
+             self.logger.error(f"Can't create sequence {either_id!s}, no column definitions provided")

          try:
              seq = self.cdf_client.sequences.create(
@@ -596,7 +872,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
              )

          except CogniteDuplicatedError:
-             self.logger.info(f"Sequnce already exist: {either_id}")
+             self.logger.info(f"Sequence already exist: {either_id}")
              seq = self.cdf_client.sequences.retrieve(  # type: ignore [assignment]
                  id=either_id.internal_id,
                  external_id=either_id.external_id,
@@ -608,7 +884,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

      def _resolve_asset_ids(self) -> None:
          """
-         Resolve id of assets if specified, for use in sequence creation
+         Resolve id of assets if specified, for use in sequence creation.
          """
          assets = set(self.sequence_asset_external_ids.values())
          assets.discard(None)  # type: ignore # safeguard, remove Nones if any
@@ -628,7 +904,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

      def _resolve_dataset_ids(self) -> None:
          """
-         Resolve id of datasets if specified, for use in sequence creation
+         Resolve id of datasets if specified, for use in sequence creation.
          """
          datasets = set(self.sequence_dataset_external_ids.values())
          datasets.discard(None)  # type: ignore # safeguard, remove Nones if any
@@ -648,7 +924,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

      def __enter__(self) -> "SequenceUploadQueue":
          """
-         Wraps around start method, for use as context manager
+         Wraps around start method, for use as context manager.

          Returns:
              self
@@ -660,7 +936,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
          self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
      ) -> None:
          """
-         Wraps around stop method, for use as context manager
+         Wraps around stop method, for use as context manager.

          Args:
              exc_type: Exception type
@@ -671,7 +947,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

      def __len__(self) -> int:
          """
-         The size of the upload queue
+         The size of the upload queue.

          Returns:
              Number of data points in queue
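
The TimeSeriesUploadQueue docstring restored in the diff above documents the factory form of create_missing and the data_set_id fallback. A short illustrative sketch of that usage; the factory body, external id and data set id are made up for this example:

    from cognite.client import CogniteClient
    from cognite.client.data_classes import TimeSeries

    from cognite.extractorutils.uploader.time_series import DataPointList, TimeSeriesUploadQueue

    client = CogniteClient()  # assumes credentials are configured elsewhere

    def my_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
        # Hypothetical factory: decides how a missing time series should be created
        return TimeSeries(external_id=external_id, name=external_id, is_string=False)

    queue = TimeSeriesUploadQueue(
        cdf_client=client,
        create_missing=my_time_series_factory,  # factory form of create_missing
        data_set_id=123,  # hypothetical data set id, used if the factory does not set one
    )
    queue.add_to_upload_queue(external_id="my-timeseries", datapoints=[(1700000000000, 1.0)])
    queue.upload()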