cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +9 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +370 -94
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +47 -9
- cognite/extractorutils/uploader_types.py +26 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.7.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/licenses/LICENSE +0 -0
cognite/extractorutils/uploader/time_series.py

@@ -1,3 +1,6 @@
+"""
+Upload queue for time series and sequences.
+"""
# Copyright 2023 Cognite AS
#
# Licensed under the Apache License, Version 2.0 (the "License");

@@ -16,7 +19,7 @@ import math
from collections.abc import Callable
from datetime import datetime
from types import TracebackType
-from typing import Any
+from typing import Any, Generic, Literal, TypedDict, TypeVar

from cognite.client import CogniteClient
from cognite.client.data_classes import (

@@ -26,6 +29,9 @@ from cognite.client.data_classes import (
    StatusCode,
    TimeSeries,
)
+from cognite.client.data_classes.data_modeling import NodeId
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorTimeSeriesApply
+from cognite.client.data_classes.data_modeling.instances import DirectRelationReference
from cognite.client.exceptions import CogniteDuplicatedError, CogniteNotFoundError
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader._base import (

@@ -59,6 +65,18 @@ DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp,
DataPoint = DataPointWithoutStatus | DataPointWithStatus
DataPointList = list[DataPoint]

+TQueue = TypeVar("TQueue", bound="BaseTimeSeriesUploadQueue")
+IdType = TypeVar("IdType", EitherId, NodeId)
+
+
+class CdmDatapointsPayload(TypedDict):
+    """
+    Represents a payload for CDF datapoints, linking them to a specific instance.
+    """
+
+    instanceId: NodeId
+    datapoints: DataPointList
+

def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
    """

@@ -79,9 +97,9 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
    return TimeSeries(external_id=external_id, is_string=is_string)


-class TimeSeriesUploadQueue(AbstractUploadQueue):
+class BaseTimeSeriesUploadQueue(AbstractUploadQueue, Generic[IdType]):
    """
-…
+    Abstract base upload queue for time series.

    Args:
        cdf_client: Cognite Data Fusion client to use

@@ -93,12 +111,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            methods).
        trigger_log_level: Log level to log upload triggers to.
        thread_name: Thread name of uploader thread.
-        create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
-            (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
-            external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
-        data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
-            If a custom timeseries creation method is set in create_missing, this is used as fallback if
-            that method does not set data set id on its own.
    """

    def __init__(

@@ -109,8 +121,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
        max_upload_interval: int | None = None,
        trigger_log_level: str = "DEBUG",
        thread_name: str | None = None,
-        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
-        data_set_id: int | None = None,
        cancellation_token: CancellationToken | None = None,
    ):
        # Super sets post_upload and threshold

@@ -124,24 +134,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            cancellation_token,
        )

-        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
-
-        if isinstance(create_missing, bool):
-            self.create_missing = create_missing
-            self.missing_factory = default_time_series_factory
-        else:
-            self.create_missing = True
-            self.missing_factory = create_missing
-
-        self.upload_queue: dict[EitherId, DataPointList] = {}
+        self.upload_queue: dict[IdType, DataPointList] = {}

        self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
        self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
        self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
-        self.data_set_id = data_set_id

    def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
-        if isinstance(time, int…
+        if isinstance(time, int | float):
            return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
        elif isinstance(time, str):
            return False

@@ -155,10 +155,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            )
        elif isinstance(value, str):
            return len(value) <= MAX_DATAPOINT_STRING_LENGTH
-…
-            return False
-        else:
-            return True
+        return not isinstance(value, datetime)

    def _is_datapoint_valid(
        self,

@@ -171,18 +168,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
        else:
            return True

-    def add_to_upload_queue(
-        self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
-    ) -> None:
-        """
-        Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
-        specified in the __init__.
-
-        Args:
-            id: Internal ID of time series. Either this or external_id must be set.
-            external_id: External ID of time series. Either this or external_id must be set.
-            datapoints: list of data points to add
-        """
+    def _sanitize_datapoints(self, datapoints: DataPointList | None) -> DataPointList:
        datapoints = datapoints or []
        old_len = len(datapoints)
        datapoints = list(filter(self._is_datapoint_valid, datapoints))

@@ -194,6 +180,116 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            self.logger.warning(f"Discarding {diff} datapoints due to bad timestamp or value")
            TIMESERIES_UPLOADER_POINTS_DISCARDED.inc(diff)

+        return datapoints
+
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager.
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
+
+    def __len__(self) -> int:
+        """
+        The size of the upload queue.
+
+        Returns:
+            Number of data points in queue
+        """
+        return self.upload_queue_size
+
+    def __enter__(self: TQueue) -> TQueue:
+        """
+        Wraps around start method, for use as context manager.
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+
+class TimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[EitherId]):
+    """
+    Upload queue for time series.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+            datapoints upload in the Cognite SDK).
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
+            (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
+            external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
+        data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
+            If a custom timeseries creation method is set in create_missing, this is used as fallback if
+            that method does not set data set id on its own.
+    """
+
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: str | None = None,
+        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+        data_set_id: int | None = None,
+        cancellation_token: CancellationToken | None = None,
+    ):
+        # Super sets post_upload and threshold
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
+
+        if isinstance(create_missing, bool):
+            self.create_missing = create_missing
+            self.missing_factory = default_time_series_factory
+        else:
+            self.create_missing = True
+            self.missing_factory = create_missing
+
+        self.data_set_id = data_set_id
+
+    def add_to_upload_queue(
+        self,
+        *,
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
+        datapoints: DataPointList | None = None,
+    ) -> None:
+        """
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
+
+        Args:
+            id: Internal ID of time series. Either this or external_id must be set.
+            external_id: External ID of time series. Either this or external_id must be set.
+            datapoints: list of data points to add
+        """
+        datapoints = self._sanitize_datapoints(datapoints)
+
        either_id = EitherId(id=id, external_id=external_id)

        with self.lock:
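The `create_missing` and `data_set_id` options are no longer on the shared base class; they now belong to the concrete `TimeSeriesUploadQueue`, which otherwise keeps its old behaviour. A minimal usage sketch, assuming an already configured `CogniteClient`; the external ID, timestamps, values and data set ID below are placeholders:

from cognite.client import CogniteClient
from cognite.client.data_classes import TimeSeries
from cognite.extractorutils.uploader.time_series import DataPointList, TimeSeriesUploadQueue

client = CogniteClient()  # assumed to be configured elsewhere


def my_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
    # Custom factory for create_missing: called with the external ID and the
    # datapoints about to be inserted, returns the TimeSeries to create
    return TimeSeries(external_id=external_id, name=external_id, is_string=False)


# Context manager: start() on enter, stop() with a final upload on exit
with TimeSeriesUploadQueue(
    client,
    max_upload_interval=30,      # upload at least every 30 seconds when run threaded
    create_missing=my_factory,   # or simply True to use the default factory
    data_set_id=123,             # placeholder data set ID, fallback for created series
) as queue:
    queue.add_to_upload_queue(
        external_id="my-sensor",                  # placeholder external ID
        datapoints=[(1_700_000_000_000, 42.0)],   # (timestamp in ms, value)
    )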
@@ -209,7 +305,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

    def upload(self) -> None:
        """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
        """

        @retry(

@@ -239,9 +335,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

            if self.create_missing:
                # Get the time series that can be created
-                create_these_ids = …
-                    [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
-                )
+                create_these_ids = {id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict}
                datapoints_lists: dict[str, DataPointList] = {
                    ts_dict["externalId"]: ts_dict["datapoints"]
                    for ts_dict in upload_this

@@ -294,7 +388,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                ]
            )

-            for …
+            for datapoints in self.upload_queue.values():
                self.points_written.inc(len(datapoints))

            try:

@@ -307,40 +401,231 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            self.upload_queue_size = 0
            self.queue_size.set(self.upload_queue_size)

-…
+
+class CDMTimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[NodeId]):
+    """
+    Upload queue for CDM time series.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+            datapoints upload in the Cognite SDK).
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+    """
+
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: str | None = None,
+        create_missing: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply] | bool = False,
+        cancellation_token: CancellationToken | None = None,
+        source: DirectRelationReference | None = None,
+    ):
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.missing_factory: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply]
+        self.source = source
+
+        if isinstance(create_missing, bool):
+            self.create_missing = create_missing
+            self.missing_factory = self.default_cdm_time_series_factory
+        else:
+            self.create_missing = True
+            self.missing_factory = create_missing
+
+    def default_cdm_time_series_factory(
+        self, instance_id: NodeId, datapoints: DataPointList
+    ) -> CogniteExtractorTimeSeriesApply:
        """
-…
+        Default CDM time series factory used when create_missing in a CDMTimeSeriesUploadQueue is given as a boolean.

+        Args:
+            instance_id: Instance ID of time series to create
+            datapoints: The list of datapoints that were tried to be inserted
+            source: The source of the time series, used for creating the DirectRelationReference
        Returns:
-…
+            A CogniteExtractorTimeSeriesApply object with instance_id set, and the is_string automatically detected
        """
-…
-…
+        is_string = (
+            isinstance(datapoints[0].get("value"), str)
+            if isinstance(datapoints[0], dict)
+            else isinstance(datapoints[0][1], str)
+        )

-…
-…
+        time_series_type: Literal["numeric", "string"] = "string" if is_string else "numeric"
+
+        return CogniteExtractorTimeSeriesApply(
+            space=instance_id.space,
+            external_id=instance_id.external_id,
+            is_step=False,
+            time_series_type=time_series_type,
+            source=self.source,
+        )
+
+    def add_to_upload_queue(
+        self,
+        *,
+        instance_id: NodeId,
+        datapoints: DataPointList | None = None,
    ) -> None:
        """
-…
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the __init__.

        Args:
-…
-…
-            exc_tb: Traceback
+            instance_id: The identifier for the time series to which the datapoints belong.
+            datapoints: list of data points to add
        """
-        self.…
+        datapoints = self._sanitize_datapoints(datapoints)

-…
-…
-…
+        with self.lock:
+            if instance_id not in self.upload_queue:
+                self.upload_queue[instance_id] = []

-…
-…
+            self.upload_queue[instance_id].extend(datapoints)
+            self.points_queued.inc(len(datapoints))
+            self.upload_queue_size += len(datapoints)
+            self.queue_size.set(self.upload_queue_size)
+
+            self._check_triggers()
+
+    def upload(self) -> None:
        """
-…
+        Trigger an upload of the queue, clears queue afterwards.
+        """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(upload_this: list[CdmDatapointsPayload], retries: int = 5) -> list[CdmDatapointsPayload]:
+            if len(upload_this) == 0:
+                return upload_this
+
+            try:
+                self.cdf_client.time_series.data.insert_multiple(upload_this)  # type: ignore[arg-type]
+            except CogniteNotFoundError as ex:
+                if not retries:
+                    raise ex
+
+                if not self.create_missing:
+                    self.logger.error("Could not upload data points to %s: %s", str(ex.not_found), str(ex))
+
+                # Get IDs of time series that exists, but failed because of the non-existing time series
+                retry_these = [
+                    NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                    for id_dict in ex.failed
+                    if id_dict not in ex.not_found
+                ]
+
+                if self.create_missing:
+                    # Get the time series that can be created
+                    create_these_ids = {
+                        NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                        for id_dict in ex.not_found
+                    }
+                    self.logger.info(f"Creating {len(create_these_ids)} time series")
+
+                    datapoints_lists: dict[NodeId, DataPointList] = {
+                        ts_dict["instanceId"]: ts_dict["datapoints"]
+                        for ts_dict in upload_this
+                        if ts_dict["instanceId"] in create_these_ids
+                    }
+
+                    to_create: list[CogniteExtractorTimeSeriesApply] = [
+                        self.missing_factory(instance_id, datapoints_lists[instance_id])
+                        for instance_id in create_these_ids
+                    ]
+
+                    instance_result = self.cdf_client.data_modeling.instances.apply(to_create)
+                    retry_these.extend([node.as_id() for node in instance_result.nodes])
+
+                    if len(ex.not_found) != len(create_these_ids):
+                        missing = [
+                            id_dict
+                            for id_dict in ex.not_found
+                            if NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                            not in retry_these
+                        ]
+                        missing_num = len(ex.not_found) - len(create_these_ids)
+                        self.logger.error(
+                            f"{missing_num} time series not found, and could not be created automatically: "
+                            + str(missing)
+                            + " Data will be dropped"
+                        )
+
+                # Remove entries with non-existing time series from upload queue
+                upload_this = [entry for entry in upload_this if entry["instanceId"] in retry_these]
+
+                # Upload remaining
+                _upload_batch(upload_this, retries - 1)
+
+            return upload_this
+
+        if len(self.upload_queue) == 0:
+            return
+
+        with self.lock:
+            upload_this = _upload_batch(
+                [
+                    {"instanceId": instance_id, "datapoints": list(datapoints)}
+                    for instance_id, datapoints in self.upload_queue.items()
+                    if len(datapoints) > 0
+                ]
+            )
+
+            for datapoints in self.upload_queue.values():
+                self.points_written.inc(len(datapoints))
+
+            try:
+                self._post_upload(upload_this)
+            except Exception as e:
+                self.logger.error("Error in upload callback: %s", str(e))
+
+            self.upload_queue.clear()
+            self.logger.info(f"Uploaded {self.upload_queue_size} datapoints")
+            self.upload_queue_size = 0
+            self.queue_size.set(self.upload_queue_size)


class SequenceUploadQueue(AbstractUploadQueue):
+    """
+    Upload queue for sequences.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one
+            argument: A list of the events that were uploaded.
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing sequences if possible (ie, if external id is used).
+    """
+
    def __init__(
        self,
        cdf_client: CogniteClient,
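The new `CDMTimeSeriesUploadQueue` mirrors `TimeSeriesUploadQueue`, but keys the queue by data-modeling `NodeId` instead of internal/external ID and creates missing time series as `CogniteExtractorTimeSeriesApply` instances. A minimal sketch of how it might be used, assuming a configured `CogniteClient`; the space, external ID and datapoints are placeholders:

from cognite.client import CogniteClient
from cognite.client.data_classes.data_modeling import NodeId
from cognite.extractorutils.uploader.time_series import CDMTimeSeriesUploadQueue

client = CogniteClient()  # assumed to be configured elsewhere

with CDMTimeSeriesUploadQueue(
    client,
    max_upload_interval=30,
    create_missing=True,  # missing instances are written via the default CDM factory
) as queue:
    queue.add_to_upload_queue(
        instance_id=NodeId("my-space", "my-time-series"),  # placeholder space / external ID
        datapoints=[(1_700_000_000_000, 42.0)],            # (timestamp in ms, value)
    )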
@@ -352,19 +637,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
        create_missing: bool = False,
        cancellation_token: CancellationToken | None = None,
    ):
-        """
-        Args:
-            cdf_client: Cognite Data Fusion client to use
-            post_upload_function: A function that will be called after each upload. The function will be given one
-                argument: A list of the events that were uploaded.
-            max_queue_size: Maximum size of upload queue. Defaults to no max size.
-            max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-                methods).
-            trigger_log_level: Log level to log upload triggers to.
-            thread_name: Thread name of uploader thread.
-            create_missing: Create missing sequences if possible (ie, if external id is used)
-        """
-
        # Super sets post_upload and threshold
        super().__init__(
            cdf_client,

@@ -393,7 +665,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
    def set_sequence_metadata(
        self,
        metadata: dict[str, str | int | float],
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
        external_id: str | None = None,
        asset_external_id: str | None = None,
        dataset_external_id: str | None = None,

@@ -401,8 +673,10 @@ class SequenceUploadQueue(AbstractUploadQueue):
        description: str | None = None,
    ) -> None:
        """
-        Set sequence metadata.
-…
+        Set sequence metadata.
+
+        Metadata will be cached until the sequence is created. The metadata will be updated if the sequence already
+        exists.

        Args:
            metadata: Sequence metadata

@@ -427,10 +701,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
            self.sequence_descriptions[either_id] = description

    def set_sequence_column_definition(
-        self, …
+        self,
+        col_def: list[dict[str, str]],
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
    ) -> None:
        """
-        Set sequence column definition
+        Set sequence column definition.

        Args:
            col_def: Sequence column definition

@@ -450,12 +727,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
        | SequenceData
        | SequenceRows,
        column_external_ids: list[dict] | None = None,
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
        external_id: str | None = None,
    ) -> None:
        """
-        Add sequence rows to upload queue.
-…
+        Add sequence rows to upload queue.
+
+        Mirrors implementation of SequenceApi.insert. Inserted rows will be cached until uploaded.

        Args:
            rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]

@@ -466,7 +744,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
            external_id: Sequence external ID
                Us if id is None
        """
-
        if len(rows) == 0:
            pass


@@ -509,7 +786,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def upload(self) -> None:
        """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
        """

        @retry(

@@ -571,15 +848,14 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def _create_or_update(self, either_id: EitherId) -> None:
        """
-        Create or update sequence, based on provided metadata and column definitions
+        Create or update sequence, based on provided metadata and column definitions.

        Args:
            either_id: Id/External Id of sequence to be updated
        """
-
        column_def = self.column_definitions.get(either_id)
        if column_def is None:
-            self.logger.error(f"Can't create sequence {…
+            self.logger.error(f"Can't create sequence {either_id!s}, no column definitions provided")

        try:
            seq = self.cdf_client.sequences.create(

@@ -596,7 +872,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
            )

        except CogniteDuplicatedError:
-            self.logger.info(f"…
+            self.logger.info(f"Sequence already exist: {either_id}")
            seq = self.cdf_client.sequences.retrieve(  # type: ignore [assignment]
                id=either_id.internal_id,
                external_id=either_id.external_id,

@@ -608,7 +884,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def _resolve_asset_ids(self) -> None:
        """
-        Resolve id of assets if specified, for use in sequence creation
+        Resolve id of assets if specified, for use in sequence creation.
        """
        assets = set(self.sequence_asset_external_ids.values())
        assets.discard(None)  # type: ignore # safeguard, remove Nones if any

@@ -628,7 +904,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def _resolve_dataset_ids(self) -> None:
        """
-        Resolve id of datasets if specified, for use in sequence creation
+        Resolve id of datasets if specified, for use in sequence creation.
        """
        datasets = set(self.sequence_dataset_external_ids.values())
        datasets.discard(None)  # type: ignore # safeguard, remove Nones if any

@@ -648,7 +924,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def __enter__(self) -> "SequenceUploadQueue":
        """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.

        Returns:
            self

@@ -660,7 +936,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.

        Args:
            exc_type: Exception type

@@ -671,7 +947,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def __len__(self) -> int:
        """
-        The size of the upload queue
+        The size of the upload queue.

        Returns:
            Number of data points in queue