cognite-extractor-utils 5.1.0__py3-none-any.whl → 5.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +4 -3
- cognite/extractorutils/base.py +22 -24
- cognite/extractorutils/configtools/__init__.py +3 -3
- cognite/extractorutils/configtools/_util.py +18 -12
- cognite/extractorutils/configtools/elements.py +19 -17
- cognite/extractorutils/configtools/loaders.py +21 -13
- cognite/extractorutils/metrics.py +18 -10
- cognite/extractorutils/middleware.py +9 -4
- cognite/extractorutils/statestore.py +19 -17
- cognite/extractorutils/uploader/_base.py +2 -9
- cognite/extractorutils/uploader/events.py +25 -7
- cognite/extractorutils/uploader/files.py +17 -12
- cognite/extractorutils/uploader/raw.py +10 -7
- cognite/extractorutils/uploader/time_series.py +87 -63
- cognite/extractorutils/uploader_extractor.py +8 -9
- cognite/extractorutils/util.py +39 -22
- {cognite_extractor_utils-5.1.0.dist-info → cognite_extractor_utils-5.2.0.dist-info}/METADATA +1 -2
- cognite_extractor_utils-5.2.0.dist-info/RECORD +26 -0
- cognite_extractor_utils-5.1.0.dist-info/RECORD +0 -26
- {cognite_extractor_utils-5.1.0.dist-info → cognite_extractor_utils-5.2.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-5.1.0.dist-info → cognite_extractor_utils-5.2.0.dist-info}/WHEEL +0 -0
cognite/extractorutils/statestore.py

@@ -89,14 +89,13 @@ import json
 import logging
 import threading
 from abc import ABC, abstractmethod
-from
-from
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from types import TracebackType
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

-from cognite.client import CogniteClient
-from cognite.client.exceptions import CogniteAPIError
 from requests.exceptions import ConnectionError

+from cognite.client import CogniteClient
+from cognite.client.exceptions import CogniteAPIError
 from cognite.extractorutils.uploader import DataPointList

 from ._inner_util import _DecimalDecoder, _DecimalEncoder, _resolve_log_level
@@ -140,8 +139,6 @@ class AbstractStateStore(ABC):

         self._deleted: List[str] = []

-        self.lock = Lock()
-
     def start(self) -> None:
         """
         Start saving state periodically if save_interval is set.
@@ -203,12 +200,12 @@ class AbstractStateStore(ABC):
         """
         with self.lock:
             if isinstance(external_id, list):
-
+                states = []
                 for e in external_id:
                     state = self._local_state.get(e, {})
-
+                    states.append((state.get("low"), state.get("high")))

-                return
+                return states

             else:
                 state = self._local_state.get(external_id, {})
@@ -263,14 +260,14 @@ class AbstractStateStore(ABC):
             A function that expands the current states with the values given
         """

-        def callback(uploaded_points: List[Dict[str, Union[str, DataPointList]]]):
+        def callback(uploaded_points: List[Dict[str, Union[str, DataPointList]]]) -> None:
             for time_series in uploaded_points:
                 # Use CDF timestamps
                 data_points = time_series["datapoints"]
                 if data_points:
                     high = max(data_points)[0]
                     low = min(data_points)[0]
-                    external_id = time_series["externalId"]
+                    external_id: str = time_series["externalId"]  # type: ignore # known to be str from where we set it
                     self.expand_state(external_id, low, high)

         return callback
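This callback is what a state store hands to an upload queue as its `post_upload_function`, so watermarks only advance for datapoints that actually reached CDF. A minimal usage sketch, assuming the factory enclosing the callback is the store's `post_upload_handler()` (the method name sits outside this hunk) and a client configured elsewhere:

```python
from cognite.client import CogniteClient
from cognite.extractorutils.statestore import LocalStateStore
from cognite.extractorutils.uploader import TimeSeriesUploadQueue

client = CogniteClient()  # assumes credentials/config are provided elsewhere

states = LocalStateStore("states.json")
states.initialize()

# The callback above becomes the queue's post_upload_function, expanding the
# stored (low, high) watermark for each time series after a successful upload.
queue = TimeSeriesUploadQueue(client, post_upload_function=states.post_upload_handler())

queue.add_to_upload_queue(external_id="my-ts", datapoints=[(1670000000000, 1.0)])
queue.upload()

low, high = states.get_state("my-ts")  # now covers the uploaded range
```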
@@ -301,7 +298,7 @@ class AbstractStateStore(ABC):
             return False

     def __getitem__(self, external_id: str) -> Tuple[Any, Any]:
-        return self.get_state(external_id)
+        return self.get_state(external_id)  # type: ignore # will not be list if input is single str

     def __setitem__(self, key: str, value: Tuple[Any, Any]) -> None:
         self.set_state(external_id=key, low=value[0], high=value[1])
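The type-ignore is needed because `get_state` is typed to return either a single `(low, high)` tuple or a list of them, while `__getitem__` always passes a single string. These dunder methods give every state store a small dict-like interface; a short sketch using the in-memory `NoStateStore` from further down in this file:

```python
from cognite.extractorutils.statestore import NoStateStore

states = NoStateStore()

# __setitem__ delegates to set_state(external_id=key, low=value[0], high=value[1])
states["my-extraction-unit"] = (0, 1_000)

# __getitem__ delegates to get_state(); for a single string key the result is a
# (low, high) tuple, which is what the added type-ignore asserts.
low, high = states["my-extraction-unit"]
```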
@@ -382,7 +379,8 @@ class RawStateStore(AbstractStateStore):
         if self._initialized and not force:
             return

-
+        # ignore type since list _is_ optional, sdk types are wrong
+        rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None) # type: ignore

         with self.lock:
             self._local_state.clear()
@@ -422,7 +420,9 @@ class RawStateStore(AbstractStateStore):
         self.start()
         return self

-    def __exit__(
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager

@@ -500,7 +500,9 @@ class LocalStateStore(AbstractStateStore):
         self.start()
         return self

-    def __exit__(
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager

@@ -517,7 +519,7 @@ class NoStateStore(AbstractStateStore):
     A state store that only keeps states in memory and never stores or initializes from external sources.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()

     def initialize(self, force: bool = False) -> None:
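The `RawStateStore` and `LocalStateStore` hunks above replace the untyped `__exit__` with the standard context-manager parameters (exception type, value, and traceback, each optional); behavior is unchanged, since `__enter__` still calls `start()` and `__exit__` wraps `stop()`. A usage sketch, with the constructor arguments inferred from `self.database`/`self.table` in the initialize hunk and a client assumed to be configured elsewhere:

```python
from cognite.client import CogniteClient
from cognite.extractorutils.statestore import RawStateStore

client = CogniteClient()  # assumes credentials/config are provided elsewhere

# __enter__ starts the periodic-save thread; __exit__ stops it, so states are
# flushed to the backing RAW table when the block ends.
with RawStateStore(client, database="extractor", table="states") as states:
    states.set_state("my-id", low=0, high=100)
```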
cognite/extractorutils/uploader/_base.py

@@ -19,8 +19,8 @@ from dataclasses import dataclass
 from typing import Any, Callable, List, Optional

 from arrow import Arrow
-from cognite.client import CogniteClient

+from cognite.client import CogniteClient
 from cognite.extractorutils._inner_util import _resolve_log_level

@@ -88,16 +88,9 @@ class AbstractUploadQueue(ABC):
         if self.post_upload_function is not None:
             try:
                 self.post_upload_function(uploaded)
-            except Exception
+            except Exception:
                 logging.getLogger(__name__).exception("Error during upload callback")

-    @abstractmethod
-    def add_to_upload_queue(self, *args) -> None:
-        """
-        Adds an element to the upload queue. The queue will be uploaded if the queue byte size is larger than the
-        threshold specified in the config.
-        """
-
     @abstractmethod
     def upload(self) -> None:
         """
cognite/extractorutils/uploader/events.py

@@ -13,14 +13,15 @@
 # limitations under the License.

 import threading
-from
+from types import TracebackType
+from typing import Callable, List, Optional, Type

 import arrow
-from cognite.client import CogniteClient
-from cognite.client.data_classes import Event
-from cognite.client.exceptions import CogniteAPIError
 from requests import ConnectionError

+from cognite.client import CogniteClient
+from cognite.client.data_classes import Event
+from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -131,8 +132,23 @@ class EventUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_batch(self):
-
+    def _upload_batch(self) -> None:
+        try:
+            self.cdf_client.events.create([e for e in self.upload_queue])
+        except CogniteDuplicatedError as e:
+            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+            failed: List[Event] = [e for e in e.failed]
+            to_create = []
+            to_update = []
+            for evt in failed:
+                if evt.external_id is not None and evt.external_id in duplicated_ids:
+                    to_update.append(evt)
+                else:
+                    to_create.append(evt)
+            if to_create:
+                self.cdf_client.events.create(to_create)
+            if to_update:
+                self.cdf_client.events.update(to_update)

     def __enter__(self) -> "EventUploadQueue":
         """
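This is the main behavioral change in events.py: instead of letting an externalId conflict fail the whole batch, `_upload_batch` now catches `CogniteDuplicatedError`, splits the failed events into fresh creates and updates of the already-existing duplicates, and retries both halves. From the caller's side the queue is used as before; a sketch, assuming a configured client and the usual re-export of `EventUploadQueue` from `cognite.extractorutils.uploader`:

```python
from cognite.client import CogniteClient
from cognite.client.data_classes import Event
from cognite.extractorutils.uploader import EventUploadQueue

client = CogniteClient()  # assumes credentials/config are provided elsewhere

with EventUploadQueue(client, max_queue_size=1000) as queue:
    # Re-queueing an event whose externalId already exists in CDF is now an
    # update rather than an error, per the CogniteDuplicatedError handling above.
    queue.add_to_upload_queue(Event(external_id="evt-1", description="updated"))
# Leaving the block flushes remaining events via stop()
```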
@@ -144,7 +160,9 @@ class EventUploadQueue(AbstractUploadQueue):
         self.start()
         return self

-    def __exit__(
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager

cognite/extractorutils/uploader/files.py

@@ -15,14 +15,15 @@
 import threading
 from concurrent.futures import ThreadPoolExecutor
 from os import PathLike
-from
+from types import TracebackType
+from typing import Any, Callable, List, Optional, Tuple, Type, Union

 import arrow
+from requests import ConnectionError
+
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Event, FileMetadata
 from cognite.client.exceptions import CogniteAPIError
-from requests import ConnectionError
-
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -89,7 +90,7 @@ class FileUploadQueue(AbstractUploadQueue):
         self.latency = FILES_UPLOADER_LATENCY
         self.latency_zero_point = arrow.utcnow()

-    def add_to_upload_queue(self, file_meta: FileMetadata, file_name: Union[str, PathLike]
+    def add_to_upload_queue(self, file_meta: FileMetadata, file_name: Union[str, PathLike]) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
         specified in the __init__.
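With the signature restored, the queue accepts any path-like alongside the file's metadata; `_upload_single` in the next hunk converts it with `str()` before the SDK call. A usage sketch, assuming a configured client:

```python
from pathlib import Path

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader import FileUploadQueue

client = CogniteClient()  # assumes credentials/config are provided elsewhere

with FileUploadQueue(client, max_queue_size=10) as queue:
    # file_name may be a str or PathLike; uploads run when the queue fills
    # or when the block exits.
    queue.add_to_upload_queue(
        FileMetadata(external_id="report-2023", name="report.pdf"),
        Path("data/report.pdf"),
    )
```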
@@ -141,14 +142,14 @@ class FileUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_single(self, index, file_name, file_meta):
+    def _upload_single(self, index: int, file_name: Union[str, PathLike], file_meta: FileMetadata) -> None:
         # Upload file
-        file_meta = self.cdf_client.files.upload(file_name, overwrite=self.overwrite_existing, **file_meta.dump())
+        file_meta = self.cdf_client.files.upload(str(file_name), overwrite=self.overwrite_existing, **file_meta.dump()) # type: ignore

         # Update meta-object in queue
         self.upload_queue[index] = (file_meta, file_name)

-    def _upload_batch(self):
+    def _upload_batch(self) -> None:
         # Concurrently execute file-uploads

         with ThreadPoolExecutor(self.cdf_client.config.max_workers) as pool:
@@ -165,7 +166,9 @@ class FileUploadQueue(AbstractUploadQueue):
         self.start()
         return self

-    def __exit__(
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager

@@ -212,7 +215,7 @@ class BytesUploadQueue(AbstractUploadQueue):
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
         cancellation_token: threading.Event = threading.Event(),
-    ):
+    ) -> None:
         super().__init__(
             cdf_client,
             post_upload_function,
@@ -277,7 +280,7 @@ class BytesUploadQueue(AbstractUploadQueue):
         self.logger.info(f"Uploaded {self.upload_queue_size} files")
         self.queue_size.set(self.upload_queue_size)

-    def _upload_batch(self):
+    def _upload_batch(self) -> None:
         # Concurrently execute bytes-uploads
         with ThreadPoolExecutor(self.cdf_client.config.max_workers) as pool:
             for i, (frame, metadata) in enumerate(self.upload_queue):
@@ -290,7 +293,7 @@ class BytesUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_single(self, index: int, content: bytes, metadata: FileMetadata):
+    def _upload_single(self, index: int, content: bytes, metadata: FileMetadata) -> None:
         # Upload object
         file_meta_data: FileMetadata = self.cdf_client.files.upload_bytes(
             content, overwrite=self.overwrite_existing, **metadata.dump()
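`BytesUploadQueue` gets the same treatment as `FileUploadQueue`: `-> None` annotations on `__init__`, `_upload_batch`, and `_upload_single`, plus a typed `__exit__` in the next hunk. It queues in-memory blobs rather than paths; the sketch below infers the `add_to_upload_queue(content, metadata)` shape from `_upload_single` above, so treat that as an assumption:

```python
from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader import BytesUploadQueue

client = CogniteClient()  # assumes credentials/config are provided elsewhere

with BytesUploadQueue(client, overwrite_existing=True) as queue:
    # Each queued entry is a (bytes, FileMetadata) pair, uploaded concurrently
    # through files.upload_bytes in _upload_batch.
    queue.add_to_upload_queue(b"hello, CDF", FileMetadata(external_id="blob-1", name="hello.txt"))
```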
@@ -309,7 +312,9 @@ class BytesUploadQueue(AbstractUploadQueue):
         self.start()
         return self

-    def __exit__(
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager

cognite/extractorutils/uploader/raw.py

@@ -13,15 +13,16 @@
 # limitations under the License.

 import threading
-from
+from types import TracebackType
+from typing import Any, Callable, Dict, List, Optional, Type

 import arrow
 from arrow import Arrow
+from requests import ConnectionError
+
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Row
 from cognite.client.exceptions import CogniteAPIError, CogniteReadTimeout
-from requests import ConnectionError
-
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -75,7 +76,7 @@ class RawUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue: Dict[str, Dict[str, List[TimestampedObject]]] =
+        self.upload_queue: Dict[str, Dict[str, List[TimestampedObject]]] = {}

         # It is a hack since Prometheus client registers metrics on object creation, so object has to be created once
         self.rows_queued = RAW_UPLOADER_ROWS_QUEUED
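The queue is a nested mapping of database → table → rows: the restored `{}` here creates the outer dict, and the following hunk creates the per-database and per-table levels on demand. A usage sketch, assuming a configured client:

```python
from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.extractorutils.uploader import RawUploadQueue

client = CogniteClient()  # assumes credentials/config are provided elsewhere

with RawUploadQueue(client, max_queue_size=10_000) as queue:
    # First use of a (database, table) pair creates the nested dict/list levels
    # shown in the next hunk; rows are wrapped as TimestampedObject entries.
    queue.add_to_upload_queue("my-db", "my-table", Row("row-key", {"value": 42}))
# _upload_batch inserts with ensure_parent=True, so missing db/table are created
```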
@@ -97,7 +98,7 @@ class RawUploadQueue(AbstractUploadQueue):
         with self.lock:
             # Ensure that the dicts has correct keys
             if database not in self.upload_queue:
-                self.upload_queue[database] =
+                self.upload_queue[database] = {}
             if table not in self.upload_queue[database]:
                 self.upload_queue[database][table] = []

@@ -152,7 +153,7 @@ class RawUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_batch(self, database: str, table: str, patch: List[Row]):
+    def _upload_batch(self, database: str, table: str, patch: List[Row]) -> None:
         # Upload
         self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)

@@ -166,7 +167,9 @@ class RawUploadQueue(AbstractUploadQueue):
         self.start()
         return self

-    def __exit__(
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
