cognite-extractor-utils 7.1.6__tar.gz → 7.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/PKG-INFO +2 -1
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/__init__.py +1 -1
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/configtools/loaders.py +48 -10
- cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/__init__.py +12 -0
- cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/_base.py +81 -0
- cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/hashing.py +258 -0
- cognite_extractor_utils-7.1.6/cognite/extractorutils/statestore.py → cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/watermark.py +12 -71
- cognite_extractor_utils-7.2.1/cognite/extractorutils/uploader/data_modeling.py +115 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/time_series.py +2 -2
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/util.py +4 -1
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/pyproject.toml +3 -1
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/LICENSE +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/README.md +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/_inner_util.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/base.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/configtools/__init__.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/configtools/_util.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/configtools/elements.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/exceptions.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/metrics.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/py.typed +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/threading.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/__init__.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/_base.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/_metrics.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/assets.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/events.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/files.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/raw.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader_extractor.py +0 -0
- {cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader_types.py +0 -0
{cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 7.1.6
+Version: 7.2.1
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -22,6 +22,7 @@ Requires-Dist: cognite-sdk (>=7.43.3,<8.0.0)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
 Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
+Requires-Dist: orjson (>=3.10.3,<4.0.0)
 Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
 Requires-Dist: psutil (>=5.7.0,<6.0.0)
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
{cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/configtools/loaders.py
RENAMED
@@ -139,11 +139,10 @@ def _env_constructor(_: yaml.SafeLoader, node: yaml.Node) -> bool:
     return bool_values.get(expanded_value.lower(), expanded_value)
 
 
-def _load_yaml_dict(
+def _load_yaml_dict_raw(
     source: Union[TextIO, str],
-    case_style: str = "hyphen",
     expand_envvars: bool = True,
-    dict_manipulator: Callable[[Dict[str, Any]], Dict[str, Any]] = lambda x: x,
+    keyvault_loader: Optional[KeyVaultLoader] = None,
 ) -> Dict[str, Any]:
     loader = _EnvLoader if expand_envvars else yaml.SafeLoader
 
@@ -159,11 +158,14 @@ def _load_yaml_dict(
     if not isinstance(source, str):
         source.seek(0)
 
-    keyvault_config = initial_load.get("azure-keyvault", initial_load.get("key-vault"))
+    if keyvault_loader:
+        _EnvLoader.add_constructor("!keyvault", keyvault_loader)
+    else:
+        keyvault_config = initial_load.get("azure-keyvault", initial_load.get("key-vault"))
+        _EnvLoader.add_constructor("!keyvault", KeyVaultLoader(keyvault_config))
 
     _EnvLoader.add_implicit_resolver("!env", re.compile(r"\$\{([^}^{]+)\}"), None)
     _EnvLoader.add_constructor("!env", _env_constructor)
-    _EnvLoader.add_constructor("!keyvault", KeyVaultLoader(keyvault_config))
 
     try:
         config_dict = yaml.load(source, Loader=loader)  # noqa: S506
@@ -173,12 +175,26 @@ def _load_yaml_dict(
         cause = e.problem or e.context
         raise InvalidConfigError(f"Invalid YAML{formatted_location}: {cause or ''}") from e
 
-    config_dict.pop("azure-keyvault", None)
-    config_dict.pop("key-vault", None)
+    return config_dict
+
+
+def _load_yaml_dict(
+    source: Union[TextIO, str],
+    case_style: str = "hyphen",
+    expand_envvars: bool = True,
+    dict_manipulator: Callable[[Dict[str, Any]], Dict[str, Any]] = lambda x: x,
+    keyvault_loader: Optional[KeyVaultLoader] = None,
+) -> Dict[str, Any]:
+    config_dict = _load_yaml_dict_raw(source, expand_envvars, keyvault_loader)
 
     config_dict = dict_manipulator(config_dict)
     config_dict = _to_snake_case(config_dict, case_style)
 
+    if "azure_keyvault" in config_dict:
+        config_dict.pop("azure_keyvault")
+    if "key_vault" in config_dict:
+        config_dict.pop("key_vault")
+
     return config_dict
 
 
@@ -188,9 +204,14 @@ def _load_yaml(
     case_style: str = "hyphen",
     expand_envvars: bool = True,
     dict_manipulator: Callable[[Dict[str, Any]], Dict[str, Any]] = lambda x: x,
+    keyvault_loader: Optional[KeyVaultLoader] = None,
 ) -> CustomConfigClass:
     config_dict = _load_yaml_dict(
-        source, case_style=case_style, expand_envvars=expand_envvars, dict_manipulator=dict_manipulator
+        source,
+        case_style=case_style,
+        expand_envvars=expand_envvars,
+        dict_manipulator=dict_manipulator,
+        keyvault_loader=keyvault_loader,
     )
 
     try:
@@ -239,6 +260,7 @@ def load_yaml(
     config_type: Type[CustomConfigClass],
     case_style: str = "hyphen",
     expand_envvars: bool = True,
+    keyvault_loader: Optional[KeyVaultLoader] = None,
 ) -> CustomConfigClass:
     """
     Read a YAML file, and create a config object based on its contents.
@@ -249,6 +271,7 @@ def load_yaml(
         case_style: Casing convention of config file. Valid options are 'snake', 'hyphen' or 'camel'. Should be
             'hyphen'.
         expand_envvars: Substitute values with the pattern ${VAR} with the content of the environment variable VAR
+        keyvault_loader: Pre-built loader for keyvault tags. Will be loaded from config if not set.
 
     Returns:
         An initialized config object.
@@ -256,13 +279,20 @@ def load_yaml(
     Raises:
         InvalidConfigError: If any config field is given as an invalid type, is missing or is unknown
     """
-    return _load_yaml(source=source, config_type=config_type, case_style=case_style, expand_envvars=expand_envvars)
+    return _load_yaml(
+        source=source,
+        config_type=config_type,
+        case_style=case_style,
+        expand_envvars=expand_envvars,
+        keyvault_loader=keyvault_loader,
+    )
 
 
 def load_yaml_dict(
     source: Union[TextIO, str],
     case_style: str = "hyphen",
     expand_envvars: bool = True,
+    keyvault_loader: Optional[KeyVaultLoader] = None,
 ) -> Dict[str, Any]:
     """
     Read a YAML file and return a dictionary from its contents
@@ -272,6 +302,7 @@ def load_yaml_dict(
         case_style: Casing convention of config file. Valid options are 'snake', 'hyphen' or 'camel'. Should be
             'hyphen'.
         expand_envvars: Substitute values with the pattern ${VAR} with the content of the environment variable VAR
+        keyvault_loader: Pre-built loader for keyvault tags. Will be loaded from config if not set.
 
     Returns:
         A raw dict with the contents of the config file.
@@ -279,7 +310,9 @@ def load_yaml_dict(
     Raises:
         InvalidConfigError: If any config field is given as an invalid type, is missing or is unknown
     """
-    return _load_yaml_dict(source=source, case_style=case_style, expand_envvars=expand_envvars)
+    return _load_yaml_dict(
+        source=source, case_style=case_style, expand_envvars=expand_envvars, keyvault_loader=keyvault_loader
+    )
 
 
 class ConfigResolver(Generic[CustomConfigClass]):
@@ -374,6 +407,10 @@ class ConfigResolver(Generic[CustomConfigClass]):
             and tmp_config.cognite.idp_authentication == self._config.cognite.idp_authentication
         )
 
+    def _get_keyvault_loader(self) -> KeyVaultLoader:
+        temp_config = _load_yaml_dict_raw(self._config_text)
+        return KeyVaultLoader(temp_config.get("azure-keyvault", temp_config.get("key-vault")))
+
     def _resolve_config(self) -> None:
         self._reload_file()
 
@@ -400,6 +437,7 @@ class ConfigResolver(Generic[CustomConfigClass]):
                 source=response.config,
                 config_type=self.config_type,
                 dict_manipulator=lambda d: self._inject_cognite(tmp_config, d),
+                keyvault_loader=self._get_keyvault_loader(),
             )
 
         else:
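The net effect of the loaders changes: `_load_yaml_dict_raw` now does the raw YAML parse and `!keyvault` tag registration, while `_load_yaml_dict` layers case conversion and key-vault key stripping on top. The new `keyvault_loader` parameter lets a caller, such as `ConfigResolver` when resolving a remote config, reuse a `KeyVaultLoader` built from the local config text instead of re-deriving it from the config being parsed. A minimal usage sketch, not part of the diff (the file name and config text are made up, and it assumes the local file contains no custom tags):

    import yaml
    from cognite.extractorutils.configtools.loaders import KeyVaultLoader, load_yaml_dict

    # Build a KeyVaultLoader from a local file, mirroring what
    # ConfigResolver._get_keyvault_loader does in the diff above.
    with open("local_config.yaml") as f:  # hypothetical file
        raw = yaml.safe_load(f)
    loader = KeyVaultLoader(raw.get("azure-keyvault", raw.get("key-vault")))

    # Reuse it when parsing a config fetched from elsewhere, so !keyvault tags
    # resolve against the locally configured vault.
    remote_text = "some-field: !keyvault my-secret"  # hypothetical config text
    config = load_yaml_dict(remote_text, keyvault_loader=loader)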
cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/__init__.py
@@ -0,0 +1,12 @@
+from .hashing import AbstractHashStateStore, LocalHashStateStore, RawHashStateStore
+from .watermark import AbstractStateStore, LocalStateStore, NoStateStore, RawStateStore
+
+__all__ = [
+    "AbstractStateStore",
+    "RawStateStore",
+    "LocalStateStore",
+    "NoStateStore",
+    "AbstractHashStateStore",
+    "RawHashStateStore",
+    "LocalHashStateStore",
+]
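For consumers, the package split is invisible: the names previously importable from `cognite.extractorutils.statestore` are re-exported here, and the hash-based stores are new additions. For example:

    from cognite.extractorutils.statestore import LocalStateStore      # worked in 7.1, still works
    from cognite.extractorutils.statestore import LocalHashStateStore  # new in 7.2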
cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/_base.py
@@ -0,0 +1,81 @@
+import logging
+import threading
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from cognite.extractorutils._inner_util import _resolve_log_level
+from cognite.extractorutils.threading import CancellationToken
+
+RETRY_BACKOFF_FACTOR = 1.5
+RETRY_MAX_DELAY = 60
+RETRY_DELAY = 1
+RETRIES = 10
+
+
+class _BaseStateStore(ABC):
+    def __init__(
+        self,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        self._initialized = False
+
+        self.logger = logging.getLogger(__name__)
+        self.trigger_log_level = _resolve_log_level(trigger_log_level)
+        self.save_interval = save_interval
+
+        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
+        self.lock = threading.RLock()
+        self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
+
+    def start(self, initialize: bool = True) -> None:
+        """
+        Start saving state periodically if save_interval is set.
+        This calls the synchronize method every save_interval seconds.
+        """
+        if initialize and not self._initialized:
+            self.initialize()
+        if self.save_interval is not None:
+            self.thread.start()
+
+    def stop(self, ensure_synchronize: bool = True) -> None:
+        """
+        Stop synchronize thread if running, and ensure state is saved if ensure_synchronize is True.
+
+        Args:
+            ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
+        """
+        self.cancellation_token.cancel()
+        if ensure_synchronize:
+            self.synchronize()
+
+    def _run(self) -> None:
+        """
+        Internal run method for synchronize thread
+        """
+        self.initialize()
+        while not self.cancellation_token.wait(timeout=self.save_interval):
+            try:
+                self.logger.log(self.trigger_log_level, "Triggering scheduled state store synchronization")
+                self.synchronize()
+            except Exception as e:
+                self.logger.error("Unexpected error while synchronizing state store: %s.", str(e))
+
+        # trigger stop event explicitly to drain the queue
+        self.stop(ensure_synchronize=True)
+
+    @abstractmethod
+    def initialize(self, force: bool = False) -> None:
+        """
+        Get states from remote store
+        """
+        pass
+
+    @abstractmethod
+    def synchronize(self) -> None:
+        """
+        Upload states to remote store
+        """
+        pass
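`_BaseStateStore` now owns the lifecycle that was previously duplicated in `AbstractStateStore`: a lock, a cancellation token, and a thread that calls `synchronize()` every `save_interval` seconds. Subclasses only implement `initialize()` and `synchronize()`. A minimal sketch of that contract (this subclass is hypothetical, for illustration only):

    from typing import Dict
    from cognite.extractorutils.statestore._base import _BaseStateStore

    class PrintingStateStore(_BaseStateStore):
        def __init__(self) -> None:
            super().__init__(save_interval=60)
            self._data: Dict[str, str] = {}

        def initialize(self, force: bool = False) -> None:
            # a real store would load persisted state from its backend here
            self._initialized = True

        def synchronize(self) -> None:
            # a real store would persist self._data here
            print(f"would save {len(self._data)} states")

    store = PrintingStateStore()
    store.start()  # runs initialize(), then synchronize() every 60 seconds
    store.stop()   # cancels the thread and synchronizes one final time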
cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/hashing.py
@@ -0,0 +1,258 @@
+import hashlib
+import json
+from abc import ABC
+from types import TracebackType
+from typing import Any, Dict, Iterable, Iterator, Optional, Set, Type
+
+import orjson
+
+from cognite.client import CogniteClient
+from cognite.client.data_classes import Row
+from cognite.client.exceptions import CogniteAPIError
+from cognite.extractorutils._inner_util import _DecimalDecoder, _DecimalEncoder
+from cognite.extractorutils.threading import CancellationToken
+from cognite.extractorutils.util import cognite_exceptions, retry
+
+from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY, _BaseStateStore
+
+
+class AbstractHashStateStore(_BaseStateStore, ABC):
+    def __init__(
+        self,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
+
+        self._local_state: Dict[str, Dict[str, str]] = {}
+        self._seen: Set[str] = set()
+
+    def get_state(self, external_id: str) -> Optional[str]:
+        with self.lock:
+            return self._local_state.get(external_id, {}).get("digest")
+
+    def _hash_row(self, data: Dict[str, Any]) -> str:
+        return hashlib.sha256(orjson.dumps(data, option=orjson.OPT_SORT_KEYS)).hexdigest()
+
+    def set_state(self, external_id: str, data: Dict[str, Any]) -> None:
+        with self.lock:
+            self._local_state[external_id] = {"digest": self._hash_row(data)}
+
+    def has_changed(self, external_id: str, data: Dict[str, Any]) -> bool:
+        with self.lock:
+            if external_id not in self._local_state:
+                return True
+
+            return self._hash_row(data) != self._local_state[external_id]["digest"]
+
+    def __getitem__(self, external_id: str) -> Optional[str]:
+        return self.get_state(external_id)
+
+    def __setitem__(self, key: str, value: Dict[str, Any]) -> None:
+        self.set_state(external_id=key, data=value)
+
+    def __contains__(self, external_id: str) -> bool:
+        return external_id in self._local_state
+
+    def __len__(self) -> int:
+        return len(self._local_state)
+
+    def __iter__(self) -> Iterator[str]:
+        with self.lock:
+            for key in self._local_state:
+                yield key
+
+
+class RawHashStateStore(AbstractHashStateStore):
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        database: str,
+        table: str,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
+        self._cdf_client = cdf_client
+        self.database = database
+        self.table = table
+
+    def synchronize(self) -> None:
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def impl() -> None:
+            """
+            Upload local state store to CDF
+            """
+            with self.lock:
+                self._cdf_client.raw.rows.insert(
+                    db_name=self.database,
+                    table_name=self.table,
+                    row=self._local_state,
+                    ensure_parent=True,
+                )
+
+        impl()
+
+    def initialize(self, force: bool = False) -> None:
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def impl() -> None:
+            """
+            Get all known states.
+
+            Args:
+                force: Enable re-initialization, ie overwrite when called multiple times
+            """
+            if self._initialized and not force:
+                return
+
+            rows: Iterable[Row]
+            try:
+                rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None)
+            except CogniteAPIError as e:
+                if e.code == 404:
+                    rows = []
+                else:
+                    raise e
+
+            with self.lock:
+                self._local_state.clear()
+                for row in rows:
+                    if row.key is None or row.columns is None:
+                        self.logger.warning(f"None encountered in row: {str(row)}")
+                        # should never happen, but type from sdk is optional
+                        continue
+                    state = row.columns.get("digest")
+                    if state:
+                        self._local_state[row.key] = {"digest": state}
+
+            self._initialized = True
+
+        impl()
+
+    def __enter__(self) -> "RawHashStateStore":
+        """
+        Wraps around start method, for use as context manager
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
+
+
+class LocalHashStateStore(AbstractHashStateStore):
+    def __init__(
+        self,
+        file_path: str,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
+
+        self._file_path = file_path
+
+    def initialize(self, force: bool = False) -> None:
+        """
+        Load states from specified JSON file
+
+        Args:
+            force: Enable re-initialization, ie overwrite when called multiple times
+        """
+        if self._initialized and not force:
+            return
+
+        with self.lock:
+            try:
+                with open(self._file_path, "r") as f:
+                    self._local_state = json.load(f, cls=_DecimalDecoder)
+            except FileNotFoundError:
+                pass
+            except json.decoder.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON in state store file: {str(e)}") from e
+
+        self._initialized = True
+
+    def synchronize(self) -> None:
+        """
+        Save states to specified JSON file
+        """
+        with self.lock:
+            with open(self._file_path, "w") as f:
+                json.dump(self._local_state, f, cls=_DecimalEncoder)
+
+    def __enter__(self) -> "LocalHashStateStore":
+        """
+        Wraps around start method, for use as context manager
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
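Unlike the watermark stores, the hash stores do not track a low/high interval per external ID: they keep a SHA-256 digest of the last row written (computed over an orjson dump with OPT_SORT_KEYS, so dict key order does not affect the hash) and answer "has this row changed since the last upload?". A usage sketch, not part of the diff (database, table, and external IDs are made up; a configured CogniteClient is assumed):

    from cognite.client import CogniteClient
    from cognite.extractorutils.statestore import RawHashStateStore

    client = CogniteClient()  # assumes credentials are configured elsewhere
    states = RawHashStateStore(client, database="extractor-db", table="states", save_interval=30)

    with states:  # start(): loads digests from RAW and starts the sync thread
        row = {"name": "pump-01", "value": 42.0}
        if states.has_changed("pump-01", row):
            # ... upload the row to its destination, then record its digest:
            states.set_state("pump-01", row)
    # leaving the block stops the thread and synchronizes digests back to RAW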
cognite_extractor_utils-7.1.6/cognite/extractorutils/statestore.py → cognite_extractor_utils-7.2.1/cognite/extractorutils/statestore/watermark.py
RENAMED
@@ -86,27 +86,21 @@ You can set a state store to automatically update on upload triggers from an upl
 """
 
 import json
-import logging
-import threading
-from abc import ABC, abstractmethod
+from abc import ABC
 from types import TracebackType
 from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
 
 from cognite.client import CogniteClient
 from cognite.client.exceptions import CogniteAPIError
+from cognite.extractorutils._inner_util import _DecimalDecoder, _DecimalEncoder
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader import DataPointList
+from cognite.extractorutils.util import cognite_exceptions, retry
 
-from ._inner_util import _DecimalDecoder, _DecimalEncoder, _resolve_log_level
-from .util import cognite_exceptions, retry
+from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY, _BaseStateStore
 
-RETRY_BACKOFF_FACTOR = 1.5
-RETRY_MAX_DELAY = 60
-RETRY_DELAY = 1
-RETRIES = 10
 
-
-class AbstractStateStore(ABC):
+class AbstractStateStore(_BaseStateStore, ABC):
     """
     Base class for a state store.
 
@@ -125,69 +119,16 @@ class AbstractStateStore(ABC):
         thread_name: Optional[str] = None,
         cancellation_token: Optional[CancellationToken] = None,
     ):
-        self._initialized = False
-
-        self.logger = logging.getLogger(__name__)
-        self.trigger_log_level = _resolve_log_level(trigger_log_level)
-        self.save_interval = save_interval
-
-        self._local_state: Dict[str, Dict[str, Any]] = {}
-        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
-        self.lock = threading.RLock()
-        self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
 
+        self._local_state: Dict[str, Dict[str, Any]] = {}
         self._deleted: List[str] = []
 
-    def start(self, initialize: bool = True) -> None:
-        """
-        Start saving state periodically if save_interval is set.
-        This calls the synchronize method every save_interval seconds.
-        """
-        if initialize and not self._initialized:
-            self.initialize()
-        if self.save_interval is not None:
-            self.thread.start()
-
-    def stop(self, ensure_synchronize: bool = True) -> None:
-        """
-        Stop synchronize thread if running, and ensure state is saved if ensure_synchronize is True.
-
-        Args:
-            ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
-        """
-        self.cancellation_token.cancel()
-        if ensure_synchronize:
-            self.synchronize()
-
-    def _run(self) -> None:
-        """
-        Internal run method for synchronize thread
-        """
-        self.initialize()
-        while not self.cancellation_token.wait(timeout=self.save_interval):
-            try:
-                self.logger.log(self.trigger_log_level, "Triggering scheduled state store synchronization")
-                self.synchronize()
-            except Exception as e:
-                self.logger.error("Unexpected error while synchronizing state store: %s.", str(e))
-
-        # trigger stop event explicitly to drain the queue
-        self.stop(ensure_synchronize=True)
-
-    @abstractmethod
-    def initialize(self, force: bool = False) -> None:
-        """
-        Get states from remote store
-        """
-        pass
-
-    @abstractmethod
-    def synchronize(self) -> None:
-        """
-        Upload states to remote store
-        """
-        pass
-
     def get_state(self, external_id: Union[str, List[str]]) -> Union[Tuple[Any, Any], List[Tuple[Any, Any]]]:
         """
         Get state(s) for external ID(s)
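The watermark hunks are a pure refactor: the imports and thread/lifecycle plumbing move into `_BaseStateStore`, the retry constants move to `_base`, and the public behaviour of `AbstractStateStore` and its subclasses is unchanged. A sketch of the unchanged high/low watermark API (state file name and external ID are made up):

    from cognite.extractorutils.statestore import LocalStateStore

    with LocalStateStore("states.json", save_interval=10) as states:
        low, high = states.get_state("my-timeseries")   # (None, None) if unseen
        states.set_state("my-timeseries", low=0, high=100)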
cognite_extractor_utils-7.2.1/cognite/extractorutils/uploader/data_modeling.py
@@ -0,0 +1,115 @@
+from types import TracebackType
+from typing import Any, Callable, List, Optional, Type
+
+from cognite.client import CogniteClient
+from cognite.client.data_classes.data_modeling import EdgeApply, NodeApply
+from cognite.extractorutils.threading import CancellationToken
+from cognite.extractorutils.uploader._base import (
+    RETRIES,
+    RETRY_BACKOFF_FACTOR,
+    RETRY_DELAY,
+    RETRY_MAX_DELAY,
+    AbstractUploadQueue,
+)
+from cognite.extractorutils.util import cognite_exceptions, retry
+
+
+class InstanceUploadQueue(AbstractUploadQueue):
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Optional[Callable[[List[Any]], None]] = None,
+        max_queue_size: Optional[int] = None,
+        max_upload_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+        auto_create_start_nodes: bool = True,
+        auto_create_end_nodes: bool = True,
+        auto_create_direct_relations: bool = True,
+    ):
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.auto_create_start_nodes = auto_create_start_nodes
+        self.auto_create_end_nodes = auto_create_end_nodes
+        self.auto_create_direct_relations = auto_create_direct_relations
+
+        self.node_queue: List[NodeApply] = []
+        self.edge_queue: List[EdgeApply] = []
+
+    def add_to_upload_queue(
+        self,
+        *,
+        node_data: Optional[List[NodeApply]] = None,
+        edge_data: Optional[List[EdgeApply]] = None,
+    ) -> None:
+        if node_data:
+            with self.lock:
+                self.node_queue.extend(node_data)
+                self.upload_queue_size += len(node_data)
+
+        if edge_data:
+            with self.lock:
+                self.edge_queue.extend(edge_data)
+                self.upload_queue_size += len(edge_data)
+
+        with self.lock:
+            self._check_triggers()
+
+    def upload(self) -> None:
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def upload_batch() -> None:
+            self.cdf_client.data_modeling.instances.apply(
+                nodes=self.node_queue,
+                edges=self.edge_queue,
+                auto_create_start_nodes=self.auto_create_start_nodes,
+                auto_create_end_nodes=self.auto_create_end_nodes,
+                auto_create_direct_relations=self.auto_create_direct_relations,
+            )
+            self.node_queue.clear()
+            self.edge_queue.clear()
+            self.upload_queue_size = 0
+
+        with self.lock:
+            upload_batch()
+
+    def __enter__(self) -> "InstanceUploadQueue":
+        """
+        Wraps around start method, for use as context manager
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
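`InstanceUploadQueue` follows the same pattern as the other upload queues: items accumulate under a lock, `_check_triggers()` fires an upload when the queue-size threshold is hit, and `upload()` writes everything in one `instances.apply` call with retries. A usage sketch, not part of the diff (the space, view, and property names are made up; a configured CogniteClient is assumed):

    from cognite.client import CogniteClient
    from cognite.client.data_classes.data_modeling import NodeApply, NodeOrEdgeData, ViewId
    from cognite.extractorutils.uploader.data_modeling import InstanceUploadQueue

    client = CogniteClient()  # assumes credentials are configured elsewhere
    node = NodeApply(
        space="my-space",
        external_id="pump-01",
        sources=[NodeOrEdgeData(ViewId("my-space", "Pump", "v1"), {"name": "Pump 01"})],
    )

    with InstanceUploadQueue(client, max_queue_size=1000) as queue:
        queue.add_to_upload_queue(node_data=[node])
    # exiting the context stops the queue, which triggers a final upload of
    # anything still buffered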
{cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/uploader/time_series.py
RENAMED
@@ -264,9 +264,9 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                 missing = [id_dict for id_dict in ex.not_found if id_dict.get("externalId") not in retry_these]
                 missing_num = len(ex.not_found) - len(create_these_ids)
                 self.logger.error(
-                    f"{missing_num} time series not found, and could not be created automatically
+                    f"{missing_num} time series not found, and could not be created automatically: "
                     + str(missing)
-                    + "
+                    + " Data will be dropped"
                 )
 
                 # Remove entries with non-existing time series from upload queue

{cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/cognite/extractorutils/util.py
RENAMED
@@ -319,11 +319,14 @@ def _retry_internal(
 ) -> _T2:
     logger = logging.getLogger(__name__)
 
-    while tries and not cancellation_token.is_cancelled:
+    while tries:
         try:
             return f()
 
         except Exception as e:
+            if cancellation_token.is_cancelled:
+                break
+
             if isinstance(exceptions, tuple):
                 for ex_type in exceptions:
                     if isinstance(e, ex_type):
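The retry change is subtle: cancellation, previously checked only in the loop condition before each attempt, is now checked right after a failed attempt, so a token cancelled while a call is in flight stops the retry loop immediately instead of sleeping through another backoff delay. The decorator is used with the same shape throughout this release, e.g.:

    from cognite.extractorutils.threading import CancellationToken
    from cognite.extractorutils.util import cognite_exceptions, retry

    token = CancellationToken()

    @retry(
        exceptions=cognite_exceptions(),
        cancellation_token=token,
        tries=10,
        delay=1,
        max_delay=60,
        backoff=1.5,
    )
    def upload() -> None:
        ...  # some CDF call; cancelling `token` now stops retries after the next failure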
{cognite_extractor_utils-7.1.6 → cognite_extractor_utils-7.2.1}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognite-extractor-utils"
-version = "7.1.6"
+version = "7.2.1"
 description = "Utilities for easier development of extractors for CDF"
 authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
 license = "Apache-2.0"
@@ -65,6 +65,7 @@ typing-extensions = ">=3.7.4, <5"
 python-dotenv = "^1.0.0"
 azure-identity = "^1.14.0"
 azure-keyvault-secrets = "^4.7.0"
+orjson = "^3.10.3"
 
 [tool.poetry.extras]
 experimental = ["cognite-sdk-experimental"]
@@ -84,6 +85,7 @@ parameterized = "*"
 requests = "^2.31.0"
 types-requests = "^2.31.0.20240125"
 httpx = "^0.27.0"
+faker = "^25.2.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]