cognite-extractor-utils 7.1.5__tar.gz → 7.2.0__tar.gz

This diff compares the contents of two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (31)
  1. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/PKG-INFO +3 -2
  2. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/__init__.py +1 -1
  3. cognite_extractor_utils-7.2.0/cognite/extractorutils/statestore/__init__.py +12 -0
  4. cognite_extractor_utils-7.2.0/cognite/extractorutils/statestore/_base.py +81 -0
  5. cognite_extractor_utils-7.2.0/cognite/extractorutils/statestore/hashing.py +258 -0
  6. cognite_extractor_utils-7.1.5/cognite/extractorutils/statestore.py → cognite_extractor_utils-7.2.0/cognite/extractorutils/statestore/watermark.py +12 -71
  7. cognite_extractor_utils-7.2.0/cognite/extractorutils/uploader/data_modeling.py +115 -0
  8. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/util.py +4 -1
  9. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/pyproject.toml +4 -2
  10. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/LICENSE +0 -0
  11. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/README.md +0 -0
  12. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/_inner_util.py +0 -0
  13. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/base.py +0 -0
  14. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/configtools/__init__.py +0 -0
  15. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/configtools/_util.py +0 -0
  16. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/configtools/elements.py +0 -0
  17. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/configtools/loaders.py +0 -0
  18. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/exceptions.py +0 -0
  19. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/metrics.py +0 -0
  20. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/py.typed +0 -0
  21. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/threading.py +0 -0
  22. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/__init__.py +0 -0
  23. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/_base.py +0 -0
  24. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/_metrics.py +0 -0
  25. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/assets.py +0 -0
  26. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/events.py +0 -0
  27. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/files.py +0 -0
  28. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/raw.py +0 -0
  29. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader/time_series.py +0 -0
  30. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader_extractor.py +0 -0
  31. {cognite_extractor_utils-7.1.5 → cognite_extractor_utils-7.2.0}/cognite/extractorutils/uploader_types.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 7.1.5
+Version: 7.2.0
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -18,10 +18,11 @@ Provides-Extra: experimental
 Requires-Dist: arrow (>=1.0.0,<2.0.0)
 Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
 Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
-Requires-Dist: cognite-sdk (>=7.41.0,<8.0.0)
+Requires-Dist: cognite-sdk (>=7.43.3,<8.0.0)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
 Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
+Requires-Dist: orjson (>=3.10.3,<4.0.0)
 Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
 Requires-Dist: psutil (>=5.7.0,<6.0.0)
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)

cognite/extractorutils/__init__.py
@@ -16,5 +16,5 @@
 Cognite extractor utils is a Python package that simplifies the development of new extractors.
 """
 
-__version__ = "7.1.5"
+__version__ = "7.2.0"
 from .base import Extractor

cognite/extractorutils/statestore/__init__.py (new)
@@ -0,0 +1,12 @@
+from .hashing import AbstractHashStateStore, LocalHashStateStore, RawHashStateStore
+from .watermark import AbstractStateStore, LocalStateStore, NoStateStore, RawStateStore
+
+__all__ = [
+    "AbstractStateStore",
+    "RawStateStore",
+    "LocalStateStore",
+    "NoStateStore",
+    "AbstractHashStateStore",
+    "RawHashStateStore",
+    "LocalHashStateStore",
+]
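
The old single-module statestore is now a package: the original watermark-based stores live in watermark.py and the new hash-based stores in hashing.py, with both families re-exported here so package-root imports keep working. For instance:

    from cognite.extractorutils.statestore import LocalStateStore      # watermark-based, as before
    from cognite.extractorutils.statestore import LocalHashStateStore  # new in 7.2.0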

cognite/extractorutils/statestore/_base.py (new)
@@ -0,0 +1,81 @@
+import logging
+import threading
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from cognite.extractorutils._inner_util import _resolve_log_level
+from cognite.extractorutils.threading import CancellationToken
+
+RETRY_BACKOFF_FACTOR = 1.5
+RETRY_MAX_DELAY = 60
+RETRY_DELAY = 1
+RETRIES = 10
+
+
+class _BaseStateStore(ABC):
+    def __init__(
+        self,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        self._initialized = False
+
+        self.logger = logging.getLogger(__name__)
+        self.trigger_log_level = _resolve_log_level(trigger_log_level)
+        self.save_interval = save_interval
+
+        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
+        self.lock = threading.RLock()
+        self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
+
+    def start(self, initialize: bool = True) -> None:
+        """
+        Start saving state periodically if save_interval is set.
+        This calls the synchronize method every save_interval seconds.
+        """
+        if initialize and not self._initialized:
+            self.initialize()
+        if self.save_interval is not None:
+            self.thread.start()
+
+    def stop(self, ensure_synchronize: bool = True) -> None:
+        """
+        Stop synchronize thread if running, and ensure state is saved if ensure_synchronize is True.
+
+        Args:
+            ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
+        """
+        self.cancellation_token.cancel()
+        if ensure_synchronize:
+            self.synchronize()
+
+    def _run(self) -> None:
+        """
+        Internal run method for synchronize thread
+        """
+        self.initialize()
+        while not self.cancellation_token.wait(timeout=self.save_interval):
+            try:
+                self.logger.log(self.trigger_log_level, "Triggering scheduled state store synchronization")
+                self.synchronize()
+            except Exception as e:
+                self.logger.error("Unexpected error while synchronizing state store: %s.", str(e))
+
+        # trigger stop event explicitly to drain the queue
+        self.stop(ensure_synchronize=True)
+
+    @abstractmethod
+    def initialize(self, force: bool = False) -> None:
+        """
+        Get states from remote store
+        """
+        pass
+
+    @abstractmethod
+    def synchronize(self) -> None:
+        """
+        Upload states to remote store
+        """
+        pass
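
_BaseStateStore now holds the lifecycle logic shared by both store families: start() optionally initializes and, when save_interval is set, launches a background thread that calls synchronize() every save_interval seconds until the cancellation token fires; stop() cancels the token and synchronizes one last time. A minimal sketch of a concrete subclass, purely illustrative (InMemoryStateStore is not part of the package):

    from cognite.extractorutils.statestore._base import _BaseStateStore

    class InMemoryStateStore(_BaseStateStore):
        # Hypothetical subclass: state lives only in memory, so there is
        # nothing to load from or persist to a remote store.
        def initialize(self, force: bool = False) -> None:
            self._initialized = True

        def synchronize(self) -> None:
            pass

    store = InMemoryStateStore(save_interval=30)
    store.start()  # spawns the thread; synchronize() then runs every 30 seconds
    store.stop()   # cancels the token and synchronizes one final time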

cognite/extractorutils/statestore/hashing.py (new)
@@ -0,0 +1,258 @@
+import hashlib
+import json
+from abc import ABC
+from types import TracebackType
+from typing import Any, Dict, Iterable, Iterator, Optional, Set, Type
+
+import orjson
+
+from cognite.client import CogniteClient
+from cognite.client.data_classes import Row
+from cognite.client.exceptions import CogniteAPIError
+from cognite.extractorutils._inner_util import _DecimalDecoder, _DecimalEncoder
+from cognite.extractorutils.threading import CancellationToken
+from cognite.extractorutils.util import cognite_exceptions, retry
+
+from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY, _BaseStateStore
+
+
+class AbstractHashStateStore(_BaseStateStore, ABC):
+    def __init__(
+        self,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
+
+        self._local_state: Dict[str, Dict[str, str]] = {}
+        self._seen: Set[str] = set()
+
+    def get_state(self, external_id: str) -> Optional[str]:
+        with self.lock:
+            return self._local_state.get(external_id, {}).get("digest")
+
+    def _hash_row(self, data: Dict[str, Any]) -> str:
+        return hashlib.sha256(orjson.dumps(data, option=orjson.OPT_SORT_KEYS)).hexdigest()
+
+    def set_state(self, external_id: str, data: Dict[str, Any]) -> None:
+        with self.lock:
+            self._local_state[external_id] = {"digest": self._hash_row(data)}
+
+    def has_changed(self, external_id: str, data: Dict[str, Any]) -> bool:
+        with self.lock:
+            if external_id not in self._local_state:
+                return True
+
+            return self._hash_row(data) != self._local_state[external_id]["digest"]
+
+    def __getitem__(self, external_id: str) -> Optional[str]:
+        return self.get_state(external_id)
+
+    def __setitem__(self, key: str, value: Dict[str, Any]) -> None:
+        self.set_state(external_id=key, data=value)
+
+    def __contains__(self, external_id: str) -> bool:
+        return external_id in self._local_state
+
+    def __len__(self) -> int:
+        return len(self._local_state)
+
+    def __iter__(self) -> Iterator[str]:
+        with self.lock:
+            for key in self._local_state:
+                yield key
+
+
+class RawHashStateStore(AbstractHashStateStore):
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        database: str,
+        table: str,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
+        self._cdf_client = cdf_client
+        self.database = database
+        self.table = table
+
+    def synchronize(self) -> None:
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def impl() -> None:
+            """
+            Upload local state store to CDF
+            """
+            with self.lock:
+                self._cdf_client.raw.rows.insert(
+                    db_name=self.database,
+                    table_name=self.table,
+                    row=self._local_state,
+                    ensure_parent=True,
+                )
+
+        impl()
+
+    def initialize(self, force: bool = False) -> None:
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def impl() -> None:
+            """
+            Get all known states.
+
+            Args:
+                force: Enable re-initialization, ie overwrite when called multiple times
+            """
+            if self._initialized and not force:
+                return
+
+            rows: Iterable[Row]
+            try:
+                rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None)
+            except CogniteAPIError as e:
+                if e.code == 404:
+                    rows = []
+                else:
+                    raise e
+
+            with self.lock:
+                self._local_state.clear()
+                for row in rows:
+                    if row.key is None or row.columns is None:
+                        self.logger.warning(f"None encountered in row: {str(row)}")
+                        # should never happen, but type from sdk is optional
+                        continue
+                    state = row.columns.get("digest")
+                    if state:
+                        self._local_state[row.key] = {"digest": state}
+
+            self._initialized = True
+
+        impl()
+
+    def __enter__(self) -> "RawHashStateStore":
+        """
+        Wraps around start method, for use as context manager
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
+
+
+class LocalHashStateStore(AbstractHashStateStore):
+    def __init__(
+        self,
+        file_path: str,
+        save_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+    ) -> None:
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
+
+        self._file_path = file_path
+
+    def initialize(self, force: bool = False) -> None:
+        """
+        Load states from specified JSON file
+
+        Args:
+            force: Enable re-initialization, ie overwrite when called multiple times
+        """
+        if self._initialized and not force:
+            return
+
+        with self.lock:
+            try:
+                with open(self._file_path, "r") as f:
+                    self._local_state = json.load(f, cls=_DecimalDecoder)
+            except FileNotFoundError:
+                pass
+            except json.decoder.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON in state store file: {str(e)}") from e
+
+        self._initialized = True
+
+    def synchronize(self) -> None:
+        """
+        Save states to specified JSON file
+        """
+        with self.lock:
+            with open(self._file_path, "w") as f:
+                json.dump(self._local_state, f, cls=_DecimalEncoder)
+
+    def __enter__(self) -> "LocalHashStateStore":
+        """
+        Wraps around start method, for use as context manager
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
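
Where the watermark stores track low/high extraction boundaries per ID, the hash stores keep a SHA-256 digest of each item's content, serialized with orjson using OPT_SORT_KEYS so that logically equal dicts hash identically. This suits sources with no usable timestamp or change counter. A usage sketch, assuming rows is an iterable of (external_id, data) pairs and upload is a placeholder for the actual write:

    from cognite.extractorutils.statestore import LocalHashStateStore

    with LocalHashStateStore("states.json", save_interval=60) as states:
        for external_id, data in rows:               # `rows` is an assumed input
            if states.has_changed(external_id, data):
                upload(external_id, data)            # hypothetical upload step
                states.set_state(external_id, data)  # record the new digest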

cognite/extractorutils/statestore.py → cognite/extractorutils/statestore/watermark.py
@@ -86,27 +86,21 @@ You can set a state store to automatically update on upload triggers from an upload queue
 """
 
 import json
-import logging
-import threading
-from abc import ABC, abstractmethod
+from abc import ABC
 from types import TracebackType
 from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
 
 from cognite.client import CogniteClient
 from cognite.client.exceptions import CogniteAPIError
+from cognite.extractorutils._inner_util import _DecimalDecoder, _DecimalEncoder
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader import DataPointList
+from cognite.extractorutils.util import cognite_exceptions, retry
 
-from ._inner_util import _DecimalDecoder, _DecimalEncoder, _resolve_log_level
-from .util import cognite_exceptions, retry
+from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY, _BaseStateStore
 
-RETRY_BACKOFF_FACTOR = 1.5
-RETRY_MAX_DELAY = 60
-RETRY_DELAY = 1
-RETRIES = 10
 
-
-class AbstractStateStore(ABC):
+class AbstractStateStore(_BaseStateStore, ABC):
     """
     Base class for a state store.
 
@@ -125,69 +119,16 @@ class AbstractStateStore(ABC):
         thread_name: Optional[str] = None,
         cancellation_token: Optional[CancellationToken] = None,
     ):
-        self._initialized = False
-        self._local_state: Dict[str, Dict[str, Any]] = {}
-        self.save_interval = save_interval
-        self.trigger_log_level = _resolve_log_level(trigger_log_level)
-
-        self.logger = logging.getLogger(__name__)
-
-        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
-        self.lock = threading.RLock()
-        self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
+        super().__init__(
+            save_interval=save_interval,
+            trigger_log_level=trigger_log_level,
+            thread_name=thread_name,
+            cancellation_token=cancellation_token,
+        )
 
+        self._local_state: Dict[str, Dict[str, Any]] = {}
         self._deleted: List[str] = []
 
-    def start(self, initialize: bool = True) -> None:
-        """
-        Start saving state periodically if save_interval is set.
-        This calls the synchronize method every save_interval seconds.
-        """
-        if initialize and not self._initialized:
-            self.initialize()
-        if self.save_interval is not None:
-            self.thread.start()
-
-    def stop(self, ensure_synchronize: bool = True) -> None:
-        """
-        Stop synchronize thread if running, and ensure state is saved if ensure_synchronize is True.
-
-        Args:
-            ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
-        """
-        self.cancellation_token.cancel()
-        if ensure_synchronize:
-            self.synchronize()
-
-    def _run(self) -> None:
-        """
-        Internal run method for synchronize thread
-        """
-        self.initialize()
-        while not self.cancellation_token.wait(timeout=self.save_interval):
-            try:
-                self.logger.log(self.trigger_log_level, "Triggering scheduled state store synchronization")
-                self.synchronize()
-            except Exception as e:
-                self.logger.error("Unexpected error while synchronizing state store: %s.", str(e))
-
-        # trigger stop event explicitly to drain the queue
-        self.stop(ensure_synchronize=True)
-
-    @abstractmethod
-    def initialize(self, force: bool = False) -> None:
-        """
-        Get states from remote store
-        """
-        pass
-
-    @abstractmethod
-    def synchronize(self) -> None:
-        """
-        Upload states to remote store
-        """
-        pass
-
     def get_state(self, external_id: Union[str, List[str]]) -> Union[Tuple[Any, Any], List[Tuple[Any, Any]]]:
         """
         Get state(s) for external ID(s)
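
The watermark stores keep their public interface; only the thread and lifecycle plumbing moved into the shared base class. As the signature above shows, get_state accepts either one external ID or a list, returning a single (low, high) pair or a list of pairs accordingly (store here being any concrete AbstractStateStore):

    low, high = store.get_state("some-id")     # single ID in, one (low, high) tuple out
    pairs = store.get_state(["id-1", "id-2"])  # list in, list of tuples out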

cognite/extractorutils/uploader/data_modeling.py (new)
@@ -0,0 +1,115 @@
+from types import TracebackType
+from typing import Any, Callable, List, Optional, Type
+
+from cognite.client import CogniteClient
+from cognite.client.data_classes.data_modeling import EdgeApply, NodeApply
+from cognite.extractorutils.threading import CancellationToken
+from cognite.extractorutils.uploader._base import (
+    RETRIES,
+    RETRY_BACKOFF_FACTOR,
+    RETRY_DELAY,
+    RETRY_MAX_DELAY,
+    AbstractUploadQueue,
+)
+from cognite.extractorutils.util import cognite_exceptions, retry
+
+
+class InstanceUploadQueue(AbstractUploadQueue):
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Optional[Callable[[List[Any]], None]] = None,
+        max_queue_size: Optional[int] = None,
+        max_upload_interval: Optional[int] = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: Optional[str] = None,
+        cancellation_token: Optional[CancellationToken] = None,
+        auto_create_start_nodes: bool = True,
+        auto_create_end_nodes: bool = True,
+        auto_create_direct_relations: bool = True,
+    ):
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.auto_create_start_nodes = auto_create_start_nodes
+        self.auto_create_end_nodes = auto_create_end_nodes
+        self.auto_create_direct_relations = auto_create_direct_relations
+
+        self.node_queue: List[NodeApply] = []
+        self.edge_queue: List[EdgeApply] = []
+
+    def add_to_upload_queue(
+        self,
+        *,
+        node_data: Optional[List[NodeApply]] = None,
+        edge_data: Optional[List[EdgeApply]] = None,
+    ) -> None:
+        if node_data:
+            with self.lock:
+                self.node_queue.extend(node_data)
+                self.upload_queue_size += len(node_data)
+
+        if edge_data:
+            with self.lock:
+                self.edge_queue.extend(edge_data)
+                self.upload_queue_size += len(edge_data)
+
+        with self.lock:
+            self._check_triggers()
+
+    def upload(self) -> None:
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def upload_batch() -> None:
+            self.cdf_client.data_modeling.instances.apply(
+                nodes=self.node_queue,
+                edges=self.edge_queue,
+                auto_create_start_nodes=self.auto_create_start_nodes,
+                auto_create_end_nodes=self.auto_create_end_nodes,
+                auto_create_direct_relations=self.auto_create_direct_relations,
+            )
+            self.node_queue.clear()
+            self.edge_queue.clear()
+            self.upload_queue_size = 0
+
+        with self.lock:
+            upload_batch()
+
+    def __enter__(self) -> "InstanceUploadQueue":
+        """
+        Wraps around start method, for use as context manager
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
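
The new InstanceUploadQueue follows the same pattern as the existing upload queues: instances accumulate under a lock, _check_triggers flushes when a size or interval threshold is hit, and upload() writes all queued nodes and edges in one instances.apply call, retried on transient Cognite errors. A usage sketch, assuming client is a configured CogniteClient and my_nodes a list of NodeApply:

    from cognite.extractorutils.uploader.data_modeling import InstanceUploadQueue

    with InstanceUploadQueue(client, max_queue_size=5000, max_upload_interval=30) as queue:
        queue.add_to_upload_queue(node_data=my_nodes)  # node_data/edge_data are keyword-only
    # leaving the context stops the background thread via stop()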

cognite/extractorutils/util.py
@@ -319,11 +319,14 @@ def _retry_internal(
 ) -> _T2:
     logger = logging.getLogger(__name__)
 
-    while tries and not cancellation_token.is_cancelled:
+    while tries:
         try:
             return f()
 
         except Exception as e:
+            if cancellation_token.is_cancelled:
+                break
+
             if isinstance(exceptions, tuple):
                 for ex_type in exceptions:
                     if isinstance(e, ex_type):
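
The net effect of this change: a cancelled token no longer prevents the wrapped function from being attempted at all, it only stops further retries once an attempt fails. A behavioral sketch using the public retry decorator from this module (the fetch body is hypothetical):

    from cognite.extractorutils.threading import CancellationToken
    from cognite.extractorutils.util import cognite_exceptions, retry

    token = CancellationToken()

    @retry(exceptions=cognite_exceptions(), cancellation_token=token,
           tries=5, delay=1, max_delay=60, backoff=2)
    def fetch() -> None:
        ...  # assumed body that may raise a retriable Cognite error

    token.cancel()
    fetch()  # still attempted once; a failure now breaks out instead of retrying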

pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognite-extractor-utils"
-version = "7.1.5"
+version = "7.2.0"
 description = "Utilities for easier development of extractors for CDF"
 authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
 license = "Apache-2.0"
@@ -53,7 +53,7 @@ exclude = "tests/*"
 
 [tool.poetry.dependencies]
 python = "^3.8.0"
-cognite-sdk = "^7.41.0"
+cognite-sdk = "^7.43.3"
 prometheus-client = ">0.7.0, <=1.0.0"
 arrow = "^1.0.0"
 pyyaml = ">=5.3.0, <7"
@@ -65,6 +65,7 @@ typing-extensions = ">=3.7.4, <5"
 python-dotenv = "^1.0.0"
 azure-identity = "^1.14.0"
 azure-keyvault-secrets = "^4.7.0"
+orjson = "^3.10.3"
 
 [tool.poetry.extras]
 experimental = ["cognite-sdk-experimental"]
@@ -84,6 +85,7 @@ parameterized = "*"
 requests = "^2.31.0"
 types-requests = "^2.31.0.20240125"
 httpx = "^0.27.0"
+faker = "^25.2.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]