cognite-extractor-utils 7.1.6__py3-none-any.whl → 7.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/statestore/__init__.py +12 -0
- cognite/extractorutils/statestore/_base.py +81 -0
- cognite/extractorutils/statestore/hashing.py +258 -0
- cognite/extractorutils/{statestore.py → statestore/watermark.py} +12 -71
- cognite/extractorutils/uploader/data_modeling.py +115 -0
- cognite/extractorutils/util.py +4 -1
- {cognite_extractor_utils-7.1.6.dist-info → cognite_extractor_utils-7.2.0.dist-info}/METADATA +2 -1
- {cognite_extractor_utils-7.1.6.dist-info → cognite_extractor_utils-7.2.0.dist-info}/RECORD +11 -7
- {cognite_extractor_utils-7.1.6.dist-info → cognite_extractor_utils-7.2.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-7.1.6.dist-info → cognite_extractor_utils-7.2.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
Public surface of the ``statestore`` package: watermark-based stores and
hash-based (change detection) stores.
"""

from .watermark import AbstractStateStore, LocalStateStore, NoStateStore, RawStateStore

from .hashing import AbstractHashStateStore, LocalHashStateStore, RawHashStateStore

__all__ = [
    "AbstractStateStore",
    "RawStateStore",
    "LocalStateStore",
    "NoStateStore",
    "AbstractHashStateStore",
    "RawHashStateStore",
    "LocalHashStateStore",
]
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import logging
import threading
from abc import ABC, abstractmethod
from typing import Optional

from cognite.extractorutils._inner_util import _resolve_log_level
from cognite.extractorutils.threading import CancellationToken

# Shared retry tuning used by the concrete state store implementations.
RETRY_BACKOFF_FACTOR = 1.5
RETRY_MAX_DELAY = 60
RETRY_DELAY = 1
RETRIES = 10


class _BaseStateStore(ABC):
    """
    Common scaffolding for state stores: a background thread that periodically
    calls ``synchronize``, a re-entrant lock guarding local state, and a
    cancellation token controlling shutdown.

    Args:
        save_interval: Seconds between scheduled synchronizations. When None,
            no background thread is started.
        trigger_log_level: Log level used when a scheduled synchronization fires.
        thread_name: Name given to the background synchronization thread.
        cancellation_token: Token to observe for shutdown. When given, a child
            token is derived from it; otherwise a fresh token is created and
            the thread runs as a daemon.
    """

    def __init__(
        self,
        save_interval: Optional[int] = None,
        trigger_log_level: str = "DEBUG",
        thread_name: Optional[str] = None,
        cancellation_token: Optional[CancellationToken] = None,
    ) -> None:
        self._initialized = False

        self.logger = logging.getLogger(__name__)
        self.trigger_log_level = _resolve_log_level(trigger_log_level)
        self.save_interval = save_interval

        # Daemonize only when no external token is supplied, so a caller-owned
        # token can keep the thread non-daemonic until it is cancelled.
        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
        self.lock = threading.RLock()
        if cancellation_token is None:
            self.cancellation_token = CancellationToken()
        else:
            self.cancellation_token = cancellation_token.create_child_token()

    def start(self, initialize: bool = True) -> None:
        """
        Begin periodic state persistence.

        Loads the initial state first (unless already initialized or disabled
        via ``initialize=False``), then starts the synchronization thread when
        a save interval is configured.
        """
        if initialize and not self._initialized:
            self.initialize()
        if self.save_interval is not None:
            self.thread.start()

    def stop(self, ensure_synchronize: bool = True) -> None:
        """
        Cancel the synchronization thread, optionally flushing state one last time.

        Args:
            ensure_synchronize (bool): (Optional). Call synchronize one last
                time after shutting down the thread.
        """
        self.cancellation_token.cancel()
        if ensure_synchronize:
            self.synchronize()

    def _run(self) -> None:
        """
        Body of the background thread: synchronize every ``save_interval``
        seconds until the cancellation token fires.
        """
        self.initialize()
        while not self.cancellation_token.wait(timeout=self.save_interval):
            try:
                self.logger.log(self.trigger_log_level, "Triggering scheduled state store synchronization")
                self.synchronize()
            except Exception as e:
                # Keep the thread alive across transient failures; a single
                # failed synchronization should not end periodic saving.
                self.logger.error("Unexpected error while synchronizing state store: %s.", str(e))

        # trigger stop event explicitly to drain the queue
        self.stop(ensure_synchronize=True)

    @abstractmethod
    def initialize(self, force: bool = False) -> None:
        """
        Load states from the remote store.
        """
        pass

    @abstractmethod
    def synchronize(self) -> None:
        """
        Upload states to the remote store.
        """
        pass
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import hashlib
import json
from abc import ABC
from types import TracebackType
from typing import Any, Dict, Iterable, Iterator, Optional, Set, Type

import orjson

from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.client.exceptions import CogniteAPIError
from cognite.extractorutils._inner_util import _DecimalDecoder, _DecimalEncoder
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.util import cognite_exceptions, retry

from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY, _BaseStateStore


class AbstractHashStateStore(_BaseStateStore, ABC):
    """
    State store tracking change detection through SHA-256 digests of row payloads.

    Each known external id maps to a single-entry dict ``{"digest": <hex>}``.
    All access to the local state goes through the re-entrant lock inherited
    from ``_BaseStateStore``, so the store is safe to share between the
    synchronization thread and user code.
    """

    def __init__(
        self,
        save_interval: Optional[int] = None,
        trigger_log_level: str = "DEBUG",
        thread_name: Optional[str] = None,
        cancellation_token: Optional[CancellationToken] = None,
    ) -> None:
        super().__init__(
            save_interval=save_interval,
            trigger_log_level=trigger_log_level,
            thread_name=thread_name,
            cancellation_token=cancellation_token,
        )

        self._local_state: Dict[str, Dict[str, str]] = {}
        self._seen: Set[str] = set()

    def get_state(self, external_id: str) -> Optional[str]:
        """Return the stored digest for ``external_id``, or None when unknown."""
        with self.lock:
            return self._local_state.get(external_id, {}).get("digest")

    def _hash_row(self, data: Dict[str, Any]) -> str:
        # Keys are sorted so that logically equal payloads hash identically
        # regardless of insertion order.
        return hashlib.sha256(orjson.dumps(data, option=orjson.OPT_SORT_KEYS)).hexdigest()

    def set_state(self, external_id: str, data: Dict[str, Any]) -> None:
        """Record the digest of ``data`` as the current state for ``external_id``."""
        with self.lock:
            self._local_state[external_id] = {"digest": self._hash_row(data)}

    def has_changed(self, external_id: str, data: Dict[str, Any]) -> bool:
        """Return True when ``data`` hashes differently from the stored state, or is new."""
        with self.lock:
            if external_id not in self._local_state:
                return True

            return self._hash_row(data) != self._local_state[external_id]["digest"]

    def __getitem__(self, external_id: str) -> Optional[str]:
        return self.get_state(external_id)

    def __setitem__(self, key: str, value: Dict[str, Any]) -> None:
        self.set_state(external_id=key, data=value)

    def __contains__(self, external_id: str) -> bool:
        return external_id in self._local_state

    def __len__(self) -> int:
        return len(self._local_state)

    def __iter__(self) -> Iterator[str]:
        # NOTE: the lock is held for as long as the caller keeps iterating.
        with self.lock:
            yield from self._local_state
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class RawHashStateStore(AbstractHashStateStore):
    """
    Hash state store persisted to a table in CDF RAW.

    Args:
        cdf_client: Client used to read and write the RAW state table.
        database: RAW database containing the state table.
        table: Name of the RAW state table.
        save_interval: Seconds between scheduled synchronizations (None disables).
        trigger_log_level: Log level for scheduled synchronization messages.
        thread_name: Name of the background synchronization thread.
        cancellation_token: Token to observe for shutdown.
    """

    def __init__(
        self,
        cdf_client: CogniteClient,
        database: str,
        table: str,
        save_interval: Optional[int] = None,
        trigger_log_level: str = "DEBUG",
        thread_name: Optional[str] = None,
        cancellation_token: Optional[CancellationToken] = None,
    ) -> None:
        super().__init__(
            save_interval=save_interval,
            trigger_log_level=trigger_log_level,
            thread_name=thread_name,
            cancellation_token=cancellation_token,
        )
        self._cdf_client = cdf_client
        self.database = database
        self.table = table

    def synchronize(self) -> None:
        """Upload the local state to the RAW table, retrying on transient errors."""

        @retry(
            exceptions=cognite_exceptions(),
            cancellation_token=self.cancellation_token,
            tries=RETRIES,
            delay=RETRY_DELAY,
            max_delay=RETRY_MAX_DELAY,
            backoff=RETRY_BACKOFF_FACTOR,
        )
        def push_state() -> None:
            # Hold the lock while reading the state so a concurrent writer
            # cannot mutate it mid-upload.
            with self.lock:
                self._cdf_client.raw.rows.insert(
                    db_name=self.database,
                    table_name=self.table,
                    row=self._local_state,
                    ensure_parent=True,
                )

        push_state()

    def initialize(self, force: bool = False) -> None:
        """
        Load all known states from the RAW table, retrying on transient errors.

        Args:
            force: Enable re-initialization, ie overwrite when called multiple times
        """

        @retry(
            exceptions=cognite_exceptions(),
            cancellation_token=self.cancellation_token,
            tries=RETRIES,
            delay=RETRY_DELAY,
            max_delay=RETRY_MAX_DELAY,
            backoff=RETRY_BACKOFF_FACTOR,
        )
        def fetch_state() -> None:
            if self._initialized and not force:
                return

            rows: Iterable[Row]
            try:
                rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None)
            except CogniteAPIError as e:
                if e.code != 404:
                    raise e
                # A missing table simply means there is no previous state.
                rows = []

            with self.lock:
                self._local_state.clear()
                for row in rows:
                    if row.key is None or row.columns is None:
                        self.logger.warning(f"None encountered in row: {str(row)}")
                        # should never happen, but type from sdk is optional
                        continue
                    digest = row.columns.get("digest")
                    if digest:
                        self._local_state[row.key] = {"digest": digest}

            self._initialized = True

        fetch_state()

    def __enter__(self) -> "RawHashStateStore":
        """
        Wraps around start method, for use as context manager

        Returns:
            self
        """
        self.start()
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """
        Wraps around stop method, for use as context manager

        Args:
            exc_type: Exception type
            exc_val: Exception value
            exc_tb: Traceback
        """
        self.stop()
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class LocalHashStateStore(AbstractHashStateStore):
    """
    Hash state store persisted to a local JSON file.

    Args:
        file_path: Path of the JSON file used for persistence.
        save_interval: Seconds between scheduled synchronizations (None disables).
        trigger_log_level: Log level for scheduled synchronization messages.
        thread_name: Name of the background synchronization thread.
        cancellation_token: Token to observe for shutdown.
    """

    def __init__(
        self,
        file_path: str,
        save_interval: Optional[int] = None,
        trigger_log_level: str = "DEBUG",
        thread_name: Optional[str] = None,
        cancellation_token: Optional[CancellationToken] = None,
    ) -> None:
        super().__init__(
            save_interval=save_interval,
            trigger_log_level=trigger_log_level,
            thread_name=thread_name,
            cancellation_token=cancellation_token,
        )

        self._file_path = file_path

    def initialize(self, force: bool = False) -> None:
        """
        Load states from the configured JSON file.

        Args:
            force: Enable re-initialization, ie overwrite when called multiple times

        Raises:
            ValueError: If the file exists but does not contain valid JSON.
        """
        if self._initialized and not force:
            return

        with self.lock:
            try:
                with open(self._file_path, "r") as f:
                    self._local_state = json.load(f, cls=_DecimalDecoder)
            except FileNotFoundError:
                # No file yet: start from an empty state.
                pass
            except json.decoder.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON in state store file: {str(e)}") from e

        self._initialized = True

    def synchronize(self) -> None:
        """
        Write the current state to the configured JSON file.
        """
        with self.lock:
            with open(self._file_path, "w") as f:
                json.dump(self._local_state, f, cls=_DecimalEncoder)

    def __enter__(self) -> "LocalHashStateStore":
        """
        Wraps around start method, for use as context manager

        Returns:
            self
        """
        self.start()
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """
        Wraps around stop method, for use as context manager

        Args:
            exc_type: Exception type
            exc_val: Exception value
            exc_tb: Traceback
        """
        self.stop()
|
|
@@ -86,27 +86,21 @@ You can set a state store to automatically update on upload triggers from an upl
|
|
|
86
86
|
"""
|
|
87
87
|
|
|
88
88
|
import json
|
|
89
|
-
import
|
|
90
|
-
import threading
|
|
91
|
-
from abc import ABC, abstractmethod
|
|
89
|
+
from abc import ABC
|
|
92
90
|
from types import TracebackType
|
|
93
91
|
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
|
|
94
92
|
|
|
95
93
|
from cognite.client import CogniteClient
|
|
96
94
|
from cognite.client.exceptions import CogniteAPIError
|
|
95
|
+
from cognite.extractorutils._inner_util import _DecimalDecoder, _DecimalEncoder
|
|
97
96
|
from cognite.extractorutils.threading import CancellationToken
|
|
98
97
|
from cognite.extractorutils.uploader import DataPointList
|
|
98
|
+
from cognite.extractorutils.util import cognite_exceptions, retry
|
|
99
99
|
|
|
100
|
-
from .
|
|
101
|
-
from .util import cognite_exceptions, retry
|
|
100
|
+
from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY, _BaseStateStore
|
|
102
101
|
|
|
103
|
-
RETRY_BACKOFF_FACTOR = 1.5
|
|
104
|
-
RETRY_MAX_DELAY = 60
|
|
105
|
-
RETRY_DELAY = 1
|
|
106
|
-
RETRIES = 10
|
|
107
102
|
|
|
108
|
-
|
|
109
|
-
class AbstractStateStore(ABC):
|
|
103
|
+
class AbstractStateStore(_BaseStateStore, ABC):
|
|
110
104
|
"""
|
|
111
105
|
Base class for a state store.
|
|
112
106
|
|
|
@@ -125,69 +119,16 @@ class AbstractStateStore(ABC):
|
|
|
125
119
|
thread_name: Optional[str] = None,
|
|
126
120
|
cancellation_token: Optional[CancellationToken] = None,
|
|
127
121
|
):
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
|
|
136
|
-
self.lock = threading.RLock()
|
|
137
|
-
self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
|
|
122
|
+
super().__init__(
|
|
123
|
+
save_interval=save_interval,
|
|
124
|
+
trigger_log_level=trigger_log_level,
|
|
125
|
+
thread_name=thread_name,
|
|
126
|
+
cancellation_token=cancellation_token,
|
|
127
|
+
)
|
|
138
128
|
|
|
129
|
+
self._local_state: Dict[str, Dict[str, Any]] = {}
|
|
139
130
|
self._deleted: List[str] = []
|
|
140
131
|
|
|
141
|
-
def start(self, initialize: bool = True) -> None:
|
|
142
|
-
"""
|
|
143
|
-
Start saving state periodically if save_interval is set.
|
|
144
|
-
This calls the synchronize method every save_interval seconds.
|
|
145
|
-
"""
|
|
146
|
-
if initialize and not self._initialized:
|
|
147
|
-
self.initialize()
|
|
148
|
-
if self.save_interval is not None:
|
|
149
|
-
self.thread.start()
|
|
150
|
-
|
|
151
|
-
def stop(self, ensure_synchronize: bool = True) -> None:
|
|
152
|
-
"""
|
|
153
|
-
Stop synchronize thread if running, and ensure state is saved if ensure_synchronize is True.
|
|
154
|
-
|
|
155
|
-
Args:
|
|
156
|
-
ensure_synchronize (bool): (Optional). Call synchronize one last time after shutting down thread.
|
|
157
|
-
"""
|
|
158
|
-
self.cancellation_token.cancel()
|
|
159
|
-
if ensure_synchronize:
|
|
160
|
-
self.synchronize()
|
|
161
|
-
|
|
162
|
-
def _run(self) -> None:
|
|
163
|
-
"""
|
|
164
|
-
Internal run method for synchronize thread
|
|
165
|
-
"""
|
|
166
|
-
self.initialize()
|
|
167
|
-
while not self.cancellation_token.wait(timeout=self.save_interval):
|
|
168
|
-
try:
|
|
169
|
-
self.logger.log(self.trigger_log_level, "Triggering scheduled state store synchronization")
|
|
170
|
-
self.synchronize()
|
|
171
|
-
except Exception as e:
|
|
172
|
-
self.logger.error("Unexpected error while synchronizing state store: %s.", str(e))
|
|
173
|
-
|
|
174
|
-
# trigger stop event explicitly to drain the queue
|
|
175
|
-
self.stop(ensure_synchronize=True)
|
|
176
|
-
|
|
177
|
-
@abstractmethod
|
|
178
|
-
def initialize(self, force: bool = False) -> None:
|
|
179
|
-
"""
|
|
180
|
-
Get states from remote store
|
|
181
|
-
"""
|
|
182
|
-
pass
|
|
183
|
-
|
|
184
|
-
@abstractmethod
|
|
185
|
-
def synchronize(self) -> None:
|
|
186
|
-
"""
|
|
187
|
-
Upload states to remote store
|
|
188
|
-
"""
|
|
189
|
-
pass
|
|
190
|
-
|
|
191
132
|
def get_state(self, external_id: Union[str, List[str]]) -> Union[Tuple[Any, Any], List[Tuple[Any, Any]]]:
|
|
192
133
|
"""
|
|
193
134
|
Get state(s) for external ID(s)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from types import TracebackType
from typing import Any, Callable, List, Optional, Type

from cognite.client import CogniteClient
from cognite.client.data_classes.data_modeling import EdgeApply, NodeApply
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader._base import (
    RETRIES,
    RETRY_BACKOFF_FACTOR,
    RETRY_DELAY,
    RETRY_MAX_DELAY,
    AbstractUploadQueue,
)
from cognite.extractorutils.util import cognite_exceptions, retry


class InstanceUploadQueue(AbstractUploadQueue):
    """
    Upload queue for data modeling instances (nodes and edges), flushed to CDF
    in batches via ``data_modeling.instances.apply``.

    Args:
        cdf_client: Client used to apply instances.
        post_upload_function: Callback invoked after each successful upload.
        max_queue_size: Queue size that triggers an automatic upload.
        max_upload_interval: Seconds between automatic uploads.
        trigger_log_level: Log level for upload trigger messages.
        thread_name: Name of the background upload thread.
        cancellation_token: Token to observe for shutdown.
        auto_create_start_nodes: Create missing start nodes when applying edges.
        auto_create_end_nodes: Create missing end nodes when applying edges.
        auto_create_direct_relations: Create missing direct relation targets.
    """

    def __init__(
        self,
        cdf_client: CogniteClient,
        post_upload_function: Optional[Callable[[List[Any]], None]] = None,
        max_queue_size: Optional[int] = None,
        max_upload_interval: Optional[int] = None,
        trigger_log_level: str = "DEBUG",
        thread_name: Optional[str] = None,
        cancellation_token: Optional[CancellationToken] = None,
        auto_create_start_nodes: bool = True,
        auto_create_end_nodes: bool = True,
        auto_create_direct_relations: bool = True,
    ):
        super().__init__(
            cdf_client,
            post_upload_function,
            max_queue_size,
            max_upload_interval,
            trigger_log_level,
            thread_name,
            cancellation_token,
        )

        self.auto_create_start_nodes = auto_create_start_nodes
        self.auto_create_end_nodes = auto_create_end_nodes
        self.auto_create_direct_relations = auto_create_direct_relations

        self.node_queue: List[NodeApply] = []
        self.edge_queue: List[EdgeApply] = []

    def add_to_upload_queue(
        self,
        *,
        node_data: Optional[List[NodeApply]] = None,
        edge_data: Optional[List[EdgeApply]] = None,
    ) -> None:
        """
        Append nodes and/or edges to the queue, then check upload triggers.

        Args:
            node_data: Nodes to enqueue.
            edge_data: Edges to enqueue.
        """
        # Single critical section so node insertion, edge insertion and the
        # trigger check cannot interleave with another thread's additions.
        with self.lock:
            if node_data:
                self.node_queue.extend(node_data)
                self.upload_queue_size += len(node_data)

            if edge_data:
                self.edge_queue.extend(edge_data)
                self.upload_queue_size += len(edge_data)

            self._check_triggers()

    def upload(self) -> None:
        """
        Apply all queued nodes and edges to CDF, retrying on transient errors.

        Does nothing when the queue is empty, so periodic triggers do not
        issue needless API requests.
        """

        @retry(
            exceptions=cognite_exceptions(),
            cancellation_token=self.cancellation_token,
            tries=RETRIES,
            delay=RETRY_DELAY,
            max_delay=RETRY_MAX_DELAY,
            backoff=RETRY_BACKOFF_FACTOR,
        )
        def upload_batch() -> None:
            self.cdf_client.data_modeling.instances.apply(
                nodes=self.node_queue,
                edges=self.edge_queue,
                auto_create_start_nodes=self.auto_create_start_nodes,
                auto_create_end_nodes=self.auto_create_end_nodes,
                auto_create_direct_relations=self.auto_create_direct_relations,
            )
            # Only clear state after a successful apply, so a failed attempt
            # keeps the data queued for the next try.
            self.node_queue.clear()
            self.edge_queue.clear()
            self.upload_queue_size = 0

        with self.lock:
            # Avoid an API round-trip when there is nothing to upload.
            if not self.node_queue and not self.edge_queue:
                return
            upload_batch()

    def __enter__(self) -> "InstanceUploadQueue":
        """
        Wraps around start method, for use as context manager

        Returns:
            self
        """
        self.start()
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """
        Wraps around stop method, for use as context manager

        Args:
            exc_type: Exception type
            exc_val: Exception value
            exc_tb: Traceback
        """
        self.stop()
|
cognite/extractorutils/util.py
CHANGED
|
@@ -319,11 +319,14 @@ def _retry_internal(
|
|
|
319
319
|
) -> _T2:
|
|
320
320
|
logger = logging.getLogger(__name__)
|
|
321
321
|
|
|
322
|
-
while tries
|
|
322
|
+
while tries:
|
|
323
323
|
try:
|
|
324
324
|
return f()
|
|
325
325
|
|
|
326
326
|
except Exception as e:
|
|
327
|
+
if cancellation_token.is_cancelled:
|
|
328
|
+
break
|
|
329
|
+
|
|
327
330
|
if isinstance(exceptions, tuple):
|
|
328
331
|
for ex_type in exceptions:
|
|
329
332
|
if isinstance(e, ex_type):
|
{cognite_extractor_utils-7.1.6.dist-info → cognite_extractor_utils-7.2.0.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cognite-extractor-utils
|
|
3
|
-
Version: 7.
|
|
3
|
+
Version: 7.2.0
|
|
4
4
|
Summary: Utilities for easier development of extractors for CDF
|
|
5
5
|
Home-page: https://github.com/cognitedata/python-extractor-utils
|
|
6
6
|
License: Apache-2.0
|
|
@@ -22,6 +22,7 @@ Requires-Dist: cognite-sdk (>=7.43.3,<8.0.0)
|
|
|
22
22
|
Requires-Dist: dacite (>=1.6.0,<2.0.0)
|
|
23
23
|
Requires-Dist: decorator (>=5.1.1,<6.0.0)
|
|
24
24
|
Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
|
|
25
|
+
Requires-Dist: orjson (>=3.10.3,<4.0.0)
|
|
25
26
|
Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
|
|
26
27
|
Requires-Dist: psutil (>=5.7.0,<6.0.0)
|
|
27
28
|
Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
cognite/extractorutils/__init__.py,sha256=
|
|
1
|
+
cognite/extractorutils/__init__.py,sha256=FSo7g_qtWL_Dtb6sUYbzlhG16HbyQ1tNJWzPc8B4tEI,739
|
|
2
2
|
cognite/extractorutils/_inner_util.py,sha256=gmz6aqS7jDNsg8z4RHgJjMFohDLOMiaU4gMWBhg3xcE,1558
|
|
3
3
|
cognite/extractorutils/base.py,sha256=q6NU2bPec3WOasVnnIFoh-aUJudVZWZ2R6emz3IRj8Q,16391
|
|
4
4
|
cognite/extractorutils/configtools/__init__.py,sha256=L-daaqInIsmHcjb2forJeY0fW8tz1mlteOUo7IsWnrU,3059
|
|
@@ -8,20 +8,24 @@ cognite/extractorutils/configtools/loaders.py,sha256=VmKNfGqwdHycwZB91i-BHarjW-2
|
|
|
8
8
|
cognite/extractorutils/exceptions.py,sha256=XiwyNPSN0YxFYaPw7tfA63B94PL48xDK3EfdGdhgQgc,1084
|
|
9
9
|
cognite/extractorutils/metrics.py,sha256=01ZMRbDisXPxrfCSyTSEkXMsslzmZwEqw18fuu9okdc,15509
|
|
10
10
|
cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
cognite/extractorutils/statestore.py,sha256
|
|
11
|
+
cognite/extractorutils/statestore/__init__.py,sha256=hV3r11FUXkH6-60Ct6zLSROMNVrEeiE3Shmkf28Q-co,359
|
|
12
|
+
cognite/extractorutils/statestore/_base.py,sha256=PM4C-bz41tldA5Lx8rD0AzgXJciAZc2l_1tbz1VV27I,2712
|
|
13
|
+
cognite/extractorutils/statestore/hashing.py,sha256=o-efTv21_ATQnyxYmple3MF7r5Afy-7qZsdZhR47emw,8083
|
|
14
|
+
cognite/extractorutils/statestore/watermark.py,sha256=c_lcmJfo8bOvWyCJ9iRbbE4BlqRVulom4TpHb2pOnkE,16755
|
|
12
15
|
cognite/extractorutils/threading.py,sha256=2Hke5cFvP-wA45Crvh58JahoKXB64P3tr7R4y_BhBqM,3605
|
|
13
16
|
cognite/extractorutils/uploader/__init__.py,sha256=W22u6QHA4cR0j78LN5LTL5YGbfC-uTApagTyP5ab7uQ,3110
|
|
14
17
|
cognite/extractorutils/uploader/_base.py,sha256=wktbV8dpb8zBOsNaECZkBNoJSpOz437NlNMER3-a3xQ,5304
|
|
15
18
|
cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
|
|
16
19
|
cognite/extractorutils/uploader/assets.py,sha256=2E90N1kxsaA6Ah4h0_r_dTVhDYY_68ItRWrHYkkltJw,5628
|
|
20
|
+
cognite/extractorutils/uploader/data_modeling.py,sha256=w35Ix5mu0Cgfn4ywnDyif4VVjo04LVTlkMEevk6ztUs,3639
|
|
17
21
|
cognite/extractorutils/uploader/events.py,sha256=NZP2tMoU_rh_rb-EZiUBsOT5KdNABHN4c9Oddk0OsdE,5680
|
|
18
22
|
cognite/extractorutils/uploader/files.py,sha256=31kPS4fwz8ZSXWss-CKmYTM6ZLVx9LtsDe7LHT7Wy98,18329
|
|
19
23
|
cognite/extractorutils/uploader/raw.py,sha256=wFjF90PFTjmByOWx_Y4_YfDJ2w2jl0EQJ2Tjx2MP2PM,6738
|
|
20
24
|
cognite/extractorutils/uploader/time_series.py,sha256=WAtEQy7k5IjG-sw1oWwCujIM6PjHZYl4LKa4wy2tBPw,26817
|
|
21
25
|
cognite/extractorutils/uploader_extractor.py,sha256=E-mpVvbPg_Tk90U4S9JybV0duptJ2SXE88HB6npE3zI,7732
|
|
22
26
|
cognite/extractorutils/uploader_types.py,sha256=wxfrsiKPTzG5lmoYtQsxt8Xyj-s5HnaLl8WDzJNrazg,1020
|
|
23
|
-
cognite/extractorutils/util.py,sha256=
|
|
24
|
-
cognite_extractor_utils-7.
|
|
25
|
-
cognite_extractor_utils-7.
|
|
26
|
-
cognite_extractor_utils-7.
|
|
27
|
-
cognite_extractor_utils-7.
|
|
27
|
+
cognite/extractorutils/util.py,sha256=UA6mUZ1caHd6vtA45gZXrk6cxo5cSB2PZ32bMwfEU0M,17229
|
|
28
|
+
cognite_extractor_utils-7.2.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
|
29
|
+
cognite_extractor_utils-7.2.0.dist-info/METADATA,sha256=KoWttPiWPXifkWzMkDefy1Hz2pNNiZZrli7W7EoSAFM,5486
|
|
30
|
+
cognite_extractor_utils-7.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
31
|
+
cognite_extractor_utils-7.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|