cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +7 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +58 -49
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +29 -6
- cognite/extractorutils/uploader_types.py +15 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
State store implementations that use hashing to track changes.
|
|
3
|
+
|
|
4
|
+
This module provides two main classes for state management:
|
|
5
|
+
- ``RawHashStateStore``: A state store that uses CDF RAW to store and persist states based on a hash of the data.
|
|
6
|
+
- ``LocalHashStateStore``: A state store that uses a local JSON file to store and persist states based on a hash of the
|
|
7
|
+
data.
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
import hashlib
|
|
2
11
|
import json
|
|
3
12
|
from abc import ABC
|
|
@@ -18,6 +27,12 @@ from ._base import RETRIES, RETRY_BACKOFF_FACTOR, RETRY_DELAY, RETRY_MAX_DELAY,
|
|
|
18
27
|
|
|
19
28
|
|
|
20
29
|
class AbstractHashStateStore(_BaseStateStore, ABC):
|
|
30
|
+
"""
|
|
31
|
+
Base class for state stores that use hashing to track changes.
|
|
32
|
+
|
|
33
|
+
This class is thread-safe.
|
|
34
|
+
"""
|
|
35
|
+
|
|
21
36
|
def __init__(
|
|
22
37
|
self,
|
|
23
38
|
save_interval: int | None = None,
|
|
@@ -36,6 +51,15 @@ class AbstractHashStateStore(_BaseStateStore, ABC):
|
|
|
36
51
|
self._seen: set[str] = set()
|
|
37
52
|
|
|
38
53
|
def get_state(self, external_id: str) -> str | None:
|
|
54
|
+
"""
|
|
55
|
+
Get the state for a given external ID as a hash digest.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
external_id: The external ID for which to retrieve the state.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
The hash digest of the state if it exists, otherwise None.
|
|
62
|
+
"""
|
|
39
63
|
with self.lock:
|
|
40
64
|
return self._local_state.get(external_id, {}).get("digest")
|
|
41
65
|
|
|
@@ -43,10 +67,29 @@ class AbstractHashStateStore(_BaseStateStore, ABC):
|
|
|
43
67
|
return hashlib.sha256(orjson.dumps(data, option=orjson.OPT_SORT_KEYS)).hexdigest()
|
|
44
68
|
|
|
45
69
|
def set_state(self, external_id: str, data: dict[str, Any]) -> None:
|
|
70
|
+
"""
|
|
71
|
+
Set the state for a given external ID based on a hash of the provided data.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
external_id: The external ID for which to set the state.
|
|
75
|
+
data: The data to hash and store as the state.
|
|
76
|
+
"""
|
|
46
77
|
with self.lock:
|
|
47
78
|
self._local_state[external_id] = {"digest": self._hash_row(data)}
|
|
48
79
|
|
|
49
80
|
def has_changed(self, external_id: str, data: dict[str, Any]) -> bool:
|
|
81
|
+
"""
|
|
82
|
+
Check if the provided data is different from the stored state for the given external ID.
|
|
83
|
+
|
|
84
|
+
This is done by comparing the hash of the provided data with the stored hash.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
external_id: The external ID for which to check the state.
|
|
88
|
+
data: The data to hash and compare against the stored state.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
True if the data has changed (i.e., the hash is different or not present), otherwise False.
|
|
92
|
+
"""
|
|
50
93
|
with self.lock:
|
|
51
94
|
if external_id not in self._local_state:
|
|
52
95
|
return True
|
|
@@ -54,23 +97,65 @@ class AbstractHashStateStore(_BaseStateStore, ABC):
|
|
|
54
97
|
return self._hash_row(data) != self._local_state[external_id]["digest"]
|
|
55
98
|
|
|
56
99
|
def __getitem__(self, external_id: str) -> str | None:
|
|
100
|
+
"""
|
|
101
|
+
Get the state for a given external ID as a hash digest.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
external_id: The external ID for which to retrieve the state.
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
The hash digest of the state if it exists, otherwise None.
|
|
108
|
+
"""
|
|
57
109
|
return self.get_state(external_id)
|
|
58
110
|
|
|
59
111
|
def __setitem__(self, key: str, value: dict[str, Any]) -> None:
|
|
112
|
+
"""
|
|
113
|
+
Set the state for a given external ID based on a hash of the provided data.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
key: The external ID for which to set the state.
|
|
117
|
+
value: The data to hash and store as the state.
|
|
118
|
+
"""
|
|
60
119
|
self.set_state(external_id=key, data=value)
|
|
61
120
|
|
|
62
121
|
def __contains__(self, external_id: str) -> bool:
|
|
122
|
+
"""
|
|
123
|
+
Check if the given external ID exists in the state store.
|
|
124
|
+
"""
|
|
63
125
|
return external_id in self._local_state
|
|
64
126
|
|
|
65
127
|
def __len__(self) -> int:
|
|
128
|
+
"""
|
|
129
|
+
Get the number of external IDs stored in the state store.
|
|
130
|
+
"""
|
|
66
131
|
return len(self._local_state)
|
|
67
132
|
|
|
68
133
|
def __iter__(self) -> Iterator[str]:
|
|
134
|
+
"""
|
|
135
|
+
Iterate over the external IDs stored in the state store.
|
|
136
|
+
"""
|
|
69
137
|
with self.lock:
|
|
70
138
|
yield from self._local_state
|
|
71
139
|
|
|
72
140
|
|
|
73
141
|
class RawHashStateStore(AbstractHashStateStore):
|
|
142
|
+
"""
|
|
143
|
+
A version of AbstractHashStateStore that uses CDF RAW to store and persist states.
|
|
144
|
+
|
|
145
|
+
All states are stored in a CDF RAW table, where each row is identified by an external ID.
|
|
146
|
+
|
|
147
|
+
This class is thread-safe.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
cdf_client: The CogniteClient instance to use for ingesting to/reading from RAW.
|
|
151
|
+
database: The name of the CDF RAW database.
|
|
152
|
+
table: The name of the CDF RAW table.
|
|
153
|
+
save_interval: If set, the state store will periodically synchronize with CDF RAW.
|
|
154
|
+
trigger_log_level: The logging level to use for synchronization triggers.
|
|
155
|
+
thread_name: Name of the thread used for synchronization.
|
|
156
|
+
cancellation_token: A CancellationToken to control the lifecycle of the state store.
|
|
157
|
+
"""
|
|
158
|
+
|
|
74
159
|
def __init__(
|
|
75
160
|
self,
|
|
76
161
|
cdf_client: CogniteClient,
|
|
@@ -92,6 +177,10 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
92
177
|
self.table = table
|
|
93
178
|
|
|
94
179
|
def synchronize(self) -> None:
|
|
180
|
+
"""
|
|
181
|
+
Upload local state store to CDF.
|
|
182
|
+
"""
|
|
183
|
+
|
|
95
184
|
@retry(
|
|
96
185
|
exceptions=cognite_exceptions(),
|
|
97
186
|
cancellation_token=self.cancellation_token,
|
|
@@ -101,9 +190,6 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
101
190
|
backoff=RETRY_BACKOFF_FACTOR,
|
|
102
191
|
)
|
|
103
192
|
def impl() -> None:
|
|
104
|
-
"""
|
|
105
|
-
Upload local state store to CDF
|
|
106
|
-
"""
|
|
107
193
|
with self.lock:
|
|
108
194
|
self._cdf_client.raw.rows.insert(
|
|
109
195
|
db_name=self.database,
|
|
@@ -115,6 +201,16 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
115
201
|
impl()
|
|
116
202
|
|
|
117
203
|
def initialize(self, force: bool = False) -> None:
|
|
204
|
+
"""
|
|
205
|
+
Initialize the state store by loading all known states from CDF RAW.
|
|
206
|
+
|
|
207
|
+
Unless ``force`` is set to True, this will not re-initialize the state store if it has already been initialized.
|
|
208
|
+
Subsequent calls to this method will be noop unless ``force`` is set to True.
|
|
209
|
+
|
|
210
|
+
Args:
|
|
211
|
+
force: Enable re-initialization, i.e. overwrite when called multiple times
|
|
212
|
+
"""
|
|
213
|
+
|
|
118
214
|
@retry(
|
|
119
215
|
exceptions=cognite_exceptions(),
|
|
120
216
|
cancellation_token=self.cancellation_token,
|
|
@@ -146,7 +242,7 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
146
242
|
self._local_state.clear()
|
|
147
243
|
for row in rows:
|
|
148
244
|
if row.key is None or row.columns is None:
|
|
149
|
-
self.logger.warning(f"None encountered in row: {
|
|
245
|
+
self.logger.warning(f"None encountered in row: {row!s}")
|
|
150
246
|
# should never happen, but type from sdk is optional
|
|
151
247
|
continue
|
|
152
248
|
state = row.columns.get("digest")
|
|
@@ -159,7 +255,7 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
159
255
|
|
|
160
256
|
def __enter__(self) -> "RawHashStateStore":
|
|
161
257
|
"""
|
|
162
|
-
Wraps around start method, for use as context manager
|
|
258
|
+
Wraps around start method, for use as context manager.
|
|
163
259
|
|
|
164
260
|
Returns:
|
|
165
261
|
self
|
|
@@ -174,7 +270,7 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
174
270
|
exc_tb: TracebackType | None,
|
|
175
271
|
) -> None:
|
|
176
272
|
"""
|
|
177
|
-
Wraps around stop method, for use as context manager
|
|
273
|
+
Wraps around stop method, for use as context manager.
|
|
178
274
|
|
|
179
275
|
Args:
|
|
180
276
|
exc_type: Exception type
|
|
@@ -185,6 +281,22 @@ class RawHashStateStore(AbstractHashStateStore):
|
|
|
185
281
|
|
|
186
282
|
|
|
187
283
|
class LocalHashStateStore(AbstractHashStateStore):
|
|
284
|
+
"""
|
|
285
|
+
A version of AbstractHashStateStore that uses a local JSON file to store and persist states.
|
|
286
|
+
|
|
287
|
+
All states are stored in a JSON file, where each key is an external ID and the value is a dictionary containing
|
|
288
|
+
the hash digest of the data.
|
|
289
|
+
|
|
290
|
+
This class is thread-safe.
|
|
291
|
+
|
|
292
|
+
Args:
|
|
293
|
+
file_path: The path to the JSON file where states will be stored.
|
|
294
|
+
save_interval: If set, the state store will periodically synchronize with the JSON file.
|
|
295
|
+
trigger_log_level: The logging level to use for synchronization triggers.
|
|
296
|
+
thread_name: Name of the thread used for synchronization.
|
|
297
|
+
cancellation_token: A CancellationToken to control the lifecycle of the state store.
|
|
298
|
+
"""
|
|
299
|
+
|
|
188
300
|
def __init__(
|
|
189
301
|
self,
|
|
190
302
|
file_path: str,
|
|
@@ -204,10 +316,13 @@ class LocalHashStateStore(AbstractHashStateStore):
|
|
|
204
316
|
|
|
205
317
|
def initialize(self, force: bool = False) -> None:
|
|
206
318
|
"""
|
|
207
|
-
Load states from specified JSON file
|
|
319
|
+
Load states from specified JSON file.
|
|
320
|
+
|
|
321
|
+
Unless ``force`` is set to True, this will not re-initialize the state store if it has already been initialized.
|
|
322
|
+
Subsequent calls to this method will be noop unless ``force`` is set to True.
|
|
208
323
|
|
|
209
324
|
Args:
|
|
210
|
-
force: Enable re-initialization,
|
|
325
|
+
force: Enable re-initialization, i.e. overwrite when called multiple times
|
|
211
326
|
"""
|
|
212
327
|
if self._initialized and not force:
|
|
213
328
|
return
|
|
@@ -219,21 +334,20 @@ class LocalHashStateStore(AbstractHashStateStore):
|
|
|
219
334
|
except FileNotFoundError:
|
|
220
335
|
pass
|
|
221
336
|
except json.decoder.JSONDecodeError as e:
|
|
222
|
-
raise ValueError(f"Invalid JSON in state store file: {
|
|
337
|
+
raise ValueError(f"Invalid JSON in state store file: {e!s}") from e
|
|
223
338
|
|
|
224
339
|
self._initialized = True
|
|
225
340
|
|
|
226
341
|
def synchronize(self) -> None:
|
|
227
342
|
"""
|
|
228
|
-
Save states to specified JSON file
|
|
343
|
+
Save states to specified JSON file.
|
|
229
344
|
"""
|
|
230
|
-
with self.lock:
|
|
231
|
-
|
|
232
|
-
json.dump(self._local_state, f, cls=_DecimalEncoder)
|
|
345
|
+
with self.lock, open(self._file_path, "w") as f:
|
|
346
|
+
json.dump(self._local_state, f, cls=_DecimalEncoder)
|
|
233
347
|
|
|
234
348
|
def __enter__(self) -> "LocalHashStateStore":
|
|
235
349
|
"""
|
|
236
|
-
Wraps around start method, for use as context manager
|
|
350
|
+
Wraps around start method, for use as context manager.
|
|
237
351
|
|
|
238
352
|
Returns:
|
|
239
353
|
self
|
|
@@ -248,7 +362,7 @@ class LocalHashStateStore(AbstractHashStateStore):
|
|
|
248
362
|
exc_tb: TracebackType | None,
|
|
249
363
|
) -> None:
|
|
250
364
|
"""
|
|
251
|
-
Wraps around stop method, for use as context manager
|
|
365
|
+
Wraps around stop method, for use as context manager.
|
|
252
366
|
|
|
253
367
|
Args:
|
|
254
368
|
exc_type: Exception type
|
|
@@ -11,78 +11,23 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
14
|
"""
|
|
16
|
-
|
|
17
|
-
incremental load and speeding up startup times.
|
|
18
|
-
|
|
19
|
-
At the beginning of a run the extractor typically calls the ``initialize`` method, which loads the states from the
|
|
20
|
-
remote store (which can either be a local JSON file or a table in CDF RAW), and during and/or at the end of a run, the
|
|
21
|
-
``synchronize`` method is called, which saves the current states to the remote store.
|
|
22
|
-
|
|
23
|
-
You can choose the back-end for your state store with which class you're instantiating:
|
|
24
|
-
|
|
25
|
-
.. code-block:: python
|
|
26
|
-
|
|
27
|
-
# A state store using a JSON file as remote storage:
|
|
28
|
-
states = LocalStateStore("state.json")
|
|
29
|
-
states.initialize()
|
|
30
|
-
|
|
31
|
-
# A state store using a RAW table as remote storage:
|
|
32
|
-
states = RawStateStore(
|
|
33
|
-
cdf_client = CogniteClient(),
|
|
34
|
-
database = "extractor_states",
|
|
35
|
-
table = "my_extractor_deployment"
|
|
36
|
-
)
|
|
37
|
-
states.initialize()
|
|
38
|
-
|
|
39
|
-
You can now use this state store to get states:
|
|
40
|
-
|
|
41
|
-
.. code-block:: python
|
|
42
|
-
|
|
43
|
-
low, high = states.get_state(external_id = "my-id")
|
|
44
|
-
|
|
45
|
-
You can set states:
|
|
46
|
-
|
|
47
|
-
.. code-block:: python
|
|
48
|
-
|
|
49
|
-
states.set_state(external_id = "another-id", high=100)
|
|
50
|
-
|
|
51
|
-
and similar for ``low``. The ``set_state(...)`` method will always overwrite the current state. Some times you might
|
|
52
|
-
want to only set state *if larger* than the previous state, in that case consider ``expand_state(...)``:
|
|
53
|
-
|
|
54
|
-
.. code-block:: python
|
|
15
|
+
State store implementation that uses watermarks to track changes.
|
|
55
16
|
|
|
56
|
-
|
|
57
|
-
|
|
17
|
+
Watermarks are either low and high values, or just high values, that represent the known range of data that has been
|
|
18
|
+
processed for a given external ID. This allows for incremental processing of data, where only new or changed data
|
|
19
|
+
is processed in subsequent runs.
|
|
58
20
|
|
|
59
|
-
|
|
60
|
-
|
|
21
|
+
For example, if a time series has a low watermark of 100 and a high watermark of 200, the extractor can start processing
|
|
22
|
+
new data from 201 onwards when starting up, and can begin backfilling historical data from 100 and backwards.
|
|
61
23
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
.. code-block:: python
|
|
65
|
-
|
|
66
|
-
states.synchronize()
|
|
67
|
-
|
|
68
|
-
You can set a state store to automatically update on upload triggers from an upload queue by using the
|
|
69
|
-
``post_upload_function`` in the upload queue:
|
|
70
|
-
|
|
71
|
-
.. code-block:: python
|
|
72
|
-
|
|
73
|
-
states = LocalStateStore("state.json")
|
|
74
|
-
states.initialize()
|
|
75
|
-
|
|
76
|
-
uploader = TimeSeriesUploadQueue(
|
|
77
|
-
cdf_client = CogniteClient(),
|
|
78
|
-
max_upload_interval = 10
|
|
79
|
-
post_upload_function = states.post_upload_handler()
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
# The state store is now updated automatically!
|
|
83
|
-
|
|
84
|
-
states.synchronize()
|
|
24
|
+
Or if a file has a high watermark of 1000, and the extractor receives a new file with a high watermark of 1500, the
|
|
25
|
+
extractor will know that the file has indeed changed.
|
|
85
26
|
|
|
27
|
+
This module provides the following state store implementations:
|
|
28
|
+
- `RawStateStore`: A state store that uses a CDF RAW table to store states.
|
|
29
|
+
- `LocalStateStore`: A state store that uses a local JSON file to store states.
|
|
30
|
+
- `NoStateStore`: A state store that does not persist states between runs, but keeps the state in memory only.
|
|
86
31
|
"""
|
|
87
32
|
|
|
88
33
|
import json
|
|
@@ -105,6 +50,8 @@ class AbstractStateStore(_BaseStateStore, ABC):
|
|
|
105
50
|
"""
|
|
106
51
|
Base class for a state store.
|
|
107
52
|
|
|
53
|
+
This class is thread-safe.
|
|
54
|
+
|
|
108
55
|
Args:
|
|
109
56
|
save_interval: Automatically trigger synchronize each m seconds when run as a thread (use start/stop
|
|
110
57
|
methods).
|
|
@@ -132,7 +79,7 @@ class AbstractStateStore(_BaseStateStore, ABC):
|
|
|
132
79
|
|
|
133
80
|
def get_state(self, external_id: str | list[str]) -> tuple[Any, Any] | list[tuple[Any, Any]]:
|
|
134
81
|
"""
|
|
135
|
-
Get state(s) for external ID(s)
|
|
82
|
+
Get state(s) for external ID(s).
|
|
136
83
|
|
|
137
84
|
Args:
|
|
138
85
|
external_id: An external ID or list of external IDs to get states for
|
|
@@ -157,6 +104,9 @@ class AbstractStateStore(_BaseStateStore, ABC):
|
|
|
157
104
|
"""
|
|
158
105
|
Set/update state of a single external ID.
|
|
159
106
|
|
|
107
|
+
Consider using `expand_state` instead, since this method will overwrite the current state no matter if it is
|
|
108
|
+
actually outside the current state.
|
|
109
|
+
|
|
160
110
|
Args:
|
|
161
111
|
external_id: External ID of e.g. time series to store state of
|
|
162
112
|
low: Low watermark
|
|
@@ -169,8 +119,10 @@ class AbstractStateStore(_BaseStateStore, ABC):
|
|
|
169
119
|
|
|
170
120
|
def expand_state(self, external_id: str, low: Any | None = None, high: Any | None = None) -> None:
|
|
171
121
|
"""
|
|
172
|
-
|
|
173
|
-
|
|
122
|
+
Only set/update state if the proposed state is outside the stored state.
|
|
123
|
+
|
|
124
|
+
Only updates the low watermark if the proposed low is lower than the stored low, and only updates the high
|
|
125
|
+
watermark if the proposed high is higher than the stored high.
|
|
174
126
|
|
|
175
127
|
Args:
|
|
176
128
|
external_id: External ID of e.g. time series to store state of
|
|
@@ -195,7 +147,9 @@ class AbstractStateStore(_BaseStateStore, ABC):
|
|
|
195
147
|
|
|
196
148
|
def post_upload_handler(self) -> Callable[[list[dict[str, str | DataPointList]]], None]:
|
|
197
149
|
"""
|
|
198
|
-
Get a
|
|
150
|
+
Get a callback function to handle post-upload events.
|
|
151
|
+
|
|
152
|
+
This callable is suitable for passing to a time series upload queue as ``post_upload_function``, that will
|
|
199
153
|
automatically update the states in this state store when that upload queue is uploading.
|
|
200
154
|
|
|
201
155
|
Returns:
|
|
@@ -234,24 +188,38 @@ class AbstractStateStore(_BaseStateStore, ABC):
|
|
|
234
188
|
|
|
235
189
|
if high is not None and new_state > high:
|
|
236
190
|
return True
|
|
237
|
-
|
|
238
|
-
return True
|
|
239
|
-
|
|
240
|
-
return False
|
|
191
|
+
return bool(low is not None and new_state < low)
|
|
241
192
|
|
|
242
193
|
def __getitem__(self, external_id: str) -> tuple[Any, Any]:
|
|
194
|
+
"""
|
|
195
|
+
Get state for a single external ID.
|
|
196
|
+
"""
|
|
243
197
|
return self.get_state(external_id) # type: ignore # will not be list if input is single str
|
|
244
198
|
|
|
245
199
|
def __setitem__(self, key: str, value: tuple[Any, Any]) -> None:
|
|
200
|
+
"""
|
|
201
|
+
Set state for a single external ID.
|
|
202
|
+
|
|
203
|
+
This will always overwrite the current state, so use with care.
|
|
204
|
+
"""
|
|
246
205
|
self.set_state(external_id=key, low=value[0], high=value[1])
|
|
247
206
|
|
|
248
207
|
def __contains__(self, external_id: str) -> bool:
|
|
208
|
+
"""
|
|
209
|
+
Check if an external ID is in the state store.
|
|
210
|
+
"""
|
|
249
211
|
return external_id in self._local_state
|
|
250
212
|
|
|
251
213
|
def __len__(self) -> int:
|
|
214
|
+
"""
|
|
215
|
+
Get the number of external IDs in the state store.
|
|
216
|
+
"""
|
|
252
217
|
return len(self._local_state)
|
|
253
218
|
|
|
254
219
|
def __iter__(self) -> Iterator[str]:
|
|
220
|
+
"""
|
|
221
|
+
Iterate over external IDs in the state store.
|
|
222
|
+
"""
|
|
255
223
|
yield from self._local_state
|
|
256
224
|
|
|
257
225
|
|
|
@@ -259,6 +227,8 @@ class RawStateStore(AbstractStateStore):
|
|
|
259
227
|
"""
|
|
260
228
|
An extractor state store based on CDF RAW.
|
|
261
229
|
|
|
230
|
+
This class is thread-safe.
|
|
231
|
+
|
|
262
232
|
Args:
|
|
263
233
|
cdf_client: Cognite client to use
|
|
264
234
|
database: Name of CDF database
|
|
@@ -312,6 +282,16 @@ class RawStateStore(AbstractStateStore):
|
|
|
312
282
|
impl()
|
|
313
283
|
|
|
314
284
|
def initialize(self, force: bool = False) -> None:
|
|
285
|
+
"""
|
|
286
|
+
Initialize the state store by loading all known states from CDF RAW.
|
|
287
|
+
|
|
288
|
+
Unless ``force`` is set to True, this will not re-initialize the state store if it has already been initialized.
|
|
289
|
+
Subsequent calls to this method will be noop unless ``force`` is set to True.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
force: Enable re-initialization, i.e. overwrite when called multiple times
|
|
293
|
+
"""
|
|
294
|
+
|
|
315
295
|
@retry(
|
|
316
296
|
exceptions=cognite_exceptions(),
|
|
317
297
|
cancellation_token=self.cancellation_token,
|
|
@@ -336,7 +316,7 @@ class RawStateStore(AbstractStateStore):
|
|
|
336
316
|
self._local_state.clear()
|
|
337
317
|
for row in rows:
|
|
338
318
|
if row.key is None or row.columns is None:
|
|
339
|
-
self.logger.warning(f"None encountered in row: {
|
|
319
|
+
self.logger.warning(f"None encountered in row: {row!s}")
|
|
340
320
|
# should never happen, but type from sdk is optional
|
|
341
321
|
continue
|
|
342
322
|
self._local_state[row.key] = row.columns
|
|
@@ -346,6 +326,10 @@ class RawStateStore(AbstractStateStore):
|
|
|
346
326
|
impl()
|
|
347
327
|
|
|
348
328
|
def synchronize(self) -> None:
|
|
329
|
+
"""
|
|
330
|
+
Upload the contents of the state store to CDF RAW.
|
|
331
|
+
"""
|
|
332
|
+
|
|
349
333
|
@retry(
|
|
350
334
|
exceptions=cognite_exceptions(),
|
|
351
335
|
cancellation_token=self.cancellation_token,
|
|
@@ -356,22 +340,20 @@ class RawStateStore(AbstractStateStore):
|
|
|
356
340
|
)
|
|
357
341
|
def impl() -> None:
|
|
358
342
|
"""
|
|
359
|
-
Upload local state store to CDF
|
|
343
|
+
Upload local state store to CDF.
|
|
360
344
|
"""
|
|
361
345
|
with self.lock:
|
|
362
346
|
self._cdf_client.raw.rows.insert(db_name=self.database, table_name=self.table, row=self._local_state)
|
|
363
347
|
# Create a copy of deleted to facilitate testing (mock library stores list, and as it changes, the
|
|
364
348
|
# assertions fail)
|
|
365
|
-
self._cdf_client.raw.rows.delete(
|
|
366
|
-
db_name=self.database, table_name=self.table, key=[k for k in self._deleted]
|
|
367
|
-
)
|
|
349
|
+
self._cdf_client.raw.rows.delete(db_name=self.database, table_name=self.table, key=list(self._deleted))
|
|
368
350
|
self._deleted.clear()
|
|
369
351
|
|
|
370
352
|
impl()
|
|
371
353
|
|
|
372
354
|
def __enter__(self) -> "RawStateStore":
|
|
373
355
|
"""
|
|
374
|
-
Wraps around start method, for use as context manager
|
|
356
|
+
Wraps around start method, for use as context manager.
|
|
375
357
|
|
|
376
358
|
Returns:
|
|
377
359
|
self
|
|
@@ -383,7 +365,7 @@ class RawStateStore(AbstractStateStore):
|
|
|
383
365
|
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
|
|
384
366
|
) -> None:
|
|
385
367
|
"""
|
|
386
|
-
Wraps around stop method, for use as context manager
|
|
368
|
+
Wraps around stop method, for use as context manager.
|
|
387
369
|
|
|
388
370
|
Args:
|
|
389
371
|
exc_type: Exception type
|
|
@@ -420,7 +402,7 @@ class LocalStateStore(AbstractStateStore):
|
|
|
420
402
|
|
|
421
403
|
def initialize(self, force: bool = False) -> None:
|
|
422
404
|
"""
|
|
423
|
-
Load states from specified JSON file
|
|
405
|
+
Load states from specified JSON file.
|
|
424
406
|
|
|
425
407
|
Args:
|
|
426
408
|
force: Enable re-initialization, i.e. overwrite when called multiple times
|
|
@@ -435,13 +417,13 @@ class LocalStateStore(AbstractStateStore):
|
|
|
435
417
|
except FileNotFoundError:
|
|
436
418
|
pass
|
|
437
419
|
except json.decoder.JSONDecodeError as e:
|
|
438
|
-
raise ValueError(f"Invalid JSON in state store file: {
|
|
420
|
+
raise ValueError(f"Invalid JSON in state store file: {e!s}") from e
|
|
439
421
|
|
|
440
422
|
self._initialized = True
|
|
441
423
|
|
|
442
424
|
def synchronize(self) -> None:
|
|
443
425
|
"""
|
|
444
|
-
Save states to specified JSON file
|
|
426
|
+
Save states to specified JSON file.
|
|
445
427
|
"""
|
|
446
428
|
with self.lock:
|
|
447
429
|
with open(self._file_path, "w") as f:
|
|
@@ -450,7 +432,7 @@ class LocalStateStore(AbstractStateStore):
|
|
|
450
432
|
|
|
451
433
|
def __enter__(self) -> "LocalStateStore":
|
|
452
434
|
"""
|
|
453
|
-
Wraps around start method, for use as context manager
|
|
435
|
+
Wraps around start method, for use as context manager.
|
|
454
436
|
|
|
455
437
|
Returns:
|
|
456
438
|
self
|
|
@@ -465,7 +447,7 @@ class LocalStateStore(AbstractStateStore):
|
|
|
465
447
|
exc_tb: TracebackType | None,
|
|
466
448
|
) -> None:
|
|
467
449
|
"""
|
|
468
|
-
Wraps around stop method, for use as context manager
|
|
450
|
+
Wraps around stop method, for use as context manager.
|
|
469
451
|
|
|
470
452
|
Args:
|
|
471
453
|
exc_type: Exception type
|
|
@@ -478,13 +460,21 @@ class LocalStateStore(AbstractStateStore):
|
|
|
478
460
|
class NoStateStore(AbstractStateStore):
|
|
479
461
|
"""
|
|
480
462
|
A state store that only keeps states in memory and never stores or initializes from external sources.
|
|
463
|
+
|
|
464
|
+
This class is thread-safe.
|
|
481
465
|
"""
|
|
482
466
|
|
|
483
467
|
def __init__(self) -> None:
|
|
484
468
|
super().__init__()
|
|
485
469
|
|
|
486
470
|
def initialize(self, force: bool = False) -> None:
|
|
471
|
+
"""
|
|
472
|
+
Does nothing.
|
|
473
|
+
"""
|
|
487
474
|
pass
|
|
488
475
|
|
|
489
476
|
def synchronize(self) -> None:
|
|
477
|
+
"""
|
|
478
|
+
Does nothing.
|
|
479
|
+
"""
|
|
490
480
|
pass
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module that provides additional threading utilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import logging
|
|
2
6
|
import signal
|
|
3
7
|
from threading import Condition
|
|
@@ -20,6 +24,9 @@ class CancellationToken:
|
|
|
20
24
|
self._parent: CancellationToken | None = None
|
|
21
25
|
|
|
22
26
|
def __repr__(self) -> str:
|
|
27
|
+
"""
|
|
28
|
+
Return a string representation of the CancellationToken instance.
|
|
29
|
+
"""
|
|
23
30
|
cls = self.__class__
|
|
24
31
|
status = "cancelled" if self.is_cancelled else "not cancelled"
|
|
25
32
|
return f"<{cls.__module__}.{cls.__qualname__} at {id(self):#x}: {status}>"
|
|
@@ -29,7 +36,7 @@ class CancellationToken:
|
|
|
29
36
|
"""
|
|
30
37
|
``True`` if the token has been cancelled, or if some parent token has been cancelled.
|
|
31
38
|
"""
|
|
32
|
-
return self._is_cancelled_int or self._parent is not None and self._parent.is_cancelled
|
|
39
|
+
return self._is_cancelled_int or (self._parent is not None and self._parent.is_cancelled)
|
|
33
40
|
|
|
34
41
|
def is_set(self) -> bool:
|
|
35
42
|
"""
|
|
@@ -60,6 +67,19 @@ class CancellationToken:
|
|
|
60
67
|
self.cancel()
|
|
61
68
|
|
|
62
69
|
def wait(self, timeout: float | None = None) -> bool:
|
|
70
|
+
"""
|
|
71
|
+
Wait for the token to be cancelled, or until the timeout expires.
|
|
72
|
+
|
|
73
|
+
This can also be used as a drop-in replacement for sleep if you want to wait for a certain amount of time. A
|
|
74
|
+
call to sleep will not be interrupted by a cancellation, but a call to wait will return immediately if the token
|
|
75
|
+
is cancelled.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
timeout: The maximum time to wait in seconds. If None, wait indefinitely.
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
``True`` if the token was cancelled, ``False`` if the timeout expired before cancellation.
|
|
82
|
+
"""
|
|
63
83
|
endtime = None
|
|
64
84
|
if timeout is not None:
|
|
65
85
|
endtime = time() + timeout
|
|
@@ -78,14 +98,20 @@ class CancellationToken:
|
|
|
78
98
|
return True
|
|
79
99
|
|
|
80
100
|
def create_child_token(self) -> "CancellationToken":
|
|
101
|
+
"""
|
|
102
|
+
Create a child cancellation token of this token.
|
|
103
|
+
|
|
104
|
+
The child token will be cancelled if this token is cancelled, but can also be cancelled independently.
|
|
105
|
+
"""
|
|
81
106
|
child = CancellationToken(self._cv)
|
|
82
107
|
child._parent = self
|
|
83
108
|
return child
|
|
84
109
|
|
|
85
110
|
def cancel_on_interrupt(self) -> None:
|
|
86
111
|
"""
|
|
87
|
-
Register an interrupt handler to capture SIGINT (Ctrl-C) and cancel this token
|
|
88
|
-
|
|
112
|
+
Register an interrupt handler to capture SIGINT (Ctrl-C) and cancel this token.
|
|
113
|
+
|
|
114
|
+
This will set the cancellation token instead of throwing a KeyboardInterrupt exception.
|
|
89
115
|
"""
|
|
90
116
|
|
|
91
117
|
def sigint_handler(sig_num: int, frame: Any) -> None:
|
|
@@ -98,4 +124,4 @@ class CancellationToken:
|
|
|
98
124
|
try:
|
|
99
125
|
signal.signal(signal.SIGINT, sigint_handler)
|
|
100
126
|
except ValueError as e:
|
|
101
|
-
logging.getLogger(__name__).warning(f"Could not register handler for interrupt signals: {
|
|
127
|
+
logging.getLogger(__name__).warning(f"Could not register handler for interrupt signals: {e!s}")
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
|
-
The unstable package contains experimental functions and classes currently
|
|
3
|
-
deemed unstable. The contents of this package is subject to change without
|
|
4
|
-
notice, even in minor or patch releases.
|
|
2
|
+
The unstable package contains experimental functions and classes currently deemed unstable.
|
|
5
3
|
|
|
6
|
-
|
|
7
|
-
run a type checker such as mypy to help catch these changes.
|
|
4
|
+
The contents of this package are subject to change without notice, even in minor or patch releases. Whenever you import
|
|
5
|
+
anything from the unstable package, you should make sure to run a type checker such as mypy to help catch these changes.
|
|
6
|
+
|
|
7
|
+
Parts of ``unstable`` might be promoted to the main library at some point.
|
|
8
8
|
"""
|