cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +9 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +370 -94
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +47 -9
- cognite/extractorutils/uploader_types.py +26 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.7.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module containing functions and classes for loading configuration files.
|
|
3
|
+
"""
|
|
1
4
|
# Copyright 2023 Cognite AS
|
|
2
5
|
#
|
|
3
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -53,19 +56,25 @@ CustomConfigClass = TypeVar("CustomConfigClass", bound=BaseConfig)
|
|
|
53
56
|
|
|
54
57
|
|
|
55
58
|
class KeyVaultAuthenticationMethod(Enum):
|
|
59
|
+
"""
|
|
60
|
+
Enum representing the authentication methods for Azure KeyVault.
|
|
61
|
+
"""
|
|
62
|
+
|
|
56
63
|
DEFAULT = "default"
|
|
57
64
|
CLIENTSECRET = "client-secret"
|
|
58
65
|
|
|
59
66
|
|
|
60
67
|
class KeyVaultLoader:
|
|
61
68
|
"""
|
|
62
|
-
Class responsible for configuring keyvault for clients using Azure
|
|
69
|
+
Class responsible for configuring keyvault for clients using Azure.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
config: A dictionary containing the configuration for the keyvault.
|
|
63
73
|
"""
|
|
64
74
|
|
|
65
75
|
def __init__(self, config: dict | None):
|
|
66
76
|
self.config = config
|
|
67
77
|
|
|
68
|
-
self.credentials: TokenCredential | None = None
|
|
69
78
|
self.client: SecretClient | None = None
|
|
70
79
|
|
|
71
80
|
def _init_client(self) -> None:
|
|
@@ -89,9 +98,10 @@ class KeyVaultLoader:
|
|
|
89
98
|
|
|
90
99
|
vault_url = f"https://{keyvault_name}.vault.azure.net"
|
|
91
100
|
|
|
101
|
+
credentials: TokenCredential
|
|
92
102
|
if self.config["authentication-method"] == KeyVaultAuthenticationMethod.DEFAULT.value:
|
|
93
103
|
_logger.info("Using Azure DefaultCredentials to access KeyVault")
|
|
94
|
-
|
|
104
|
+
credentials = DefaultAzureCredential()
|
|
95
105
|
|
|
96
106
|
elif self.config["authentication-method"] == KeyVaultAuthenticationMethod.CLIENTSECRET.value:
|
|
97
107
|
auth_parameters = ("client-id", "tenant-id", "secret")
|
|
@@ -104,11 +114,11 @@ class KeyVaultLoader:
|
|
|
104
114
|
_logger.info(f"Local environment file not found at {Path.cwd() / '.env'}")
|
|
105
115
|
|
|
106
116
|
if all(param in self.config for param in auth_parameters):
|
|
107
|
-
tenant_id = os.path.expandvars(self.config
|
|
108
|
-
client_id = os.path.expandvars(self.config
|
|
109
|
-
secret = os.path.expandvars(self.config
|
|
117
|
+
tenant_id = os.path.expandvars(self.config["tenant-id"])
|
|
118
|
+
client_id = os.path.expandvars(self.config["client-id"])
|
|
119
|
+
secret = os.path.expandvars(self.config["secret"])
|
|
110
120
|
|
|
111
|
-
|
|
121
|
+
credentials = ClientSecretCredential(
|
|
112
122
|
tenant_id=tenant_id,
|
|
113
123
|
client_id=client_id,
|
|
114
124
|
client_secret=secret,
|
|
@@ -122,9 +132,12 @@ class KeyVaultLoader:
|
|
|
122
132
|
"Invalid KeyVault authentication method. Possible values : default or client-secret"
|
|
123
133
|
)
|
|
124
134
|
|
|
125
|
-
self.client = SecretClient(vault_url=vault_url, credential=
|
|
135
|
+
self.client = SecretClient(vault_url=vault_url, credential=credentials)
|
|
126
136
|
|
|
127
137
|
def __call__(self, _: yaml.SafeLoader, node: yaml.Node) -> str:
|
|
138
|
+
"""
|
|
139
|
+
Method to be called when the !keyvault tag is encountered in the YAML file.
|
|
140
|
+
"""
|
|
128
141
|
self._init_client()
|
|
129
142
|
try:
|
|
130
143
|
return self.client.get_secret(node.value).value # type: ignore # _init_client guarantees not None
|
|
@@ -137,7 +150,14 @@ class _EnvLoader(yaml.SafeLoader):
|
|
|
137
150
|
|
|
138
151
|
|
|
139
152
|
class SafeLoaderIgnoreUnknown(yaml.SafeLoader):
|
|
153
|
+
"""
|
|
154
|
+
Variant of PyYAML's SafeLoader that ignores unknown tags.
|
|
155
|
+
"""
|
|
156
|
+
|
|
140
157
|
def ignore_unknown(self, node: yaml.Node) -> None:
|
|
158
|
+
"""
|
|
159
|
+
Constructor for unknown tags that does nothing.
|
|
160
|
+
"""
|
|
141
161
|
return None
|
|
142
162
|
|
|
143
163
|
|
|
@@ -185,7 +205,9 @@ def _load_yaml_dict_raw(
|
|
|
185
205
|
config_dict = yaml.load(source, Loader=loader) # noqa: S506
|
|
186
206
|
except ScannerError as e:
|
|
187
207
|
location = e.problem_mark or e.context_mark
|
|
188
|
-
formatted_location =
|
|
208
|
+
formatted_location = (
|
|
209
|
+
f" at line {location.line + 1}, column {location.column + 1}" if location is not None else ""
|
|
210
|
+
)
|
|
189
211
|
cause = e.problem or e.context
|
|
190
212
|
raise InvalidConfigError(f"Invalid YAML{formatted_location}: {cause or ''}") from e
|
|
191
213
|
|
|
@@ -241,10 +263,7 @@ def _load_yaml(
|
|
|
241
263
|
) from e
|
|
242
264
|
|
|
243
265
|
except (dacite.WrongTypeError, dacite.MissingValueError, dacite.UnionMatchError) as e:
|
|
244
|
-
if e.field_path
|
|
245
|
-
path = e.field_path.replace("_", "-") if case_style == "hyphen" else e.field_path
|
|
246
|
-
else:
|
|
247
|
-
path = None
|
|
266
|
+
path = (e.field_path.replace("_", "-") if case_style == "hyphen" else e.field_path) if e.field_path else None
|
|
248
267
|
|
|
249
268
|
def name(type_: type) -> str:
|
|
250
269
|
return type_.__name__ if hasattr(type_, "__name__") else str(type_)
|
|
@@ -262,7 +281,7 @@ def _load_yaml(
|
|
|
262
281
|
raise InvalidConfigError(f'Missing mandatory field "{path}"') from e
|
|
263
282
|
|
|
264
283
|
except dacite.ForwardReferenceError as e:
|
|
265
|
-
raise ValueError(f"Invalid config class: {
|
|
284
|
+
raise ValueError(f"Invalid config class: {e!s}") from e
|
|
266
285
|
|
|
267
286
|
config._file_hash = sha256(json.dumps(config_dict).encode("utf-8")).hexdigest()
|
|
268
287
|
|
|
@@ -309,7 +328,7 @@ def load_yaml_dict(
|
|
|
309
328
|
keyvault_loader: KeyVaultLoader | None = None,
|
|
310
329
|
) -> dict[str, Any]:
|
|
311
330
|
"""
|
|
312
|
-
Read a YAML file and return a dictionary from its contents
|
|
331
|
+
Read a YAML file and return a dictionary from its contents.
|
|
313
332
|
|
|
314
333
|
Args:
|
|
315
334
|
source: Input stream (as returned by open(...)) or string containing YAML.
|
|
@@ -331,7 +350,7 @@ def load_yaml_dict(
|
|
|
331
350
|
|
|
332
351
|
def compile_patterns(ignore_patterns: list[str | IgnorePattern]) -> list[re.Pattern[str]]:
|
|
333
352
|
"""
|
|
334
|
-
|
|
353
|
+
Compile a list of ignore patterns into regular expression objects.
|
|
335
354
|
|
|
336
355
|
Args:
|
|
337
356
|
ignore_patterns: A list of strings or IgnorePattern to be compiled.
|
|
@@ -349,6 +368,12 @@ def compile_patterns(ignore_patterns: list[str | IgnorePattern]) -> list[re.Patt
|
|
|
349
368
|
|
|
350
369
|
|
|
351
370
|
class ConfigResolver(Generic[CustomConfigClass]):
|
|
371
|
+
"""
|
|
372
|
+
Class for resolving configuration files, either from a local file or a remote CDF extraction pipeline.
|
|
373
|
+
|
|
374
|
+
Automatically reloads the configuration file if it has changed.
|
|
375
|
+
"""
|
|
376
|
+
|
|
352
377
|
def __init__(self, config_path: str, config_type: type[CustomConfigClass]):
|
|
353
378
|
self.config_path = config_path
|
|
354
379
|
self.config_type = config_type
|
|
@@ -364,6 +389,9 @@ class ConfigResolver(Generic[CustomConfigClass]):
|
|
|
364
389
|
|
|
365
390
|
@property
|
|
366
391
|
def cognite_client(self) -> CogniteClient | None:
|
|
392
|
+
"""
|
|
393
|
+
Returns a CogniteClient instance based on the configuration.
|
|
394
|
+
"""
|
|
367
395
|
if self._cognite_client is None and self._config is not None:
|
|
368
396
|
self._cognite_client = self._config.cognite.get_cognite_client("config_resolver")
|
|
369
397
|
return self._cognite_client
|
|
@@ -376,6 +404,9 @@ class ConfigResolver(Generic[CustomConfigClass]):
|
|
|
376
404
|
|
|
377
405
|
@property
|
|
378
406
|
def is_remote(self) -> bool:
|
|
407
|
+
"""
|
|
408
|
+
Returns True if the configuration is a remote CDF extraction pipeline config, False if it is a local file.
|
|
409
|
+
"""
|
|
379
410
|
raw_config_type = load_yaml_dict(self._config_text).get("type")
|
|
380
411
|
if raw_config_type is None:
|
|
381
412
|
_logger.warning("No config type specified, default to local")
|
|
@@ -385,6 +416,9 @@ class ConfigResolver(Generic[CustomConfigClass]):
|
|
|
385
416
|
|
|
386
417
|
@property
|
|
387
418
|
def has_changed(self) -> bool:
|
|
419
|
+
"""
|
|
420
|
+
Returns True if the configuration file has changed since the last accepted configuration.
|
|
421
|
+
"""
|
|
388
422
|
try:
|
|
389
423
|
self._resolve_config()
|
|
390
424
|
except Exception:
|
|
@@ -394,18 +428,36 @@ class ConfigResolver(Generic[CustomConfigClass]):
|
|
|
394
428
|
|
|
395
429
|
@property
|
|
396
430
|
def config(self) -> CustomConfigClass:
|
|
431
|
+
"""
|
|
432
|
+
Returns the current configuration object. If it has not been resolved yet, it will resolve it first.
|
|
433
|
+
"""
|
|
397
434
|
if self._config is None:
|
|
398
435
|
self._resolve_config()
|
|
399
436
|
self.accept_new_config()
|
|
400
437
|
return self._config # type: ignore
|
|
401
438
|
|
|
402
439
|
def accept_new_config(self) -> None:
|
|
440
|
+
"""
|
|
441
|
+
Accepts the new configuration, making it the current configuration.
|
|
442
|
+
"""
|
|
403
443
|
self._config = self._next_config
|
|
404
444
|
|
|
405
445
|
@classmethod
|
|
406
446
|
def from_cli(
|
|
407
447
|
cls, name: str, description: str, version: str, config_type: type[CustomConfigClass]
|
|
408
448
|
) -> "ConfigResolver":
|
|
449
|
+
"""
|
|
450
|
+
Creates a ConfigResolver instance from command line arguments.
|
|
451
|
+
|
|
452
|
+
Args:
|
|
453
|
+
name: The name of the extractor.
|
|
454
|
+
description: A description of the extractor.
|
|
455
|
+
version: The version of the extractor.
|
|
456
|
+
config_type: The type of the configuration class to be used.
|
|
457
|
+
|
|
458
|
+
Returns:
|
|
459
|
+
A ConfigResolver instance initialized with the configuration file path.
|
|
460
|
+
"""
|
|
409
461
|
argument_parser = argparse.ArgumentParser(sys.argv[0], description=description)
|
|
410
462
|
argument_parser.add_argument(
|
|
411
463
|
"config", nargs=1, type=str, help="The YAML file containing configuration for the extractor."
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module containing utility functions for validating config values.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import logging
|
|
2
6
|
import re
|
|
3
7
|
|
|
@@ -15,7 +19,7 @@ def matches_patterns(patterns: list[str | re.Pattern[str]], string: str) -> bool
|
|
|
15
19
|
Returns:
|
|
16
20
|
boolean value indicating whether string matches any of the patterns.
|
|
17
21
|
"""
|
|
18
|
-
return any(
|
|
22
|
+
return any(matches_pattern(pattern, string) for pattern in patterns)
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
def matches_pattern(pattern: str | re.Pattern[str], string: str) -> bool:
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines custom exceptions for the extractorutils package.
|
|
3
|
+
"""
|
|
1
4
|
# Copyright 2020 Cognite AS
|
|
2
5
|
#
|
|
3
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -15,11 +18,11 @@
|
|
|
15
18
|
|
|
16
19
|
class InvalidConfigError(Exception):
|
|
17
20
|
"""
|
|
18
|
-
Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to
|
|
21
|
+
Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to:
|
|
19
22
|
|
|
20
23
|
* Missing fields
|
|
21
24
|
* Incompatible types
|
|
22
|
-
*
|
|
25
|
+
* Unknown fields
|
|
23
26
|
"""
|
|
24
27
|
|
|
25
28
|
def __init__(self, message: str, details: list[str] | None = None):
|
|
@@ -28,7 +31,13 @@ class InvalidConfigError(Exception):
|
|
|
28
31
|
self.details = details
|
|
29
32
|
|
|
30
33
|
def __str__(self) -> str:
|
|
34
|
+
"""
|
|
35
|
+
Returns a string representation of the error.
|
|
36
|
+
"""
|
|
31
37
|
return f"Invalid config: {self.message}"
|
|
32
38
|
|
|
33
39
|
def __repr__(self) -> str:
|
|
40
|
+
"""
|
|
41
|
+
Returns a string representation of the error.
|
|
42
|
+
"""
|
|
34
43
|
return self.__str__()
|
|
@@ -54,6 +54,7 @@ from prometheus_client.exposition import basic_auth_handler, delete_from_gateway
|
|
|
54
54
|
|
|
55
55
|
from cognite.client import CogniteClient
|
|
56
56
|
from cognite.client.data_classes import Asset, Datapoints, DatapointsArray, TimeSeries
|
|
57
|
+
from cognite.client.data_classes.data_modeling import NodeId
|
|
57
58
|
from cognite.client.exceptions import CogniteDuplicatedError
|
|
58
59
|
from cognite.extractorutils.threading import CancellationToken
|
|
59
60
|
from cognite.extractorutils.util import EitherId
|
|
@@ -84,6 +85,8 @@ def safe_get(cls: type[T], *args: Any, **kwargs: Any) -> T:
|
|
|
84
85
|
|
|
85
86
|
Args:
|
|
86
87
|
cls: Metrics class to either create or get a cached version of
|
|
88
|
+
args: Arguments passed as-is to the class constructor
|
|
89
|
+
kwargs: Keyword arguments passed as-is to the class constructor
|
|
87
90
|
|
|
88
91
|
Returns:
|
|
89
92
|
An instance of given class
|
|
@@ -98,8 +101,10 @@ def safe_get(cls: type[T], *args: Any, **kwargs: Any) -> T:
|
|
|
98
101
|
|
|
99
102
|
class BaseMetrics:
|
|
100
103
|
"""
|
|
101
|
-
Base collection of extractor metrics.
|
|
102
|
-
|
|
104
|
+
Base collection of extractor metrics.
|
|
105
|
+
|
|
106
|
+
The class also spawns a collector thread on init that regularly fetches process information and updates the
|
|
107
|
+
``process_*`` gauges.
|
|
103
108
|
|
|
104
109
|
To create a set of metrics for an extractor, create a subclass of this class.
|
|
105
110
|
|
|
@@ -144,7 +149,7 @@ class BaseMetrics:
|
|
|
144
149
|
|
|
145
150
|
def _proc_collect(self) -> None:
|
|
146
151
|
"""
|
|
147
|
-
Collect values for process metrics
|
|
152
|
+
Collect values for process metrics.
|
|
148
153
|
"""
|
|
149
154
|
total_memory_available = psutil.virtual_memory().total
|
|
150
155
|
while True:
|
|
@@ -157,7 +162,7 @@ class BaseMetrics:
|
|
|
157
162
|
|
|
158
163
|
def _start_proc_collector(self) -> None:
|
|
159
164
|
"""
|
|
160
|
-
Start a thread that collects process metrics at a regular interval
|
|
165
|
+
Start a thread that collects process metrics at a regular interval.
|
|
161
166
|
"""
|
|
162
167
|
thread = threading.Thread(target=self._proc_collect, name="ProcessMetricsCollector", daemon=True)
|
|
163
168
|
thread.start()
|
|
@@ -165,8 +170,9 @@ class BaseMetrics:
|
|
|
165
170
|
|
|
166
171
|
class AbstractMetricsPusher(ABC):
|
|
167
172
|
"""
|
|
168
|
-
Base class for metric pushers.
|
|
169
|
-
|
|
173
|
+
Base class for metric pushers.
|
|
174
|
+
|
|
175
|
+
Metric pushers spawn a thread that routinely pushes metrics to a configured destination.
|
|
170
176
|
|
|
171
177
|
Contains all the logic for starting and running threads.
|
|
172
178
|
|
|
@@ -194,7 +200,7 @@ class AbstractMetricsPusher(ABC):
|
|
|
194
200
|
@abstractmethod
|
|
195
201
|
def _push_to_server(self) -> None:
|
|
196
202
|
"""
|
|
197
|
-
Push metrics to a remote server, to be
|
|
203
|
+
Push metrics to a remote server, to be overridden in subclasses.
|
|
198
204
|
"""
|
|
199
205
|
pass
|
|
200
206
|
|
|
@@ -209,7 +215,6 @@ class AbstractMetricsPusher(ABC):
|
|
|
209
215
|
def start(self) -> None:
|
|
210
216
|
"""
|
|
211
217
|
Starts a thread that pushes the default registry to the configured gateway at certain intervals.
|
|
212
|
-
|
|
213
218
|
"""
|
|
214
219
|
self.thread = threading.Thread(target=self._run, daemon=True, name=self.thread_name)
|
|
215
220
|
self.thread.start()
|
|
@@ -224,7 +229,7 @@ class AbstractMetricsPusher(ABC):
|
|
|
224
229
|
|
|
225
230
|
def __enter__(self) -> "AbstractMetricsPusher":
|
|
226
231
|
"""
|
|
227
|
-
Wraps around start method, for use as context manager
|
|
232
|
+
Wraps around start method, for use as context manager.
|
|
228
233
|
|
|
229
234
|
Returns:
|
|
230
235
|
self
|
|
@@ -236,7 +241,7 @@ class AbstractMetricsPusher(ABC):
|
|
|
236
241
|
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
|
|
237
242
|
) -> None:
|
|
238
243
|
"""
|
|
239
|
-
Wraps around stop method, for use as context manager
|
|
244
|
+
Wraps around stop method, for use as context manager.
|
|
240
245
|
|
|
241
246
|
Args:
|
|
242
247
|
exc_type: Exception type
|
|
@@ -403,11 +408,11 @@ class CognitePusher(AbstractMetricsPusher):
|
|
|
403
408
|
|
|
404
409
|
def _push_to_server(self) -> None:
|
|
405
410
|
"""
|
|
406
|
-
Create datapoints an push them to their respective time series
|
|
411
|
+
Create datapoints and push them to their respective time series.
|
|
407
412
|
"""
|
|
408
413
|
timestamp = int(arrow.get().float_timestamp * 1000)
|
|
409
414
|
|
|
410
|
-
datapoints: list[dict[str, str | int | list[Any] | Datapoints | DatapointsArray]] = []
|
|
415
|
+
datapoints: list[dict[str, str | int | list[Any] | Datapoints | DatapointsArray | NodeId]] = []
|
|
411
416
|
|
|
412
417
|
for metric in REGISTRY.collect():
|
|
413
418
|
if isinstance(metric, Metric) and metric.type in ["gauge", "counter"]:
|
|
@@ -1,12 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module containing state stores for extractors.
|
|
3
|
+
|
|
4
|
+
The ``statestore`` module contains classes for keeping track of the extraction state of individual items, facilitating
|
|
5
|
+
incremental load and speeding up startup times.
|
|
6
|
+
|
|
7
|
+
At the beginning of a run the extractor typically calls the ``initialize`` method, which loads the states from the
|
|
8
|
+
remote store (which can either be a local JSON file or a table in CDF RAW), and during and/or at the end of a run, the
|
|
9
|
+
``synchronize`` method is called, which saves the current states to the remote store.
|
|
10
|
+
|
|
11
|
+
You can choose the back-end for your state store with which class you're instantiating:
|
|
12
|
+
|
|
13
|
+
.. code-block:: python
|
|
14
|
+
|
|
15
|
+
# A state store using a JSON file as remote storage:
|
|
16
|
+
states = LocalStateStore("state.json")
|
|
17
|
+
states.initialize()
|
|
18
|
+
|
|
19
|
+
# A state store using a RAW table as remote storage:
|
|
20
|
+
states = RawStateStore(
|
|
21
|
+
cdf_client = CogniteClient(),
|
|
22
|
+
database = "extractor_states",
|
|
23
|
+
table = "my_extractor_deployment"
|
|
24
|
+
)
|
|
25
|
+
states.initialize()
|
|
26
|
+
|
|
27
|
+
You can now use this state store to get states:
|
|
28
|
+
|
|
29
|
+
.. code-block:: python
|
|
30
|
+
|
|
31
|
+
low, high = states.get_state(external_id = "my-id")
|
|
32
|
+
|
|
33
|
+
You can set states:
|
|
34
|
+
|
|
35
|
+
.. code-block:: python
|
|
36
|
+
|
|
37
|
+
states.set_state(external_id = "another-id", high=100)
|
|
38
|
+
|
|
39
|
+
and similar for ``low``. The ``set_state(...)`` method will always overwrite the current state. Sometimes you might
|
|
40
|
+
want to only set state *if larger* than the previous state, in that case consider ``expand_state(...)``:
|
|
41
|
+
|
|
42
|
+
.. code-block:: python
|
|
43
|
+
|
|
44
|
+
# High watermark of another-id is already 100, nothing happens in this call:
|
|
45
|
+
states.expand_state(external_id = "another-id", high=50)
|
|
46
|
+
|
|
47
|
+
# This will set high to 150 as it is larger than the previous state
|
|
48
|
+
states.expand_state(external_id = "another-id", high=150)
|
|
49
|
+
|
|
50
|
+
To store the state to the remote store, use the ``synchronize()`` method:
|
|
51
|
+
|
|
52
|
+
.. code-block:: python
|
|
53
|
+
|
|
54
|
+
states.synchronize()
|
|
55
|
+
|
|
56
|
+
You can set a state store to automatically update on upload triggers from an upload queue by using the
|
|
57
|
+
``post_upload_function`` in the upload queue:
|
|
58
|
+
|
|
59
|
+
.. code-block:: python
|
|
60
|
+
|
|
61
|
+
states = LocalStateStore("state.json")
|
|
62
|
+
states.initialize()
|
|
63
|
+
|
|
64
|
+
uploader = TimeSeriesUploadQueue(
|
|
65
|
+
cdf_client = CogniteClient(),
|
|
66
|
+
max_upload_interval = 10,
|
|
67
|
+
post_upload_function = states.post_upload_handler()
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# The state store is now updated automatically!
|
|
71
|
+
|
|
72
|
+
states.synchronize()
|
|
73
|
+
"""
|
|
74
|
+
|
|
1
75
|
from .hashing import AbstractHashStateStore, LocalHashStateStore, RawHashStateStore
|
|
2
76
|
from .watermark import AbstractStateStore, LocalStateStore, NoStateStore, RawStateStore
|
|
3
77
|
|
|
4
78
|
__all__ = [
|
|
79
|
+
"AbstractHashStateStore",
|
|
5
80
|
"AbstractStateStore",
|
|
6
|
-
"
|
|
81
|
+
"LocalHashStateStore",
|
|
7
82
|
"LocalStateStore",
|
|
8
83
|
"NoStateStore",
|
|
9
|
-
"AbstractHashStateStore",
|
|
10
84
|
"RawHashStateStore",
|
|
11
|
-
"
|
|
85
|
+
"RawStateStore",
|
|
12
86
|
]
|
|
@@ -32,7 +32,11 @@ class _BaseStateStore(ABC):
|
|
|
32
32
|
def start(self, initialize: bool = True) -> None:
|
|
33
33
|
"""
|
|
34
34
|
Start saving state periodically if save_interval is set.
|
|
35
|
+
|
|
35
36
|
This calls the synchronize method every save_interval seconds.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
initialize (bool): (Optional). If True, call initialize method before starting the thread.
|
|
36
40
|
"""
|
|
37
41
|
if initialize and not self._initialized:
|
|
38
42
|
self.initialize()
|
|
@@ -52,7 +56,7 @@ class _BaseStateStore(ABC):
|
|
|
52
56
|
|
|
53
57
|
def _run(self) -> None:
|
|
54
58
|
"""
|
|
55
|
-
Internal run method for synchronize thread
|
|
59
|
+
Internal run method for synchronize thread.
|
|
56
60
|
"""
|
|
57
61
|
self.initialize()
|
|
58
62
|
while not self.cancellation_token.wait(timeout=self.save_interval):
|
|
@@ -68,13 +72,13 @@ class _BaseStateStore(ABC):
|
|
|
68
72
|
@abstractmethod
|
|
69
73
|
def initialize(self, force: bool = False) -> None:
|
|
70
74
|
"""
|
|
71
|
-
Get states from remote store
|
|
75
|
+
Get states from remote store.
|
|
72
76
|
"""
|
|
73
77
|
pass
|
|
74
78
|
|
|
75
79
|
@abstractmethod
|
|
76
80
|
def synchronize(self) -> None:
|
|
77
81
|
"""
|
|
78
|
-
Upload states to remote store
|
|
82
|
+
Upload states to remote store.
|
|
79
83
|
"""
|
|
80
84
|
pass
|