cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +7 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +58 -49
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +29 -6
- cognite/extractorutils/uploader_types.py +15 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
cognite/extractorutils/unstable/configuration/exceptions.py

@@ -1,10 +1,15 @@
+"""
+Exceptions representing invalid configurations.
+"""
+
+
 class InvalidConfigError(Exception):
     """
-    Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to
+    Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to.

     * Missing fields
     * Incompatible types
-    *
+    * Unknown fields
     """

     def __init__(self, message: str, details: list[str] | None = None):
@@ -15,7 +20,13 @@ class InvalidConfigError(Exception):
         self.attempted_revision: int | None = None

     def __str__(self) -> str:
+        """
+        Underlying message prefixed with 'Invalid config:'.
+        """
         return f"Invalid config: {self.message}"

     def __repr__(self) -> str:
+        """
+        Underlying message prefixed with 'Invalid config:'.
+        """
         return self.__str__()
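For reference, a minimal illustration of the behaviour described by the new __str__/__repr__ docstrings (not part of the diff; the message and details values are invented):

    from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError

    err = InvalidConfigError("missing field 'project'", details=["connection.project"])
    print(str(err))   # per the new docstring: "Invalid config: missing field 'project'"
    print(repr(err))  # __repr__ delegates to __str__, so the same text is printed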
cognite/extractorutils/unstable/configuration/loaders.py

@@ -1,3 +1,7 @@
+"""
+Module containing functions and classes for loading configuration files.
+"""
+
 import json
 from enum import Enum
 from io import StringIO
@@ -13,32 +17,69 @@ from cognite.extractorutils.exceptions import InvalidConfigError as OldInvalidCo
 from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError
 from cognite.extractorutils.unstable.configuration.models import ConfigModel

-__all__ = ["ConfigFormat", "
+__all__ = ["ConfigFormat", "load_dict", "load_file", "load_from_cdf", "load_io"]


 _T = TypeVar("_T", bound=ConfigModel)


 class ConfigFormat(Enum):
+    """
+    Enumeration of supported configuration file formats.
+
+    Attributes:
+        JSON: Represents the JSON configuration file format.
+        YAML: Represents the YAML configuration file format.
+    """
+
     JSON = "json"
     YAML = "yaml"


 def load_file(path: Path, schema: type[_T]) -> _T:
+    """
+    Load a configuration file from the given path and parse it into the specified schema.
+
+    Args:
+        path: Path to the configuration file.
+        schema: The schema class to parse the configuration into.
+
+    Returns:
+        An instance of the schema populated with the configuration data.
+
+    Raises:
+        InvalidConfigError: If the file type is unknown or the configuration is invalid.
+    """
     if path.suffix in [".yaml", ".yml"]:
-
+        file_format = ConfigFormat.YAML
     elif path.suffix == ".json":
-
+        file_format = ConfigFormat.JSON
     else:
         raise InvalidConfigError(f"Unknown file type {path.suffix}")

     with open(path) as stream:
-        return load_io(stream,
+        return load_io(stream, file_format, schema)


 def load_from_cdf(
     cognite_client: CogniteClient, external_id: str, schema: type[_T], revision: int | None = None
 ) -> tuple[_T, int]:
+    """
+    Load a configuration from a CDF integration using the provided external ID and schema.
+
+    Args:
+        cognite_client: An instance of CogniteClient to interact with CDF.
+        external_id: The external ID of the integration to load configuration from.
+        schema: The schema class to parse the configuration into.
+        revision: the specific revision of the configuration to load, otherwise get the latest.
+
+    Returns:
+        A tuple containing the parsed configuration instance and the revision number.
+
+    Raises:
+        InvalidConfigError: If the configuration is invalid or not found.
+        CogniteAPIError: If there is an unexpected error communicating with CDF.
+    """
     params: dict[str, str | int] = {"integration": external_id}
     if revision:
         params["revision"] = revision
@@ -67,11 +108,25 @@ def load_from_cdf(
         raise new_e from e


-def load_io(stream: TextIO,
-
+def load_io(stream: TextIO, file_format: ConfigFormat, schema: type[_T]) -> _T:
+    """
+    Load a configuration from a stream (e.g., file or string) and parse it into the specified schema.
+
+    Args:
+        stream: A text stream containing the configuration data.
+        file_format: The format of the configuration data.
+        schema: The schema class to parse the configuration into.
+
+    Returns:
+        An instance of the schema populated with the configuration data.
+
+    Raises:
+        InvalidConfigError: If the file format is unknown or the configuration is invalid.
+    """
+    if file_format == ConfigFormat.JSON:
         data = json.load(stream)

-    elif
+    elif file_format == ConfigFormat.YAML:
         data = _load_yaml_dict_raw(stream)

     if "azure-keyvault" in data:
@@ -95,15 +150,25 @@ def _make_loc_str(loc: tuple) -> str:
             loc_str = f"{loc_str}{lo}"
             needs_sep = True
         else:
-            if isinstance(lo, int)
-                loc_str = f"{loc_str}[{lo}]"
-            else:
-                loc_str = f"{loc_str}.{lo}"
+            loc_str = f"{loc_str}[{lo}]" if isinstance(lo, int) else f"{loc_str}.{lo}"

     return loc_str


 def load_dict(data: dict, schema: type[_T]) -> _T:
+    """
+    Load a configuration from a dictionary and parse it into the specified schema.
+
+    Args:
+        data: A dictionary containing the configuration data.
+        schema: The schema class to parse the configuration into.
+
+    Returns:
+        An instance of the schema populated with the configuration data.
+
+    Raises:
+        InvalidConfigError: If the configuration is invalid.
+    """
     try:
         return schema.model_validate(data)

@@ -119,8 +184,8 @@ def load_dict(data: dict, schema: type[_T]) -> _T:

             if "ctx" in err and "error" in err["ctx"]:
                 exc = err["ctx"]["error"]
-                if isinstance(exc, ValueError
-                    messages.append(f"{
+                if isinstance(exc, ValueError | AssertionError):
+                    messages.append(f"{exc!s}: {loc_str}")
                     continue

             messages.append(f"{err.get('msg')}: {loc_str}")
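To put the reworked loader API in context, here is a rough usage sketch (not part of the diff; SourceConfig and its fields are hypothetical):

    from pathlib import Path

    from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError
    from cognite.extractorutils.unstable.configuration.loaders import load_file
    from cognite.extractorutils.unstable.configuration.models import ConfigModel


    class SourceConfig(ConfigModel):
        # Hypothetical extractor-specific fields, for illustration only
        host: str
        port: int = 4840


    try:
        config = load_file(Path("config.yaml"), SourceConfig)
        print(config.host, config.port)
    except InvalidConfigError as err:
        # Missing fields, incompatible types and unknown fields all surface here
        print(err)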
cognite/extractorutils/unstable/configuration/models.py

@@ -1,3 +1,7 @@
+"""
+Module containing pre-built models for common extractor configuration.
+"""
+
 import os
 import re
 from datetime import timedelta
@@ -21,22 +25,26 @@ from cognite.extractorutils.configtools._util import _load_certificate_data
 from cognite.extractorutils.exceptions import InvalidConfigError

 __all__ = [
-    "ConfigModel",
     "AuthenticationConfig",
-    "
+    "ConfigModel",
     "ConnectionConfig",
     "CronConfig",
+    "ExtractorConfig",
     "IntervalConfig",
-    "ScheduleConfig",
-    "LogLevel",
-    "LogFileHandlerConfig",
     "LogConsoleHandlerConfig",
+    "LogFileHandlerConfig",
     "LogHandlerConfig",
-    "
+    "LogLevel",
+    "ScheduleConfig",
+    "TimeIntervalConfig",
 ]


 class ConfigModel(BaseModel):
+    """
+    Base model for configuration objects, setting the correct pydantic options for extractor config.
+    """
+
     model_config = ConfigDict(
         alias_generator=kebabize,
         populate_by_name=True,
@@ -69,7 +77,7 @@ AuthenticationConfig = Annotated[_ClientCredentialsConfig | _ClientCertificateCo

 class TimeIntervalConfig:
     """
-    Configuration parameter for setting a time interval
+    Configuration parameter for setting a time interval.
     """

     def __init__(self, expression: str) -> None:
@@ -77,14 +85,25 @@ class TimeIntervalConfig:

     @classmethod
     def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema:
+        """
+        Pydantic hook to define how this class should be serialized/deserialized.
+
+        This allows the class to be used as a field in Pydantic models.
+        """
         return core_schema.no_info_after_validator_function(cls, handler(str | int))

     def __eq__(self, other: object) -> bool:
+        """
+        Two TimeIntervalConfig objects are equal if they have the same number of seconds in their interval.
+        """
         if not isinstance(other, TimeIntervalConfig):
             return NotImplemented
         return self._interval == other._interval

     def __hash__(self) -> int:
+        """
+        Hash function for TimeIntervalConfig based on the number of seconds in the interval.
+        """
         return hash(self._interval)

     @classmethod
@@ -106,36 +125,69 @@ class TimeIntervalConfig:

     @property
     def seconds(self) -> int:
+        """
+        Time interval as number of seconds.
+        """
         return self._interval

     @property
     def minutes(self) -> float:
+        """
+        Time interval as number of minutes.
+
+        This is a float since the underlying interval is in seconds.
+        """
         return self._interval / 60

     @property
     def hours(self) -> float:
+        """
+        Time interval as number of hours.
+
+        This is a float since the underlying interval is in seconds.
+        """
         return self._interval / (60 * 60)

     @property
     def days(self) -> float:
+        """
+        Time interval as number of days.
+
+        This is a float since the underlying interval is in seconds.
+        """
         return self._interval / (60 * 60 * 24)

     @property
     def timedelta(self) -> timedelta:
+        """
+        Time interval as a timedelta object.
+        """
         days = self._interval // (60 * 60 * 24)
         seconds = self._interval % (60 * 60 * 24)
         return timedelta(days=days, seconds=seconds)

     def __int__(self) -> int:
+        """
+        Returns the time interval as a number of seconds.
+        """
         return int(self._interval)

     def __float__(self) -> float:
+        """
+        Returns the time interval as a number of seconds.
+        """
         return float(self._interval)

     def __str__(self) -> str:
+        """
+        Returns the time interval as a human readable string.
+        """
         return self._expression

     def __repr__(self) -> str:
+        """
+        Returns the time interval as a human readable string.
+        """
         return self._expression


@@ -152,6 +204,15 @@ class _ConnectionParameters(ConfigModel):


 class ConnectionConfig(ConfigModel):
+    """
+    Configuration for connecting to a Cognite Data Fusion project.
+
+    This configuration includes the project name, base URL, integration name, and authentication details, as well as
+    optional connection parameters.
+
+    This configuration is common for all extractors.
+    """
+
     project: str
     base_url: str

@@ -162,6 +223,15 @@ class ConnectionConfig(ConfigModel):
     connection: _ConnectionParameters = Field(default_factory=_ConnectionParameters)

     def get_cognite_client(self, client_name: str) -> CogniteClient:
+        """
+        Create a CogniteClient instance using the configuration parameters.
+
+        Args:
+            client_name: Name of the client, set as the x-cdp-app header in the requests
+
+        Returns:
+            CogniteClient: An instance of CogniteClient configured with the provided parameters.
+        """
         from cognite.client.config import global_config

         global_config.disable_pypi_version_check = True
@@ -218,6 +288,26 @@ class ConnectionConfig(ConfigModel):

     @classmethod
     def from_environment(cls) -> "ConnectionConfig":
+        """
+        Create a ConnectionConfig instance from environment variables.
+
+        Environment variables should be set as follows:
+        - COGNITE_PROJECT: The name of the Cognite Data Fusion project.
+        - COGNITE_BASE_URL: The base URL of the Cognite Data Fusion instance.
+        - COGNITE_INTEGRATION: The external ID of the corresponding integration in CDF.
+        - COGNITE_CLIENT_ID: The client ID for authentication.
+        - COGNITE_TOKEN_SCOPES: The scopes for the token.
+        - COGNITE_CLIENT_SECRET: The client secret for authentication (if using client credentials).
+        - COGNITE_TOKEN_URL: The token URL for authentication (if using client credentials).
+        - COGNITE_CLIENT_CERTIFICATE_PATH: The path to the client certificate (if using client certificate).
+        - COGNITE_AUTHORITY_URL: The authority URL for authentication (if using client certificate).
+
+        Returns:
+            ConnectionConfig: An instance of ConnectionConfig populated with the environment variables.
+
+        Raises:
+            KeyError: If any of the required environment variables are missing.
+        """
         auth: AuthenticationConfig
         if "COGNITE_CLIENT_SECRET" in os.environ:
             auth = _ClientCredentialsConfig(
@@ -248,11 +338,19 @@ class ConnectionConfig(ConfigModel):


 class CronConfig(ConfigModel):
+    """
+    Configuration parameter for setting a cron schedule.
+    """
+
     type: Literal["cron"]
     expression: str


 class IntervalConfig(ConfigModel):
+    """
+    Configuration parameter for setting an interval schedule.
+    """
+
     type: Literal["interval"]
     expression: TimeIntervalConfig

@@ -261,6 +359,10 @@ ScheduleConfig = Annotated[CronConfig | IntervalConfig, Field(discriminator="typ


 class LogLevel(Enum):
+    """
+    Enumeration of log levels for the extractor.
+    """
+
     CRITICAL = "CRITICAL"
     ERROR = "ERROR"
     WARNING = "WARNING"
@@ -269,6 +371,10 @@ class LogLevel(Enum):


 class LogFileHandlerConfig(ConfigModel):
+    """
+    Configuration for a log handler that writes to a file, with daily rotation.
+    """
+
     type: Literal["file"]
     path: Path
     level: LogLevel
@@ -276,6 +382,10 @@ class LogFileHandlerConfig(ConfigModel):


 class LogConsoleHandlerConfig(ConfigModel):
+    """
+    Configuration for a log handler that writes to standard output.
+    """
+
     type: Literal["console"]
     level: LogLevel

@@ -289,4 +399,8 @@ def _log_handler_default() -> list[LogHandlerConfig]:


 class ExtractorConfig(ConfigModel):
+    """
+    Base class for application configuration for extractors.
+    """
+
     log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
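As a rough illustration of the new from_environment docstring (not part of the diff; every value below is a placeholder):

    import os

    from cognite.extractorutils.unstable.configuration.models import ConnectionConfig

    # Placeholder values for the variables listed in the docstring (client credentials flow)
    os.environ.setdefault("COGNITE_PROJECT", "my-project")
    os.environ.setdefault("COGNITE_BASE_URL", "https://api.cognitedata.com")
    os.environ.setdefault("COGNITE_INTEGRATION", "my-extractor")
    os.environ.setdefault("COGNITE_CLIENT_ID", "client-id")
    os.environ.setdefault("COGNITE_TOKEN_SCOPES", "https://api.cognitedata.com/.default")
    os.environ.setdefault("COGNITE_CLIENT_SECRET", "client-secret")
    os.environ.setdefault("COGNITE_TOKEN_URL", "https://login.example.com/token")

    connection = ConnectionConfig.from_environment()
    client = connection.get_cognite_client("my-extractor")  # client_name becomes the x-cdp-app header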
cognite/extractorutils/unstable/core/_dto.py

@@ -1,5 +1,5 @@
 """
-Temporary holding place for DTOs against Extraction Pipelines 2.0 until it's in the SDK
+Temporary holding place for DTOs against Extraction Pipelines 2.0 until it's in the SDK.
 """

 from typing import Any, Literal
@@ -10,9 +10,11 @@ from pydantic import BaseModel, ConfigDict

 class CogniteModel(BaseModel):
     """
-    Base class for DTO classes based on pydantic
+    Base class for DTO classes based on pydantic.
+
+    With a few tweaks to make it inline with the CDF API guidelines:
     * camelCase instead of snake_case when serializing/deserializing into/from JSON
-    * exclude Nones from serialized JSON instead of having nulls in the response text
+    * exclude Nones from serialized JSON instead of having nulls in the response text.
     """

     def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
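A small sketch of what the documented CogniteModel behaviour implies (not from the diff; TaskInfo is hypothetical, and the exact output depends on the model_dump override whose body is not shown here):

    from cognite.extractorutils.unstable.core._dto import CogniteModel


    class TaskInfo(CogniteModel):
        # Hypothetical DTO, for illustration only
        task_name: str
        description: str | None = None


    info = TaskInfo.model_validate({"taskName": "my-task"})
    # Per the docstring, serialization should use camelCase keys and drop None values,
    # i.e. roughly {"taskName": "my-task"} rather than {"task_name": ..., "description": None}
    print(info.model_dump())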
cognite/extractorutils/unstable/core/base.py

@@ -1,5 +1,50 @@
+"""
+This module provides the base class for extractors.
+
+It includes functionality for task management, logging, error handling, and configuration management.
+
+Extractors should subclass the `Extractor` class and implement the `__init_tasks__` method to define their tasks.
+The subclass should also define several class attributes:
+- ``NAME``: A human-readable name for the extractor.
+- ``EXTERNAL_ID``: A unique identifier for the extractor, used when reporting to CDF Integrations.
+- ``DESCRIPTION``: A brief description of the extractor.
+- ``VERSION``: The version of the extractor, used when reporting to CDF Integrations. This should follow semantic
+  versioning.
+- ``CONFIG_TYPE``: The type of the application configuration for the extractor, which should be a subclass of
+  ``ExtractorConfig``. This should be the same class as the one used for the generic type parameter of the
+  ``Extractor`` class.
+
+
+.. code-block:: python
+
+    class MyConfig(ExtractorConfig):
+        parameter: str
+        another_parameter: int
+        schedule: ScheduleConfig
+
+    class MyExtractor(Extractor[MyConfig]):
+        NAME = "My Extractor"
+        EXTERNAL_ID = "my-extractor"
+        DESCRIPTION = "An example extractor"
+        VERSION = "1.0.0"
+
+        CONFIG_TYPE = MyConfig
+
+        def __init_tasks__(self) -> None:
+            self.add_task(
+                ScheduledTask(
+                    name="my_task",
+                    description="An example task",
+                    schedule=self.application_config.schedule,
+                    target=self.my_task_function,
+                )
+            )
+
+        def my_task_function(self, task_context: TaskContext) -> None:
+            task_context.logger.info("Running my task")
+"""
+
 import logging
-import logging.config
 import time
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
@@ -30,7 +75,7 @@ from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Scheduled
 from cognite.extractorutils.unstable.scheduling import TaskScheduler
 from cognite.extractorutils.util import now

-__all__ = ["
+__all__ = ["ConfigRevision", "ConfigType", "Extractor"]

 ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
 ConfigRevision = Literal["local"] | int
@@ -40,6 +85,13 @@ _T = TypeVar("_T", bound=ExtractorConfig)


 class FullConfig(Generic[_T]):
+    """
+    A class that holds the full configuration for an extractor.
+
+    This includes the connection configuration, application configuration, and which revision of the application
+    configuration is currently active.
+    """
+
     def __init__(
         self,
         connection_config: ConnectionConfig,
@@ -52,6 +104,16 @@ class FullConfig(Generic[_T]):


 class Extractor(Generic[ConfigType], CogniteLogger):
+    """
+    Base class for all extractors.
+
+    This class provides the basic functionality for running an extractor, including task management, logging,
+    error handling, and configuration management.
+
+    It designed to be subclassed by specific extractors, which should implement the `__init_tasks__` method
+    to define their tasks.
+    """
+
     NAME: str
     EXTERNAL_ID: str
     DESCRIPTION: str
@@ -128,6 +190,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
             root.addHandler(fh)

     def __init_tasks__(self) -> None:
+        """
+        This method should be overridden by subclasses to define their tasks.
+
+        It is called automatically when the extractor is initialized.
+
+        Subclasses should call ``self.add_task(...)`` to add tasks to the extractor.
+        """
         pass

     def _set_runtime_message_queue(self, queue: Queue) -> None:
@@ -200,6 +269,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         )

     def restart(self) -> None:
+        """
+        Trigger a restart of the extractor.
+        """
         self._logger.info("Restarting extractor")
         if self._runtime_messages:
             self._runtime_messages.put(RuntimeMessage.RESTART)
@@ -210,12 +282,20 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         return cls(config)

     def add_task(self, task: Task) -> None:
+        """
+        Add a task to the extractor.
+
+        This method wraps the task's target function to include error handling and task tracking.
+
+        Args:
+            task: The task to add. It should be an instance of ``StartupTask``, ``ContinuousTask``, or ``ScheduledTask``
+        """
         # Store this for later, since we'll override it with the wrapped version
         target = task.target

         def run_task(task_context: TaskContext) -> None:
             """
-            A wrapped version of the task's target, with tracking and error handling
+            A wrapped version of the task's target, with tracking and error handling.
             """
             # Record a task start
             with self._checkin_lock:
@@ -275,7 +355,7 @@ class Extractor(Generic[ConfigType], CogniteLogger):
                 {
                     "name": t.name,
                     "type": "continuous" if isinstance(t, ContinuousTask) else "batch",
-                    "action":
+                    "action": bool(isinstance(t, ScheduledTask)),
                     "description": t.description,
                 }
                 for t in self._tasks
@@ -285,14 +365,29 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         )

     def start(self) -> None:
+        """
+        Start the extractor.
+
+        Instead of calling this method directly, it is recommended to use the context manager interface by using the
+        ``with`` statement, which ensures proper cleanup on exit.
+        """
         self._setup_logging()
         self._report_extractor_info()
         Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()

     def stop(self) -> None:
+        """
+        Stop the extractor.
+
+        Instead of calling this method directly, it is recommended to use the context manager interface by using the
+        ``with`` statement, which ensures proper cleanup on exit.
+        """
         self.cancellation_token.cancel()

     def __enter__(self) -> Self:
+        """
+        Start the extractor in a context manager.
+        """
         self.start()
         return self

@@ -302,6 +397,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> bool:
+        """
+        Stop the extractor when exiting the context manager.
+        """
         self.stop()
         with self._checkin_lock:
             self._checkin()
@@ -310,6 +408,17 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         return exc_val is None

     def run(self) -> None:
+        """
+        Run the extractor. This method starts the extractor and runs all tasks that have been added.
+
+        This method assumes ``self.start()`` has been called first. The recommended way to use this method is
+        to use the context manager interface, which ensures that the extractor is started and stopped properly.
+
+        .. code-block:: python
+
+            with extractor:
+                extractor.run()
+        """
         has_scheduled = False

         startup: list[StartupTask] = []