cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +213 -35
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +90 -19
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +119 -36
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +7 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +58 -49
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +29 -6
- cognite/extractorutils/uploader_types.py +15 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.13.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,3 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module providing the runtime for an extractor.
|
|
3
|
+
|
|
4
|
+
The runtime is responsible for starting the extractor in a separate process, managing its lifecycle, and handling
|
|
5
|
+
configuration loading and updates. It also handles errors and restarts the extractor if necessary.
|
|
6
|
+
|
|
7
|
+
It is the preferred way to run an extractor, as it provides a more robust and flexible way to manage the extractor's
|
|
8
|
+
lifecycle compared to running it directly in the main process.
|
|
9
|
+
|
|
10
|
+
The runtime also contains a command line interface (CLI) for starting the extractor, which allows users to specify
|
|
11
|
+
the connection configuration and other parameters.
|
|
12
|
+
|
|
13
|
+
.. code-block:: python
|
|
14
|
+
|
|
15
|
+
from cognite.extractorutils.unstable.core.runtime import Runtime
|
|
16
|
+
from my_extractor import MyExtractor
|
|
17
|
+
|
|
18
|
+
def main() -> None:
|
|
19
|
+
runtime = Runtime(MyExtractor)
|
|
20
|
+
runtime.run()
|
|
21
|
+
|
|
22
|
+
if __name__ == "__main__":
|
|
23
|
+
main()
|
|
24
|
+
"""
|
|
25
|
+
|
|
1
26
|
import logging
|
|
2
27
|
import os
|
|
3
28
|
import sys
|
|
@@ -5,27 +30,48 @@ import time
|
|
|
5
30
|
from argparse import ArgumentParser, Namespace
|
|
6
31
|
from multiprocessing import Process, Queue
|
|
7
32
|
from pathlib import Path
|
|
33
|
+
from random import randint
|
|
8
34
|
from typing import Any, Generic, TypeVar
|
|
35
|
+
from uuid import uuid4
|
|
9
36
|
|
|
10
|
-
from requests.exceptions import ConnectionError
|
|
37
|
+
from requests.exceptions import ConnectionError as RequestsConnectionError
|
|
11
38
|
from typing_extensions import assert_never
|
|
12
39
|
|
|
13
|
-
from cognite.client
|
|
40
|
+
from cognite.client import CogniteClient
|
|
41
|
+
from cognite.client.exceptions import (
|
|
42
|
+
CogniteAPIError,
|
|
43
|
+
CogniteAuthError,
|
|
44
|
+
CogniteConnectionError,
|
|
45
|
+
)
|
|
14
46
|
from cognite.extractorutils.threading import CancellationToken
|
|
15
47
|
from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError
|
|
16
|
-
from cognite.extractorutils.unstable.configuration.loaders import
|
|
48
|
+
from cognite.extractorutils.unstable.configuration.loaders import (
|
|
49
|
+
load_file,
|
|
50
|
+
load_from_cdf,
|
|
51
|
+
)
|
|
17
52
|
from cognite.extractorutils.unstable.configuration.models import ConnectionConfig
|
|
18
53
|
from cognite.extractorutils.unstable.core._dto import Error
|
|
54
|
+
from cognite.extractorutils.unstable.core.errors import ErrorLevel
|
|
55
|
+
from cognite.extractorutils.util import now
|
|
19
56
|
|
|
20
57
|
from ._messaging import RuntimeMessage
|
|
21
58
|
from .base import ConfigRevision, ConfigType, Extractor, FullConfig
|
|
22
59
|
|
|
23
|
-
__all__ = ["
|
|
60
|
+
__all__ = ["ExtractorType", "Runtime"]
|
|
24
61
|
|
|
25
62
|
ExtractorType = TypeVar("ExtractorType", bound=Extractor)
|
|
26
63
|
|
|
27
64
|
|
|
28
65
|
class Runtime(Generic[ExtractorType]):
|
|
66
|
+
"""
|
|
67
|
+
The runtime for an extractor.
|
|
68
|
+
|
|
69
|
+
This class is responsible for starting the extractor in a separate process, managing its lifecycle, and handling
|
|
70
|
+
configuration loading and updates. It also handles errors and restarts the extractor if necessary.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
RETRY_CONFIG_INTERVAL = 30
|
|
74
|
+
|
|
29
75
|
def __init__(
|
|
30
76
|
self,
|
|
31
77
|
extractor: type[ExtractorType],
|
|
@@ -37,6 +83,8 @@ class Runtime(Generic[ExtractorType]):
|
|
|
37
83
|
self.logger = logging.getLogger(f"{self._extractor_class.EXTERNAL_ID}.runtime")
|
|
38
84
|
self._setup_logging()
|
|
39
85
|
|
|
86
|
+
self._cognite_client: CogniteClient
|
|
87
|
+
|
|
40
88
|
def _create_argparser(self) -> ArgumentParser:
|
|
41
89
|
argparser = ArgumentParser(
|
|
42
90
|
prog=sys.argv[0],
|
|
@@ -57,8 +105,8 @@ class Runtime(Generic[ExtractorType]):
|
|
|
57
105
|
help="Connection parameters",
|
|
58
106
|
)
|
|
59
107
|
argparser.add_argument(
|
|
60
|
-
"-
|
|
61
|
-
"--local-
|
|
108
|
+
"-f",
|
|
109
|
+
"--force-local-config",
|
|
62
110
|
nargs=1,
|
|
63
111
|
type=Path,
|
|
64
112
|
required=False,
|
|
@@ -121,7 +169,7 @@ class Runtime(Generic[ExtractorType]):
|
|
|
121
169
|
self.logger.info(f"Started extractor with PID {process.pid}")
|
|
122
170
|
return process
|
|
123
171
|
|
|
124
|
-
def
|
|
172
|
+
def _try_get_application_config(
|
|
125
173
|
self,
|
|
126
174
|
args: Namespace,
|
|
127
175
|
connection_config: ConnectionConfig,
|
|
@@ -143,39 +191,65 @@ class Runtime(Generic[ExtractorType]):
|
|
|
143
191
|
|
|
144
192
|
else:
|
|
145
193
|
self.logger.info("Loading application config from CDF")
|
|
146
|
-
|
|
147
|
-
|
|
194
|
+
|
|
195
|
+
application_config, current_config_revision = load_from_cdf(
|
|
196
|
+
self._cognite_client,
|
|
197
|
+
connection_config.integration,
|
|
198
|
+
self._extractor_class.CONFIG_TYPE,
|
|
148
199
|
)
|
|
149
200
|
|
|
150
|
-
|
|
201
|
+
return application_config, current_config_revision
|
|
202
|
+
|
|
203
|
+
def _safe_get_application_config(
|
|
204
|
+
self,
|
|
205
|
+
args: Namespace,
|
|
206
|
+
connection_config: ConnectionConfig,
|
|
207
|
+
) -> tuple[ConfigType, ConfigRevision] | None:
|
|
208
|
+
prev_error: str | None = None
|
|
151
209
|
|
|
210
|
+
while not self._cancellation_token.is_cancelled:
|
|
152
211
|
try:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
212
|
+
return self._try_get_application_config(args, connection_config)
|
|
213
|
+
|
|
214
|
+
except Exception as e:
|
|
215
|
+
error_message = str(e)
|
|
216
|
+
if error_message == prev_error:
|
|
217
|
+
# Same error as before, no need to log it again
|
|
218
|
+
self._cancellation_token.wait(randint(1, self.RETRY_CONFIG_INTERVAL))
|
|
219
|
+
continue
|
|
220
|
+
prev_error = error_message
|
|
221
|
+
|
|
222
|
+
ts = now()
|
|
223
|
+
error = Error(
|
|
224
|
+
external_id=str(uuid4()),
|
|
225
|
+
level=ErrorLevel.fatal.value,
|
|
226
|
+
start_time=ts,
|
|
227
|
+
end_time=ts,
|
|
228
|
+
description=error_message,
|
|
229
|
+
details=None,
|
|
230
|
+
task=None,
|
|
157
231
|
)
|
|
158
232
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
headers={"cdf-version": "alpha"},
|
|
168
|
-
)
|
|
233
|
+
self._cognite_client.post(
|
|
234
|
+
f"/api/v1/projects/{self._cognite_client.config.project}/odin/checkin",
|
|
235
|
+
json={
|
|
236
|
+
"externalId": connection_config.integration,
|
|
237
|
+
"errors": [error.model_dump()],
|
|
238
|
+
},
|
|
239
|
+
headers={"cdf-version": "alpha"},
|
|
240
|
+
)
|
|
169
241
|
|
|
170
|
-
|
|
242
|
+
self._cancellation_token.wait(randint(1, self.RETRY_CONFIG_INTERVAL))
|
|
243
|
+
|
|
244
|
+
return None
|
|
171
245
|
|
|
172
246
|
def _verify_connection_config(self, connection_config: ConnectionConfig) -> bool:
|
|
173
|
-
|
|
247
|
+
self._cognite_client = connection_config.get_cognite_client(
|
|
174
248
|
f"{self._extractor_class.EXTERNAL_ID}-{self._extractor_class.VERSION}"
|
|
175
249
|
)
|
|
176
250
|
try:
|
|
177
|
-
|
|
178
|
-
f"/api/v1/projects/{
|
|
251
|
+
self._cognite_client.post(
|
|
252
|
+
f"/api/v1/projects/{self._cognite_client.config.project}/odin/checkin",
|
|
179
253
|
json={
|
|
180
254
|
"externalId": connection_config.integration,
|
|
181
255
|
},
|
|
@@ -206,11 +280,11 @@ class Runtime(Generic[ExtractorType]):
|
|
|
206
280
|
self.logger.critical(str(e.message))
|
|
207
281
|
|
|
208
282
|
else:
|
|
209
|
-
self.logger.critical(f"Error while connecting to CDF {
|
|
283
|
+
self.logger.critical(f"Error while connecting to CDF {e!s}")
|
|
210
284
|
|
|
211
285
|
return False
|
|
212
286
|
|
|
213
|
-
except
|
|
287
|
+
except RequestsConnectionError as e:
|
|
214
288
|
# This is sometimes thrown; I've seen it when trying to get an auth token, but it might happen elsewhere too
|
|
215
289
|
self.logger.error(str(e))
|
|
216
290
|
self.logger.critical("Could not initiate connection. Please check your configuration.")
|
|
@@ -219,6 +293,12 @@ class Runtime(Generic[ExtractorType]):
|
|
|
219
293
|
return True
|
|
220
294
|
|
|
221
295
|
def run(self) -> None:
|
|
296
|
+
"""
|
|
297
|
+
Run the extractor runtime.
|
|
298
|
+
|
|
299
|
+
This is intended as the main entry point for the extractor runtime, and starts by parsing command line
|
|
300
|
+
arguments.
|
|
301
|
+
"""
|
|
222
302
|
argparser = self._create_argparser()
|
|
223
303
|
args = argparser.parse_args()
|
|
224
304
|
|
|
@@ -234,16 +314,19 @@ class Runtime(Generic[ExtractorType]):
|
|
|
234
314
|
if not args.skip_init_checks and not self._verify_connection_config(connection_config):
|
|
235
315
|
sys.exit(1)
|
|
236
316
|
|
|
237
|
-
# This has to be Any. We don't know the type of the extractors' config at type checking since the
|
|
317
|
+
# This has to be Any. We don't know the type of the extractors' config at type checking since the self doesn't
|
|
238
318
|
# exist yet, and I have not found a way to represent it in a generic way that isn't just an Any in disguise.
|
|
239
319
|
application_config: Any
|
|
320
|
+
config: tuple[Any, ConfigRevision] | None
|
|
321
|
+
|
|
240
322
|
while not self._cancellation_token.is_cancelled:
|
|
241
|
-
|
|
242
|
-
|
|
323
|
+
config = self._safe_get_application_config(args, connection_config)
|
|
324
|
+
if config is None:
|
|
325
|
+
if self._cancellation_token.is_cancelled:
|
|
326
|
+
break
|
|
327
|
+
continue
|
|
243
328
|
|
|
244
|
-
|
|
245
|
-
self.logger.critical("Could not get a valid application config file. Shutting down")
|
|
246
|
-
sys.exit(1)
|
|
329
|
+
application_config, current_config_revision = config
|
|
247
330
|
|
|
248
331
|
# Start extractor in separate process, and wait for it to end
|
|
249
332
|
process = self._spawn_extractor(
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines the base classes for tasks in the extractor framework.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import logging
|
|
2
6
|
from collections.abc import Callable
|
|
3
7
|
from typing import TYPE_CHECKING
|
|
@@ -14,10 +18,16 @@ from cognite.extractorutils.unstable.core.logger import CogniteLogger
|
|
|
14
18
|
if TYPE_CHECKING:
|
|
15
19
|
from cognite.extractorutils.unstable.core.base import Extractor
|
|
16
20
|
|
|
17
|
-
__all__ = ["
|
|
21
|
+
__all__ = ["ContinuousTask", "ScheduledTask", "StartupTask", "Task", "TaskContext"]
|
|
18
22
|
|
|
19
23
|
|
|
20
24
|
class TaskContext(CogniteLogger):
|
|
25
|
+
"""
|
|
26
|
+
Context for a task execution.
|
|
27
|
+
|
|
28
|
+
This class is used to log errors and messages related to the task execution.
|
|
29
|
+
"""
|
|
30
|
+
|
|
21
31
|
def __init__(self, task: "Task", extractor: "Extractor"):
|
|
22
32
|
super().__init__()
|
|
23
33
|
self._task = task
|
|
@@ -58,6 +68,18 @@ class _Task:
|
|
|
58
68
|
|
|
59
69
|
|
|
60
70
|
class ScheduledTask(_Task):
|
|
71
|
+
"""
|
|
72
|
+
A task that is scheduled to run at specific intervals or according to a cron expression.
|
|
73
|
+
|
|
74
|
+
This class allows you to define tasks that can be scheduled using either an interval or a cron expression.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
name: The name of the task.
|
|
78
|
+
target: A callable that takes a ``TaskContext`` and performs the task.
|
|
79
|
+
description: An optional description of the task.
|
|
80
|
+
schedule: A ``ScheduleConfig`` object that defines the scheduling configuration for the task.
|
|
81
|
+
"""
|
|
82
|
+
|
|
61
83
|
def __init__(
|
|
62
84
|
self,
|
|
63
85
|
*,
|
|
@@ -73,6 +95,15 @@ class ScheduledTask(_Task):
|
|
|
73
95
|
def from_interval(
|
|
74
96
|
cls, *, interval: str, name: str, target: TaskTarget, description: str | None = None
|
|
75
97
|
) -> "ScheduledTask":
|
|
98
|
+
"""
|
|
99
|
+
Create a scheduled task that runs at regular intervals.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
interval: A string representing the time interval (e.g., "5m" for 5 minutes).
|
|
103
|
+
name: The name of the task.
|
|
104
|
+
target: A callable that takes a ``TaskContext`` and performs the task.
|
|
105
|
+
description: An optional description of the task.
|
|
106
|
+
"""
|
|
76
107
|
return ScheduledTask(
|
|
77
108
|
name=name,
|
|
78
109
|
target=target,
|
|
@@ -82,6 +113,15 @@ class ScheduledTask(_Task):
|
|
|
82
113
|
|
|
83
114
|
@classmethod
|
|
84
115
|
def from_cron(cls, *, cron: str, name: str, target: TaskTarget, description: str | None = None) -> "ScheduledTask":
|
|
116
|
+
"""
|
|
117
|
+
Create a scheduled task that runs according to a cron expression.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
cron: A string representing the cron expression (e.g., "0 0 * * *" for daily at midnight).
|
|
121
|
+
name: The name of the task.
|
|
122
|
+
target: A callable that takes a ``TaskContext`` and performs the task.
|
|
123
|
+
description: An optional description of the task.
|
|
124
|
+
"""
|
|
85
125
|
return ScheduledTask(
|
|
86
126
|
name=name,
|
|
87
127
|
target=target,
|
|
@@ -91,6 +131,12 @@ class ScheduledTask(_Task):
|
|
|
91
131
|
|
|
92
132
|
|
|
93
133
|
class ContinuousTask(_Task):
|
|
134
|
+
"""
|
|
135
|
+
A task that runs continuously.
|
|
136
|
+
|
|
137
|
+
Continuous tasks are started when the extractor starts and are expected to run until the extractor stops.
|
|
138
|
+
"""
|
|
139
|
+
|
|
94
140
|
def __init__(
|
|
95
141
|
self,
|
|
96
142
|
*,
|
|
@@ -102,6 +148,12 @@ class ContinuousTask(_Task):
|
|
|
102
148
|
|
|
103
149
|
|
|
104
150
|
class StartupTask(_Task):
|
|
151
|
+
"""
|
|
152
|
+
A task that runs once at the startup of the extractor.
|
|
153
|
+
|
|
154
|
+
Startup tasks are executed before any continuous or scheduled tasks and are typically used for initialization.
|
|
155
|
+
"""
|
|
156
|
+
|
|
105
157
|
def __init__(
|
|
106
158
|
self,
|
|
107
159
|
*,
|
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module provides a task scheduler.
|
|
3
|
+
|
|
4
|
+
It is inspired by the ``APScheduler`` library and is designed to manage the scheduling of tasks within the extractor
|
|
5
|
+
framework. It differs from ``APScheduler`` in a few key ways:
|
|
6
|
+
- It is designed to be used within the extractor framework, allowing for better integration with the extractor's
|
|
7
|
+
lifecycle and error handling. For example, it respects the extractor's cancellation token and will gracefully shut
|
|
8
|
+
down upon cancellation.
|
|
9
|
+
- It has a simpler interface, focusing on the core functionality needed for scheduling tasks without the additional
|
|
10
|
+
complexity of a full-featured scheduler like ``APScheduler``.
|
|
11
|
+
- It is fully typed, providing better type safety and autocompletion in IDEs.
|
|
12
|
+
"""
|
|
13
|
+
|
|
1
14
|
from ._scheduler import TaskScheduler
|
|
2
15
|
|
|
3
16
|
__all__ = ["TaskScheduler"]
|
|
@@ -51,7 +51,7 @@ class TaskScheduler:
|
|
|
51
51
|
return []
|
|
52
52
|
with self._jobs_lock:
|
|
53
53
|
next_runs = sorted([(j.schedule.next(), j) for j in self._jobs.values()], key=lambda tup: tup[0])
|
|
54
|
-
return [job for (
|
|
54
|
+
return [job for (scheduled_time, job) in next_runs if scheduled_time == next_runs[0][0]] if next_runs else []
|
|
55
55
|
|
|
56
56
|
def _run_job(self, job: Job) -> bool:
|
|
57
57
|
with self._running_lock:
|
|
@@ -13,8 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
"""
|
|
16
|
-
Module containing upload queue classes.
|
|
17
|
-
|
|
16
|
+
Module containing upload queue classes.
|
|
17
|
+
|
|
18
|
+
The UploadQueue classes chunk items together and upload them together to CDF, both to minimize the load on the API, and
|
|
19
|
+
also to speed up uploading as requests can be slow.
|
|
18
20
|
|
|
19
21
|
Each upload queue comes with some configurable conditions that, when met, automatically triggers an upload.
|
|
20
22
|
|
|
@@ -78,13 +80,13 @@ from .time_series import (
|
|
|
78
80
|
|
|
79
81
|
__all__ = [
|
|
80
82
|
"AssetUploadQueue",
|
|
81
|
-
"EventUploadQueue",
|
|
82
83
|
"BytesUploadQueue",
|
|
84
|
+
"DataPoint",
|
|
85
|
+
"DataPointList",
|
|
86
|
+
"EventUploadQueue",
|
|
83
87
|
"FileUploadQueue",
|
|
84
88
|
"IOFileUploadQueue",
|
|
85
89
|
"RawUploadQueue",
|
|
86
|
-
"DataPoint",
|
|
87
|
-
"DataPointList",
|
|
88
90
|
"SequenceUploadQueue",
|
|
89
91
|
"TimeSeriesUploadQueue",
|
|
90
92
|
"default_time_series_factory",
|
|
@@ -84,7 +84,7 @@ class AbstractUploadQueue(ABC):
|
|
|
84
84
|
|
|
85
85
|
def _post_upload(self, uploaded: list[Any]) -> None:
|
|
86
86
|
"""
|
|
87
|
-
Perform post_upload_function to uploaded data, if applicable
|
|
87
|
+
Perform post_upload_function to uploaded data, if applicable.
|
|
88
88
|
|
|
89
89
|
Args:
|
|
90
90
|
uploaded: list of uploaded data
|
|
@@ -103,7 +103,7 @@ class AbstractUploadQueue(ABC):
|
|
|
103
103
|
|
|
104
104
|
def _run(self) -> None:
|
|
105
105
|
"""
|
|
106
|
-
Internal run method for upload thread
|
|
106
|
+
Internal run method for upload thread.
|
|
107
107
|
"""
|
|
108
108
|
while not self.cancellation_token.wait(timeout=self.max_upload_interval):
|
|
109
109
|
try:
|
|
@@ -117,8 +117,7 @@ class AbstractUploadQueue(ABC):
|
|
|
117
117
|
|
|
118
118
|
def start(self) -> None:
|
|
119
119
|
"""
|
|
120
|
-
Start upload thread if max_upload_interval is set
|
|
121
|
-
seconds.
|
|
120
|
+
Start upload thread if max_upload_interval is set.
|
|
122
121
|
"""
|
|
123
122
|
if self.max_upload_interval is not None:
|
|
124
123
|
self.thread.start()
|
|
@@ -137,7 +136,7 @@ class AbstractUploadQueue(ABC):
|
|
|
137
136
|
|
|
138
137
|
def __len__(self) -> int:
|
|
139
138
|
"""
|
|
140
|
-
The size of the upload queue
|
|
139
|
+
The size of the upload queue.
|
|
141
140
|
|
|
142
141
|
Returns:
|
|
143
142
|
Number of events in queue
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Upload queue for (legacy) assets.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
# Copyright 2023 Cognite AS
|
|
2
6
|
#
|
|
3
7
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -37,7 +41,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
|
|
|
37
41
|
|
|
38
42
|
class AssetUploadQueue(AbstractUploadQueue):
|
|
39
43
|
"""
|
|
40
|
-
Upload queue for assets
|
|
44
|
+
Upload queue for assets.
|
|
41
45
|
|
|
42
46
|
Args:
|
|
43
47
|
cdf_client: Cognite Data Fusion client to use
|
|
@@ -77,8 +81,9 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
77
81
|
|
|
78
82
|
def add_to_upload_queue(self, asset: Asset) -> None:
|
|
79
83
|
"""
|
|
80
|
-
Add asset to upload queue.
|
|
81
|
-
|
|
84
|
+
Add asset to upload queue.
|
|
85
|
+
|
|
86
|
+
The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
|
|
82
87
|
|
|
83
88
|
Args:
|
|
84
89
|
asset: Asset to add
|
|
@@ -92,7 +97,7 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
92
97
|
|
|
93
98
|
def upload(self) -> None:
|
|
94
99
|
"""
|
|
95
|
-
Trigger an upload of the queue, clears queue afterwards
|
|
100
|
+
Trigger an upload of the queue, clears queue afterwards.
|
|
96
101
|
"""
|
|
97
102
|
|
|
98
103
|
@retry(
|
|
@@ -107,8 +112,8 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
107
112
|
try:
|
|
108
113
|
self.cdf_client.assets.create(self.upload_queue)
|
|
109
114
|
except CogniteDuplicatedError as e:
|
|
110
|
-
duplicated_ids =
|
|
111
|
-
failed: list[Asset] =
|
|
115
|
+
duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
|
|
116
|
+
failed: list[Asset] = list(e.failed)
|
|
112
117
|
to_create = []
|
|
113
118
|
to_update = []
|
|
114
119
|
for asset in failed:
|
|
@@ -138,7 +143,7 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
138
143
|
|
|
139
144
|
def __enter__(self) -> "AssetUploadQueue":
|
|
140
145
|
"""
|
|
141
|
-
Wraps around start method, for use as context manager
|
|
146
|
+
Wraps around start method, for use as context manager.
|
|
142
147
|
|
|
143
148
|
Returns:
|
|
144
149
|
self
|
|
@@ -153,7 +158,7 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
153
158
|
exc_tb: TracebackType | None,
|
|
154
159
|
) -> None:
|
|
155
160
|
"""
|
|
156
|
-
Wraps around stop method, for use as context manager
|
|
161
|
+
Wraps around stop method, for use as context manager.
|
|
157
162
|
|
|
158
163
|
Args:
|
|
159
164
|
exc_type: Exception type
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for uploading data modeling instances to CDF.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
from collections.abc import Callable
|
|
2
6
|
from types import TracebackType
|
|
3
7
|
from typing import Any
|
|
@@ -16,6 +20,24 @@ from cognite.extractorutils.util import cognite_exceptions, retry
|
|
|
16
20
|
|
|
17
21
|
|
|
18
22
|
class InstanceUploadQueue(AbstractUploadQueue):
|
|
23
|
+
"""
|
|
24
|
+
Upload queue for data modeling instances (nodes and edges).
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
cdf_client: Cognite Data Fusion client to use.
|
|
28
|
+
post_upload_function: A function that will be called after each upload. The function will be given one argument:
|
|
29
|
+
A list of the nodes and edges that were uploaded.
|
|
30
|
+
max_queue_size: Maximum size of upload queue. Defaults to no max size.
|
|
31
|
+
max_upload_interval: Automatically trigger an upload on an interval when run as a thread (use start/stop
|
|
32
|
+
methods). Unit is seconds.
|
|
33
|
+
trigger_log_level: Log level to log upload triggers to.
|
|
34
|
+
thread_name: Thread name of uploader thread.
|
|
35
|
+
cancellation_token: Cancellation token for managing thread cancellation.
|
|
36
|
+
auto_create_start_nodes: Automatically create start nodes if they do not exist.
|
|
37
|
+
auto_create_end_nodes: Automatically create end nodes if they do not exist.
|
|
38
|
+
auto_create_direct_relations: Automatically create direct relations if they do not exist.
|
|
39
|
+
"""
|
|
40
|
+
|
|
19
41
|
def __init__(
|
|
20
42
|
self,
|
|
21
43
|
cdf_client: CogniteClient,
|
|
@@ -52,6 +74,15 @@ class InstanceUploadQueue(AbstractUploadQueue):
|
|
|
52
74
|
node_data: list[NodeApply] | None = None,
|
|
53
75
|
edge_data: list[EdgeApply] | None = None,
|
|
54
76
|
) -> None:
|
|
77
|
+
"""
|
|
78
|
+
Add instances to the upload queue.
|
|
79
|
+
|
|
80
|
+
The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
node_data: List of nodes to add to the upload queue.
|
|
84
|
+
edge_data: List of edges to add to the upload queue.
|
|
85
|
+
"""
|
|
55
86
|
if node_data:
|
|
56
87
|
with self.lock:
|
|
57
88
|
self.node_queue.extend(node_data)
|
|
@@ -66,6 +97,10 @@ class InstanceUploadQueue(AbstractUploadQueue):
|
|
|
66
97
|
self._check_triggers()
|
|
67
98
|
|
|
68
99
|
def upload(self) -> None:
|
|
100
|
+
"""
|
|
101
|
+
Trigger an upload of the queue, clears queue afterwards.
|
|
102
|
+
"""
|
|
103
|
+
|
|
69
104
|
@retry(
|
|
70
105
|
exceptions=cognite_exceptions(),
|
|
71
106
|
cancellation_token=self.cancellation_token,
|
|
@@ -91,7 +126,7 @@ class InstanceUploadQueue(AbstractUploadQueue):
|
|
|
91
126
|
|
|
92
127
|
def __enter__(self) -> "InstanceUploadQueue":
|
|
93
128
|
"""
|
|
94
|
-
Wraps around start method, for use as context manager
|
|
129
|
+
Wraps around start method, for use as context manager.
|
|
95
130
|
|
|
96
131
|
Returns:
|
|
97
132
|
self
|
|
@@ -106,7 +141,7 @@ class InstanceUploadQueue(AbstractUploadQueue):
|
|
|
106
141
|
exc_tb: TracebackType | None,
|
|
107
142
|
) -> None:
|
|
108
143
|
"""
|
|
109
|
-
Wraps around stop method, for use as context manager
|
|
144
|
+
Wraps around stop method, for use as context manager.
|
|
110
145
|
|
|
111
146
|
Args:
|
|
112
147
|
exc_type: Exception type
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Upload queue for (legacy) events.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
# Copyright 2023 Cognite AS
|
|
2
6
|
#
|
|
3
7
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -36,7 +40,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
|
|
|
36
40
|
|
|
37
41
|
class EventUploadQueue(AbstractUploadQueue):
|
|
38
42
|
"""
|
|
39
|
-
Upload queue for events
|
|
43
|
+
Upload queue for events.
|
|
40
44
|
|
|
41
45
|
Args:
|
|
42
46
|
cdf_client: Cognite Data Fusion client to use
|
|
@@ -78,8 +82,9 @@ class EventUploadQueue(AbstractUploadQueue):
|
|
|
78
82
|
|
|
79
83
|
def add_to_upload_queue(self, event: Event) -> None:
|
|
80
84
|
"""
|
|
81
|
-
Add event to upload queue.
|
|
82
|
-
|
|
85
|
+
Add event to upload queue.
|
|
86
|
+
|
|
87
|
+
The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
|
|
83
88
|
|
|
84
89
|
Args:
|
|
85
90
|
event: Event to add
|
|
@@ -94,7 +99,7 @@ class EventUploadQueue(AbstractUploadQueue):
|
|
|
94
99
|
|
|
95
100
|
def upload(self) -> None:
|
|
96
101
|
"""
|
|
97
|
-
Trigger an upload of the queue, clears queue afterwards
|
|
102
|
+
Trigger an upload of the queue, clears queue afterwards.
|
|
98
103
|
"""
|
|
99
104
|
|
|
100
105
|
@retry(
|
|
@@ -107,10 +112,10 @@ class EventUploadQueue(AbstractUploadQueue):
|
|
|
107
112
|
)
|
|
108
113
|
def _upload_batch() -> None:
|
|
109
114
|
try:
|
|
110
|
-
self.cdf_client.events.create(
|
|
115
|
+
self.cdf_client.events.create(list(self.upload_queue))
|
|
111
116
|
except CogniteDuplicatedError as e:
|
|
112
|
-
duplicated_ids =
|
|
113
|
-
failed: list[Event] =
|
|
117
|
+
duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
|
|
118
|
+
failed: list[Event] = list(e.failed)
|
|
114
119
|
to_create = []
|
|
115
120
|
to_update = []
|
|
116
121
|
for evt in failed:
|
|
@@ -142,7 +147,7 @@ class EventUploadQueue(AbstractUploadQueue):
|
|
|
142
147
|
|
|
143
148
|
def __enter__(self) -> "EventUploadQueue":
|
|
144
149
|
"""
|
|
145
|
-
Wraps around start method, for use as context manager
|
|
150
|
+
Wraps around start method, for use as context manager.
|
|
146
151
|
|
|
147
152
|
Returns:
|
|
148
153
|
self
|
|
@@ -154,7 +159,7 @@ class EventUploadQueue(AbstractUploadQueue):
|
|
|
154
159
|
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
|
|
155
160
|
) -> None:
|
|
156
161
|
"""
|
|
157
|
-
Wraps around stop method, for use as context manager
|
|
162
|
+
Wraps around stop method, for use as context manager.
|
|
158
163
|
|
|
159
164
|
Args:
|
|
160
165
|
exc_type: Exception type
|