cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +7 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +58 -49
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +29 -6
- cognite/extractorutils/uploader_types.py +15 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines the Error and ErrorLevel classes for reporting errors in extractors.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import logging
|
|
2
6
|
from enum import Enum
|
|
3
7
|
from types import TracebackType
|
|
@@ -15,12 +19,19 @@ __all__ = ["Error", "ErrorLevel"]
|
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
class ErrorLevel(Enum):
|
|
22
|
+
"""
|
|
23
|
+
Enumeration of error levels for reporting errors in extractors.
|
|
24
|
+
"""
|
|
25
|
+
|
|
18
26
|
warning = "warning"
|
|
19
27
|
error = "error"
|
|
20
28
|
fatal = "fatal"
|
|
21
29
|
|
|
22
30
|
@property
|
|
23
31
|
def log_level(self) -> int:
|
|
32
|
+
"""
|
|
33
|
+
Returns the corresponding logging level for the error level.
|
|
34
|
+
"""
|
|
24
35
|
match self:
|
|
25
36
|
case ErrorLevel.warning:
|
|
26
37
|
return logging.WARNING
|
|
@@ -33,6 +44,20 @@ class ErrorLevel(Enum):
|
|
|
33
44
|
|
|
34
45
|
|
|
35
46
|
class Error:
|
|
47
|
+
"""
|
|
48
|
+
Represents an error that occurred during the run of an extractor.
|
|
49
|
+
|
|
50
|
+
This class should not be instantiated directly. Instead, use the ``CogniteLogger`` methods (either in the
|
|
51
|
+
TaskContext or the extractor base class) to create errors.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
level: The severity level of the error.
|
|
55
|
+
description: A brief description of the error.
|
|
56
|
+
details: Additional details about the error, if any.
|
|
57
|
+
task_name: The name of the task during which the error occurred, if applicable.
|
|
58
|
+
extractor: The extractor instance that reported the error.
|
|
59
|
+
"""
|
|
60
|
+
|
|
36
61
|
def __init__(
|
|
37
62
|
self,
|
|
38
63
|
level: ErrorLevel,
|
|
@@ -55,6 +80,9 @@ class Error:
|
|
|
55
80
|
self._extractor._report_error(self)
|
|
56
81
|
|
|
57
82
|
def instant(self) -> None:
|
|
83
|
+
"""
|
|
84
|
+
Make this error an instant error, meaning it does not have a duration.
|
|
85
|
+
"""
|
|
58
86
|
# Only end the error once
|
|
59
87
|
if self.end_time is not None:
|
|
60
88
|
return
|
|
@@ -65,6 +93,11 @@ class Error:
|
|
|
65
93
|
self._extractor._report_error(self)
|
|
66
94
|
|
|
67
95
|
def finish(self) -> None:
|
|
96
|
+
"""
|
|
97
|
+
Mark the error as finished, setting the end time to the current time.
|
|
98
|
+
|
|
99
|
+
This method should be called when the error is resolved or no longer relevant.
|
|
100
|
+
"""
|
|
68
101
|
# Only end the error once
|
|
69
102
|
if self.end_time is not None:
|
|
70
103
|
return
|
|
@@ -75,6 +108,11 @@ class Error:
|
|
|
75
108
|
self._extractor._report_error(self)
|
|
76
109
|
|
|
77
110
|
def __enter__(self) -> "Error":
|
|
111
|
+
"""
|
|
112
|
+
Start tracking an error as a context manager.
|
|
113
|
+
|
|
114
|
+
This allows the error to be automatically finished when exiting the context.
|
|
115
|
+
"""
|
|
78
116
|
return self
|
|
79
117
|
|
|
80
118
|
def __exit__(
|
|
@@ -83,5 +121,8 @@ class Error:
|
|
|
83
121
|
exc_val: BaseException | None,
|
|
84
122
|
exc_tb: TracebackType | None,
|
|
85
123
|
) -> bool:
|
|
124
|
+
"""
|
|
125
|
+
Finish the error context manager, marking the error as finished.
|
|
126
|
+
"""
|
|
86
127
|
self.finish()
|
|
87
128
|
return exc_val is None
|
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module provides the ``CogniteLogger`` base class, which is an abstract base class for logging.
|
|
3
|
+
|
|
4
|
+
This class is subclassed by both the ``TaskContext`` and the ``Extractor`` base classes, providing a unified interface
|
|
5
|
+
for logging and error handling in extractors.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
from abc import ABC, abstractmethod
|
|
2
9
|
from logging import Logger, getLogger
|
|
3
10
|
from traceback import format_exception
|
|
@@ -9,6 +16,16 @@ from cognite.extractorutils.unstable.core.errors import Error, ErrorLevel
|
|
|
9
16
|
|
|
10
17
|
|
|
11
18
|
class CogniteLogger(ABC):
|
|
19
|
+
"""
|
|
20
|
+
Base class for logging and error handling in extractors.
|
|
21
|
+
|
|
22
|
+
This class provides methods to log messages at different levels (debug, info, warning, error, fatal) and to
|
|
23
|
+
create and manage errors that occur during the execution of an extractor.
|
|
24
|
+
|
|
25
|
+
If you use this class instead of a standard logger, you will get additional functionality such as reporting errors
|
|
26
|
+
back to CDF.
|
|
27
|
+
"""
|
|
28
|
+
|
|
12
29
|
def __init__(self) -> None:
|
|
13
30
|
self._logger: Logger = getLogger()
|
|
14
31
|
|
|
@@ -24,9 +41,15 @@ class CogniteLogger(ABC):
|
|
|
24
41
|
pass
|
|
25
42
|
|
|
26
43
|
def debug(self, message: str) -> None:
|
|
44
|
+
"""
|
|
45
|
+
Log a debug message.
|
|
46
|
+
"""
|
|
27
47
|
self._logger.debug(message)
|
|
28
48
|
|
|
29
49
|
def info(self, message: str) -> None:
|
|
50
|
+
"""
|
|
51
|
+
Log an information message.
|
|
52
|
+
"""
|
|
30
53
|
self._logger.info(message)
|
|
31
54
|
|
|
32
55
|
def begin_warning(
|
|
@@ -36,6 +59,32 @@ class CogniteLogger(ABC):
|
|
|
36
59
|
details: str | None = None,
|
|
37
60
|
auto_log: bool = True,
|
|
38
61
|
) -> Error:
|
|
62
|
+
"""
|
|
63
|
+
Begin a warning error.
|
|
64
|
+
|
|
65
|
+
This will both log the message and create an error object that can be used to track and report the error.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
message: The message to log and include in the error.
|
|
69
|
+
details: Additional details about the error, if any.
|
|
70
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
An ``Error`` object representing the warning error, tied to the current extractor instance.
|
|
74
|
+
|
|
75
|
+
Examples:
|
|
76
|
+
To track and complete an error, you can keep a reference to the error object and call its ``finish``
|
|
77
|
+
method when the error is resolved, or use it in a context manager to automatically finish it:
|
|
78
|
+
|
|
79
|
+
.. code-block:: python
|
|
80
|
+
error = logger.begin_warning("This is a warning", details="Some details")
|
|
81
|
+
# Do something
|
|
82
|
+
error.finish()
|
|
83
|
+
|
|
84
|
+
.. code-block:: python
|
|
85
|
+
with logger.begin_warning("This is a warning", details="Some details"):
|
|
86
|
+
# Do something
|
|
87
|
+
"""
|
|
39
88
|
if auto_log:
|
|
40
89
|
self._logger.warning(message)
|
|
41
90
|
return self._new_error(
|
|
@@ -51,6 +100,32 @@ class CogniteLogger(ABC):
|
|
|
51
100
|
details: str | None = None,
|
|
52
101
|
auto_log: bool = True,
|
|
53
102
|
) -> Error:
|
|
103
|
+
"""
|
|
104
|
+
Begin an error.
|
|
105
|
+
|
|
106
|
+
This will both log the message and create an error object that can be used to track and report the error.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
message: The message to log and include in the error.
|
|
110
|
+
details: Additional details about the error, if any.
|
|
111
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
An ``Error`` object representing the error, tied to the current extractor instance.
|
|
115
|
+
|
|
116
|
+
Examples:
|
|
117
|
+
To track and complete an error, you can keep a reference to the error object and call its ``finish``
|
|
118
|
+
method when the error is resolved, or use it in a context manager to automatically finish it:
|
|
119
|
+
|
|
120
|
+
.. code-block:: python
|
|
121
|
+
error = logger.begin_error("This is an error", details="Some details")
|
|
122
|
+
# Do something
|
|
123
|
+
error.finish()
|
|
124
|
+
|
|
125
|
+
.. code-block:: python
|
|
126
|
+
with logger.begin_error("This is an error", details="Some details"):
|
|
127
|
+
# Do something
|
|
128
|
+
"""
|
|
54
129
|
if auto_log:
|
|
55
130
|
self._logger.error(message)
|
|
56
131
|
return self._new_error(
|
|
@@ -66,6 +141,32 @@ class CogniteLogger(ABC):
|
|
|
66
141
|
details: str | None = None,
|
|
67
142
|
auto_log: bool = True,
|
|
68
143
|
) -> Error:
|
|
144
|
+
"""
|
|
145
|
+
Begin a fatal error.
|
|
146
|
+
|
|
147
|
+
This will both log the message and create an error object that can be used to track and report the error.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
message: The message to log and include in the error.
|
|
151
|
+
details: Additional details about the error, if any.
|
|
152
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
An ``Error`` object representing the fatal error, tied to the current extractor instance.
|
|
156
|
+
|
|
157
|
+
Examples:
|
|
158
|
+
To track and complete an error, you can keep a reference to the error object and call its ``finish``
|
|
159
|
+
method when the error is resolved, or use it in a context manager to automatically finish it:
|
|
160
|
+
|
|
161
|
+
.. code-block:: python
|
|
162
|
+
error = logger.begin_fatal("This is a fatal error", details="Some details")
|
|
163
|
+
# Do something
|
|
164
|
+
error.finish()
|
|
165
|
+
|
|
166
|
+
.. code-block:: python
|
|
167
|
+
with logger.begin_fatal("This is a fatal error", details="Some details"):
|
|
168
|
+
# Do something
|
|
169
|
+
"""
|
|
69
170
|
if auto_log:
|
|
70
171
|
self._logger.critical(message)
|
|
71
172
|
return self._new_error(
|
|
@@ -81,6 +182,17 @@ class CogniteLogger(ABC):
|
|
|
81
182
|
details: str | None = None,
|
|
82
183
|
auto_log: bool = True,
|
|
83
184
|
) -> None:
|
|
185
|
+
"""
|
|
186
|
+
Report an instant warning.
|
|
187
|
+
|
|
188
|
+
This will log the message and create an error object that is marked as instant, meaning it does not have a
|
|
189
|
+
duration.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
message: The message to log and include in the error.
|
|
193
|
+
details: Additional details about the error, if any.
|
|
194
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
195
|
+
"""
|
|
84
196
|
if auto_log:
|
|
85
197
|
self._logger.warning(message)
|
|
86
198
|
self._new_error(
|
|
@@ -96,6 +208,17 @@ class CogniteLogger(ABC):
|
|
|
96
208
|
details: str | None = None,
|
|
97
209
|
auto_log: bool = True,
|
|
98
210
|
) -> None:
|
|
211
|
+
"""
|
|
212
|
+
Report an instant error.
|
|
213
|
+
|
|
214
|
+
This will log the message and create an error object that is marked as instant, meaning it does not have a
|
|
215
|
+
duration.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
message: The message to log and include in the error.
|
|
219
|
+
details: Additional details about the error, if any.
|
|
220
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
221
|
+
"""
|
|
99
222
|
if auto_log:
|
|
100
223
|
self._logger.error(message)
|
|
101
224
|
self._new_error(
|
|
@@ -111,6 +234,17 @@ class CogniteLogger(ABC):
|
|
|
111
234
|
details: str | None = None,
|
|
112
235
|
auto_log: bool = True,
|
|
113
236
|
) -> None:
|
|
237
|
+
"""
|
|
238
|
+
Report an instant fatal error.
|
|
239
|
+
|
|
240
|
+
This will log the message and create an error object that is marked as instant, meaning it does not have a
|
|
241
|
+
duration.
|
|
242
|
+
|
|
243
|
+
Args:
|
|
244
|
+
message: The message to log and include in the error.
|
|
245
|
+
details: Additional details about the error, if any.
|
|
246
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
247
|
+
"""
|
|
114
248
|
if auto_log:
|
|
115
249
|
self._logger.critical(message)
|
|
116
250
|
self._new_error(
|
|
@@ -128,6 +262,21 @@ class CogniteLogger(ABC):
|
|
|
128
262
|
include_details: Literal["stack_trace"] | Literal["exception_message"] | bool = "exception_message",
|
|
129
263
|
auto_log: bool = True,
|
|
130
264
|
) -> None:
|
|
265
|
+
"""
|
|
266
|
+
Report an exception as an error.
|
|
267
|
+
|
|
268
|
+
This will log the message and create an error object that is marked as instant, meaning it does not have a
|
|
269
|
+
duration. The exception details can be included in the error.
|
|
270
|
+
|
|
271
|
+
Args:
|
|
272
|
+
message: The message to log and include in the error.
|
|
273
|
+
exception: The exception to report.
|
|
274
|
+
level: The severity level of the error. Defaults to ``ErrorLevel.error``.
|
|
275
|
+
include_details: How to include details about the exception. Can be "stack_trace", "exception_message",
|
|
276
|
+
or True (equivalent to "exception_message"). If False, no details are included. Defaults to
|
|
277
|
+
"exception_message".
|
|
278
|
+
auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
|
|
279
|
+
"""
|
|
131
280
|
if auto_log:
|
|
132
281
|
self._logger.log(level=level.log_level, msg=message, exc_info=exception)
|
|
133
282
|
|
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines the restart policies for extractors.
|
|
3
|
+
|
|
4
|
+
It is used by the ``Runtime`` to determine whether an extractor should be restarted after a task failure.
|
|
5
|
+
|
|
6
|
+
It provides three predefined restart policies:
|
|
7
|
+
- ``NEVER``: The extractor will never be restarted.
|
|
8
|
+
- ``WHEN_ANY_TASK_CRASHES``: The extractor will be restarted if any task crashes.
|
|
9
|
+
- ``WHEN_CONTINUOUS_TASKS_CRASHES``: The extractor will be restarted only if a continuous task crashes.
|
|
10
|
+
|
|
11
|
+
Users can also define their own restart policies by providing a callable that takes a `Task` and an `Exception`
|
|
12
|
+
and returns a boolean indicating whether the extractor should be restarted.
|
|
13
|
+
"""
|
|
14
|
+
|
|
1
15
|
from collections.abc import Callable
|
|
2
16
|
|
|
3
17
|
from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Task
|
|
@@ -22,8 +36,8 @@ WHEN_CONTINUOUS_TASKS_CRASHES = _is_continuous
|
|
|
22
36
|
WHEN_ANY_TASK_CRASHES = _true
|
|
23
37
|
|
|
24
38
|
__all__ = [
|
|
25
|
-
"RestartPolicy",
|
|
26
39
|
"NEVER",
|
|
27
|
-
"WHEN_CONTINUOUS_TASKS_CRASHES",
|
|
28
40
|
"WHEN_ANY_TASK_CRASHES",
|
|
41
|
+
"WHEN_CONTINUOUS_TASKS_CRASHES",
|
|
42
|
+
"RestartPolicy",
|
|
29
43
|
]
|
|
@@ -1,3 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module providing the runtime for an extractor.
|
|
3
|
+
|
|
4
|
+
The runtime is responsible for starting the extractor in a separate process, managing its lifecycle, and handling
|
|
5
|
+
configuration loading and updates. It also handles errors and restarts the extractor if necessary.
|
|
6
|
+
|
|
7
|
+
It is the preferred way to run an extractor, as it provides a more robust and flexible way to manage the extractor's
|
|
8
|
+
lifecycle compared to running it directly in the main process.
|
|
9
|
+
|
|
10
|
+
The runtime also contains a command line interface (CLI) for starting the extractor, which allows users to specify
|
|
11
|
+
the connection configuration and other parameters.
|
|
12
|
+
|
|
13
|
+
.. code-block:: python
|
|
14
|
+
|
|
15
|
+
from cognite.extractorutils.unstable.core.runtime import Runtime
|
|
16
|
+
from my_extractor import MyExtractor
|
|
17
|
+
|
|
18
|
+
def main() -> None:
|
|
19
|
+
runtime = Runtime(MyExtractor)
|
|
20
|
+
runtime.run()
|
|
21
|
+
|
|
22
|
+
if __name__ == "__main__":
|
|
23
|
+
main()
|
|
24
|
+
"""
|
|
25
|
+
|
|
1
26
|
import logging
|
|
2
27
|
import os
|
|
3
28
|
import sys
|
|
@@ -9,7 +34,7 @@ from random import randint
|
|
|
9
34
|
from typing import Any, Generic, TypeVar
|
|
10
35
|
from uuid import uuid4
|
|
11
36
|
|
|
12
|
-
from requests.exceptions import ConnectionError
|
|
37
|
+
from requests.exceptions import ConnectionError as RequestsConnectionError
|
|
13
38
|
from typing_extensions import assert_never
|
|
14
39
|
|
|
15
40
|
from cognite.client import CogniteClient
|
|
@@ -32,12 +57,19 @@ from cognite.extractorutils.util import now
|
|
|
32
57
|
from ._messaging import RuntimeMessage
|
|
33
58
|
from .base import ConfigRevision, ConfigType, Extractor, FullConfig
|
|
34
59
|
|
|
35
|
-
__all__ = ["
|
|
60
|
+
__all__ = ["ExtractorType", "Runtime"]
|
|
36
61
|
|
|
37
62
|
ExtractorType = TypeVar("ExtractorType", bound=Extractor)
|
|
38
63
|
|
|
39
64
|
|
|
40
65
|
class Runtime(Generic[ExtractorType]):
|
|
66
|
+
"""
|
|
67
|
+
The runtime for an extractor.
|
|
68
|
+
|
|
69
|
+
This class is responsible for starting the extractor in a separate process, managing its lifecycle, and handling
|
|
70
|
+
configuration loading and updates. It also handles errors and restarts the extractor if necessary.
|
|
71
|
+
"""
|
|
72
|
+
|
|
41
73
|
RETRY_CONFIG_INTERVAL = 30
|
|
42
74
|
|
|
43
75
|
def __init__(
|
|
@@ -73,8 +105,8 @@ class Runtime(Generic[ExtractorType]):
|
|
|
73
105
|
help="Connection parameters",
|
|
74
106
|
)
|
|
75
107
|
argparser.add_argument(
|
|
76
|
-
"-
|
|
77
|
-
"--local-
|
|
108
|
+
"-f",
|
|
109
|
+
"--force-local-config",
|
|
78
110
|
nargs=1,
|
|
79
111
|
type=Path,
|
|
80
112
|
required=False,
|
|
@@ -248,11 +280,11 @@ class Runtime(Generic[ExtractorType]):
|
|
|
248
280
|
self.logger.critical(str(e.message))
|
|
249
281
|
|
|
250
282
|
else:
|
|
251
|
-
self.logger.critical(f"Error while connecting to CDF {
|
|
283
|
+
self.logger.critical(f"Error while connecting to CDF {e!s}")
|
|
252
284
|
|
|
253
285
|
return False
|
|
254
286
|
|
|
255
|
-
except
|
|
287
|
+
except RequestsConnectionError as e:
|
|
256
288
|
# This is sometimes thrown. I've seen it when trying to get an auth token, but it might happen elsewhere too.
|
|
257
289
|
self.logger.error(str(e))
|
|
258
290
|
self.logger.critical("Could not initiate connection. Please check your configuration.")
|
|
@@ -261,6 +293,12 @@ class Runtime(Generic[ExtractorType]):
|
|
|
261
293
|
return True
|
|
262
294
|
|
|
263
295
|
def run(self) -> None:
|
|
296
|
+
"""
|
|
297
|
+
Run the extractor runtime.
|
|
298
|
+
|
|
299
|
+
This is intended as the main entry point for the extractor runtime, and starts by parsing command line
|
|
300
|
+
arguments.
|
|
301
|
+
"""
|
|
264
302
|
argparser = self._create_argparser()
|
|
265
303
|
args = argparser.parse_args()
|
|
266
304
|
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines the base classes for tasks in the extractor framework.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import logging
|
|
2
6
|
from collections.abc import Callable
|
|
3
7
|
from typing import TYPE_CHECKING
|
|
@@ -14,10 +18,16 @@ from cognite.extractorutils.unstable.core.logger import CogniteLogger
|
|
|
14
18
|
if TYPE_CHECKING:
|
|
15
19
|
from cognite.extractorutils.unstable.core.base import Extractor
|
|
16
20
|
|
|
17
|
-
__all__ = ["
|
|
21
|
+
__all__ = ["ContinuousTask", "ScheduledTask", "StartupTask", "Task", "TaskContext"]
|
|
18
22
|
|
|
19
23
|
|
|
20
24
|
class TaskContext(CogniteLogger):
|
|
25
|
+
"""
|
|
26
|
+
Context for a task execution.
|
|
27
|
+
|
|
28
|
+
This class is used to log errors and messages related to the task execution.
|
|
29
|
+
"""
|
|
30
|
+
|
|
21
31
|
def __init__(self, task: "Task", extractor: "Extractor"):
|
|
22
32
|
super().__init__()
|
|
23
33
|
self._task = task
|
|
@@ -58,6 +68,18 @@ class _Task:
|
|
|
58
68
|
|
|
59
69
|
|
|
60
70
|
class ScheduledTask(_Task):
|
|
71
|
+
"""
|
|
72
|
+
A task that is scheduled to run at specific intervals or according to a cron expression.
|
|
73
|
+
|
|
74
|
+
This class allows you to define tasks that can be scheduled using either an interval or a cron expression.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
name: The name of the task.
|
|
78
|
+
target: A callable that takes a ``TaskContext`` and performs the task.
|
|
79
|
+
description: An optional description of the task.
|
|
80
|
+
schedule: A ``ScheduleConfig`` object that defines the scheduling configuration for the task.
|
|
81
|
+
"""
|
|
82
|
+
|
|
61
83
|
def __init__(
|
|
62
84
|
self,
|
|
63
85
|
*,
|
|
@@ -73,6 +95,15 @@ class ScheduledTask(_Task):
|
|
|
73
95
|
def from_interval(
|
|
74
96
|
cls, *, interval: str, name: str, target: TaskTarget, description: str | None = None
|
|
75
97
|
) -> "ScheduledTask":
|
|
98
|
+
"""
|
|
99
|
+
Create a scheduled task that runs at regular intervals.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
interval: A string representing the time interval (e.g., "5m" for 5 minutes).
|
|
103
|
+
name: The name of the task.
|
|
104
|
+
target: A callable that takes a ``TaskContext`` and performs the task.
|
|
105
|
+
description: An optional description of the task.
|
|
106
|
+
"""
|
|
76
107
|
return ScheduledTask(
|
|
77
108
|
name=name,
|
|
78
109
|
target=target,
|
|
@@ -82,6 +113,15 @@ class ScheduledTask(_Task):
|
|
|
82
113
|
|
|
83
114
|
@classmethod
|
|
84
115
|
def from_cron(cls, *, cron: str, name: str, target: TaskTarget, description: str | None = None) -> "ScheduledTask":
|
|
116
|
+
"""
|
|
117
|
+
Create a scheduled task that runs according to a cron expression.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
cron: A string representing the cron expression (e.g., "0 0 * * *" for daily at midnight).
|
|
121
|
+
name: The name of the task.
|
|
122
|
+
target: A callable that takes a ``TaskContext`` and performs the task.
|
|
123
|
+
description: An optional description of the task.
|
|
124
|
+
"""
|
|
85
125
|
return ScheduledTask(
|
|
86
126
|
name=name,
|
|
87
127
|
target=target,
|
|
@@ -91,6 +131,12 @@ class ScheduledTask(_Task):
|
|
|
91
131
|
|
|
92
132
|
|
|
93
133
|
class ContinuousTask(_Task):
|
|
134
|
+
"""
|
|
135
|
+
A task that runs continuously.
|
|
136
|
+
|
|
137
|
+
Continuous tasks are started when the extractor starts and are expected to run until the extractor stops.
|
|
138
|
+
"""
|
|
139
|
+
|
|
94
140
|
def __init__(
|
|
95
141
|
self,
|
|
96
142
|
*,
|
|
@@ -102,6 +148,12 @@ class ContinuousTask(_Task):
|
|
|
102
148
|
|
|
103
149
|
|
|
104
150
|
class StartupTask(_Task):
|
|
151
|
+
"""
|
|
152
|
+
A task that runs once at the startup of the extractor.
|
|
153
|
+
|
|
154
|
+
Startup tasks are executed before any continuous or scheduled tasks and are typically used for initialization.
|
|
155
|
+
"""
|
|
156
|
+
|
|
105
157
|
def __init__(
|
|
106
158
|
self,
|
|
107
159
|
*,
|
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module provides a task scheduler.
|
|
3
|
+
|
|
4
|
+
It is inspired by the ``APScheduler`` library and is designed to manage the scheduling of tasks within the extractor
|
|
5
|
+
framework. It differs from ``APScheduler`` in a few key ways:
|
|
6
|
+
- It is designed to be used within the extractor framework, allowing for better integration with the extractor's
|
|
7
|
+
lifecycle and error handling. For example, it respects the extractor's cancellation token and will gracefully shut
|
|
8
|
+
down upon cancellation.
|
|
9
|
+
- It has a simpler interface, focusing on the core functionality needed for scheduling tasks without the additional
|
|
10
|
+
complexity of a full-featured scheduler like ``APScheduler``.
|
|
11
|
+
- It is fully typed, providing better type safety and autocompletion in IDEs.
|
|
12
|
+
"""
|
|
13
|
+
|
|
1
14
|
from ._scheduler import TaskScheduler
|
|
2
15
|
|
|
3
16
|
__all__ = ["TaskScheduler"]
|
|
@@ -51,7 +51,7 @@ class TaskScheduler:
|
|
|
51
51
|
return []
|
|
52
52
|
with self._jobs_lock:
|
|
53
53
|
next_runs = sorted([(j.schedule.next(), j) for j in self._jobs.values()], key=lambda tup: tup[0])
|
|
54
|
-
return [job for (
|
|
54
|
+
return [job for (scheduled_time, job) in next_runs if scheduled_time == next_runs[0][0]] if next_runs else []
|
|
55
55
|
|
|
56
56
|
def _run_job(self, job: Job) -> bool:
|
|
57
57
|
with self._running_lock:
|
|
@@ -13,8 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
"""
|
|
16
|
-
Module containing upload queue classes.
|
|
17
|
-
|
|
16
|
+
Module containing upload queue classes.
|
|
17
|
+
|
|
18
|
+
The UploadQueue classes chunk items together and upload them together to CDF, both to minimize the load on the API, and
|
|
19
|
+
also to speed up uploading as requests can be slow.
|
|
18
20
|
|
|
19
21
|
Each upload queue comes with some configurable conditions that, when met, automatically triggers an upload.
|
|
20
22
|
|
|
@@ -78,13 +80,13 @@ from .time_series import (
|
|
|
78
80
|
|
|
79
81
|
__all__ = [
|
|
80
82
|
"AssetUploadQueue",
|
|
81
|
-
"EventUploadQueue",
|
|
82
83
|
"BytesUploadQueue",
|
|
84
|
+
"DataPoint",
|
|
85
|
+
"DataPointList",
|
|
86
|
+
"EventUploadQueue",
|
|
83
87
|
"FileUploadQueue",
|
|
84
88
|
"IOFileUploadQueue",
|
|
85
89
|
"RawUploadQueue",
|
|
86
|
-
"DataPoint",
|
|
87
|
-
"DataPointList",
|
|
88
90
|
"SequenceUploadQueue",
|
|
89
91
|
"TimeSeriesUploadQueue",
|
|
90
92
|
"default_time_series_factory",
|
|
@@ -84,7 +84,7 @@ class AbstractUploadQueue(ABC):
|
|
|
84
84
|
|
|
85
85
|
def _post_upload(self, uploaded: list[Any]) -> None:
|
|
86
86
|
"""
|
|
87
|
-
Perform post_upload_function to uploaded data, if applicable
|
|
87
|
+
Perform post_upload_function to uploaded data, if applicable.
|
|
88
88
|
|
|
89
89
|
Args:
|
|
90
90
|
uploaded: list of uploaded data
|
|
@@ -103,7 +103,7 @@ class AbstractUploadQueue(ABC):
|
|
|
103
103
|
|
|
104
104
|
def _run(self) -> None:
|
|
105
105
|
"""
|
|
106
|
-
Internal run method for upload thread
|
|
106
|
+
Internal run method for upload thread.
|
|
107
107
|
"""
|
|
108
108
|
while not self.cancellation_token.wait(timeout=self.max_upload_interval):
|
|
109
109
|
try:
|
|
@@ -117,8 +117,7 @@ class AbstractUploadQueue(ABC):
|
|
|
117
117
|
|
|
118
118
|
def start(self) -> None:
|
|
119
119
|
"""
|
|
120
|
-
Start upload thread if max_upload_interval is set
|
|
121
|
-
seconds.
|
|
120
|
+
Start upload thread if max_upload_interval is set.
|
|
122
121
|
"""
|
|
123
122
|
if self.max_upload_interval is not None:
|
|
124
123
|
self.thread.start()
|
|
@@ -137,7 +136,7 @@ class AbstractUploadQueue(ABC):
|
|
|
137
136
|
|
|
138
137
|
def __len__(self) -> int:
|
|
139
138
|
"""
|
|
140
|
-
The size of the upload queue
|
|
139
|
+
The size of the upload queue.
|
|
141
140
|
|
|
142
141
|
Returns:
|
|
143
142
|
Number of events in queue
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Upload queue for (legacy) assets.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
# Copyright 2023 Cognite AS
|
|
2
6
|
#
|
|
3
7
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -37,7 +41,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
|
|
|
37
41
|
|
|
38
42
|
class AssetUploadQueue(AbstractUploadQueue):
|
|
39
43
|
"""
|
|
40
|
-
Upload queue for assets
|
|
44
|
+
Upload queue for assets.
|
|
41
45
|
|
|
42
46
|
Args:
|
|
43
47
|
cdf_client: Cognite Data Fusion client to use
|
|
@@ -77,8 +81,9 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
77
81
|
|
|
78
82
|
def add_to_upload_queue(self, asset: Asset) -> None:
|
|
79
83
|
"""
|
|
80
|
-
Add asset to upload queue.
|
|
81
|
-
|
|
84
|
+
Add asset to upload queue.
|
|
85
|
+
|
|
86
|
+
The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
|
|
82
87
|
|
|
83
88
|
Args:
|
|
84
89
|
asset: Asset to add
|
|
@@ -92,7 +97,7 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
92
97
|
|
|
93
98
|
def upload(self) -> None:
|
|
94
99
|
"""
|
|
95
|
-
Trigger an upload of the queue, clears queue afterwards
|
|
100
|
+
Trigger an upload of the queue, clears queue afterwards.
|
|
96
101
|
"""
|
|
97
102
|
|
|
98
103
|
@retry(
|
|
@@ -107,8 +112,8 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
107
112
|
try:
|
|
108
113
|
self.cdf_client.assets.create(self.upload_queue)
|
|
109
114
|
except CogniteDuplicatedError as e:
|
|
110
|
-
duplicated_ids =
|
|
111
|
-
failed: list[Asset] =
|
|
115
|
+
duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
|
|
116
|
+
failed: list[Asset] = list(e.failed)
|
|
112
117
|
to_create = []
|
|
113
118
|
to_update = []
|
|
114
119
|
for asset in failed:
|
|
@@ -138,7 +143,7 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
138
143
|
|
|
139
144
|
def __enter__(self) -> "AssetUploadQueue":
|
|
140
145
|
"""
|
|
141
|
-
Wraps around start method, for use as context manager
|
|
146
|
+
Wraps around start method, for use as context manager.
|
|
142
147
|
|
|
143
148
|
Returns:
|
|
144
149
|
self
|
|
@@ -153,7 +158,7 @@ class AssetUploadQueue(AbstractUploadQueue):
|
|
|
153
158
|
exc_tb: TracebackType | None,
|
|
154
159
|
) -> None:
|
|
155
160
|
"""
|
|
156
|
-
Wraps around stop method, for use as context manager
|
|
161
|
+
Wraps around stop method, for use as context manager.
|
|
157
162
|
|
|
158
163
|
Args:
|
|
159
164
|
exc_type: Exception type
|