cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (47) hide show
  1. cognite/extractorutils/__init__.py +1 -1
  2. cognite/extractorutils/_inner_util.py +1 -1
  3. cognite/extractorutils/base.py +120 -40
  4. cognite/extractorutils/configtools/__init__.py +4 -5
  5. cognite/extractorutils/configtools/_util.py +3 -2
  6. cognite/extractorutils/configtools/elements.py +213 -35
  7. cognite/extractorutils/configtools/loaders.py +68 -16
  8. cognite/extractorutils/configtools/validators.py +5 -1
  9. cognite/extractorutils/exceptions.py +11 -2
  10. cognite/extractorutils/metrics.py +17 -12
  11. cognite/extractorutils/statestore/__init__.py +77 -3
  12. cognite/extractorutils/statestore/_base.py +7 -3
  13. cognite/extractorutils/statestore/hashing.py +129 -15
  14. cognite/extractorutils/statestore/watermark.py +77 -87
  15. cognite/extractorutils/threading.py +30 -4
  16. cognite/extractorutils/unstable/__init__.py +5 -5
  17. cognite/extractorutils/unstable/configuration/__init__.py +3 -0
  18. cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
  19. cognite/extractorutils/unstable/configuration/loaders.py +90 -19
  20. cognite/extractorutils/unstable/configuration/models.py +121 -7
  21. cognite/extractorutils/unstable/core/__init__.py +5 -0
  22. cognite/extractorutils/unstable/core/_dto.py +5 -3
  23. cognite/extractorutils/unstable/core/base.py +113 -4
  24. cognite/extractorutils/unstable/core/errors.py +41 -0
  25. cognite/extractorutils/unstable/core/logger.py +149 -0
  26. cognite/extractorutils/unstable/core/restart_policy.py +16 -2
  27. cognite/extractorutils/unstable/core/runtime.py +119 -36
  28. cognite/extractorutils/unstable/core/tasks.py +53 -1
  29. cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
  30. cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
  31. cognite/extractorutils/uploader/__init__.py +7 -5
  32. cognite/extractorutils/uploader/_base.py +4 -5
  33. cognite/extractorutils/uploader/assets.py +13 -8
  34. cognite/extractorutils/uploader/data_modeling.py +37 -2
  35. cognite/extractorutils/uploader/events.py +14 -9
  36. cognite/extractorutils/uploader/files.py +80 -21
  37. cognite/extractorutils/uploader/raw.py +12 -7
  38. cognite/extractorutils/uploader/time_series.py +58 -49
  39. cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
  40. cognite/extractorutils/uploader_extractor.py +29 -6
  41. cognite/extractorutils/uploader_types.py +15 -1
  42. cognite/extractorutils/util.py +76 -23
  43. {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
  44. cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
  45. cognite_extractor_utils-7.5.13.dist-info/RECORD +0 -50
  46. {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
  47. {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,50 @@
1
+ """
2
+ This module provides the base class for extractors.
3
+
4
+ It includes functionality for task management, logging, error handling, and configuration management.
5
+
6
+ Extractors should subclass the `Extractor` class and implement the `__init_tasks__` method to define their tasks.
7
+ The subclass should also define several class attributes:
8
+ - ``NAME``: A human-readable name for the extractor.
9
+ - ``EXTERNAL_ID``: A unique identifier for the extractor, used when reporting to CDF Integrations.
10
+ - ``DESCRIPTION``: A brief description of the extractor.
11
+ - ``VERSION``: The version of the extractor, used when reporting to CDF Integrations. This should follow semantic
12
+ versioning.
13
+ - ``CONFIG_TYPE``: The type of the application configuration for the extractor, which should be a subclass of
14
+ ``ExtractorConfig``. This should be the same class as the one used for the generic type parameter of the
15
+ ``Extractor`` class.
16
+
17
+
18
+ .. code-block:: python
19
+
20
+ class MyConfig(ExtractorConfig):
21
+ parameter: str
22
+ another_parameter: int
23
+ schedule: ScheduleConfig
24
+
25
+ class MyExtractor(Extractor[MyConfig]):
26
+ NAME = "My Extractor"
27
+ EXTERNAL_ID = "my-extractor"
28
+ DESCRIPTION = "An example extractor"
29
+ VERSION = "1.0.0"
30
+
31
+ CONFIG_TYPE = MyConfig
32
+
33
+ def __init_tasks__(self) -> None:
34
+ self.add_task(
35
+ ScheduledTask(
36
+ name="my_task",
37
+ description="An example task",
38
+ schedule=self.application_config.schedule,
39
+ target=self.my_task_function,
40
+ )
41
+ )
42
+
43
+ def my_task_function(self, task_context: TaskContext) -> None:
44
+ task_context.logger.info("Running my task")
45
+ """
46
+
1
47
  import logging
2
- import logging.config
3
48
  import time
4
49
  from concurrent.futures import ThreadPoolExecutor
5
50
  from functools import partial
@@ -30,7 +75,7 @@ from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Scheduled
30
75
  from cognite.extractorutils.unstable.scheduling import TaskScheduler
31
76
  from cognite.extractorutils.util import now
32
77
 
33
- __all__ = ["ConfigType", "ConfigRevision", "Extractor"]
78
+ __all__ = ["ConfigRevision", "ConfigType", "Extractor"]
34
79
 
35
80
  ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
36
81
  ConfigRevision = Literal["local"] | int
@@ -40,6 +85,13 @@ _T = TypeVar("_T", bound=ExtractorConfig)
40
85
 
41
86
 
42
87
  class FullConfig(Generic[_T]):
88
+ """
89
+ A class that holds the full configuration for an extractor.
90
+
91
+ This includes the connection configuration, application configuration, and which revision of the application
92
+ configuration is currently active.
93
+ """
94
+
43
95
  def __init__(
44
96
  self,
45
97
  connection_config: ConnectionConfig,
@@ -52,6 +104,16 @@ class FullConfig(Generic[_T]):
52
104
 
53
105
 
54
106
  class Extractor(Generic[ConfigType], CogniteLogger):
107
+ """
108
+ Base class for all extractors.
109
+
110
+ This class provides the basic functionality for running an extractor, including task management, logging,
111
+ error handling, and configuration management.
112
+
113
+ It is designed to be subclassed by specific extractors, which should implement the `__init_tasks__` method
114
+ to define their tasks.
115
+ """
116
+
55
117
  NAME: str
56
118
  EXTERNAL_ID: str
57
119
  DESCRIPTION: str
@@ -128,6 +190,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
128
190
  root.addHandler(fh)
129
191
 
130
192
  def __init_tasks__(self) -> None:
193
+ """
194
+ This method should be overridden by subclasses to define their tasks.
195
+
196
+ It is called automatically when the extractor is initialized.
197
+
198
+ Subclasses should call ``self.add_task(...)`` to add tasks to the extractor.
199
+ """
131
200
  pass
132
201
 
133
202
  def _set_runtime_message_queue(self, queue: Queue) -> None:
@@ -200,6 +269,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
200
269
  )
201
270
 
202
271
  def restart(self) -> None:
272
+ """
273
+ Trigger a restart of the extractor.
274
+ """
203
275
  self._logger.info("Restarting extractor")
204
276
  if self._runtime_messages:
205
277
  self._runtime_messages.put(RuntimeMessage.RESTART)
@@ -210,12 +282,20 @@ class Extractor(Generic[ConfigType], CogniteLogger):
210
282
  return cls(config)
211
283
 
212
284
  def add_task(self, task: Task) -> None:
285
+ """
286
+ Add a task to the extractor.
287
+
288
+ This method wraps the task's target function to include error handling and task tracking.
289
+
290
+ Args:
291
+ task: The task to add. It should be an instance of ``StartupTask``, ``ContinuousTask``, or ``ScheduledTask``
292
+ """
213
293
  # Store this for later, since we'll override it with the wrapped version
214
294
  target = task.target
215
295
 
216
296
  def run_task(task_context: TaskContext) -> None:
217
297
  """
218
- A wrapped version of the task's target, with tracking and error handling
298
+ A wrapped version of the task's target, with tracking and error handling.
219
299
  """
220
300
  # Record a task start
221
301
  with self._checkin_lock:
@@ -275,7 +355,7 @@ class Extractor(Generic[ConfigType], CogniteLogger):
275
355
  {
276
356
  "name": t.name,
277
357
  "type": "continuous" if isinstance(t, ContinuousTask) else "batch",
278
- "action": True if isinstance(t, ScheduledTask) else False,
358
+ "action": bool(isinstance(t, ScheduledTask)),
279
359
  "description": t.description,
280
360
  }
281
361
  for t in self._tasks
@@ -285,14 +365,29 @@ class Extractor(Generic[ConfigType], CogniteLogger):
285
365
  )
286
366
 
287
367
  def start(self) -> None:
368
+ """
369
+ Start the extractor.
370
+
371
+ Instead of calling this method directly, it is recommended to use the context manager interface by using the
372
+ ``with`` statement, which ensures proper cleanup on exit.
373
+ """
288
374
  self._setup_logging()
289
375
  self._report_extractor_info()
290
376
  Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
291
377
 
292
378
  def stop(self) -> None:
379
+ """
380
+ Stop the extractor.
381
+
382
+ Instead of calling this method directly, it is recommended to use the context manager interface by using the
383
+ ``with`` statement, which ensures proper cleanup on exit.
384
+ """
293
385
  self.cancellation_token.cancel()
294
386
 
295
387
  def __enter__(self) -> Self:
388
+ """
389
+ Start the extractor in a context manager.
390
+ """
296
391
  self.start()
297
392
  return self
298
393
 
@@ -302,6 +397,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
302
397
  exc_val: BaseException | None,
303
398
  exc_tb: TracebackType | None,
304
399
  ) -> bool:
400
+ """
401
+ Stop the extractor when exiting the context manager.
402
+ """
305
403
  self.stop()
306
404
  with self._checkin_lock:
307
405
  self._checkin()
@@ -310,6 +408,17 @@ class Extractor(Generic[ConfigType], CogniteLogger):
310
408
  return exc_val is None
311
409
 
312
410
  def run(self) -> None:
411
+ """
412
+ Run the extractor. This method starts the extractor and runs all tasks that have been added.
413
+
414
+ This method assumes ``self.start()`` has been called first. The recommended way to use this method is
415
+ to use the context manager interface, which ensures that the extractor is started and stopped properly.
416
+
417
+ .. code-block:: python
418
+
419
+ with extractor:
420
+ extractor.run()
421
+ """
313
422
  has_scheduled = False
314
423
 
315
424
  startup: list[StartupTask] = []
@@ -1,3 +1,7 @@
1
+ """
2
+ This module defines the Error and ErrorLevel classes for reporting errors in extractors.
3
+ """
4
+
1
5
  import logging
2
6
  from enum import Enum
3
7
  from types import TracebackType
@@ -15,12 +19,19 @@ __all__ = ["Error", "ErrorLevel"]
15
19
 
16
20
 
17
21
  class ErrorLevel(Enum):
22
+ """
23
+ Enumeration of error levels for reporting errors in extractors.
24
+ """
25
+
18
26
  warning = "warning"
19
27
  error = "error"
20
28
  fatal = "fatal"
21
29
 
22
30
  @property
23
31
  def log_level(self) -> int:
32
+ """
33
+ Returns the corresponding logging level for the error level.
34
+ """
24
35
  match self:
25
36
  case ErrorLevel.warning:
26
37
  return logging.WARNING
@@ -33,6 +44,20 @@ class ErrorLevel(Enum):
33
44
 
34
45
 
35
46
  class Error:
47
+ """
48
+ Represents an error that occurred during the run of an extractor.
49
+
50
+ This class should not be instantiated directly. Instead, use the ``CogniteLogger`` methods (either in the
51
+ TaskContext or the extractor base class) to create errors.
52
+
53
+ Args:
54
+ level: The severity level of the error.
55
+ description: A brief description of the error.
56
+ details: Additional details about the error, if any.
57
+ task_name: The name of the task during which the error occurred, if applicable.
58
+ extractor: The extractor instance that reported the error.
59
+ """
60
+
36
61
  def __init__(
37
62
  self,
38
63
  level: ErrorLevel,
@@ -55,6 +80,9 @@ class Error:
55
80
  self._extractor._report_error(self)
56
81
 
57
82
  def instant(self) -> None:
83
+ """
84
+ Make this error an instant error, meaning it does not have a duration.
85
+ """
58
86
  # Only end the error once
59
87
  if self.end_time is not None:
60
88
  return
@@ -65,6 +93,11 @@ class Error:
65
93
  self._extractor._report_error(self)
66
94
 
67
95
  def finish(self) -> None:
96
+ """
97
+ Mark the error as finished, setting the end time to the current time.
98
+
99
+ This method should be called when the error is resolved or no longer relevant.
100
+ """
68
101
  # Only end the error once
69
102
  if self.end_time is not None:
70
103
  return
@@ -75,6 +108,11 @@ class Error:
75
108
  self._extractor._report_error(self)
76
109
 
77
110
  def __enter__(self) -> "Error":
111
+ """
112
+ Start tracking an error as a context manager.
113
+
114
+ This allows the error to be automatically finished when exiting the context.
115
+ """
78
116
  return self
79
117
 
80
118
  def __exit__(
@@ -83,5 +121,8 @@ class Error:
83
121
  exc_val: BaseException | None,
84
122
  exc_tb: TracebackType | None,
85
123
  ) -> bool:
124
+ """
125
+ Finish the error context manager, marking the error as finished.
126
+ """
86
127
  self.finish()
87
128
  return exc_val is None
@@ -1,3 +1,10 @@
1
+ """
2
+ This module provides the ``CogniteLogger`` base class, which is an abstract base class for logging.
3
+
4
+ This class is subclassed by both the ``TaskContext`` and the ``Extractor`` base classes, providing a unified interface
5
+ for logging and error handling in extractors.
6
+ """
7
+
1
8
  from abc import ABC, abstractmethod
2
9
  from logging import Logger, getLogger
3
10
  from traceback import format_exception
@@ -9,6 +16,16 @@ from cognite.extractorutils.unstable.core.errors import Error, ErrorLevel
9
16
 
10
17
 
11
18
  class CogniteLogger(ABC):
19
+ """
20
+ Base class for logging and error handling in extractors.
21
+
22
+ This class provides methods to log messages at different levels (debug, info, warning, error, fatal) and to
23
+ create and manage errors that occur during the execution of an extractor.
24
+
25
+ If you use this class instead of a standard logger, you will get additional functionality such as reporting errors
26
+ back to CDF.
27
+ """
28
+
12
29
  def __init__(self) -> None:
13
30
  self._logger: Logger = getLogger()
14
31
 
@@ -24,9 +41,15 @@ class CogniteLogger(ABC):
24
41
  pass
25
42
 
26
43
  def debug(self, message: str) -> None:
44
+ """
45
+ Log a debug message.
46
+ """
27
47
  self._logger.debug(message)
28
48
 
29
49
  def info(self, message: str) -> None:
50
+ """
51
+ Log an information message.
52
+ """
30
53
  self._logger.info(message)
31
54
 
32
55
  def begin_warning(
@@ -36,6 +59,32 @@ class CogniteLogger(ABC):
36
59
  details: str | None = None,
37
60
  auto_log: bool = True,
38
61
  ) -> Error:
62
+ """
63
+ Begin a warning error.
64
+
65
+ This will both log the message and create an error object that can be used to track and report the error.
66
+
67
+ Args:
68
+ message: The message to log and include in the error.
69
+ details: Additional details about the error, if any.
70
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
71
+
72
+ Returns:
73
+ An ``Error`` object representing the warning error, tied to the current extractor instance.
74
+
75
+ Examples:
76
+ To track and complete an error, you can keep a reference to the error object and call its ``finish``
77
+ method when the error is resolved, or use it in a context manager to automatically finish it:
78
+
79
+ .. code-block:: python
80
+ error = logger.begin_warning("This is a warning", details="Some details")
81
+ # Do something
82
+ error.finish()
83
+
84
+ .. code-block:: python
85
+ with logger.begin_warning("This is a warning", details="Some details"):
86
+ # Do something
87
+ """
39
88
  if auto_log:
40
89
  self._logger.warning(message)
41
90
  return self._new_error(
@@ -51,6 +100,32 @@ class CogniteLogger(ABC):
51
100
  details: str | None = None,
52
101
  auto_log: bool = True,
53
102
  ) -> Error:
103
+ """
104
+ Begin an error.
105
+
106
+ This will both log the message and create an error object that can be used to track and report the error.
107
+
108
+ Args:
109
+ message: The message to log and include in the error.
110
+ details: Additional details about the error, if any.
111
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
112
+
113
+ Returns:
114
+ An ``Error`` object representing the error, tied to the current extractor instance.
115
+
116
+ Examples:
117
+ To track and complete an error, you can keep a reference to the error object and call its ``finish``
118
+ method when the error is resolved, or use it in a context manager to automatically finish it:
119
+
120
+ .. code-block:: python
121
+ error = logger.begin_error("This is an error", details="Some details")
122
+ # Do something
123
+ error.finish()
124
+
125
+ .. code-block:: python
126
+ with logger.begin_error("This is an error", details="Some details"):
127
+ # Do something
128
+ """
54
129
  if auto_log:
55
130
  self._logger.error(message)
56
131
  return self._new_error(
@@ -66,6 +141,32 @@ class CogniteLogger(ABC):
66
141
  details: str | None = None,
67
142
  auto_log: bool = True,
68
143
  ) -> Error:
144
+ """
145
+ Begin a fatal error.
146
+
147
+ This will both log the message and create an error object that can be used to track and report the error.
148
+
149
+ Args:
150
+ message: The message to log and include in the error.
151
+ details: Additional details about the error, if any.
152
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
153
+
154
+ Returns:
155
+ An ``Error`` object representing the fatal error, tied to the current extractor instance.
156
+
157
+ Examples:
158
+ To track and complete an error, you can keep a reference to the error object and call its ``finish``
159
+ method when the error is resolved, or use it in a context manager to automatically finish it:
160
+
161
+ .. code-block:: python
162
+ error = logger.begin_fatal("This is a fatal error", details="Some details")
163
+ # Do something
164
+ error.finish()
165
+
166
+ .. code-block:: python
167
+ with logger.begin_fatal("This is a fatal error", details="Some details"):
168
+ # Do something
169
+ """
69
170
  if auto_log:
70
171
  self._logger.critical(message)
71
172
  return self._new_error(
@@ -81,6 +182,17 @@ class CogniteLogger(ABC):
81
182
  details: str | None = None,
82
183
  auto_log: bool = True,
83
184
  ) -> None:
185
+ """
186
+ Report an instant warning.
187
+
188
+ This will log the message and create an error object that is marked as instant, meaning it does not have a
189
+ duration.
190
+
191
+ Args:
192
+ message: The message to log and include in the error.
193
+ details: Additional details about the error, if any.
194
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
195
+ """
84
196
  if auto_log:
85
197
  self._logger.warning(message)
86
198
  self._new_error(
@@ -96,6 +208,17 @@ class CogniteLogger(ABC):
96
208
  details: str | None = None,
97
209
  auto_log: bool = True,
98
210
  ) -> None:
211
+ """
212
+ Report an instant error.
213
+
214
+ This will log the message and create an error object that is marked as instant, meaning it does not have a
215
+ duration.
216
+
217
+ Args:
218
+ message: The message to log and include in the error.
219
+ details: Additional details about the error, if any.
220
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
221
+ """
99
222
  if auto_log:
100
223
  self._logger.error(message)
101
224
  self._new_error(
@@ -111,6 +234,17 @@ class CogniteLogger(ABC):
111
234
  details: str | None = None,
112
235
  auto_log: bool = True,
113
236
  ) -> None:
237
+ """
238
+ Report an instant fatal error.
239
+
240
+ This will log the message and create an error object that is marked as instant, meaning it does not have a
241
+ duration.
242
+
243
+ Args:
244
+ message: The message to log and include in the error.
245
+ details: Additional details about the error, if any.
246
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
247
+ """
114
248
  if auto_log:
115
249
  self._logger.critical(message)
116
250
  self._new_error(
@@ -128,6 +262,21 @@ class CogniteLogger(ABC):
128
262
  include_details: Literal["stack_trace"] | Literal["exception_message"] | bool = "exception_message",
129
263
  auto_log: bool = True,
130
264
  ) -> None:
265
+ """
266
+ Report an exception as an error.
267
+
268
+ This will log the message and create an error object that is marked as instant, meaning it does not have a
269
+ duration. The exception details can be included in the error.
270
+
271
+ Args:
272
+ message: The message to log and include in the error.
273
+ exception: The exception to report.
274
+ level: The severity level of the error. Defaults to ``ErrorLevel.error``.
275
+ include_details: How to include details about the exception. Can be "stack_trace", "exception_message",
276
+ or True (equivalent to "exception_message"). If False, no details are included. Defaults to
277
+ "exception_message".
278
+ auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
279
+ """
131
280
  if auto_log:
132
281
  self._logger.log(level=level.log_level, msg=message, exc_info=exception)
133
282
 
@@ -1,3 +1,17 @@
1
+ """
2
+ This module defines the restart policies for extractors.
3
+
4
+ It is used by the ``Runtime`` to determine whether an extractor should be restarted after a task failure.
5
+
6
+ It provides three predefined restart policies:
7
+ - ``NEVER``: The extractor will never be restarted.
8
+ - ``WHEN_ANY_TASK_CRASHES``: The extractor will be restarted if any task crashes.
9
+ - ``WHEN_CONTINUOUS_TASKS_CRASHES``: The extractor will be restarted only if a continuous task crashes.
10
+
11
+ Users can also define their own restart policies by providing a callable that takes a `Task` and an `Exception`
12
+ and returns a boolean indicating whether the extractor should be restarted.
13
+ """
14
+
1
15
  from collections.abc import Callable
2
16
 
3
17
  from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Task
@@ -22,8 +36,8 @@ WHEN_CONTINUOUS_TASKS_CRASHES = _is_continuous
22
36
  WHEN_ANY_TASK_CRASHES = _true
23
37
 
24
38
  __all__ = [
25
- "RestartPolicy",
26
39
  "NEVER",
27
- "WHEN_CONTINUOUS_TASKS_CRASHES",
28
40
  "WHEN_ANY_TASK_CRASHES",
41
+ "WHEN_CONTINUOUS_TASKS_CRASHES",
42
+ "RestartPolicy",
29
43
  ]