cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic.

Files changed (47)
  1. cognite/extractorutils/__init__.py +1 -1
  2. cognite/extractorutils/_inner_util.py +1 -1
  3. cognite/extractorutils/base.py +120 -40
  4. cognite/extractorutils/configtools/__init__.py +4 -5
  5. cognite/extractorutils/configtools/_util.py +3 -2
  6. cognite/extractorutils/configtools/elements.py +206 -33
  7. cognite/extractorutils/configtools/loaders.py +68 -16
  8. cognite/extractorutils/configtools/validators.py +5 -1
  9. cognite/extractorutils/exceptions.py +11 -2
  10. cognite/extractorutils/metrics.py +17 -12
  11. cognite/extractorutils/statestore/__init__.py +77 -3
  12. cognite/extractorutils/statestore/_base.py +7 -3
  13. cognite/extractorutils/statestore/hashing.py +129 -15
  14. cognite/extractorutils/statestore/watermark.py +77 -87
  15. cognite/extractorutils/threading.py +30 -4
  16. cognite/extractorutils/unstable/__init__.py +5 -5
  17. cognite/extractorutils/unstable/configuration/__init__.py +3 -0
  18. cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
  19. cognite/extractorutils/unstable/configuration/loaders.py +78 -13
  20. cognite/extractorutils/unstable/configuration/models.py +121 -7
  21. cognite/extractorutils/unstable/core/__init__.py +5 -0
  22. cognite/extractorutils/unstable/core/_dto.py +5 -3
  23. cognite/extractorutils/unstable/core/base.py +113 -4
  24. cognite/extractorutils/unstable/core/errors.py +41 -0
  25. cognite/extractorutils/unstable/core/logger.py +149 -0
  26. cognite/extractorutils/unstable/core/restart_policy.py +16 -2
  27. cognite/extractorutils/unstable/core/runtime.py +44 -6
  28. cognite/extractorutils/unstable/core/tasks.py +53 -1
  29. cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
  30. cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
  31. cognite/extractorutils/uploader/__init__.py +9 -5
  32. cognite/extractorutils/uploader/_base.py +4 -5
  33. cognite/extractorutils/uploader/assets.py +13 -8
  34. cognite/extractorutils/uploader/data_modeling.py +37 -2
  35. cognite/extractorutils/uploader/events.py +14 -9
  36. cognite/extractorutils/uploader/files.py +80 -21
  37. cognite/extractorutils/uploader/raw.py +12 -7
  38. cognite/extractorutils/uploader/time_series.py +370 -94
  39. cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
  40. cognite/extractorutils/uploader_extractor.py +47 -9
  41. cognite/extractorutils/uploader_types.py +26 -1
  42. cognite/extractorutils/util.py +76 -23
  43. {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/METADATA +1 -1
  44. cognite_extractor_utils-7.7.0.dist-info/RECORD +50 -0
  45. cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
  46. {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/WHEEL +0 -0
  47. {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/licenses/LICENSE +0 -0

cognite/extractorutils/unstable/configuration/__init__.py

@@ -0,0 +1,3 @@
+"""
+New version of ``configtools`` based on pydantic instead of dataclasses.
+"""

cognite/extractorutils/unstable/configuration/exceptions.py

@@ -1,10 +1,15 @@
+"""
+Exceptions representing invalid configurations.
+"""
+
+
 class InvalidConfigError(Exception):
     """
-    Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to
+    Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to.

     * Missing fields
     * Incompatible types
-    * Unkown fields
+    * Unknown fields
     """

     def __init__(self, message: str, details: list[str] | None = None):
@@ -15,7 +20,13 @@ class InvalidConfigError(Exception):
         self.attempted_revision: int | None = None

     def __str__(self) -> str:
+        """
+        Underlying message prefixed with 'Invalid config:'.
+        """
         return f"Invalid config: {self.message}"

     def __repr__(self) -> str:
+        """
+        Underlying message prefixed with 'Invalid config:'.
+        """
         return self.__str__()
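
For orientation (not part of the package diff): a minimal Python sketch of how calling code might surface this error, using only what the class definition above shows. The helper name report_config_error is hypothetical.

    # Hedged sketch, not from this package: one way a caller might report the error.
    from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError


    def report_config_error(error: InvalidConfigError) -> None:
        # str(error) is the underlying message prefixed with "Invalid config:"
        print(error)
        # attempted_revision starts out as None (set in __init__ above); shown only for illustration
        if error.attempted_revision is not None:
            print(f"Failed while loading config revision {error.attempted_revision}")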

cognite/extractorutils/unstable/configuration/loaders.py

@@ -1,3 +1,7 @@
+"""
+Module containing functions and classes for loading configuration files.
+"""
+
 import json
 from enum import Enum
 from io import StringIO
@@ -13,32 +17,69 @@ from cognite.extractorutils.exceptions import InvalidConfigError as OldInvalidCo
 from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError
 from cognite.extractorutils.unstable.configuration.models import ConfigModel

-__all__ = ["ConfigFormat", "load_file", "load_from_cdf", "load_io", "load_dict"]
+__all__ = ["ConfigFormat", "load_dict", "load_file", "load_from_cdf", "load_io"]


 _T = TypeVar("_T", bound=ConfigModel)


 class ConfigFormat(Enum):
+    """
+    Enumeration of supported configuration file formats.
+
+    Attributes:
+        JSON: Represents the JSON configuration file format.
+        YAML: Represents the YAML configuration file format.
+    """
+
     JSON = "json"
     YAML = "yaml"


 def load_file(path: Path, schema: type[_T]) -> _T:
+    """
+    Load a configuration file from the given path and parse it into the specified schema.
+
+    Args:
+        path: Path to the configuration file.
+        schema: The schema class to parse the configuration into.
+
+    Returns:
+        An instance of the schema populated with the configuration data.
+
+    Raises:
+        InvalidConfigError: If the file type is unknown or the configuration is invalid.
+    """
     if path.suffix in [".yaml", ".yml"]:
-        format = ConfigFormat.YAML
+        file_format = ConfigFormat.YAML
     elif path.suffix == ".json":
-        format = ConfigFormat.JSON
+        file_format = ConfigFormat.JSON
     else:
         raise InvalidConfigError(f"Unknown file type {path.suffix}")

     with open(path) as stream:
-        return load_io(stream, format, schema)
+        return load_io(stream, file_format, schema)


 def load_from_cdf(
     cognite_client: CogniteClient, external_id: str, schema: type[_T], revision: int | None = None
 ) -> tuple[_T, int]:
+    """
+    Load a configuration from a CDF integration using the provided external ID and schema.
+
+    Args:
+        cognite_client: An instance of CogniteClient to interact with CDF.
+        external_id: The external ID of the integration to load configuration from.
+        schema: The schema class to parse the configuration into.
+        revision: the specific revision of the configuration to load, otherwise get the latest.
+
+    Returns:
+        A tuple containing the parsed configuration instance and the revision number.
+
+    Raises:
+        InvalidConfigError: If the configuration is invalid or not found.
+        CogniteAPIError: If there is an unexpected error communicating with CDF.
+    """
     params: dict[str, str | int] = {"integration": external_id}
     if revision:
         params["revision"] = revision
@@ -67,11 +108,25 @@ def load_from_cdf(
         raise new_e from e


-def load_io(stream: TextIO, format: ConfigFormat, schema: type[_T]) -> _T:
-    if format == ConfigFormat.JSON:
+def load_io(stream: TextIO, file_format: ConfigFormat, schema: type[_T]) -> _T:
+    """
+    Load a configuration from a stream (e.g., file or string) and parse it into the specified schema.
+
+    Args:
+        stream: A text stream containing the configuration data.
+        file_format: The format of the configuration data.
+        schema: The schema class to parse the configuration into.
+
+    Returns:
+        An instance of the schema populated with the configuration data.
+
+    Raises:
+        InvalidConfigError: If the file format is unknown or the configuration is invalid.
+    """
+    if file_format == ConfigFormat.JSON:
         data = json.load(stream)

-    elif format == ConfigFormat.YAML:
+    elif file_format == ConfigFormat.YAML:
         data = _load_yaml_dict_raw(stream)

         if "azure-keyvault" in data:
@@ -95,15 +150,25 @@ def _make_loc_str(loc: tuple) -> str:
            loc_str = f"{loc_str}{lo}"
            needs_sep = True
        else:
-            if isinstance(lo, int):
-                loc_str = f"{loc_str}[{lo}]"
-            else:
-                loc_str = f"{loc_str}.{lo}"
+            loc_str = f"{loc_str}[{lo}]" if isinstance(lo, int) else f"{loc_str}.{lo}"

    return loc_str


 def load_dict(data: dict, schema: type[_T]) -> _T:
+    """
+    Load a configuration from a dictionary and parse it into the specified schema.
+
+    Args:
+        data: A dictionary containing the configuration data.
+        schema: The schema class to parse the configuration into.
+
+    Returns:
+        An instance of the schema populated with the configuration data.
+
+    Raises:
+        InvalidConfigError: If the configuration is invalid.
+    """
    try:
        return schema.model_validate(data)

@@ -119,8 +184,8 @@ def load_dict(data: dict, schema: type[_T]) -> _T:

            if "ctx" in err and "error" in err["ctx"]:
                exc = err["ctx"]["error"]
-                if isinstance(exc, ValueError) or isinstance(exc, AssertionError):
-                    messages.append(f"{str(exc)}: {loc_str}")
+                if isinstance(exc, ValueError | AssertionError):
+                    messages.append(f"{exc!s}: {loc_str}")
                    continue

            messages.append(f"{err.get('msg')}: {loc_str}")
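
For context (not part of the package diff): a short usage sketch of the loader API shown above. MyConfig and its fields are made up for illustration; the kebab-case keys follow the alias generator set on ConfigModel (see models.py below).

    # Hedged usage sketch; MyConfig is hypothetical.
    from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError
    from cognite.extractorutils.unstable.configuration.loaders import load_dict
    from cognite.extractorutils.unstable.configuration.models import ConfigModel


    class MyConfig(ConfigModel):
        source_name: str
        batch_size: int = 100


    try:
        # Keys arrive in kebab-case, matching the alias generator on ConfigModel
        config = load_dict({"source-name": "my-source", "batch-size": 10}, MyConfig)
        print(config.source_name, config.batch_size)
        # load_file(Path("config.yaml"), MyConfig) picks YAML or JSON from the file suffix
    except InvalidConfigError as err:
        print(err)  # str(err) is the message prefixed with "Invalid config:"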

cognite/extractorutils/unstable/configuration/models.py

@@ -1,3 +1,7 @@
+"""
+Module containing pre-built models for common extractor configuration.
+"""
+
 import os
 import re
 from datetime import timedelta
@@ -21,22 +25,26 @@ from cognite.extractorutils.configtools._util import _load_certificate_data
 from cognite.extractorutils.exceptions import InvalidConfigError

 __all__ = [
-    "ConfigModel",
     "AuthenticationConfig",
-    "TimeIntervalConfig",
+    "ConfigModel",
     "ConnectionConfig",
     "CronConfig",
+    "ExtractorConfig",
     "IntervalConfig",
-    "ScheduleConfig",
-    "LogLevel",
-    "LogFileHandlerConfig",
     "LogConsoleHandlerConfig",
+    "LogFileHandlerConfig",
     "LogHandlerConfig",
-    "ExtractorConfig",
+    "LogLevel",
+    "ScheduleConfig",
+    "TimeIntervalConfig",
 ]


 class ConfigModel(BaseModel):
+    """
+    Base model for configuration objects, setting the correct pydantic options for extractor config.
+    """
+
     model_config = ConfigDict(
         alias_generator=kebabize,
         populate_by_name=True,
@@ -69,7 +77,7 @@ AuthenticationConfig = Annotated[_ClientCredentialsConfig | _ClientCertificateCo

 class TimeIntervalConfig:
     """
-    Configuration parameter for setting a time interval
+    Configuration parameter for setting a time interval.
     """

     def __init__(self, expression: str) -> None:
@@ -77,14 +85,25 @@ class TimeIntervalConfig:

     @classmethod
     def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema:
+        """
+        Pydantic hook to define how this class should be serialized/deserialized.
+
+        This allows the class to be used as a field in Pydantic models.
+        """
         return core_schema.no_info_after_validator_function(cls, handler(str | int))

     def __eq__(self, other: object) -> bool:
+        """
+        Two TimeIntervalConfig objects are equal if they have the same number of seconds in their interval.
+        """
         if not isinstance(other, TimeIntervalConfig):
             return NotImplemented
         return self._interval == other._interval

     def __hash__(self) -> int:
+        """
+        Hash function for TimeIntervalConfig based on the number of seconds in the interval.
+        """
         return hash(self._interval)

     @classmethod
@@ -106,36 +125,69 @@ class TimeIntervalConfig:

     @property
     def seconds(self) -> int:
+        """
+        Time interval as number of seconds.
+        """
         return self._interval

     @property
     def minutes(self) -> float:
+        """
+        Time interval as number of minutes.
+
+        This is a float since the underlying interval is in seconds.
+        """
         return self._interval / 60

     @property
     def hours(self) -> float:
+        """
+        Time interval as number of hours.
+
+        This is a float since the underlying interval is in seconds.
+        """
         return self._interval / (60 * 60)

     @property
     def days(self) -> float:
+        """
+        Time interval as number of days.
+
+        This is a float since the underlying interval is in seconds.
+        """
         return self._interval / (60 * 60 * 24)

     @property
     def timedelta(self) -> timedelta:
+        """
+        Time interval as a timedelta object.
+        """
         days = self._interval // (60 * 60 * 24)
         seconds = self._interval % (60 * 60 * 24)
         return timedelta(days=days, seconds=seconds)

     def __int__(self) -> int:
+        """
+        Returns the time interval as a number of seconds.
+        """
         return int(self._interval)

     def __float__(self) -> float:
+        """
+        Returns the time interval as a number of seconds.
+        """
         return float(self._interval)

     def __str__(self) -> str:
+        """
+        Returns the time interval as a human readable string.
+        """
         return self._expression

     def __repr__(self) -> str:
+        """
+        Returns the time interval as a human readable string.
+        """
         return self._expression


@@ -152,6 +204,15 @@ class _ConnectionParameters(ConfigModel):


 class ConnectionConfig(ConfigModel):
+    """
+    Configuration for connecting to a Cognite Data Fusion project.
+
+    This configuration includes the project name, base URL, integration name, and authentication details, as well as
+    optional connection parameters.
+
+    This configuration is common for all extractors.
+    """
+
     project: str
     base_url: str

@@ -162,6 +223,15 @@ class ConnectionConfig(ConfigModel):

     connection: _ConnectionParameters = Field(default_factory=_ConnectionParameters)

     def get_cognite_client(self, client_name: str) -> CogniteClient:
+        """
+        Create a CogniteClient instance using the configuration parameters.
+
+        Args:
+            client_name: Name of the client, set as the x-cdp-app header in the requests
+
+        Returns:
+            CogniteClient: An instance of CogniteClient configured with the provided parameters.
+        """
         from cognite.client.config import global_config

         global_config.disable_pypi_version_check = True
@@ -218,6 +288,26 @@

     @classmethod
     def from_environment(cls) -> "ConnectionConfig":
+        """
+        Create a ConnectionConfig instance from environment variables.
+
+        Environment variables should be set as follows:
+        - COGNITE_PROJECT: The name of the Cognite Data Fusion project.
+        - COGNITE_BASE_URL: The base URL of the Cognite Data Fusion instance.
+        - COGNITE_INTEGRATION: The external ID of the corresponding integration in CDF.
+        - COGNITE_CLIENT_ID: The client ID for authentication.
+        - COGNITE_TOKEN_SCOPES: The scopes for the token.
+        - COGNITE_CLIENT_SECRET: The client secret for authentication (if using client credentials).
+        - COGNITE_TOKEN_URL: The token URL for authentication (if using client credentials).
+        - COGNITE_CLIENT_CERTIFICATE_PATH: The path to the client certificate (if using client certificate).
+        - COGNITE_AUTHORITY_URL: The authority URL for authentication (if using client certificate).
+
+        Returns:
+            ConnectionConfig: An instance of ConnectionConfig populated with the environment variables.
+
+        Raises:
+            KeyError: If any of the required environment variables are missing.
+        """
         auth: AuthenticationConfig
         if "COGNITE_CLIENT_SECRET" in os.environ:
             auth = _ClientCredentialsConfig(
@@ -248,11 +338,19 @@


 class CronConfig(ConfigModel):
+    """
+    Configuration parameter for setting a cron schedule.
+    """
+
     type: Literal["cron"]
     expression: str


 class IntervalConfig(ConfigModel):
+    """
+    Configuration parameter for setting an interval schedule.
+    """
+
     type: Literal["interval"]
     expression: TimeIntervalConfig

@@ -261,6 +359,10 @@ ScheduleConfig = Annotated[CronConfig | IntervalConfig, Field(discriminator="typ


 class LogLevel(Enum):
+    """
+    Enumeration of log levels for the extractor.
+    """
+
     CRITICAL = "CRITICAL"
     ERROR = "ERROR"
     WARNING = "WARNING"
@@ -269,6 +371,10 @@ class LogLevel(Enum):


 class LogFileHandlerConfig(ConfigModel):
+    """
+    Configuration for a log handler that writes to a file, with daily rotation.
+    """
+
     type: Literal["file"]
     path: Path
     level: LogLevel
@@ -276,6 +382,10 @@


 class LogConsoleHandlerConfig(ConfigModel):
+    """
+    Configuration for a log handler that writes to standard output.
+    """
+
     type: Literal["console"]
     level: LogLevel

@@ -289,4 +399,8 @@ def _log_handler_default() -> list[LogHandlerConfig]:


 class ExtractorConfig(ConfigModel):
+    """
+    Base class for application configuration for extractors.
+    """
+
     log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
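
For context (not part of the package diff): a sketch of how these models compose into an application config. MyAppConfig and its fields are made up, and the interval strings assume the same expression syntax as the existing configtools TimeIntervalConfig (e.g. "30s", "1h").

    # Hedged sketch; MyAppConfig is hypothetical.
    from cognite.extractorutils.unstable.configuration.models import (
        ExtractorConfig,
        ScheduleConfig,
        TimeIntervalConfig,
    )


    class MyAppConfig(ExtractorConfig):
        source_url: str
        poll_interval: TimeIntervalConfig
        schedule: ScheduleConfig


    config = MyAppConfig.model_validate(
        {
            "source-url": "https://example.com",                  # kebab-case aliases from ConfigModel
            "poll-interval": "30s",                                # coerced via __get_pydantic_core_schema__
            "schedule": {"type": "interval", "expression": "1h"},  # ScheduleConfig discriminates on "type"
        }
    )
    print(config.poll_interval.seconds)  # 30, if "30s" parses to thirty seconds
    # log_handlers is optional here thanks to its default_factory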

cognite/extractorutils/unstable/core/__init__.py

@@ -0,0 +1,5 @@
+"""
+The ``core`` package contains the core functionality for defining and managing extractors.
+
+It contains the base class for extractors, the runtime for running extractors, and classes for tasks and errors.
+"""

cognite/extractorutils/unstable/core/_dto.py

@@ -1,5 +1,5 @@
 """
-Temporary holding place for DTOs against Extraction Pipelines 2.0 until it's in the SDK
+Temporary holding place for DTOs against Extraction Pipelines 2.0 until it's in the SDK.
 """

 from typing import Any, Literal
@@ -10,9 +10,11 @@ from pydantic import BaseModel, ConfigDict

 class CogniteModel(BaseModel):
     """
-    Base class for DTO classes based on pydantic, but with a few tweaks to make it inline with the CDF API guidelines:
+    Base class for DTO classes based on pydantic.
+
+    With a few tweaks to make it inline with the CDF API guidelines:
     * camelCase instead of snake_case when serializing/deserializing into/from JSON
-    * exclude Nones from serialized JSON instead of having nulls in the response text
+    * exclude Nones from serialized JSON instead of having nulls in the response text.
     """

     def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
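
For illustration (not part of the package diff): a standalone pydantic model configured with the same two tweaks the CogniteModel docstring describes. This mirrors the intent only; it is not the actual CogniteModel implementation.

    # Hedged, standalone illustration of camelCase aliases plus None-exclusion on dump.
    from pydantic import BaseModel, ConfigDict
    from pydantic.alias_generators import to_camel


    class ExampleDto(BaseModel):
        model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

        external_id: str
        last_seen_time: int | None = None


    dto = ExampleDto(external_id="my-extractor")
    # camelCase keys, and None fields dropped from the payload:
    print(dto.model_dump(by_alias=True, exclude_none=True))  # {'externalId': 'my-extractor'}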

cognite/extractorutils/unstable/core/base.py

@@ -1,5 +1,50 @@
+"""
+This module provides the base class for extractors.
+
+It includes functionality for task management, logging, error handling, and configuration management.
+
+Extractors should subclass the `Extractor` class and implement the `__init_tasks__` method to define their tasks.
+The subclass should also define several class attributes:
+- ``NAME``: A human-readable name for the extractor.
+- ``EXTERNAL_ID``: A unique identifier for the extractor, used when reporting to CDF Integrations.
+- ``DESCRIPTION``: A brief description of the extractor.
+- ``VERSION``: The version of the extractor, used when reporting to CDF Integrations. This should follow semantic
+  versioning.
+- ``CONFIG_TYPE``: The type of the application configuration for the extractor, which should be a subclass of
+  ``ExtractorConfig``. This should be the same class as the one used for the generic type parameter of the
+  ``Extractor`` class.
+
+
+.. code-block:: python
+
+    class MyConfig(ExtractorConfig):
+        parameter: str
+        another_parameter: int
+        schedule: ScheduleConfig
+
+    class MyExtractor(Extractor[MyConfig]):
+        NAME = "My Extractor"
+        EXTERNAL_ID = "my-extractor"
+        DESCRIPTION = "An example extractor"
+        VERSION = "1.0.0"
+
+        CONFIG_TYPE = MyConfig
+
+        def __init_tasks__(self) -> None:
+            self.add_task(
+                ScheduledTask(
+                    name="my_task",
+                    description="An example task",
+                    schedule=self.application_config.schedule,
+                    target=self.my_task_function,
+                )
+            )
+
+        def my_task_function(self, task_context: TaskContext) -> None:
+            task_context.logger.info("Running my task")
+"""
+
 import logging
-import logging.config
 import time
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
@@ -30,7 +75,7 @@ from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Scheduled
 from cognite.extractorutils.unstable.scheduling import TaskScheduler
 from cognite.extractorutils.util import now

-__all__ = ["ConfigType", "ConfigRevision", "Extractor"]
+__all__ = ["ConfigRevision", "ConfigType", "Extractor"]

 ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
 ConfigRevision = Literal["local"] | int
@@ -40,6 +85,13 @@ _T = TypeVar("_T", bound=ExtractorConfig)


 class FullConfig(Generic[_T]):
+    """
+    A class that holds the full configuration for an extractor.
+
+    This includes the connection configuration, application configuration, and which revision of the application
+    configuration is currently active.
+    """
+
     def __init__(
         self,
         connection_config: ConnectionConfig,
@@ -52,6 +104,16 @@


 class Extractor(Generic[ConfigType], CogniteLogger):
+    """
+    Base class for all extractors.
+
+    This class provides the basic functionality for running an extractor, including task management, logging,
+    error handling, and configuration management.
+
+    It designed to be subclassed by specific extractors, which should implement the `__init_tasks__` method
+    to define their tasks.
+    """
+
     NAME: str
     EXTERNAL_ID: str
     DESCRIPTION: str
@@ -128,6 +190,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
             root.addHandler(fh)

     def __init_tasks__(self) -> None:
+        """
+        This method should be overridden by subclasses to define their tasks.
+
+        It is called automatically when the extractor is initialized.
+
+        Subclasses should call ``self.add_task(...)`` to add tasks to the extractor.
+        """
         pass

     def _set_runtime_message_queue(self, queue: Queue) -> None:
@@ -200,6 +269,9 @@
         )

     def restart(self) -> None:
+        """
+        Trigger a restart of the extractor.
+        """
         self._logger.info("Restarting extractor")
         if self._runtime_messages:
             self._runtime_messages.put(RuntimeMessage.RESTART)
@@ -210,12 +282,20 @@
         return cls(config)

     def add_task(self, task: Task) -> None:
+        """
+        Add a task to the extractor.
+
+        This method wraps the task's target function to include error handling and task tracking.
+
+        Args:
+            task: The task to add. It should be an instance of ``StartupTask``, ``ContinuousTask``, or ``ScheduledTask``
+        """
         # Store this for later, since we'll override it with the wrapped version
         target = task.target

         def run_task(task_context: TaskContext) -> None:
             """
-            A wrapped version of the task's target, with tracking and error handling
+            A wrapped version of the task's target, with tracking and error handling.
             """
             # Record a task start
             with self._checkin_lock:
@@ -275,7 +355,7 @@
                 {
                     "name": t.name,
                     "type": "continuous" if isinstance(t, ContinuousTask) else "batch",
-                    "action": True if isinstance(t, ScheduledTask) else False,
+                    "action": bool(isinstance(t, ScheduledTask)),
                     "description": t.description,
                 }
                 for t in self._tasks
@@ -285,14 +365,29 @@
         )

     def start(self) -> None:
+        """
+        Start the extractor.
+
+        Instead of calling this method directly, it is recommended to use the context manager interface by using the
+        ``with`` statement, which ensures proper cleanup on exit.
+        """
         self._setup_logging()
         self._report_extractor_info()
         Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()

     def stop(self) -> None:
+        """
+        Stop the extractor.
+
+        Instead of calling this method directly, it is recommended to use the context manager interface by using the
+        ``with`` statement, which ensures proper cleanup on exit.
+        """
         self.cancellation_token.cancel()

     def __enter__(self) -> Self:
+        """
+        Start the extractor in a context manager.
+        """
         self.start()
         return self

@@ -302,6 +397,9 @@
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> bool:
+        """
+        Stop the extractor when exiting the context manager.
+        """
         self.stop()
         with self._checkin_lock:
             self._checkin()
@@ -310,6 +408,17 @@
         return exc_val is None

     def run(self) -> None:
+        """
+        Run the extractor. This method starts the extractor and runs all tasks that have been added.
+
+        This method assumes ``self.start()`` has been called first. The recommended way to use this method is
+        to use the context manager interface, which ensures that the extractor is started and stopped properly.
+
+        .. code-block:: python
+
+            with extractor:
+                extractor.run()
+        """
         has_scheduled = False

         startup: list[StartupTask] = []
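
For orientation (not part of the package diff): a sketch of the lifecycle implied by the docstrings added above. How the extractor instance is constructed (normally by the runtime, from a FullConfig) is left out here, and the comments paraphrase the method bodies shown in the hunks.

    # Hedged sketch of the documented lifecycle.
    from cognite.extractorutils.unstable.core.base import Extractor


    def main(extractor: Extractor) -> None:
        with extractor:      # __enter__ calls start(): logging setup, extractor info report, check-in thread
            extractor.run()  # runs the tasks added in __init_tasks__ until cancelled
        # __exit__ calls stop() (cancels the cancellation token) and performs a final check-in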