cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (47) hide show
  1. cognite/extractorutils/__init__.py +1 -1
  2. cognite/extractorutils/_inner_util.py +1 -1
  3. cognite/extractorutils/base.py +120 -40
  4. cognite/extractorutils/configtools/__init__.py +4 -5
  5. cognite/extractorutils/configtools/_util.py +3 -2
  6. cognite/extractorutils/configtools/elements.py +213 -35
  7. cognite/extractorutils/configtools/loaders.py +68 -16
  8. cognite/extractorutils/configtools/validators.py +5 -1
  9. cognite/extractorutils/exceptions.py +11 -2
  10. cognite/extractorutils/metrics.py +17 -12
  11. cognite/extractorutils/statestore/__init__.py +77 -3
  12. cognite/extractorutils/statestore/_base.py +7 -3
  13. cognite/extractorutils/statestore/hashing.py +129 -15
  14. cognite/extractorutils/statestore/watermark.py +77 -87
  15. cognite/extractorutils/threading.py +30 -4
  16. cognite/extractorutils/unstable/__init__.py +5 -5
  17. cognite/extractorutils/unstable/configuration/__init__.py +3 -0
  18. cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
  19. cognite/extractorutils/unstable/configuration/loaders.py +90 -19
  20. cognite/extractorutils/unstable/configuration/models.py +121 -7
  21. cognite/extractorutils/unstable/core/__init__.py +5 -0
  22. cognite/extractorutils/unstable/core/_dto.py +5 -3
  23. cognite/extractorutils/unstable/core/base.py +113 -4
  24. cognite/extractorutils/unstable/core/errors.py +41 -0
  25. cognite/extractorutils/unstable/core/logger.py +149 -0
  26. cognite/extractorutils/unstable/core/restart_policy.py +16 -2
  27. cognite/extractorutils/unstable/core/runtime.py +119 -36
  28. cognite/extractorutils/unstable/core/tasks.py +53 -1
  29. cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
  30. cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
  31. cognite/extractorutils/uploader/__init__.py +7 -5
  32. cognite/extractorutils/uploader/_base.py +4 -5
  33. cognite/extractorutils/uploader/assets.py +13 -8
  34. cognite/extractorutils/uploader/data_modeling.py +37 -2
  35. cognite/extractorutils/uploader/events.py +14 -9
  36. cognite/extractorutils/uploader/files.py +80 -21
  37. cognite/extractorutils/uploader/raw.py +12 -7
  38. cognite/extractorutils/uploader/time_series.py +58 -49
  39. cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
  40. cognite/extractorutils/uploader_extractor.py +29 -6
  41. cognite/extractorutils/uploader_types.py +15 -1
  42. cognite/extractorutils/util.py +76 -23
  43. {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
  44. cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
  45. cognite_extractor_utils-7.5.13.dist-info/RECORD +0 -50
  46. {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
  47. {cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,28 @@
1
+ """
2
+ Module providing the runtime for an extractor.
3
+
4
+ The runtime is responsible for starting the extractor in a separate process, managing its lifecycle, and handling
5
+ configuration loading and updates. It also handles errors and restarts the extractor if necessary.
6
+
7
+ It is the preferred way to run an extractor, as it provides a more robust and flexible way to manage the extractor's
8
+ lifecycle compared to running it directly in the main process.
9
+
10
+ The runtime also contains a command line interface (CLI) for starting the extractor, which allows users to specify
11
+ the connection configuration and other parameters.
12
+
13
+ .. code-block:: python
14
+
15
+ from cognite.extractorutils.unstable.core.runtime import Runtime
16
+ from my_extractor import MyExtractor
17
+
18
+ def main() -> None:
19
+ runtime = Runtime(MyExtractor)
20
+ runtime.run()
21
+
22
+ if __name__ == "__main__":
23
+ main()
24
+ """
25
+
1
26
  import logging
2
27
  import os
3
28
  import sys
@@ -5,27 +30,48 @@ import time
5
30
  from argparse import ArgumentParser, Namespace
6
31
  from multiprocessing import Process, Queue
7
32
  from pathlib import Path
33
+ from random import randint
8
34
  from typing import Any, Generic, TypeVar
35
+ from uuid import uuid4
9
36
 
10
- from requests.exceptions import ConnectionError
37
+ from requests.exceptions import ConnectionError as RequestsConnectionError
11
38
  from typing_extensions import assert_never
12
39
 
13
- from cognite.client.exceptions import CogniteAPIError, CogniteAuthError, CogniteConnectionError
40
+ from cognite.client import CogniteClient
41
+ from cognite.client.exceptions import (
42
+ CogniteAPIError,
43
+ CogniteAuthError,
44
+ CogniteConnectionError,
45
+ )
14
46
  from cognite.extractorutils.threading import CancellationToken
15
47
  from cognite.extractorutils.unstable.configuration.exceptions import InvalidConfigError
16
- from cognite.extractorutils.unstable.configuration.loaders import load_file, load_from_cdf
48
+ from cognite.extractorutils.unstable.configuration.loaders import (
49
+ load_file,
50
+ load_from_cdf,
51
+ )
17
52
  from cognite.extractorutils.unstable.configuration.models import ConnectionConfig
18
53
  from cognite.extractorutils.unstable.core._dto import Error
54
+ from cognite.extractorutils.unstable.core.errors import ErrorLevel
55
+ from cognite.extractorutils.util import now
19
56
 
20
57
  from ._messaging import RuntimeMessage
21
58
  from .base import ConfigRevision, ConfigType, Extractor, FullConfig
22
59
 
23
- __all__ = ["Runtime", "ExtractorType"]
60
+ __all__ = ["ExtractorType", "Runtime"]
24
61
 
25
62
  ExtractorType = TypeVar("ExtractorType", bound=Extractor)
26
63
 
27
64
 
28
65
  class Runtime(Generic[ExtractorType]):
66
+ """
67
+ The runtime for an extractor.
68
+
69
+ This class is responsible for starting the extractor in a separate process, managing its lifecycle, and handling
70
+ configuration loading and updates. It also handles errors and restarts the extractor if necessary.
71
+ """
72
+
73
+ RETRY_CONFIG_INTERVAL = 30
74
+
29
75
  def __init__(
30
76
  self,
31
77
  extractor: type[ExtractorType],
@@ -37,6 +83,8 @@ class Runtime(Generic[ExtractorType]):
37
83
  self.logger = logging.getLogger(f"{self._extractor_class.EXTERNAL_ID}.runtime")
38
84
  self._setup_logging()
39
85
 
86
+ self._cognite_client: CogniteClient
87
+
40
88
  def _create_argparser(self) -> ArgumentParser:
41
89
  argparser = ArgumentParser(
42
90
  prog=sys.argv[0],
@@ -57,8 +105,8 @@ class Runtime(Generic[ExtractorType]):
57
105
  help="Connection parameters",
58
106
  )
59
107
  argparser.add_argument(
60
- "-l",
61
- "--local-override",
108
+ "-f",
109
+ "--force-local-config",
62
110
  nargs=1,
63
111
  type=Path,
64
112
  required=False,
@@ -121,7 +169,7 @@ class Runtime(Generic[ExtractorType]):
121
169
  self.logger.info(f"Started extractor with PID {process.pid}")
122
170
  return process
123
171
 
124
- def _get_application_config(
172
+ def _try_get_application_config(
125
173
  self,
126
174
  args: Namespace,
127
175
  connection_config: ConnectionConfig,
@@ -143,39 +191,65 @@ class Runtime(Generic[ExtractorType]):
143
191
 
144
192
  else:
145
193
  self.logger.info("Loading application config from CDF")
146
- client = connection_config.get_cognite_client(
147
- f"{self._extractor_class.EXTERNAL_ID}-{self._extractor_class.VERSION}"
194
+
195
+ application_config, current_config_revision = load_from_cdf(
196
+ self._cognite_client,
197
+ connection_config.integration,
198
+ self._extractor_class.CONFIG_TYPE,
148
199
  )
149
200
 
150
- errors: list[Error] = []
201
+ return application_config, current_config_revision
202
+
203
+ def _safe_get_application_config(
204
+ self,
205
+ args: Namespace,
206
+ connection_config: ConnectionConfig,
207
+ ) -> tuple[ConfigType, ConfigRevision] | None:
208
+ prev_error: str | None = None
151
209
 
210
+ while not self._cancellation_token.is_cancelled:
152
211
  try:
153
- application_config, current_config_revision = load_from_cdf(
154
- client,
155
- connection_config.integration,
156
- self._extractor_class.CONFIG_TYPE,
212
+ return self._try_get_application_config(args, connection_config)
213
+
214
+ except Exception as e:
215
+ error_message = str(e)
216
+ if error_message == prev_error:
217
+ # Same error as before, no need to log it again
218
+ self._cancellation_token.wait(randint(1, self.RETRY_CONFIG_INTERVAL))
219
+ continue
220
+ prev_error = error_message
221
+
222
+ ts = now()
223
+ error = Error(
224
+ external_id=str(uuid4()),
225
+ level=ErrorLevel.fatal.value,
226
+ start_time=ts,
227
+ end_time=ts,
228
+ description=error_message,
229
+ details=None,
230
+ task=None,
157
231
  )
158
232
 
159
- finally:
160
- if errors:
161
- client.post(
162
- f"/api/v1/projects/{client.config.project}/odin/checkin",
163
- json={
164
- "externalId": connection_config.integration,
165
- "errors": [e.model_dump() for e in errors],
166
- },
167
- headers={"cdf-version": "alpha"},
168
- )
233
+ self._cognite_client.post(
234
+ f"/api/v1/projects/{self._cognite_client.config.project}/odin/checkin",
235
+ json={
236
+ "externalId": connection_config.integration,
237
+ "errors": [error.model_dump()],
238
+ },
239
+ headers={"cdf-version": "alpha"},
240
+ )
169
241
 
170
- return application_config, current_config_revision
242
+ self._cancellation_token.wait(randint(1, self.RETRY_CONFIG_INTERVAL))
243
+
244
+ return None
171
245
 
172
246
  def _verify_connection_config(self, connection_config: ConnectionConfig) -> bool:
173
- client = connection_config.get_cognite_client(
247
+ self._cognite_client = connection_config.get_cognite_client(
174
248
  f"{self._extractor_class.EXTERNAL_ID}-{self._extractor_class.VERSION}"
175
249
  )
176
250
  try:
177
- client.post(
178
- f"/api/v1/projects/{client.config.project}/odin/checkin",
251
+ self._cognite_client.post(
252
+ f"/api/v1/projects/{self._cognite_client.config.project}/odin/checkin",
179
253
  json={
180
254
  "externalId": connection_config.integration,
181
255
  },
@@ -206,11 +280,11 @@ class Runtime(Generic[ExtractorType]):
206
280
  self.logger.critical(str(e.message))
207
281
 
208
282
  else:
209
- self.logger.critical(f"Error while connecting to CDF {str(e)}")
283
+ self.logger.critical(f"Error while connecting to CDF {e!s}")
210
284
 
211
285
  return False
212
286
 
213
- except ConnectionError as e:
287
+ except RequestsConnectionError as e:
214
288
  # This is sometimes thrown; I've seen it when trying to get an auth token, but it might happen elsewhere too
215
289
  self.logger.error(str(e))
216
290
  self.logger.critical("Could not initiate connection. Please check your configuration.")
@@ -219,6 +293,12 @@ class Runtime(Generic[ExtractorType]):
219
293
  return True
220
294
 
221
295
  def run(self) -> None:
296
+ """
297
+ Run the extractor runtime.
298
+
299
+ This is intended as the main entry point for the extractor runtime, and starts by parsing command line
300
+ arguments.
301
+ """
222
302
  argparser = self._create_argparser()
223
303
  args = argparser.parse_args()
224
304
 
@@ -234,16 +314,19 @@ class Runtime(Generic[ExtractorType]):
234
314
  if not args.skip_init_checks and not self._verify_connection_config(connection_config):
235
315
  sys.exit(1)
236
316
 
237
- # This has to be Any. We don't know the type of the extractors' config at type checking since the sel doesn't
317
+ # This has to be Any. We don't know the type of the extractors' config at type checking since the self doesn't
238
318
  # exist yet, and I have not found a way to represent it in a generic way that isn't just an Any in disguise.
239
319
  application_config: Any
320
+ config: tuple[Any, ConfigRevision] | None
321
+
240
322
  while not self._cancellation_token.is_cancelled:
241
- try:
242
- application_config, current_config_revision = self._get_application_config(args, connection_config)
323
+ config = self._safe_get_application_config(args, connection_config)
324
+ if config is None:
325
+ if self._cancellation_token.is_cancelled:
326
+ break
327
+ continue
243
328
 
244
- except InvalidConfigError:
245
- self.logger.critical("Could not get a valid application config file. Shutting down")
246
- sys.exit(1)
329
+ application_config, current_config_revision = config
247
330
 
248
331
  # Start extractor in separate process, and wait for it to end
249
332
  process = self._spawn_extractor(
@@ -1,3 +1,7 @@
1
+ """
2
+ This module defines the base classes for tasks in the extractor framework.
3
+ """
4
+
1
5
  import logging
2
6
  from collections.abc import Callable
3
7
  from typing import TYPE_CHECKING
@@ -14,10 +18,16 @@ from cognite.extractorutils.unstable.core.logger import CogniteLogger
14
18
  if TYPE_CHECKING:
15
19
  from cognite.extractorutils.unstable.core.base import Extractor
16
20
 
17
- __all__ = ["ScheduledTask", "ContinuousTask", "StartupTask", "Task", "TaskContext"]
21
+ __all__ = ["ContinuousTask", "ScheduledTask", "StartupTask", "Task", "TaskContext"]
18
22
 
19
23
 
20
24
  class TaskContext(CogniteLogger):
25
+ """
26
+ Context for a task execution.
27
+
28
+ This class is used to log errors and messages related to the task execution.
29
+ """
30
+
21
31
  def __init__(self, task: "Task", extractor: "Extractor"):
22
32
  super().__init__()
23
33
  self._task = task
@@ -58,6 +68,18 @@ class _Task:
58
68
 
59
69
 
60
70
  class ScheduledTask(_Task):
71
+ """
72
+ A task that is scheduled to run at specific intervals or according to a cron expression.
73
+
74
+ This class allows you to define tasks that can be scheduled using either an interval or a cron expression.
75
+
76
+ Args:
77
+ name: The name of the task.
78
+ target: A callable that takes a ``TaskContext`` and performs the task.
79
+ description: An optional description of the task.
80
+ schedule: A ``ScheduleConfig`` object that defines the scheduling configuration for the task.
81
+ """
82
+
61
83
  def __init__(
62
84
  self,
63
85
  *,
@@ -73,6 +95,15 @@ class ScheduledTask(_Task):
73
95
  def from_interval(
74
96
  cls, *, interval: str, name: str, target: TaskTarget, description: str | None = None
75
97
  ) -> "ScheduledTask":
98
+ """
99
+ Create a scheduled task that runs at regular intervals.
100
+
101
+ Args:
102
+ interval: A string representing the time interval (e.g., "5m" for 5 minutes).
103
+ name: The name of the task.
104
+ target: A callable that takes a ``TaskContext`` and performs the task.
105
+ description: An optional description of the task.
106
+ """
76
107
  return ScheduledTask(
77
108
  name=name,
78
109
  target=target,
@@ -82,6 +113,15 @@ class ScheduledTask(_Task):
82
113
 
83
114
  @classmethod
84
115
  def from_cron(cls, *, cron: str, name: str, target: TaskTarget, description: str | None = None) -> "ScheduledTask":
116
+ """
117
+ Create a scheduled task that runs according to a cron expression.
118
+
119
+ Args:
120
+ cron: A string representing the cron expression (e.g., "0 0 * * *" for daily at midnight).
121
+ name: The name of the task.
122
+ target: A callable that takes a ``TaskContext`` and performs the task.
123
+ description: An optional description of the task.
124
+ """
85
125
  return ScheduledTask(
86
126
  name=name,
87
127
  target=target,
@@ -91,6 +131,12 @@ class ScheduledTask(_Task):
91
131
 
92
132
 
93
133
  class ContinuousTask(_Task):
134
+ """
135
+ A task that runs continuously.
136
+
137
+ Continuous tasks are started when the extractor starts and are expected to run until the extractor stops.
138
+ """
139
+
94
140
  def __init__(
95
141
  self,
96
142
  *,
@@ -102,6 +148,12 @@ class ContinuousTask(_Task):
102
148
 
103
149
 
104
150
  class StartupTask(_Task):
151
+ """
152
+ A task that runs once at the startup of the extractor.
153
+
154
+ Startup tasks are executed before any continuous or scheduled tasks and are typically used for initialization.
155
+ """
156
+
105
157
  def __init__(
106
158
  self,
107
159
  *,
@@ -1,3 +1,16 @@
1
+ """
2
+ This module provides a task scheduler.
3
+
4
+ It is inspired by the ``APScheduler`` library and is designed to manage the scheduling of tasks within the extractor
5
+ framework. It differs from ``APScheduler`` in a few key ways:
6
+ - It is designed to be used within the extractor framework, allowing for better integration with the extractor's
7
+ lifecycle and error handling. For example, it respects the extractor's cancellation token and will gracefully shut
8
+ down upon cancellation.
9
+ - It has a simpler interface, focusing on the core functionality needed for scheduling tasks without the additional
10
+ complexity of a full-featured scheduler like ``APScheduler``.
11
+ - It is fully typed, providing better type safety and autocompletion in IDEs.
12
+ """
13
+
1
14
  from ._scheduler import TaskScheduler
2
15
 
3
16
  __all__ = ["TaskScheduler"]
@@ -51,7 +51,7 @@ class TaskScheduler:
51
51
  return []
52
52
  with self._jobs_lock:
53
53
  next_runs = sorted([(j.schedule.next(), j) for j in self._jobs.values()], key=lambda tup: tup[0])
54
- return [job for (next, job) in next_runs if next == next_runs[0][0]] if next_runs else []
54
+ return [job for (scheduled_time, job) in next_runs if scheduled_time == next_runs[0][0]] if next_runs else []
55
55
 
56
56
  def _run_job(self, job: Job) -> bool:
57
57
  with self._running_lock:
@@ -13,8 +13,10 @@
13
13
  # limitations under the License.
14
14
 
15
15
  """
16
- Module containing upload queue classes. The UploadQueue classes chunks together items and uploads them together to CDF,
17
- both to minimize the load on the API, and also to speed up uploading as requests can be slow.
16
+ Module containing upload queue classes.
17
+
18
+ The UploadQueue classes chunk together items and upload them together to CDF, both to minimize the load on the API, and
19
+ also to speed up uploading as requests can be slow.
18
20
 
19
21
  Each upload queue comes with some configurable conditions that, when met, automatically triggers an upload.
20
22
 
@@ -78,13 +80,13 @@ from .time_series import (
78
80
 
79
81
  __all__ = [
80
82
  "AssetUploadQueue",
81
- "EventUploadQueue",
82
83
  "BytesUploadQueue",
84
+ "DataPoint",
85
+ "DataPointList",
86
+ "EventUploadQueue",
83
87
  "FileUploadQueue",
84
88
  "IOFileUploadQueue",
85
89
  "RawUploadQueue",
86
- "DataPoint",
87
- "DataPointList",
88
90
  "SequenceUploadQueue",
89
91
  "TimeSeriesUploadQueue",
90
92
  "default_time_series_factory",
@@ -84,7 +84,7 @@ class AbstractUploadQueue(ABC):
84
84
 
85
85
  def _post_upload(self, uploaded: list[Any]) -> None:
86
86
  """
87
- Perform post_upload_function to uploaded data, if applicable
87
+ Perform post_upload_function to uploaded data, if applicable.
88
88
 
89
89
  Args:
90
90
  uploaded: list of uploaded data
@@ -103,7 +103,7 @@ class AbstractUploadQueue(ABC):
103
103
 
104
104
  def _run(self) -> None:
105
105
  """
106
- Internal run method for upload thread
106
+ Internal run method for upload thread.
107
107
  """
108
108
  while not self.cancellation_token.wait(timeout=self.max_upload_interval):
109
109
  try:
@@ -117,8 +117,7 @@ class AbstractUploadQueue(ABC):
117
117
 
118
118
  def start(self) -> None:
119
119
  """
120
- Start upload thread if max_upload_interval is set, this called the upload method every max_upload_interval
121
- seconds.
120
+ Start upload thread if max_upload_interval is set.
122
121
  """
123
122
  if self.max_upload_interval is not None:
124
123
  self.thread.start()
@@ -137,7 +136,7 @@ class AbstractUploadQueue(ABC):
137
136
 
138
137
  def __len__(self) -> int:
139
138
  """
140
- The size of the upload queue
139
+ The size of the upload queue.
141
140
 
142
141
  Returns:
143
142
  Number of events in queue
@@ -1,3 +1,7 @@
1
+ """
2
+ Upload queue for (legacy) assets.
3
+ """
4
+
1
5
  # Copyright 2023 Cognite AS
2
6
  #
3
7
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -37,7 +41,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
37
41
 
38
42
  class AssetUploadQueue(AbstractUploadQueue):
39
43
  """
40
- Upload queue for assets
44
+ Upload queue for assets.
41
45
 
42
46
  Args:
43
47
  cdf_client: Cognite Data Fusion client to use
@@ -77,8 +81,9 @@ class AssetUploadQueue(AbstractUploadQueue):
77
81
 
78
82
  def add_to_upload_queue(self, asset: Asset) -> None:
79
83
  """
80
- Add asset to upload queue. The queue will be uploaded if the queue size is larger than the threshold
81
- specified in the __init__.
84
+ Add asset to upload queue.
85
+
86
+ The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
82
87
 
83
88
  Args:
84
89
  asset: Asset to add
@@ -92,7 +97,7 @@ class AssetUploadQueue(AbstractUploadQueue):
92
97
 
93
98
  def upload(self) -> None:
94
99
  """
95
- Trigger an upload of the queue, clears queue afterwards
100
+ Trigger an upload of the queue, clears queue afterwards.
96
101
  """
97
102
 
98
103
  @retry(
@@ -107,8 +112,8 @@ class AssetUploadQueue(AbstractUploadQueue):
107
112
  try:
108
113
  self.cdf_client.assets.create(self.upload_queue)
109
114
  except CogniteDuplicatedError as e:
110
- duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
111
- failed: list[Asset] = [e for e in e.failed]
115
+ duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
116
+ failed: list[Asset] = list(e.failed)
112
117
  to_create = []
113
118
  to_update = []
114
119
  for asset in failed:
@@ -138,7 +143,7 @@ class AssetUploadQueue(AbstractUploadQueue):
138
143
 
139
144
  def __enter__(self) -> "AssetUploadQueue":
140
145
  """
141
- Wraps around start method, for use as context manager
146
+ Wraps around start method, for use as context manager.
142
147
 
143
148
  Returns:
144
149
  self
@@ -153,7 +158,7 @@ class AssetUploadQueue(AbstractUploadQueue):
153
158
  exc_tb: TracebackType | None,
154
159
  ) -> None:
155
160
  """
156
- Wraps around stop method, for use as context manager
161
+ Wraps around stop method, for use as context manager.
157
162
 
158
163
  Args:
159
164
  exc_type: Exception type
@@ -1,3 +1,7 @@
1
+ """
2
+ Module for uploading data modeling instances to CDF.
3
+ """
4
+
1
5
  from collections.abc import Callable
2
6
  from types import TracebackType
3
7
  from typing import Any
@@ -16,6 +20,24 @@ from cognite.extractorutils.util import cognite_exceptions, retry
16
20
 
17
21
 
18
22
  class InstanceUploadQueue(AbstractUploadQueue):
23
+ """
24
+ Upload queue for data modeling instances (nodes and edges).
25
+
26
+ Args:
27
+ cdf_client: Cognite Data Fusion client to use.
28
+ post_upload_function: A function that will be called after each upload. The function will be given one argument:
29
+ A list of the nodes and edges that were uploaded.
30
+ max_queue_size: Maximum size of upload queue. Defaults to no max size.
31
+ max_upload_interval: Automatically trigger an upload on an interval when run as a thread (use start/stop
32
+ methods). Unit is seconds.
33
+ trigger_log_level: Log level to log upload triggers to.
34
+ thread_name: Thread name of uploader thread.
35
+ cancellation_token: Cancellation token for managing thread cancellation.
36
+ auto_create_start_nodes: Automatically create start nodes if they do not exist.
37
+ auto_create_end_nodes: Automatically create end nodes if they do not exist.
38
+ auto_create_direct_relations: Automatically create direct relations if they do not exist.
39
+ """
40
+
19
41
  def __init__(
20
42
  self,
21
43
  cdf_client: CogniteClient,
@@ -52,6 +74,15 @@ class InstanceUploadQueue(AbstractUploadQueue):
52
74
  node_data: list[NodeApply] | None = None,
53
75
  edge_data: list[EdgeApply] | None = None,
54
76
  ) -> None:
77
+ """
78
+ Add instances to the upload queue.
79
+
80
+ The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
81
+
82
+ Args:
83
+ node_data: List of nodes to add to the upload queue.
84
+ edge_data: List of edges to add to the upload queue.
85
+ """
55
86
  if node_data:
56
87
  with self.lock:
57
88
  self.node_queue.extend(node_data)
@@ -66,6 +97,10 @@ class InstanceUploadQueue(AbstractUploadQueue):
66
97
  self._check_triggers()
67
98
 
68
99
  def upload(self) -> None:
100
+ """
101
+ Trigger an upload of the queue, clears queue afterwards.
102
+ """
103
+
69
104
  @retry(
70
105
  exceptions=cognite_exceptions(),
71
106
  cancellation_token=self.cancellation_token,
@@ -91,7 +126,7 @@ class InstanceUploadQueue(AbstractUploadQueue):
91
126
 
92
127
  def __enter__(self) -> "InstanceUploadQueue":
93
128
  """
94
- Wraps around start method, for use as context manager
129
+ Wraps around start method, for use as context manager.
95
130
 
96
131
  Returns:
97
132
  self
@@ -106,7 +141,7 @@ class InstanceUploadQueue(AbstractUploadQueue):
106
141
  exc_tb: TracebackType | None,
107
142
  ) -> None:
108
143
  """
109
- Wraps around stop method, for use as context manager
144
+ Wraps around stop method, for use as context manager.
110
145
 
111
146
  Args:
112
147
  exc_type: Exception type
@@ -1,3 +1,7 @@
1
+ """
2
+ Upload queue for (legacy) events.
3
+ """
4
+
1
5
  # Copyright 2023 Cognite AS
2
6
  #
3
7
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,7 +40,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
36
40
 
37
41
  class EventUploadQueue(AbstractUploadQueue):
38
42
  """
39
- Upload queue for events
43
+ Upload queue for events.
40
44
 
41
45
  Args:
42
46
  cdf_client: Cognite Data Fusion client to use
@@ -78,8 +82,9 @@ class EventUploadQueue(AbstractUploadQueue):
78
82
 
79
83
  def add_to_upload_queue(self, event: Event) -> None:
80
84
  """
81
- Add event to upload queue. The queue will be uploaded if the queue size is larger than the threshold
82
- specified in the __init__.
85
+ Add event to upload queue.
86
+
87
+ The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
83
88
 
84
89
  Args:
85
90
  event: Event to add
@@ -94,7 +99,7 @@ class EventUploadQueue(AbstractUploadQueue):
94
99
 
95
100
  def upload(self) -> None:
96
101
  """
97
- Trigger an upload of the queue, clears queue afterwards
102
+ Trigger an upload of the queue, clears queue afterwards.
98
103
  """
99
104
 
100
105
  @retry(
@@ -107,10 +112,10 @@ class EventUploadQueue(AbstractUploadQueue):
107
112
  )
108
113
  def _upload_batch() -> None:
109
114
  try:
110
- self.cdf_client.events.create([e for e in self.upload_queue])
115
+ self.cdf_client.events.create(list(self.upload_queue))
111
116
  except CogniteDuplicatedError as e:
112
- duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
113
- failed: list[Event] = [e for e in e.failed]
117
+ duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
118
+ failed: list[Event] = list(e.failed)
114
119
  to_create = []
115
120
  to_update = []
116
121
  for evt in failed:
@@ -142,7 +147,7 @@ class EventUploadQueue(AbstractUploadQueue):
142
147
 
143
148
  def __enter__(self) -> "EventUploadQueue":
144
149
  """
145
- Wraps around start method, for use as context manager
150
+ Wraps around start method, for use as context manager.
146
151
 
147
152
  Returns:
148
153
  self
@@ -154,7 +159,7 @@ class EventUploadQueue(AbstractUploadQueue):
154
159
  self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
155
160
  ) -> None:
156
161
  """
157
- Wraps around stop method, for use as context manager
162
+ Wraps around stop method, for use as context manager.
158
163
 
159
164
  Args:
160
165
  exc_type: Exception type