cognite-extractor-utils 7.7.0__py3-none-any.whl → 7.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (37) hide show
  1. cognite/examples/unstable/extractors/simple_extractor/config/config.yaml +3 -0
  2. cognite/examples/unstable/extractors/simple_extractor/config/connection_config.yaml +10 -0
  3. cognite/examples/unstable/extractors/simple_extractor/main.py +81 -0
  4. cognite/extractorutils/__init__.py +1 -1
  5. cognite/extractorutils/_inner_util.py +2 -2
  6. cognite/extractorutils/base.py +1 -1
  7. cognite/extractorutils/configtools/elements.py +4 -2
  8. cognite/extractorutils/configtools/loaders.py +18 -4
  9. cognite/extractorutils/exceptions.py +1 -1
  10. cognite/extractorutils/metrics.py +8 -6
  11. cognite/extractorutils/statestore/watermark.py +6 -3
  12. cognite/extractorutils/threading.py +2 -2
  13. cognite/extractorutils/unstable/configuration/exceptions.py +28 -1
  14. cognite/extractorutils/unstable/configuration/models.py +157 -32
  15. cognite/extractorutils/unstable/core/_dto.py +80 -7
  16. cognite/extractorutils/unstable/core/base.py +171 -106
  17. cognite/extractorutils/unstable/core/checkin_worker.py +428 -0
  18. cognite/extractorutils/unstable/core/errors.py +2 -2
  19. cognite/extractorutils/unstable/core/logger.py +49 -0
  20. cognite/extractorutils/unstable/core/runtime.py +200 -31
  21. cognite/extractorutils/unstable/core/tasks.py +2 -2
  22. cognite/extractorutils/uploader/_base.py +1 -1
  23. cognite/extractorutils/uploader/assets.py +1 -1
  24. cognite/extractorutils/uploader/data_modeling.py +1 -1
  25. cognite/extractorutils/uploader/events.py +1 -1
  26. cognite/extractorutils/uploader/files.py +4 -4
  27. cognite/extractorutils/uploader/raw.py +1 -1
  28. cognite/extractorutils/uploader/time_series.py +4 -4
  29. cognite/extractorutils/uploader_extractor.py +2 -2
  30. cognite/extractorutils/uploader_types.py +3 -3
  31. cognite/extractorutils/util.py +8 -6
  32. {cognite_extractor_utils-7.7.0.dist-info → cognite_extractor_utils-7.8.1.dist-info}/METADATA +4 -3
  33. cognite_extractor_utils-7.8.1.dist-info/RECORD +55 -0
  34. cognite_extractor_utils-7.8.1.dist-info/entry_points.txt +2 -0
  35. cognite_extractor_utils-7.7.0.dist-info/RECORD +0 -50
  36. {cognite_extractor_utils-7.7.0.dist-info → cognite_extractor_utils-7.8.1.dist-info}/WHEEL +0 -0
  37. {cognite_extractor_utils-7.7.0.dist-info → cognite_extractor_utils-7.8.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,428 @@
1
+ """
2
+ Check-in worker for reporting errors and task updates to the CDF Integrations API.
3
+
4
+ The logic in this file is based off of the implementation in the dotnet extractorutils package.
5
+
6
+ It manages the reporting of startup, errors, and task updates to the Integrations API.
7
+
8
+ It ensures that startup (and any errors not associated with a task) is reported first,
9
+ followed by (all) errors and task updates.
10
+ """
11
+
12
+ import sys
13
+ from collections.abc import Callable
14
+ from logging import Logger
15
+ from secrets import SystemRandom
16
+ from threading import RLock
17
+ from time import sleep
18
+
19
+ from requests import Response
20
+
21
+ from cognite.client import CogniteClient
22
+ from cognite.client.exceptions import CogniteAPIError, CogniteAuthError, CogniteConnectionError
23
+ from cognite.extractorutils.threading import CancellationToken
24
+ from cognite.extractorutils.unstable.configuration.models import ConfigRevision
25
+ from cognite.extractorutils.unstable.core._dto import (
26
+ CheckinRequest,
27
+ CheckinResponse,
28
+ JSONType,
29
+ MessageType,
30
+ StartupRequest,
31
+ TaskUpdate,
32
+ )
33
+ from cognite.extractorutils.unstable.core._dto import (
34
+ Error as DtoError,
35
+ )
36
+ from cognite.extractorutils.unstable.core.errors import Error
37
+ from cognite.extractorutils.util import now
38
+
39
# Default pause between periodic check-ins, in seconds.
DEFAULT_SLEEP_INTERVAL = 30.0
# Base backoff between startup-report retries, in seconds.
STARTUP_BACKOFF_SECONDS = 30.0

# Upper bounds on how many items are included in a single check-in request.
MAX_ERRORS_PER_CHECKIN = 1000
MAX_TASK_UPDATES_PER_CHECKIN = 1000

# RNG used only for retry jitter.
rng = SystemRandom()
42
+
43
+
44
class CheckinWorker:
    """
    A worker to manage how we report check-ins to the Integrations API.

    This will help us:
    1. Ensure that we don't report any check-ins before the start up is reported.
    2. Manage how we batch errors and task updates.
    3. Manage how we handle retries and backoff.
    """

    def __init__(
        self,
        cognite_client: CogniteClient,
        integration: str,
        logger: Logger,
    ) -> None:
        """
        Initialize the CheckinWorker.

        Callbacks for configuration revision changes and fatal errors are configured
        separately via ``set_on_revision_change_handler`` and ``set_on_fatal_error_handler``.

        Arguments:
            cognite_client (CogniteClient): Cognite client to use for API requests.
            integration (str): The external ID of the integration.
            logger (Logger): Logger to use for logging.
        """
        # Per-instance locks: _lock guards queued errors/task updates and worker
        # state, _flush_lock serializes whole startup/flush cycles. They are
        # instance attributes so that independent workers never share a lock.
        self._lock = RLock()
        self._flush_lock = RLock()

        self._cognite_client: CogniteClient = cognite_client
        self._integration: str = integration
        self._logger: Logger = logger
        self._on_revision_change: Callable[[int], None] | None = None
        self._on_fatal_error: Callable[[Exception], None] | None = None
        self._is_running: bool = False
        self._retry_startup: bool = False
        self._has_reported_startup: bool = False
        self._active_revision: ConfigRevision = "local"
        # Keyed on external ID so the same error is never queued twice.
        self._errors: dict[str, Error] = {}
        self._task_updates: list[TaskUpdate] = []

    @property
    def active_revision(self) -> ConfigRevision:
        """Get the active configuration revision."""
        return self._active_revision

    @active_revision.setter
    def active_revision(self, value: ConfigRevision) -> None:
        with self._lock:
            self._active_revision = value

    def set_on_revision_change_handler(self, on_revision_change: Callable[[int], None]) -> None:
        """
        Set the handler for when the configuration revision changes.

        This handler will be called with the new configuration revision when it changes.

        Arguments:
            on_revision_change (Callable[[int], None]): A callback to call when the
                configuration revision changes.
        """
        self._on_revision_change = on_revision_change

    def set_on_fatal_error_handler(self, on_fatal_error: Callable[[Exception], None]) -> None:
        """
        Set the handler for when a fatal error occurs.

        This handler will be called with the exception when a fatal error occurs,
        such as wrong CDF credentials.

        Arguments:
            on_fatal_error (Callable[[Exception], None]): A callback to call when a fatal error occurs.
        """
        self._on_fatal_error = on_fatal_error

    def set_retry_startup(self, retry_startup: bool) -> None:
        """
        Set whether to retry reporting startup if it fails.

        Arguments:
            retry_startup (bool): Whether to retry reporting startup if it fails.
        """
        self._retry_startup = retry_startup

    def run_periodic_checkin(
        self, cancellation_token: CancellationToken, startup_request: StartupRequest, interval: float | None = None
    ) -> None:
        """
        Run periodic check-ins with the Integrations API.

        This method will start a process that periodically reports check-ins to the
        Integrations API. It will also ensure that we report the startup first, or
        only report errors that are not associated with a task.

        Arguments:
            cancellation_token: A token to cancel the periodic check-in.
            startup_request: The start up request.
            interval: The interval in seconds between each check-in. If None, defaults
                to DEFAULT_SLEEP_INTERVAL.
        """
        with self._lock:
            if self._is_running:
                raise RuntimeError("Attempting to start a check-in worker that was already running")
            self._is_running = True

        self._run_startup_report(cancellation_token, startup_request, interval)

        report_interval = interval or DEFAULT_SLEEP_INTERVAL

        while not cancellation_token.is_cancelled:
            self._logger.debug("Running periodic check-in with interval %.2f seconds", report_interval)
            self.flush(cancellation_token)
            self._logger.debug(f"Check-in worker finished check-in, sleeping for {report_interval:.2f} seconds")
            cancellation_token.wait(report_interval)

    def _run_startup_report(
        self, cancellation_token: CancellationToken, startup_request: StartupRequest, interval: float | None = None
    ) -> None:
        # Report startup, retrying with jittered backoff when configured to do so.
        # Holds the flush lock so no check-in can be written before startup succeeds.
        with self._flush_lock:
            while not cancellation_token.is_cancelled:
                should_retry = self._report_startup(startup_request)
                if not should_retry:
                    self._has_reported_startup = True
                    break
                elif not self._retry_startup:
                    raise RuntimeError("Could not report startup")

                # Jitter: retry after between 0.5x and 1.5x the configured backoff.
                backoff = interval or STARTUP_BACKOFF_SECONDS
                next_retry = backoff / 2 + backoff * rng.random()
                self._logger.info("Failed to report startup, retrying in %.2f seconds", next_retry)
                # Wait on the cancellation token (rather than a plain sleep) so a
                # shutdown request interrupts the backoff immediately.
                cancellation_token.wait(next_retry)

    def _report_startup(self, startup_request: StartupRequest) -> bool:
        # Returns True if the startup report failed in a way that should be retried.
        return self._wrap_checkin_like_request(
            lambda: self._cognite_client.post(
                f"/api/v1/projects/{self._cognite_client.config.project}/integrations/startup",
                json=startup_request.model_dump(mode="json", by_alias=True),
                headers={"cdf-version": "alpha"},
            )
        )

    def _handle_checkin_response(self, response: JSONType) -> None:
        # Parse a check-in (or startup) response and react to config revision changes.
        checkin_response = CheckinResponse.model_validate(response)
        self._logger.debug("Received check-in response: %s", checkin_response)

        new_revision = checkin_response.last_config_revision
        if new_revision is None:
            return

        if self._active_revision == "local":
            self._logger.warning(
                "Remote config revision changed "
                f"{self._active_revision} -> {new_revision}. "
                "The extractor is currently using local configuration and will need to be manually restarted "
                "and configured to use remote config for the new config to take effect.",
            )
        elif self._active_revision < new_revision:
            # Remember the previous revision before overwriting it, so the log
            # message below shows the actual transition (old -> new) instead of
            # printing the new revision twice.
            old_revision = self._active_revision
            self.active_revision = new_revision
            if self._on_revision_change is not None:
                self._logger.info(
                    "Remote config revision changed %s -> %s. The extractor will now use the new configuration.",
                    old_revision,
                    new_revision,
                )
                self._on_revision_change(new_revision)

    def flush(self, cancellation_token: CancellationToken) -> None:
        """
        Flush available check-ins.

        Arguments:
            cancellation_token: A token to cancel the check-in reporting.
        """
        with self._flush_lock:
            self._logger.debug(
                "Going to report check-in with %d errors and %d task updates.",
                len(self._errors),
                len(self._task_updates),
            )
            self.report_checkin(cancellation_token)

    def report_checkin(self, cancellation_token: CancellationToken) -> None:
        """
        Report a check-in to the Integrations API.

        Arguments:
            cancellation_token: A token to cancel the check-in reporting.
        """
        with self._lock:
            if not self._has_reported_startup:
                # Until startup has been reported we may only report errors that
                # are not associated with a task.
                new_errors = [error for error in self._errors.values() if error._task_name is None]
                if len(new_errors) == 0:
                    self._logger.info("No startup request has been reported yet, skipping check-in.")
                    return

                self._logger.warning(
                    "Check-in worker has not reported startup yet, only reporting errors not associated with a task."
                )
                for error in new_errors:
                    del self._errors[error.external_id]
                task_updates: list[TaskUpdate] = []
            else:
                # Drain everything queued so far.
                new_errors = list(self._errors.values())
                self._errors.clear()
                task_updates = self._task_updates[:]
                self._task_updates.clear()

        # Order both streams chronologically so oversized payloads can be split
        # into batches along the time axis.
        new_errors.sort(key=lambda e: e.end_time or e.start_time)
        task_updates.sort(key=lambda t: t.timestamp)

        while not cancellation_token.is_cancelled:
            if len(new_errors) <= MAX_ERRORS_PER_CHECKIN and len(task_updates) <= MAX_TASK_UPDATES_PER_CHECKIN:
                # Everything fits in one request: write it and stop.
                self._logger.debug("Writing %d errors and %d task updates.", len(new_errors), len(task_updates))
                errors_to_write = new_errors
                new_errors = []
                task_updates_to_write = task_updates
                task_updates = []
                self.try_write_checkin(
                    errors_to_write,
                    task_updates_to_write,
                )
                break

            # Too much for a single request: take a chronological prefix of both
            # streams, bounded by the per-check-in maximums.
            errs_idx = 0
            tasks_idx = 0

            while (
                (errs_idx < len(new_errors) or tasks_idx < len(task_updates))
                and errs_idx < MAX_ERRORS_PER_CHECKIN
                and tasks_idx < MAX_TASK_UPDATES_PER_CHECKIN
            ):
                err = new_errors[errs_idx] if errs_idx < len(new_errors) else None
                err_time = sys.maxsize if err is None else (err.end_time or err.start_time)
                task_time = task_updates[tasks_idx].timestamp if tasks_idx < len(task_updates) else sys.maxsize

                # On a timestamp tie, both indices advance.
                if err_time <= task_time:
                    errs_idx += 1
                if task_time <= err_time:
                    tasks_idx += 1
            self._logger.debug(f"Batching check-in with {errs_idx} errors and {tasks_idx} task updates.")

            errors_to_write = new_errors[:errs_idx]
            task_updates_to_write = task_updates[:tasks_idx]

            self._logger.debug("Writing check-in with batching needed.")
            self._logger.debug(
                "Writing %d errors and %d task updates.", len(errors_to_write), len(task_updates_to_write)
            )

            if errs_idx > 0:
                new_errors = new_errors[errs_idx:]
            if tasks_idx > 0:
                task_updates = task_updates[tasks_idx:]
            self.try_write_checkin(
                errors_to_write,
                task_updates_to_write,
            )
            if errs_idx == 0 and tasks_idx == 0:
                # Nothing was consumed this round; stop instead of spinning.
                self._logger.debug("Check-in worker finished writing check-in.")
                break

        if cancellation_token.is_cancelled:
            self._logger.debug("Extractor was stopped during check-in, requeuing remaining errors and task updates.")
            self._requeue_checkin(new_errors, task_updates)

    def try_write_checkin(self, errors: list[Error], task_updates: list[TaskUpdate]) -> None:
        """
        We try to write a check-in.

        This will try to write a check in to integrations. If the request fails in a
        retryable way, the payload is requeued for a later check-in.

        Arguments:
            errors(list[Error]): The errors to write.
            task_updates(list[TaskUpdate]): The task updates to write.
        """
        checkin_request = CheckinRequest(
            external_id=self._integration,
            errors=list(map(DtoError.from_internal, errors)) if len(errors) > 0 else None,
            task_events=task_updates if len(task_updates) > 0 else None,
        )
        should_requeue = self._wrap_checkin_like_request(
            lambda: self._cognite_client.post(
                f"/api/v1/projects/{self._cognite_client.config.project}/integrations/checkin",
                json=checkin_request.model_dump(mode="json", by_alias=True),
                headers={"cdf-version": "alpha"},
            )
        )

        if should_requeue:
            self._requeue_checkin(errors, checkin_request.task_events)

    def report_error(self, error: Error) -> None:
        """
        Queue check-in error to be reported to Integrations API.

        This method is used to report errors that occur during the execution of the extractor.
        It will automatically requeue the error if the check-in fails. A warning is
        logged if the error was already queued.
        """
        with self._lock:
            if error.external_id not in self._errors:
                self._errors[error.external_id] = error
            else:
                self._logger.warning(f"Error {error.external_id} already reported, skipping re-reporting.")

    def try_report_error(self, error: Error) -> None:
        """
        This method will try to queue an error to be reported to the Integrations API.

        Unlike ``report_error``, it stays silent when the error was already queued.

        Arguments:
            error (Error): The error to report.
        """
        with self._lock:
            if error.external_id not in self._errors:
                self._errors[error.external_id] = error

    def report_task_start(self, name: str, message: MessageType | None = None, timestamp: int | None = None) -> None:
        """
        Queue task start to be reported to Integrations API.

        This method is used to queue start related to tasks that are running in the extractor.
        It will automatically requeue the task update if the check-in fails.

        Arguments:
            name (str): The name of the task.
            message (MessageType | None): An optional message for the update.
            timestamp (int | None): Event time in epoch milliseconds. Defaults to now.
        """
        with self._lock:
            # now() already returns epoch milliseconds -- the same unit as the
            # Error timestamps these updates are merged with in report_checkin --
            # so it must not be scaled by another factor of 1000.
            self._task_updates.append(
                TaskUpdate(type="started", name=name, timestamp=timestamp or now(), message=message)
            )

    def report_task_end(self, name: str, message: MessageType | None = None, timestamp: int | None = None) -> None:
        """
        Queue task end to be reported to Integrations API.

        This method is used to queue end related to tasks that are running in the extractor.
        It will automatically requeue the task update if the check-in fails.

        Arguments:
            name (str): The name of the task.
            message (MessageType | None): An optional message for the update.
            timestamp (int | None): Event time in epoch milliseconds. Defaults to now.
        """
        with self._lock:
            # Same unit considerations as report_task_start: keep raw now() output.
            self._task_updates.append(
                TaskUpdate(type="ended", name=name, timestamp=timestamp or now(), message=message)
            )

    def _requeue_checkin(self, errors: list[Error] | None, task_updates: list[TaskUpdate] | None) -> None:
        # Put unreported items back on the queues for the next check-in.
        with self._lock:
            for error in errors or []:
                if error.external_id not in self._errors:
                    self._errors[error.external_id] = error
            self._task_updates.extend(task_updates or [])

    def _wrap_checkin_like_request(self, request: Callable[[], Response]) -> bool:
        # Execute a check-in-like request, classify failures, and return whether the
        # payload should be requeued. Fatal failures (bad credentials) are also
        # forwarded to the on_fatal_error handler when one is set.
        exception_to_report: Exception | None = None
        requeue = False
        try:
            response = request()
            self._handle_checkin_response(response.json())
        except CogniteConnectionError as e:
            if e.__cause__ is not None:
                self._logger.error(str(e.__cause__))
            self._logger.critical("Could not connect to CDF. Please check your configuration.")
            requeue = True

        except CogniteAuthError as e:
            self._logger.error(str(e))
            self._logger.critical("Could not get an access token. Please check your configuration.")
            exception_to_report = e
            requeue = True

        except CogniteAPIError as e:
            if e.code == 401:
                self._logger.critical(
                    "Got a 401 error from CDF. Please check your configuration. "
                    "Make sure the credentials and project is correct."
                )
                exception_to_report = e

            elif e.message:
                self._logger.critical(str(e.message))

            else:
                self._logger.critical(f"Error while connecting to CDF {e!s}")
            requeue = True

        except Exception as e:
            self._logger.critical(f"Extractor could not connect to CDF {e!s}")
            exception_to_report = e
            requeue = True

        if exception_to_report is not None and self._on_fatal_error is not None:
            self._on_fatal_error(exception_to_report)

        return requeue
@@ -90,7 +90,7 @@ class Error:
90
90
  self.end_time = self.start_time
91
91
 
92
92
  # Re-add in case the error has already been reported and dict cleared
93
- self._extractor._report_error(self)
93
+ self._extractor._try_report_error(self)
94
94
 
95
95
  def finish(self) -> None:
96
96
  """
@@ -105,7 +105,7 @@ class Error:
105
105
  self.end_time = now()
106
106
 
107
107
  # Re-add in case the error has already been reported and dict cleared
108
- self._extractor._report_error(self)
108
+ self._extractor._try_report_error(self)
109
109
 
110
110
  def __enter__(self) -> "Error":
111
111
  """
@@ -5,8 +5,12 @@ This class is subclassed by both the ``TaskContext`` and the ``Extractor`` base
5
5
  for logging and error handling in extractors.
6
6
  """
7
7
 
8
+ import datetime
9
+ import os
8
10
  from abc import ABC, abstractmethod
9
11
  from logging import Logger, getLogger
12
+ from logging.handlers import TimedRotatingFileHandler
13
+ from pathlib import Path
10
14
  from traceback import format_exception
11
15
  from typing import Literal
12
16
 
@@ -296,3 +300,48 @@ class CogniteLogger(ABC):
296
300
  description=message,
297
301
  details=details,
298
302
  ).instant()
303
+
304
+
305
+ class RobustFileHandler(TimedRotatingFileHandler):
306
+ """
307
+ A TimedRotatingFileHandler that gracefully handles directory/permission issues.
308
+
309
+ It can automatically create log directories and raise error to fallback to console logging
310
+ if the file cannot be created or accessed.
311
+ """
312
+
313
+ def __init__(
314
+ self,
315
+ filename: Path,
316
+ create_dirs: bool = True,
317
+ when: str = "h",
318
+ interval: int = 1,
319
+ backupCount: int = 0,
320
+ encoding: str | None = None,
321
+ delay: bool = False,
322
+ utc: bool = False,
323
+ atTime: datetime.time | None = None,
324
+ errors: str | None = None,
325
+ ) -> None:
326
+ self.create_dirs = create_dirs
327
+
328
+ if self.create_dirs:
329
+ directory = filename.parent
330
+ directory.mkdir(parents=True, exist_ok=True)
331
+ if not os.access(directory, os.W_OK):
332
+ raise PermissionError(f"Cannot write to directory: {directory}")
333
+
334
+ super().__init__(
335
+ filename,
336
+ when=when,
337
+ interval=interval,
338
+ backupCount=backupCount,
339
+ encoding=encoding,
340
+ delay=delay,
341
+ utc=utc,
342
+ atTime=atTime,
343
+ errors=errors,
344
+ )
345
+
346
+ self.stream.write("")
347
+ self.stream.flush()