cognite-extractor-utils 7.5.4__py3-none-any.whl → 7.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (42)
  1. cognite/extractorutils/__init__.py +3 -1
  2. cognite/extractorutils/_inner_util.py +14 -3
  3. cognite/extractorutils/base.py +14 -15
  4. cognite/extractorutils/configtools/__init__.py +25 -0
  5. cognite/extractorutils/configtools/_util.py +7 -9
  6. cognite/extractorutils/configtools/elements.py +58 -49
  7. cognite/extractorutils/configtools/loaders.py +29 -26
  8. cognite/extractorutils/configtools/validators.py +2 -3
  9. cognite/extractorutils/exceptions.py +1 -4
  10. cognite/extractorutils/metrics.py +18 -18
  11. cognite/extractorutils/statestore/_base.py +3 -4
  12. cognite/extractorutils/statestore/hashing.py +24 -24
  13. cognite/extractorutils/statestore/watermark.py +17 -14
  14. cognite/extractorutils/threading.py +4 -4
  15. cognite/extractorutils/unstable/configuration/exceptions.py +24 -0
  16. cognite/extractorutils/unstable/configuration/loaders.py +18 -7
  17. cognite/extractorutils/unstable/configuration/models.py +25 -3
  18. cognite/extractorutils/unstable/core/_dto.py +10 -0
  19. cognite/extractorutils/unstable/core/base.py +179 -29
  20. cognite/extractorutils/unstable/core/errors.py +72 -0
  21. cognite/extractorutils/unstable/core/restart_policy.py +29 -0
  22. cognite/extractorutils/unstable/core/runtime.py +170 -26
  23. cognite/extractorutils/unstable/core/tasks.py +2 -0
  24. cognite/extractorutils/unstable/scheduling/_scheduler.py +4 -4
  25. cognite/extractorutils/uploader/__init__.py +14 -0
  26. cognite/extractorutils/uploader/_base.py +8 -8
  27. cognite/extractorutils/uploader/assets.py +15 -9
  28. cognite/extractorutils/uploader/data_modeling.py +13 -13
  29. cognite/extractorutils/uploader/events.py +9 -9
  30. cognite/extractorutils/uploader/files.py +153 -46
  31. cognite/extractorutils/uploader/raw.py +10 -10
  32. cognite/extractorutils/uploader/time_series.py +56 -58
  33. cognite/extractorutils/uploader/upload_failure_handler.py +64 -0
  34. cognite/extractorutils/uploader_extractor.py +11 -11
  35. cognite/extractorutils/uploader_types.py +4 -12
  36. cognite/extractorutils/util.py +21 -23
  37. {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/METADATA +4 -3
  38. cognite_extractor_utils-7.5.6.dist-info/RECORD +49 -0
  39. {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/WHEEL +1 -1
  40. cognite/extractorutils/unstable/core/__main__.py +0 -31
  41. cognite_extractor_utils-7.5.4.dist-info/RECORD +0 -46
  42. {cognite_extractor_utils-7.5.4.dist-info → cognite_extractor_utils-7.5.6.dist-info}/LICENSE +0 -0
@@ -1,23 +1,56 @@
1
1
  import logging
2
+ import logging.config
3
+ import time
2
4
  from concurrent.futures import ThreadPoolExecutor
5
+ from contextvars import ContextVar, Token
6
+ from logging.handlers import TimedRotatingFileHandler
3
7
  from multiprocessing import Queue
4
8
  from threading import RLock, Thread
9
+ from traceback import format_exception
5
10
  from types import TracebackType
6
- from typing import Generic, Literal, Optional, Type, TypeVar, Union
11
+ from typing import Generic, Literal, Type, TypeVar
7
12
 
8
13
  from humps import pascalize
9
14
  from typing_extensions import Self, assert_never
10
15
 
16
+ from cognite.extractorutils._inner_util import _resolve_log_level
11
17
  from cognite.extractorutils.threading import CancellationToken
12
- from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, ExtractorConfig
18
+ from cognite.extractorutils.unstable.configuration.models import (
19
+ ConnectionConfig,
20
+ ExtractorConfig,
21
+ LogConsoleHandlerConfig,
22
+ LogFileHandlerConfig,
23
+ )
24
+ from cognite.extractorutils.unstable.core._dto import Error as DtoError
13
25
  from cognite.extractorutils.unstable.core._dto import TaskUpdate
14
26
  from cognite.extractorutils.unstable.core._messaging import RuntimeMessage
27
+ from cognite.extractorutils.unstable.core.errors import Error, ErrorLevel
28
+ from cognite.extractorutils.unstable.core.restart_policy import WHEN_CONTINUOUS_TASKS_CRASHES, RestartPolicy
15
29
  from cognite.extractorutils.unstable.core.tasks import ContinuousTask, ScheduledTask, StartupTask, Task
16
30
  from cognite.extractorutils.unstable.scheduling import TaskScheduler
17
31
  from cognite.extractorutils.util import now
18
32
 
33
+ __all__ = ["ConfigType", "ConfigRevision", "Extractor"]
34
+
19
35
  ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
20
- ConfigRevision = Union[Literal["local"], int]
36
+ ConfigRevision = Literal["local"] | int
37
+
38
+
39
+ _T = TypeVar("_T", bound=ExtractorConfig)
40
+
41
+
42
class FullConfig(Generic[_T]):
    """
    Bundle of the configuration objects and revision markers handed to an extractor.

    Attributes are plain, publicly readable fields:

    * ``connection_config``: the connection configuration.
    * ``application_config``: the extractor-specific configuration, of type ``_T``.
    * ``current_config_revision``: the revision the extractor is running with (``"local"`` or an int).
    * ``newest_config_revision``: the newest revision known when this object was created.
    """

    def __init__(
        self,
        connection_config: ConnectionConfig,
        application_config: _T,
        current_config_revision: ConfigRevision,
        newest_config_revision: ConfigRevision,
    ) -> None:
        # Revision bookkeeping
        self.newest_config_revision = newest_config_revision
        self.current_config_revision = current_config_revision

        # Actual configuration payloads
        self.application_config = application_config
        self.connection_config = connection_config
21
54
 
22
55
 
23
56
  class Extractor(Generic[ConfigType]):
@@ -28,31 +61,80 @@ class Extractor(Generic[ConfigType]):
28
61
 
29
62
  CONFIG_TYPE: Type[ConfigType]
30
63
 
31
- def __init__(
32
- self,
33
- connection_config: ConnectionConfig,
34
- application_config: ConfigType,
35
- current_config_revision: ConfigRevision,
36
- ) -> None:
64
+ RESTART_POLICY: RestartPolicy = WHEN_CONTINUOUS_TASKS_CRASHES
65
+
66
+ def __init__(self, config: FullConfig[ConfigType]) -> None:
37
67
  self.cancellation_token = CancellationToken()
38
68
  self.cancellation_token.cancel_on_interrupt()
39
69
 
40
- self.connection_config = connection_config
41
- self.application_config = application_config
42
- self.current_config_revision = current_config_revision
70
+ self.connection_config = config.connection_config
71
+ self.application_config = config.application_config
72
+ self.current_config_revision = config.current_config_revision
73
+ self.newest_config_revision = config.newest_config_revision
43
74
 
44
75
  self.cognite_client = self.connection_config.get_cognite_client(f"{self.EXTERNAL_ID}-{self.VERSION}")
45
76
 
46
77
  self._checkin_lock = RLock()
47
- self._runtime_messages: Optional[Queue[RuntimeMessage]] = None
78
+ self._runtime_messages: Queue[RuntimeMessage] | None = None
48
79
 
49
80
  self._scheduler = TaskScheduler(self.cancellation_token.create_child_token())
50
81
 
51
82
  self._tasks: list[Task] = []
52
83
  self._task_updates: list[TaskUpdate] = []
84
+ self._errors: dict[str, Error] = {}
53
85
 
54
86
  self.logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
55
87
 
88
+ self._current_task: ContextVar[str | None] = ContextVar("current_task", default=None)
89
+
90
+ self.__init_tasks__()
91
+
92
+ def _setup_logging(self) -> None:
93
+ min_level = min([_resolve_log_level(h.level.value) for h in self.application_config.log_handlers])
94
+ max_level = max([_resolve_log_level(h.level.value) for h in self.application_config.log_handlers])
95
+
96
+ root = logging.getLogger()
97
+ root.setLevel(min_level)
98
+
99
+ # The oathlib logs too much on debug level, including secrets
100
+ logging.getLogger("requests_oauthlib.oauth2_session").setLevel(max(max_level, logging.INFO))
101
+
102
+ fmt = logging.Formatter(
103
+ "%(asctime)s.%(msecs)03d UTC [%(levelname)-8s] %(process)d %(threadName)s - %(message)s",
104
+ "%Y-%m-%d %H:%M:%S",
105
+ )
106
+ # Set logging to UTC
107
+ fmt.converter = time.gmtime
108
+
109
+ # Remove any previous logging handlers
110
+ for handler in root.handlers:
111
+ root.removeHandler(handler)
112
+
113
+ # Define new handlers
114
+ for handler_config in self.application_config.log_handlers:
115
+ match handler_config:
116
+ case LogConsoleHandlerConfig() as console_handler:
117
+ sh = logging.StreamHandler()
118
+ sh.setFormatter(fmt)
119
+ sh.setLevel(_resolve_log_level(console_handler.level.value))
120
+
121
+ root.addHandler(sh)
122
+
123
+ case LogFileHandlerConfig() as file_handler:
124
+ fh = TimedRotatingFileHandler(
125
+ filename=file_handler.path,
126
+ when="midnight",
127
+ utc=True,
128
+ backupCount=file_handler.retention,
129
+ )
130
+ fh.setLevel(_resolve_log_level(file_handler.level.value))
131
+ fh.setFormatter(fmt)
132
+
133
+ root.addHandler(fh)
134
+
135
+ def __init_tasks__(self) -> None:
136
+ pass
137
+
56
138
  def _set_runtime_message_queue(self, queue: Queue) -> None:
57
139
  self._runtime_messages = queue
58
140
 
@@ -61,17 +143,36 @@ class Extractor(Generic[ConfigType]):
61
143
  task_updates = [t.model_dump() for t in self._task_updates]
62
144
  self._task_updates.clear()
63
145
 
146
+ error_updates = [
147
+ DtoError(
148
+ external_id=e.external_id,
149
+ level=e.level.value,
150
+ description=e.description,
151
+ details=e.details,
152
+ start_time=e.start_time,
153
+ end_time=e.end_time,
154
+ task=e._task_name if e._task_name is not None else None,
155
+ ).model_dump()
156
+ for e in self._errors.values()
157
+ ]
158
+ self._errors.clear()
159
+
64
160
  res = self.cognite_client.post(
65
161
  f"/api/v1/projects/{self.cognite_client.config.project}/odin/checkin",
66
162
  json={
67
- "externalId": self.connection_config.extraction_pipeline,
163
+ "externalId": self.connection_config.integration,
68
164
  "taskEvents": task_updates,
165
+ "errors": error_updates,
69
166
  },
70
167
  headers={"cdf-version": "alpha"},
71
168
  )
72
169
  new_config_revision = res.json().get("lastConfigRevision")
73
170
 
74
- if new_config_revision and new_config_revision != self.current_config_revision:
171
+ if (
172
+ new_config_revision
173
+ and self.current_config_revision != "local"
174
+ and new_config_revision > self.newest_config_revision
175
+ ):
75
176
  self.restart()
76
177
 
77
178
  def _run_checkin(self) -> None:
@@ -83,39 +184,86 @@ class Extractor(Generic[ConfigType]):
83
184
  self.logger.exception("Error during checkin")
84
185
  self.cancellation_token.wait(10)
85
186
 
187
+ def _report_error(self, error: Error) -> None:
188
+ with self._checkin_lock:
189
+ self._errors[error.external_id] = error
190
+
191
def error(
    self,
    level: ErrorLevel,
    description: str,
    details: str | None = None,
    *,
    force_global: bool = False,
) -> Error:
    """
    Create a new ``Error`` belonging to this extractor.

    Args:
        level: Severity of the error.
        description: Short description of the error.
        details: Optional longer text, such as a formatted traceback.
        force_global: If true, the error is not attributed to any task, even when called while a task
            is running.

    Returns:
        The newly created ``Error``. Unless ``force_global`` is set, its task name is whatever the
        ``_current_task`` context variable holds at call time (``None`` outside of a task).
    """
    running_task = self._current_task.get()
    attributed_task = running_task if not force_global else None

    return Error(
        level=level,
        description=description,
        details=details,
        extractor=self,
        task_name=attributed_task,
    )
208
+
86
209
  def restart(self) -> None:
210
+ self.logger.info("Restarting extractor")
87
211
  if self._runtime_messages:
88
212
  self._runtime_messages.put(RuntimeMessage.RESTART)
89
213
  self.cancellation_token.cancel()
90
214
 
91
215
  @classmethod
92
- def init_from_runtime(
93
- cls,
94
- connection_config: ConnectionConfig,
95
- application_config: ConfigType,
96
- current_config_revision: ConfigRevision,
97
- ) -> Self:
98
- return cls(connection_config, application_config, current_config_revision)
216
+ def _init_from_runtime(cls, config: FullConfig[ConfigType]) -> Self:
217
+ return cls(config)
99
218
 
100
219
  def add_task(self, task: Task) -> None:
220
+ # Store this for later, since we'll override it with the wrapped version
101
221
  target = task.target
102
222
 
103
- def wrapped() -> None:
223
+ def run_task() -> None:
224
+ """
225
+ A wrapped version of the task's target, with tracking and error handling
226
+ """
227
+ # Record a task start
104
228
  with self._checkin_lock:
105
229
  self._task_updates.append(
106
230
  TaskUpdate(type="started", name=task.name, timestamp=now()),
107
231
  )
108
232
 
233
+ context_token: Token[str | None] | None = None
234
+
109
235
  try:
236
+ # Set the current task context var, used to track that we're in a task for error reporting
237
+ context_token = self._current_task.set(task.name)
238
+
239
+ # Run task
110
240
  target()
111
241
 
242
+ except Exception as e:
243
+ self.logger.exception(f"Unexpected error in {task.name}")
244
+
245
+ # Task crashed, record it as a fatal error
246
+ self.error(
247
+ ErrorLevel.fatal,
248
+ description="Task crashed unexpectedly",
249
+ details="".join(format_exception(e)),
250
+ ).instant()
251
+
252
+ if self.__class__.RESTART_POLICY(task, e):
253
+ self.restart()
254
+
112
255
  finally:
256
+ # Unset the current task
257
+ if context_token is not None:
258
+ self._current_task.reset(context_token)
259
+
260
+ # Record task end
113
261
  with self._checkin_lock:
114
262
  self._task_updates.append(
115
263
  TaskUpdate(type="ended", name=task.name, timestamp=now()),
116
264
  )
117
265
 
118
- task.target = wrapped
266
+ task.target = run_task
119
267
  self._tasks.append(task)
120
268
 
121
269
  match task:
@@ -126,7 +274,7 @@ class Extractor(Generic[ConfigType]):
126
274
  self.cognite_client.post(
127
275
  f"/api/v1/projects/{self.cognite_client.config.project}/odin/extractorinfo",
128
276
  json={
129
- "externalId": self.connection_config.extraction_pipeline,
277
+ "externalId": self.connection_config.integration,
130
278
  "activeConfigRevision": self.current_config_revision,
131
279
  "extractor": {
132
280
  "version": self.VERSION,
@@ -144,6 +292,7 @@ class Extractor(Generic[ConfigType]):
144
292
  )
145
293
 
146
294
  def start(self) -> None:
295
+ self._setup_logging()
147
296
  self._report_extractor_info()
148
297
  Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
149
298
 
@@ -156,14 +305,15 @@ class Extractor(Generic[ConfigType]):
156
305
 
157
306
  def __exit__(
158
307
  self,
159
- exc_type: Optional[Type[BaseException]],
160
- exc_val: Optional[BaseException],
161
- exc_tb: Optional[TracebackType],
308
+ exc_type: Type[BaseException] | None,
309
+ exc_val: BaseException | None,
310
+ exc_tb: TracebackType | None,
162
311
  ) -> bool:
163
312
  self.stop()
164
313
  with self._checkin_lock:
165
314
  self._checkin()
166
315
 
316
+ self.logger.info("Shutting down extractor")
167
317
  return exc_val is None
168
318
 
169
319
  def run(self) -> None:
@@ -186,7 +336,7 @@ class Extractor(Generic[ConfigType]):
186
336
  case _:
187
337
  assert_never(task)
188
338
 
189
- self.logger.info("Starting up extractor")
339
+ self.logger.info("Starting extractor")
190
340
  if startup:
191
341
  with ThreadPoolExecutor() as pool:
192
342
  for task in startup:
@@ -0,0 +1,72 @@
1
+ import typing
2
+ from enum import Enum
3
+ from types import TracebackType
4
+ from uuid import uuid4
5
+
6
+ from cognite.extractorutils.util import now
7
+
8
+ if typing.TYPE_CHECKING:
9
+ from .base import Extractor
10
+
11
+ __all__ = ["Error", "ErrorLevel"]
12
+
13
+
14
class ErrorLevel(Enum):
    """Severity levels an ``Error`` can carry; each member's value is its lowercase name."""

    # Listed in the order the members are declared: warning, error, fatal
    warning = "warning"
    error = "error"
    fatal = "fatal"
18
+
19
+
20
class Error:
    """
    An error tracked by an extractor, with a start time and an optional end time.

    Creating an ``Error`` immediately reports it to the owning extractor. The error stays open
    (``end_time is None``) until ``instant()`` or ``finish()`` is called, or until the ``with`` block it
    is used in exits. Ending an error reports it to the extractor again. An error can only be ended once;
    later calls are ignored.
    """

    def __init__(
        self,
        level: ErrorLevel,
        description: str,
        details: str | None,
        task_name: str | None,
        extractor: "Extractor",
    ) -> None:
        """
        Create the error, stamp it with a fresh UUID and the current time, and report it.

        Args:
            level: Severity of the error.
            description: Short description of the error.
            details: Optional longer text.
            task_name: Name of the task the error belongs to, or ``None`` if it is not tied to a task.
            extractor: The extractor that the error is reported to.
        """
        self._extractor = extractor
        self._task_name = task_name

        self.level = level
        self.description = description
        self.details = details

        self.external_id = str(uuid4())
        self.start_time = now()
        self.end_time: int | None = None

        self._extractor._report_error(self)

    def instant(self) -> None:
        """Mark the error as a point-in-time event by setting its end time equal to its start time."""
        # Only end the error once
        if self.end_time is None:
            self.end_time = self.start_time

            # Re-add in case the error has already been reported and dict cleared
            self._extractor._report_error(self)

    def finish(self) -> None:
        """Close the error, using the current time as its end time."""
        # Only end the error once
        if self.end_time is None:
            self.end_time = now()

            # Re-add in case the error has already been reported and dict cleared
            self._extractor._report_error(self)

    def __enter__(self) -> "Error":
        """Return the error itself so it can be bound with ``with ... as err``."""
        return self

    def __exit__(
        self,
        exc_type: typing.Type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """
        Finish the error when the ``with`` block exits.

        Returns:
            ``True`` when the block exited without an exception, ``False`` otherwise, so an exception
            raised inside the block is never suppressed.
        """
        self.finish()
        exited_cleanly = exc_val is None
        return exited_cleanly
@@ -0,0 +1,29 @@
1
+ from typing import Callable
2
+
3
+ from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Task
4
+
5
+ RestartPolicy = Callable[[Task, Exception], bool]
6
+
7
+
8
def _false(_task: Task, _exception: Exception) -> bool:
    """Restart predicate that ignores its arguments and never asks for a restart."""
    return False


def _true(_task: Task, _exception: Exception) -> bool:
    """Restart predicate that ignores its arguments and always asks for a restart."""
    return True


def _is_continuous(task: Task, _exception: Exception) -> bool:
    """Restart predicate that asks for a restart only when ``task`` is a ``ContinuousTask``."""
    return isinstance(task, ContinuousTask)


# Public names for the predicates above; each one is a callable taking (task, exception) and returning
# whether a restart should happen.
NEVER = _false
WHEN_CONTINUOUS_TASKS_CRASHES = _is_continuous
WHEN_ANY_TASK_CRASHES = _true
23
+
24
+ __all__ = [
25
+ "RestartPolicy",
26
+ "NEVER",
27
+ "WHEN_CONTINUOUS_TASKS_CRASHES",
28
+ "WHEN_ANY_TASK_CRASHES",
29
+ ]