cognite-extractor-utils 7.4.9__tar.gz → 7.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (47) hide show
  1. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/PKG-INFO +1 -1
  2. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/__init__.py +1 -1
  3. cognite_extractor_utils-7.5.0/cognite/extractorutils/unstable/core/_dto.py +34 -0
  4. cognite_extractor_utils-7.5.0/cognite/extractorutils/unstable/core/base.py +203 -0
  5. cognite_extractor_utils-7.5.0/cognite/extractorutils/unstable/core/tasks.py +29 -0
  6. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/scheduling/_scheduler.py +2 -0
  7. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/util.py +112 -0
  8. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/pyproject.toml +3 -3
  9. cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/core/base.py +0 -116
  10. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/LICENSE +0 -0
  11. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/README.md +0 -0
  12. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/_inner_util.py +0 -0
  13. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/base.py +0 -0
  14. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/__init__.py +0 -0
  15. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/_util.py +0 -0
  16. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/elements.py +0 -0
  17. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/loaders.py +0 -0
  18. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/validators.py +0 -0
  19. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/exceptions.py +0 -0
  20. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/metrics.py +0 -0
  21. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/py.typed +0 -0
  22. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/__init__.py +0 -0
  23. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/_base.py +0 -0
  24. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/hashing.py +0 -0
  25. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/watermark.py +0 -0
  26. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/threading.py +0 -0
  27. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/__init__.py +0 -0
  28. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
  29. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/configuration/loaders.py +0 -0
  30. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/configuration/models.py +0 -0
  31. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/__init__.py +0 -0
  32. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/__main__.py +0 -0
  33. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/_messaging.py +0 -0
  34. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/runtime.py +0 -0
  35. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/scheduling/__init__.py +0 -0
  36. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/scheduling/_schedules.py +0 -0
  37. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/__init__.py +0 -0
  38. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/_base.py +0 -0
  39. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/_metrics.py +0 -0
  40. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/assets.py +0 -0
  41. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/data_modeling.py +0 -0
  42. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/events.py +0 -0
  43. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/files.py +0 -0
  44. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/raw.py +0 -0
  45. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/time_series.py +0 -0
  46. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader_extractor.py +0 -0
  47. {cognite_extractor_utils-7.4.9 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader_types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-extractor-utils
3
- Version: 7.4.9
3
+ Version: 7.5.0
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Home-page: https://github.com/cognitedata/python-extractor-utils
6
6
  License: Apache-2.0
@@ -16,5 +16,5 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.4.9"
19
+ __version__ = "7.5.0"
20
20
  from .base import Extractor
@@ -0,0 +1,34 @@
1
+ """
2
+ Temporary holding place for DTOs against Extraction Pipelines 2.0 until it's in the SDK
3
+ """
4
+
5
+ from typing import Any, Literal
6
+
7
+ from humps import camelize
8
+ from pydantic import BaseModel, ConfigDict
9
+
10
+
11
class CogniteModel(BaseModel):
    """
    Pydantic base class for DTOs, adjusted to be in line with the CDF API guidelines:

    * field aliases are generated with ``humps.camelize``; fields can also be populated by their
      Python attribute names (``populate_by_name=True``)
    * unknown fields are rejected (``extra="forbid"``)
    * ``None`` values are always left out of dumped output

    NOTE(review): ``model_dump`` does not force ``by_alias=True``, so dumped keys are only the
    camelized aliases when the caller asks for that - confirm this is intended.
    """

    model_config = ConfigDict(alias_generator=camelize, populate_by_name=True, extra="forbid")

    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """
        Dump the model to a dict, always with ``exclude_none=True``.

        Any ``exclude_none`` value passed by the caller is overridden.
        """
        # **kwargs is always a dict (possibly empty), so a plain assignment covers every case
        kwargs["exclude_none"] = True
        return BaseModel.model_dump(self, *args, **kwargs)

    def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """
        Same as ``model_dump``, kept under the older pydantic method name.
        """
        return self.model_dump(*args, **kwargs)
29
+
30
+
31
class TaskUpdate(CogniteModel):
    """
    A single task lifecycle event: the task called ``name`` either started or ended at ``timestamp``.
    """

    # Which lifecycle transition is being reported
    type: Literal["started"] | Literal["ended"]
    # Name of the task the event belongs to
    name: str
    # Time of the event as an integer (presumably milliseconds since epoch - verify against callers)
    timestamp: int
@@ -0,0 +1,203 @@
1
+ import logging
2
+ from concurrent.futures import ThreadPoolExecutor
3
+ from multiprocessing import Queue
4
+ from threading import RLock, Thread
5
+ from types import TracebackType
6
+ from typing import Generic, Literal, Optional, Type, TypeVar, Union
7
+
8
+ from humps import pascalize
9
+ from typing_extensions import Self, assert_never
10
+
11
+ from cognite.extractorutils.threading import CancellationToken
12
+ from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, ExtractorConfig
13
+ from cognite.extractorutils.unstable.core._dto import TaskUpdate
14
+ from cognite.extractorutils.unstable.core._messaging import RuntimeMessage
15
+ from cognite.extractorutils.unstable.core.tasks import ContinuousTask, ScheduledTask, StartupTask, Task
16
+ from cognite.extractorutils.unstable.scheduling import TaskScheduler
17
+ from cognite.extractorutils.util import now
18
+
19
+ ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
20
+ ConfigRevision = Union[Literal["local"], int]
21
+
22
+
23
+ class Extractor(Generic[ConfigType]):
24
+ NAME: str
25
+ EXTERNAL_ID: str
26
+ DESCRIPTION: str
27
+ VERSION: str
28
+
29
+ CONFIG_TYPE: Type[ConfigType]
30
+
31
+ def __init__(
32
+ self,
33
+ connection_config: ConnectionConfig,
34
+ application_config: ConfigType,
35
+ current_config_revision: ConfigRevision,
36
+ ) -> None:
37
+ self.cancellation_token = CancellationToken()
38
+ self.cancellation_token.cancel_on_interrupt()
39
+
40
+ self.connection_config = connection_config
41
+ self.application_config = application_config
42
+ self.current_config_revision = current_config_revision
43
+
44
+ self.cognite_client = self.connection_config.get_cognite_client(f"{self.EXTERNAL_ID}-{self.VERSION}")
45
+
46
+ self._checkin_lock = RLock()
47
+ self._runtime_messages: Optional[Queue[RuntimeMessage]] = None
48
+
49
+ self._scheduler = TaskScheduler(self.cancellation_token.create_child_token())
50
+
51
+ self._tasks: list[Task] = []
52
+ self._task_updates: list[TaskUpdate] = []
53
+
54
+ self.logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
55
+
56
+ def _set_runtime_message_queue(self, queue: Queue) -> None:
57
+ self._runtime_messages = queue
58
+
59
+ def _checkin(self) -> None:
60
+ with self._checkin_lock:
61
+ task_updates = [t.model_dump() for t in self._task_updates]
62
+ self._task_updates.clear()
63
+
64
+ res = self.cognite_client.post(
65
+ f"/api/v1/projects/{self.cognite_client.config.project}/odin/checkin",
66
+ json={
67
+ "externalId": self.connection_config.extraction_pipeline,
68
+ "taskEvents": task_updates,
69
+ },
70
+ headers={"cdf-version": "alpha"},
71
+ )
72
+ new_config_revision = res.json().get("lastConfigRevision")
73
+
74
+ if new_config_revision and new_config_revision != self.current_config_revision:
75
+ self.restart()
76
+
77
+ def _run_checkin(self) -> None:
78
+ while not self.cancellation_token.is_cancelled:
79
+ try:
80
+ self.logger.debug("Running checkin")
81
+ self._checkin()
82
+ except Exception:
83
+ self.logger.exception("Error during checkin")
84
+ self.cancellation_token.wait(10)
85
+
86
+ def restart(self) -> None:
87
+ if self._runtime_messages:
88
+ self._runtime_messages.put(RuntimeMessage.RESTART)
89
+ self.cancellation_token.cancel()
90
+
91
+ @classmethod
92
+ def init_from_runtime(
93
+ cls,
94
+ connection_config: ConnectionConfig,
95
+ application_config: ConfigType,
96
+ current_config_revision: ConfigRevision,
97
+ ) -> Self:
98
+ return cls(connection_config, application_config, current_config_revision)
99
+
100
+ def add_task(self, task: Task) -> None:
101
+ target = task.target
102
+
103
+ def wrapped() -> None:
104
+ with self._checkin_lock:
105
+ self._task_updates.append(
106
+ TaskUpdate(type="started", name=task.name, timestamp=now()),
107
+ )
108
+
109
+ try:
110
+ target()
111
+
112
+ finally:
113
+ with self._checkin_lock:
114
+ self._task_updates.append(
115
+ TaskUpdate(type="ended", name=task.name, timestamp=now()),
116
+ )
117
+
118
+ task.target = wrapped
119
+ self._tasks.append(task)
120
+
121
+ match task:
122
+ case ScheduledTask() as t:
123
+ self._scheduler.schedule_task(name=t.name, schedule=t.schedule, task=t.target)
124
+
125
+ def _report_extractor_info(self) -> None:
126
+ self.cognite_client.post(
127
+ f"/api/v1/projects/{self.cognite_client.config.project}/odin/extractorinfo",
128
+ json={
129
+ "externalId": self.connection_config.extraction_pipeline,
130
+ "activeConfigRevision": self.current_config_revision,
131
+ "extractor": {
132
+ "version": self.VERSION,
133
+ "externalId": self.EXTERNAL_ID,
134
+ },
135
+ "tasks": [
136
+ {
137
+ "name": t.name,
138
+ "type": "continuous" if isinstance(t, ContinuousTask) else "batch",
139
+ }
140
+ for t in self._tasks
141
+ ],
142
+ },
143
+ headers={"cdf-version": "alpha"},
144
+ )
145
+
146
+ def start(self) -> None:
147
+ self._report_extractor_info()
148
+ Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
149
+
150
+ def stop(self) -> None:
151
+ self.cancellation_token.cancel()
152
+
153
+ def __enter__(self) -> Self:
154
+ self.start()
155
+ return self
156
+
157
+ def __exit__(
158
+ self,
159
+ exc_type: Optional[Type[BaseException]],
160
+ exc_val: Optional[BaseException],
161
+ exc_tb: Optional[TracebackType],
162
+ ) -> bool:
163
+ self.stop()
164
+ with self._checkin_lock:
165
+ self._checkin()
166
+
167
+ return exc_val is None
168
+
169
+ def run(self) -> None:
170
+ has_scheduled = False
171
+
172
+ startup: list[StartupTask] = []
173
+ continuous: list[ContinuousTask] = []
174
+
175
+ for task in self._tasks:
176
+ match task:
177
+ case ScheduledTask():
178
+ has_scheduled = True
179
+
180
+ case StartupTask() as t:
181
+ startup.append(t)
182
+
183
+ case ContinuousTask() as t:
184
+ continuous.append(t)
185
+
186
+ case _:
187
+ assert_never(task)
188
+
189
+ self.logger.info("Starting up extractor")
190
+ if startup:
191
+ with ThreadPoolExecutor() as pool:
192
+ for task in startup:
193
+ pool.submit(task.target)
194
+ self.logger.info("Startup done")
195
+
196
+ for task in continuous:
197
+ Thread(name=pascalize(task.name), target=task.target).start()
198
+
199
+ if has_scheduled:
200
+ self._scheduler.run()
201
+
202
+ else:
203
+ self.cancellation_token.wait()
@@ -0,0 +1,29 @@
1
+ from abc import ABC
2
+ from dataclasses import dataclass
3
+ from typing import Callable
4
+
5
+ from cognite.extractorutils.unstable.configuration.models import ScheduleConfig
6
+
7
+
8
@dataclass
class _Task(ABC):
    """
    Fields shared by every task kind: a name and the callable that performs the work.
    """

    # Name identifying the task
    name: str
    # Zero-argument callable that is invoked to run the task
    target: Callable[[], None]
12
+
13
+
14
@dataclass
class ScheduledTask(_Task):
    """
    A task that carries a schedule in addition to the common task fields.
    """

    # Schedule configuration describing when the task should run
    schedule: ScheduleConfig
17
+
18
+
19
@dataclass
class ContinuousTask(_Task):
    """
    Task kind for continuously running work. Adds no fields to the common task fields.
    """
22
+
23
+
24
# Decorated like the sibling task classes, so that all task kinds are declared the same way
# instead of this one relying on the dataclass machinery inherited from the base class.
@dataclass
class StartupTask(_Task):
    """
    Task kind for work done at startup. Adds no fields to the common task fields.
    """
26
+
27
+
28
+ # Making a type union to help with exhaustion checks in matches
29
+ Task = ScheduledTask | ContinuousTask | StartupTask
@@ -42,6 +42,8 @@ class TaskScheduler:
42
42
  parsed_schedule = IntervalSchedule(interval=interval_config.expression.seconds)
43
43
 
44
44
  with self._jobs_lock:
45
+ if name in self._jobs:
46
+ raise KeyError(f"Job '{name}' is already added to the scheduler")
45
47
  self._jobs[name] = Job(name=name, call=task, schedule=parsed_schedule)
46
48
 
47
49
  def _get_next(self) -> list[Job]:
@@ -17,10 +17,12 @@ The ``util`` package contains miscellaneous functions and classes that can some
17
17
  extractors.
18
18
  """
19
19
 
20
+ import io
20
21
  import logging
21
22
  import random
22
23
  from datetime import datetime, timezone
23
24
  from functools import partial, wraps
25
+ from io import RawIOBase
24
26
  from threading import Thread
25
27
  from time import time
26
28
  from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Type, TypeVar, Union
@@ -510,3 +512,113 @@ def datetime_to_timestamp(dt: datetime) -> int:
510
512
 
511
513
  def timestamp_to_datetime(ts: int) -> datetime:
512
514
  return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
515
+
516
+
517
def now() -> int:
    """
    Return the current time in CDF format (whole milliseconds since 1970-01-01 00:00:00 UTC).
    """
    seconds_since_epoch = time()
    return int(seconds_since_epoch * 1000)
522
+
523
+
524
def truncate_byte_len(item: str, ln: int) -> str:
    """Safely truncate a string so that its UTF-8 encoding is at most ``ln`` bytes.

    The cut never splits a multi-byte character: a character that does not fit completely is
    dropped as a whole. Used to sanitize metadata.

    Args:
        item (str): string to be truncated
        ln (int): maximum length (bytes) of the UTF-8 encoded result

    Returns:
        str: ``item`` itself if it already fits, otherwise the longest prefix of ``item`` that
            encodes to at most ``ln`` bytes. Empty string if ``ln`` is zero or negative.
    """
    if ln <= 0:
        return ""

    bts = item.encode("utf-8")
    if len(bts) <= ln:
        return item

    # Cutting valid UTF-8 at an arbitrary byte offset can only leave one incomplete sequence at
    # the very end. errors="ignore" drops exactly those dangling bytes and keeps the rest.
    return bts[:ln].decode("utf-8", errors="ignore")
569
+
570
+
571
+ class BufferedReadWithLength(io.BufferedReader):
572
+ def __init__(
573
+ self, raw: RawIOBase, buffer_size: int, len: int, on_close: Optional[Callable[[], None]] = None
574
+ ) -> None:
575
+ super().__init__(raw, buffer_size)
576
+ # Do not remove even if it appears to be unused. :P
577
+ # Requests uses this to add the content-length header, which is necessary for writing to files in azure clusters
578
+ self.len = len
579
+ self.on_close = on_close
580
+
581
+ def close(self) -> None:
582
+ if self.on_close:
583
+ self.on_close()
584
+ return super().close()
585
+
586
+
587
+ def iterable_to_stream(
588
+ iterator: Iterable[bytes],
589
+ file_size_bytes: int,
590
+ buffer_size: int = io.DEFAULT_BUFFER_SIZE,
591
+ on_close: Optional[Callable[[], None]] = None,
592
+ ) -> BufferedReadWithLength:
593
+ class ChunkIteratorStream(io.RawIOBase):
594
+ def __init__(self) -> None:
595
+ self.last_chunk = None
596
+ self.loaded_bytes = 0
597
+ self.file_size_bytes = file_size_bytes
598
+
599
+ def tell(self) -> int:
600
+ return self.loaded_bytes
601
+
602
+ def __len__(self) -> int:
603
+ return self.file_size_bytes
604
+
605
+ def readable(self) -> bool:
606
+ return True
607
+
608
+ def readinto(self, buffer: Any) -> int | None:
609
+ try:
610
+ # Bytes to return
611
+ ln = len(buffer)
612
+ chunk = self.last_chunk or next(iterator) # type: ignore
613
+ output, self.last_chunk = chunk[:ln], chunk[ln:]
614
+ if len(self.last_chunk) == 0: # type: ignore
615
+ self.last_chunk = None
616
+ buffer[: len(output)] = output
617
+ self.loaded_bytes += len(output)
618
+ return len(output)
619
+ except StopIteration:
620
+ return 0
621
+
622
+ return BufferedReadWithLength(
623
+ ChunkIteratorStream(), buffer_size=buffer_size, len=file_size_bytes, on_close=on_close
624
+ )
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cognite-extractor-utils"
3
- version = "7.4.9"
3
+ version = "7.5.0"
4
4
  description = "Utilities for easier development of extractors for CDF"
5
5
  authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
6
6
  license = "Apache-2.0"
@@ -85,8 +85,8 @@ ruff = "^0.6.0"
85
85
  pytest = "^8.0.0"
86
86
  pytest-cov = "^5.0.0"
87
87
  sphinx = "^7.0.0"
88
- sphinx-rtd-theme = "^2.0.0"
89
- pre-commit = "^3.3.0"
88
+ sphinx-rtd-theme = "^3.0.0"
89
+ pre-commit = "^4.0.0"
90
90
  SecretStorage = "^3.1.2"
91
91
  twine = "^5.0.0"
92
92
  pytest-order = "^1.0.1"
@@ -1,116 +0,0 @@
1
- import logging
2
- from multiprocessing import Queue
3
- from threading import RLock, Thread
4
- from types import TracebackType
5
- from typing import Generic, Literal, Optional, Type, TypeVar, Union
6
-
7
- from typing_extensions import Self
8
-
9
- from cognite.extractorutils.threading import CancellationToken
10
- from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, ExtractorConfig
11
- from cognite.extractorutils.unstable.core._messaging import RuntimeMessage
12
-
13
- ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
14
- ConfigRevision = Union[Literal["local"], int]
15
-
16
-
17
- class Extractor(Generic[ConfigType]):
18
- NAME: str
19
- EXTERNAL_ID: str
20
- DESCRIPTION: str
21
- VERSION: str
22
-
23
- CONFIG_TYPE: Type[ConfigType]
24
-
25
- def __init__(
26
- self,
27
- connection_config: ConnectionConfig,
28
- application_config: ConfigType,
29
- current_config_revision: ConfigRevision,
30
- ) -> None:
31
- self.cancellation_token = CancellationToken()
32
- self.cancellation_token.cancel_on_interrupt()
33
-
34
- self.connection_config = connection_config
35
- self.application_config = application_config
36
- self.current_config_revision = current_config_revision
37
-
38
- self.cognite_client = self.connection_config.get_cognite_client(f"{self.EXTERNAL_ID}-{self.VERSION}")
39
-
40
- self._checkin_lock = RLock()
41
- self._runtime_messages: Optional[Queue[RuntimeMessage]] = None
42
-
43
- self.logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
44
-
45
- def _set_runtime_message_queue(self, queue: Queue) -> None:
46
- self._runtime_messages = queue
47
-
48
- def _run_checkin(self) -> None:
49
- def checkin() -> None:
50
- body = {"externalId": self.connection_config.extraction_pipeline}
51
-
52
- with self._checkin_lock:
53
- res = self.cognite_client.post(
54
- f"/api/v1/projects/{self.cognite_client.config.project}/odin/checkin",
55
- json=body,
56
- headers={"cdf-version": "alpha"},
57
- )
58
- new_config_revision = res.json().get("lastConfigRevision")
59
-
60
- if new_config_revision and new_config_revision != self.current_config_revision:
61
- self.restart()
62
-
63
- while not self.cancellation_token.is_cancelled:
64
- try:
65
- checkin()
66
- except Exception:
67
- self.logger.exception("Error during checkin")
68
- self.cancellation_token.wait(10)
69
-
70
- def restart(self) -> None:
71
- if self._runtime_messages:
72
- self._runtime_messages.put(RuntimeMessage.RESTART)
73
- self.cancellation_token.cancel()
74
-
75
- @classmethod
76
- def init_from_runtime(
77
- cls,
78
- connection_config: ConnectionConfig,
79
- application_config: ConfigType,
80
- current_config_revision: ConfigRevision,
81
- ) -> Self:
82
- return cls(connection_config, application_config, current_config_revision)
83
-
84
- def start(self) -> None:
85
- self.cognite_client.post(
86
- f"/api/v1/projects/{self.cognite_client.config.project}/odin/extractorinfo",
87
- json={
88
- "externalId": self.connection_config.extraction_pipeline,
89
- "activeConfigRevision": self.current_config_revision,
90
- "extractor": {
91
- "version": self.VERSION,
92
- "externalId": self.EXTERNAL_ID,
93
- },
94
- },
95
- headers={"cdf-version": "alpha"},
96
- )
97
- Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
98
-
99
- def stop(self) -> None:
100
- self.cancellation_token.cancel()
101
-
102
- def __enter__(self) -> Self:
103
- self.start()
104
- return self
105
-
106
- def __exit__(
107
- self,
108
- exc_type: Optional[Type[BaseException]],
109
- exc_val: Optional[BaseException],
110
- exc_tb: Optional[TracebackType],
111
- ) -> bool:
112
- self.stop()
113
- return exc_val is None
114
-
115
- def run(self) -> None:
116
- raise NotImplementedError()