cognite-extractor-utils 7.4.8__tar.gz → 7.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/PKG-INFO +1 -1
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/__init__.py +1 -1
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/__init__.py +2 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/elements.py +40 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/loaders.py +3 -1
- cognite_extractor_utils-7.5.0/cognite/extractorutils/unstable/core/_dto.py +34 -0
- cognite_extractor_utils-7.5.0/cognite/extractorutils/unstable/core/base.py +203 -0
- cognite_extractor_utils-7.5.0/cognite/extractorutils/unstable/core/tasks.py +29 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/scheduling/_scheduler.py +2 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/util.py +112 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/pyproject.toml +3 -3
- cognite_extractor_utils-7.4.8/cognite/extractorutils/unstable/core/base.py +0 -116
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/LICENSE +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/README.md +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/_inner_util.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/base.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/_util.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/configtools/validators.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/exceptions.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/metrics.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/py.typed +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/__init__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/_base.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/hashing.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/statestore/watermark.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/threading.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/__init__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/configuration/loaders.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/configuration/models.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/__init__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/__main__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/_messaging.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/core/runtime.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/scheduling/__init__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/unstable/scheduling/_schedules.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/__init__.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/_base.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/_metrics.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/assets.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/data_modeling.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/events.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/files.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/raw.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader/time_series.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader_extractor.py +0 -0
- {cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/uploader_types.py +0 -0
|
@@ -90,6 +90,7 @@ from cognite.extractorutils.exceptions import InvalidConfigError
|
|
|
90
90
|
from .elements import (
|
|
91
91
|
AuthenticatorConfig,
|
|
92
92
|
BaseConfig,
|
|
93
|
+
CastableInt,
|
|
93
94
|
CertificateConfig,
|
|
94
95
|
CogniteConfig,
|
|
95
96
|
ConfigType,
|
|
@@ -99,6 +100,7 @@ from .elements import (
|
|
|
99
100
|
LocalStateStoreConfig,
|
|
100
101
|
LoggingConfig,
|
|
101
102
|
MetricsConfig,
|
|
103
|
+
PortNumber,
|
|
102
104
|
RawDestinationConfig,
|
|
103
105
|
RawStateStoreConfig,
|
|
104
106
|
StateStoreConfig,
|
|
@@ -744,3 +744,43 @@ class IgnorePattern:
|
|
|
744
744
|
_logger.warning("'options' is preferred over 'flags' as this may be removed in a future release")
|
|
745
745
|
self.options = self.flags
|
|
746
746
|
self.flags = None
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
class CastableInt(int):
    """
    Represents an integer in a config schema. Difference from regular int is that the
    value of this type can be either a string or an integer in the yaml file.
    """

    def __new__(cls, value: Any) -> "CastableInt":
        """
        Create the integer from ``value``.

        Accepts ``int`` values as they are and converts ``str`` / ``bytes`` values with the
        regular ``int`` parser.

        The explicit type check is required to avoid unexpected behaviour, such as implicitly
        casting booleans, floats and other types supported by the standard ``int`` constructor.

        Args:
            value: The raw value from the config file.

        Returns:
            The value as a ``CastableInt``.

        Raises:
            ValueError: If ``value`` is a bool, is of any other unsupported type, or is a
                str/bytes that cannot be parsed as an integer.
        """

        # bool is a subclass of int, so it must be rejected explicitly; otherwise ``True`` and
        # ``False`` would silently become 1 and 0.
        if isinstance(value, bool) or not isinstance(value, (int, str, bytes)):
            raise ValueError(f"CastableInt cannot be created form value {value!r} of type {type(value)!r}.")

        return super().__new__(cls, value)
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
class PortNumber(CastableInt):
    """
    Integer config value restricted to the valid port range (0 to 65535, both inclusive).

    Like its parent class, the raw value may be given as either a str or an int. A value that
    cannot be converted, or that falls outside the range, raises a ValueError at instantiation.
    """

    def __new__(cls, value: Any) -> "PortNumber":
        """
        Convert ``value`` to an integer and verify that it is a legal port number.

        Raises ValueError if the conversion fails or the result is outside 0..65535.
        """
        port = super().__new__(cls, value)

        if port < 0 or port > 65535:
            raise ValueError(f"Port number must be between 0 and 65535. Got: {port}.")

        return port
|
|
@@ -36,8 +36,10 @@ from cognite.client import CogniteClient
|
|
|
36
36
|
from cognite.extractorutils.configtools._util import _to_snake_case
|
|
37
37
|
from cognite.extractorutils.configtools.elements import (
|
|
38
38
|
BaseConfig,
|
|
39
|
+
CastableInt,
|
|
39
40
|
ConfigType,
|
|
40
41
|
IgnorePattern,
|
|
42
|
+
PortNumber,
|
|
41
43
|
TimeIntervalConfig,
|
|
42
44
|
_BaseConfig,
|
|
43
45
|
)
|
|
@@ -224,7 +226,7 @@ def _load_yaml(
|
|
|
224
226
|
config = dacite.from_dict(
|
|
225
227
|
data=config_dict,
|
|
226
228
|
data_class=config_type,
|
|
227
|
-
config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig, Path]),
|
|
229
|
+
config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig, Path, CastableInt, PortNumber]),
|
|
228
230
|
)
|
|
229
231
|
except dacite.UnexpectedDataError as e:
|
|
230
232
|
unknowns = [f'"{k.replace("_", "-") if case_style == "hyphen" else k}"' for k in e.keys]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporary holding place for DTOs against Extraction Pipelines 2.0 until it's in the SDK
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
from humps import camelize
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CogniteModel(BaseModel):
    """
    Pydantic base class for DTOs, adjusted to follow the CDF API guidelines:

    * camelCase aliases are generated for the snake_case field names
    * fields that are None are left out of the dumped output instead of appearing as nulls
    """

    # Aliases are generated with humps.camelize, fields may also be populated by their Python
    # name, and unknown fields are rejected.
    model_config = ConfigDict(alias_generator=camelize, populate_by_name=True, extra="forbid")

    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """
        Dump the model to a dict, always with ``exclude_none=True``.

        Any ``exclude_none`` value passed by the caller is overridden.
        """
        forced = {**kwargs, "exclude_none": True}
        return BaseModel.model_dump(self, *args, **forced)

    def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """
        Alias of ``model_dump``, so this entry point excludes None values as well.
        """
        return self.model_dump(*args, **kwargs)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TaskUpdate(CogniteModel):
    """
    A single task lifecycle event: a named task either started or ended at a given time.
    """

    # Which lifecycle transition this event describes
    type: Literal["started"] | Literal["ended"]
    # Name of the task the event belongs to
    name: str
    # Time of the event. NOTE(review): presumably milliseconds since epoch - confirm against the producers of this DTO
    timestamp: int
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
3
|
+
from multiprocessing import Queue
|
|
4
|
+
from threading import RLock, Thread
|
|
5
|
+
from types import TracebackType
|
|
6
|
+
from typing import Generic, Literal, Optional, Type, TypeVar, Union
|
|
7
|
+
|
|
8
|
+
from humps import pascalize
|
|
9
|
+
from typing_extensions import Self, assert_never
|
|
10
|
+
|
|
11
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
12
|
+
from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, ExtractorConfig
|
|
13
|
+
from cognite.extractorutils.unstable.core._dto import TaskUpdate
|
|
14
|
+
from cognite.extractorutils.unstable.core._messaging import RuntimeMessage
|
|
15
|
+
from cognite.extractorutils.unstable.core.tasks import ContinuousTask, ScheduledTask, StartupTask, Task
|
|
16
|
+
from cognite.extractorutils.unstable.scheduling import TaskScheduler
|
|
17
|
+
from cognite.extractorutils.util import now
|
|
18
|
+
|
|
19
|
+
ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
|
|
20
|
+
ConfigRevision = Union[Literal["local"], int]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# NOTE(review): the block nesting in this class was reconstructed from a rendering that had lost
# its indentation (notably the extent of the `with self._checkin_lock` block in `_checkin` and of
# the `if startup` block in `run`) - confirm against the packaged file.
class Extractor(Generic[ConfigType]):
    """
    Base class for an extractor, generic over the type of its application config.

    Holds the connection and application configuration, a Cognite client, a cancellation token and
    a task scheduler. Tasks registered through ``add_task`` are wrapped so that their start and end
    are recorded as ``TaskUpdate`` events, and a background thread started by ``start`` posts those
    events to the ``odin/checkin`` endpoint.
    """

    # Declared but not assigned here; presumably provided by concrete subclasses.
    # EXTERNAL_ID and VERSION are used to name the Cognite client and the logger, and are
    # reported to the extractorinfo endpoint.
    NAME: str
    EXTERNAL_ID: str
    DESCRIPTION: str
    VERSION: str

    CONFIG_TYPE: Type[ConfigType]

    def __init__(
        self,
        connection_config: ConnectionConfig,
        application_config: ConfigType,
        current_config_revision: ConfigRevision,
    ) -> None:
        """
        Set up the extractor state. No tasks are started here.

        Args:
            connection_config: Connection configuration, used to create the Cognite client and to
                look up the extraction pipeline id.
            application_config: The extractor specific configuration object.
            current_config_revision: Revision of the config this instance runs with, either the
                literal ``"local"`` or an integer.
        """
        self.cancellation_token = CancellationToken()
        self.cancellation_token.cancel_on_interrupt()

        self.connection_config = connection_config
        self.application_config = application_config
        self.current_config_revision = current_config_revision

        self.cognite_client = self.connection_config.get_cognite_client(f"{self.EXTERNAL_ID}-{self.VERSION}")

        # Guards _task_updates, which is appended to from task threads and drained by the checkin
        self._checkin_lock = RLock()
        # Not set until _set_runtime_message_queue is called
        self._runtime_messages: Optional[Queue[RuntimeMessage]] = None

        # The scheduler gets its own child token of the extractor's cancellation token
        self._scheduler = TaskScheduler(self.cancellation_token.create_child_token())

        self._tasks: list[Task] = []
        # Start/end events recorded since the last checkin
        self._task_updates: list[TaskUpdate] = []

        self.logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")

    def _set_runtime_message_queue(self, queue: Queue) -> None:
        """
        Store the queue that ``restart`` uses to send messages to the runtime.
        """
        self._runtime_messages = queue

    def _checkin(self) -> None:
        """
        Post the pending task updates to the checkin endpoint, and trigger a restart if the
        response reports a config revision different from the one currently in use.
        """
        with self._checkin_lock:
            task_updates = [t.model_dump() for t in self._task_updates]
            # NOTE(review): the pending updates are cleared before the request is sent, so they
            # are dropped if the post below raises.
            self._task_updates.clear()

        res = self.cognite_client.post(
            f"/api/v1/projects/{self.cognite_client.config.project}/odin/checkin",
            json={
                "externalId": self.connection_config.extraction_pipeline,
                "taskEvents": task_updates,
            },
            headers={"cdf-version": "alpha"},
        )
        new_config_revision = res.json().get("lastConfigRevision")

        # A missing or falsy revision in the response never triggers a restart
        if new_config_revision and new_config_revision != self.current_config_revision:
            self.restart()

    def _run_checkin(self) -> None:
        """
        Run ``_checkin`` in a loop until the cancellation token is cancelled.

        Errors from a checkin are logged and do not stop the loop.
        """
        while not self.cancellation_token.is_cancelled:
            try:
                self.logger.debug("Running checkin")
                self._checkin()
            except Exception:
                self.logger.exception("Error during checkin")
            # Pause between checkins. NOTE(review): presumably 10 seconds - confirm the unit of CancellationToken.wait
            self.cancellation_token.wait(10)

    def restart(self) -> None:
        """
        Send a RESTART message on the runtime message queue (if one is set), then cancel the
        extractor's cancellation token.
        """
        if self._runtime_messages:
            self._runtime_messages.put(RuntimeMessage.RESTART)
        self.cancellation_token.cancel()

    @classmethod
    def init_from_runtime(
        cls,
        connection_config: ConnectionConfig,
        application_config: ConfigType,
        current_config_revision: ConfigRevision,
    ) -> Self:
        """
        Alternate constructor; forwards the three arguments unchanged to ``cls(...)``.
        """
        return cls(connection_config, application_config, current_config_revision)

    def add_task(self, task: Task) -> None:
        """
        Register a task with the extractor.

        The given task object is modified in place: its ``target`` is replaced by a wrapper that
        records a "started" update before calling the original target and an "ended" update
        afterwards. Scheduled tasks are additionally handed to the scheduler.

        Args:
            task: The task to register.
        """
        target = task.target

        def wrapped() -> None:
            with self._checkin_lock:
                self._task_updates.append(
                    TaskUpdate(type="started", name=task.name, timestamp=now()),
                )

            try:
                target()

            finally:
                # Recorded whether the target returned normally or raised
                with self._checkin_lock:
                    self._task_updates.append(
                        TaskUpdate(type="ended", name=task.name, timestamp=now()),
                    )

        task.target = wrapped
        self._tasks.append(task)

        # Only scheduled tasks are acted on here; the other task kinds are started from run()
        match task:
            case ScheduledTask() as t:
                self._scheduler.schedule_task(name=t.name, schedule=t.schedule, task=t.target)

    def _report_extractor_info(self) -> None:
        """
        Post the extractor's version, external id, active config revision and list of registered
        tasks to the extractorinfo endpoint.
        """
        self.cognite_client.post(
            f"/api/v1/projects/{self.cognite_client.config.project}/odin/extractorinfo",
            json={
                "externalId": self.connection_config.extraction_pipeline,
                "activeConfigRevision": self.current_config_revision,
                "extractor": {
                    "version": self.VERSION,
                    "externalId": self.EXTERNAL_ID,
                },
                "tasks": [
                    {
                        "name": t.name,
                        # Every task that is not a ContinuousTask is reported as "batch"
                        "type": "continuous" if isinstance(t, ContinuousTask) else "batch",
                    }
                    for t in self._tasks
                ],
            },
            headers={"cdf-version": "alpha"},
        )

    def start(self) -> None:
        """
        Report the extractor info, then start the checkin loop in a daemon thread.
        """
        self._report_extractor_info()
        Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()

    def stop(self) -> None:
        """
        Cancel the extractor's cancellation token.
        """
        self.cancellation_token.cancel()

    def __enter__(self) -> Self:
        """
        Call ``start`` and return the extractor itself.
        """
        self.start()
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> bool:
        """
        Call ``stop`` and perform one final checkin.

        Returns:
            True if the block exited without an exception, False otherwise (in which case the
            exception propagates to the caller).
        """
        self.stop()
        # Final checkin to send the task updates recorded since the last one
        with self._checkin_lock:
            self._checkin()

        return exc_val is None

    def run(self) -> None:
        """
        Run the registered tasks.

        Startup tasks are submitted to a thread pool first, then each continuous task is started
        in its own thread. Finally the calling thread either runs the scheduler (if any scheduled
        task is registered) or waits on the cancellation token.
        """
        has_scheduled = False

        startup: list[StartupTask] = []
        continuous: list[ContinuousTask] = []

        # Sort the registered tasks by kind
        for task in self._tasks:
            match task:
                case ScheduledTask():
                    has_scheduled = True

                case StartupTask() as t:
                    startup.append(t)

                case ContinuousTask() as t:
                    continuous.append(t)

                case _:
                    assert_never(task)

        self.logger.info("Starting up extractor")
        if startup:
            # Leaving the with block waits for all submitted startup tasks to finish.
            # NOTE(review): the returned futures are discarded, so an exception raised by a
            # startup task is not surfaced here.
            with ThreadPoolExecutor() as pool:
                for task in startup:
                    pool.submit(task.target)
        self.logger.info("Startup done")

        for task in continuous:
            Thread(name=pascalize(task.name), target=task.target).start()

        if has_scheduled:
            self._scheduler.run()

        else:
            self.cancellation_token.wait()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Callable
|
|
4
|
+
|
|
5
|
+
from cognite.extractorutils.unstable.configuration.models import ScheduleConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class _Task(ABC):
    """
    Common fields shared by all task kinds: a name and a callable to run.
    """

    # Name identifying the task
    name: str
    # Callable taking no arguments and returning nothing; this is what gets run for the task
    target: Callable[[], None]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class ScheduledTask(_Task):
    """
    A task that carries a schedule configuration in addition to its name and target.
    """

    # Schedule configuration for the task
    schedule: ScheduleConfig
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ContinuousTask(_Task):
    """
    A task kind that adds no fields of its own; it exists so that tasks can be told apart by type.
    """

    pass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class StartupTask(_Task):
    """
    A task kind that adds no fields of its own; it exists so that tasks can be told apart by type.

    Decorated with ``@dataclass`` like the other task classes, so that all of them are declared
    the same way. The generated methods are equivalent to the ones inherited from ``_Task``,
    since no fields are added.
    """

    pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# A type union of the concrete task classes (rather than the shared base class), to help type
# checkers with exhaustiveness checks when a task is handled in a match statement
Task = ScheduledTask | ContinuousTask | StartupTask
|
|
@@ -42,6 +42,8 @@ class TaskScheduler:
|
|
|
42
42
|
parsed_schedule = IntervalSchedule(interval=interval_config.expression.seconds)
|
|
43
43
|
|
|
44
44
|
with self._jobs_lock:
|
|
45
|
+
if name in self._jobs:
|
|
46
|
+
raise KeyError(f"Job '{name}' is already added to the scheduler")
|
|
45
47
|
self._jobs[name] = Job(name=name, call=task, schedule=parsed_schedule)
|
|
46
48
|
|
|
47
49
|
def _get_next(self) -> list[Job]:
|
{cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/util.py
RENAMED
|
@@ -17,10 +17,12 @@ The ``util`` package contains miscellaneous functions and classes that can some
|
|
|
17
17
|
extractors.
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
+
import io
|
|
20
21
|
import logging
|
|
21
22
|
import random
|
|
22
23
|
from datetime import datetime, timezone
|
|
23
24
|
from functools import partial, wraps
|
|
25
|
+
from io import RawIOBase
|
|
24
26
|
from threading import Thread
|
|
25
27
|
from time import time
|
|
26
28
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Type, TypeVar, Union
|
|
@@ -510,3 +512,113 @@ def datetime_to_timestamp(dt: datetime) -> int:
|
|
|
510
512
|
|
|
511
513
|
def timestamp_to_datetime(ts: int) -> datetime:
|
|
512
514
|
return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def now() -> int:
    """
    Return the current time in CDF format (milliseconds since 1970-01-01 00:00:00 UTC).

    Returns:
        The current timestamp as a whole number of milliseconds.
    """
    seconds_since_epoch = time()
    return int(seconds_since_epoch * 1000)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def truncate_byte_len(item: str, ln: int) -> str:
    """Safely truncate an arbitrary utf-8 string.
    Used to sanitize metadata.

    The result is the longest prefix of ``item`` whose UTF-8 encoding fits in ``ln`` bytes. A
    code point is never cut in half: if the byte limit falls inside a multi-byte code point,
    that code point is dropped entirely.

    Args:
        item (str): string to be truncated
        ln (int): length (bytes). Zero or a negative value yields an empty string, unless
            ``item`` is already empty.

    Returns:
        str: truncated string
    """

    encoded = item.encode("utf-8")
    if len(encoded) <= ln:
        return item
    if ln <= 0:
        return ""

    # Slicing valid UTF-8 at an arbitrary byte offset can only damage the final code point, which
    # is then an incomplete sequence at the very end of the data. Decoding with errors="ignore"
    # drops exactly that incomplete tail and keeps every complete code point before it.
    return encoded[:ln].decode("utf-8", errors="ignore")
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
class BufferedReadWithLength(io.BufferedReader):
    """
    A buffered reader that also exposes a known total length and an optional close callback.

    Args:
        raw: The underlying raw stream to read from.
        buffer_size: Size of the read buffer, passed on to ``io.BufferedReader``.
        len: Total number of bytes the stream is expected to deliver, exposed as ``self.len``.
        on_close: Optional callback invoked once, the first time the stream is closed.
    """

    def __init__(
        self, raw: RawIOBase, buffer_size: int, len: int, on_close: Optional[Callable[[], None]] = None
    ) -> None:
        super().__init__(raw, buffer_size)
        # Do not remove even if it appears to be unused. :P
        # Requests uses this to add the content-length header, which is necessary for writing to files in azure clusters
        self.len = len
        self.on_close = on_close
        # Tracks whether on_close has been invoked, so that repeated close() calls do not repeat it
        self._close_notified = False

    def close(self) -> None:
        """
        Close the stream, invoking ``on_close`` first if this is the first close.

        The underlying stream is closed even if the callback raises; the callback's exception
        then propagates to the caller.
        """
        try:
            if not self._close_notified:
                self._close_notified = True
                if self.on_close:
                    self.on_close()
        finally:
            super().close()
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def iterable_to_stream(
    iterator: Iterable[bytes],
    file_size_bytes: int,
    buffer_size: int = io.DEFAULT_BUFFER_SIZE,
    on_close: Optional[Callable[[], None]] = None,
) -> BufferedReadWithLength:
    """
    Wrap an iterable of byte chunks in a buffered, readable stream with a known length.

    Args:
        iterator: The chunks making up the content. Any iterable is accepted, including lists
            and tuples as well as generators and other iterators.
        file_size_bytes: Total size of the content in bytes, reported as the length of the stream.
        buffer_size: Buffer size of the returned reader.
        on_close: Optional callback passed on to the returned reader.

    Returns:
        A ``BufferedReadWithLength`` reading from the given chunks.
    """
    # Obtain an iterator once, so that plain iterables (which do not support next()) work too
    chunks = iter(iterator)

    class ChunkIteratorStream(io.RawIOBase):
        def __init__(self) -> None:
            # Remainder of a chunk that did not fit in the buffer of the previous read, if any
            self.last_chunk = None
            self.loaded_bytes = 0
            self.file_size_bytes = file_size_bytes

        def tell(self) -> int:
            return self.loaded_bytes

        def __len__(self) -> int:
            return self.file_size_bytes

        def readable(self) -> bool:
            return True

        def readinto(self, buffer: Any) -> int | None:
            # Bytes to return
            ln = len(buffer)

            chunk = self.last_chunk
            # Skip over empty chunks: returning 0 from readinto means end of stream, so an empty
            # chunk in the middle of the data must not be reported as a read of 0 bytes
            while not chunk:
                try:
                    chunk = next(chunks)
                except StopIteration:
                    return 0

            output, remainder = chunk[:ln], chunk[ln:]
            self.last_chunk = None if len(remainder) == 0 else remainder
            buffer[: len(output)] = output
            self.loaded_bytes += len(output)
            return len(output)

    return BufferedReadWithLength(
        ChunkIteratorStream(), buffer_size=buffer_size, len=file_size_bytes, on_close=on_close
    )
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cognite-extractor-utils"
|
|
3
|
-
version = "7.4.8"
|
|
3
|
+
version = "7.5.0"
|
|
4
4
|
description = "Utilities for easier development of extractors for CDF"
|
|
5
5
|
authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -85,8 +85,8 @@ ruff = "^0.6.0"
|
|
|
85
85
|
pytest = "^8.0.0"
|
|
86
86
|
pytest-cov = "^5.0.0"
|
|
87
87
|
sphinx = "^7.0.0"
|
|
88
|
-
sphinx-rtd-theme = "^
|
|
89
|
-
pre-commit = "^
|
|
88
|
+
sphinx-rtd-theme = "^3.0.0"
|
|
89
|
+
pre-commit = "^4.0.0"
|
|
90
90
|
SecretStorage = "^3.1.2"
|
|
91
91
|
twine = "^5.0.0"
|
|
92
92
|
pytest-order = "^1.0.1"
|
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from multiprocessing import Queue
|
|
3
|
-
from threading import RLock, Thread
|
|
4
|
-
from types import TracebackType
|
|
5
|
-
from typing import Generic, Literal, Optional, Type, TypeVar, Union
|
|
6
|
-
|
|
7
|
-
from typing_extensions import Self
|
|
8
|
-
|
|
9
|
-
from cognite.extractorutils.threading import CancellationToken
|
|
10
|
-
from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, ExtractorConfig
|
|
11
|
-
from cognite.extractorutils.unstable.core._messaging import RuntimeMessage
|
|
12
|
-
|
|
13
|
-
ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
|
|
14
|
-
ConfigRevision = Union[Literal["local"], int]
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class Extractor(Generic[ConfigType]):
|
|
18
|
-
NAME: str
|
|
19
|
-
EXTERNAL_ID: str
|
|
20
|
-
DESCRIPTION: str
|
|
21
|
-
VERSION: str
|
|
22
|
-
|
|
23
|
-
CONFIG_TYPE: Type[ConfigType]
|
|
24
|
-
|
|
25
|
-
def __init__(
|
|
26
|
-
self,
|
|
27
|
-
connection_config: ConnectionConfig,
|
|
28
|
-
application_config: ConfigType,
|
|
29
|
-
current_config_revision: ConfigRevision,
|
|
30
|
-
) -> None:
|
|
31
|
-
self.cancellation_token = CancellationToken()
|
|
32
|
-
self.cancellation_token.cancel_on_interrupt()
|
|
33
|
-
|
|
34
|
-
self.connection_config = connection_config
|
|
35
|
-
self.application_config = application_config
|
|
36
|
-
self.current_config_revision = current_config_revision
|
|
37
|
-
|
|
38
|
-
self.cognite_client = self.connection_config.get_cognite_client(f"{self.EXTERNAL_ID}-{self.VERSION}")
|
|
39
|
-
|
|
40
|
-
self._checkin_lock = RLock()
|
|
41
|
-
self._runtime_messages: Optional[Queue[RuntimeMessage]] = None
|
|
42
|
-
|
|
43
|
-
self.logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
|
|
44
|
-
|
|
45
|
-
def _set_runtime_message_queue(self, queue: Queue) -> None:
|
|
46
|
-
self._runtime_messages = queue
|
|
47
|
-
|
|
48
|
-
def _run_checkin(self) -> None:
|
|
49
|
-
def checkin() -> None:
|
|
50
|
-
body = {"externalId": self.connection_config.extraction_pipeline}
|
|
51
|
-
|
|
52
|
-
with self._checkin_lock:
|
|
53
|
-
res = self.cognite_client.post(
|
|
54
|
-
f"/api/v1/projects/{self.cognite_client.config.project}/odin/checkin",
|
|
55
|
-
json=body,
|
|
56
|
-
headers={"cdf-version": "alpha"},
|
|
57
|
-
)
|
|
58
|
-
new_config_revision = res.json().get("lastConfigRevision")
|
|
59
|
-
|
|
60
|
-
if new_config_revision and new_config_revision != self.current_config_revision:
|
|
61
|
-
self.restart()
|
|
62
|
-
|
|
63
|
-
while not self.cancellation_token.is_cancelled:
|
|
64
|
-
try:
|
|
65
|
-
checkin()
|
|
66
|
-
except Exception:
|
|
67
|
-
self.logger.exception("Error during checkin")
|
|
68
|
-
self.cancellation_token.wait(10)
|
|
69
|
-
|
|
70
|
-
def restart(self) -> None:
|
|
71
|
-
if self._runtime_messages:
|
|
72
|
-
self._runtime_messages.put(RuntimeMessage.RESTART)
|
|
73
|
-
self.cancellation_token.cancel()
|
|
74
|
-
|
|
75
|
-
@classmethod
|
|
76
|
-
def init_from_runtime(
|
|
77
|
-
cls,
|
|
78
|
-
connection_config: ConnectionConfig,
|
|
79
|
-
application_config: ConfigType,
|
|
80
|
-
current_config_revision: ConfigRevision,
|
|
81
|
-
) -> Self:
|
|
82
|
-
return cls(connection_config, application_config, current_config_revision)
|
|
83
|
-
|
|
84
|
-
def start(self) -> None:
|
|
85
|
-
self.cognite_client.post(
|
|
86
|
-
f"/api/v1/projects/{self.cognite_client.config.project}/odin/extractorinfo",
|
|
87
|
-
json={
|
|
88
|
-
"externalId": self.connection_config.extraction_pipeline,
|
|
89
|
-
"activeConfigRevision": self.current_config_revision,
|
|
90
|
-
"extractor": {
|
|
91
|
-
"version": self.VERSION,
|
|
92
|
-
"externalId": self.EXTERNAL_ID,
|
|
93
|
-
},
|
|
94
|
-
},
|
|
95
|
-
headers={"cdf-version": "alpha"},
|
|
96
|
-
)
|
|
97
|
-
Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
|
|
98
|
-
|
|
99
|
-
def stop(self) -> None:
|
|
100
|
-
self.cancellation_token.cancel()
|
|
101
|
-
|
|
102
|
-
def __enter__(self) -> Self:
|
|
103
|
-
self.start()
|
|
104
|
-
return self
|
|
105
|
-
|
|
106
|
-
def __exit__(
|
|
107
|
-
self,
|
|
108
|
-
exc_type: Optional[Type[BaseException]],
|
|
109
|
-
exc_val: Optional[BaseException],
|
|
110
|
-
exc_tb: Optional[TracebackType],
|
|
111
|
-
) -> bool:
|
|
112
|
-
self.stop()
|
|
113
|
-
return exc_val is None
|
|
114
|
-
|
|
115
|
-
def run(self) -> None:
|
|
116
|
-
raise NotImplementedError()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/exceptions.py
RENAMED
|
File without changes
|
{cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/metrics.py
RENAMED
|
File without changes
|
{cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.8 → cognite_extractor_utils-7.5.0}/cognite/extractorutils/threading.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|