cognite-extractor-utils 7.4.7__tar.gz → 7.4.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic. Click here for more details.
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/PKG-INFO +2 -1
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/__init__.py +1 -1
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/configtools/__init__.py +2 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/configtools/_util.py +4 -2
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/configtools/elements.py +40 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/configtools/loaders.py +3 -1
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/unstable/configuration/loaders.py +3 -3
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/unstable/configuration/models.py +82 -2
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/core/__init__.py +0 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/core/__main__.py +31 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/core/_messaging.py +5 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/core/base.py +116 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/core/runtime.py +171 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/scheduling/__init__.py +3 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/scheduling/_scheduler.py +102 -0
- cognite_extractor_utils-7.4.9/cognite/extractorutils/unstable/scheduling/_schedules.py +31 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/files.py +13 -2
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/pyproject.toml +3 -2
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/LICENSE +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/README.md +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/_inner_util.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/base.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/configtools/validators.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/exceptions.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/metrics.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/py.typed +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/statestore/__init__.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/statestore/_base.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/statestore/hashing.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/statestore/watermark.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/threading.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/unstable/__init__.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/__init__.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/_base.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/_metrics.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/assets.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/data_modeling.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/events.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/raw.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader/time_series.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader_extractor.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/uploader_types.py +0 -0
- {cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cognite-extractor-utils
|
|
3
|
-
Version: 7.4.7
|
|
3
|
+
Version: 7.4.9
|
|
4
4
|
Summary: Utilities for easier development of extractors for CDF
|
|
5
5
|
Home-page: https://github.com/cognitedata/python-extractor-utils
|
|
6
6
|
License: Apache-2.0
|
|
@@ -18,6 +18,7 @@ Requires-Dist: arrow (>=1.0.0,<2.0.0)
|
|
|
18
18
|
Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
|
|
19
19
|
Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
|
|
20
20
|
Requires-Dist: cognite-sdk (>=7.59.0,<8.0.0)
|
|
21
|
+
Requires-Dist: croniter (>=3.0.3,<4.0.0)
|
|
21
22
|
Requires-Dist: dacite (>=1.6.0,<2.0.0)
|
|
22
23
|
Requires-Dist: decorator (>=5.1.1,<6.0.0)
|
|
23
24
|
Requires-Dist: httpx (>=0.27.0,<0.28.0)
|
|
@@ -90,6 +90,7 @@ from cognite.extractorutils.exceptions import InvalidConfigError
|
|
|
90
90
|
from .elements import (
|
|
91
91
|
AuthenticatorConfig,
|
|
92
92
|
BaseConfig,
|
|
93
|
+
CastableInt,
|
|
93
94
|
CertificateConfig,
|
|
94
95
|
CogniteConfig,
|
|
95
96
|
ConfigType,
|
|
@@ -99,6 +100,7 @@ from .elements import (
|
|
|
99
100
|
LocalStateStoreConfig,
|
|
100
101
|
LoggingConfig,
|
|
101
102
|
MetricsConfig,
|
|
103
|
+
PortNumber,
|
|
102
104
|
RawDestinationConfig,
|
|
103
105
|
RawStateStoreConfig,
|
|
104
106
|
StateStoreConfig,
|
|
@@ -81,8 +81,10 @@ def _to_snake_case(dictionary: Dict[str, Any], case_style: str) -> Dict[str, Any
|
|
|
81
81
|
raise ValueError(f"Invalid case style: {case_style}")
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
def _load_certificate_data(
|
|
85
|
-
|
|
84
|
+
def _load_certificate_data(
|
|
85
|
+
cert_path: str | Path, password: Optional[str]
|
|
86
|
+
) -> Union[Tuple[str, str], Tuple[bytes, bytes]]:
|
|
87
|
+
path = Path(cert_path) if isinstance(cert_path, str) else cert_path
|
|
86
88
|
cert_data = Path(path).read_bytes()
|
|
87
89
|
|
|
88
90
|
if path.suffix == ".pem":
|
|
@@ -744,3 +744,43 @@ class IgnorePattern:
|
|
|
744
744
|
_logger.warning("'options' is preferred over 'flags' as this may be removed in a future release")
|
|
745
745
|
self.options = self.flags
|
|
746
746
|
self.flags = None
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
class CastableInt(int):
    """
    Represents an integer in a config schema. Difference from regular int is that the
    value of this type can be either a string or an integer in the yaml file.
    """

    def __new__(cls, value: Any) -> "CastableInt":
        """
        Returns value as is if it's int. If it's str or bytes try to convert to int.

        Type check is required to avoid unexpected behaviour, such as implicitly casting booleans,
        floats and other types supported by standard int. ``bool`` is rejected explicitly because it
        is a subclass of ``int`` and would otherwise slip through the ``isinstance`` check.

        Raises:
            ValueError: If conversion is unsuccessful or value is of a type that is not supported.
        """

        if isinstance(value, bool) or not isinstance(value, (int, str, bytes)):
            raise ValueError(f"CastableInt cannot be created form value {value!r} of type {type(value)!r}.")

        return super().__new__(cls, value)


class PortNumber(CastableInt):
    """
    A subclass of int to be used in config schemas. It represents a valid port number (0 to 65535) and allows the value
    to be of either str or int type. If the value is not a valid port number raises a ValueError at instantiation.
    """

    def __new__(cls, value: Any) -> "PortNumber":
        """
        Try to convert the `value` to int. If successful, check if it's within a valid range for a port number.

        Raises:
            ValueError: If conversion to int fails (including unsupported types such as bool or float), or if the
                resulting number is outside the range 0 to 65535.
        """
        port = super().__new__(cls, value)

        if not (0 <= port <= 65535):
            raise ValueError(f"Port number must be between 0 and 65535. Got: {port}.")

        return port
|
|
@@ -36,8 +36,10 @@ from cognite.client import CogniteClient
|
|
|
36
36
|
from cognite.extractorutils.configtools._util import _to_snake_case
|
|
37
37
|
from cognite.extractorutils.configtools.elements import (
|
|
38
38
|
BaseConfig,
|
|
39
|
+
CastableInt,
|
|
39
40
|
ConfigType,
|
|
40
41
|
IgnorePattern,
|
|
42
|
+
PortNumber,
|
|
41
43
|
TimeIntervalConfig,
|
|
42
44
|
_BaseConfig,
|
|
43
45
|
)
|
|
@@ -224,7 +226,7 @@ def _load_yaml(
|
|
|
224
226
|
config = dacite.from_dict(
|
|
225
227
|
data=config_dict,
|
|
226
228
|
data_class=config_type,
|
|
227
|
-
config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig, Path]),
|
|
229
|
+
config=dacite.Config(strict=True, cast=[Enum, TimeIntervalConfig, Path, CastableInt, PortNumber]),
|
|
228
230
|
)
|
|
229
231
|
except dacite.UnexpectedDataError as e:
|
|
230
232
|
unknowns = [f'"{k.replace("_", "-") if case_style == "hyphen" else k}"' for k in e.keys]
|
|
@@ -2,7 +2,7 @@ import json
|
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from io import StringIO
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Dict, Optional, TextIO, Type, TypeVar, Union
|
|
5
|
+
from typing import Dict, Optional, TextIO, Tuple, Type, TypeVar, Union
|
|
6
6
|
|
|
7
7
|
from pydantic import ValidationError
|
|
8
8
|
|
|
@@ -33,7 +33,7 @@ def load_file(path: Path, schema: Type[_T]) -> _T:
|
|
|
33
33
|
|
|
34
34
|
def load_from_cdf(
|
|
35
35
|
cognite_client: CogniteClient, external_id: str, schema: Type[_T], revision: Optional[int] = None
|
|
36
|
-
) -> _T:
|
|
36
|
+
) -> Tuple[_T, int]:
|
|
37
37
|
params: Dict[str, Union[str, int]] = {"externalId": external_id}
|
|
38
38
|
if revision:
|
|
39
39
|
params["revision"] = revision
|
|
@@ -44,7 +44,7 @@ def load_from_cdf(
|
|
|
44
44
|
)
|
|
45
45
|
response.raise_for_status()
|
|
46
46
|
data = response.json()
|
|
47
|
-
return load_io(StringIO(data["config"]), ConfigFormat.YAML, schema)
|
|
47
|
+
return load_io(StringIO(data["config"]), ConfigFormat.YAML, schema), data["revision"]
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
def load_io(stream: TextIO, format: ConfigFormat, schema: Type[_T]) -> _T:
|
|
@@ -7,7 +7,16 @@ from typing import Annotated, Any, Dict, List, Literal, Optional, Union
|
|
|
7
7
|
from humps import kebabize
|
|
8
8
|
from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
|
|
9
9
|
from pydantic_core import CoreSchema, core_schema
|
|
10
|
-
|
|
10
|
+
from typing_extensions import assert_never
|
|
11
|
+
|
|
12
|
+
from cognite.client import CogniteClient
|
|
13
|
+
from cognite.client.config import ClientConfig
|
|
14
|
+
from cognite.client.credentials import (
|
|
15
|
+
CredentialProvider,
|
|
16
|
+
OAuthClientCertificate,
|
|
17
|
+
OAuthClientCredentials,
|
|
18
|
+
)
|
|
19
|
+
from cognite.extractorutils.configtools._util import _load_certificate_data
|
|
11
20
|
from cognite.extractorutils.exceptions import InvalidConfigError
|
|
12
21
|
|
|
13
22
|
|
|
@@ -33,7 +42,9 @@ class _ClientCredentialsConfig(ConfigModel):
|
|
|
33
42
|
class _ClientCertificateConfig(ConfigModel):
|
|
34
43
|
type: Literal["client-certificate"]
|
|
35
44
|
client_id: str
|
|
36
|
-
|
|
45
|
+
path: Path
|
|
46
|
+
password: Optional[str] = None
|
|
47
|
+
authority_url: str
|
|
37
48
|
scopes: List[str]
|
|
38
49
|
|
|
39
50
|
|
|
@@ -121,6 +132,7 @@ class _ConnectionParameters(ConfigModel):
|
|
|
121
132
|
max_connection_pool_size: int = 50
|
|
122
133
|
ssl_verify: bool = True
|
|
123
134
|
proxies: Dict[str, str] = Field(default_factory=dict)
|
|
135
|
+
timeout: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
|
|
124
136
|
|
|
125
137
|
|
|
126
138
|
class ConnectionConfig(ConfigModel):
|
|
@@ -133,6 +145,74 @@ class ConnectionConfig(ConfigModel):
|
|
|
133
145
|
|
|
134
146
|
connection: _ConnectionParameters = Field(default_factory=_ConnectionParameters)
|
|
135
147
|
|
|
148
|
+
def get_cognite_client(self, client_name: str) -> CogniteClient:
|
|
149
|
+
from cognite.client.config import global_config
|
|
150
|
+
|
|
151
|
+
global_config.disable_pypi_version_check = True
|
|
152
|
+
global_config.disable_gzip = not self.connection.gzip_compression
|
|
153
|
+
global_config.status_forcelist = set(self.connection.status_forcelist)
|
|
154
|
+
global_config.max_retries = self.connection.max_retries
|
|
155
|
+
global_config.max_retries_connect = self.connection.max_retries_connect
|
|
156
|
+
global_config.max_retry_backoff = self.connection.max_retry_backoff.seconds
|
|
157
|
+
global_config.max_connection_pool_size = self.connection.max_connection_pool_size
|
|
158
|
+
global_config.disable_ssl = not self.connection.ssl_verify
|
|
159
|
+
global_config.proxies = self.connection.proxies
|
|
160
|
+
|
|
161
|
+
credential_provider: CredentialProvider
|
|
162
|
+
match self.authentication:
|
|
163
|
+
case _ClientCredentialsConfig() as client_credentials:
|
|
164
|
+
kwargs = {
|
|
165
|
+
"token_url": client_credentials.token_url,
|
|
166
|
+
"client_id": client_credentials.client_id,
|
|
167
|
+
"client_secret": client_credentials.client_secret,
|
|
168
|
+
"scopes": client_credentials.scopes,
|
|
169
|
+
}
|
|
170
|
+
if client_credentials.audience is not None:
|
|
171
|
+
kwargs["audience"] = client_credentials.audience
|
|
172
|
+
if client_credentials.resource is not None:
|
|
173
|
+
kwargs["resource"] = client_credentials.resource
|
|
174
|
+
|
|
175
|
+
credential_provider = OAuthClientCredentials(**kwargs) # type: ignore # I know what I'm doing
|
|
176
|
+
|
|
177
|
+
case _ClientCertificateConfig() as client_certificate:
|
|
178
|
+
thumbprint, key = _load_certificate_data(
|
|
179
|
+
client_certificate.path,
|
|
180
|
+
client_certificate.password,
|
|
181
|
+
)
|
|
182
|
+
credential_provider = OAuthClientCertificate(
|
|
183
|
+
authority_url=client_certificate.authority_url,
|
|
184
|
+
client_id=client_certificate.client_id,
|
|
185
|
+
cert_thumbprint=str(thumbprint),
|
|
186
|
+
certificate=str(key),
|
|
187
|
+
scopes=client_certificate.scopes,
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
case _:
|
|
191
|
+
assert_never(self.authentication)
|
|
192
|
+
|
|
193
|
+
client_config = ClientConfig(
|
|
194
|
+
project=self.project,
|
|
195
|
+
base_url=self.base_url,
|
|
196
|
+
client_name=client_name,
|
|
197
|
+
timeout=self.connection.timeout.seconds,
|
|
198
|
+
credentials=credential_provider,
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
return CogniteClient(client_config)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class CronConfig(ConfigModel):
|
|
205
|
+
type: Literal["cron"]
|
|
206
|
+
expression: str
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class IntervalConfig(ConfigModel):
|
|
210
|
+
type: Literal["interval"]
|
|
211
|
+
expression: TimeIntervalConfig
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
ScheduleConfig = Annotated[CronConfig | IntervalConfig, Field(discriminator="type")]
|
|
215
|
+
|
|
136
216
|
|
|
137
217
|
class LogLevel(Enum):
|
|
138
218
|
CRITICAL = "CRITICAL"
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Example of how you would build an extractor with the new base class
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from cognite.extractorutils.unstable.configuration.models import ExtractorConfig
|
|
6
|
+
|
|
7
|
+
from .base import Extractor
|
|
8
|
+
from .runtime import Runtime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MyConfig(ExtractorConfig):
|
|
12
|
+
parameter_one: int
|
|
13
|
+
parameter_two: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MyExtractor(Extractor[MyConfig]):
|
|
17
|
+
NAME = "Test extractor"
|
|
18
|
+
EXTERNAL_ID = "test-extractor"
|
|
19
|
+
DESCRIPTION = "Test of the new runtime"
|
|
20
|
+
VERSION = "1.0.0"
|
|
21
|
+
CONFIG_TYPE = MyConfig
|
|
22
|
+
|
|
23
|
+
def run(self) -> None:
|
|
24
|
+
self.logger.info("Started!")
|
|
25
|
+
if not self.cancellation_token.wait(10):
|
|
26
|
+
raise ValueError("Oops")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
if __name__ == "__main__":
|
|
30
|
+
runtime = Runtime(MyExtractor)
|
|
31
|
+
runtime.run()
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from multiprocessing import Queue
|
|
3
|
+
from threading import RLock, Thread
|
|
4
|
+
from types import TracebackType
|
|
5
|
+
from typing import Generic, Literal, Optional, Type, TypeVar, Union
|
|
6
|
+
|
|
7
|
+
from typing_extensions import Self
|
|
8
|
+
|
|
9
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
10
|
+
from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, ExtractorConfig
|
|
11
|
+
from cognite.extractorutils.unstable.core._messaging import RuntimeMessage
|
|
12
|
+
|
|
13
|
+
ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
|
|
14
|
+
ConfigRevision = Union[Literal["local"], int]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Extractor(Generic[ConfigType]):
|
|
18
|
+
NAME: str
|
|
19
|
+
EXTERNAL_ID: str
|
|
20
|
+
DESCRIPTION: str
|
|
21
|
+
VERSION: str
|
|
22
|
+
|
|
23
|
+
CONFIG_TYPE: Type[ConfigType]
|
|
24
|
+
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
connection_config: ConnectionConfig,
|
|
28
|
+
application_config: ConfigType,
|
|
29
|
+
current_config_revision: ConfigRevision,
|
|
30
|
+
) -> None:
|
|
31
|
+
self.cancellation_token = CancellationToken()
|
|
32
|
+
self.cancellation_token.cancel_on_interrupt()
|
|
33
|
+
|
|
34
|
+
self.connection_config = connection_config
|
|
35
|
+
self.application_config = application_config
|
|
36
|
+
self.current_config_revision = current_config_revision
|
|
37
|
+
|
|
38
|
+
self.cognite_client = self.connection_config.get_cognite_client(f"{self.EXTERNAL_ID}-{self.VERSION}")
|
|
39
|
+
|
|
40
|
+
self._checkin_lock = RLock()
|
|
41
|
+
self._runtime_messages: Optional[Queue[RuntimeMessage]] = None
|
|
42
|
+
|
|
43
|
+
self.logger = logging.getLogger(f"{self.EXTERNAL_ID}.main")
|
|
44
|
+
|
|
45
|
+
def _set_runtime_message_queue(self, queue: Queue) -> None:
|
|
46
|
+
self._runtime_messages = queue
|
|
47
|
+
|
|
48
|
+
def _run_checkin(self) -> None:
|
|
49
|
+
def checkin() -> None:
|
|
50
|
+
body = {"externalId": self.connection_config.extraction_pipeline}
|
|
51
|
+
|
|
52
|
+
with self._checkin_lock:
|
|
53
|
+
res = self.cognite_client.post(
|
|
54
|
+
f"/api/v1/projects/{self.cognite_client.config.project}/odin/checkin",
|
|
55
|
+
json=body,
|
|
56
|
+
headers={"cdf-version": "alpha"},
|
|
57
|
+
)
|
|
58
|
+
new_config_revision = res.json().get("lastConfigRevision")
|
|
59
|
+
|
|
60
|
+
if new_config_revision and new_config_revision != self.current_config_revision:
|
|
61
|
+
self.restart()
|
|
62
|
+
|
|
63
|
+
while not self.cancellation_token.is_cancelled:
|
|
64
|
+
try:
|
|
65
|
+
checkin()
|
|
66
|
+
except Exception:
|
|
67
|
+
self.logger.exception("Error during checkin")
|
|
68
|
+
self.cancellation_token.wait(10)
|
|
69
|
+
|
|
70
|
+
def restart(self) -> None:
|
|
71
|
+
if self._runtime_messages:
|
|
72
|
+
self._runtime_messages.put(RuntimeMessage.RESTART)
|
|
73
|
+
self.cancellation_token.cancel()
|
|
74
|
+
|
|
75
|
+
@classmethod
|
|
76
|
+
def init_from_runtime(
|
|
77
|
+
cls,
|
|
78
|
+
connection_config: ConnectionConfig,
|
|
79
|
+
application_config: ConfigType,
|
|
80
|
+
current_config_revision: ConfigRevision,
|
|
81
|
+
) -> Self:
|
|
82
|
+
return cls(connection_config, application_config, current_config_revision)
|
|
83
|
+
|
|
84
|
+
def start(self) -> None:
|
|
85
|
+
self.cognite_client.post(
|
|
86
|
+
f"/api/v1/projects/{self.cognite_client.config.project}/odin/extractorinfo",
|
|
87
|
+
json={
|
|
88
|
+
"externalId": self.connection_config.extraction_pipeline,
|
|
89
|
+
"activeConfigRevision": self.current_config_revision,
|
|
90
|
+
"extractor": {
|
|
91
|
+
"version": self.VERSION,
|
|
92
|
+
"externalId": self.EXTERNAL_ID,
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
headers={"cdf-version": "alpha"},
|
|
96
|
+
)
|
|
97
|
+
Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
|
|
98
|
+
|
|
99
|
+
def stop(self) -> None:
|
|
100
|
+
self.cancellation_token.cancel()
|
|
101
|
+
|
|
102
|
+
def __enter__(self) -> Self:
|
|
103
|
+
self.start()
|
|
104
|
+
return self
|
|
105
|
+
|
|
106
|
+
def __exit__(
|
|
107
|
+
self,
|
|
108
|
+
exc_type: Optional[Type[BaseException]],
|
|
109
|
+
exc_val: Optional[BaseException],
|
|
110
|
+
exc_tb: Optional[TracebackType],
|
|
111
|
+
) -> bool:
|
|
112
|
+
self.stop()
|
|
113
|
+
return exc_val is None
|
|
114
|
+
|
|
115
|
+
def run(self) -> None:
|
|
116
|
+
raise NotImplementedError()
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
from argparse import ArgumentParser, Namespace
|
|
6
|
+
from multiprocessing import Process, Queue
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Generic, Type, TypeVar
|
|
9
|
+
|
|
10
|
+
from typing_extensions import assert_never
|
|
11
|
+
|
|
12
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
13
|
+
from cognite.extractorutils.unstable.configuration.loaders import load_file, load_from_cdf
|
|
14
|
+
from cognite.extractorutils.unstable.configuration.models import ConnectionConfig
|
|
15
|
+
|
|
16
|
+
from ._messaging import RuntimeMessage
|
|
17
|
+
from .base import ConfigRevision, ConfigType, Extractor
|
|
18
|
+
|
|
19
|
+
ExtractorType = TypeVar("ExtractorType", bound=Extractor)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Runtime(Generic[ExtractorType]):
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
extractor: Type[ExtractorType],
|
|
26
|
+
) -> None:
|
|
27
|
+
self._extractor_class = extractor
|
|
28
|
+
self._cancellation_token = CancellationToken()
|
|
29
|
+
self._cancellation_token.cancel_on_interrupt()
|
|
30
|
+
self._message_queue: Queue[RuntimeMessage] = Queue()
|
|
31
|
+
self.logger = logging.getLogger(f"{self._extractor_class.EXTERNAL_ID}.runtime")
|
|
32
|
+
self._setup_logging()
|
|
33
|
+
|
|
34
|
+
def _create_argparser(self) -> ArgumentParser:
|
|
35
|
+
argparser = ArgumentParser(
|
|
36
|
+
prog=sys.argv[0],
|
|
37
|
+
description=self._extractor_class.DESCRIPTION,
|
|
38
|
+
)
|
|
39
|
+
argparser.add_argument(
|
|
40
|
+
"-v",
|
|
41
|
+
"--version",
|
|
42
|
+
action="version",
|
|
43
|
+
version=f"{self._extractor_class.NAME} v{self._extractor_class.VERSION}",
|
|
44
|
+
)
|
|
45
|
+
argparser.add_argument(
|
|
46
|
+
"-c",
|
|
47
|
+
"--connection-config",
|
|
48
|
+
nargs=1,
|
|
49
|
+
type=Path,
|
|
50
|
+
required=True,
|
|
51
|
+
help="Connection parameters",
|
|
52
|
+
)
|
|
53
|
+
argparser.add_argument(
|
|
54
|
+
"-l",
|
|
55
|
+
"--local-override",
|
|
56
|
+
nargs=1,
|
|
57
|
+
type=Path,
|
|
58
|
+
required=False,
|
|
59
|
+
default=None,
|
|
60
|
+
help="Include to use a local application configuration instead of fetching it from CDF",
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
return argparser
|
|
64
|
+
|
|
65
|
+
def _setup_logging(self) -> None:
|
|
66
|
+
# TODO: Figure out file logging for runtime
|
|
67
|
+
fmt = logging.Formatter(
|
|
68
|
+
"%(asctime)s.%(msecs)03d UTC [%(levelname)-8s] %(threadName)s - %(message)s",
|
|
69
|
+
"%Y-%m-%d %H:%M:%S",
|
|
70
|
+
)
|
|
71
|
+
# Set logging to UTC
|
|
72
|
+
fmt.converter = time.gmtime
|
|
73
|
+
|
|
74
|
+
root = logging.getLogger()
|
|
75
|
+
root.setLevel(logging.INFO)
|
|
76
|
+
|
|
77
|
+
console_handler = logging.StreamHandler()
|
|
78
|
+
console_handler.setFormatter(fmt)
|
|
79
|
+
|
|
80
|
+
root.addHandler(console_handler)
|
|
81
|
+
|
|
82
|
+
def _inner_run(
|
|
83
|
+
self,
|
|
84
|
+
message_queue: Queue,
|
|
85
|
+
connection_config: ConnectionConfig,
|
|
86
|
+
application_config: ConfigType,
|
|
87
|
+
current_config_revision: ConfigRevision,
|
|
88
|
+
) -> None:
|
|
89
|
+
# This code is run inside the new extractor process
|
|
90
|
+
extractor = self._extractor_class.init_from_runtime(
|
|
91
|
+
connection_config,
|
|
92
|
+
application_config,
|
|
93
|
+
current_config_revision,
|
|
94
|
+
)
|
|
95
|
+
extractor._set_runtime_message_queue(message_queue)
|
|
96
|
+
|
|
97
|
+
try:
|
|
98
|
+
with extractor:
|
|
99
|
+
extractor.run()
|
|
100
|
+
|
|
101
|
+
except Exception:
|
|
102
|
+
self.logger.exception("Extractor crashed, will attempt restart")
|
|
103
|
+
message_queue.put(RuntimeMessage.RESTART)
|
|
104
|
+
|
|
105
|
+
def _spawn_extractor(
|
|
106
|
+
self,
|
|
107
|
+
connection_config: ConnectionConfig,
|
|
108
|
+
application_config: ConfigType,
|
|
109
|
+
current_config_revision: ConfigRevision,
|
|
110
|
+
) -> Process:
|
|
111
|
+
self._message_queue = Queue()
|
|
112
|
+
process = Process(
|
|
113
|
+
target=self._inner_run,
|
|
114
|
+
args=(self._message_queue, connection_config, application_config, current_config_revision),
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
process.start()
|
|
118
|
+
self.logger.info(f"Started extractor as {process.pid}")
|
|
119
|
+
return process
|
|
120
|
+
|
|
121
|
+
def _get_application_config(
|
|
122
|
+
self,
|
|
123
|
+
args: Namespace,
|
|
124
|
+
connection_config: ConnectionConfig,
|
|
125
|
+
) -> tuple[ConfigType, ConfigRevision]:
|
|
126
|
+
current_config_revision: ConfigRevision
|
|
127
|
+
if args.local_override:
|
|
128
|
+
current_config_revision = "local"
|
|
129
|
+
application_config = load_file(args.local_override[0], self._extractor_class.CONFIG_TYPE)
|
|
130
|
+
else:
|
|
131
|
+
client = connection_config.get_cognite_client(
|
|
132
|
+
f"{self._extractor_class.EXTERNAL_ID}-{self._extractor_class.VERSION}"
|
|
133
|
+
)
|
|
134
|
+
application_config, current_config_revision = load_from_cdf(
|
|
135
|
+
client,
|
|
136
|
+
connection_config.extraction_pipeline,
|
|
137
|
+
self._extractor_class.CONFIG_TYPE,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
return application_config, current_config_revision
|
|
141
|
+
|
|
142
|
+
def run(self) -> None:
|
|
143
|
+
argparser = self._create_argparser()
|
|
144
|
+
args = argparser.parse_args()
|
|
145
|
+
|
|
146
|
+
self.logger.info(f"Started runtime as {os.getpid()}")
|
|
147
|
+
|
|
148
|
+
connection_config = load_file(args.connection_config[0], ConnectionConfig)
|
|
149
|
+
|
|
150
|
+
# This has to be Any. We don't know the type of the extractor's config at type checking since the extractor itself doesn't
|
|
151
|
+
# exist yet, and I have not found a way to represent it in a generic way that isn't just an Any in disguise.
|
|
152
|
+
application_config: Any
|
|
153
|
+
while not self._cancellation_token.is_cancelled:
|
|
154
|
+
application_config, current_config_revision = self._get_application_config(args, connection_config)
|
|
155
|
+
# Start extractor in separate process, and wait for it to end
|
|
156
|
+
process = self._spawn_extractor(connection_config, application_config, current_config_revision)
|
|
157
|
+
process.join()
|
|
158
|
+
|
|
159
|
+
# Check if we are asked to restart the extractor, shut down otherwise
|
|
160
|
+
if not self._message_queue.empty():
|
|
161
|
+
message = self._message_queue.get_nowait()
|
|
162
|
+
match message:
|
|
163
|
+
case RuntimeMessage.RESTART:
|
|
164
|
+
continue
|
|
165
|
+
|
|
166
|
+
case _:
|
|
167
|
+
assert_never(message)
|
|
168
|
+
|
|
169
|
+
else:
|
|
170
|
+
self.logger.info("Shutting down runtime")
|
|
171
|
+
self._cancellation_token.cancel()
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from logging import getLogger
|
|
3
|
+
from threading import RLock, Thread
|
|
4
|
+
from time import time
|
|
5
|
+
from typing import Callable
|
|
6
|
+
|
|
7
|
+
import arrow
|
|
8
|
+
from humps import pascalize
|
|
9
|
+
|
|
10
|
+
from cognite.extractorutils.threading import CancellationToken
|
|
11
|
+
from cognite.extractorutils.unstable.configuration.models import CronConfig, IntervalConfig, ScheduleConfig
|
|
12
|
+
from cognite.extractorutils.unstable.scheduling._schedules import CronSchedule, IntervalSchedule, Schedule
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
|
|
16
|
+
class Job:
|
|
17
|
+
name: str
|
|
18
|
+
call: Callable[[], None]
|
|
19
|
+
schedule: Schedule
|
|
20
|
+
|
|
21
|
+
def __hash__(self) -> int:
|
|
22
|
+
return hash(self.name)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TaskScheduler:
|
|
26
|
+
def __init__(self, cancellation_token: CancellationToken) -> None:
|
|
27
|
+
self._cancellation_token = cancellation_token
|
|
28
|
+
self._jobs: dict[str, Job] = {}
|
|
29
|
+
self._jobs_lock = RLock()
|
|
30
|
+
self._running: set[Job] = set()
|
|
31
|
+
self._running_lock = RLock()
|
|
32
|
+
|
|
33
|
+
self._logger = getLogger()
|
|
34
|
+
|
|
35
|
+
def schedule_task(self, name: str, schedule: ScheduleConfig, task: Callable[[], None]) -> None:
|
|
36
|
+
parsed_schedule: Schedule
|
|
37
|
+
match schedule:
|
|
38
|
+
case CronConfig() as cron_config:
|
|
39
|
+
parsed_schedule = CronSchedule(expression=cron_config.expression)
|
|
40
|
+
|
|
41
|
+
case IntervalConfig() as interval_config:
|
|
42
|
+
parsed_schedule = IntervalSchedule(interval=interval_config.expression.seconds)
|
|
43
|
+
|
|
44
|
+
with self._jobs_lock:
|
|
45
|
+
self._jobs[name] = Job(name=name, call=task, schedule=parsed_schedule)
|
|
46
|
+
|
|
47
|
+
def _get_next(self) -> list[Job]:
|
|
48
|
+
if not self._jobs:
|
|
49
|
+
return []
|
|
50
|
+
with self._jobs_lock:
|
|
51
|
+
next_runs = sorted([(j.schedule.next(), j) for j in self._jobs.values()], key=lambda tup: tup[0])
|
|
52
|
+
return [job for (next, job) in next_runs if next == next_runs[0][0]] if next_runs else []
|
|
53
|
+
|
|
54
|
+
def _run_job(self, job: Job) -> bool:
|
|
55
|
+
with self._running_lock:
|
|
56
|
+
if job in self._running:
|
|
57
|
+
self._logger.warning(f"Job {job.name} already running")
|
|
58
|
+
return False
|
|
59
|
+
|
|
60
|
+
def wrap() -> None:
|
|
61
|
+
with self._running_lock:
|
|
62
|
+
self._running.add(job)
|
|
63
|
+
try:
|
|
64
|
+
job.call()
|
|
65
|
+
|
|
66
|
+
self._logger.info(f"Job {job.name} done. Next run at {arrow.get(job.schedule.next()).isoformat()}")
|
|
67
|
+
|
|
68
|
+
finally:
|
|
69
|
+
with self._running_lock:
|
|
70
|
+
self._running.remove(job)
|
|
71
|
+
|
|
72
|
+
Thread(target=wrap, name=f"Run{pascalize(job.name)}").start()
|
|
73
|
+
return True
|
|
74
|
+
|
|
75
|
+
def trigger(self, name: str) -> bool:
|
|
76
|
+
return self._run_job(self._jobs[name])
|
|
77
|
+
|
|
78
|
+
def run(self) -> None:
|
|
79
|
+
if not self._jobs:
|
|
80
|
+
raise ValueError("Can't run scheduler without any scheduled tasks")
|
|
81
|
+
|
|
82
|
+
# Run all interval jobs on startup since the first next() is one interval from now
|
|
83
|
+
for job in [j for j in self._jobs.values() if isinstance(j.schedule, IntervalSchedule)]:
|
|
84
|
+
self.trigger(job.name)
|
|
85
|
+
|
|
86
|
+
while not self._cancellation_token.is_cancelled:
|
|
87
|
+
next_runs = self._get_next()
|
|
88
|
+
|
|
89
|
+
next_time = next_runs[0].schedule.next()
|
|
90
|
+
wait_time = max(next_time - time(), 0)
|
|
91
|
+
|
|
92
|
+
if wait_time:
|
|
93
|
+
self._logger.info(f"Waiting until {arrow.get(next_time).isoformat()}")
|
|
94
|
+
if self._cancellation_token.wait(wait_time):
|
|
95
|
+
break
|
|
96
|
+
|
|
97
|
+
for job in next_runs:
|
|
98
|
+
self._logger.info(f"Starting job {job.name}")
|
|
99
|
+
self._run_job(job)
|
|
100
|
+
|
|
101
|
+
def stop(self) -> None:
    """
    Request the scheduler to stop by cancelling its cancellation token.

    The loop in ``run`` checks this token and waits on it, so it exits once the
    token is cancelled.
    """
    token = self._cancellation_token
    token.cancel()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from time import time
|
|
3
|
+
|
|
4
|
+
from croniter import croniter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Schedule(ABC):
    """
    Abstract description of when a job should run.

    Concrete schedules implement ``next``; the scheduler compares the returned
    value against ``time()`` to work out how long to wait.
    """

    @abstractmethod
    def next(self) -> int:
        """Return the time of the next run as an integer timestamp."""
        return None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CronSchedule(Schedule):
    """Schedule whose run times are given by a cron expression, evaluated with ``croniter``."""

    def __init__(self, expression: str) -> None:
        """
        Args:
            expression: Cron expression handed to ``croniter``.
        """
        self._cron = croniter(expression)

    def next(self) -> int:
        """Return the next matching time after the current time, truncated to an int."""
        now = time()
        upcoming = self._cron.get_next(start_time=now)
        return int(upcoming)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class IntervalSchedule(Schedule):
    """Schedule that fires at a fixed interval, counted from the time the schedule was created."""

    def __init__(self, interval: int) -> None:
        """
        Args:
            interval: Distance between runs, in the same unit as ``time()`` (seconds).

        Raises:
            ValueError: If ``interval`` is not strictly positive.
        """
        # A zero or negative interval would make the catch-up loop in next()
        # spin forever, since self._next could never pass the current time.
        if interval <= 0:
            raise ValueError(f"Interval must be a positive number, got {interval}")

        self._interval = interval
        self._next = int(time())

    def next(self) -> int:
        """
        Return the next run time.

        The stored run time is advanced in whole intervals until it is no longer
        in the past, so missed runs are skipped rather than queued up.
        """
        t = time()
        while t > self._next:
            self._next += self._interval

        return self._next
|
|
@@ -19,6 +19,7 @@ from math import ceil
|
|
|
19
19
|
from os import PathLike
|
|
20
20
|
from types import TracebackType
|
|
21
21
|
from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
|
|
22
|
+
from urllib.parse import ParseResult, urlparse
|
|
22
23
|
|
|
23
24
|
from httpx import URL, Client, Headers, Request, StreamConsumed, SyncByteStream
|
|
24
25
|
from requests.utils import super_len
|
|
@@ -408,12 +409,22 @@ class IOFileUploadQueue(AbstractUploadQueue):
|
|
|
408
409
|
self, url_str: str, stream: BinaryIO, size: int, mime_type: Optional[str] = None
|
|
409
410
|
) -> Request:
|
|
410
411
|
url = URL(url_str)
|
|
412
|
+
base_url = URL(self.cdf_client.config.base_url)
|
|
413
|
+
|
|
414
|
+
if url.host == base_url.host:
|
|
415
|
+
upload_url = url
|
|
416
|
+
else:
|
|
417
|
+
parsed_url: ParseResult = urlparse(url_str)
|
|
418
|
+
parsed_base_url: ParseResult = urlparse(self.cdf_client.config.base_url)
|
|
419
|
+
replaced_upload_url = parsed_url._replace(netloc=parsed_base_url.netloc).geturl()
|
|
420
|
+
upload_url = URL(replaced_upload_url)
|
|
421
|
+
|
|
411
422
|
headers = Headers(self._httpx_client.headers)
|
|
412
423
|
headers.update(
|
|
413
424
|
{
|
|
414
425
|
"Accept": "*/*",
|
|
415
426
|
"Content-Length": str(size),
|
|
416
|
-
"Host":
|
|
427
|
+
"Host": upload_url.netloc.decode("ascii"),
|
|
417
428
|
"x-cdp-app": self.cdf_client._config.client_name,
|
|
418
429
|
}
|
|
419
430
|
)
|
|
@@ -423,7 +434,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
|
|
|
423
434
|
|
|
424
435
|
return Request(
|
|
425
436
|
method="PUT",
|
|
426
|
-
url=
|
|
437
|
+
url=upload_url,
|
|
427
438
|
stream=IOByteStream(stream),
|
|
428
439
|
headers=headers,
|
|
429
440
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cognite-extractor-utils"
|
|
3
|
-
version = "7.4.7"
|
|
3
|
+
version = "7.4.9"
|
|
4
4
|
description = "Utilities for easier development of extractors for CDF"
|
|
5
5
|
authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -74,6 +74,7 @@ orjson = "^3.10.3"
|
|
|
74
74
|
httpx = "^0.27.0"
|
|
75
75
|
pydantic = "^2.8.2"
|
|
76
76
|
pyhumps = "^3.8.0"
|
|
77
|
+
croniter = "^3.0.3"
|
|
77
78
|
|
|
78
79
|
[tool.poetry.extras]
|
|
79
80
|
experimental = ["cognite-sdk-experimental"]
|
|
@@ -93,7 +94,7 @@ parameterized = "*"
|
|
|
93
94
|
requests = "^2.31.0"
|
|
94
95
|
types-requests = "^2.31.0.20240125"
|
|
95
96
|
httpx = "^0.27.0"
|
|
96
|
-
faker = "^
|
|
97
|
+
faker = "^30.0.0"
|
|
97
98
|
|
|
98
99
|
[build-system]
|
|
99
100
|
requires = ["poetry-core>=1.0.0"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/base.py
RENAMED
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/exceptions.py
RENAMED
|
File without changes
|
{cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/metrics.py
RENAMED
|
File without changes
|
{cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/threading.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cognite_extractor_utils-7.4.7 → cognite_extractor_utils-7.4.9}/cognite/extractorutils/util.py
RENAMED
|
File without changes
|