cognite-extractor-utils 7.6.0__py3-none-any.whl → 7.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (38)
  1. cognite/examples/unstable/extractors/simple_extractor/config/config.yaml +3 -0
  2. cognite/examples/unstable/extractors/simple_extractor/config/connection_config.yaml +10 -0
  3. cognite/examples/unstable/extractors/simple_extractor/main.py +81 -0
  4. cognite/extractorutils/__init__.py +1 -1
  5. cognite/extractorutils/_inner_util.py +2 -2
  6. cognite/extractorutils/base.py +1 -1
  7. cognite/extractorutils/configtools/elements.py +4 -2
  8. cognite/extractorutils/configtools/loaders.py +3 -3
  9. cognite/extractorutils/exceptions.py +1 -1
  10. cognite/extractorutils/metrics.py +8 -6
  11. cognite/extractorutils/statestore/watermark.py +6 -3
  12. cognite/extractorutils/threading.py +2 -2
  13. cognite/extractorutils/unstable/configuration/exceptions.py +28 -1
  14. cognite/extractorutils/unstable/configuration/models.py +157 -32
  15. cognite/extractorutils/unstable/core/_dto.py +80 -7
  16. cognite/extractorutils/unstable/core/base.py +175 -106
  17. cognite/extractorutils/unstable/core/checkin_worker.py +428 -0
  18. cognite/extractorutils/unstable/core/errors.py +2 -2
  19. cognite/extractorutils/unstable/core/logger.py +49 -0
  20. cognite/extractorutils/unstable/core/runtime.py +200 -31
  21. cognite/extractorutils/unstable/core/tasks.py +2 -2
  22. cognite/extractorutils/uploader/__init__.py +2 -0
  23. cognite/extractorutils/uploader/_base.py +1 -1
  24. cognite/extractorutils/uploader/assets.py +1 -1
  25. cognite/extractorutils/uploader/data_modeling.py +1 -1
  26. cognite/extractorutils/uploader/events.py +1 -1
  27. cognite/extractorutils/uploader/files.py +4 -4
  28. cognite/extractorutils/uploader/raw.py +1 -1
  29. cognite/extractorutils/uploader/time_series.py +319 -52
  30. cognite/extractorutils/uploader_extractor.py +20 -5
  31. cognite/extractorutils/uploader_types.py +13 -2
  32. cognite/extractorutils/util.py +8 -6
  33. {cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/METADATA +3 -2
  34. cognite_extractor_utils-7.8.0.dist-info/RECORD +55 -0
  35. cognite_extractor_utils-7.8.0.dist-info/entry_points.txt +2 -0
  36. cognite_extractor_utils-7.6.0.dist-info/RECORD +0 -50
  37. {cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/WHEEL +0 -0
  38. {cognite_extractor_utils-7.6.0.dist-info → cognite_extractor_utils-7.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,3 @@
1
+ log-handlers:
2
+ - type: console
3
+ level: INFO
@@ -0,0 +1,10 @@
1
+ project: ${COGNITE_PROJECT}
2
+ base_url: ${COGNITE_BASE_URL}
3
+ integration:
4
+ external_id: ${COGNITE_INTEGRATION_ID}
5
+ authentication:
6
+ type: "client-credentials"
7
+ client_id: ${COGNITE_CLIENT_ID}
8
+ client_secret: ${COGNITE_CLIENT_SECRET}
9
+ token_url: ${COGNITE_TOKEN_URL}
10
+ scopes: ${COGNITE_SCOPES}
@@ -0,0 +1,81 @@
1
+ """
2
+ An example extractor that logs messages at various levels.
3
+ """
4
+
5
+ from cognite.extractorutils.unstable.configuration.models import ExtractorConfig, IntervalConfig, TimeIntervalConfig
6
+ from cognite.extractorutils.unstable.core.base import Extractor, StartupTask, TaskContext
7
+ from cognite.extractorutils.unstable.core.runtime import Runtime
8
+ from cognite.extractorutils.unstable.core.tasks import ScheduledTask
9
+
10
+
11
class SimpleConfig(ExtractorConfig):
    """
    Configuration model for the SimpleExtractor.

    Declares no fields of its own; everything is inherited from ExtractorConfig.
    """
17
+
18
+
19
class SimpleExtractor(Extractor[SimpleConfig]):
    """
    Example extractor whose tasks do nothing but emit log messages at several levels.
    """

    NAME = "SimpleTestExtractor"
    EXTERNAL_ID = "test-extractor"
    DESCRIPTION = "An extractor for testing log levels"
    VERSION = "1.0.0"
    CONFIG_TYPE = SimpleConfig
    SUPPORTS_DRY_RUN = True

    def __init_tasks__(self) -> None:
        """
        Register the startup task and the recurring task with the extractor.
        """
        self.add_task(StartupTask(name="main_task", target=self.run_my_task))

        # The recurring task is scheduled on a fixed three second interval.
        every_three_seconds = IntervalConfig(type="interval", expression=TimeIntervalConfig("3s"))
        recurring = ScheduledTask(
            name="scheduled_task",
            target=self.scheduled_task,
            schedule=every_three_seconds,
        )
        self.add_task(recurring)

    # Registered as the startup task ("main_task") in __init_tasks__.
    def run_my_task(self, ctx: TaskContext) -> None:
        """
        Emit one debug, one info and one warning message, then a closing info message.

        Args:
            ctx: Task context whose logging methods receive the messages.
        """
        ctx.debug("This is a detailed debug message.")
        ctx.info("This is an informational message.")
        ctx.warning("This is a warning message.")
        ctx.info("Test finished.")

    # Registered as the interval task ("scheduled_task") in __init_tasks__.
    def scheduled_task(self, ctx: TaskContext) -> None:
        """
        Emit an info, a warning, a debug and an error message, in that order.

        Args:
            ctx: Task context whose logging methods receive the messages.
        """
        ctx.info("This is a scheduled task running.")
        ctx.warning("This is a warning from the scheduled task.")
        ctx.debug("Debugging the scheduled task execution.")
        ctx.error("This is an error message from the scheduled task.")

    # add more tasks as needed
70
+
71
+
72
def main() -> None:
    """
    Entry point: wrap SimpleExtractor in a Runtime and run it.
    """
    Runtime(SimpleExtractor).run()


# Only start the extractor when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -16,7 +16,7 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.6.0"
19
+ __version__ = "7.8.0"
20
20
  from .base import Extractor
21
21
 
22
22
  __all__ = ["Extractor"]
@@ -37,14 +37,14 @@ def resolve_log_level_for_httpx(level: str) -> str:
37
37
 
38
38
 
39
39
  class _DecimalEncoder(json.JSONEncoder):
40
- def default(self, obj: Any) -> dict[str, str]:
40
+ def default(self, obj: Any) -> dict[str, str]: # noqa: ANN401
41
41
  if isinstance(obj, Decimal):
42
42
  return {"type": "decimal_encoded", "value": str(obj)}
43
43
  return super().default(obj)
44
44
 
45
45
 
46
46
  class _DecimalDecoder(json.JSONDecoder):
47
- def __init__(self, *args: Any, **kwargs: Any) -> None:
47
+ def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ANN401
48
48
  json.JSONDecoder.__init__(self, *args, object_hook=self.object_hook, **kwargs)
49
49
 
50
50
  def object_hook(self, obj_dict: dict[str, str]) -> dict[str, str] | Decimal:
@@ -112,7 +112,7 @@ class Extractor(Generic[CustomConfigClass]):
112
112
  reload_config_interval: int | None = 300,
113
113
  reload_config_action: ReloadConfigAction = ReloadConfigAction.DO_NOTHING,
114
114
  success_message: str = "Successful shutdown",
115
- ):
115
+ ) -> None:
116
116
  self.name = name
117
117
  self.description = description
118
118
  self.run_handle = run_handle
@@ -29,6 +29,7 @@ from urllib.parse import urljoin, urlparse
29
29
 
30
30
  import yaml
31
31
  from prometheus_client import REGISTRY, start_http_server
32
+ from typing_extensions import Self
32
33
 
33
34
  from cognite.client import ClientConfig, CogniteClient
34
35
  from cognite.client.credentials import (
@@ -604,6 +605,7 @@ class LoggingConfig:
604
605
  when="midnight",
605
606
  utc=True,
606
607
  backupCount=self.file.retention,
608
+ encoding="utf-8",
607
609
  )
608
610
  file_handler.setLevel(self.file.level)
609
611
  file_handler.setFormatter(fmt)
@@ -926,7 +928,7 @@ class CastableInt(int):
926
928
  file.
927
929
  """
928
930
 
929
- def __new__(cls, value: Any) -> "CastableInt":
931
+ def __new__(cls, value: int | str | bytes) -> Self:
930
932
  """
931
933
  Returns value as is if it's int.
932
934
 
@@ -955,7 +957,7 @@ class PortNumber(CastableInt):
955
957
  not a valid port number raises a ValueError at instantiation.
956
958
  """
957
959
 
958
- def __new__(cls, value: Any) -> "PortNumber":
960
+ def __new__(cls, value: int | str | bytes) -> Self:
959
961
  """
960
962
  Try to cast the value to an integer and validate it as a port number.
961
963
 
@@ -72,7 +72,7 @@ class KeyVaultLoader:
72
72
  config: A dictionary containing the configuration for the keyvault.
73
73
  """
74
74
 
75
- def __init__(self, config: dict | None):
75
+ def __init__(self, config: dict | None) -> None:
76
76
  self.config = config
77
77
 
78
78
  self.client: SecretClient | None = None
@@ -374,7 +374,7 @@ class ConfigResolver(Generic[CustomConfigClass]):
374
374
  Automatically reloads the configuration file if it has changed
375
375
  """
376
376
 
377
- def __init__(self, config_path: str, config_type: type[CustomConfigClass]):
377
+ def __init__(self, config_path: str, config_type: type[CustomConfigClass]) -> None:
378
378
  self.config_path = config_path
379
379
  self.config_type = config_type
380
380
 
@@ -384,7 +384,7 @@ class ConfigResolver(Generic[CustomConfigClass]):
384
384
  self._cognite_client: CogniteClient | None = None
385
385
 
386
386
  def _reload_file(self) -> None:
387
- with open(self.config_path) as stream:
387
+ with open(self.config_path, encoding="utf-8") as stream:
388
388
  self._config_text = stream.read()
389
389
 
390
390
  @property
@@ -25,7 +25,7 @@ class InvalidConfigError(Exception):
25
25
  * Unknown fields
26
26
  """
27
27
 
28
- def __init__(self, message: str, details: list[str] | None = None):
28
+ def __init__(self, message: str, details: list[str] | None = None) -> None:
29
29
  super().__init__()
30
30
  self.message = message
31
31
  self.details = details
@@ -67,7 +67,7 @@ _metrics_singularities = {}
67
67
  T = TypeVar("T")
68
68
 
69
69
 
70
- def safe_get(cls: type[T], *args: Any, **kwargs: Any) -> T:
70
+ def safe_get(cls: type[T], *args: Any, **kwargs: Any) -> T: # noqa: ANN401
71
71
  """
72
72
  A factory for instances of metrics collections.
73
73
 
@@ -122,7 +122,7 @@ class BaseMetrics:
122
122
  process_scrape_interval: Interval (in seconds) between each fetch of data for the ``process_*`` gauges
123
123
  """
124
124
 
125
- def __init__(self, extractor_name: str, extractor_version: str, process_scrape_interval: float = 15):
125
+ def __init__(self, extractor_name: str, extractor_version: str, process_scrape_interval: float = 15) -> None:
126
126
  extractor_name = extractor_name.strip().replace(" ", "_")
127
127
 
128
128
  self.startup = Gauge(f"{extractor_name}_start_time", "Timestamp (seconds) of when the extractor last started")
@@ -187,7 +187,7 @@ class AbstractMetricsPusher(ABC):
187
187
  push_interval: int | None = None,
188
188
  thread_name: str | None = None,
189
189
  cancellation_token: CancellationToken | None = None,
190
- ):
190
+ ) -> None:
191
191
  self.push_interval = push_interval
192
192
  self.thread_name = thread_name
193
193
 
@@ -274,7 +274,7 @@ class PrometheusPusher(AbstractMetricsPusher):
274
274
  password: str | None = None,
275
275
  thread_name: str | None = None,
276
276
  cancellation_token: CancellationToken | None = None,
277
- ):
277
+ ) -> None:
278
278
  super().__init__(push_interval, thread_name, cancellation_token)
279
279
 
280
280
  self.username = username
@@ -283,7 +283,9 @@ class PrometheusPusher(AbstractMetricsPusher):
283
283
 
284
284
  self.url = url
285
285
 
286
- def _auth_handler(self, url: str, method: str, timeout: int, headers: list[tuple[str, str]], data: Any) -> Callable:
286
+ def _auth_handler(
287
+ self, url: str, method: str, timeout: int, headers: list[tuple[str, str]], data: bytes
288
+ ) -> Callable[[], None]:
287
289
  """
288
290
  Returns a authentication handler against the Prometheus Pushgateway to use in the pushadd_to_gateway method.
289
291
 
@@ -350,7 +352,7 @@ class CognitePusher(AbstractMetricsPusher):
350
352
  data_set: EitherId | None = None,
351
353
  thread_name: str | None = None,
352
354
  cancellation_token: CancellationToken | None = None,
353
- ):
355
+ ) -> None:
354
356
  super().__init__(push_interval, thread_name, cancellation_token)
355
357
 
356
358
  self.cdf_client = cdf_client
@@ -1,3 +1,6 @@
1
+ # ruff: noqa: ANN401
2
+ # TODO: the state stores should be generic over the type of state, not just Any.
3
+
1
4
  # Copyright 2020 Cognite AS
2
5
  #
3
6
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,7 +69,7 @@ class AbstractStateStore(_BaseStateStore, ABC):
66
69
  trigger_log_level: str = "DEBUG",
67
70
  thread_name: str | None = None,
68
71
  cancellation_token: CancellationToken | None = None,
69
- ):
72
+ ) -> None:
70
73
  super().__init__(
71
74
  save_interval=save_interval,
72
75
  trigger_log_level=trigger_log_level,
@@ -249,7 +252,7 @@ class RawStateStore(AbstractStateStore):
249
252
  trigger_log_level: str = "DEBUG",
250
253
  thread_name: str | None = None,
251
254
  cancellation_token: CancellationToken | None = None,
252
- ):
255
+ ) -> None:
253
256
  super().__init__(save_interval, trigger_log_level, thread_name, cancellation_token)
254
257
 
255
258
  self._cdf_client = cdf_client
@@ -395,7 +398,7 @@ class LocalStateStore(AbstractStateStore):
395
398
  trigger_log_level: str = "DEBUG",
396
399
  thread_name: str | None = None,
397
400
  cancellation_token: CancellationToken | None = None,
398
- ):
401
+ ) -> None:
399
402
  super().__init__(save_interval, trigger_log_level, thread_name, cancellation_token)
400
403
 
401
404
  self._file_path = file_path
@@ -6,7 +6,7 @@ import logging
6
6
  import signal
7
7
  from threading import Condition
8
8
  from time import time
9
- from typing import Any
9
+ from types import FrameType
10
10
 
11
11
 
12
12
  class CancellationToken:
@@ -114,7 +114,7 @@ class CancellationToken:
114
114
  This will set the cancellation token instead of throwing a KeyboardInterrupt exception.
115
115
  """
116
116
 
117
- def sigint_handler(sig_num: int, frame: Any) -> None:
117
+ def sigint_handler(sig_num: int, frame: FrameType | None) -> None:
118
118
  logger = logging.getLogger(__name__)
119
119
  logger.warning("Interrupt signal received, stopping extractor gracefully")
120
120
  self.cancel()
@@ -12,7 +12,7 @@ class InvalidConfigError(Exception):
12
12
  * Unknown fields
13
13
  """
14
14
 
15
- def __init__(self, message: str, details: list[str] | None = None):
15
+ def __init__(self, message: str, details: list[str] | None = None) -> None:
16
16
  super().__init__()
17
17
  self.message = message
18
18
  self.details = details
@@ -30,3 +30,30 @@ class InvalidConfigError(Exception):
30
30
  Underlying message prefixed with 'Invalid config:'.
31
31
  """
32
32
  return self.__str__()
33
+
34
+
35
class InvalidArgumentError(Exception):
    """
    Raised when the extractor is handed an argument it cannot accept.

    Typical causes:
     * A required argument is missing
     * An argument has the wrong type
     * An argument has a value that is not supported
    """

    def __init__(self, message: str) -> None:
        # Keep the raw message on the instance and also pass it to Exception so it lands in ``args``.
        self.message = message
        super().__init__(message)

    def __str__(self) -> str:
        """
        Return the stored message prefixed with 'Invalid argument:'.
        """
        return "Invalid argument: {}".format(self.message)

    def __repr__(self) -> str:
        """
        Return the same text as ``__str__``.
        """
        return str(self)
@@ -4,10 +4,11 @@ Module containing pre-built models for common extractor configuration.
4
4
 
5
5
  import os
6
6
  import re
7
+ from collections.abc import Iterator
7
8
  from datetime import timedelta
8
9
  from enum import Enum
9
10
  from pathlib import Path
10
- from typing import Annotated, Any, Literal
11
+ from typing import Annotated, Any, Literal, TypeVar
11
12
 
12
13
  from humps import kebabize
13
14
  from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
@@ -23,6 +24,13 @@ from cognite.client.credentials import (
23
24
  )
24
25
  from cognite.extractorutils.configtools._util import _load_certificate_data
25
26
  from cognite.extractorutils.exceptions import InvalidConfigError
27
+ from cognite.extractorutils.statestore import (
28
+ AbstractStateStore,
29
+ LocalStateStore,
30
+ NoStateStore,
31
+ RawStateStore,
32
+ )
33
+ from cognite.extractorutils.threading import CancellationToken
26
34
 
27
35
  __all__ = [
28
36
  "AuthenticationConfig",
@@ -53,23 +61,44 @@ class ConfigModel(BaseModel):
53
61
  )
54
62
 
55
63
 
56
- class _ClientCredentialsConfig(ConfigModel):
57
- type: Literal["client-credentials"]
64
class Scopes(str):
    """
    A string of space-separated scopes that also exposes the individual scopes.

    The instance is still a ``str`` holding the raw text. In addition, the text is split on single
    spaces and the parts are kept in ``_scopes``, which drives iteration, equality against other
    ``Scopes`` objects, and hashing.
    """

    def __init__(self, scopes: str) -> None:
        # The str value itself is set by str.__new__; here we only keep the parsed parts alongside it.
        self._scopes = list(scopes.split(" "))

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema:  # noqa: ANN401
        """
        Pydantic hook: validate the input as a ``str``, then pass the result to ``Scopes``.
        """
        return core_schema.no_info_after_validator_function(cls, handler(str))

    def __eq__(self, other: object) -> bool:
        """
        Compare the parsed scope lists; defer to the other operand when it is not a ``Scopes``.
        """
        if not isinstance(other, Scopes):
            return NotImplemented
        return self._scopes == other._scopes

    def __hash__(self) -> int:
        """
        Hash the parsed scopes so that instances which compare equal also hash equal.
        """
        # ``_scopes`` is a list, and lists are unhashable: hash an immutable tuple copy instead.
        return hash(tuple(self._scopes))

    def __iter__(self) -> Iterator[str]:
        """
        Iterate over the individual scopes rather than over the characters of the string.
        """
        return iter(self._scopes)
82
+
83
+
84
+ class BaseCredentialsConfig(ConfigModel):
58
85
  client_id: str
86
+ scopes: Scopes
87
+
88
+
89
+ class _ClientCredentialsConfig(BaseCredentialsConfig):
90
+ type: Literal["client-credentials"]
59
91
  client_secret: str
60
92
  token_url: str
61
- scopes: list[str]
62
93
  resource: str | None = None
63
94
  audience: str | None = None
64
95
 
65
96
 
66
- class _ClientCertificateConfig(ConfigModel):
97
+ class _ClientCertificateConfig(BaseCredentialsConfig):
67
98
  type: Literal["client-certificate"]
68
- client_id: str
69
99
  path: Path
70
100
  password: str | None = None
71
101
  authority_url: str
72
- scopes: list[str]
73
102
 
74
103
 
75
104
  AuthenticationConfig = Annotated[_ClientCredentialsConfig | _ClientCertificateConfig, Field(discriminator="type")]
@@ -84,7 +113,7 @@ class TimeIntervalConfig:
84
113
  self._interval, self._expression = TimeIntervalConfig._parse_expression(expression)
85
114
 
86
115
  @classmethod
87
- def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema:
116
+ def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema: # noqa: ANN401
88
117
  """
89
118
  Pydantic hook to define how this class should be serialized/deserialized.
90
119
 
@@ -191,18 +220,26 @@ class TimeIntervalConfig:
191
220
  return self._expression
192
221
 
193
222
 
194
- class _ConnectionParameters(ConfigModel):
195
- gzip_compression: bool = False
196
- status_forcelist: list[int] = Field(default_factory=lambda: [429, 502, 503, 504])
197
- max_retries: int = 10
198
- max_retries_connect: int = 3
199
- max_retry_backoff: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
200
- max_connection_pool_size: int = 50
201
- ssl_verify: bool = True
202
- proxies: dict[str, str] = Field(default_factory=dict)
223
+ class RetriesConfig(ConfigModel):
224
+ max_retries: int = Field(default=10, ge=-1)
225
+ max_backoff: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
203
226
  timeout: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
204
227
 
205
228
 
229
+ class SslCertificatesConfig(ConfigModel):
230
+ verify: bool = True
231
+ allow_list: list[str] | None = None
232
+
233
+
234
+ class ConnectionParameters(ConfigModel):
235
+ retries: RetriesConfig = Field(default_factory=RetriesConfig)
236
+ ssl_certificates: SslCertificatesConfig = Field(default_factory=SslCertificatesConfig)
237
+
238
+
239
+ class IntegrationConfig(ConfigModel):
240
+ external_id: str
241
+
242
+
206
243
  class ConnectionConfig(ConfigModel):
207
244
  """
208
245
  Configuration for connecting to a Cognite Data Fusion project.
@@ -216,11 +253,11 @@ class ConnectionConfig(ConfigModel):
216
253
  project: str
217
254
  base_url: str
218
255
 
219
- integration: str
256
+ integration: IntegrationConfig
220
257
 
221
258
  authentication: AuthenticationConfig
222
259
 
223
- connection: _ConnectionParameters = Field(default_factory=_ConnectionParameters)
260
+ connection: ConnectionParameters = Field(default_factory=ConnectionParameters)
224
261
 
225
262
  def get_cognite_client(self, client_name: str) -> CogniteClient:
226
263
  """
@@ -235,14 +272,9 @@ class ConnectionConfig(ConfigModel):
235
272
  from cognite.client.config import global_config
236
273
 
237
274
  global_config.disable_pypi_version_check = True
238
- global_config.disable_gzip = not self.connection.gzip_compression
239
- global_config.status_forcelist = set(self.connection.status_forcelist)
240
- global_config.max_retries = self.connection.max_retries
241
- global_config.max_retries_connect = self.connection.max_retries_connect
242
- global_config.max_retry_backoff = self.connection.max_retry_backoff.seconds
243
- global_config.max_connection_pool_size = self.connection.max_connection_pool_size
244
- global_config.disable_ssl = not self.connection.ssl_verify
245
- global_config.proxies = self.connection.proxies
275
+ global_config.max_retries = self.connection.retries.max_retries
276
+ global_config.max_retry_backoff = self.connection.retries.max_backoff.seconds
277
+ global_config.disable_ssl = not self.connection.ssl_certificates.verify
246
278
 
247
279
  credential_provider: CredentialProvider
248
280
  match self.authentication:
@@ -270,7 +302,7 @@ class ConnectionConfig(ConfigModel):
270
302
  client_id=client_certificate.client_id,
271
303
  cert_thumbprint=str(thumbprint),
272
304
  certificate=str(key),
273
- scopes=client_certificate.scopes,
305
+ scopes=list(client_certificate.scopes),
274
306
  )
275
307
 
276
308
  case _:
@@ -280,7 +312,7 @@ class ConnectionConfig(ConfigModel):
280
312
  project=self.project,
281
313
  base_url=self.base_url,
282
314
  client_name=client_name,
283
- timeout=self.connection.timeout.seconds,
315
+ timeout=self.connection.retries.timeout.seconds,
284
316
  credentials=credential_provider,
285
317
  )
286
318
 
@@ -315,7 +347,9 @@ class ConnectionConfig(ConfigModel):
315
347
  client_id=os.environ["COGNITE_CLIENT_ID"],
316
348
  client_secret=os.environ["COGNITE_CLIENT_SECRET"],
317
349
  token_url=os.environ["COGNITE_TOKEN_URL"],
318
- scopes=os.environ["COGNITE_TOKEN_SCOPES"].split(","),
350
+ scopes=Scopes(
351
+ os.environ["COGNITE_TOKEN_SCOPES"],
352
+ ),
319
353
  )
320
354
  elif "COGNITE_CLIENT_CERTIFICATE_PATH" in os.environ:
321
355
  auth = _ClientCertificateConfig(
@@ -324,7 +358,9 @@ class ConnectionConfig(ConfigModel):
324
358
  path=Path(os.environ["COGNITE_CLIENT_CERTIFICATE_PATH"]),
325
359
  password=os.environ.get("COGNITE_CLIENT_CERTIFICATE_PATH"),
326
360
  authority_url=os.environ["COGNITE_AUTHORITY_URL"],
327
- scopes=os.environ["COGNITE_TOKEN_SCOPES"].split(","),
361
+ scopes=Scopes(
362
+ os.environ["COGNITE_TOKEN_SCOPES"],
363
+ ),
328
364
  )
329
365
  else:
330
366
  raise KeyError("Missing auth, either COGNITE_CLIENT_SECRET or COGNITE_CLIENT_CERTIFICATE_PATH must be set")
@@ -332,7 +368,7 @@ class ConnectionConfig(ConfigModel):
332
368
  return ConnectionConfig(
333
369
  project=os.environ["COGNITE_PROJECT"],
334
370
  base_url=os.environ["COGNITE_BASE_URL"],
335
- integration=os.environ["COGNITE_INTEGRATION"],
371
+ integration=IntegrationConfig(external_id=os.environ["COGNITE_INTEGRATION"]),
336
372
  authentication=auth,
337
373
  )
338
374
 
@@ -398,9 +434,98 @@ def _log_handler_default() -> list[LogHandlerConfig]:
398
434
  return [LogConsoleHandlerConfig(type="console", level=LogLevel.INFO)]
399
435
 
400
436
 
437
+ class RawDestinationConfig(ConfigModel):
438
+ """
439
+ Configuration parameters for using Raw.
440
+ """
441
+
442
+ database: str
443
+ table: str
444
+
445
+
446
+ class RawStateStoreConfig(RawDestinationConfig):
447
+ """
448
+ Configuration of a state store based on CDF RAW.
449
+ """
450
+
451
+ upload_interval: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
452
+
453
+
454
+ class LocalStateStoreConfig(ConfigModel):
455
+ """
456
+ Configuration of a state store using a local JSON file.
457
+ """
458
+
459
+ path: Path
460
+ save_interval: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
461
+
462
+
463
class StateStoreConfig(ConfigModel):
    """
    Configuration of the State Store, containing ``LocalStateStoreConfig`` or ``RawStateStoreConfig``.
    """

    raw: RawStateStoreConfig | None = None
    local: LocalStateStoreConfig | None = None

    def create_state_store(
        self,
        cdf_client: CogniteClient | None = None,
        default_to_local: bool = True,
        cancellation_token: CancellationToken | None = None,
    ) -> AbstractStateStore:
        """
        Build the state store described by this config.

        Args:
            cdf_client: Client handed to the RAW state store; not used for the other store types
            default_to_local: When neither ``raw`` nor ``local`` is set, return a LocalStateStore writing to
                ``states.json`` if true, and a NoStateStore if false
            cancellation_token: Token forwarded to the RAW or local state store that gets created

        Returns:
            The newly constructed state store

        Raises:
            ValueError: If both ``raw`` and ``local`` are configured
            TypeError: If ``raw`` is configured but no ``cdf_client`` was given
            IsADirectoryError: If the configured local path is an existing directory
        """
        raw_config = self.raw
        local_config = self.local

        # The two backends are mutually exclusive.
        if raw_config and local_config:
            raise ValueError("Only one state store can be used simultaneously")

        if raw_config:
            if cdf_client is None:
                raise TypeError("A cognite client object must be provided when state store is RAW")

            return RawStateStore(
                cdf_client=cdf_client,
                database=raw_config.database,
                table=raw_config.table,
                save_interval=raw_config.upload_interval.seconds,
                cancellation_token=cancellation_token,
            )

        if local_config:
            state_file = local_config.path
            # The local store is given a file path, so a directory is rejected up front.
            if state_file.is_dir():
                raise IsADirectoryError(state_file)

            return LocalStateStore(
                file_path=str(state_file),
                save_interval=local_config.save_interval.seconds,
                cancellation_token=cancellation_token,
            )

        # Nothing configured: fall back according to ``default_to_local``.
        if not default_to_local:
            return NoStateStore()

        return LocalStateStore(file_path="states.json", cancellation_token=cancellation_token)
518
+
519
+
401
520
  class ExtractorConfig(ConfigModel):
402
521
  """
403
522
  Base class for application configuration for extractors.
404
523
  """
405
524
 
525
+ state_store: StateStoreConfig | None = None
406
526
  log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
527
+ retry_startup: bool = True
528
+
529
+
530
+ ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
531
+ ConfigRevision = Literal["local"] | int