cognite-extractor-utils 7.4.2__tar.gz → 7.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (35)
  1. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/PKG-INFO +4 -2
  2. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/__init__.py +1 -1
  3. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/exceptions.py +5 -1
  4. cognite_extractor_utils-7.4.4/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
  5. cognite_extractor_utils-7.4.4/cognite/extractorutils/unstable/configuration/loaders.py +111 -0
  6. cognite_extractor_utils-7.4.4/cognite/extractorutils/unstable/configuration/models.py +159 -0
  7. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/files.py +34 -41
  8. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/pyproject.toml +4 -2
  9. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/LICENSE +0 -0
  10. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/README.md +0 -0
  11. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/_inner_util.py +0 -0
  12. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/base.py +0 -0
  13. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/configtools/__init__.py +0 -0
  14. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/configtools/_util.py +0 -0
  15. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/configtools/elements.py +0 -0
  16. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/configtools/loaders.py +0 -0
  17. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/metrics.py +0 -0
  18. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/py.typed +0 -0
  19. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/statestore/__init__.py +0 -0
  20. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/statestore/_base.py +0 -0
  21. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/statestore/hashing.py +0 -0
  22. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/statestore/watermark.py +0 -0
  23. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/threading.py +0 -0
  24. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/unstable/__init__.py +0 -0
  25. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/__init__.py +0 -0
  26. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/_base.py +0 -0
  27. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/_metrics.py +0 -0
  28. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/assets.py +0 -0
  29. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/data_modeling.py +0 -0
  30. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/events.py +0 -0
  31. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/raw.py +0 -0
  32. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/time_series.py +0 -0
  33. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader_extractor.py +0 -0
  34. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader_types.py +0 -0
  35. {cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/util.py +0 -0
{cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 7.4.2
+Version: 7.4.4
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -17,7 +17,7 @@ Provides-Extra: experimental
 Requires-Dist: arrow (>=1.0.0,<2.0.0)
 Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
 Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
-Requires-Dist: cognite-sdk (>=7.58.4,<8.0.0)
+Requires-Dist: cognite-sdk (>=7.59.0,<8.0.0)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
 Requires-Dist: httpx (>=0.27.0,<0.28.0)
@@ -25,6 +25,8 @@ Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
 Requires-Dist: orjson (>=3.10.3,<4.0.0)
 Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
 Requires-Dist: psutil (>=6.0.0,<7.0.0)
+Requires-Dist: pydantic (>=2.8.2,<3.0.0)
+Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
 Requires-Dist: pyyaml (>=5.3.0,<7)
 Requires-Dist: typing-extensions (>=3.7.4,<5)
{cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/__init__.py

@@ -16,5 +16,5 @@
 Cognite extractor utils is a Python package that simplifies the development of new extractors.
 """
 
-__version__ = "7.4.2"
+__version__ = "7.4.4"
 from .base import Extractor
{cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/exceptions.py

@@ -13,6 +13,9 @@
 # limitations under the License.
 
 
+from typing import List, Optional
+
+
 class InvalidConfigError(Exception):
     """
     Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if config file is invalid. This can be due to
@@ -22,9 +25,10 @@ class InvalidConfigError(Exception):
     * Unkown fields
     """
 
-    def __init__(self, message: str):
+    def __init__(self, message: str, details: Optional[List[str]] = None):
        super(InvalidConfigError, self).__init__()
        self.message = message
+        self.details = details
 
     def __str__(self) -> str:
         return f"Invalid config: {self.message}"
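The new optional details list on InvalidConfigError keeps one entry per validation error, so callers can report every problem in a config file instead of only the first. A minimal sketch of how calling code might consume it (the report_errors helper and load_config callable are illustrative, not part of the package):

from cognite.extractorutils.exceptions import InvalidConfigError

def report_errors(load_config) -> None:
    # load_config is any callable that raises InvalidConfigError,
    # e.g. a wrapper around the configuration loaders added in this release
    try:
        load_config()
    except InvalidConfigError as e:
        print(e)  # "Invalid config: <comma-joined summary>"
        for detail in e.details or []:
            print(f"  - {detail}")  # one "<location>: <message>" entry per error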
cognite_extractor_utils-7.4.4/cognite/extractorutils/unstable/configuration/loaders.py

@@ -0,0 +1,111 @@
+import json
+from enum import Enum
+from io import StringIO
+from pathlib import Path
+from typing import Dict, Optional, TextIO, Type, TypeVar, Union
+
+from pydantic import ValidationError
+
+from cognite.client import CogniteClient
+from cognite.extractorutils.configtools.loaders import _load_yaml_dict_raw
+from cognite.extractorutils.exceptions import InvalidConfigError
+from cognite.extractorutils.unstable.configuration.models import ConfigModel
+
+_T = TypeVar("_T", bound=ConfigModel)
+
+
+class ConfigFormat(Enum):
+    JSON = "json"
+    YAML = "yaml"
+
+
+def load_file(path: Path, schema: Type[_T]) -> _T:
+    if path.suffix in [".yaml", ".yml"]:
+        format = ConfigFormat.YAML
+    elif path.suffix == ".json":
+        format = ConfigFormat.JSON
+    else:
+        raise InvalidConfigError(f"Unknown file type {path.suffix}")
+
+    with open(path, "r") as stream:
+        return load_io(stream, format, schema)
+
+
+def load_from_cdf(
+    cognite_client: CogniteClient, external_id: str, schema: Type[_T], revision: Optional[int] = None
+) -> _T:
+    params: Dict[str, Union[str, int]] = {"externalId": external_id}
+    if revision:
+        params["revision"] = revision
+    response = cognite_client.get(
+        f"/api/v1/projects/{cognite_client.config.project}/odin/config",
+        params=params,
+        headers={"cdf-version": "alpha"},
+    )
+    response.raise_for_status()
+    data = response.json()
+    return load_io(StringIO(data["config"]), ConfigFormat.YAML, schema)
+
+
+def load_io(stream: TextIO, format: ConfigFormat, schema: Type[_T]) -> _T:
+    if format == ConfigFormat.JSON:
+        data = json.load(stream)
+
+    elif format == ConfigFormat.YAML:
+        data = _load_yaml_dict_raw(stream)
+
+        if "azure-keyvault" in data:
+            data.pop("azure-keyvault")
+        if "key-vault" in data:
+            data.pop("key-vault")
+
+    return load_dict(data, schema)
+
+
+def _make_loc_str(loc: tuple) -> str:
+    # Remove the body parameter if it is present
+    if loc[0] == "body":
+        loc = loc[1:]
+
+    # Create a string from the loc parameter
+    loc_str = ""
+    needs_sep = False
+    for lo in loc:
+        if not needs_sep:
+            loc_str = f"{loc_str}{lo}"
+            needs_sep = True
+        else:
+            if isinstance(lo, int):
+                loc_str = f"{loc_str}[{lo}]"
+            else:
+                loc_str = f"{loc_str}.{lo}"
+
+    return loc_str
+
+
+def load_dict(data: dict, schema: Type[_T]) -> _T:
+    try:
+        return schema.model_validate(data)
+
+    except ValidationError as e:
+        messages = []
+        for err in e.errors():
+            loc = err.get("loc")
+            if loc is None:
+                continue
+
+            # Create a string from the loc parameter
+            loc_str = _make_loc_str(loc)
+
+            if "ctx" in err and "error" in err["ctx"]:
+                exc = err["ctx"]["error"]
+                if isinstance(exc, ValueError) or isinstance(exc, AssertionError):
+                    messages.append(f"{loc_str}: {str(exc)}")
+                    continue
+
+            if err.get("type") == "json_invalid":
+                messages.append(f"{err.get('msg')}: {loc_str}")
+            else:
+                messages.append(f"{loc_str}: {err.get('msg')}")
+
+        raise InvalidConfigError(", ".join(messages), details=messages) from e
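Taken together, these loaders read YAML or JSON into a pydantic schema and flatten pydantic's ValidationError into a single InvalidConfigError with one message per failure. A minimal sketch of how an extractor might call load_file (the SourceConfig schema and the config.yaml path are illustrative, not part of the package):

from pathlib import Path

from cognite.extractorutils.exceptions import InvalidConfigError
from cognite.extractorutils.unstable.configuration.loaders import load_file
from cognite.extractorutils.unstable.configuration.models import ConfigModel, TimeIntervalConfig

class SourceConfig(ConfigModel):
    # ConfigModel kebab-cases field names, so the expected YAML keys
    # are base-url and poll-interval
    base_url: str
    poll_interval: TimeIntervalConfig = TimeIntervalConfig("30s")

try:
    config = load_file(Path("config.yaml"), SourceConfig)
    print(config.base_url, config.poll_interval.seconds)
except InvalidConfigError as e:
    for detail in e.details or []:
        print(detail)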
cognite_extractor_utils-7.4.4/cognite/extractorutils/unstable/configuration/models.py

@@ -0,0 +1,159 @@
+import re
+from datetime import timedelta
+from enum import Enum
+from pathlib import Path
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+
+from humps import kebabize
+from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
+from pydantic_core import CoreSchema, core_schema
+
+from cognite.extractorutils.exceptions import InvalidConfigError
+
+
+class ConfigModel(BaseModel):
+    model_config = ConfigDict(
+        alias_generator=kebabize,
+        populate_by_name=True,
+        extra="forbid",
+        # arbitrary_types_allowed=True,
+    )
+
+
+class _ClientCredentialsConfig(ConfigModel):
+    type: Literal["client-credentials"]
+    client_id: str
+    client_secret: str
+    token_url: str
+    scopes: List[str]
+    resource: Optional[str] = None
+    audience: Optional[str] = None
+
+
+class _ClientCertificateConfig(ConfigModel):
+    type: Literal["client-certificate"]
+    client_id: str
+    certificate_path: Path
+    scopes: List[str]
+
+
+AuthenticationConfig = Annotated[Union[_ClientCredentialsConfig, _ClientCertificateConfig], Field(discriminator="type")]
+
+
+class TimeIntervalConfig:
+    """
+    Configuration parameter for setting a time interval
+    """
+
+    def __init__(self, expression: str) -> None:
+        self._interval, self._expression = TimeIntervalConfig._parse_expression(expression)
+
+    @classmethod
+    def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema:
+        return core_schema.no_info_after_validator_function(cls, handler(Union[str, int]))
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, TimeIntervalConfig):
+            return NotImplemented
+        return self._interval == other._interval
+
+    def __hash__(self) -> int:
+        return hash(self._interval)
+
+    @classmethod
+    def _parse_expression(cls, expression: str) -> tuple[int, str]:
+        # First, try to parse pure number and assume seconds (for backwards compatibility)
+        try:
+            return int(expression), f"{expression}s"
+        except ValueError:
+            pass
+
+        match = re.match(r"(\d+)[ \t]*(s|m|h|d)", expression)
+        if not match:
+            raise InvalidConfigError("Invalid interval pattern")
+
+        number, unit = match.groups()
+        numeric_unit = {"s": 1, "m": 60, "h": 60 * 60, "d": 60 * 60 * 24}[unit]
+
+        return int(number) * numeric_unit, expression
+
+    @property
+    def seconds(self) -> int:
+        return self._interval
+
+    @property
+    def minutes(self) -> float:
+        return self._interval / 60
+
+    @property
+    def hours(self) -> float:
+        return self._interval / (60 * 60)
+
+    @property
+    def days(self) -> float:
+        return self._interval / (60 * 60 * 24)
+
+    @property
+    def timedelta(self) -> timedelta:
+        days = self._interval // (60 * 60 * 24)
+        seconds = self._interval % (60 * 60 * 24)
+        return timedelta(days=days, seconds=seconds)
+
+    def __int__(self) -> int:
+        return int(self._interval)
+
+    def __float__(self) -> float:
+        return float(self._interval)
+
+    def __str__(self) -> str:
+        return self._expression
+
+    def __repr__(self) -> str:
+        return self._expression
+
+
+class _ConnectionParameters(ConfigModel):
+    gzip_compression: bool = False
+    status_forcelist: List[int] = Field(default_factory=lambda: [429, 502, 503, 504])
+    max_retries: int = 10
+    max_retries_connect: int = 3
+    max_retry_backoff: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
+    max_connection_pool_size: int = 50
+    ssl_verify: bool = True
+    proxies: Dict[str, str] = Field(default_factory=dict)
+
+
+class ConnectionConfig(ConfigModel):
+    project: str
+    base_url: str
+
+    extraction_pipeline: str
+
+    authentication: AuthenticationConfig
+
+    connection: _ConnectionParameters = Field(default_factory=_ConnectionParameters)
+
+
+class LogLevel(Enum):
+    CRITICAL = "CRITICAL"
+    ERROR = "ERROR"
+    WARNING = "WARNING"
+    INFO = "INFO"
+    DEBUG = "DEBUG"
+
+
+class LogFileHandlerConfig(ConfigModel):
+    path: Path
+    level: LogLevel
+    retention: int = 7
+
+
+class LogConsoleHandlerConfig(ConfigModel):
+    level: LogLevel
+
+
+LogHandlerConfig = Union[LogFileHandlerConfig, LogConsoleHandlerConfig]
+
+
+class ExtractorConfig(ConfigModel):
+    log_handlers: List[LogHandlerConfig] = Field(default_factory=lambda: [LogConsoleHandlerConfig(level=LogLevel.INFO)])
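ConnectionConfig maps a kebab-cased mapping onto the CDF connection settings, with the authentication union discriminated on its type field, and TimeIntervalConfig accepts either a bare number of seconds or a number with an s, m, h or d suffix. A minimal sketch against these models (all values are illustrative):

from cognite.extractorutils.unstable.configuration.loaders import load_dict
from cognite.extractorutils.unstable.configuration.models import ConnectionConfig, TimeIntervalConfig

connection = load_dict(
    {
        "project": "my-project",
        "base-url": "https://api.cognitedata.com",
        "extraction-pipeline": "my-pipeline",
        "authentication": {
            # the type key selects a variant of the discriminated union
            "type": "client-credentials",
            "client-id": "my-client-id",
            "client-secret": "my-client-secret",
            "token-url": "https://login.example.com/token",
            "scopes": ["https://api.cognitedata.com/.default"],
        },
    },
    ConnectionConfig,
)

assert connection.connection.max_retries == 10  # defaults from _ConnectionParameters
assert TimeIntervalConfig("2m").seconds == 120  # "2m" parses to 120 seconds
assert TimeIntervalConfig("90").seconds == 90   # bare numbers are read as seconds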
{cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/cognite/extractorutils/uploader/files.py

@@ -251,43 +251,38 @@ class IOFileUploadQueue(AbstractUploadQueue):
         return node.as_id()
 
     def _upload_empty(
-        self, meta_or_apply: FileMetadataOrCogniteExtractorFile
+        self, file_meta: FileMetadataOrCogniteExtractorFile
     ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
-        if isinstance(meta_or_apply, CogniteExtractorFileApply):
-            node_id = self._apply_cognite_file(meta_or_apply)
-            meta_or_apply, url = self._create_cdm(instance_id=node_id)
+        if isinstance(file_meta, CogniteExtractorFileApply):
+            node_id = self._apply_cognite_file(file_meta)
+            file_meta, url = self._create_cdm(instance_id=node_id)
         else:
-            meta_or_apply, url = self.cdf_client.files.create(
-                file_metadata=meta_or_apply, overwrite=self.overwrite_existing
-            )
-        return meta_or_apply, url
+            file_meta, url = self.cdf_client.files.create(file_metadata=file_meta, overwrite=self.overwrite_existing)
+        return file_meta, url
 
-    def _upload_bytes(self, size: int, file: BinaryIO, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
-        meta_or_apply, url = self._upload_empty(meta_or_apply)
-        resp = self._httpx_client.send(self._get_file_upload_request(url, file, size, meta_or_apply.mime_type))
+    def _upload_bytes(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
+        file_meta, url = self._upload_empty(file_meta)
+        resp = self._httpx_client.send(self._get_file_upload_request(url, file, size, file_meta.mime_type))
         resp.raise_for_status()
 
-    def _upload_multipart(self, size: int, file: BinaryIO, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
+    def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         chunks = ChunkedStream(file, self.max_file_chunk_size, size)
         self.logger.debug(
-            f"File {meta_or_apply.external_id} is larger than 5GiB ({size})"
-            f", uploading in {chunks.chunk_count} chunks"
+            f"File {file_meta.external_id} is larger than 5GiB ({size})" f", uploading in {chunks.chunk_count} chunks"
         )
 
-        returned_file_metadata = self._create_multi_part(meta_or_apply, chunks)
+        returned_file_metadata = self._create_multi_part(file_meta, chunks)
         upload_urls = returned_file_metadata["uploadUrls"]
         upload_id = returned_file_metadata["uploadId"]
         file_meta = FileMetadata.load(returned_file_metadata)
 
         for url in upload_urls:
             chunks.next_chunk()
-            resp = self._httpx_client.send(
-                self._get_file_upload_request(url, chunks, len(chunks), meta_or_apply.mime_type)
-            )
+            resp = self._httpx_client.send(self._get_file_upload_request(url, chunks, len(chunks), file_meta.mime_type))
             resp.raise_for_status()
 
         completed_headers = (
-            _CDF_ALPHA_VERSION_HEADER if isinstance(meta_or_apply, CogniteExtractorFileApply) is not None else None
+            _CDF_ALPHA_VERSION_HEADER if isinstance(file_meta, CogniteExtractorFileApply) is not None else None
         )
 
         res = self.cdf_client.files._post(
@@ -297,9 +292,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
         )
         res.raise_for_status()
 
-    def _create_multi_part(self, meta_or_apply: FileMetadataOrCogniteExtractorFile, chunks: ChunkedStream) -> dict:
-        if isinstance(meta_or_apply, CogniteExtractorFileApply):
-            node_id = self._apply_cognite_file(meta_or_apply)
+    def _create_multi_part(self, file_meta: FileMetadataOrCogniteExtractorFile, chunks: ChunkedStream) -> dict:
+        if isinstance(file_meta, CogniteExtractorFileApply):
+            node_id = self._apply_cognite_file(file_meta)
             identifiers = IdentifierSequence.load(instance_ids=node_id).as_singleton()
             self.cdf_client.files._warn_alpha()
             res = self.cdf_client.files._post(
@@ -313,7 +308,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         else:
             res = self.cdf_client.files._post(
                 url_path="/files/initmultipartupload",
-                json=meta_or_apply.dump(camel_case=True),
+                json=file_meta.dump(camel_case=True),
                 params={"overwrite": self.overwrite_existing, "parts": chunks.chunk_count},
             )
         res.raise_for_status()
@@ -321,7 +316,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
 
     def add_io_to_upload_queue(
         self,
-        meta_or_apply: FileMetadataOrCogniteExtractorFile,
+        file_meta: FileMetadataOrCogniteExtractorFile,
         read_file: Callable[[], BinaryIO],
         extra_retries: Optional[
             Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
@@ -351,36 +346,34 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def upload_file(read_file: Callable[[], BinaryIO], meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
+        def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadataOrCogniteExtractorFile) -> None:
             with read_file() as file:
                 size = super_len(file)
                 if size == 0:
                     # upload just the file metadata witout data
-                    meta_or_apply, _ = self._upload_empty(meta_or_apply)
+                    file_meta, _ = self._upload_empty(file_meta)
                 elif size >= self.max_single_chunk_file_size:
                     # The minimum chunk size is 4000MiB.
-                    self._upload_multipart(size, file, meta_or_apply)
+                    self._upload_multipart(size, file, file_meta)
 
                 else:
-                    self._upload_bytes(size, file, meta_or_apply)
+                    self._upload_bytes(size, file, file_meta)
 
-                if isinstance(meta_or_apply, CogniteExtractorFileApply):
-                    meta_or_apply.is_uploaded = True
+                if isinstance(file_meta, CogniteExtractorFileApply):
+                    file_meta.is_uploaded = True
 
                 if self.post_upload_function:
                     try:
-                        self.post_upload_function([meta_or_apply])
+                        self.post_upload_function([file_meta])
                     except Exception as e:
                         self.logger.error("Error in upload callback: %s", str(e))
 
-        def wrapped_upload(
-            read_file: Callable[[], BinaryIO], meta_or_apply: FileMetadataOrCogniteExtractorFile
-        ) -> None:
+        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadataOrCogniteExtractorFile) -> None:
             try:
-                upload_file(read_file, meta_or_apply)
+                upload_file(read_file, file_meta)
 
             except Exception as e:
-                self.logger.exception(f"Unexpected error while uploading file: {meta_or_apply.external_id}")
+                self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
                 self.errors.append(e)
 
             finally:
@@ -397,7 +390,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
                 pass
 
         with self.lock:
-            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, meta_or_apply))
+            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
@@ -522,7 +515,7 @@ class FileUploadQueue(IOFileUploadQueue):
         )
 
     def add_to_upload_queue(
-        self, meta_or_apply: FileMetadataOrCogniteExtractorFile, file_name: Union[str, PathLike]
+        self, file_meta: FileMetadataOrCogniteExtractorFile, file_name: Union[str, PathLike]
    ) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -537,7 +530,7 @@ class FileUploadQueue(IOFileUploadQueue):
         def load_file_from_path() -> BinaryIO:
             return open(file_name, "rb")
 
-        self.add_io_to_upload_queue(meta_or_apply, load_file_from_path)
+        self.add_io_to_upload_queue(file_meta, load_file_from_path)
 
 
 class BytesUploadQueue(IOFileUploadQueue):
@@ -574,7 +567,7 @@ class BytesUploadQueue(IOFileUploadQueue):
             cancellation_token,
         )
 
-    def add_to_upload_queue(self, content: bytes, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
+    def add_to_upload_queue(self, content: bytes, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         """
         Add object to upload queue. The queue will be uploaded if the queue size is larger than the threshold
         specified in the __init__.
@@ -586,4 +579,4 @@ class BytesUploadQueue(IOFileUploadQueue):
         def get_byte_io() -> BinaryIO:
             return BytesIO(content)
 
-        self.add_io_to_upload_queue(meta_or_apply, get_byte_io)
+        self.add_io_to_upload_queue(file_meta, get_byte_io)
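The rename from meta_or_apply to file_meta also changes the public keyword on the add_to_upload_queue methods, so callers that passed the metadata argument by keyword need updating when moving from 7.4.2 to 7.4.4. A minimal sketch against the new signatures (client construction and metadata values are illustrative, and assume credentials are configured elsewhere):

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import BytesUploadQueue

client = CogniteClient()  # assumes a default client configuration exists
queue = BytesUploadQueue(client)

meta = FileMetadata(external_id="my-file", name="my-file.txt")

# 7.4.2: queue.add_to_upload_queue(b"hello", meta_or_apply=meta)
# 7.4.4: the keyword is now file_meta; positional calls are unaffected
queue.add_to_upload_queue(b"hello", file_meta=meta)

queue.upload()  # flush any queued uploads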
{cognite_extractor_utils-7.4.2 → cognite_extractor_utils-7.4.4}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognite-extractor-utils"
-version = "7.4.2"
+version = "7.4.4"
 description = "Utilities for easier development of extractors for CDF"
 authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
 license = "Apache-2.0"
@@ -58,7 +58,7 @@ exclude = "tests/*"
 
 [tool.poetry.dependencies]
 python = "^3.9.0"
-cognite-sdk = "^7.58.4"
+cognite-sdk = "^7.59.0"
 prometheus-client = ">0.7.0, <=1.0.0"
 arrow = "^1.0.0"
 pyyaml = ">=5.3.0, <7"
@@ -72,6 +72,8 @@ azure-identity = "^1.14.0"
 azure-keyvault-secrets = "^4.7.0"
 orjson = "^3.10.3"
 httpx = "^0.27.0"
+pydantic = "^2.8.2"
+pyhumps = "^3.8.0"
 
 [tool.poetry.extras]
 experimental = ["cognite-sdk-experimental"]