cognite-extractor-utils 7.4.6__tar.gz → 7.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (36) hide show
  1. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/PKG-INFO +1 -1
  2. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/__init__.py +1 -1
  3. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/configtools/elements.py +48 -0
  4. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/configtools/loaders.py +21 -1
  5. cognite_extractor_utils-7.4.7/cognite/extractorutils/configtools/validators.py +37 -0
  6. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/files.py +6 -4
  7. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/pyproject.toml +2 -2
  8. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/LICENSE +0 -0
  9. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/README.md +0 -0
  10. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/_inner_util.py +0 -0
  11. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/base.py +0 -0
  12. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/configtools/__init__.py +0 -0
  13. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/configtools/_util.py +0 -0
  14. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/exceptions.py +0 -0
  15. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/metrics.py +0 -0
  16. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/py.typed +0 -0
  17. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/statestore/__init__.py +0 -0
  18. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/statestore/_base.py +0 -0
  19. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/statestore/hashing.py +0 -0
  20. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/statestore/watermark.py +0 -0
  21. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/threading.py +0 -0
  22. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/unstable/__init__.py +0 -0
  23. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/unstable/configuration/__init__.py +0 -0
  24. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/unstable/configuration/loaders.py +0 -0
  25. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/unstable/configuration/models.py +0 -0
  26. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/__init__.py +0 -0
  27. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/_base.py +0 -0
  28. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/_metrics.py +0 -0
  29. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/assets.py +0 -0
  30. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/data_modeling.py +0 -0
  31. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/events.py +0 -0
  32. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/raw.py +0 -0
  33. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader/time_series.py +0 -0
  34. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader_extractor.py +0 -0
  35. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/uploader_types.py +0 -0
  36. {cognite_extractor_utils-7.4.6 → cognite_extractor_utils-7.4.7}/cognite/extractorutils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-extractor-utils
3
- Version: 7.4.6
3
+ Version: 7.4.7
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Home-page: https://github.com/cognitedata/python-extractor-utils
6
6
  License: Apache-2.0
@@ -16,5 +16,5 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.4.6"
19
+ __version__ = "7.4.7"
20
20
  from .base import Extractor
@@ -696,3 +696,51 @@ class StateStoreConfig:
696
696
  return LocalStateStore(file_path="states.json", cancellation_token=cancellation_token)
697
697
  else:
698
698
  return NoStateStore()
699
+
700
+
701
class RegExpFlag(Enum):
    """
    Regular expression flags that can be named in configuration.

    Every flag is available under a long name and a one-letter alias; both spellings translate to the
    same ``re`` module flag.
    """

    IGNORECASE = "ignore-case"
    IC = "i"
    ASCII = "ascii-only"
    A = "a"

    def get_regex_flag(self) -> int:
        """
        Translate this member into the corresponding ``re`` module flag.

        Returns:
            ``re.IGNORECASE`` for IGNORECASE / IC, ``re.ASCII`` for ASCII / A, and 0 for anything else.
        """
        if self in (RegExpFlag.IGNORECASE, RegExpFlag.IC):
            return re.IGNORECASE
        # Compare the member itself rather than ``self.value``: a plain string never compares equal to
        # an Enum member, so testing the value would make this branch unreachable.
        if self in (RegExpFlag.ASCII, RegExpFlag.A):
            return re.ASCII
        return 0
713
+
714
+
715
@dataclass
class IgnorePattern:
    """
    A regular expression used as an ignore pattern, together with its flags.

    Exactly one of ``options`` and ``flags`` has to be given. When ``flags`` is given, a warning is logged,
    its value is moved into ``options`` and ``flags`` is reset to None.
    """

    pattern: str
    options: Optional[list[RegExpFlag]] = None
    flags: Optional[list[RegExpFlag]] = None

    def __post_init__(self) -> None:
        has_options = self.options is not None
        has_flags = self.flags is not None

        if has_options and has_flags:
            raise ValueError("Only one of either 'options' or 'flags' can be specified.")
        if not (has_options or has_flags):
            raise ValueError("'options' is required.")

        if has_flags:
            _logger.warning("'options' is preferred over 'flags' as this may be removed in a future release")
            # Normalise onto 'options' so that compile() only has to look in one place
            self.options, self.flags = self.flags, None

    def compile(self) -> re.Pattern[str]:
        """
        Build a compiled regular expression from ``pattern`` and ``options``.

        Returns:
            The compiled pattern.
        """
        combined = 0
        for option in self.options or []:
            combined = combined | option.get_regex_flag()
        return re.compile(self.pattern, combined)
@@ -22,7 +22,7 @@ import sys
22
22
  from enum import Enum
23
23
  from hashlib import sha256
24
24
  from pathlib import Path
25
- from typing import Any, Callable, Dict, Generic, Iterable, Optional, TextIO, Type, TypeVar, Union, cast
25
+ from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, TextIO, Type, TypeVar, Union, cast
26
26
 
27
27
  import dacite
28
28
  import yaml
@@ -37,6 +37,7 @@ from cognite.extractorutils.configtools._util import _to_snake_case
37
37
  from cognite.extractorutils.configtools.elements import (
38
38
  BaseConfig,
39
39
  ConfigType,
40
+ IgnorePattern,
40
41
  TimeIntervalConfig,
41
42
  _BaseConfig,
42
43
  )
@@ -320,6 +321,25 @@ def load_yaml_dict(
320
321
  )
321
322
 
322
323
 
324
def compile_patterns(ignore_patterns: List[Union[str, IgnorePattern]]) -> list[re.Pattern[str]]:
    """
    Compile a list of ignore patterns into regular expression objects.

    Args:
        ignore_patterns: A list of strings or IgnorePattern to be compiled.

    Returns:
        A list of compiled RegExp patterns, in the same order as the input.
    """
    # IgnorePattern.compile() already returns a compiled pattern, so it is used as is instead of being
    # passed through re.compile() a second time. Plain strings are compiled without any flags.
    return [p.compile() if isinstance(p, IgnorePattern) else re.compile(p) for p in ignore_patterns]
341
+
342
+
323
343
  class ConfigResolver(Generic[CustomConfigClass]):
324
344
  def __init__(self, config_path: str, config_type: Type[CustomConfigClass]):
325
345
  self.config_path = config_path
@@ -0,0 +1,37 @@
1
+ import logging
2
+ import re
3
+ from typing import Union
4
+
5
+ _logger = logging.getLogger(__name__)
6
+
7
+
8
def matches_patterns(patterns: list[Union[str, re.Pattern[str]]], string: str) -> bool:
    """
    Check string against list of RegExp patterns.

    Patterns are tried in order and evaluation stops at the first one that matches.

    Args:
        patterns: A list of (re) patterns to match string against.
        string: String to which we match the pattern.

    Returns:
        boolean value indicating whether string matches any of the patterns. An empty list of patterns
        gives False.
    """
    # A generator lets any() short-circuit instead of first building a list of every result
    return any(matches_pattern(pattern, string) for pattern in patterns)
20
+
21
+
22
def matches_pattern(pattern: Union[str, re.Pattern[str]], string: str) -> bool:
    """
    Search a string for a single RegExp pattern.

    Args:
        pattern: (re) Pattern, as a string or already compiled, to search for.
        string: String in which we search for the pattern.

    Returns:
        True if the pattern is found anywhere in string. False if it is not found, or if the pattern
        raises re.error, in which case a warning is logged.
    """
    try:
        found = re.search(pattern, string)
    except re.error as e:
        _logger.warning(f"Could not apply RegExp: {pattern}\nReason: {e}")
        return False
    return found is not None
@@ -255,18 +255,20 @@ class IOFileUploadQueue(AbstractUploadQueue):
255
255
  ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
256
256
  if isinstance(file_meta, CogniteExtractorFileApply):
257
257
  node_id = self._apply_cognite_file(file_meta)
258
- file_meta, url = self._create_cdm(instance_id=node_id)
258
+ file_meta_response, url = self._create_cdm(instance_id=node_id)
259
259
  else:
260
- file_meta, url = self.cdf_client.files.create(file_metadata=file_meta, overwrite=self.overwrite_existing)
260
+ file_meta_response, url = self.cdf_client.files.create(
261
+ file_metadata=file_meta, overwrite=self.overwrite_existing
262
+ )
261
263
  # trigger update after creation (upsert =P)
262
264
  basic_attributes = set(["externalId", "name"])
263
265
  attr = set(file_meta.dump().keys())
264
266
  diff = attr - basic_attributes
265
267
 
266
268
  if len(diff) >= 1 and "externalId" in attr:
267
- file_meta = self.cdf_client.files.update(file_meta)
269
+ file_meta_response = self.cdf_client.files.update(file_meta)
268
270
 
269
- return file_meta, url
271
+ return file_meta_response, url
270
272
 
271
273
  def _upload_bytes(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
272
274
  file_meta, url = self._upload_empty(file_meta)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cognite-extractor-utils"
3
- version = "7.4.6"
3
+ version = "7.4.7"
4
4
  description = "Utilities for easier development of extractors for CDF"
5
5
  authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
6
6
  license = "Apache-2.0"
@@ -93,7 +93,7 @@ parameterized = "*"
93
93
  requests = "^2.31.0"
94
94
  types-requests = "^2.31.0.20240125"
95
95
  httpx = "^0.27.0"
96
- faker = "^28.0.0"
96
+ faker = "^29.0.0"
97
97
 
98
98
  [build-system]
99
99
  requires = ["poetry-core>=1.0.0"]