cognite-extractor-utils 5.0.1__py3-none-any.whl → 5.2.0__py3-none-any.whl

This diff compares two publicly released versions of this package, as published to a supported public registry, and is provided for informational purposes only.

This version of cognite-extractor-utils has been flagged as potentially problematic.

@@ -1,14 +1,19 @@
+"""
+This module is deprecated and will be removed in a future version
+"""
+
+
 from sys import platform
-from typing import Union
+from typing import Any, Union
 
-from cognite.client.data_classes import Event, Row
+from cognite.client.data_classes import Row
 
 
 class JQMiddleware:
     def __init__(self, jq_rules: str) -> None:
         if platform == "win32":
             raise Exception("Windows platform doesn't support jq bindings for Python yet")
-        import jq
+        import jq  # type: ignore
 
         self._jq = jq.compile(jq_rules)
 
@@ -26,6 +31,6 @@ class JQMiddleware:
 
         return data
 
-    def _raise_for_non_dict(self, data):
+    def _raise_for_non_dict(self, data: Any) -> None:
         if not isinstance(data, dict):
             raise ValueError("output of jq middleware must be a dict")
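
Aside from the deprecation notice, the changes to this module are annotation fixes only. For context, a minimal sketch of what JQMiddleware does with the compiled program, assuming the `jq` PyPI bindings imported above (the rule string, sample row, and variable names are illustrative, not from the package):

    import jq  # the same bindings JQMiddleware imports lazily

    # Compile a rule once, then apply it to each incoming row; JQMiddleware
    # additionally rejects any result that is not a dict.
    program = jq.compile('{"id": .externalId, "value": (.value * 2)}')
    result = program.input({"externalId": "sensor-1", "value": 21}).first()
    # result == {"id": "sensor-1", "value": 42}
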
@@ -89,14 +89,13 @@ import json
 import logging
 import threading
 from abc import ABC, abstractmethod
-from decimal import Decimal
-from threading import Lock
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from types import TracebackType
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
-from cognite.client import CogniteClient
-from cognite.client.exceptions import CogniteAPIError
 from requests.exceptions import ConnectionError
 
+from cognite.client import CogniteClient
+from cognite.client.exceptions import CogniteAPIError
 from cognite.extractorutils.uploader import DataPointList
 
 from ._inner_util import _DecimalDecoder, _DecimalEncoder, _resolve_log_level
@@ -140,8 +139,6 @@ class AbstractStateStore(ABC):
 
         self._deleted: List[str] = []
 
-        self.lock = Lock()
-
     def start(self) -> None:
         """
         Start saving state periodically if save_interval is set.
@@ -203,12 +200,12 @@ class AbstractStateStore(ABC):
         """
         with self.lock:
             if isinstance(external_id, list):
-                l = []
+                states = []
                 for e in external_id:
                     state = self._local_state.get(e, {})
-                    l.append((state.get("low"), state.get("high")))
+                    states.append((state.get("low"), state.get("high")))
 
-                return l
+                return states
 
             else:
                 state = self._local_state.get(external_id, {})
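
The rename from `l` to `states` is cosmetic (an ambiguous single-letter name), so the two call shapes of `get_state` are unchanged. A hedged sketch, assuming a `LocalStateStore` pointed at a scratch file:

    from cognite.extractorutils.statestore import LocalStateStore

    store = LocalStateStore("states.json")
    store.set_state("ts-1", low=0, high=100)

    store.get_state("ts-1")            # single id -> one (low, high) tuple
    store.get_state(["ts-1", "ts-2"])  # list of ids -> [(0, 100), (None, None)]
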
@@ -263,14 +260,14 @@ class AbstractStateStore(ABC):
         A function that expands the current states with the values given
         """
 
-        def callback(uploaded_points: List[Dict[str, Union[str, DataPointList]]]):
+        def callback(uploaded_points: List[Dict[str, Union[str, DataPointList]]]) -> None:
             for time_series in uploaded_points:
                 # Use CDF timestamps
                 data_points = time_series["datapoints"]
                 if data_points:
                     high = max(data_points)[0]
                     low = min(data_points)[0]
-                    external_id = time_series["externalId"]
+                    external_id: str = time_series["externalId"]  # type: ignore # known to be str from where we set it
                     self.expand_state(external_id, low, high)
 
         return callback
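
This callback factory is what lets an upload queue push its progress straight into the state store. A hedged wiring sketch, assuming the factory is exposed as `post_upload_handler()` and that `client` is an already-configured CogniteClient:

    from cognite.extractorutils.statestore import LocalStateStore
    from cognite.extractorutils.uploader import TimeSeriesUploadQueue

    state_store = LocalStateStore("states.json")
    queue = TimeSeriesUploadQueue(
        cdf_client=client,
        post_upload_function=state_store.post_upload_handler(),  # the callback above
        max_upload_interval=30,
    )
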
@@ -301,7 +298,7 @@ class AbstractStateStore(ABC):
         return False
 
     def __getitem__(self, external_id: str) -> Tuple[Any, Any]:
-        return self.get_state(external_id)
+        return self.get_state(external_id)  # type: ignore # will not be list if input is single str
 
     def __setitem__(self, key: str, value: Tuple[Any, Any]) -> None:
         self.set_state(external_id=key, low=value[0], high=value[1])
@@ -382,7 +379,8 @@ class RawStateStore(AbstractStateStore):
         if self._initialized and not force:
             return
 
-        rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None)
+        # ignore type since list _is_ optional, sdk types are wrong
+        rows = self._cdf_client.raw.rows.list(db_name=self.database, table_name=self.table, limit=None)  # type: ignore
 
         with self.lock:
             self._local_state.clear()
@@ -422,7 +420,9 @@ class RawStateStore(AbstractStateStore):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
@@ -500,7 +500,9 @@ class LocalStateStore(AbstractStateStore):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
@@ -517,7 +519,7 @@ class NoStateStore(AbstractStateStore):
     A state store that only keeps states in memory and never stores or initializes from external sources.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()
 
     def initialize(self, force: bool = False) -> None:
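
The repeated `__exit__` rewrites in the state store hunks above all follow the standard pattern for typing context managers. A minimal standalone sketch of that pattern:

    from types import TracebackType
    from typing import Optional, Type

    class Managed:
        def __enter__(self) -> "Managed":
            return self

        def __exit__(
            self,
            exc_type: Optional[Type[BaseException]],
            exc_val: Optional[BaseException],
            exc_tb: Optional[TracebackType],
        ) -> None:
            # Returning None (falsy) lets any active exception propagate
            pass
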
@@ -19,8 +19,8 @@ from dataclasses import dataclass
 from typing import Any, Callable, List, Optional
 
 from arrow import Arrow
-from cognite.client import CogniteClient
 
+from cognite.client import CogniteClient
 from cognite.extractorutils._inner_util import _resolve_log_level
 
 
@@ -88,16 +88,9 @@ class AbstractUploadQueue(ABC):
         if self.post_upload_function is not None:
             try:
                 self.post_upload_function(uploaded)
-            except Exception as e:
+            except Exception:
                 logging.getLogger(__name__).exception("Error during upload callback")
 
-    @abstractmethod
-    def add_to_upload_queue(self, *args) -> None:
-        """
-        Adds an element to the upload queue. The queue will be uploaded if the queue byte size is larger than the
-        threshold specified in the config.
-        """
-
     @abstractmethod
     def upload(self) -> None:
         """
@@ -13,14 +13,15 @@
 # limitations under the License.
 
 import threading
-from typing import Callable, List, Optional
+from types import TracebackType
+from typing import Callable, List, Optional, Type
 
 import arrow
-from cognite.client import CogniteClient
-from cognite.client.data_classes import Event
-from cognite.client.exceptions import CogniteAPIError
 from requests import ConnectionError
 
+from cognite.client import CogniteClient
+from cognite.client.data_classes import Event
+from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -131,8 +132,23 @@ class EventUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_batch(self):
-        self.cdf_client.events.create([e for e in self.upload_queue])
+    def _upload_batch(self) -> None:
+        try:
+            self.cdf_client.events.create([e for e in self.upload_queue])
+        except CogniteDuplicatedError as e:
+            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+            failed: List[Event] = [e for e in e.failed]
+            to_create = []
+            to_update = []
+            for evt in failed:
+                if evt.external_id is not None and evt.external_id in duplicated_ids:
+                    to_update.append(evt)
+                else:
+                    to_create.append(evt)
+            if to_create:
+                self.cdf_client.events.create(to_create)
+            if to_update:
+                self.cdf_client.events.update(to_update)
 
     def __enter__(self) -> "EventUploadQueue":
         """
@@ -144,7 +160,9 @@ class EventUploadQueue(AbstractUploadQueue):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
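The new `_upload_batch` makes the event queue behave as an upsert: duplicates reported via CogniteDuplicatedError are retried as updates. A hedged usage sketch, with `client` assumed to be a configured CogniteClient:

    from cognite.client.data_classes import Event
    from cognite.extractorutils.uploader import EventUploadQueue

    with EventUploadQueue(client, max_upload_interval=10) as queue:
        queue.add_to_upload_queue(Event(external_id="evt-1", description="updated"))
    # If evt-1 already exists in CDF, the batch is retried with
    # events.update() for the duplicate instead of failing outright.
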
@@ -15,14 +15,15 @@
 import threading
 from concurrent.futures import ThreadPoolExecutor
 from os import PathLike
-from typing import Any, Callable, List, Optional, Tuple, Union
+from types import TracebackType
+from typing import Any, Callable, List, Optional, Tuple, Type, Union
 
 import arrow
+from requests import ConnectionError
+
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Event, FileMetadata
 from cognite.client.exceptions import CogniteAPIError
-from requests import ConnectionError
-
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -89,7 +90,7 @@ class FileUploadQueue(AbstractUploadQueue):
         self.latency = FILES_UPLOADER_LATENCY
         self.latency_zero_point = arrow.utcnow()
 
-    def add_to_upload_queue(self, file_meta: FileMetadata, file_name: Union[str, PathLike] = None) -> None:
+    def add_to_upload_queue(self, file_meta: FileMetadata, file_name: Union[str, PathLike]) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
         specified in the __init__.
@@ -141,14 +142,14 @@ class FileUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_single(self, index, file_name, file_meta):
+    def _upload_single(self, index: int, file_name: Union[str, PathLike], file_meta: FileMetadata) -> None:
         # Upload file
-        file_meta = self.cdf_client.files.upload(file_name, overwrite=self.overwrite_existing, **file_meta.dump())
+        file_meta = self.cdf_client.files.upload(str(file_name), overwrite=self.overwrite_existing, **file_meta.dump())  # type: ignore
 
         # Update meta-object in queue
         self.upload_queue[index] = (file_meta, file_name)
 
-    def _upload_batch(self):
+    def _upload_batch(self) -> None:
         # Concurrently execute file-uploads
 
         with ThreadPoolExecutor(self.cdf_client.config.max_workers) as pool:
@@ -165,7 +166,9 @@ class FileUploadQueue(AbstractUploadQueue):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
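With `file_name` no longer defaulting to None (a default the non-Optional annotation never allowed anyway), callers must always pass a path. A hedged sketch of the updated call shape, again assuming `client` is a configured CogniteClient:

    from cognite.client.data_classes import FileMetadata
    from cognite.extractorutils.uploader import FileUploadQueue

    with FileUploadQueue(client, overwrite_existing=True) as queue:
        queue.add_to_upload_queue(
            FileMetadata(external_id="doc-1", name="report.pdf"),
            "/data/incoming/report.pdf",  # now required, was Optional-by-accident
        )
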
@@ -212,7 +215,7 @@ class BytesUploadQueue(AbstractUploadQueue):
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
         cancellation_token: threading.Event = threading.Event(),
-    ):
+    ) -> None:
         super().__init__(
             cdf_client,
             post_upload_function,
@@ -277,7 +280,7 @@ class BytesUploadQueue(AbstractUploadQueue):
         self.logger.info(f"Uploaded {self.upload_queue_size} files")
         self.queue_size.set(self.upload_queue_size)
 
-    def _upload_batch(self):
+    def _upload_batch(self) -> None:
         # Concurrently execute bytes-uploads
         with ThreadPoolExecutor(self.cdf_client.config.max_workers) as pool:
             for i, (frame, metadata) in enumerate(self.upload_queue):
@@ -290,7 +293,7 @@ class BytesUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_single(self, index: int, content: bytes, metadata: FileMetadata):
+    def _upload_single(self, index: int, content: bytes, metadata: FileMetadata) -> None:
         # Upload object
         file_meta_data: FileMetadata = self.cdf_client.files.upload_bytes(
             content, overwrite=self.overwrite_existing, **metadata.dump()
@@ -309,7 +312,9 @@ class BytesUploadQueue(AbstractUploadQueue):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
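BytesUploadQueue gets the same annotation treatment; as the `_upload_single` signature shows, its queue pairs raw bytes with FileMetadata. A hedged usage sketch under the same `client` assumption:

    from cognite.client.data_classes import FileMetadata
    from cognite.extractorutils.uploader import BytesUploadQueue

    with BytesUploadQueue(client, overwrite_existing=True) as queue:
        queue.add_to_upload_queue(
            b"col_a,col_b\n1,2\n",
            FileMetadata(external_id="blob-1", name="blob-1.csv"),
        )
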
@@ -13,15 +13,16 @@
 # limitations under the License.
 
 import threading
-from typing import Any, Callable, Dict, List, Optional
+from types import TracebackType
+from typing import Any, Callable, Dict, List, Optional, Type
 
 import arrow
 from arrow import Arrow
+from requests import ConnectionError
+
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Row
 from cognite.client.exceptions import CogniteAPIError, CogniteReadTimeout
-from requests import ConnectionError
-
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -75,7 +76,7 @@ class RawUploadQueue(AbstractUploadQueue):
             thread_name,
             cancellation_token,
         )
-        self.upload_queue: Dict[str, Dict[str, List[TimestampedObject]]] = dict()
+        self.upload_queue: Dict[str, Dict[str, List[TimestampedObject]]] = {}
 
         # It is a hack since Prometheus client registers metrics on object creation, so object has to be created once
         self.rows_queued = RAW_UPLOADER_ROWS_QUEUED
@@ -97,7 +98,7 @@ class RawUploadQueue(AbstractUploadQueue):
         with self.lock:
             # Ensure that the dicts has correct keys
             if database not in self.upload_queue:
-                self.upload_queue[database] = dict()
+                self.upload_queue[database] = {}
             if table not in self.upload_queue[database]:
                 self.upload_queue[database][table] = []
 
@@ -152,7 +153,7 @@ class RawUploadQueue(AbstractUploadQueue):
         max_delay=RETRY_MAX_DELAY,
         backoff=RETRY_BACKOFF_FACTOR,
     )
-    def _upload_batch(self, database: str, table: str, patch: List[Row]):
+    def _upload_batch(self, database: str, table: str, patch: List[Row]) -> None:
         # Upload
         self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
 
@@ -166,7 +167,9 @@ class RawUploadQueue(AbstractUploadQueue):
         self.start()
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
         """
         Wraps around stop method, for use as context manager
 
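Finally, the RawUploadQueue changes are annotations and idiom fixes (`{}` over `dict()`) with no behavior change; rows are still grouped per database and table and flushed with `ensure_parent=True`. A hedged usage sketch to close, with `client` again assumed to be a configured CogniteClient:

    from cognite.client.data_classes import Row
    from cognite.extractorutils.uploader import RawUploadQueue

    with RawUploadQueue(client, max_queue_size=1000) as queue:
        queue.add_to_upload_queue("my_db", "my_table", Row("key-1", {"value": 42}))
    # Remaining rows are flushed on exit via raw.rows.insert(..., ensure_parent=True)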