cognite-extractor-utils 7.1.3 → 7.1.5 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Potentially problematic release: this version of cognite-extractor-utils might be problematic.

@@ -16,5 +16,5 @@
 Cognite extractor utils is a Python package that simplifies the development of new extractors.
 """
 
-__version__ = "7.1.3"
+__version__ = "7.1.5"
 from .base import Extractor
@@ -189,9 +189,17 @@ class Extractor(Generic[CustomConfigClass]):
 
         state_store_config = recursive_find_state_store(self.config.__dict__)
        if state_store_config:
-            self.state_store = state_store_config.create_state_store(self.cognite_client, self.use_default_state_store)
+            self.state_store = state_store_config.create_state_store(
+                cdf_client=self.cognite_client,
+                default_to_local=self.use_default_state_store,
+                cancellation_token=self.cancellation_token,
+            )
         else:
-            self.state_store = LocalStateStore("states.json") if self.use_default_state_store else NoStateStore()
+            self.state_store = (
+                LocalStateStore("states.json", cancellation_token=self.cancellation_token)
+                if self.use_default_state_store
+                else NoStateStore()
+            )
 
         try:
             self.state_store.initialize()
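The practical effect of this hunk is that the extractor's own cancellation token now reaches whichever state store gets created. A minimal sketch of the resulting behavior, assuming the classes shown in the diff; the cancel() call is an assumed method name, not taken from this diff:

from cognite.extractorutils.statestore import LocalStateStore
from cognite.extractorutils.threading import CancellationToken

token = CancellationToken()
# The store derives a child token internally (see the AbstractStateStore
# hunk below), so cancelling the parent also stops its background thread.
store = LocalStateStore("states.json", cancellation_token=token)
store.initialize()
# ... extractor work ...
token.cancel()  # assumed method name for signalling cancellation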
@@ -623,7 +623,10 @@ class StateStoreConfig:
     local: Optional[LocalStateStoreConfig] = None
 
     def create_state_store(
-        self, cdf_client: Optional[CogniteClient] = None, default_to_local: bool = True
+        self,
+        cdf_client: Optional[CogniteClient] = None,
+        default_to_local: bool = True,
+        cancellation_token: Optional[CancellationToken] = None,
     ) -> AbstractStateStore:
         """
         Create a state store object based on the config.
@@ -648,15 +651,17 @@ class StateStoreConfig:
                 database=self.raw.database,
                 table=self.raw.table,
                 save_interval=self.raw.upload_interval.seconds,
+                cancellation_token=cancellation_token,
             )
 
         if self.local:
             return LocalStateStore(
                 file_path=self.local.path,
                 save_interval=self.local.save_interval.seconds,
+                cancellation_token=cancellation_token,
             )
 
         if default_to_local:
-            return LocalStateStore(file_path="states.json")
+            return LocalStateStore(file_path="states.json", cancellation_token=cancellation_token)
         else:
             return NoStateStore()
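For callers that build the store from configuration, the new keyword threads straight through. A sketch matching the signature above; state_store_config, client, and token are assumed to already exist in the surrounding extractor:

# state_store_config: a parsed StateStoreConfig instance
# client: Optional[CogniteClient]; token: a CancellationToken
store = state_store_config.create_state_store(
    cdf_client=client,
    default_to_local=True,
    cancellation_token=token,  # new in this release
)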
@@ -132,7 +132,7 @@ class AbstractStateStore(ABC):
 
         self.logger = logging.getLogger(__name__)
 
-        self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
+        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
         self.lock = threading.RLock()
         self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
 
@@ -58,7 +58,7 @@ class AbstractUploadQueue(ABC):
         self.trigger_log_level = _resolve_log_level(trigger_log_level)
         self.logger = logging.getLogger(__name__)
 
-        self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
+        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
         self.lock = threading.RLock()
         self.cancellation_token: CancellationToken = (
             cancellation_token.create_child_token() if cancellation_token else CancellationToken()
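This hunk and the AbstractStateStore one above apply the same rule: the worker thread is a daemon only when no cancellation token was supplied. A generic illustration of that reasoning, not library code:

import threading

def make_worker(run, cancellation_token=None):
    # Without a token there is no way to ask the thread to stop, so it must
    # be a daemon or it would block interpreter exit. With a token, a
    # non-daemon thread is safe: cancellation gives it a deterministic way
    # to finish, letting it flush state before the process exits.
    return threading.Thread(target=run, daemon=cancellation_token is None)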
@@ -251,70 +251,72 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_single(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+        def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            with read_file() as file:
+                size = super_len(file)
+                if size == 0:
+                    # upload just the file metadata witout data
+                    file_meta, _url = self.cdf_client.files.create(
+                        file_metadata=file_meta, overwrite=self.overwrite_existing
+                    )
+                elif size >= self.max_single_chunk_file_size:
+                    # The minimum chunk size is 4000MiB.
+                    chunks = ChunkedStream(file, self.max_file_chunk_size, size)
+                    self.logger.debug(
+                        f"File {file_meta.external_id} is larger than 5GiB ({size})"
+                        f", uploading in {chunks.chunk_count} chunks"
+                    )
+                    with self.cdf_client.files.multipart_upload_session(
+                        file_meta.name if file_meta.name is not None else "",
+                        parts=chunks.chunk_count,
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    ) as session:
+                        while chunks.next_chunk():
+                            session.upload_part(chunks.current_chunk, chunks)
+                        file_meta = session.file_metadata
+                else:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+            if self.post_upload_function:
+                try:
+                    self.post_upload_function([file_meta])
+                except Exception as e:
+                    self.logger.error("Error in upload callback: %s", str(e))
+
+        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
             try:
-                # Upload file
-                with read_file() as file:
-                    size = super_len(file)
-                    if size == 0:
-                        # upload just the file metadata witout data
-                        file_meta, _url = self.cdf_client.files.create(
-                            file_metadata=file_meta, overwrite=self.overwrite_existing
-                        )
-                    elif size >= self.max_single_chunk_file_size:
-                        # The minimum chunk size is 4000MiB.
-                        chunks = ChunkedStream(file, self.max_file_chunk_size, size)
-                        self.logger.debug(
-                            f"File {file_meta.external_id} is larger than 5GiB ({size})"
-                            f", uploading in {chunks.chunk_count} chunks"
-                        )
-                        with self.cdf_client.files.multipart_upload_session(
-                            file_meta.name if file_meta.name is not None else "",
-                            parts=chunks.chunk_count,
-                            overwrite=self.overwrite_existing,
-                            external_id=file_meta.external_id,
-                            source=file_meta.source,
-                            mime_type=file_meta.mime_type,
-                            metadata=file_meta.metadata,
-                            directory=file_meta.directory,
-                            asset_ids=file_meta.asset_ids,
-                            data_set_id=file_meta.data_set_id,
-                            labels=file_meta.labels,
-                            geo_location=file_meta.geo_location,
-                            source_created_time=file_meta.source_created_time,
-                            source_modified_time=file_meta.source_modified_time,
-                            security_categories=file_meta.security_categories,
-                        ) as session:
-                            while chunks.next_chunk():
-                                session.upload_part(chunks.current_chunk, chunks)
-                            file_meta = session.file_metadata
-                    else:
-                        file_meta = self.cdf_client.files.upload_bytes(
-                            file,
-                            file_meta.name if file_meta.name is not None else "",
-                            overwrite=self.overwrite_existing,
-                            external_id=file_meta.external_id,
-                            source=file_meta.source,
-                            mime_type=file_meta.mime_type,
-                            metadata=file_meta.metadata,
-                            directory=file_meta.directory,
-                            asset_ids=file_meta.asset_ids,
-                            data_set_id=file_meta.data_set_id,
-                            labels=file_meta.labels,
-                            geo_location=file_meta.geo_location,
-                            source_created_time=file_meta.source_created_time,
-                            source_modified_time=file_meta.source_modified_time,
-                            security_categories=file_meta.security_categories,
-                        )
-
-                if self.post_upload_function:
-                    try:
-                        self.post_upload_function([file_meta])
-                    except Exception as e:
-                        self.logger.error("Error in upload callback: %s", str(e))
+                upload_file(read_file, file_meta)
 
             except Exception as e:
-                self.logger.exception("Unexpected error while uploading file")
+                self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
                 self.errors.append(e)
 
             finally:
@@ -331,7 +333,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
                 pass
 
         with self.lock:
-            self.upload_queue.append(self._pool.submit(_upload_single, read_file, file_meta))
+            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
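The refactor above splits the old _upload_single in two: the retry decorator now wraps only the upload body (upload_file), while wrapped_upload catches whatever survives the retries and records it, so one failed file cannot kill a pool worker. A generic sketch of that shape; the names are illustrative, not the library's API:

import logging
from typing import Callable

def make_wrapped(task: Callable[..., None], errors: list, logger: logging.Logger) -> Callable[..., None]:
    # Retries belong on `task` itself; this wrapper only records terminal
    # failures so the executor thread that runs it always returns cleanly.
    def wrapped(*args: object) -> None:
        try:
            task(*args)
        except Exception as e:
            logger.exception("unexpected error in task")
            errors.append(e)
    return wrapped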
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 7.1.3
+Version: 7.1.5
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -1,27 +1,27 @@
-cognite/extractorutils/__init__.py,sha256=-IiYwZmQ1ixTVlM0nkB38zxXs4AsB04AKutQnXLoKTM,739
+cognite/extractorutils/__init__.py,sha256=vRCQ7k-wwiC8W2oUIVpZoFbuyYyNOOarwLEFnZPau_A,739
 cognite/extractorutils/_inner_util.py,sha256=gmz6aqS7jDNsg8z4RHgJjMFohDLOMiaU4gMWBhg3xcE,1558
-cognite/extractorutils/base.py,sha256=0t9HUANPLKSbAyfDtb0X7stwuarW1NkamRF3vyXyQzc,16148
+cognite/extractorutils/base.py,sha256=q6NU2bPec3WOasVnnIFoh-aUJudVZWZ2R6emz3IRj8Q,16391
 cognite/extractorutils/configtools/__init__.py,sha256=L-daaqInIsmHcjb2forJeY0fW8tz1mlteOUo7IsWnrU,3059
 cognite/extractorutils/configtools/_util.py,sha256=SZycZm_py9v9WZbDiDQbgS6_PiLtu-TtwuuH7tG2YCI,4739
-cognite/extractorutils/configtools/elements.py,sha256=P9oXw07my3Q6UNwef9Ln6-LupChwaSrsF69BHuee9sY,21334
+cognite/extractorutils/configtools/elements.py,sha256=FbkKg1YQOpCs4Bwjs4RacRswttM08TJHnkA9HNweYFs,21564
 cognite/extractorutils/configtools/loaders.py,sha256=VmKNfGqwdHycwZB91i-BHarjW-2Mw_Shv31R6uozm88,16317
 cognite/extractorutils/exceptions.py,sha256=XiwyNPSN0YxFYaPw7tfA63B94PL48xDK3EfdGdhgQgc,1084
 cognite/extractorutils/metrics.py,sha256=01ZMRbDisXPxrfCSyTSEkXMsslzmZwEqw18fuu9okdc,15509
 cognite/extractorutils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cognite/extractorutils/statestore.py,sha256=CiE2E6zOCWCLXmEzxaoM9V_-laH3T7U4-ll_qE6rgRE,18645
+cognite/extractorutils/statestore.py,sha256=-KC71NyXaSvzS-j1kUJM9YE70qCmWS_iVzBVhDC0i74,18667
 cognite/extractorutils/threading.py,sha256=2Hke5cFvP-wA45Crvh58JahoKXB64P3tr7R4y_BhBqM,3605
 cognite/extractorutils/uploader/__init__.py,sha256=W22u6QHA4cR0j78LN5LTL5YGbfC-uTApagTyP5ab7uQ,3110
-cognite/extractorutils/uploader/_base.py,sha256=-aFfoMSBGd9YUUMHL3ZQpLIuNMA7TNklWCEjPA18ER8,5282
+cognite/extractorutils/uploader/_base.py,sha256=wktbV8dpb8zBOsNaECZkBNoJSpOz437NlNMER3-a3xQ,5304
 cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
 cognite/extractorutils/uploader/assets.py,sha256=2E90N1kxsaA6Ah4h0_r_dTVhDYY_68ItRWrHYkkltJw,5628
 cognite/extractorutils/uploader/events.py,sha256=NZP2tMoU_rh_rb-EZiUBsOT5KdNABHN4c9Oddk0OsdE,5680
-cognite/extractorutils/uploader/files.py,sha256=-yskmzcS9FcAsT2wmu3G4pd9cHJeiNqxmrERoRC72Dg,18417
+cognite/extractorutils/uploader/files.py,sha256=31kPS4fwz8ZSXWss-CKmYTM6ZLVx9LtsDe7LHT7Wy98,18329
 cognite/extractorutils/uploader/raw.py,sha256=wFjF90PFTjmByOWx_Y4_YfDJ2w2jl0EQJ2Tjx2MP2PM,6738
 cognite/extractorutils/uploader/time_series.py,sha256=WAtEQy7k5IjG-sw1oWwCujIM6PjHZYl4LKa4wy2tBPw,26817
 cognite/extractorutils/uploader_extractor.py,sha256=E-mpVvbPg_Tk90U4S9JybV0duptJ2SXE88HB6npE3zI,7732
 cognite/extractorutils/uploader_types.py,sha256=wxfrsiKPTzG5lmoYtQsxt8Xyj-s5HnaLl8WDzJNrazg,1020
 cognite/extractorutils/util.py,sha256=p7AGEgeIU0bNjuFJcFR3V5ZYr6QDj_ZC3zGxRJTf4yk,17198
-cognite_extractor_utils-7.1.3.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
-cognite_extractor_utils-7.1.3.dist-info/METADATA,sha256=yHvMX5Sjas6K_XAFBdxNt-YFxe9q1QzI7k7PyDM6dx0,5446
-cognite_extractor_utils-7.1.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cognite_extractor_utils-7.1.3.dist-info/RECORD,,
+cognite_extractor_utils-7.1.5.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+cognite_extractor_utils-7.1.5.dist-info/METADATA,sha256=YUu6fg06uVoH0m9HCeWuobsiWhFZpe8C_qJrzGl8Ey0,5446
+cognite_extractor_utils-7.1.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+cognite_extractor_utils-7.1.5.dist-info/RECORD,,