cognite-extractor-utils 7.1.4-py3-none-any.whl → 7.1.6-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Note: the registry has flagged this release as potentially problematic.

@@ -16,5 +16,5 @@
 Cognite extractor utils is a Python package that simplifies the development of new extractors.
 """
 
-__version__ = "7.1.4"
+__version__ = "7.1.6"
 from .base import Extractor
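
The only change in `__init__.py` is the version bump. Since the module exposes both `__version__` and the re-exported `Extractor` at the top level (per the context lines above), the bump is visible at runtime; a trivial check, assuming the new wheel is installed:

```python
# Quick runtime check of the installed version (sketch, not part of the package).
from cognite.extractorutils import Extractor, __version__

print(__version__)  # "7.1.6" once the new wheel is installed
```
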
@@ -251,70 +251,72 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def _upload_single(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+        def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            with read_file() as file:
+                size = super_len(file)
+                if size == 0:
+                    # upload just the file metadata without data
+                    file_meta, _url = self.cdf_client.files.create(
+                        file_metadata=file_meta, overwrite=self.overwrite_existing
+                    )
+                elif size >= self.max_single_chunk_file_size:
+                    # The minimum chunk size is 4000MiB.
+                    chunks = ChunkedStream(file, self.max_file_chunk_size, size)
+                    self.logger.debug(
+                        f"File {file_meta.external_id} is larger than 5GiB ({size})"
+                        f", uploading in {chunks.chunk_count} chunks"
+                    )
+                    with self.cdf_client.files.multipart_upload_session(
+                        file_meta.name if file_meta.name is not None else "",
+                        parts=chunks.chunk_count,
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    ) as session:
+                        while chunks.next_chunk():
+                            session.upload_part(chunks.current_chunk, chunks)
+                        file_meta = session.file_metadata
+                else:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+            if self.post_upload_function:
+                try:
+                    self.post_upload_function([file_meta])
+                except Exception as e:
+                    self.logger.error("Error in upload callback: %s", str(e))
+
+        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
             try:
-                # Upload file
-                with read_file() as file:
-                    size = super_len(file)
-                    if size == 0:
-                        # upload just the file metadata without data
-                        file_meta, _url = self.cdf_client.files.create(
-                            file_metadata=file_meta, overwrite=self.overwrite_existing
-                        )
-                    elif size >= self.max_single_chunk_file_size:
-                        # The minimum chunk size is 4000MiB.
-                        chunks = ChunkedStream(file, self.max_file_chunk_size, size)
-                        self.logger.debug(
-                            f"File {file_meta.external_id} is larger than 5GiB ({size})"
-                            f", uploading in {chunks.chunk_count} chunks"
-                        )
-                        with self.cdf_client.files.multipart_upload_session(
-                            file_meta.name if file_meta.name is not None else "",
-                            parts=chunks.chunk_count,
-                            overwrite=self.overwrite_existing,
-                            external_id=file_meta.external_id,
-                            source=file_meta.source,
-                            mime_type=file_meta.mime_type,
-                            metadata=file_meta.metadata,
-                            directory=file_meta.directory,
-                            asset_ids=file_meta.asset_ids,
-                            data_set_id=file_meta.data_set_id,
-                            labels=file_meta.labels,
-                            geo_location=file_meta.geo_location,
-                            source_created_time=file_meta.source_created_time,
-                            source_modified_time=file_meta.source_modified_time,
-                            security_categories=file_meta.security_categories,
-                        ) as session:
-                            while chunks.next_chunk():
-                                session.upload_part(chunks.current_chunk, chunks)
-                            file_meta = session.file_metadata
-                    else:
-                        file_meta = self.cdf_client.files.upload_bytes(
-                            file,
-                            file_meta.name if file_meta.name is not None else "",
-                            overwrite=self.overwrite_existing,
-                            external_id=file_meta.external_id,
-                            source=file_meta.source,
-                            mime_type=file_meta.mime_type,
-                            metadata=file_meta.metadata,
-                            directory=file_meta.directory,
-                            asset_ids=file_meta.asset_ids,
-                            data_set_id=file_meta.data_set_id,
-                            labels=file_meta.labels,
-                            geo_location=file_meta.geo_location,
-                            source_created_time=file_meta.source_created_time,
-                            source_modified_time=file_meta.source_modified_time,
-                            security_categories=file_meta.security_categories,
-                        )
-
-                    if self.post_upload_function:
-                        try:
-                            self.post_upload_function([file_meta])
-                        except Exception as e:
-                            self.logger.error("Error in upload callback: %s", str(e))
+                upload_file(read_file, file_meta)
 
             except Exception as e:
-                self.logger.exception("Unexpected error while uploading file")
+                self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
                 self.errors.append(e)
 
             finally:
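
Aside from moving into `upload_file` (and dropping one level of `try` indentation), the upload body is unchanged: empty files create metadata only, files at or above `max_single_chunk_file_size` go through a multipart session fed by `ChunkedStream`, and everything else is a single `upload_bytes` call. A stripped-down sketch of that size-based dispatch; the helpers and print statements here are illustrative stand-ins, not the CDF SDK's API:

```python
# Illustrative sketch of the size-based dispatch in upload_file.
from io import BytesIO
from typing import BinaryIO

MAX_SINGLE_CHUNK = 5 * 1024**3  # 5 GiB, mirroring max_single_chunk_file_size
CHUNK_SIZE = 4000 * 1024**2     # 4000 MiB, mirroring max_file_chunk_size

def upload(file: BinaryIO, size: int) -> None:
    if size == 0:
        print("create file metadata only, no data")   # files.create(...)
    elif size >= MAX_SINGLE_CHUNK:
        parts = -(-size // CHUNK_SIZE)                # ceiling division
        print(f"multipart upload in {parts} parts")   # files.multipart_upload_session(...)
    else:
        print("single upload_bytes call")             # files.upload_bytes(...)

upload(BytesIO(b""), 0)          # -> metadata only
upload(BytesIO(b"x" * 10), 10)   # -> single upload
```
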
@@ -331,7 +333,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             pass
 
         with self.lock:
-            self.upload_queue.append(self._pool.submit(_upload_single, read_file, file_meta))
+            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
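
Structurally, the refactor separates retrying from error accounting: `upload_file` keeps the `@retry` decorator, while `wrapped_upload` is what gets submitted to the pool, catching anything that survives the retries, logging it with the file's `external_id`, and recording it in `self.errors`. A self-contained sketch of that pattern; the `retry` decorator and `do_upload` below are toy stand-ins, not the library's APIs:

```python
# Sketch of the retry-inside, catch-all-outside pattern.
import functools
import logging
import time
from concurrent.futures import ThreadPoolExecutor

logger = logging.getLogger(__name__)

def retry(attempts: int = 3, backoff: float = 2.0):
    """Toy stand-in for the library's @retry decorator."""
    def deco(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return fn(*args, **kwargs)
                except ConnectionError:  # retry only transient errors
                    if attempt == attempts - 1:
                        raise
                    time.sleep(backoff**attempt)
        return wrapper
    return deco

@retry()
def do_upload(external_id: str) -> None:
    """Retryable body: the actual upload call would live here."""

def wrapped_upload(external_id: str, errors: list) -> None:
    """Submitted to the pool: logs once, records what retries couldn't fix."""
    try:
        do_upload(external_id)
    except Exception as e:
        logger.exception(f"Unexpected error while uploading file: {external_id}")
        errors.append(e)

errors: list = []
with ThreadPoolExecutor(max_workers=4) as pool:
    pool.submit(wrapped_upload, "my-file", errors)
```

Keeping the catch-all outside the retry decorator means an unexpected error is logged and appended exactly once, rather than once per retry attempt, and the per-file log line now carries the failing file's `external_id`.
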
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 7.1.4
+Version: 7.1.6
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
@@ -18,7 +18,7 @@ Provides-Extra: experimental
 Requires-Dist: arrow (>=1.0.0,<2.0.0)
 Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
 Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
-Requires-Dist: cognite-sdk (>=7.41.0,<8.0.0)
+Requires-Dist: cognite-sdk (>=7.43.3,<8.0.0)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
 Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
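
Besides the version itself, the only metadata change is the cognite-sdk floor moving from 7.41.0 to 7.43.3. If you manage pins yourself, a quick sanity check that the resolved SDK is in range might look like this (a sketch; it assumes a plain `x.y.z` version string):

```python
# Verify the installed cognite-sdk satisfies the new constraint (>=7.43.3,<8.0.0).
from importlib.metadata import version

sdk = tuple(int(p) for p in version("cognite-sdk").split(".")[:3])
assert (7, 43, 3) <= sdk < (8, 0, 0), f"cognite-sdk {sdk} is outside the required range"
```
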
@@ -1,4 +1,4 @@
-cognite/extractorutils/__init__.py,sha256=SEjG_txXBnbsMe3Fr5VxS1poP4XYsm6RM7d-g-_s81U,739
+cognite/extractorutils/__init__.py,sha256=rJAstzCVQ53u8dhEY1mpXwfauLJj4Apwuc4T0SywH-Y,739
 cognite/extractorutils/_inner_util.py,sha256=gmz6aqS7jDNsg8z4RHgJjMFohDLOMiaU4gMWBhg3xcE,1558
 cognite/extractorutils/base.py,sha256=q6NU2bPec3WOasVnnIFoh-aUJudVZWZ2R6emz3IRj8Q,16391
 cognite/extractorutils/configtools/__init__.py,sha256=L-daaqInIsmHcjb2forJeY0fW8tz1mlteOUo7IsWnrU,3059
@@ -15,13 +15,13 @@ cognite/extractorutils/uploader/_base.py,sha256=wktbV8dpb8zBOsNaECZkBNoJSpOz437N
 cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
 cognite/extractorutils/uploader/assets.py,sha256=2E90N1kxsaA6Ah4h0_r_dTVhDYY_68ItRWrHYkkltJw,5628
 cognite/extractorutils/uploader/events.py,sha256=NZP2tMoU_rh_rb-EZiUBsOT5KdNABHN4c9Oddk0OsdE,5680
-cognite/extractorutils/uploader/files.py,sha256=-yskmzcS9FcAsT2wmu3G4pd9cHJeiNqxmrERoRC72Dg,18417
+cognite/extractorutils/uploader/files.py,sha256=31kPS4fwz8ZSXWss-CKmYTM6ZLVx9LtsDe7LHT7Wy98,18329
 cognite/extractorutils/uploader/raw.py,sha256=wFjF90PFTjmByOWx_Y4_YfDJ2w2jl0EQJ2Tjx2MP2PM,6738
 cognite/extractorutils/uploader/time_series.py,sha256=WAtEQy7k5IjG-sw1oWwCujIM6PjHZYl4LKa4wy2tBPw,26817
 cognite/extractorutils/uploader_extractor.py,sha256=E-mpVvbPg_Tk90U4S9JybV0duptJ2SXE88HB6npE3zI,7732
 cognite/extractorutils/uploader_types.py,sha256=wxfrsiKPTzG5lmoYtQsxt8Xyj-s5HnaLl8WDzJNrazg,1020
 cognite/extractorutils/util.py,sha256=p7AGEgeIU0bNjuFJcFR3V5ZYr6QDj_ZC3zGxRJTf4yk,17198
-cognite_extractor_utils-7.1.4.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
-cognite_extractor_utils-7.1.4.dist-info/METADATA,sha256=2kFFLglmkRQH5oVko9iD3BZWELQrHbOdUQ2KH-QBJhI,5446
-cognite_extractor_utils-7.1.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cognite_extractor_utils-7.1.4.dist-info/RECORD,,
+cognite_extractor_utils-7.1.6.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+cognite_extractor_utils-7.1.6.dist-info/METADATA,sha256=lKPIa6knFqkRfZNTIff9JyC-dTnBYKLVbGmgASboGLs,5446
+cognite_extractor_utils-7.1.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+cognite_extractor_utils-7.1.6.dist-info/RECORD,,
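
As expected, only `__init__.py`, `files.py`, and the dist-info entries change hashes in RECORD. The `sha256=` values are urlsafe-base64 digests with the `=` padding stripped (per the wheel spec), so an installed file can be checked against its entry like this (the path is illustrative; point it at your installed copy):

```python
# Recompute a wheel RECORD-style hash for an installed file (sketch).
import base64
import hashlib

def record_hash(path: str) -> str:
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

# For 7.1.6 this should print
# "sha256=rJAstzCVQ53u8dhEY1mpXwfauLJj4Apwuc4T0SywH-Y":
print(record_hash("cognite/extractorutils/__init__.py"))
```
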