cognite-extractor-utils 7.4.1__tar.gz → 7.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (32)
  1. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/PKG-INFO +1 -1
  2. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/__init__.py +1 -1
  3. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/files.py +40 -40
  4. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/pyproject.toml +1 -1
  5. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/LICENSE +0 -0
  6. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/README.md +0 -0
  7. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/_inner_util.py +0 -0
  8. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/base.py +0 -0
  9. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/configtools/__init__.py +0 -0
  10. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/configtools/_util.py +0 -0
  11. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/configtools/elements.py +0 -0
  12. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/configtools/loaders.py +0 -0
  13. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/exceptions.py +0 -0
  14. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/metrics.py +0 -0
  15. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/py.typed +0 -0
  16. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/statestore/__init__.py +0 -0
  17. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/statestore/_base.py +0 -0
  18. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/statestore/hashing.py +0 -0
  19. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/statestore/watermark.py +0 -0
  20. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/threading.py +0 -0
  21. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/unstable/__init__.py +0 -0
  22. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/__init__.py +0 -0
  23. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/_base.py +0 -0
  24. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/_metrics.py +0 -0
  25. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/assets.py +0 -0
  26. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/data_modeling.py +0 -0
  27. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/events.py +0 -0
  28. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/raw.py +0 -0
  29. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader/time_series.py +0 -0
  30. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader_extractor.py +0 -0
  31. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/uploader_types.py +0 -0
  32. {cognite_extractor_utils-7.4.1 → cognite_extractor_utils-7.4.3}/cognite/extractorutils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-extractor-utils
3
- Version: 7.4.1
3
+ Version: 7.4.3
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Home-page: https://github.com/cognitedata/python-extractor-utils
6
6
  License: Apache-2.0
@@ -16,5 +16,5 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.4.1"
19
+ __version__ = "7.4.3"
20
20
  from .base import Extractor
@@ -251,41 +251,38 @@ class IOFileUploadQueue(AbstractUploadQueue):
251
251
  return node.as_id()
252
252
 
253
253
  def _upload_empty(
254
- self, meta_or_apply: FileMetadataOrCogniteExtractorFile
254
+ self, file_meta: FileMetadataOrCogniteExtractorFile
255
255
  ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
256
- if isinstance(meta_or_apply, CogniteExtractorFileApply):
257
- node_id = self._apply_cognite_file(meta_or_apply)
258
- meta_or_apply, url = self._create_cdm(instance_id=node_id)
256
+ if isinstance(file_meta, CogniteExtractorFileApply):
257
+ node_id = self._apply_cognite_file(file_meta)
258
+ file_meta, url = self._create_cdm(instance_id=node_id)
259
259
  else:
260
- meta_or_apply, url = self.cdf_client.files.create(
261
- file_metadata=meta_or_apply, overwrite=self.overwrite_existing
262
- )
263
- return meta_or_apply, url
260
+ file_meta, url = self.cdf_client.files.create(file_metadata=file_meta, overwrite=self.overwrite_existing)
261
+ return file_meta, url
264
262
 
265
- def _upload_bytes(self, size: int, file: BinaryIO, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
266
- meta_or_apply, url = self._upload_empty(meta_or_apply)
267
- resp = self._httpx_client.send(self._get_file_upload_request(url, file, size))
263
+ def _upload_bytes(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
264
+ file_meta, url = self._upload_empty(file_meta)
265
+ resp = self._httpx_client.send(self._get_file_upload_request(url, file, size, file_meta.mime_type))
268
266
  resp.raise_for_status()
269
267
 
270
- def _upload_multipart(self, size: int, file: BinaryIO, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
268
+ def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
271
269
  chunks = ChunkedStream(file, self.max_file_chunk_size, size)
272
270
  self.logger.debug(
273
- f"File {meta_or_apply.external_id} is larger than 5GiB ({size})"
274
- f", uploading in {chunks.chunk_count} chunks"
271
+ f"File {file_meta.external_id} is larger than 5GiB ({size})" f", uploading in {chunks.chunk_count} chunks"
275
272
  )
276
273
 
277
- returned_file_metadata = self._create_multi_part(meta_or_apply, chunks)
274
+ returned_file_metadata = self._create_multi_part(file_meta, chunks)
278
275
  upload_urls = returned_file_metadata["uploadUrls"]
279
276
  upload_id = returned_file_metadata["uploadId"]
280
277
  file_meta = FileMetadata.load(returned_file_metadata)
281
278
 
282
279
  for url in upload_urls:
283
280
  chunks.next_chunk()
284
- resp = self._httpx_client.send(self._get_file_upload_request(url, chunks, len(chunks)))
281
+ resp = self._httpx_client.send(self._get_file_upload_request(url, chunks, len(chunks), file_meta.mime_type))
285
282
  resp.raise_for_status()
286
283
 
287
284
  completed_headers = (
288
- _CDF_ALPHA_VERSION_HEADER if isinstance(meta_or_apply, CogniteExtractorFileApply) is not None else None
285
+ _CDF_ALPHA_VERSION_HEADER if isinstance(file_meta, CogniteExtractorFileApply) is not None else None
289
286
  )
290
287
 
291
288
  res = self.cdf_client.files._post(
@@ -295,9 +292,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
295
292
  )
296
293
  res.raise_for_status()
297
294
 
298
- def _create_multi_part(self, meta_or_apply: FileMetadataOrCogniteExtractorFile, chunks: ChunkedStream) -> dict:
299
- if isinstance(meta_or_apply, CogniteExtractorFileApply):
300
- node_id = self._apply_cognite_file(meta_or_apply)
295
+ def _create_multi_part(self, file_meta: FileMetadataOrCogniteExtractorFile, chunks: ChunkedStream) -> dict:
296
+ if isinstance(file_meta, CogniteExtractorFileApply):
297
+ node_id = self._apply_cognite_file(file_meta)
301
298
  identifiers = IdentifierSequence.load(instance_ids=node_id).as_singleton()
302
299
  self.cdf_client.files._warn_alpha()
303
300
  res = self.cdf_client.files._post(
@@ -311,7 +308,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
311
308
  else:
312
309
  res = self.cdf_client.files._post(
313
310
  url_path="/files/initmultipartupload",
314
- json=meta_or_apply.dump(camel_case=True),
311
+ json=file_meta.dump(camel_case=True),
315
312
  params={"overwrite": self.overwrite_existing, "parts": chunks.chunk_count},
316
313
  )
317
314
  res.raise_for_status()
@@ -319,7 +316,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
319
316
 
320
317
  def add_io_to_upload_queue(
321
318
  self,
322
- meta_or_apply: FileMetadataOrCogniteExtractorFile,
319
+ file_meta: FileMetadataOrCogniteExtractorFile,
323
320
  read_file: Callable[[], BinaryIO],
324
321
  extra_retries: Optional[
325
322
  Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
@@ -349,36 +346,34 @@ class IOFileUploadQueue(AbstractUploadQueue):
349
346
  max_delay=RETRY_MAX_DELAY,
350
347
  backoff=RETRY_BACKOFF_FACTOR,
351
348
  )
352
- def upload_file(read_file: Callable[[], BinaryIO], meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
349
+ def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadataOrCogniteExtractorFile) -> None:
353
350
  with read_file() as file:
354
351
  size = super_len(file)
355
352
  if size == 0:
356
353
  # upload just the file metadata without data
357
- meta_or_apply, _ = self._upload_empty(meta_or_apply)
354
+ file_meta, _ = self._upload_empty(file_meta)
358
355
  elif size >= self.max_single_chunk_file_size:
359
356
  # The minimum chunk size is 4000MiB.
360
- self._upload_multipart(size, file, meta_or_apply)
357
+ self._upload_multipart(size, file, file_meta)
361
358
 
362
359
  else:
363
- self._upload_bytes(size, file, meta_or_apply)
360
+ self._upload_bytes(size, file, file_meta)
364
361
 
365
- if isinstance(meta_or_apply, CogniteExtractorFileApply):
366
- meta_or_apply.is_uploaded = True
362
+ if isinstance(file_meta, CogniteExtractorFileApply):
363
+ file_meta.is_uploaded = True
367
364
 
368
365
  if self.post_upload_function:
369
366
  try:
370
- self.post_upload_function([meta_or_apply])
367
+ self.post_upload_function([file_meta])
371
368
  except Exception as e:
372
369
  self.logger.error("Error in upload callback: %s", str(e))
373
370
 
374
- def wrapped_upload(
375
- read_file: Callable[[], BinaryIO], meta_or_apply: FileMetadataOrCogniteExtractorFile
376
- ) -> None:
371
+ def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadataOrCogniteExtractorFile) -> None:
377
372
  try:
378
- upload_file(read_file, meta_or_apply)
373
+ upload_file(read_file, file_meta)
379
374
 
380
375
  except Exception as e:
381
- self.logger.exception(f"Unexpected error while uploading file: {meta_or_apply.external_id}")
376
+ self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
382
377
  self.errors.append(e)
383
378
 
384
379
  finally:
@@ -395,12 +390,14 @@ class IOFileUploadQueue(AbstractUploadQueue):
395
390
  pass
396
391
 
397
392
  with self.lock:
398
- self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, meta_or_apply))
393
+ self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, file_meta))
399
394
  self.upload_queue_size += 1
400
395
  self.files_queued.inc()
401
396
  self.queue_size.set(self.upload_queue_size)
402
397
 
403
- def _get_file_upload_request(self, url_str: str, stream: BinaryIO, size: int) -> Request:
398
+ def _get_file_upload_request(
399
+ self, url_str: str, stream: BinaryIO, size: int, mime_type: Optional[str] = None
400
+ ) -> Request:
404
401
  url = URL(url_str)
405
402
  headers = Headers(self._httpx_client.headers)
406
403
  headers.update(
@@ -412,6 +409,9 @@ class IOFileUploadQueue(AbstractUploadQueue):
412
409
  }
413
410
  )
414
411
 
412
+ if mime_type is not None:
413
+ headers.update({"Content-Type": mime_type})
414
+
415
415
  return Request(
416
416
  method="PUT",
417
417
  url=url,
@@ -515,7 +515,7 @@ class FileUploadQueue(IOFileUploadQueue):
515
515
  )
516
516
 
517
517
  def add_to_upload_queue(
518
- self, meta_or_apply: FileMetadataOrCogniteExtractorFile, file_name: Union[str, PathLike]
518
+ self, file_meta: FileMetadataOrCogniteExtractorFile, file_name: Union[str, PathLike]
519
519
  ) -> None:
520
520
  """
521
521
  Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
@@ -530,7 +530,7 @@ class FileUploadQueue(IOFileUploadQueue):
530
530
  def load_file_from_path() -> BinaryIO:
531
531
  return open(file_name, "rb")
532
532
 
533
- self.add_io_to_upload_queue(meta_or_apply, load_file_from_path)
533
+ self.add_io_to_upload_queue(file_meta, load_file_from_path)
534
534
 
535
535
 
536
536
  class BytesUploadQueue(IOFileUploadQueue):
@@ -567,7 +567,7 @@ class BytesUploadQueue(IOFileUploadQueue):
567
567
  cancellation_token,
568
568
  )
569
569
 
570
- def add_to_upload_queue(self, content: bytes, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
570
+ def add_to_upload_queue(self, content: bytes, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
571
571
  """
572
572
  Add object to upload queue. The queue will be uploaded if the queue size is larger than the threshold
573
573
  specified in the __init__.
@@ -579,4 +579,4 @@ class BytesUploadQueue(IOFileUploadQueue):
579
579
  def get_byte_io() -> BinaryIO:
580
580
  return BytesIO(content)
581
581
 
582
- self.add_io_to_upload_queue(meta_or_apply, get_byte_io)
582
+ self.add_io_to_upload_queue(file_meta, get_byte_io)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cognite-extractor-utils"
3
- version = "7.4.1"
3
+ version = "7.4.3"
4
4
  description = "Utilities for easier development of extractors for CDF"
5
5
  authors = ["Mathias Lohne <mathias.lohne@cognite.com>"]
6
6
  license = "Apache-2.0"