cognite-extractor-utils 7.3.0__py3-none-any.whl → 7.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries, and is provided for informational purposes only.

This version of cognite-extractor-utils has been flagged as potentially problematic.

@@ -0,0 +1,8 @@
+"""
+The unstable package contains experimental functions and classes currently
+deemed unstable. The contents of this package are subject to change without
+notice, even in minor or patch releases.
+
+Whenever you import anything from the unstable package, you should make sure to
+run a type checker such as mypy to help catch these changes.
+"""
@@ -25,6 +25,9 @@ from requests.utils import super_len
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata
+from cognite.client.data_classes.data_modeling import NodeId
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorFileApply
+from cognite.client.utils._identifier import IdentifierSequence
 from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
@@ -48,6 +51,10 @@ _MAX_SINGLE_CHUNK_FILE_SIZE = 5 * 1024 * 1024 * 1024
 # 4000 MiB
 _MAX_FILE_CHUNK_SIZE = 4 * 1024 * 1024 * 1000
 
+_CDF_ALPHA_VERSION_HEADER = {"cdf-version": "alpha"}
+
+FileMetadataOrCogniteExtractorFile = Union[FileMetadata, CogniteExtractorFileApply]
+
 
 class ChunkedStream(RawIOBase, BinaryIO):
     """
@@ -178,7 +185,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
+        post_upload_function: Optional[Callable[[List[FileMetadataOrCogniteExtractorFile]], None]] = None,
         max_queue_size: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
@@ -238,9 +245,81 @@ class IOFileUploadQueue(AbstractUploadQueue):
 
             self.cancellation_token.wait(5)
 
+    def _apply_cognite_file(self, file_apply: CogniteExtractorFileApply) -> NodeId:
+        instance_result = self.cdf_client.data_modeling.instances.apply(file_apply)
+        node = instance_result.nodes[0]
+        return node.as_id()
+
+    def _upload_empty(
+        self, meta_or_apply: FileMetadataOrCogniteExtractorFile
+    ) -> tuple[FileMetadataOrCogniteExtractorFile, str]:
+        if isinstance(meta_or_apply, CogniteExtractorFileApply):
+            node_id = self._apply_cognite_file(meta_or_apply)
+            meta_or_apply, url = self._create_cdm(instance_id=node_id)
+        else:
+            meta_or_apply, url = self.cdf_client.files.create(
+                file_metadata=meta_or_apply, overwrite=self.overwrite_existing
+            )
+        return meta_or_apply, url
+
+    def _upload_bytes(self, size: int, file: BinaryIO, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
+        meta_or_apply, url = self._upload_empty(meta_or_apply)
+        resp = self._httpx_client.send(self._get_file_upload_request(url, file, size))
+        resp.raise_for_status()
+
+    def _upload_multipart(self, size: int, file: BinaryIO, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
+        chunks = ChunkedStream(file, self.max_file_chunk_size, size)
+        self.logger.debug(
+            f"File {meta_or_apply.external_id} is larger than 5GiB ({size})"
+            f", uploading in {chunks.chunk_count} chunks"
+        )
+
+        returned_file_metadata = self._create_multi_part(meta_or_apply, chunks)
+        upload_urls = returned_file_metadata["uploadUrls"]
+        upload_id = returned_file_metadata["uploadId"]
+        file_meta = FileMetadata.load(returned_file_metadata)
+
+        for url in upload_urls:
+            chunks.next_chunk()
+            resp = self._httpx_client.send(self._get_file_upload_request(url, chunks, len(chunks)))
+            resp.raise_for_status()
+
+        completed_headers = (
+            _CDF_ALPHA_VERSION_HEADER if isinstance(meta_or_apply, CogniteExtractorFileApply) else None
+        )
+
+        res = self.cdf_client.files._post(
+            url_path="/files/completemultipartupload",
+            json={"id": file_meta.id, "uploadId": upload_id},
+            headers=completed_headers,
+        )
+        res.raise_for_status()
+
+    def _create_multi_part(self, meta_or_apply: FileMetadataOrCogniteExtractorFile, chunks: ChunkedStream) -> dict:
+        if isinstance(meta_or_apply, CogniteExtractorFileApply):
+            node_id = self._apply_cognite_file(meta_or_apply)
+            identifiers = IdentifierSequence.load(instance_ids=node_id).as_singleton()
+            self.cdf_client.files._warn_alpha()
+            res = self.cdf_client.files._post(
+                url_path="/files/multiuploadlink",
+                json={"items": identifiers.as_dicts()},
+                params={"parts": chunks.chunk_count},
+                headers=_CDF_ALPHA_VERSION_HEADER,
+            )
+            res.raise_for_status()
+            return res.json()["items"][0]
+        else:
+            res = self.cdf_client.files._post(
+                url_path="/files/initmultipartupload",
+                json=meta_or_apply.dump(camel_case=True),
+                params={"overwrite": self.overwrite_existing, "parts": chunks.chunk_count},
+            )
+            res.raise_for_status()
+            return res.json()
+
     def add_io_to_upload_queue(
         self,
-        file_meta: FileMetadata,
+        meta_or_apply: FileMetadataOrCogniteExtractorFile,
         read_file: Callable[[], BinaryIO],
         extra_retries: Optional[
             Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
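With the widened signature, a `CogniteExtractorFileApply` can be queued exactly like a `FileMetadata`: the queue applies the node, fetches an upload link from the alpha endpoint, and streams the bytes (multipart for files of 5 GiB and above). A minimal sketch, assuming a configured `CogniteClient` and illustrative identifiers:

    import io

    from cognite.client import CogniteClient
    from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import (
        CogniteExtractorFileApply,
    )
    from cognite.extractorutils.uploader.files import IOFileUploadQueue

    client = CogniteClient()  # assumes credentials are configured elsewhere
    queue = IOFileUploadQueue(cdf_client=client)

    file_apply = CogniteExtractorFileApply(
        space="my-space",  # illustrative space/external_id
        external_id="my-file",
        name="my-file.txt",
    )
    queue.add_io_to_upload_queue(file_apply, lambda: io.BytesIO(b"file contents"))
    queue.upload()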
@@ -248,7 +327,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
     ) -> None:
         """
         Add file to upload queue. The file will start uploading immediately. If the size of the queue is larger than
-        the specified max size, this call will block until it's
+        the specified max size, this call will block until it has completed the upload.
 
         Args:
             file_meta: File metadata-object
@@ -258,7 +337,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         """
         retries = cognite_exceptions()
         if isinstance(extra_retries, tuple):
-            retries.update({exc: lambda _e: True for exc in extra_retries or []})
+            retries.update({exc: lambda _: True for exc in extra_retries or []})
         elif isinstance(extra_retries, dict):
             retries.update(extra_retries)
 
@@ -270,60 +349,36 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+        def upload_file(read_file: Callable[[], BinaryIO], meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
             with read_file() as file:
                 size = super_len(file)
                 if size == 0:
                     # upload just the file metadata without data
-                    file_meta, _url = self.cdf_client.files.create(
-                        file_metadata=file_meta, overwrite=self.overwrite_existing
-                    )
+                    meta_or_apply, _ = self._upload_empty(meta_or_apply)
                 elif size >= self.max_single_chunk_file_size:
                     # The minimum chunk size is 4000MiB.
-                    chunks = ChunkedStream(file, self.max_file_chunk_size, size)
-                    self.logger.debug(
-                        f"File {file_meta.external_id} is larger than 5GiB ({size})"
-                        f", uploading in {chunks.chunk_count} chunks"
-                    )
-
-                    res = self.cdf_client.files._post(
-                        url_path="/files/initmultipartupload",
-                        json=file_meta.dump(camel_case=True),
-                        params={"overwrite": self.overwrite_existing, "parts": chunks.chunk_count},
-                    )
-                    returned_file_metadata = res.json()
-                    upload_urls = returned_file_metadata["uploadUrls"]
-                    upload_id = returned_file_metadata["uploadId"]
-                    file_meta = FileMetadata.load(returned_file_metadata)
-
-                    for url in upload_urls:
-                        chunks.next_chunk()
-                        resp = self._httpx_client.send(self._get_file_upload_request(url, chunks, len(chunks)))
-                        resp.raise_for_status()
-
-                    self.cdf_client.files._post(
-                        url_path="/files/completemultipartupload", json={"id": file_meta.id, "uploadId": upload_id}
-                    )
+                    self._upload_multipart(size, file, meta_or_apply)
 
                 else:
-                    file_meta, url = self.cdf_client.files.create(
-                        file_metadata=file_meta, overwrite=self.overwrite_existing
-                    )
-                    resp = self._httpx_client.send(self._get_file_upload_request(url, file, size))
-                    resp.raise_for_status()
+                    self._upload_bytes(size, file, meta_or_apply)
+
+                if isinstance(meta_or_apply, CogniteExtractorFileApply):
+                    meta_or_apply.is_uploaded = True
 
             if self.post_upload_function:
                 try:
-                    self.post_upload_function([file_meta])
+                    self.post_upload_function([meta_or_apply])
                 except Exception as e:
                     self.logger.error("Error in upload callback: %s", str(e))
 
-        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+        def wrapped_upload(
+            read_file: Callable[[], BinaryIO], meta_or_apply: FileMetadataOrCogniteExtractorFile
+        ) -> None:
             try:
-                upload_file(read_file, file_meta)
+                upload_file(read_file, meta_or_apply)
 
             except Exception as e:
-                self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
+                self.logger.exception(f"Unexpected error while uploading file: {meta_or_apply.external_id}")
                 self.errors.append(e)
 
             finally:
@@ -340,7 +395,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
                     pass
 
         with self.lock:
-            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, file_meta))
+            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, meta_or_apply))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
@@ -364,6 +419,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             headers=headers,
         )
 
+    def _create_cdm(self, instance_id: NodeId) -> tuple[FileMetadata, str]:
+        self.cdf_client.files._warn_alpha()
+        identifiers = IdentifierSequence.load(instance_ids=instance_id).as_singleton()
+        res = self.cdf_client.files._post(
+            url_path="/files/uploadlink",
+            json={"items": identifiers.as_dicts()},
+            headers=_CDF_ALPHA_VERSION_HEADER,
+        )
+        res.raise_for_status()
+        resp_json = res.json()["items"][0]
+        return FileMetadata.load(resp_json), resp_json["uploadUrl"]
+
     def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
         """
         Wait for all uploads to finish
@@ -428,7 +495,7 @@ class FileUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
+        post_upload_function: Optional[Callable[[List[FileMetadataOrCogniteExtractorFile]], None]] = None,
         max_queue_size: Optional[int] = None,
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
@@ -447,7 +514,9 @@ class FileUploadQueue(IOFileUploadQueue):
             cancellation_token,
         )
 
-    def add_to_upload_queue(self, file_meta: FileMetadata, file_name: Union[str, PathLike]) -> None:
+    def add_to_upload_queue(
+        self, meta_or_apply: FileMetadataOrCogniteExtractorFile, file_name: Union[str, PathLike]
+    ) -> None:
         """
         Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
         specified in the __init__.
@@ -461,7 +530,7 @@ class FileUploadQueue(IOFileUploadQueue):
         def load_file_from_path() -> BinaryIO:
             return open(file_name, "rb")
 
-        self.add_io_to_upload_queue(file_meta, load_file_from_path)
+        self.add_io_to_upload_queue(meta_or_apply, load_file_from_path)
 
 
 class BytesUploadQueue(IOFileUploadQueue):
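`FileUploadQueue.add_to_upload_queue` keeps its path-based shape; only the metadata argument is widened. A sketch using a classic `FileMetadata` (path and names are illustrative):

    from pathlib import Path

    from cognite.client import CogniteClient
    from cognite.client.data_classes import FileMetadata
    from cognite.extractorutils.uploader.files import FileUploadQueue

    client = CogniteClient()  # assumes a configured client
    queue = FileUploadQueue(cdf_client=client, max_queue_size=10)

    # A CogniteExtractorFileApply could be passed in place of the FileMetadata.
    queue.add_to_upload_queue(
        FileMetadata(external_id="report-2024", name="report.pdf"),
        Path("/tmp/report.pdf"),
    )
    queue.upload()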
@@ -481,7 +550,7 @@ class BytesUploadQueue(IOFileUploadQueue):
     def __init__(
         self,
         cdf_client: CogniteClient,
-        post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
+        post_upload_function: Optional[Callable[[List[FileMetadataOrCogniteExtractorFile]], None]] = None,
         max_queue_size: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
@@ -498,7 +567,7 @@ class BytesUploadQueue(IOFileUploadQueue):
             cancellation_token,
         )
 
-    def add_to_upload_queue(self, content: bytes, metadata: FileMetadata) -> None:
+    def add_to_upload_queue(self, content: bytes, meta_or_apply: FileMetadataOrCogniteExtractorFile) -> None:
         """
         Add object to upload queue. The queue will be uploaded if the queue size is larger than the threshold
         specified in the __init__.
@@ -510,4 +579,4 @@ class BytesUploadQueue(IOFileUploadQueue):
         def get_byte_io() -> BinaryIO:
             return BytesIO(content)
 
-        self.add_io_to_upload_queue(metadata, get_byte_io)
+        self.add_io_to_upload_queue(meta_or_apply, get_byte_io)
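`BytesUploadQueue` gets the same widening; note the argument order is content first, then metadata. A short sketch with illustrative values:

    from cognite.client import CogniteClient
    from cognite.client.data_classes import FileMetadata
    from cognite.extractorutils.uploader.files import BytesUploadQueue

    client = CogniteClient()  # assumes a configured client
    queue = BytesUploadQueue(cdf_client=client)
    queue.add_to_upload_queue(b"raw payload", FileMetadata(external_id="blob-1", name="blob.bin"))
    queue.upload()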
@@ -19,6 +19,7 @@ extractors.
 
 import logging
 import random
+from datetime import datetime, timezone
 from functools import partial, wraps
 from threading import Thread
 from time import time
@@ -501,3 +502,11 @@ def cognite_exceptions(
             return True
 
     return {CogniteException: handle_cognite_errors}
+
+
+def datetime_to_timestamp(dt: datetime) -> int:
+    return int(dt.timestamp() * 1000)
+
+
+def timestamp_to_datetime(ts: int) -> datetime:
+    return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
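These helpers convert between timezone-aware datetimes and the millisecond epoch timestamps CDF uses. A round-trip sketch, assuming they are exported from `cognite.extractorutils.util` (consistent with the util.py hash change in the RECORD below):

    from datetime import datetime, timezone

    from cognite.extractorutils.util import datetime_to_timestamp, timestamp_to_datetime

    dt = datetime(2024, 7, 1, 12, 0, tzinfo=timezone.utc)
    ms = datetime_to_timestamp(dt)  # 1719835200000 -- milliseconds since the epoch
    assert timestamp_to_datetime(ms) == dt  # exact, since dt has whole-millisecond precision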
@@ -1,15 +1,14 @@
 Metadata-Version: 2.1
 Name: cognite-extractor-utils
-Version: 7.3.0
+Version: 7.4.0
 Summary: Utilities for easier development of extractors for CDF
 Home-page: https://github.com/cognitedata/python-extractor-utils
 License: Apache-2.0
 Author: Mathias Lohne
 Author-email: mathias.lohne@cognite.com
-Requires-Python: >=3.8.0,<4.0.0
+Requires-Python: >=3.9.0,<4.0.0
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -18,7 +17,7 @@ Provides-Extra: experimental
 Requires-Dist: arrow (>=1.0.0,<2.0.0)
 Requires-Dist: azure-identity (>=1.14.0,<2.0.0)
 Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
-Requires-Dist: cognite-sdk (>=7.43.3,<8.0.0)
+Requires-Dist: cognite-sdk (>=7.54.17,<8.0.0)
 Requires-Dist: dacite (>=1.6.0,<2.0.0)
 Requires-Dist: decorator (>=5.1.1,<6.0.0)
 Requires-Dist: httpx (>=0.27.0,<0.28.0)
@@ -13,19 +13,20 @@ cognite/extractorutils/statestore/_base.py,sha256=PM4C-bz41tldA5Lx8rD0AzgXJciAZc
 cognite/extractorutils/statestore/hashing.py,sha256=o-efTv21_ATQnyxYmple3MF7r5Afy-7qZsdZhR47emw,8083
 cognite/extractorutils/statestore/watermark.py,sha256=c_lcmJfo8bOvWyCJ9iRbbE4BlqRVulom4TpHb2pOnkE,16755
 cognite/extractorutils/threading.py,sha256=2Hke5cFvP-wA45Crvh58JahoKXB64P3tr7R4y_BhBqM,3605
+cognite/extractorutils/unstable/__init__.py,sha256=L6nqJHjylpk67CE-PbXJyb_TBI4yjhEYEz9J9WShDfM,341
 cognite/extractorutils/uploader/__init__.py,sha256=W22u6QHA4cR0j78LN5LTL5YGbfC-uTApagTyP5ab7uQ,3110
 cognite/extractorutils/uploader/_base.py,sha256=wktbV8dpb8zBOsNaECZkBNoJSpOz437NlNMER3-a3xQ,5304
 cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxUKevpH1q_xKX6Hk,3247
 cognite/extractorutils/uploader/assets.py,sha256=2E90N1kxsaA6Ah4h0_r_dTVhDYY_68ItRWrHYkkltJw,5628
 cognite/extractorutils/uploader/data_modeling.py,sha256=w35Ix5mu0Cgfn4ywnDyif4VVjo04LVTlkMEevk6ztUs,3639
 cognite/extractorutils/uploader/events.py,sha256=NZP2tMoU_rh_rb-EZiUBsOT5KdNABHN4c9Oddk0OsdE,5680
-cognite/extractorutils/uploader/files.py,sha256=5dVdG_3QtZ5Gr_GwYcE-FPQkKzC2I5odr6JXG5-arCc,18658
+cognite/extractorutils/uploader/files.py,sha256=jEZ_QwUnXTsfQ5Xsm03j_vNWTlYBg2gmSxE3MOyoC6s,21765
 cognite/extractorutils/uploader/raw.py,sha256=wFjF90PFTjmByOWx_Y4_YfDJ2w2jl0EQJ2Tjx2MP2PM,6738
 cognite/extractorutils/uploader/time_series.py,sha256=HBtQdsQoIOaL-EG5lMsaY-ORwVb0kGiXG86VjE5-_Bg,26815
 cognite/extractorutils/uploader_extractor.py,sha256=E-mpVvbPg_Tk90U4S9JybV0duptJ2SXE88HB6npE3zI,7732
 cognite/extractorutils/uploader_types.py,sha256=wxfrsiKPTzG5lmoYtQsxt8Xyj-s5HnaLl8WDzJNrazg,1020
-cognite/extractorutils/util.py,sha256=UA6mUZ1caHd6vtA45gZXrk6cxo5cSB2PZ32bMwfEU0M,17229
-cognite_extractor_utils-7.3.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
-cognite_extractor_utils-7.3.0.dist-info/METADATA,sha256=d5YNb6IXGvnpihhUECrRu3yhspo7ywz9Fs1ejlgpzE4,5526
-cognite_extractor_utils-7.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cognite_extractor_utils-7.3.0.dist-info/RECORD,,
+cognite/extractorutils/util.py,sha256=T6ef5b7aYJ8yq9swQwybYaLe3YGr3hElsJQy8E-d5Rs,17469
+cognite_extractor_utils-7.4.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+cognite_extractor_utils-7.4.0.dist-info/METADATA,sha256=eD0--_YZWCF4Vj9oOcVvPIM2hiAOBrGsbZwAc5lRr9Q,5477
+cognite_extractor_utils-7.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+cognite_extractor_utils-7.4.0.dist-info/RECORD,,