cognite-extractor-utils 7.2.3__py3-none-any.whl → 7.3.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-extractor-utils might be problematic. Click here for more details.

@@ -16,5 +16,5 @@
16
16
  Cognite extractor utils is a Python package that simplifies the development of new extractors.
17
17
  """
18
18
 
19
- __version__ = "7.2.3"
19
+ __version__ = "7.3.0"
20
20
  from .base import Extractor
@@ -18,8 +18,9 @@ from io import BytesIO, RawIOBase
18
18
  from math import ceil
19
19
  from os import PathLike
20
20
  from types import TracebackType
21
- from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple, Type, Union
21
+ from typing import Any, BinaryIO, Callable, Dict, Iterator, List, Optional, Tuple, Type, Union
22
22
 
23
+ from httpx import URL, Client, Headers, Request, StreamConsumed, SyncByteStream
23
24
  from requests.utils import super_len
24
25
 
25
26
  from cognite.client import CogniteClient
@@ -140,6 +141,22 @@ class ChunkedStream(RawIOBase, BinaryIO):
140
141
  return True
141
142
 
142
143
 
144
+ class IOByteStream(SyncByteStream):
145
+ CHUNK_SIZE = 65_536
146
+
147
+ def __init__(self, stream: BinaryIO) -> None:
148
+ self._stream = stream
149
+ self._is_stream_consumed = False
150
+
151
+ def __iter__(self) -> Iterator[bytes]:
152
+ if self._is_stream_consumed:
153
+ raise StreamConsumed()
154
+ chunk = self._stream.read(self.CHUNK_SIZE)
155
+ while chunk:
156
+ yield chunk
157
+ chunk = self._stream.read(self.CHUNK_SIZE)
158
+
159
+
143
160
  class IOFileUploadQueue(AbstractUploadQueue):
144
161
  """
145
162
  Upload queue for files using BinaryIO
@@ -205,6 +222,8 @@ class IOFileUploadQueue(AbstractUploadQueue):
205
222
 
206
223
  self._full_queue = threading.Condition()
207
224
 
225
+ self._httpx_client = Client(follow_redirects=True)
226
+
208
227
  global _QUEUES, _QUEUES_LOCK
209
228
  with _QUEUES_LOCK:
210
229
  self._pool = ThreadPoolExecutor(
@@ -266,44 +285,32 @@ class IOFileUploadQueue(AbstractUploadQueue):
266
285
  f"File {file_meta.external_id} is larger than 5GiB ({size})"
267
286
  f", uploading in {chunks.chunk_count} chunks"
268
287
  )
269
- with self.cdf_client.files.multipart_upload_session(
270
- file_meta.name if file_meta.name is not None else "",
271
- parts=chunks.chunk_count,
272
- overwrite=self.overwrite_existing,
273
- external_id=file_meta.external_id,
274
- source=file_meta.source,
275
- mime_type=file_meta.mime_type,
276
- metadata=file_meta.metadata,
277
- directory=file_meta.directory,
278
- asset_ids=file_meta.asset_ids,
279
- data_set_id=file_meta.data_set_id,
280
- labels=file_meta.labels,
281
- geo_location=file_meta.geo_location,
282
- source_created_time=file_meta.source_created_time,
283
- source_modified_time=file_meta.source_modified_time,
284
- security_categories=file_meta.security_categories,
285
- ) as session:
286
- while chunks.next_chunk():
287
- session.upload_part(chunks.current_chunk, chunks)
288
- file_meta = session.file_metadata
288
+
289
+ res = self.cdf_client.files._post(
290
+ url_path="/files/initmultipartupload",
291
+ json=file_meta.dump(camel_case=True),
292
+ params={"overwrite": self.overwrite_existing, "parts": chunks.chunk_count},
293
+ )
294
+ returned_file_metadata = res.json()
295
+ upload_urls = returned_file_metadata["uploadUrls"]
296
+ upload_id = returned_file_metadata["uploadId"]
297
+ file_meta = FileMetadata.load(returned_file_metadata)
298
+
299
+ for url in upload_urls:
300
+ chunks.next_chunk()
301
+ resp = self._httpx_client.send(self._get_file_upload_request(url, chunks, len(chunks)))
302
+ resp.raise_for_status()
303
+
304
+ self.cdf_client.files._post(
305
+ url_path="/files/completemultipartupload", json={"id": file_meta.id, "uploadId": upload_id}
306
+ )
307
+
289
308
  else:
290
- file_meta = self.cdf_client.files.upload_bytes(
291
- file,
292
- file_meta.name if file_meta.name is not None else "",
293
- overwrite=self.overwrite_existing,
294
- external_id=file_meta.external_id,
295
- source=file_meta.source,
296
- mime_type=file_meta.mime_type,
297
- metadata=file_meta.metadata,
298
- directory=file_meta.directory,
299
- asset_ids=file_meta.asset_ids,
300
- data_set_id=file_meta.data_set_id,
301
- labels=file_meta.labels,
302
- geo_location=file_meta.geo_location,
303
- source_created_time=file_meta.source_created_time,
304
- source_modified_time=file_meta.source_modified_time,
305
- security_categories=file_meta.security_categories,
309
+ file_meta, url = self.cdf_client.files.create(
310
+ file_metadata=file_meta, overwrite=self.overwrite_existing
306
311
  )
312
+ resp = self._httpx_client.send(self._get_file_upload_request(url, file, size))
313
+ resp.raise_for_status()
307
314
 
308
315
  if self.post_upload_function:
309
316
  try:
@@ -338,6 +345,25 @@ class IOFileUploadQueue(AbstractUploadQueue):
338
345
  self.files_queued.inc()
339
346
  self.queue_size.set(self.upload_queue_size)
340
347
 
348
+ def _get_file_upload_request(self, url_str: str, stream: BinaryIO, size: int) -> Request:
349
+ url = URL(url_str)
350
+ headers = Headers(self._httpx_client.headers)
351
+ headers.update(
352
+ {
353
+ "Accept": "*/*",
354
+ "Content-Length": str(size),
355
+ "Host": url.netloc.decode("ascii"),
356
+ "x-cdp-app": self.cdf_client._config.client_name,
357
+ }
358
+ )
359
+
360
+ return Request(
361
+ method="PUT",
362
+ url=url,
363
+ stream=IOByteStream(stream),
364
+ headers=headers,
365
+ )
366
+
341
367
  def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
342
368
  """
343
369
  Wait for all uploads to finish
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-extractor-utils
3
- Version: 7.2.3
3
+ Version: 7.3.0
4
4
  Summary: Utilities for easier development of extractors for CDF
5
5
  Home-page: https://github.com/cognitedata/python-extractor-utils
6
6
  License: Apache-2.0
@@ -21,6 +21,7 @@ Requires-Dist: azure-keyvault-secrets (>=4.7.0,<5.0.0)
21
21
  Requires-Dist: cognite-sdk (>=7.43.3,<8.0.0)
22
22
  Requires-Dist: dacite (>=1.6.0,<2.0.0)
23
23
  Requires-Dist: decorator (>=5.1.1,<6.0.0)
24
+ Requires-Dist: httpx (>=0.27.0,<0.28.0)
24
25
  Requires-Dist: more-itertools (>=10.0.0,<11.0.0)
25
26
  Requires-Dist: orjson (>=3.10.3,<4.0.0)
26
27
  Requires-Dist: prometheus-client (>0.7.0,<=1.0.0)
@@ -1,4 +1,4 @@
1
- cognite/extractorutils/__init__.py,sha256=No8QzFZvAX7DY63jX70dTCPGuk7k0Rd0k99nU_sqeTo,739
1
+ cognite/extractorutils/__init__.py,sha256=xQex_pCl54rp85pvtXkNBPlRtgkgs5dfw7G_CBOPtpM,739
2
2
  cognite/extractorutils/_inner_util.py,sha256=gmz6aqS7jDNsg8z4RHgJjMFohDLOMiaU4gMWBhg3xcE,1558
3
3
  cognite/extractorutils/base.py,sha256=q6NU2bPec3WOasVnnIFoh-aUJudVZWZ2R6emz3IRj8Q,16391
4
4
  cognite/extractorutils/configtools/__init__.py,sha256=L-daaqInIsmHcjb2forJeY0fW8tz1mlteOUo7IsWnrU,3059
@@ -19,13 +19,13 @@ cognite/extractorutils/uploader/_metrics.py,sha256=J2LJXb19L_SLSJ_voNIQHYLp0pjxU
19
19
  cognite/extractorutils/uploader/assets.py,sha256=2E90N1kxsaA6Ah4h0_r_dTVhDYY_68ItRWrHYkkltJw,5628
20
20
  cognite/extractorutils/uploader/data_modeling.py,sha256=w35Ix5mu0Cgfn4ywnDyif4VVjo04LVTlkMEevk6ztUs,3639
21
21
  cognite/extractorutils/uploader/events.py,sha256=NZP2tMoU_rh_rb-EZiUBsOT5KdNABHN4c9Oddk0OsdE,5680
22
- cognite/extractorutils/uploader/files.py,sha256=31kPS4fwz8ZSXWss-CKmYTM6ZLVx9LtsDe7LHT7Wy98,18329
22
+ cognite/extractorutils/uploader/files.py,sha256=5dVdG_3QtZ5Gr_GwYcE-FPQkKzC2I5odr6JXG5-arCc,18658
23
23
  cognite/extractorutils/uploader/raw.py,sha256=wFjF90PFTjmByOWx_Y4_YfDJ2w2jl0EQJ2Tjx2MP2PM,6738
24
24
  cognite/extractorutils/uploader/time_series.py,sha256=HBtQdsQoIOaL-EG5lMsaY-ORwVb0kGiXG86VjE5-_Bg,26815
25
25
  cognite/extractorutils/uploader_extractor.py,sha256=E-mpVvbPg_Tk90U4S9JybV0duptJ2SXE88HB6npE3zI,7732
26
26
  cognite/extractorutils/uploader_types.py,sha256=wxfrsiKPTzG5lmoYtQsxt8Xyj-s5HnaLl8WDzJNrazg,1020
27
27
  cognite/extractorutils/util.py,sha256=UA6mUZ1caHd6vtA45gZXrk6cxo5cSB2PZ32bMwfEU0M,17229
28
- cognite_extractor_utils-7.2.3.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
29
- cognite_extractor_utils-7.2.3.dist-info/METADATA,sha256=u-YGjhv1yLXR0t8OYCx07D7vp1_8Vg_yFwMke0HThZ8,5486
30
- cognite_extractor_utils-7.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
31
- cognite_extractor_utils-7.2.3.dist-info/RECORD,,
28
+ cognite_extractor_utils-7.3.0.dist-info/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
29
+ cognite_extractor_utils-7.3.0.dist-info/METADATA,sha256=d5YNb6IXGvnpihhUECrRu3yhspo7ywz9Fs1ejlgpzE4,5526
30
+ cognite_extractor_utils-7.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
31
+ cognite_extractor_utils-7.3.0.dist-info/RECORD,,