cognite-extractor-utils 7.1.3.tar.gz → 7.1.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/PKG-INFO +1 -1
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/__init__.py +1 -1
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/base.py +10 -2
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/elements.py +7 -2
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/statestore.py +1 -1
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/_base.py +1 -1
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/files.py +64 -62
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/pyproject.toml +1 -1
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/LICENSE +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/README.md +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/_inner_util.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/__init__.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/_util.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/loaders.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/exceptions.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/metrics.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/py.typed +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/threading.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/__init__.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/_metrics.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/assets.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/events.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/raw.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/time_series.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader_extractor.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader_types.py +0 -0
- {cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/util.py +0 -0
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/base.py
RENAMED
@@ -189,9 +189,17 @@ class Extractor(Generic[CustomConfigClass]):
 
         state_store_config = recursive_find_state_store(self.config.__dict__)
         if state_store_config:
-            self.state_store = state_store_config.create_state_store(
+            self.state_store = state_store_config.create_state_store(
+                cdf_client=self.cognite_client,
+                default_to_local=self.use_default_state_store,
+                cancellation_token=self.cancellation_token,
+            )
         else:
-            self.state_store = LocalStateStore("states.json") if self.use_default_state_store else NoStateStore()
+            self.state_store = (
+                LocalStateStore("states.json", cancellation_token=self.cancellation_token)
+                if self.use_default_state_store
+                else NoStateStore()
+            )
 
         try:
             self.state_store.initialize()
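Taken together, the hunk above means state stores created by the Extractor now inherit the extractor's cancellation token, so their background save threads stop when the extractor shuts down. A minimal sketch of an extractor that benefits from this, assuming the documented Extractor context-manager API and run-handle signature; MyConfig, run, and token.is_cancelled are assumptions, not part of this diff:

from dataclasses import dataclass

from cognite.client import CogniteClient
from cognite.extractorutils import Extractor
from cognite.extractorutils.configtools import BaseConfig
from cognite.extractorutils.statestore import AbstractStateStore
from cognite.extractorutils.threading import CancellationToken


@dataclass
class MyConfig(BaseConfig):
    # Hypothetical minimal config; BaseConfig carries the cognite/logging sections.
    pass


def run(client: CogniteClient, states: AbstractStateStore, config: MyConfig, token: CancellationToken) -> None:
    # Assumed run-handle shape from the extractor-utils quickstart.
    while not token.is_cancelled:
        ...  # extract data, record progress via states.set_state(...)


with Extractor(
    name="example-extractor",
    description="illustration only",
    config_class=MyConfig,
    run_handle=run,
) as extractor:
    # As of 7.1.5 the state store built in the hunk above receives
    # extractor.cancellation_token, so it shuts down with the extractor.
    extractor.run()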
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/elements.py
RENAMED
@@ -623,7 +623,10 @@ class StateStoreConfig:
     local: Optional[LocalStateStoreConfig] = None
 
     def create_state_store(
-        self,
+        self,
+        cdf_client: Optional[CogniteClient] = None,
+        default_to_local: bool = True,
+        cancellation_token: Optional[CancellationToken] = None,
     ) -> AbstractStateStore:
         """
         Create a state store object based on the config.
@@ -648,15 +651,17 @@ class StateStoreConfig:
                 database=self.raw.database,
                 table=self.raw.table,
                 save_interval=self.raw.upload_interval.seconds,
+                cancellation_token=cancellation_token,
             )
 
         if self.local:
             return LocalStateStore(
                 file_path=self.local.path,
                 save_interval=self.local.save_interval.seconds,
+                cancellation_token=cancellation_token,
             )
 
         if default_to_local:
-            return LocalStateStore(file_path="states.json")
+            return LocalStateStore(file_path="states.json", cancellation_token=cancellation_token)
         else:
             return NoStateStore()
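A short sketch of calling the updated method directly. The keyword names come from the diff; constructing LocalStateStoreConfig with just path, and importing these classes from cognite.extractorutils.configtools, are assumptions about details the diff only references as self.local.path and self.local.save_interval:

from cognite.extractorutils.configtools import LocalStateStoreConfig, StateStoreConfig
from cognite.extractorutils.threading import CancellationToken

token = CancellationToken()
config = StateStoreConfig(local=LocalStateStoreConfig(path="states.json"))

# cdf_client is only needed for a RAW-backed store, so None suffices here.
store = config.create_state_store(
    cdf_client=None,
    default_to_local=True,
    cancellation_token=token,  # new parameter in this release
)
store.initialize()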
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/statestore.py
RENAMED
@@ -132,7 +132,7 @@ class AbstractStateStore(ABC):
 
         self.logger = logging.getLogger(__name__)
 
-        self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
+        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
         self.lock = threading.RLock()
         self.cancellation_token = cancellation_token.create_child_token() if cancellation_token else CancellationToken()
 
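The practical consequence, sketched below: a state store given a cancellation token now runs its save loop on a non-daemon thread and relies on the token for shutdown. Treat start() and synchronize() as assumptions about the surrounding API, which this diff does not show:

from cognite.extractorutils.statestore import LocalStateStore
from cognite.extractorutils.threading import CancellationToken

token = CancellationToken()
store = LocalStateStore("states.json", save_interval=10, cancellation_token=token)
store.initialize()
store.start()  # background save thread; non-daemon now that a token was passed

# ... extractor work: store.set_state(...), etc. ...

token.cancel()       # the save thread observes its child token and exits
store.synchronize()  # assumed: final flush of states to disk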
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/_base.py
RENAMED
@@ -58,7 +58,7 @@ class AbstractUploadQueue(ABC):
         self.trigger_log_level = _resolve_log_level(trigger_log_level)
         self.logger = logging.getLogger(__name__)
 
-        self.thread = threading.Thread(target=self._run, daemon=True, name=thread_name)
+        self.thread = threading.Thread(target=self._run, daemon=cancellation_token is None, name=thread_name)
         self.lock = threading.RLock()
         self.cancellation_token: CancellationToken = (
             cancellation_token.create_child_token() if cancellation_token else CancellationToken()
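This hunk and the statestore one apply the same pattern. Isolated as a self-contained sketch, the idea is to daemonize the worker only when no cancellation token is supplied, since a token implies the owner will request a cooperative shutdown; make_worker is a hypothetical helper, not part of the package:

import threading
from typing import Callable, Optional


def make_worker(
    run: Callable[[], None],
    cancellation_token: Optional[object] = None,
    thread_name: Optional[str] = None,
) -> threading.Thread:
    # Mirrors the changed lines above: with a token, the thread is non-daemon
    # and must exit via the token; without one it is daemonized so it cannot
    # block interpreter shutdown.
    return threading.Thread(target=run, daemon=cancellation_token is None, name=thread_name)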
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/files.py
RENAMED
@@ -251,70 +251,72 @@ class IOFileUploadQueue(AbstractUploadQueue):
             max_delay=RETRY_MAX_DELAY,
             backoff=RETRY_BACKOFF_FACTOR,
         )
-        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+        def upload_file(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            with read_file() as file:
+                size = super_len(file)
+                if size == 0:
+                    # upload just the file metadata witout data
+                    file_meta, _url = self.cdf_client.files.create(
+                        file_metadata=file_meta, overwrite=self.overwrite_existing
+                    )
+                elif size >= self.max_single_chunk_file_size:
+                    # The minimum chunk size is 4000MiB.
+                    chunks = ChunkedStream(file, self.max_file_chunk_size, size)
+                    self.logger.debug(
+                        f"File {file_meta.external_id} is larger than 5GiB ({size})"
+                        f", uploading in {chunks.chunk_count} chunks"
+                    )
+                    with self.cdf_client.files.multipart_upload_session(
+                        file_meta.name if file_meta.name is not None else "",
+                        parts=chunks.chunk_count,
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    ) as session:
+                        while chunks.next_chunk():
+                            session.upload_part(chunks.current_chunk, chunks)
+                        file_meta = session.file_metadata
+                else:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+            if self.post_upload_function:
+                try:
+                    self.post_upload_function([file_meta])
+                except Exception as e:
+                    self.logger.error("Error in upload callback: %s", str(e))
+
+        def wrapped_upload(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
             try:
-
-                with read_file() as file:
-                    size = super_len(file)
-                    if size == 0:
-                        # upload just the file metadata witout data
-                        file_meta, _url = self.cdf_client.files.create(
-                            file_metadata=file_meta, overwrite=self.overwrite_existing
-                        )
-                    elif size >= self.max_single_chunk_file_size:
-                        # The minimum chunk size is 4000MiB.
-                        chunks = ChunkedStream(file, self.max_file_chunk_size, size)
-                        self.logger.debug(
-                            f"File {file_meta.external_id} is larger than 5GiB ({size})"
-                            f", uploading in {chunks.chunk_count} chunks"
-                        )
-                        with self.cdf_client.files.multipart_upload_session(
-                            file_meta.name if file_meta.name is not None else "",
-                            parts=chunks.chunk_count,
-                            overwrite=self.overwrite_existing,
-                            external_id=file_meta.external_id,
-                            source=file_meta.source,
-                            mime_type=file_meta.mime_type,
-                            metadata=file_meta.metadata,
-                            directory=file_meta.directory,
-                            asset_ids=file_meta.asset_ids,
-                            data_set_id=file_meta.data_set_id,
-                            labels=file_meta.labels,
-                            geo_location=file_meta.geo_location,
-                            source_created_time=file_meta.source_created_time,
-                            source_modified_time=file_meta.source_modified_time,
-                            security_categories=file_meta.security_categories,
-                        ) as session:
-                            while chunks.next_chunk():
-                                session.upload_part(chunks.current_chunk, chunks)
-                            file_meta = session.file_metadata
-                    else:
-                        file_meta = self.cdf_client.files.upload_bytes(
-                            file,
-                            file_meta.name if file_meta.name is not None else "",
-                            overwrite=self.overwrite_existing,
-                            external_id=file_meta.external_id,
-                            source=file_meta.source,
-                            mime_type=file_meta.mime_type,
-                            metadata=file_meta.metadata,
-                            directory=file_meta.directory,
-                            asset_ids=file_meta.asset_ids,
-                            data_set_id=file_meta.data_set_id,
-                            labels=file_meta.labels,
-                            geo_location=file_meta.geo_location,
-                            source_created_time=file_meta.source_created_time,
-                            source_modified_time=file_meta.source_modified_time,
-                            security_categories=file_meta.security_categories,
-                        )
-
-                if self.post_upload_function:
-                    try:
-                        self.post_upload_function([file_meta])
-                    except Exception as e:
-                        self.logger.error("Error in upload callback: %s", str(e))
+                upload_file(read_file, file_meta)
 
             except Exception as e:
-                self.logger.exception("Unexpected error while uploading file")
+                self.logger.exception(f"Unexpected error while uploading file: {file_meta.external_id}")
                 self.errors.append(e)
 
             finally:
@@ -331,7 +333,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             pass
 
         with self.lock:
-            self.upload_queue.append(self._pool.submit(
+            self.upload_queue.append(self._pool.submit(wrapped_upload, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
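For orientation, a hedged usage sketch of the queue whose internals were refactored above. The constructor arguments and the add_io_to_upload_queue name and argument order are assumptions inferred from identifiers in the diff (cdf_client, overwrite_existing, read_file, file_meta):

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import IOFileUploadQueue

client = CogniteClient()  # assumes ambient client configuration
queue = IOFileUploadQueue(cdf_client=client, overwrite_existing=True)

# read_file is a zero-argument callable returning a fresh BinaryIO, exactly as
# in wrapped_upload(read_file, file_meta) above. Per the new upload_file:
# empty files get metadata-only creation, files at or above
# max_single_chunk_file_size go through a multipart session, and everything
# else is a single upload_bytes call.
queue.add_io_to_upload_queue(
    file_meta=FileMetadata(name="report.pdf", external_id="report-1"),
    read_file=lambda: open("report.pdf", "rb"),
)
queue.upload()  # wait for queued uploads; collected errors surface afterwards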
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/LICENSE
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/README.md
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/_inner_util.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/__init__.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/_util.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/configtools/loaders.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/exceptions.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/metrics.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/py.typed
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/threading.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/__init__.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/_metrics.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/assets.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/events.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/raw.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader/time_series.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader_extractor.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/uploader_types.py
RENAMED
File without changes
{cognite_extractor_utils-7.1.3 → cognite_extractor_utils-7.1.5}/cognite/extractorutils/util.py
RENAMED
File without changes