cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +9 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +370 -94
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +47 -9
- cognite/extractorutils/uploader_types.py +26 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.7.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/licenses/LICENSE +0 -0
cognite/extractorutils/uploader/assets.py

@@ -1,3 +1,7 @@
+"""
+Upload queue for (legacy) assets.
+"""
+
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -37,7 +41,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
 
 class AssetUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for assets
+    Upload queue for assets.
 
     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -77,8 +81,9 @@ class AssetUploadQueue(AbstractUploadQueue):
 
     def add_to_upload_queue(self, asset: Asset) -> None:
         """
-        Add asset to upload queue.
-
+        Add asset to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
 
         Args:
             asset: Asset to add
@@ -92,7 +97,7 @@ class AssetUploadQueue(AbstractUploadQueue):
 
     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
        """
 
         @retry(
@@ -107,8 +112,8 @@ class AssetUploadQueue(AbstractUploadQueue):
             try:
                 self.cdf_client.assets.create(self.upload_queue)
             except CogniteDuplicatedError as e:
-                duplicated_ids =
-                failed: list[Asset] =
+                duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
+                failed: list[Asset] = list(e.failed)
                 to_create = []
                 to_update = []
                 for asset in failed:
@@ -138,7 +143,7 @@ class AssetUploadQueue(AbstractUploadQueue):
 
     def __enter__(self) -> "AssetUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
         Returns:
             self
@@ -153,7 +158,7 @@ class AssetUploadQueue(AbstractUploadQueue):
         exc_tb: TracebackType | None,
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
         Args:
             exc_type: Exception type
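Most of the assets.py changes are docstring additions; the queue semantics are unchanged. A minimal usage sketch of the documented behavior (the client construction and external IDs are illustrative, and assume credentials are configured elsewhere):

    from cognite.client import CogniteClient
    from cognite.client.data_classes import Asset
    from cognite.extractorutils.uploader.assets import AssetUploadQueue

    client = CogniteClient()  # assumes auth is configured externally

    # __enter__/__exit__ wrap start()/stop(), per the docstrings above.
    with AssetUploadQueue(client, max_queue_size=100) as queue:
        queue.add_to_upload_queue(Asset(external_id="pump-42", name="Pump 42"))
        # add_to_upload_queue() triggers an automatic upload once the queue
        # grows past max_queue_size; leaving the context flushes the rest.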
cognite/extractorutils/uploader/data_modeling.py

@@ -1,3 +1,7 @@
+"""
+Module for uploading data modeling instances to CDF.
+"""
+
 from collections.abc import Callable
 from types import TracebackType
 from typing import Any
@@ -16,6 +20,24 @@ from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class InstanceUploadQueue(AbstractUploadQueue):
+    """
+    Upload queue for data modeling instances (nodes and edges).
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use.
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of the nodes and edges that were uploaded.
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload on an interval when run as a thread (use start/stop
+            methods). Unit is seconds.
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        cancellation_token: Cancellation token for managing thread cancellation.
+        auto_create_start_nodes: Automatically create start nodes if they do not exist.
+        auto_create_end_nodes: Automatically create end nodes if they do not exist.
+        auto_create_direct_relations: Automatically create direct relations if they do not exist.
+    """
+
     def __init__(
         self,
         cdf_client: CogniteClient,
@@ -52,6 +74,15 @@ class InstanceUploadQueue(AbstractUploadQueue):
         node_data: list[NodeApply] | None = None,
         edge_data: list[EdgeApply] | None = None,
     ) -> None:
+        """
+        Add instances to the upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
+
+        Args:
+            node_data: List of nodes to add to the upload queue.
+            edge_data: List of edges to add to the upload queue.
+        """
         if node_data:
             with self.lock:
                 self.node_queue.extend(node_data)
@@ -66,6 +97,10 @@ class InstanceUploadQueue(AbstractUploadQueue):
             self._check_triggers()
 
     def upload(self) -> None:
+        """
+        Trigger an upload of the queue, clears queue afterwards.
+        """
+
         @retry(
             exceptions=cognite_exceptions(),
             cancellation_token=self.cancellation_token,
@@ -91,7 +126,7 @@ class InstanceUploadQueue(AbstractUploadQueue):
 
     def __enter__(self) -> "InstanceUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
         Returns:
             self
@@ -106,7 +141,7 @@ class InstanceUploadQueue(AbstractUploadQueue):
         exc_tb: TracebackType | None,
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
         Args:
             exc_type: Exception type
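The new class docstring spells out the InstanceUploadQueue constructor arguments. A short sketch of adding nodes, using the documented arguments (the space and external IDs are hypothetical):

    from cognite.client import CogniteClient
    from cognite.client.data_classes.data_modeling import NodeApply
    from cognite.extractorutils.uploader.data_modeling import InstanceUploadQueue

    client = CogniteClient()

    with InstanceUploadQueue(client, max_queue_size=1000) as queue:
        nodes = [NodeApply(space="my-space", external_id=f"node-{i}") for i in range(3)]
        queue.add_to_upload_queue(node_data=nodes)
        # edge_data works the same way; both arguments are optional, and the
        # queue flushes once it grows past the max_queue_size threshold.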
cognite/extractorutils/uploader/events.py

@@ -1,3 +1,7 @@
+"""
+Upload queue for (legacy) events.
+"""
+
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,7 +40,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
 
 class EventUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for events
+    Upload queue for events.
 
     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -78,8 +82,9 @@ class EventUploadQueue(AbstractUploadQueue):
 
     def add_to_upload_queue(self, event: Event) -> None:
         """
-        Add event to upload queue.
-
+        Add event to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
 
         Args:
             event: Event to add
@@ -94,7 +99,7 @@ class EventUploadQueue(AbstractUploadQueue):
 
     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
         """
 
         @retry(
@@ -107,10 +112,10 @@ class EventUploadQueue(AbstractUploadQueue):
         )
         def _upload_batch() -> None:
             try:
-                self.cdf_client.events.create(
+                self.cdf_client.events.create(list(self.upload_queue))
             except CogniteDuplicatedError as e:
-                duplicated_ids =
-                failed: list[Event] =
+                duplicated_ids = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
+                failed: list[Event] = list(e.failed)
                 to_create = []
                 to_update = []
                 for evt in failed:
@@ -142,7 +147,7 @@ class EventUploadQueue(AbstractUploadQueue):
 
     def __enter__(self) -> "EventUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
         Returns:
             self
@@ -154,7 +159,7 @@ class EventUploadQueue(AbstractUploadQueue):
         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
         Args:
             exc_type: Exception type
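The restored lines in _upload_batch show the duplicate-handling strategy shared by the asset and event queues: collect the external IDs reported in CogniteDuplicatedError's duplicated list, then split the failed items into updates (already exist) and creates (everything else). A standalone illustration of that split, using plain dicts in place of SDK objects:

    # Hypothetical error payload; CogniteDuplicatedError exposes `duplicated`
    # (dicts identifying the clashes) and `failed` (the items not written).
    duplicated = [{"externalId": "evt-1"}, {"id": 123}]
    failed = [{"externalId": "evt-1"}, {"externalId": "evt-2"}]

    # Same set comprehension as in the diff above:
    duplicated_ids = {dup["externalId"] for dup in duplicated if "externalId" in dup}

    to_update = [item for item in failed if item["externalId"] in duplicated_ids]
    to_create = [item for item in failed if item["externalId"] not in duplicated_ids]

    assert to_update == [{"externalId": "evt-1"}]
    assert to_create == [{"externalId": "evt-2"}]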
cognite/extractorutils/uploader/files.py

@@ -1,3 +1,7 @@
+"""
+Upload queue for files.
+"""
+
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -88,6 +92,9 @@ class ChunkedStream(RawIOBase, BinaryIO):
         self._current_chunk_size = -1
 
     def tell(self) -> int:
+        """
+        Get the current position of the stream.
+        """
         return self._pos
 
     # RawIOBase is (stupidly) incompatible with BinaryIO
@@ -97,12 +104,21 @@
     # required to satisfy mypy.
     # This may be solvable by changing the typing in the python SDK to use typing.Protocol.
     def writelines(self, __lines: Any) -> None:
+        """
+        Not supported for ChunkedStream.
+        """
         raise NotImplementedError()
 
     def write(self, __b: Any) -> int:
+        """
+        Not supported for ChunkedStream.
+        """
         raise NotImplementedError()
 
     def __enter__(self) -> "ChunkedStream":
+        """
+        Wraps around the inner stream's ``__enter__`` method, for use as context manager.
+        """
         return super().__enter__()
 
     def __exit__(
@@ -111,14 +127,23 @@
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> None:
+        """
+        Wraps around the inner stream's ``__exit__`` method, for use as context manager.
+        """
         return super().__exit__(exc_type, exc_val, exc_tb)
 
     @property
     def chunk_count(self) -> int:
+        """
+        Number of chunks in the stream.
+        """
         return ceil(self._stream_length / self._max_chunk_size)
 
     @property
     def len(self) -> int:
+        """
+        Length of the current chunk, in bytes.
+        """
         return len(self)
 
     @property
@@ -129,12 +154,21 @@
         return self._chunk_index
 
     def __len__(self) -> int:
+        """
+        Length of the current chunk, in bytes.
+        """
         return self._current_chunk_size
 
     def readable(self) -> bool:
+        """
+        Check if the stream is readable. Always True for ChunkedStream.
+        """
         return True
 
     def read(self, size: int = -1) -> bytes:
+        """
+        Read bytes from the current chunk.
+        """
         if size < 0:
             size = self._current_chunk_size - self._pos
 
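The new ChunkedStream docstrings in files.py make the chunk arithmetic explicit: chunk_count is the stream length divided by the maximum chunk size, rounded up, while len()/__len__ report the size of the current chunk only. The same arithmetic, worked through for the 5 GiB multipart threshold mentioned further down:

    from math import ceil

    GIB = 1024**3
    stream_length = 12 * GIB    # hypothetical 12 GiB file
    max_chunk_size = 5 * GIB    # the 5 GiB multipart chunk limit

    # Mirrors ChunkedStream.chunk_count:
    chunk_count = ceil(stream_length / max_chunk_size)
    assert chunk_count == 3

    # The final chunk holds whatever remains, which is what __len__ would
    # report while that chunk is the current one.
    last_chunk_size = stream_length - (chunk_count - 1) * max_chunk_size
    assert last_chunk_size == 2 * GIB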
@@ -162,6 +196,10 @@
 
 
 class IOByteStream(SyncByteStream):
+    """
+    Wraps a BinaryIO stream to be used as a httpx SyncByteStream.
+    """
+
     CHUNK_SIZE = 65_536
 
     def __init__(self, stream: BinaryIO) -> None:
@@ -169,6 +207,9 @@
         self._is_stream_consumed = False
 
     def __iter__(self) -> Iterator[bytes]:
+        """
+        Iterate over the stream, yielding chunks of data.
+        """
         if self._is_stream_consumed:
             raise StreamConsumed()
         chunk = self._stream.read(self.CHUNK_SIZE)
@@ -179,7 +220,7 @@
 
 class IOFileUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for files using BinaryIO
+    Upload queue for files using BinaryIO.
 
     Note that if the upload fails, the stream needs to be restarted, so
     the enqueued callback needs to produce a new IO object for each call.
@@ -264,6 +305,9 @@
         _QUEUES += 1
 
     def initialize_failure_logging(self) -> None:
+        """
+        Initialize the failure logging manager if a path is provided in the constructor.
+        """
         self._file_failure_manager: FileFailureManager | None = (
             FileFailureManager(path_to_file=self.failure_logging_path)
             if self.failure_logging_path is not None
@@ -271,14 +315,23 @@
         )
 
     def get_failure_logger(self) -> FileFailureManager | None:
+        """
+        Get the failure logger for this upload queue, if it exists.
+        """
         return self._file_failure_manager
 
     def add_entry_failure_logger(self, file_name: str, error: Exception) -> None:
+        """
+        Add an entry to the failure logger if it exists.
+        """
         if self._file_failure_manager is not None:
             error_reason = str(error)
             self._file_failure_manager.add(file_name=file_name, error_reason=error_reason)
 
     def flush_failure_logger(self) -> None:
+        """
+        Flush the failure logger if it exists, writing all failures to the file.
+        """
         if self._file_failure_manager is not None:
             self.logger.info("Flushing failure logs")
             self._file_failure_manager.write_to_file()
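The failure-logging helpers are all conditional on a logging path having been supplied: without one, _file_failure_manager is None and every call is a no-op. A sketch of the intended flow (assuming the queue constructor forwards a failure_logging_path argument to the base class; the file and error are hypothetical):

    from cognite.client import CogniteClient
    from cognite.extractorutils.uploader.files import FileUploadQueue

    client = CogniteClient()
    queue = FileUploadQueue(client, failure_logging_path="upload_failures.log")

    failure_logger = queue.get_failure_logger()
    if failure_logger is not None:
        # Entries accumulate until flushed to the file.
        queue.add_entry_failure_logger("report.pdf", RuntimeError("upstream closed"))
        queue.flush_failure_logger()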
@@ -313,14 +366,14 @@
         )
         if any_unchaged:
             update = FileMetadataUpdate(external_id=file_meta.external_id)
-
+            need_update = False
             if file_meta.source:
-
+                need_update = True
                 update.source.set(file_meta.source)
             if file_meta.directory:
-
+                need_update = True
                 update.directory.set(file_meta.directory)
-            if
+            if need_update:
                 self.cdf_client.files.update(update)
 
         return file_meta_response, url
@@ -373,7 +426,7 @@
     def _upload_multipart(self, size: int, file: BinaryIO, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         chunks = ChunkedStream(file, self.max_file_chunk_size, size)
         self.logger.debug(
-            f"File {file_meta.external_id} is larger than 5GiB ({size})
+            f"File {file_meta.external_id} is larger than 5GiB ({size}), uploading in {chunks.chunk_count} chunks"
         )
 
         returned_file_metadata = self._create_multi_part(file_meta, chunks)
@@ -428,13 +481,14 @@
         extra_retries: tuple[type[Exception], ...] | dict[type[Exception], Callable[[Any], bool]] | None = None,
     ) -> None:
         """
-        Add file to upload queue.
-
+        Add file to upload queue.
+
+        The file will start uploading immediately. If the size of the queue is larger than the specified max size, this
+        call will block until it's completed the upload.
 
         Args:
             file_meta: File metadata-object
-
-                If none, the file object will still be created, but no data is uploaded
+            read_file: Callable that returns a BinaryIO stream to read the file from.
             extra_retries: Exception types that might be raised by ``read_file`` that should be retried
         """
         retries = cognite_exceptions()
@@ -568,7 +622,7 @@
 
     def upload(self, fail_on_errors: bool = True, timeout: float | None = None) -> None:
         """
-        Wait for all uploads to finish
+        Wait for all uploads to finish.
         """
         for future in self.upload_queue:
             future.result(timeout=timeout)
@@ -581,7 +635,7 @@
 
     def __enter__(self) -> "IOFileUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
         Returns:
             self
@@ -598,7 +652,7 @@
         exc_tb: TracebackType | None,
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
         Args:
             exc_type: Exception type
@@ -610,7 +664,7 @@
 
     def __len__(self) -> int:
         """
-        The size of the upload queue
+        The size of the upload queue.
 
         Returns:
             Number of events in queue
@@ -620,7 +674,7 @@
 
 class FileUploadQueue(IOFileUploadQueue):
     """
-    Upload queue for files
+    Upload queue for files.
 
     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -661,8 +715,10 @@
         file_name: str | PathLike,
     ) -> None:
         """
-        Add file to upload queue.
-
+        Add file to upload queue.
+
+        The file will start uploading immediately. If the size of the queue is larger than the specified max size, this
+        call will block until it's completed the upload.
 
         Args:
             file_meta: File metadata-object
@@ -678,7 +734,7 @@
 
 class BytesUploadQueue(IOFileUploadQueue):
     """
-    Upload queue for bytes
+    Upload queue for bytes.
 
     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -714,11 +770,14 @@
 
     def add_to_upload_queue(self, content: bytes, file_meta: FileMetadataOrCogniteExtractorFile) -> None:
         """
-        Add
-
+        Add file to upload queue.
+
+        The file will start uploading immediately. If the size of the queue is larger than the specified max size, this
+        call will block until it's completed the upload.
+
         Args:
             content: bytes object to upload
-
+            file_meta: File metadata-object
         """
 
         def get_byte_io() -> BinaryIO:
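BytesUploadQueue's add_to_upload_queue docstring is completed to match FileUploadQueue's: the upload starts immediately, and the call blocks only when the queue is saturated. A minimal sketch of that behavior (the external ID and content are illustrative):

    from cognite.client import CogniteClient
    from cognite.client.data_classes import FileMetadata
    from cognite.extractorutils.uploader.files import BytesUploadQueue

    client = CogniteClient()

    with BytesUploadQueue(client, max_queue_size=4) as queue:
        meta = FileMetadata(external_id="hello-txt", name="hello.txt")
        queue.add_to_upload_queue(b"hello, world", meta)
        # The upload starts right away; once more than max_queue_size uploads
        # are in flight, further calls block until one completes.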
cognite/extractorutils/uploader/raw.py

@@ -1,3 +1,7 @@
+"""
+Upload queue for RAW.
+"""
+
 # Copyright 2023 Cognite AS
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -41,7 +45,7 @@ from cognite.extractorutils.util import cognite_exceptions, retry
 
 class RawUploadQueue(AbstractUploadQueue):
     """
-    Upload queue for RAW
+    Upload queue for RAW.
 
     Args:
         cdf_client: Cognite Data Fusion client to use
@@ -84,8 +88,9 @@
 
     def add_to_upload_queue(self, database: str, table: str, raw_row: Row) -> None:
         """
-        Adds a row to the upload queue.
-
+        Adds a row to the upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
 
         Args:
             database: The database to upload the Raw object to
@@ -109,7 +114,7 @@
 
     def upload(self) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
         """
 
         @retry(
@@ -154,7 +159,7 @@
 
     def __enter__(self) -> "RawUploadQueue":
         """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.
 
         Returns:
             self
@@ -166,7 +171,7 @@
         self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
     ) -> None:
         """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.
 
         Args:
             exc_type: Exception type
@@ -177,7 +182,7 @@
 
     def __len__(self) -> int:
         """
-        The size of the upload queue
+        The size of the upload queue.
 
         Returns:
             Number of elements in queue