cognite-extractor-utils 6.4.1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registries.
Potentially problematic release: this version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +13 -11
- cognite/extractorutils/configtools/elements.py +2 -2
- cognite/extractorutils/configtools/loaders.py +11 -6
- cognite/extractorutils/metrics.py +7 -8
- cognite/extractorutils/statestore.py +86 -80
- cognite/extractorutils/threading.py +90 -0
- cognite/extractorutils/uploader/_base.py +9 -7
- cognite/extractorutils/uploader/assets.py +32 -30
- cognite/extractorutils/uploader/events.py +32 -30
- cognite/extractorutils/uploader/files.py +106 -85
- cognite/extractorutils/uploader/raw.py +17 -17
- cognite/extractorutils/uploader/time_series.py +117 -111
- cognite/extractorutils/uploader_extractor.py +4 -4
- cognite/extractorutils/util.py +41 -36
- {cognite_extractor_utils-6.4.1.dist-info → cognite_extractor_utils-7.0.0.dist-info}/METADATA +1 -3
- cognite_extractor_utils-7.0.0.dist-info/RECORD +27 -0
- cognite/extractorutils/middleware.py +0 -36
- cognite_extractor_utils-6.4.1.dist-info/RECORD +0 -27
- {cognite_extractor_utils-6.4.1.dist-info → cognite_extractor_utils-7.0.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-6.4.1.dist-info → cognite_extractor_utils-7.0.0.dist-info}/WHEEL +0 -0
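The headline change in 7.0.0 is the new cognite/extractorutils/threading.py module (+90 lines): its CancellationToken replaces the bare threading.Event that the 6.x queues accepted, and every queue constructor in this diff now takes cancellation_token: Optional[CancellationToken] = None. The diff only shows the token being consumed through an is_cancelled property and an Event-style wait(); the sketch below is a minimal migration example assuming a no-argument CancellationToken constructor, with client setup and the actual workload elided:

from cognite.client import CogniteClient
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader.events import EventUploadQueue

client = CogniteClient()  # illustrative; real extractors build this from config

# 6.x passed a threading.Event here; 7.0.0 takes a CancellationToken
# (or None, the new default).
token = CancellationToken()

queue = EventUploadQueue(client, max_queue_size=1000, cancellation_token=token)
queue.start()  # the periodic upload thread now honours the token
# ... produce events ...
queue.stop()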
cognite/extractorutils/uploader/assets.py

@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from threading import Event
 from typing import Any, Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes.assets import Asset
-from cognite.client.exceptions import
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -30,7 +30,7 @@ from cognite.extractorutils.uploader._metrics import (
     ASSETS_UPLOADER_QUEUED,
     ASSETS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class AssetUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class AssetUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         super().__init__(
             cdf_client,
@@ -92,9 +92,36 @@ class AssetUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.assets.create(self.upload_queue)
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Asset] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for asset in failed:
+                    if asset.external_id is not None and asset.external_id in duplicated_ids:
+                        to_update.append(asset)
+                    else:
+                        to_create.append(asset)
+                if to_create:
+                    self.cdf_client.assets.create(to_create)
+                if to_update:
+                    self.cdf_client.assets.update(to_update)
+
         if len(self.upload_queue) > 0:
             with self.lock:
-                self._upload_batch()
+                _upload_batch()
 
             try:
                 self._post_upload(self.upload_queue)
@@ -107,31 +134,6 @@ class AssetUploadQueue(AbstractUploadQueue):
             self.upload_queue.clear()
             self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.assets.create(self.upload_queue)
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Asset] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for asset in failed:
-                if asset.external_id is not None and asset.external_id in duplicated_ids:
-                    to_update.append(asset)
-                else:
-                    to_create.append(asset)
-            if to_create:
-                self.cdf_client.assets.create(to_create)
-            if to_update:
-                self.cdf_client.assets.update(to_update)
-
     def __enter__(self) -> "AssetUploadQueue":
         """
         Wraps around start method, for use as context manager
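The change above is representative of the whole release: the duplicate-handling logic that lived in a @retry-decorated _upload_batch instance method in 6.4.1 becomes a closure inside upload(), so the decorator can bind self.cancellation_token. Behaviour is otherwise unchanged: on CogniteDuplicatedError the failed batch is split into assets whose external ID already exists (updated) and the rest (re-created), making upload() an effective upsert. A usage sketch, assuming the 6.x add_to_upload_queue method, which this diff leaves untouched:

from cognite.client import CogniteClient
from cognite.client.data_classes.assets import Asset
from cognite.extractorutils.uploader.assets import AssetUploadQueue

client = CogniteClient()  # illustrative

with AssetUploadQueue(client, max_queue_size=500) as queue:
    # An asset whose external ID already exists in CDF is updated on upload
    # rather than failing the whole batch with CogniteDuplicatedError.
    queue.add_to_upload_queue(Asset(external_id="pump-42", name="Pump 42"))
    queue.upload()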
cognite/extractorutils/uploader/events.py

@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Event
-from cognite.client.exceptions import
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -31,7 +31,7 @@ from cognite.extractorutils.uploader._metrics import (
     EVENTS_UPLOADER_QUEUED,
     EVENTS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class EventUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class EventUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -96,11 +96,38 @@ class EventUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.events.create([e for e in self.upload_queue])
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Event] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for evt in failed:
+                    if evt.external_id is not None and evt.external_id in duplicated_ids:
+                        to_update.append(evt)
+                    else:
+                        to_create.append(evt)
+                if to_create:
+                    self.cdf_client.events.create(to_create)
+                if to_update:
+                    self.cdf_client.events.update(to_update)
+
         if len(self.upload_queue) == 0:
             return
 
         with self.lock:
-            self._upload_batch()
+            _upload_batch()
 
         self.events_written.inc(self.upload_queue_size)
 
@@ -113,31 +140,6 @@ class EventUploadQueue(AbstractUploadQueue):
         self.upload_queue_size = 0
         self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.events.create([e for e in self.upload_queue])
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Event] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for evt in failed:
-                if evt.external_id is not None and evt.external_id in duplicated_ids:
-                    to_update.append(evt)
-                else:
-                    to_create.append(evt)
-            if to_create:
-                self.cdf_client.events.create(to_create)
-            if to_update:
-                self.cdf_client.events.update(to_update)
-
     def __enter__(self) -> "EventUploadQueue":
         """
         Wraps around start method, for use as context manager
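EventUploadQueue gets the identical treatment. The pattern shared by both files is the new retry configuration: exceptions is no longer a tuple like (CogniteAPIError, ConnectionError) but the result of cognite_exceptions(), which (judging by how files.py calls .update() on it) is a mapping from exception type to a should-retry predicate, and the decorator also receives the cancellation token so back-off sleeps can be interrupted at shutdown. A standalone sketch of that pattern; the decorated function and the numeric constants are illustrative (the queues pass the library's RETRIES, RETRY_DELAY, RETRY_MAX_DELAY and RETRY_BACKOFF_FACTOR):

from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.util import cognite_exceptions, retry

token = CancellationToken()

@retry(
    exceptions=cognite_exceptions(),  # retryable Cognite/network error types
    cancellation_token=token,  # lets the back-off abort once cancelled
    tries=10,
    delay=1,
    max_delay=60,
    backoff=2,
)
def flush_batch() -> None:
    # illustrative: any idempotent call against CDF that may hit transient errors
    pass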
cognite/extractorutils/uploader/files.py

@@ -17,13 +17,11 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from os import PathLike
 from types import TracebackType
-from typing import BinaryIO, Callable, List, Optional, Tuple, Type, Union
-
-from requests import ConnectionError
+from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple, Type, Union
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata
-from cognite.
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -36,7 +34,7 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 _QUEUES: int = 0
 _QUEUES_LOCK: threading.RLock = threading.RLock()
@@ -53,9 +51,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         max_parallelism: Maximum number of parallel uploads. If this is greater than 0,
@@ -69,11 +65,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: int = 0,
     ):
         # Super sets post_upload and threshold
@@ -81,13 +76,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
+            None,
             trigger_log_level,
             thread_name,
             cancellation_token,
         )
 
-        self.
+        if self.threshold <= 0:
+            raise ValueError("Max queue size must be positive for file upload queues")
+
+        self.upload_queue: List[Future] = []
+        self.errors: List[Exception] = []
+
         self.overwrite_existing = overwrite_existing
 
         self.parallelism = self.cdf_client.config.max_workers
@@ -100,6 +100,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self.files_written = FILES_UPLOADER_WRITTEN
         self.queue_size = FILES_UPLOADER_QUEUE_SIZE
 
+        self._update_queue_thread = threading.Thread(target=self._remove_done_from_queue, daemon=True)
+
+        self._full_queue = threading.Condition()
+
         global _QUEUES, _QUEUES_LOCK
         with _QUEUES_LOCK:
             self._pool = ThreadPoolExecutor(
@@ -107,84 +111,107 @@ class IOFileUploadQueue(AbstractUploadQueue):
         )
         _QUEUES += 1
 
-    def
+    def _remove_done_from_queue(self) -> None:
+        while not self.cancellation_token.is_cancelled:
+            with self.lock:
+                self.upload_queue = list(filter(lambda f: f.running(), self.upload_queue))
+
+            self.cancellation_token.wait(5)
+
+    def add_io_to_upload_queue(
+        self,
+        file_meta: FileMetadata,
+        read_file: Callable[[], BinaryIO],
+        extra_retries: Optional[
+            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
+        ] = None,
+    ) -> None:
         """
-        Add file to upload queue. The
-        specified
+        Add file to upload queue. The file will start uploading immedeately. If the size of the queue is larger than
+        the specified max size, this call will block until it's
 
         Args:
             file_meta: File metadata-object
             file_name: Path to file to be uploaded.
                 If none, the file object will still be created, but no data is uploaded
+            extra_retries: Exception types that might be raised by ``read_file`` that should be retried
         """
+        retries = cognite_exceptions()
+        if isinstance(extra_retries, tuple):
+            retries.update({exc: lambda _e: True for exc in extra_retries or []})
+        elif isinstance(extra_retries, dict):
+            retries.update(extra_retries)
+
+        @retry(
+            exceptions=retries,
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_single(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            try:
+                # Upload file
+                with read_file() as file:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+                if self.post_upload_function:
+                    try:
+                        self.post_upload_function([file_meta])
+                    except Exception as e:
+                        self.logger.error("Error in upload callback: %s", str(e))
+
+            except Exception as e:
+                self.logger.exception("Unexpected error while uploading file")
+                self.errors.append(e)
+
+            finally:
+                with self.lock:
+                    self.files_written.inc()
+                    self.upload_queue_size -= 1
+                    self.queue_size.set(self.upload_queue_size)
+                with self._full_queue:
+                    self._full_queue.notify()
+
+        if self.upload_queue_size >= self.threshold:
+            with self._full_queue:
+                while not self._full_queue.wait(timeout=2) and not self.cancellation_token.is_cancelled:
+                    pass
+
         with self.lock:
-            self.upload_queue.append((
+            self.upload_queue.append(self._pool.submit(_upload_single, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
 
-
-
-    def upload(self) -> None:
+    def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
         """
-
+        Wait for all uploads to finish
         """
-
-
-
+        for future in self.upload_queue:
+            future.result(timeout=timeout)
         with self.lock:
-            self._upload_batch()
-
-            self.files_written.inc(self.upload_queue_size)
-
-            try:
-                self._post_upload([el[0] for el in self.upload_queue])
-            except Exception as e:
-                self.logger.error("Error in upload callback: %s", str(e))
-            self.upload_queue.clear()
-            self.logger.info(f"Uploaded {self.upload_queue_size} files")
-            self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
-
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_single(self, index: int, read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
-        # Upload file
-        with read_file() as file:
-            file_meta = self.cdf_client.files.upload_bytes(
-                file,
-                file_meta.name if file_meta.name is not None else "",
-                overwrite=self.overwrite_existing,
-                external_id=file_meta.external_id,
-                source=file_meta.source,
-                mime_type=file_meta.mime_type,
-                metadata=file_meta.metadata,
-                directory=file_meta.directory,
-                asset_ids=file_meta.asset_ids,
-                data_set_id=file_meta.data_set_id,
-                labels=file_meta.labels,
-                geo_location=file_meta.geo_location,
-                source_created_time=file_meta.source_created_time,
-                source_modified_time=file_meta.source_modified_time,
-                security_categories=file_meta.security_categories,
-            )
-
-        # Update meta-object in queue
-        self.upload_queue[index] = (file_meta, read_file)
-
-    def _upload_batch(self) -> None:
-        # Concurrently execute file-uploads
-
-        futures: List[Future] = []
-        for i, (file_meta, file_name) in enumerate(self.upload_queue):
-            futures.append(self._pool.submit(self._upload_single, i, file_name, file_meta))
-        for fut in futures:
-            fut.result()
+        if fail_on_errors and self.errors:
+            # There might be more errors, but we can only have one as the cause, so pick the first
+            raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]
 
     def __enter__(self) -> "IOFileUploadQueue":
         """
@@ -195,6 +222,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         """
         self.start()
         self._pool.__enter__()
+        self._update_queue_thread.start()
         return self
 
     def __exit__(
@@ -229,9 +257,7 @@ class FileUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         """
@@ -245,14 +271,13 @@ class FileUploadQueue(IOFileUploadQueue):
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
@@ -284,9 +309,7 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         overwrite_existing: If 'overwrite' is set to true, fields for the files found for externalIds can be overwritten
@@ -297,17 +320,15 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
        overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ) -> None:
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
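files.py is the most invasive rewrite. Uploads no longer wait for upload(): add_io_to_upload_queue() immediately submits a retry-wrapped _upload_single closure to the thread pool, the queue holds Futures instead of (FileMetadata, read_file) tuples, a daemon thread prunes completed futures every five seconds, and a Condition blocks producers while the queue sits at max_queue_size, which must now be positive. upload() reduces to waiting on the outstanding futures and, with fail_on_errors=True, re-raising the first collected error. A sketch of the new call shape; the path and external ID are illustrative:

from typing import BinaryIO

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import IOFileUploadQueue

client = CogniteClient()  # illustrative

def read_report() -> BinaryIO:
    return open("reports/daily.pdf", "rb")

with IOFileUploadQueue(client, max_queue_size=10) as queue:  # must be > 0 in 7.0.0
    queue.add_io_to_upload_queue(
        FileMetadata(external_id="report-daily", name="daily.pdf"),
        read_report,
        extra_retries=(OSError,),  # new in 7.0.0: also retry errors from read_file
    )
    # Blocks until every pending future resolves; raises RuntimeError
    # (chained to the first error) if any upload failed.
    queue.upload(fail_on_errors=True, timeout=300.0)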
cognite/extractorutils/uploader/raw.py

@@ -12,17 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Any, Callable, Dict, List, Optional, Type
 
 import arrow
 from arrow import Arrow
-from requests import ConnectionError
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Row
-from cognite.
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -37,7 +35,7 @@ from cognite.extractorutils.uploader._metrics import (
     RAW_UPLOADER_ROWS_QUEUED,
     RAW_UPLOADER_ROWS_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class RawUploadQueue(AbstractUploadQueue):
@@ -63,7 +61,7 @@ class RawUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -112,6 +110,19 @@ class RawUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(database: str, table: str, patch: List[Row]) -> None:
+            # Upload
+            self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
+
         if len(self.upload_queue) == 0:
             return
 
@@ -125,7 +136,7 @@ class RawUploadQueue(AbstractUploadQueue):
             patch: Dict[str, Row] = {r.payload.key: r.payload for r in rows}
             self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))
 
-            self._upload_batch(database=database, table=table, patch=list(patch.values()))
+            _upload_batch(database=database, table=table, patch=list(patch.values()))
             self.rows_written.labels(_labels).inc(len(patch))
             _written: Arrow = arrow.utcnow()
 
@@ -140,17 +151,6 @@ class RawUploadQueue(AbstractUploadQueue):
         self.upload_queue_size = 0
         self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError, CogniteReadTimeout),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self, database: str, table: str, patch: List[Row]) -> None:
-        # Upload
-        self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
-
     def __enter__(self) -> "RawUploadQueue":
         """
         Wraps around start method, for use as context manager
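RawUploadQueue follows the same mechanical pattern as assets and events: the @retry-decorated _upload_batch method becomes a closure inside upload(), and the old (CogniteAPIError, ConnectionError, CogniteReadTimeout) tuple gives way to cognite_exceptions() plus the cancellation token; the insert into RAW itself, including ensure_parent=True, is unchanged. A closing usage sketch, assuming the 6.x add_to_upload_queue(database, table, row) signature, which this diff does not touch; names and values are illustrative:

from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.extractorutils.uploader.raw import RawUploadQueue

client = CogniteClient()  # illustrative

with RawUploadQueue(client, max_queue_size=10_000, max_upload_interval=30) as queue:
    # Rows queued under the same key are de-duplicated before insert,
    # and the write creates the database/table if missing (ensure_parent=True).
    queue.add_to_upload_queue("extractor_db", "readings", Row("row-1", {"value": 42}))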