cognite-extractor-utils 6.4.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +13 -11
- cognite/extractorutils/configtools/elements.py +2 -2
- cognite/extractorutils/configtools/loaders.py +11 -6
- cognite/extractorutils/metrics.py +7 -8
- cognite/extractorutils/statestore.py +86 -80
- cognite/extractorutils/threading.py +90 -0
- cognite/extractorutils/uploader/_base.py +9 -7
- cognite/extractorutils/uploader/assets.py +32 -30
- cognite/extractorutils/uploader/events.py +32 -30
- cognite/extractorutils/uploader/files.py +118 -86
- cognite/extractorutils/uploader/raw.py +17 -17
- cognite/extractorutils/uploader/time_series.py +117 -111
- cognite/extractorutils/uploader_extractor.py +4 -4
- cognite/extractorutils/util.py +41 -36
- {cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/METADATA +1 -3
- cognite_extractor_utils-7.0.0.dist-info/RECORD +27 -0
- cognite/extractorutils/middleware.py +0 -36
- cognite_extractor_utils-6.4.0.dist-info/RECORD +0 -27
- {cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/LICENSE +0 -0
- {cognite_extractor_utils-6.4.0.dist-info → cognite_extractor_utils-7.0.0.dist-info}/WHEEL +0 -0
cognite/extractorutils/uploader/assets.py

@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from threading import Event
 from typing import Any, Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes.assets import Asset
-from cognite.client.exceptions import
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -30,7 +30,7 @@ from cognite.extractorutils.uploader._metrics import (
     ASSETS_UPLOADER_QUEUED,
     ASSETS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class AssetUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class AssetUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         super().__init__(
             cdf_client,
@@ -92,9 +92,36 @@ class AssetUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.assets.create(self.upload_queue)
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Asset] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for asset in failed:
+                    if asset.external_id is not None and asset.external_id in duplicated_ids:
+                        to_update.append(asset)
+                    else:
+                        to_create.append(asset)
+                if to_create:
+                    self.cdf_client.assets.create(to_create)
+                if to_update:
+                    self.cdf_client.assets.update(to_update)
+
         if len(self.upload_queue) > 0:
             with self.lock:
-                self._upload_batch()
+                _upload_batch()
 
                 try:
                     self._post_upload(self.upload_queue)
@@ -107,31 +134,6 @@ class AssetUploadQueue(AbstractUploadQueue):
                 self.upload_queue.clear()
                 self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.assets.create(self.upload_queue)
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Asset] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for asset in failed:
-                if asset.external_id is not None and asset.external_id in duplicated_ids:
-                    to_update.append(asset)
-                else:
-                    to_create.append(asset)
-            if to_create:
-                self.cdf_client.assets.create(to_create)
-            if to_update:
-                self.cdf_client.assets.update(to_update)
-
     def __enter__(self) -> "AssetUploadQueue":
         """
         Wraps around start method, for use as context manager
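In 7.0.0 the retry logic moves from a decorated `_upload_batch` method to a function defined locally inside `upload()`, retried on `cognite_exceptions()` and tied to the queue's `CancellationToken`; assets rejected as duplicates are split out and sent as updates instead of creates. A minimal usage sketch of the new constructor signature, assuming `add_to_upload_queue(asset)` keeps its shape from earlier releases and that `CancellationToken` is directly constructible:

from cognite.client import CogniteClient
from cognite.client.data_classes.assets import Asset
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader.assets import AssetUploadQueue

client = CogniteClient()  # configured via environment, details omitted
token = CancellationToken()

# The queue now takes a CancellationToken instead of a threading.Event (6.x),
# so retries inside upload() stop promptly once the token is cancelled.
with AssetUploadQueue(client, cancellation_token=token) as queue:
    queue.add_to_upload_queue(Asset(external_id="pump-42", name="Pump 42"))  # assumed API from earlier releases
    queue.upload()  # duplicated external IDs are turned into updates, as in the diff above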
cognite/extractorutils/uploader/events.py

@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Event
-from cognite.client.exceptions import
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -31,7 +31,7 @@ from cognite.extractorutils.uploader._metrics import (
     EVENTS_UPLOADER_QUEUED,
     EVENTS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class EventUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class EventUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -96,11 +96,38 @@ class EventUploadQueue(AbstractUploadQueue):
         """
        Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.events.create([e for e in self.upload_queue])
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Event] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for evt in failed:
+                    if evt.external_id is not None and evt.external_id in duplicated_ids:
+                        to_update.append(evt)
+                    else:
+                        to_create.append(evt)
+                if to_create:
+                    self.cdf_client.events.create(to_create)
+                if to_update:
+                    self.cdf_client.events.update(to_update)
+
         if len(self.upload_queue) == 0:
             return
 
         with self.lock:
-            self._upload_batch()
+            _upload_batch()
 
             self.events_written.inc(self.upload_queue_size)
 
@@ -113,31 +140,6 @@ class EventUploadQueue(AbstractUploadQueue):
         self.upload_queue_size = 0
         self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.events.create([e for e in self.upload_queue])
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Event] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for evt in failed:
-                if evt.external_id is not None and evt.external_id in duplicated_ids:
-                    to_update.append(evt)
-                else:
-                    to_create.append(evt)
-            if to_create:
-                self.cdf_client.events.create(to_create)
-            if to_update:
-                self.cdf_client.events.update(to_update)
-
     def __enter__(self) -> "EventUploadQueue":
         """
         Wraps around start method, for use as context manager
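The same local-`_upload_batch` rewrite is applied to events. Both queues now depend on the new cognite/extractorutils/threading.py module (+90 lines, not shown in this diff), whose `CancellationToken` the code above polls via `is_cancelled` and sleeps on via `wait(seconds)`. A sketch of that contract as the uploader threads use it; the `cancel()` method is an assumption, since only the consuming side of the token appears in the diff:

import threading

from cognite.extractorutils.threading import CancellationToken

token = CancellationToken()

def poll_source(token: CancellationToken) -> None:
    # Same loop shape as the background threads in this release: check the
    # flag, then sleep on the token so cancellation interrupts the wait early.
    while not token.is_cancelled:
        # ... read from the source system and enqueue data here ...
        token.wait(5)

worker = threading.Thread(target=poll_source, args=(token,), daemon=True)
worker.start()
token.cancel()  # assumed API: flips is_cancelled and wakes pending wait() calls
worker.join()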
cognite/extractorutils/uploader/files.py

@@ -17,13 +17,11 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from os import PathLike
 from types import TracebackType
-from typing import BinaryIO, Callable, List, Optional, Tuple, Type, Union
-
-from requests import ConnectionError
+from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple, Type, Union
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata
-from cognite.
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -36,7 +34,10 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
+
+_QUEUES: int = 0
+_QUEUES_LOCK: threading.RLock = threading.RLock()
 
 
 class IOFileUploadQueue(AbstractUploadQueue):
@@ -50,9 +51,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         max_parallelism: Maximum number of parallel uploads. If this is greater than 0,
@@ -66,11 +65,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: int = 0,
     ):
         # Super sets post_upload and threshold
@@ -78,13 +76,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
+            None,
             trigger_log_level,
             thread_name,
             cancellation_token,
         )
 
-        self.
+        if self.threshold <= 0:
+            raise ValueError("Max queue size must be positive for file upload queues")
+
+        self.upload_queue: List[Future] = []
+        self.errors: List[Exception] = []
+
         self.overwrite_existing = overwrite_existing
 
         self.parallelism = self.cdf_client.config.max_workers
@@ -97,85 +100,118 @@
         self.files_written = FILES_UPLOADER_WRITTEN
         self.queue_size = FILES_UPLOADER_QUEUE_SIZE
 
-
+        self._update_queue_thread = threading.Thread(target=self._remove_done_from_queue, daemon=True)
+
+        self._full_queue = threading.Condition()
+
+        global _QUEUES, _QUEUES_LOCK
+        with _QUEUES_LOCK:
+            self._pool = ThreadPoolExecutor(
+                max_workers=self.parallelism, thread_name_prefix=f"FileUploadQueue-{_QUEUES}"
+            )
+            _QUEUES += 1
+
+    def _remove_done_from_queue(self) -> None:
+        while not self.cancellation_token.is_cancelled:
+            with self.lock:
+                self.upload_queue = list(filter(lambda f: f.running(), self.upload_queue))
+
+            self.cancellation_token.wait(5)
+
+    def add_io_to_upload_queue(
+        self,
+        file_meta: FileMetadata,
+        read_file: Callable[[], BinaryIO],
+        extra_retries: Optional[
+            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
+        ] = None,
+    ) -> None:
         """
-        Add file to upload queue. The
-        specified
+        Add file to upload queue. The file will start uploading immedeately. If the size of the queue is larger than
+        the specified max size, this call will block until it's
 
         Args:
             file_meta: File metadata-object
             file_name: Path to file to be uploaded.
                 If none, the file object will still be created, but no data is uploaded
+            extra_retries: Exception types that might be raised by ``read_file`` that should be retried
         """
+        retries = cognite_exceptions()
+        if isinstance(extra_retries, tuple):
+            retries.update({exc: lambda _e: True for exc in extra_retries or []})
+        elif isinstance(extra_retries, dict):
+            retries.update(extra_retries)
+
+        @retry(
+            exceptions=retries,
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_single(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            try:
+                # Upload file
+                with read_file() as file:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+                if self.post_upload_function:
+                    try:
+                        self.post_upload_function([file_meta])
+                    except Exception as e:
+                        self.logger.error("Error in upload callback: %s", str(e))
+
+            except Exception as e:
+                self.logger.exception("Unexpected error while uploading file")
+                self.errors.append(e)
+
+            finally:
+                with self.lock:
+                    self.files_written.inc()
+                    self.upload_queue_size -= 1
+                    self.queue_size.set(self.upload_queue_size)
+                    with self._full_queue:
+                        self._full_queue.notify()
+
+        if self.upload_queue_size >= self.threshold:
+            with self._full_queue:
+                while not self._full_queue.wait(timeout=2) and not self.cancellation_token.is_cancelled:
+                    pass
+
         with self.lock:
-            self.upload_queue.append((
+            self.upload_queue.append(self._pool.submit(_upload_single, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
 
-
-
-    def upload(self) -> None:
+    def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
         """
-
+        Wait for all uploads to finish
         """
-
-
-
+        for future in self.upload_queue:
+            future.result(timeout=timeout)
         with self.lock:
-            self._upload_batch()
-
-            self.files_written.inc(self.upload_queue_size)
-
-            try:
-                self._post_upload([el[0] for el in self.upload_queue])
-            except Exception as e:
-                self.logger.error("Error in upload callback: %s", str(e))
-            self.upload_queue.clear()
-            self.logger.info(f"Uploaded {self.upload_queue_size} files")
-            self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
-
-
-
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_single(self, index: int, read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
-        # Upload file
-        with read_file() as file:
-            file_meta = self.cdf_client.files.upload_bytes(
-                file,
-                file_meta.name if file_meta.name is not None else "",
-                overwrite=self.overwrite_existing,
-                external_id=file_meta.external_id,
-                source=file_meta.source,
-                mime_type=file_meta.mime_type,
-                metadata=file_meta.metadata,
-                directory=file_meta.directory,
-                asset_ids=file_meta.asset_ids,
-                data_set_id=file_meta.data_set_id,
-                labels=file_meta.labels,
-                geo_location=file_meta.geo_location,
-                source_created_time=file_meta.source_created_time,
-                source_modified_time=file_meta.source_modified_time,
-                security_categories=file_meta.security_categories,
-            )
-
-            # Update meta-object in queue
-            self.upload_queue[index] = (file_meta, read_file)
-
-    def _upload_batch(self) -> None:
-        # Concurrently execute file-uploads
-
-        futures: List[Future] = []
-        with ThreadPoolExecutor(self.parallelism) as pool:
-            for i, (file_meta, file_name) in enumerate(self.upload_queue):
-                futures.append(pool.submit(self._upload_single, i, file_name, file_meta))
-            for fut in futures:
-                fut.result(0.0)
+        if fail_on_errors and self.errors:
+            # There might be more errors, but we can only have one as the cause, so pick the first
+            raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]
 
     def __enter__(self) -> "IOFileUploadQueue":
         """
@@ -185,6 +221,8 @@ class IOFileUploadQueue(AbstractUploadQueue):
             self
         """
         self.start()
+        self._pool.__enter__()
+        self._update_queue_thread.start()
         return self
 
     def __exit__(
@@ -198,6 +236,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
            exc_val: Exception value
            exc_tb: Traceback
        """
+        self._pool.__exit__(exc_type, exc_val, exc_tb)
        self.stop()
 
    def __len__(self) -> int:
@@ -218,9 +257,7 @@ class FileUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         """
@@ -234,14 +271,13 @@ class FileUploadQueue(IOFileUploadQueue):
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
@@ -273,9 +309,7 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         overwrite_existing: If 'overwrite' is set to true, fields for the files found for externalIds can be overwritten
@@ -286,17 +320,15 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ) -> None:
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
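The file queue is the biggest behavioural change in this release: uploads are submitted to a shared ThreadPoolExecutor as soon as they are queued, `add_io_to_upload_queue` blocks when the queue is full, `max_upload_interval` is gone, and `upload()` now only waits for outstanding futures and re-raises collected errors. A usage sketch built from the signatures above; the BytesIO source and the TimeoutError in `extra_retries` are illustrative, not from the diff:

from io import BytesIO
from typing import BinaryIO

from cognite.client import CogniteClient
from cognite.client.data_classes import FileMetadata
from cognite.extractorutils.uploader.files import IOFileUploadQueue

client = CogniteClient()  # configured via environment, details omitted

def read_file() -> BinaryIO:
    # Called lazily by the upload worker; could also open a path or stream
    return BytesIO(b"file contents")

# max_queue_size must now be a positive number, per the new ValueError above
with IOFileUploadQueue(client, max_queue_size=10) as queue:
    queue.add_io_to_upload_queue(
        FileMetadata(external_id="doc-1", name="doc-1.txt"),
        read_file,
        extra_retries=(TimeoutError,),  # retried on top of cognite_exceptions()
    )
    # Blocks until every submitted upload finishes; raises RuntimeError chained
    # to the first collected error if any upload failed and fail_on_errors is True.
    queue.upload(fail_on_errors=True, timeout=60)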
cognite/extractorutils/uploader/raw.py

@@ -12,17 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Any, Callable, Dict, List, Optional, Type
 
 import arrow
 from arrow import Arrow
-from requests import ConnectionError
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Row
-from cognite.
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -37,7 +35,7 @@ from cognite.extractorutils.uploader._metrics import (
     RAW_UPLOADER_ROWS_QUEUED,
     RAW_UPLOADER_ROWS_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class RawUploadQueue(AbstractUploadQueue):
@@ -63,7 +61,7 @@ class RawUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token:
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -112,6 +110,19 @@ class RawUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(database: str, table: str, patch: List[Row]) -> None:
+            # Upload
+            self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
+
         if len(self.upload_queue) == 0:
             return
 
@@ -125,7 +136,7 @@ class RawUploadQueue(AbstractUploadQueue):
                     patch: Dict[str, Row] = {r.payload.key: r.payload for r in rows}
                     self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))
 
-                    self._upload_batch(database=database, table=table, patch=list(patch.values()))
+                    _upload_batch(database=database, table=table, patch=list(patch.values()))
                     self.rows_written.labels(_labels).inc(len(patch))
                     _written: Arrow = arrow.utcnow()
 
@@ -140,17 +151,6 @@ class RawUploadQueue(AbstractUploadQueue):
             self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError, CogniteReadTimeout),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self, database: str, table: str, patch: List[Row]) -> None:
-        # Upload
-        self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
-
     def __enter__(self) -> "RawUploadQueue":
         """
         Wraps around start method, for use as context manager
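For RAW, only the retry plumbing changes: the insert is wrapped in a local `_upload_batch` retried on `cognite_exceptions()` and cancellable via the token, while `ensure_parent=True` still creates missing databases and tables on the fly. A closing usage sketch; `add_to_upload_queue(database, table, row)` is assumed from earlier releases, since only the constructor and `upload()` appear in this diff:

from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.extractorutils.uploader.raw import RawUploadQueue

client = CogniteClient()  # configured via environment, details omitted

# max_upload_interval keeps its old meaning for this queue: a background
# thread triggers upload() periodically between manual flushes.
with RawUploadQueue(client, max_queue_size=50_000, max_upload_interval=30) as queue:
    queue.add_to_upload_queue("mydb", "mytable", Row(key="row-1", columns={"value": 42}))
    queue.upload()  # each batch insert is retried with cognite_exceptions()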