cognite-extractor-utils 6.4.1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

This version of cognite-extractor-utils has been flagged as potentially problematic.
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from threading import Event
 from typing import Any, Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes.assets import Asset
-from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -30,7 +30,7 @@ from cognite.extractorutils.uploader._metrics import (
     ASSETS_UPLOADER_QUEUED,
     ASSETS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class AssetUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class AssetUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token: Event = Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         super().__init__(
             cdf_client,
@@ -92,9 +92,36 @@ class AssetUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.assets.create(self.upload_queue)
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Asset] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for asset in failed:
+                    if asset.external_id is not None and asset.external_id in duplicated_ids:
+                        to_update.append(asset)
+                    else:
+                        to_create.append(asset)
+                if to_create:
+                    self.cdf_client.assets.create(to_create)
+                if to_update:
+                    self.cdf_client.assets.update(to_update)
+
         if len(self.upload_queue) > 0:
             with self.lock:
-                self._upload_batch()
+                _upload_batch()
 
                 try:
                     self._post_upload(self.upload_queue)
@@ -107,31 +134,6 @@ class AssetUploadQueue(AbstractUploadQueue):
                 self.upload_queue.clear()
                 self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.assets.create(self.upload_queue)
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Asset] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for asset in failed:
-                if asset.external_id is not None and asset.external_id in duplicated_ids:
-                    to_update.append(asset)
-                else:
-                    to_create.append(asset)
-            if to_create:
-                self.cdf_client.assets.create(to_create)
-            if to_update:
-                self.cdf_client.assets.update(to_update)
-
     def __enter__(self) -> "AssetUploadQueue":
         """
         Wraps around start method, for use as context manager
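
The hunks above rewrite the asset uploader, and the same two changes repeat in every queue in this release. First, `cancellation_token` changes type from a `threading.Event` with a shared mutable default to an `Optional[CancellationToken]` defaulting to `None`. Second, the `@retry`-decorated `_upload_batch` method becomes a closure defined inside `upload()`: a decorator on a method has to fix its arguments at class-definition time, before any instance exists, while the closure lets `retry` receive the instance's own `cancellation_token` (and the new `cognite_exceptions()` set, which replaces the hardcoded `(CogniteAPIError, ConnectionError)` tuple). For callers, migration looks roughly like the sketch below; the `add_to_upload_queue` method, the no-argument `CancellationToken()` constructor, and the import path follow the library's documented usage rather than anything shown in this diff, and client configuration is assumed to happen elsewhere.

from cognite.client import CogniteClient
from cognite.client.data_classes.assets import Asset

from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader import AssetUploadQueue  # import path assumed

client = CogniteClient()  # assumes credentials/config are set up elsewhere
token = CancellationToken()

# 6.x passed a threading.Event here; 7.0.0 takes an optional CancellationToken
# and creates its own when none is given.
with AssetUploadQueue(client, max_queue_size=1000, cancellation_token=token) as queue:
    queue.add_to_upload_queue(Asset(external_id="pump-42", name="Pump 42"))
# Leaving the context manager flushes the queue; cancelling the token from
# another thread interrupts retry backoff instead of sleeping through it.

The next set of hunks applies the identical treatment to the event uploader.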
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Event
-from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -31,7 +31,7 @@ from cognite.extractorutils.uploader._metrics import (
     EVENTS_UPLOADER_QUEUED,
     EVENTS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class EventUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class EventUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
    ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -96,11 +96,38 @@ class EventUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.events.create([e for e in self.upload_queue])
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Event] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for evt in failed:
+                    if evt.external_id is not None and evt.external_id in duplicated_ids:
+                        to_update.append(evt)
+                    else:
+                        to_create.append(evt)
+                if to_create:
+                    self.cdf_client.events.create(to_create)
+                if to_update:
+                    self.cdf_client.events.update(to_update)
+
         if len(self.upload_queue) == 0:
             return
 
         with self.lock:
-            self._upload_batch()
+            _upload_batch()
 
             self.events_written.inc(self.upload_queue_size)
 
@@ -113,31 +140,6 @@ class EventUploadQueue(AbstractUploadQueue):
             self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.events.create([e for e in self.upload_queue])
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Event] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for evt in failed:
-                if evt.external_id is not None and evt.external_id in duplicated_ids:
-                    to_update.append(evt)
-                else:
-                    to_create.append(evt)
-            if to_create:
-                self.cdf_client.events.create(to_create)
-            if to_update:
-                self.cdf_client.events.update(to_update)
-
     def __enter__(self) -> "EventUploadQueue":
         """
         Wraps around start method, for use as context manager
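
The duplicate handling itself is unchanged in substance: on `CogniteDuplicatedError`, every failed item whose `externalId` CDF reports as a duplicate is retried as an update, and the rest are created again. Extracted as a standalone sketch, with `create` and `update` as stand-ins for the SDK calls (`client.events.create`/`.update` or `client.assets.create`/`.update`) and items assumed to expose `.external_id`:

from typing import Callable, List, Sequence

from cognite.client.exceptions import CogniteDuplicatedError

def create_or_update(create: Callable, update: Callable, items: Sequence) -> None:
    try:
        create(items)
    except CogniteDuplicatedError as e:
        # externalIds the API reported as already existing
        duplicated = {dup["externalId"] for dup in e.duplicated if "externalId" in dup}
        # Items without an external id can never collide on one, so they fall
        # through to re-creation, matching the queues' logic.
        to_update = [item for item in e.failed if item.external_id in duplicated]
        to_create = [item for item in e.failed if item.external_id not in duplicated]
        if to_create:
            create(to_create)
        if to_update:
            update(to_update)

Note that only `e.failed` is resubmitted, so items that went through on the first `create` call are not duplicated by the retry.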
@@ -17,13 +17,11 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from os import PathLike
 from types import TracebackType
-from typing import BinaryIO, Callable, List, Optional, Tuple, Type, Union
-
-from requests import ConnectionError
+from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple, Type, Union
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata
-from cognite.client.exceptions import CogniteAPIError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -36,7 +34,7 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 _QUEUES: int = 0
 _QUEUES_LOCK: threading.RLock = threading.RLock()
@@ -53,9 +51,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue. Defaults to no max size.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         max_parallelism: Maximum number of parallel uploads. If this is greater than 0,
@@ -69,11 +65,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: int = 0,
     ):
         # Super sets post_upload and threshold
@@ -81,13 +76,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
+            None,
             trigger_log_level,
             thread_name,
             cancellation_token,
         )
 
-        self.upload_queue: List[Tuple[FileMetadata, Union[str, Callable[[], BinaryIO]]]] = []
+        if self.threshold <= 0:
+            raise ValueError("Max queue size must be positive for file upload queues")
+
+        self.upload_queue: List[Future] = []
+        self.errors: List[Exception] = []
+
         self.overwrite_existing = overwrite_existing
 
         self.parallelism = self.cdf_client.config.max_workers
@@ -100,6 +100,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self.files_written = FILES_UPLOADER_WRITTEN
         self.queue_size = FILES_UPLOADER_QUEUE_SIZE
 
+        self._update_queue_thread = threading.Thread(target=self._remove_done_from_queue, daemon=True)
+
+        self._full_queue = threading.Condition()
+
         global _QUEUES, _QUEUES_LOCK
         with _QUEUES_LOCK:
             self._pool = ThreadPoolExecutor(
@@ -107,84 +111,107 @@ class IOFileUploadQueue(AbstractUploadQueue):
             )
             _QUEUES += 1
 
-    def add_io_to_upload_queue(self, file_meta: FileMetadata, read_file: Callable[[], BinaryIO]) -> None:
+    def _remove_done_from_queue(self) -> None:
+        while not self.cancellation_token.is_cancelled:
+            with self.lock:
+                self.upload_queue = list(filter(lambda f: f.running(), self.upload_queue))
+
+            self.cancellation_token.wait(5)
+
+    def add_io_to_upload_queue(
+        self,
+        file_meta: FileMetadata,
+        read_file: Callable[[], BinaryIO],
+        extra_retries: Optional[
+            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
+        ] = None,
+    ) -> None:
         """
-        Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
-        specified in the __init__.
+        Add file to upload queue. The file will start uploading immediately. If the size of the queue is larger than
+        the specified max size, this call will block until it's no longer full.
 
         Args:
             file_meta: File metadata-object
             file_name: Path to file to be uploaded.
                 If none, the file object will still be created, but no data is uploaded
+            extra_retries: Exception types that might be raised by ``read_file`` that should be retried
         """
+        retries = cognite_exceptions()
+        if isinstance(extra_retries, tuple):
+            retries.update({exc: lambda _e: True for exc in extra_retries or []})
+        elif isinstance(extra_retries, dict):
+            retries.update(extra_retries)
+
+        @retry(
+            exceptions=retries,
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_single(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            try:
+                # Upload file
+                with read_file() as file:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+                if self.post_upload_function:
+                    try:
+                        self.post_upload_function([file_meta])
+                    except Exception as e:
+                        self.logger.error("Error in upload callback: %s", str(e))
+
+            except Exception as e:
+                self.logger.exception("Unexpected error while uploading file")
+                self.errors.append(e)
+
+            finally:
+                with self.lock:
+                    self.files_written.inc()
+                    self.upload_queue_size -= 1
+                    self.queue_size.set(self.upload_queue_size)
+                with self._full_queue:
+                    self._full_queue.notify()
+
+        if self.upload_queue_size >= self.threshold:
+            with self._full_queue:
+                while not self._full_queue.wait(timeout=2) and not self.cancellation_token.is_cancelled:
+                    pass
+
         with self.lock:
-            self.upload_queue.append((file_meta, read_file))
+            self.upload_queue.append(self._pool.submit(_upload_single, read_file, file_meta))
             self.upload_queue_size += 1
             self.files_queued.inc()
             self.queue_size.set(self.upload_queue_size)
 
-        self._check_triggers()
-
-    def upload(self) -> None:
+    def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Wait for all uploads to finish
         """
-        if len(self.upload_queue) == 0:
-            return
-
+        for future in self.upload_queue:
+            future.result(timeout=timeout)
         with self.lock:
-            self._upload_batch()
-
-            self.files_written.inc(self.upload_queue_size)
-
-            try:
-                self._post_upload([el[0] for el in self.upload_queue])
-            except Exception as e:
-                self.logger.error("Error in upload callback: %s", str(e))
-            self.upload_queue.clear()
-            self.logger.info(f"Uploaded {self.upload_queue_size} files")
-            self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
-
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_single(self, index: int, read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
-        # Upload file
-        with read_file() as file:
-            file_meta = self.cdf_client.files.upload_bytes(
-                file,
-                file_meta.name if file_meta.name is not None else "",
-                overwrite=self.overwrite_existing,
-                external_id=file_meta.external_id,
-                source=file_meta.source,
-                mime_type=file_meta.mime_type,
-                metadata=file_meta.metadata,
-                directory=file_meta.directory,
-                asset_ids=file_meta.asset_ids,
-                data_set_id=file_meta.data_set_id,
-                labels=file_meta.labels,
-                geo_location=file_meta.geo_location,
-                source_created_time=file_meta.source_created_time,
-                source_modified_time=file_meta.source_modified_time,
-                security_categories=file_meta.security_categories,
-            )
-
-        # Update meta-object in queue
-        self.upload_queue[index] = (file_meta, read_file)
-
-    def _upload_batch(self) -> None:
-        # Concurrently execute file-uploads
-
-        futures: List[Future] = []
-        for i, (file_meta, file_name) in enumerate(self.upload_queue):
-            futures.append(self._pool.submit(self._upload_single, i, file_name, file_meta))
-        for fut in futures:
-            fut.result()
+            if fail_on_errors and self.errors:
+                # There might be more errors, but we can only have one as the cause, so pick the first
+                raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]
 
     def __enter__(self) -> "IOFileUploadQueue":
         """
@@ -195,6 +222,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         """
         self.start()
         self._pool.__enter__()
+        self._update_queue_thread.start()
         return self
 
     def __exit__(
@@ -229,9 +257,7 @@ class FileUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue. Defaults to no max size.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
     """
@@ -245,14 +271,13 @@ class FileUploadQueue(IOFileUploadQueue):
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
@@ -284,9 +309,7 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue. Defaults to no max size.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         overwrite_existing: If 'overwrite' is set to true, fields for the files found for externalIds can be overwritten
@@ -297,17 +320,15 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
    ) -> None:
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
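
`FileUploadQueue` and `BytesUploadQueue` follow the base class: `BytesUploadQueue` loses its `max_upload_interval` parameter outright, and neither subclass forwards an interval to `IOFileUploadQueue` anymore, since interval-triggered flushing is meaningless when each file starts uploading the moment it is added. A hypothetical caller migration:

from cognite.client import CogniteClient
from cognite.extractorutils.uploader import BytesUploadQueue  # import path assumed

client = CogniteClient()  # assumes credentials/config are set up elsewhere

# 6.4.1: BytesUploadQueue(client, max_queue_size=100, max_upload_interval=30)
# 7.0.0: the interval argument is gone; max_queue_size instead bounds
# how many uploads may be in flight at once.
queue = BytesUploadQueue(client, max_queue_size=100)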
@@ -12,17 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Any, Callable, Dict, List, Optional, Type
 
 import arrow
 from arrow import Arrow
-from requests import ConnectionError
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Row
-from cognite.client.exceptions import CogniteAPIError, CogniteReadTimeout
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -37,7 +35,7 @@ from cognite.extractorutils.uploader._metrics import (
     RAW_UPLOADER_ROWS_QUEUED,
     RAW_UPLOADER_ROWS_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class RawUploadQueue(AbstractUploadQueue):
@@ -63,7 +61,7 @@ class RawUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -112,6 +110,19 @@ class RawUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(database: str, table: str, patch: List[Row]) -> None:
+            # Upload
+            self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
+
         if len(self.upload_queue) == 0:
             return
 
@@ -125,7 +136,7 @@ class RawUploadQueue(AbstractUploadQueue):
                     patch: Dict[str, Row] = {r.payload.key: r.payload for r in rows}
                     self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))
 
-                    self._upload_batch(database=database, table=table, patch=list(patch.values()))
+                    _upload_batch(database=database, table=table, patch=list(patch.values()))
                     self.rows_written.labels(_labels).inc(len(patch))
                     _written: Arrow = arrow.utcnow()
 
@@ -140,17 +151,6 @@ class RawUploadQueue(AbstractUploadQueue):
             self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError, CogniteReadTimeout),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self, database: str, table: str, patch: List[Row]) -> None:
-        # Upload
-        self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
-
     def __enter__(self) -> "RawUploadQueue":
         """
         Wraps around start method, for use as context manager
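
`RawUploadQueue` gets the same treatment as the asset and event queues: the `requests.ConnectionError`, `CogniteAPIError`, and `CogniteReadTimeout` imports disappear because `cognite_exceptions()` now defines the retryable set, and the retried RAW insert becomes a closure inside `upload()` so the backoff loop can observe the queue's `cancellation_token`. Caller-side usage is unchanged apart from the token type; a sketch, assuming the `add_to_upload_queue(database, table, row)` form documented by the library:

from cognite.client import CogniteClient
from cognite.client.data_classes import Row
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader import RawUploadQueue  # import path assumed

client = CogniteClient()  # assumes credentials/config are set up elsewhere
token = CancellationToken()

with RawUploadQueue(client, max_upload_interval=30, cancellation_token=token) as queue:
    queue.add_to_upload_queue("my_db", "my_table", Row(key="row-1", columns={"value": 42}))
# Cancelling the token (e.g. from a signal handler) interrupts retry backoff
# instead of sleeping through it, which is the point of moving _upload_batch
# into a closure in all four uploader modules.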