cognite-extractor-utils 6.4.0__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a public registry. It is provided for informational purposes only.


@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from threading import Event
 from typing import Any, Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes.assets import Asset
-from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -30,7 +30,7 @@ from cognite.extractorutils.uploader._metrics import (
     ASSETS_UPLOADER_QUEUED,
     ASSETS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class AssetUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class AssetUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token: Event = Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         super().__init__(
             cdf_client,
@@ -92,9 +92,36 @@ class AssetUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.assets.create(self.upload_queue)
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Asset] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for asset in failed:
+                    if asset.external_id is not None and asset.external_id in duplicated_ids:
+                        to_update.append(asset)
+                    else:
+                        to_create.append(asset)
+                if to_create:
+                    self.cdf_client.assets.create(to_create)
+                if to_update:
+                    self.cdf_client.assets.update(to_update)
+
         if len(self.upload_queue) > 0:
             with self.lock:
-                self._upload_batch()
+                _upload_batch()
 
             try:
                 self._post_upload(self.upload_queue)
@@ -107,31 +134,6 @@ class AssetUploadQueue(AbstractUploadQueue):
             self.upload_queue.clear()
             self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.assets.create(self.upload_queue)
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Asset] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for asset in failed:
-                if asset.external_id is not None and asset.external_id in duplicated_ids:
-                    to_update.append(asset)
-                else:
-                    to_create.append(asset)
-            if to_create:
-                self.cdf_client.assets.create(to_create)
-            if to_update:
-                self.cdf_client.assets.update(to_update)
-
     def __enter__(self) -> "AssetUploadQueue":
         """
         Wraps around start method, for use as context manager
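The hunks above are from the asset upload queue (the module defining AssetUploadQueue; file paths are not shown in this diff). Two changes stand out: the retry policy moves from a hard-coded (CogniteAPIError, ConnectionError) tuple on a private method to a closure inside upload() that is decorated with cognite_exceptions() and the queue's own cancellation_token, and the cancellation_token parameter changes from a mutable threading.Event default to Optional[CancellationToken], which is a breaking change for callers that passed an Event. A minimal migration sketch, assuming CancellationToken exposes a cancel() method alongside the is_cancelled and wait() members used elsewhere in this diff, and that AssetUploadQueue is re-exported from the uploader package:

# Migration sketch; client configuration elided, cancel() is assumed.
from cognite.client import CogniteClient
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader import AssetUploadQueue

client = CogniteClient()  # configured elsewhere
token = CancellationToken()

# 6.x: AssetUploadQueue(client, cancellation_token=threading.Event())
queue = AssetUploadQueue(client, cancellation_token=token)

token.cancel()  # assumed 7.0 equivalent of event.set()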
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Callable, List, Optional, Type
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Event
-from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
+from cognite.client.exceptions import CogniteDuplicatedError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -31,7 +31,7 @@ from cognite.extractorutils.uploader._metrics import (
     EVENTS_UPLOADER_QUEUED,
     EVENTS_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class EventUploadQueue(AbstractUploadQueue):
@@ -57,7 +57,7 @@ class EventUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
@@ -96,11 +96,38 @@ class EventUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch() -> None:
+            try:
+                self.cdf_client.events.create([e for e in self.upload_queue])
+            except CogniteDuplicatedError as e:
+                duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
+                failed: List[Event] = [e for e in e.failed]
+                to_create = []
+                to_update = []
+                for evt in failed:
+                    if evt.external_id is not None and evt.external_id in duplicated_ids:
+                        to_update.append(evt)
+                    else:
+                        to_create.append(evt)
+                if to_create:
+                    self.cdf_client.events.create(to_create)
+                if to_update:
+                    self.cdf_client.events.update(to_update)
+
         if len(self.upload_queue) == 0:
             return
 
         with self.lock:
-            self._upload_batch()
+            _upload_batch()
 
         self.events_written.inc(self.upload_queue_size)
 
@@ -113,31 +140,6 @@ class EventUploadQueue(AbstractUploadQueue):
         self.upload_queue_size = 0
         self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self) -> None:
-        try:
-            self.cdf_client.events.create([e for e in self.upload_queue])
-        except CogniteDuplicatedError as e:
-            duplicated_ids = set([dup["externalId"] for dup in e.duplicated if "externalId" in dup])
-            failed: List[Event] = [e for e in e.failed]
-            to_create = []
-            to_update = []
-            for evt in failed:
-                if evt.external_id is not None and evt.external_id in duplicated_ids:
-                    to_update.append(evt)
-                else:
-                    to_create.append(evt)
-            if to_create:
-                self.cdf_client.events.create(to_create)
-            if to_update:
-                self.cdf_client.events.update(to_update)
-
     def __enter__(self) -> "EventUploadQueue":
         """
         Wraps around start method, for use as context manager
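The event-queue hunks mirror the asset-queue refactor. Moving the @retry decoration onto a closure created per call means the decorator arguments are evaluated when upload() runs, so the instance's cancellation_token is in scope and retries can stop promptly on shutdown. The diff also swaps the exception tuple for cognite_exceptions(); judging from the retries.update({exc: lambda _e: True for exc in extra_retries or []}) call in the file-uploader hunks below, it returns a mapping from exception type to a predicate deciding whether a given instance should be retried. A sketch of that inferred shape, with hypothetical entries:

# Hypothetical retry filter in the inferred {type: predicate} shape.
from typing import Callable, Dict, Type

def my_retry_filter() -> Dict[Type[Exception], Callable[[Exception], bool]]:
    return {
        ConnectionError: lambda _e: True,             # always retry
        ValueError: lambda e: "transient" in str(e),  # retry case by case
    }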
@@ -17,13 +17,11 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from os import PathLike
 from types import TracebackType
-from typing import BinaryIO, Callable, List, Optional, Tuple, Type, Union
-
-from requests import ConnectionError
+from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple, Type, Union
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import FileMetadata
-from cognite.client.exceptions import CogniteAPIError
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -36,7 +34,10 @@ from cognite.extractorutils.uploader._metrics import (
     FILES_UPLOADER_QUEUED,
     FILES_UPLOADER_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
+
+_QUEUES: int = 0
+_QUEUES_LOCK: threading.RLock = threading.RLock()
 
 
 class IOFileUploadQueue(AbstractUploadQueue):
@@ -50,9 +51,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue. Defaults to no max size.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         max_parallelism: Maximum number of parallel uploads. If this is greater than 0,
@@ -66,11 +65,10 @@ class IOFileUploadQueue(AbstractUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
         max_parallelism: int = 0,
     ):
         # Super sets post_upload and threshold
@@ -78,13 +76,18 @@ class IOFileUploadQueue(AbstractUploadQueue):
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
+            None,
             trigger_log_level,
             thread_name,
             cancellation_token,
         )
 
-        self.upload_queue: List[Tuple[FileMetadata, Union[str, Callable[[], BinaryIO]]]] = []
+        if self.threshold <= 0:
+            raise ValueError("Max queue size must be positive for file upload queues")
+
+        self.upload_queue: List[Future] = []
+        self.errors: List[Exception] = []
+
         self.overwrite_existing = overwrite_existing
 
         self.parallelism = self.cdf_client.config.max_workers
@@ -97,85 +100,118 @@ class IOFileUploadQueue(AbstractUploadQueue):
         self.files_written = FILES_UPLOADER_WRITTEN
         self.queue_size = FILES_UPLOADER_QUEUE_SIZE
 
-    def add_io_to_upload_queue(self, file_meta: FileMetadata, read_file: Callable[[], BinaryIO]) -> None:
+        self._update_queue_thread = threading.Thread(target=self._remove_done_from_queue, daemon=True)
+
+        self._full_queue = threading.Condition()
+
+        global _QUEUES, _QUEUES_LOCK
+        with _QUEUES_LOCK:
+            self._pool = ThreadPoolExecutor(
+                max_workers=self.parallelism, thread_name_prefix=f"FileUploadQueue-{_QUEUES}"
+            )
+            _QUEUES += 1
+
+    def _remove_done_from_queue(self) -> None:
+        while not self.cancellation_token.is_cancelled:
+            with self.lock:
+                self.upload_queue = list(filter(lambda f: f.running(), self.upload_queue))
+
+            self.cancellation_token.wait(5)
+
+    def add_io_to_upload_queue(
+        self,
+        file_meta: FileMetadata,
+        read_file: Callable[[], BinaryIO],
+        extra_retries: Optional[
+            Union[Tuple[Type[Exception], ...], Dict[Type[Exception], Callable[[Any], bool]]]
+        ] = None,
+    ) -> None:
         """
-        Add file to upload queue. The queue will be uploaded if the queue size is larger than the threshold
-        specified in the __init__.
+        Add file to upload queue. The file will start uploading immedeately. If the size of the queue is larger than
+        the specified max size, this call will block until it's
 
         Args:
             file_meta: File metadata-object
             file_name: Path to file to be uploaded.
                 If none, the file object will still be created, but no data is uploaded
+            extra_retries: Exception types that might be raised by ``read_file`` that should be retried
         """
+        retries = cognite_exceptions()
+        if isinstance(extra_retries, tuple):
+            retries.update({exc: lambda _e: True for exc in extra_retries or []})
+        elif isinstance(extra_retries, dict):
+            retries.update(extra_retries)
+
+        @retry(
+            exceptions=retries,
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_single(read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
+            try:
+                # Upload file
+                with read_file() as file:
+                    file_meta = self.cdf_client.files.upload_bytes(
+                        file,
+                        file_meta.name if file_meta.name is not None else "",
+                        overwrite=self.overwrite_existing,
+                        external_id=file_meta.external_id,
+                        source=file_meta.source,
+                        mime_type=file_meta.mime_type,
+                        metadata=file_meta.metadata,
+                        directory=file_meta.directory,
+                        asset_ids=file_meta.asset_ids,
+                        data_set_id=file_meta.data_set_id,
+                        labels=file_meta.labels,
+                        geo_location=file_meta.geo_location,
+                        source_created_time=file_meta.source_created_time,
+                        source_modified_time=file_meta.source_modified_time,
+                        security_categories=file_meta.security_categories,
+                    )
+
+                if self.post_upload_function:
+                    try:
+                        self.post_upload_function([file_meta])
+                    except Exception as e:
+                        self.logger.error("Error in upload callback: %s", str(e))
+
+            except Exception as e:
+                self.logger.exception("Unexpected error while uploading file")
+                self.errors.append(e)
+
+            finally:
+                with self.lock:
+                    self.files_written.inc()
+                    self.upload_queue_size -= 1
+                    self.queue_size.set(self.upload_queue_size)
+                with self._full_queue:
+                    self._full_queue.notify()
+
+        if self.upload_queue_size >= self.threshold:
+            with self._full_queue:
+                while not self._full_queue.wait(timeout=2) and not self.cancellation_token.is_cancelled:
+                    pass
+
         with self.lock:
-            self.upload_queue.append((file_meta, read_file))
+            self.upload_queue.append(self._pool.submit(_upload_single, read_file, file_meta))
            self.upload_queue_size += 1
            self.files_queued.inc()
            self.queue_size.set(self.upload_queue_size)
 
-        self._check_triggers()
-
-    def upload(self) -> None:
+    def upload(self, fail_on_errors: bool = True, timeout: Optional[float] = None) -> None:
         """
-        Trigger an upload of the queue, clears queue afterwards
+        Wait for all uploads to finish
         """
-        if len(self.upload_queue) == 0:
-            return
-
+        for future in self.upload_queue:
+            future.result(timeout=timeout)
         with self.lock:
-            self._upload_batch()
-
-        self.files_written.inc(self.upload_queue_size)
-
-        try:
-            self._post_upload([el[0] for el in self.upload_queue])
-        except Exception as e:
-            self.logger.error("Error in upload callback: %s", str(e))
-        self.upload_queue.clear()
-        self.logger.info(f"Uploaded {self.upload_queue_size} files")
-        self.upload_queue_size = 0
             self.queue_size.set(self.upload_queue_size)
-
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_single(self, index: int, read_file: Callable[[], BinaryIO], file_meta: FileMetadata) -> None:
-        # Upload file
-        with read_file() as file:
-            file_meta = self.cdf_client.files.upload_bytes(
-                file,
-                file_meta.name if file_meta.name is not None else "",
-                overwrite=self.overwrite_existing,
-                external_id=file_meta.external_id,
-                source=file_meta.source,
-                mime_type=file_meta.mime_type,
-                metadata=file_meta.metadata,
-                directory=file_meta.directory,
-                asset_ids=file_meta.asset_ids,
-                data_set_id=file_meta.data_set_id,
-                labels=file_meta.labels,
-                geo_location=file_meta.geo_location,
-                source_created_time=file_meta.source_created_time,
-                source_modified_time=file_meta.source_modified_time,
-                security_categories=file_meta.security_categories,
-            )
-
-        # Update meta-object in queue
-        self.upload_queue[index] = (file_meta, read_file)
-
-    def _upload_batch(self) -> None:
-        # Concurrently execute file-uploads
-
-        futures: List[Future] = []
-        with ThreadPoolExecutor(self.parallelism) as pool:
-            for i, (file_meta, file_name) in enumerate(self.upload_queue):
-                futures.append(pool.submit(self._upload_single, i, file_name, file_meta))
-            for fut in futures:
-                fut.result(0.0)
+        if fail_on_errors and self.errors:
+            # There might be more errors, but we can only have one as the cause, so pick the first
+            raise RuntimeError(f"{len(self.errors)} upload(s) finished with errors") from self.errors[0]
 
     def __enter__(self) -> "IOFileUploadQueue":
         """
@@ -185,6 +221,8 @@ class IOFileUploadQueue(AbstractUploadQueue):
             self
         """
         self.start()
+        self._pool.__enter__()
+        self._update_queue_thread.start()
         return self
 
     def __exit__(
@@ -198,6 +236,7 @@ class IOFileUploadQueue(AbstractUploadQueue):
             exc_val: Exception value
             exc_tb: Traceback
         """
+        self._pool.__exit__(exc_type, exc_val, exc_tb)
         self.stop()
 
     def __len__(self) -> int:
@@ -218,9 +257,7 @@ class FileUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue. Defaults to no max size.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
     """
@@ -234,14 +271,13 @@ class FileUploadQueue(IOFileUploadQueue):
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and threshold
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
@@ -273,9 +309,7 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: Cognite Data Fusion client to use
         post_upload_function: A function that will be called after each upload. The function will be given one argument:
             A list of the events that were uploaded.
-        max_queue_size: Maximum size of upload queue. Defaults to no max size.
-        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-            methods).
+        max_queue_size: Maximum size of upload queue.
         trigger_log_level: Log level to log upload triggers to.
         thread_name: Thread name of uploader thread.
         overwrite_existing: If 'overwrite' is set to true, fields for the files found for externalIds can be overwritten
@@ -286,17 +320,15 @@ class BytesUploadQueue(IOFileUploadQueue):
         cdf_client: CogniteClient,
         post_upload_function: Optional[Callable[[List[FileMetadata]], None]] = None,
         max_queue_size: Optional[int] = None,
-        max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
         overwrite_existing: bool = False,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ) -> None:
         super().__init__(
             cdf_client,
             post_upload_function,
             max_queue_size,
-            max_upload_interval,
             trigger_log_level,
             thread_name,
             overwrite_existing,
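Taken together with the __enter__/__exit__ hunks (the pool is entered and the housekeeping thread started on __enter__; the pool is shut down on __exit__), upload() no longer triggers anything: it waits for the outstanding futures and, with fail_on_errors=True, raises a RuntimeError chained to the first recorded error. A usage sketch under the new semantics; client, paths, meta_for and opener_for are hypothetical stand-ins for a configured CogniteClient, an iterable of paths, and helpers producing a FileMetadata and a Callable[[], BinaryIO]:

# Usage sketch; queue names are from this diff, helpers are hypothetical.
with IOFileUploadQueue(client, max_queue_size=10) as queue:
    for path in paths:
        # Starts uploading immediately; blocks while 10 uploads are in flight.
        queue.add_io_to_upload_queue(meta_for(path), opener_for(path))
    # Wait for all uploads; raises RuntimeError if any of them failed.
    queue.upload(fail_on_errors=True)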
@@ -12,17 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import threading
 from types import TracebackType
 from typing import Any, Callable, Dict, List, Optional, Type
 
 import arrow
 from arrow import Arrow
-from requests import ConnectionError
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes import Row
-from cognite.client.exceptions import CogniteAPIError, CogniteReadTimeout
+from cognite.extractorutils.threading import CancellationToken
 from cognite.extractorutils.uploader._base import (
     RETRIES,
     RETRY_BACKOFF_FACTOR,
@@ -37,7 +35,7 @@ from cognite.extractorutils.uploader._metrics import (
     RAW_UPLOADER_ROWS_QUEUED,
     RAW_UPLOADER_ROWS_WRITTEN,
 )
-from cognite.extractorutils.util import retry
+from cognite.extractorutils.util import cognite_exceptions, retry
 
 
 class RawUploadQueue(AbstractUploadQueue):
@@ -63,7 +61,7 @@ class RawUploadQueue(AbstractUploadQueue):
         max_upload_interval: Optional[int] = None,
         trigger_log_level: str = "DEBUG",
         thread_name: Optional[str] = None,
-        cancellation_token: threading.Event = threading.Event(),
+        cancellation_token: Optional[CancellationToken] = None,
     ):
         # Super sets post_upload and thresholds
         super().__init__(
@@ -112,6 +110,19 @@ class RawUploadQueue(AbstractUploadQueue):
         """
         Trigger an upload of the queue, clears queue afterwards
         """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(database: str, table: str, patch: List[Row]) -> None:
+            # Upload
+            self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
+
         if len(self.upload_queue) == 0:
             return
 
@@ -125,7 +136,7 @@ class RawUploadQueue(AbstractUploadQueue):
                     patch: Dict[str, Row] = {r.payload.key: r.payload for r in rows}
                     self.rows_duplicates.labels(_labels).inc(len(rows) - len(patch))
 
-                    self._upload_batch(database=database, table=table, patch=list(patch.values()))
+                    _upload_batch(database=database, table=table, patch=list(patch.values()))
                     self.rows_written.labels(_labels).inc(len(patch))
                     _written: Arrow = arrow.utcnow()
 
@@ -140,17 +151,6 @@ class RawUploadQueue(AbstractUploadQueue):
         self.upload_queue_size = 0
         self.queue_size.set(self.upload_queue_size)
 
-    @retry(
-        exceptions=(CogniteAPIError, ConnectionError, CogniteReadTimeout),
-        tries=RETRIES,
-        delay=RETRY_DELAY,
-        max_delay=RETRY_MAX_DELAY,
-        backoff=RETRY_BACKOFF_FACTOR,
-    )
-    def _upload_batch(self, database: str, table: str, patch: List[Row]) -> None:
-        # Upload
-        self.cdf_client.raw.rows.insert(db_name=database, table_name=table, row=patch, ensure_parent=True)
-
     def __enter__(self) -> "RawUploadQueue":
         """
         Wraps around start method, for use as context manager
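The RAW-queue hunks complete the same refactor as the asset and event queues: _upload_batch becomes a closure built inside upload(), so the @retry arguments, including self.cancellation_token, are evaluated on every call rather than once at class-definition time, when no instance exists yet. A minimal illustration of that evaluation-order point, using a stand-in decorator (the real signature of cognite.extractorutils.util.retry is not shown in this diff beyond the keywords used above):

# Stand-in decorator to show when decorator arguments are evaluated.
from typing import Any, Callable

def retry(cancellation_token: Any = None) -> Callable:
    def wrap(fn: Callable) -> Callable:
        def inner(*args: Any, **kwargs: Any) -> Any:
            # a real implementation would consult cancellation_token
            # between attempts before sleeping and retrying
            return fn(*args, **kwargs)
        return inner
    return wrap

class Queue:
    def __init__(self) -> None:
        self.cancellation_token = object()  # stand-in token

    def upload(self) -> None:
        # Decorator arguments run on each call, so self is in scope:
        @retry(cancellation_token=self.cancellation_token)
        def _upload_batch() -> None:
            ...  # perform the actual upload

        _upload_batch()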