airbyte-source-shopify 2.4.14.dev202407181247__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/METADATA +4 -4
  2. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/RECORD +25 -27
  3. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/WHEEL +1 -1
  4. source_shopify/auth.py +0 -1
  5. source_shopify/config_migrations.py +4 -1
  6. source_shopify/http_request.py +4 -2
  7. source_shopify/schemas/countries.json +7 -19
  8. source_shopify/schemas/customer_journey_summary.json +228 -148
  9. source_shopify/schemas/deleted_products.json +27 -0
  10. source_shopify/schemas/orders.json +38 -0
  11. source_shopify/schemas/product_variants.json +26 -8
  12. source_shopify/schemas/profile_location_groups.json +10 -0
  13. source_shopify/scopes.py +7 -6
  14. source_shopify/shopify_graphql/bulk/exceptions.py +6 -1
  15. source_shopify/shopify_graphql/bulk/job.py +173 -65
  16. source_shopify/shopify_graphql/bulk/query.py +440 -88
  17. source_shopify/shopify_graphql/bulk/record.py +260 -29
  18. source_shopify/shopify_graphql/bulk/retry.py +12 -12
  19. source_shopify/shopify_graphql/bulk/tools.py +17 -2
  20. source_shopify/source.py +6 -10
  21. source_shopify/spec.json +11 -5
  22. source_shopify/streams/base_streams.py +181 -54
  23. source_shopify/streams/streams.py +211 -58
  24. source_shopify/utils.py +47 -12
  25. source_shopify/schemas/customer_saved_search.json +0 -32
  26. source_shopify/schemas/products_graph_ql.json +0 -123
  27. source_shopify/shopify_graphql/graphql.py +0 -64
  28. source_shopify/shopify_graphql/schema.py +0 -29442
  29. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/entry_points.txt +0 -0
source_shopify/shopify_graphql/bulk/job.py
@@ -2,27 +2,38 @@
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #

- import logging
  from dataclasses import dataclass, field
  from datetime import datetime
+ from enum import Enum
  from time import sleep, time
  from typing import Any, Final, Iterable, List, Mapping, Optional

  import pendulum as pdm
  import requests
- from airbyte_cdk.sources.streams.http import HttpClient
  from requests.exceptions import JSONDecodeError
- from source_shopify.utils import ApiTypeEnum
+ from source_shopify.utils import LOGGER, ApiTypeEnum
  from source_shopify.utils import ShopifyRateLimiter as limiter

- from ...http_request import ShopifyErrorHandler
+ from airbyte_cdk.sources.streams.http import HttpClient
+
  from .exceptions import AirbyteTracedException, ShopifyBulkExceptions
  from .query import ShopifyBulkQuery, ShopifyBulkTemplates
+ from .record import ShopifyBulkRecord
  from .retry import bulk_retry_on_exception
  from .status import ShopifyBulkJobStatus
  from .tools import END_OF_FILE, BulkTools


+ class BulkOperationUserErrorCode(Enum):
+     """
+     Possible error codes that can be returned by BulkOperationUserError.
+     https://shopify.dev/docs/api/admin-graphql/latest/enums/BulkOperationUserErrorCode
+     """
+
+     INVALID = "INVALID"
+     OPERATION_IN_PROGRESS = "OPERATION_IN_PROGRESS"
+
+
  @dataclass
  class ShopifyBulkManager:
      http_client: HttpClient
@@ -32,8 +43,8 @@ class ShopifyBulkManager:
      job_size: float
      job_checkpoint_interval: int

-     # default logger
-     logger: Final[logging.Logger] = logging.getLogger("airbyte")
+     parent_stream_name: Optional[str] = None
+     parent_stream_cursor: Optional[str] = None

      # 10Mb chunk size to save the file
      _retrieve_chunk_size: Final[int] = 1024 * 1024 * 10
@@ -54,7 +65,7 @@ class ShopifyBulkManager:

      # currents: _job_id, _job_state, _job_created_at, _job_self_canceled
      _job_id: Optional[str] = field(init=False, default=None)
-     _job_state: str = field(init=False, default=None)  # this string is based on ShopifyBulkJobStatus
+     _job_state: str | None = field(init=False, default=None)  # this string is based on ShopifyBulkJobStatus
      # completed and saved Bulk Job result filename
      _job_result_filename: Optional[str] = field(init=False, default=None)
      # date-time when the Bulk Job was created on the server
@@ -71,8 +82,8 @@ class ShopifyBulkManager:
      _job_last_rec_count: int = field(init=False, default=0)
      # the flag to adjust the next slice from the checkpointed cursor vaue
      _job_adjust_slice_from_checkpoint: bool = field(init=False, default=False)
-     # flag to mark the long running BULK job
-     _job_long_running_cancelation: bool = field(init=False, default=False)
+     # keeps the last checkpointed cursor value for supported streams
+     _job_last_checkpoint_cursor_value: str | None = field(init=False, default=None)

      # expand slice factor
      _job_size_expand_factor: int = field(init=False, default=2)
@@ -84,7 +95,7 @@ class ShopifyBulkManager:
      # 2 sec is set as default value to cover the case with the empty-fast-completed jobs
      _job_last_elapsed_time: float = field(init=False, default=2.0)

-     def __post_init__(self):
+     def __post_init__(self) -> None:
          self._job_size = self.job_size
          # The upper boundary for slice size is limited by the value from the config, default value is `P30D`
          self._job_size_max = self.job_size
@@ -95,6 +106,8 @@
          self._job_max_elapsed_time = self.job_termination_threshold
          # how many records should be collected before we use the checkpoining
          self._job_checkpoint_interval = self.job_checkpoint_interval
+         # define Record Producer instance
+         self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query, self.parent_stream_name, self.parent_stream_cursor)

      @property
      def _tools(self) -> BulkTools:
@@ -151,9 +164,20 @@
          self._job_should_revert_slice = False
          return False

+     @property
+     def _supports_checkpointing(self) -> bool:
+         """
+         The flag to determine whether or not the BULK Stream supports the `BULK checkpointing`.
+         """
+         return self.query.supports_checkpointing
+
      @property
      def _job_should_checkpoint(self) -> bool:
-         return self._job_last_rec_count >= self._job_checkpoint_interval
+         return self._supports_checkpointing and self._job_last_rec_count >= self._job_checkpoint_interval
+
+     @property
+     def _job_any_lines_collected(self) -> bool:
+         return self._job_last_rec_count > 0

      def _expand_job_size(self) -> None:
          self._job_size += self._job_size_adjusted_expand_factor
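The hunk above makes checkpointing opt-in per query: `_job_should_checkpoint` only fires when the query object declares `supports_checkpointing`. A minimal sketch of that condition, using a hypothetical stand-in for `ShopifyBulkQuery` (not taken from this diff):

    from dataclasses import dataclass

    @dataclass
    class StubQuery:  # hypothetical stand-in for ShopifyBulkQuery
        supports_checkpointing: bool

    def job_should_checkpoint(query: StubQuery, last_rec_count: int, interval: int) -> bool:
        # mirrors `_job_should_checkpoint`: both conditions must hold
        return query.supports_checkpointing and last_rec_count >= interval

    assert job_should_checkpoint(StubQuery(True), 250_000, 200_000)
    assert not job_should_checkpoint(StubQuery(False), 250_000, 200_000)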
@@ -191,18 +215,49 @@
          # set the running job object count to default
          self._job_last_rec_count = 0

+     def _set_checkpointing(self) -> None:
+         # set the flag to adjust the next slice from the checkpointed cursor value
+         self._job_adjust_slice_from_checkpoint = True
+
+     def _reset_checkpointing(self) -> None:
+         # reseting the checkpoint flag, if bulk job has completed normally
+         self._job_adjust_slice_from_checkpoint = False
+
+     def _set_last_checkpoint_cursor_value(self, checkpointed_cursor: str) -> None:
+         """
+         Sets the last checkpoint cursor value.
+
+         Args:
+             checkpointed_cursor (str): The cursor value to set as the last checkpoint. Defaults to None.
+         """
+         self._job_last_checkpoint_cursor_value = checkpointed_cursor
+
+     def _checkpoint_cursor_has_collision(self, checkpointed_cursor: str) -> bool:
+         """
+         Checks if the provided checkpointed cursor collides with the last checkpointed cursor value.
+
+         Args:
+             checkpointed_cursor (str): The cursor value to check for collision. Defaults to None.
+
+         Returns:
+             bool: True if the provided cursor collides with the last checkpointed cursor value, False otherwise.
+         """
+         return self._job_last_checkpoint_cursor_value == checkpointed_cursor
+
      def _job_completed(self) -> bool:
          return self._job_state == ShopifyBulkJobStatus.COMPLETED.value

      def _job_canceled(self) -> bool:
          return self._job_state == ShopifyBulkJobStatus.CANCELED.value

+     def _job_failed(self) -> bool:
+         return self._job_state == ShopifyBulkJobStatus.FAILED.value
+
      def _job_cancel(self) -> None:
          _, canceled_response = self.http_client.send_request(
              http_method="POST",
              url=self.base_url,
-             data=ShopifyBulkTemplates.cancel(self._job_id),
-             headers={"Content-Type": "application/graphql"},
+             json={"query": ShopifyBulkTemplates.cancel(self._job_id)},
              request_kwargs={},
          )
          # mark the job was self-canceled
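Note the transport change in `_job_cancel` (and in the status request further down): the GraphQL document is no longer posted as a raw `application/graphql` body but wrapped in the standard `{"query": ...}` JSON envelope. A hedged sketch of the two wire formats, with a placeholder endpoint and mutation (neither is taken from this diff):

    import requests

    BASE_URL = "https://example.myshopify.com/admin/api/2024-04/graphql.json"  # placeholder
    mutation = 'mutation { bulkOperationCancel(id: "gid://shopify/BulkOperation/1") { userErrors { field message } } }'

    # 2.4.x style: raw GraphQL document as the POST body
    # requests.post(BASE_URL, data=mutation, headers={"Content-Type": "application/graphql"})

    # 3.1.0 style: JSON envelope, as produced by HttpClient.send_request(json=...)
    # requests.post(BASE_URL, json={"query": mutation})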
@@ -221,21 +276,24 @@
          else:
              message = f"Elapsed time: {self._job_elapsed_time_in_state} sec"
              if self._job_last_rec_count > 0:
-                 count_message = f". Lines collected: {self._job_last_rec_count}"
+                 count_message = f". Rows collected: {self._job_last_rec_count}"
                  message = message + count_message
              self._log_state(message)
              self._log_job_msg_count = 0

      def _log_state(self, message: Optional[str] = None) -> None:
-         pattern = f"Stream: `{self.http_client._name}`, the BULK Job: `{self._job_id}` is {self._job_state}"
+         pattern = f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {self._job_state}"
          if message:
-             self.logger.info(f"{pattern}. {message}.")
+             LOGGER.info(f"{pattern}. {message}.")
          else:
-             self.logger.info(pattern)
+             LOGGER.info(pattern)

      def _job_get_result(self, response: Optional[requests.Response] = None) -> Optional[str]:
          parsed_response = response.json().get("data", {}).get("node", {}) if response else None
-         job_result_url = parsed_response.get("url") if parsed_response else None
+         # get `complete` or `partial` result from collected Bulk Job results
+         full_result_url = parsed_response.get("url") if parsed_response else None
+         partial_result_url = parsed_response.get("partialDataUrl") if parsed_response else None
+         job_result_url = full_result_url if full_result_url else partial_result_url
          if job_result_url:
              # save to local file using chunks to avoid OOM
              filename = self._tools.filename_from_url(job_result_url)
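`_job_get_result` now falls back to `partialDataUrl` when the full `url` is absent, which is the case for canceled (checkpointed) or failed jobs that still produced rows. A minimal sketch of the selection, assuming the `data.node` payload shape this file parses:

    # assumed shape of `data.node` from the status query
    node = {
        "status": "CANCELED",
        "url": None,  # populated only for COMPLETED jobs
        "partialDataUrl": "https://storage.example.com/partial.jsonl",  # placeholder
    }

    full_result_url = node.get("url")
    partial_result_url = node.get("partialDataUrl")
    job_result_url = full_result_url if full_result_url else partial_result_url
    assert job_result_url == node["partialDataUrl"]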
@@ -248,6 +306,13 @@
                  file.write(END_OF_FILE.encode())
              return filename

+     def _job_get_checkpointed_result(self, response: Optional[requests.Response]) -> None:
+         if self._job_any_lines_collected or self._job_should_checkpoint:
+             # set the flag to adjust the next slice from the checkpointed cursor value
+             self._set_checkpointing()
+             # fetch the collected records from CANCELED Job on checkpointing
+             self._job_result_filename = self._job_get_result(response)
+
      def _job_update_state(self, response: Optional[requests.Response] = None) -> None:
          if response:
              self._job_state = response.json().get("data", {}).get("node", {}).get("status")
@@ -257,9 +322,7 @@
                  self._log_job_state_with_count()
              elif self._job_state in [ShopifyBulkJobStatus.CANCELED.value, ShopifyBulkJobStatus.CANCELING.value]:
                  # do not emit `CANCELED / CANCELING` Bulk Job status, while checkpointing
-                 if self._job_should_checkpoint:
-                     pass
-                 else:
+                 if not self._job_should_checkpoint:
                      self._log_job_state_with_count()
              else:
                  self._log_state()
@@ -273,26 +336,20 @@
                  f"The BULK Job: `{self._job_id}` exited with {self._job_state}, details: {response.text}"
              )
          else:
-             if self._job_should_checkpoint:
-                 # set the flag to adjust the next slice from the checkpointed cursor value
-                 self._job_adjust_slice_from_checkpoint = True
-                 # fetch the collected records from CANCELED Job on checkpointing
-                 self._job_result_filename = self._job_get_result(response)
+             self._job_get_checkpointed_result(response)

      def _on_canceling_job(self, **kwargs) -> None:
          sleep(self._job_check_interval)

      def _cancel_on_long_running_job(self) -> None:
-         self.logger.info(
-             f"Stream: `{self.http_client._name}` the BULK Job: {self._job_id} runs longer than expected ({self._job_max_elapsed_time} sec). Retry with the reduced `Slice Size` after self-cancelation."
+         LOGGER.info(
+             f"Stream: `{self.http_client.name}` the BULK Job: {self._job_id} runs longer than expected ({self._job_max_elapsed_time} sec). Retry with the reduced `Slice Size` after self-cancelation."
          )
-         self._job_long_running_cancelation = True
          self._job_cancel()

      def _cancel_on_checkpointing(self) -> None:
-         self.logger.info(f"Stream: `{self.http_client._name}`, checkpointing after >= `{self._job_checkpoint_interval}` lines collected.")
+         LOGGER.info(f"Stream: `{self.http_client.name}`, checkpointing after >= `{self._job_checkpoint_interval}` rows collected.")
          # set the flag to adjust the next slice from the checkpointed cursor value
-         self._job_adjust_slice_from_checkpoint = True
          self._job_cancel()

      def _on_running_job(self, **kwargs) -> None:
@@ -306,10 +363,15 @@
      def _on_completed_job(self, response: Optional[requests.Response] = None) -> None:
          self._job_result_filename = self._job_get_result(response)

-     def _on_failed_job(self, response: requests.Response) -> AirbyteTracedException:
-         raise ShopifyBulkExceptions.BulkJobFailed(
-             f"The BULK Job: `{self._job_id}` exited with {self._job_state}, details: {response.text}",
-         )
+     def _on_failed_job(self, response: requests.Response) -> AirbyteTracedException | None:
+         if not self._supports_checkpointing:
+             raise ShopifyBulkExceptions.BulkJobFailed(
+                 f"The BULK Job: `{self._job_id}` exited with {self._job_state}, details: {response.text}",
+             )
+         else:
+             # when the Bulk Job fails, usually there is a `partialDataUrl` available,
+             # we leverage the checkpointing in this case.
+             self._job_get_checkpointed_result(response)

      def _on_timeout_job(self, **kwargs) -> AirbyteTracedException:
          raise ShopifyBulkExceptions.BulkJobTimout(
@@ -325,17 +387,17 @@
              raise ShopifyBulkExceptions.BulkJobError(f"Could not validate the status of the BULK Job `{self._job_id}`. Errors: {errors}.")

      def _on_non_handable_job_error(self, errors: List[Mapping[str, Any]]) -> AirbyteTracedException:
-         raise ShopifyBulkExceptions.BulkJobNonHandableError(f"The Stream: `{self.http_client._name}`, Non-handable error occured: {errors}")
+         raise ShopifyBulkExceptions.BulkJobNonHandableError(f"The Stream: `{self.http_client.name}`, Non-handable error occured: {errors}")

-     def _get_server_errors(self, response: requests.Response) -> List[Optional[dict]]:
+     def _get_server_errors(self, response: requests.Response) -> List[Optional[Mapping[str, Any]]]:
          server_errors = response.json().get("errors", [])
          return [server_errors] if isinstance(server_errors, str) else server_errors

-     def _get_user_errors(self, response: requests.Response) -> List[Optional[dict]]:
+     def _get_user_errors(self, response: requests.Response) -> List[Optional[Mapping[str, Any]]]:
          user_errors = response.json().get("data", {}).get("bulkOperationRunQuery", {}).get("userErrors", [])
          return [user_errors] if isinstance(user_errors, str) else user_errors

-     def _collect_bulk_errors(self, response: requests.Response) -> List[Optional[dict]]:
+     def _collect_bulk_errors(self, response: requests.Response) -> List[Optional[Mapping[str, Any]]]:
          try:
              return self._get_server_errors(response) + self._get_user_errors(response)
          except (Exception, JSONDecodeError) as e:
@@ -353,8 +415,7 @@
          _, response = self.http_client.send_request(
              http_method="POST",
              url=self.base_url,
-             data=ShopifyBulkTemplates.status(self._job_id),
-             headers={"Content-Type": "application/graphql"},
+             json={"query": ShopifyBulkTemplates.status(self._job_id)},
              request_kwargs={},
          )
          self._job_healthcheck(response)
@@ -367,30 +428,23 @@
          Error example:
          [
              {
+                 'code': 'OPERATION_IN_PROGRESS',
                  'field': None,
                  'message': 'A bulk query operation for this app and shop is already in progress: gid://shopify/BulkOperation/4039184154813.',
              }
          ]
          """
-
-         concurrent_job_pattern = "A bulk query operation for this app and shop is already in progress"
          # the errors are handled in `job_job_check_for_errors`
          if errors:
              for error in errors:
-                 message = error.get("message", "") if isinstance(error, dict) else ""
-                 if concurrent_job_pattern in message:
+                 error_code = error.get("code", "") if isinstance(error, dict) else ""
+                 if error_code == BulkOperationUserErrorCode.OPERATION_IN_PROGRESS.value:
                      return True
          return False

      def _has_reached_max_concurrency(self) -> bool:
          return self._concurrent_attempt == self._concurrent_max_retry

-     def _switch_base_url(self) -> None:
-         if self._new_base_url:
-             self.base_url = self._new_base_url
-         else:
-             self.logger.warning(f"Failed switching the `base url`, no `new base url` has been retrieved.")
-
      def _should_switch_shop_name(self, response: requests.Response) -> bool:
          """
          Sometimes the API returns the redirected response that points to the same Store but with different Name:
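In the hunk above, concurrency detection now keys off the structured `code` field instead of substring-matching the human-readable `message`. A short sketch against the error payload shown in the docstring, reusing the `BulkOperationUserErrorCode` enum introduced at the top of this file:

    from enum import Enum

    class BulkOperationUserErrorCode(Enum):  # as defined earlier in this diff
        INVALID = "INVALID"
        OPERATION_IN_PROGRESS = "OPERATION_IN_PROGRESS"

    errors = [
        {
            "code": "OPERATION_IN_PROGRESS",
            "field": None,
            "message": "A bulk query operation for this app and shop is already in progress: gid://shopify/BulkOperation/4039184154813.",
        }
    ]

    def has_running_concurrent_job(errors: list) -> bool:
        for error in errors or []:
            error_code = error.get("code", "") if isinstance(error, dict) else ""
            if error_code == BulkOperationUserErrorCode.OPERATION_IN_PROGRESS.value:
                return True
        return False

    assert has_running_concurrent_job(errors)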
@@ -402,24 +456,26 @@

          This redirection is related to:
          1) `aliased` or `hidden` store names from being exposed
-         2) migrated to data to the new store, but referenced within the old one stil.
+         2) `migrated` store data to the `new store`, but referenced within the old one stil

          reference issue: https://github.com/airbytehq/oncall/issues/5866
          """
          if self.base_url != response.url:
-             self._new_base_url = response.url
+             self.base_url = response.url
              return True
          return False

-     @bulk_retry_on_exception(logger)
+     @bulk_retry_on_exception()
      def _job_check_state(self) -> None:
          while not self._job_completed():
              if self._job_canceled():
                  break
+             elif self._job_failed():
+                 break
              else:
                  self._job_track_running()

-     @bulk_retry_on_exception(logger)
+     @bulk_retry_on_exception()
      def create_job(self, stream_slice: Mapping[str, str], filter_field: str) -> None:
          if stream_slice:
              query = self.query.get(filter_field, stream_slice["start"], stream_slice["end"])
@@ -437,7 +493,7 @@
          if self._has_running_concurrent_job(errors):
              # when the concurrent job takes place, another job could not be created
              # we typically need to wait and retry, but no longer than 10 min. (see retry in `bulk_retry_on_exception`)
-             raise ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError(f"Failed to create job for stream {self.http_client._name}")
+             raise ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError(f"Failed to create job for stream {self.http_client.name}")
          elif self._should_switch_shop_name(response):
              # assign new shop name, since the one that specified in `config` was redirected to the different one.
              raise ShopifyBulkExceptions.BulkJobRedirectToOtherShopError(f"Switching the `store` name, redirected to: {response.url}")
@@ -459,9 +515,9 @@
          self._job_id = bulk_response.get("id")
          self._job_created_at = bulk_response.get("createdAt")
          self._job_state = ShopifyBulkJobStatus.CREATED.value
-         self.logger.info(f"Stream: `{self.http_client._name}`, the BULK Job: `{self._job_id}` is {ShopifyBulkJobStatus.CREATED.value}")
+         LOGGER.info(f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {ShopifyBulkJobStatus.CREATED.value}")

-     def job_size_normalize(self, start: datetime, end: datetime) -> datetime:
+     def job_size_normalize(self, start: datetime, end: datetime) -> None:
          # adjust slice size when it's bigger than the loop point when it should end,
          # to preserve correct job size adjustments when this is the only job we need to run, based on STATE provided
          requested_slice_size = (end - start).total_days()
@@ -471,9 +527,45 @@
          step = self._job_size if self._job_size else self._job_size_min
          return slice_start.add(days=step)

-     def get_adjusted_job_end(self, slice_start: datetime, slice_end: datetime, checkpointed_cursor: Optional[datetime] = None) -> datetime:
+     def _adjust_slice_end(
+         self, slice_end: datetime, checkpointed_cursor: Optional[str] = None, filter_checkpointed_cursor: Optional[str] = None
+     ) -> datetime:
+         """
+         Choose between the existing `slice_end` value or `checkpointed_cursor` value or `filter_checkpointed_cursor` value, if provided.
+
+         Optionally: raises the `transient` error if the checkpoint collision occurs.
+
+         Note: filter_checkpointed_cursor is only used when cursor field is ID for streams like Customer Address etc.
+         This method should return a datetime from last checkpointed value to adjust slice end, when cursor value is ID (int type)
+         method gets end datetime from filter_checkpointed_cursor, which is value from filter field from last record.
+         See https://github.com/airbytehq/oncall/issues/9052 for more details.
+         """
+
+         if checkpointed_cursor:
+             if self._checkpoint_cursor_has_collision(checkpointed_cursor):
+                 raise ShopifyBulkExceptions.BulkJobCheckpointCollisionError(
+                     f"The stream: `{self.http_client.name}` checkpoint collision is detected. Try to increase the `BULK Job checkpoint (rows collected)` to the bigger value. The stream will be synced again during the next sync attempt."
+                 )
+             # set the checkpointed cursor value
+             self._set_last_checkpoint_cursor_value(checkpointed_cursor)
+             if isinstance(checkpointed_cursor, str):
+                 return pdm.parse(checkpointed_cursor)
+             if isinstance(checkpointed_cursor, int):
+                 return pdm.parse(filter_checkpointed_cursor)
+
+         return slice_end
+
+     def get_adjusted_job_end(
+         self,
+         slice_start: datetime,
+         slice_end: datetime,
+         checkpointed_cursor: Optional[str] = None,
+         filter_checkpointed_cursor: Optional[str] = None,
+     ) -> datetime:
          if self._job_adjust_slice_from_checkpoint:
-             return pdm.parse(checkpointed_cursor) if checkpointed_cursor else slice_end
+             # set the checkpointing to default, before the next slice is emitted, to avoid inf.loop
+             self._reset_checkpointing()
+             return self._adjust_slice_end(slice_end, checkpointed_cursor, filter_checkpointed_cursor)

          if self._is_long_running_job:
              self._job_size_reduce_next()
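`_adjust_slice_end` prefers the checkpointed cursor over the planned slice end and records it, so a repeated value trips the collision error above instead of looping forever. For integer (ID) cursors, the end datetime comes from `filter_checkpointed_cursor`, the filter-field value of the last record. A hedged worked example with placeholder values:

    import pendulum as pdm

    planned_slice_end = pdm.parse("2024-06-01T00:00:00Z")  # placeholder

    # datetime-string cursor: the checkpointed value becomes the new slice end
    checkpointed_cursor = "2024-05-10T12:00:00Z"
    assert pdm.parse(checkpointed_cursor) < planned_slice_end

    # integer (ID) cursor: the datetime is taken from the filter field instead
    checkpointed_cursor = 7349164451901  # placeholder record ID
    filter_checkpointed_cursor = "2024-05-10T12:00:00Z"
    adjusted_end = (
        pdm.parse(filter_checkpointed_cursor)
        if isinstance(checkpointed_cursor, int)
        else planned_slice_end
    )
    assert adjusted_end < planned_slice_end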
@@ -481,8 +573,25 @@

          return slice_end

+     def _emit_final_job_message(self, job_current_elapsed_time: int) -> None:
+         final_message = f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` time elapsed: {job_current_elapsed_time} sec."
+
+         if self._job_any_lines_collected:
+             lines_collected_message = f" Rows collected: {self._job_last_rec_count} --> records: `{self.record_producer.record_composed}`."
+             final_message = final_message + lines_collected_message
+
+         # emit final Bulk job status message
+         LOGGER.info(f"{final_message}")
+
+     def _process_bulk_results(self) -> Iterable[Mapping[str, Any]]:
+         if self._job_result_filename:
+             # produce records from saved bulk job result
+             yield from self.record_producer.read_file(self._job_result_filename)
+         else:
+             yield from []
+
      @limiter.balance_rate_limit(api_type=ApiTypeEnum.graphql.value)
-     def job_check_for_completion(self) -> Optional[str]:
+     def job_get_results(self) -> Optional[Iterable[Mapping[str, Any]]]:
          """
          This method checks the status for the `CREATED` Shopify BULK Job, using it's `ID`.
          The time spent for the Job execution is tracked to understand the effort.
@@ -492,7 +601,7 @@
          try:
              # track created job until it's COMPLETED
              self._job_check_state()
-             return self._job_result_filename
+             yield from self._process_bulk_results()
          except (
              ShopifyBulkExceptions.BulkJobFailed,
              ShopifyBulkExceptions.BulkJobTimout,
@@ -504,9 +613,8 @@
              raise bulk_job_error
          finally:
              job_current_elapsed_time = round((time() - job_started), 3)
-             self.logger.info(
-                 f"Stream: `{self.http_client._name}`, the BULK Job: `{self._job_id}` time elapsed: {job_current_elapsed_time} sec."
-             )
+             # emit the final Bulk Job log message
+             self._emit_final_job_message(job_current_elapsed_time)
              # check whether or not we should expand or reduce the size of the slice
              self.__adjust_job_size(job_current_elapsed_time)
              # reset the state for COMPLETED job
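Taken together with `_process_bulk_results` above, the public contract changes: callers no longer receive a result filename from `job_check_for_completion()` and parse it themselves; `job_get_results()` is a generator that yields already-composed records. A hedged caller-side sketch (the manager setup is elided; the stub below stands in for the real method):

    from typing import Any, Iterable, Mapping

    def job_get_results_stub() -> Iterable[Mapping[str, Any]]:
        # stand-in for ShopifyBulkManager.job_get_results()
        yield {"id": "gid://shopify/Order/1", "updated_at": "2024-05-10T12:00:00Z"}  # placeholder record

    # 2.4.x: filename = manager.job_check_for_completion(); the stream parsed the file
    # 3.1.0: records are yielded directly by the manager
    for record in job_get_results_stub():
        print(record["id"])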