airbyte-source-shopify 3.0.8.dev202507101541__tar.gz → 3.0.9rc1__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (69)
  1. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/PKG-INFO +3 -3
  2. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/pyproject.toml +7 -10
  3. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/job.py +25 -160
  4. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/streams/base_streams.py +18 -3
  5. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/streams/streams.py +7 -0
  6. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/README.md +0 -0
  7. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/__init__.py +0 -0
  8. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/auth.py +0 -0
  9. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/config_migrations.py +0 -0
  10. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/http_request.py +0 -0
  11. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/run.py +0 -0
  12. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/abandoned_checkouts.json +0 -0
  13. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/articles.json +0 -0
  14. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/balance_transactions.json +0 -0
  15. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/blogs.json +0 -0
  16. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/collections.json +0 -0
  17. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/collects.json +0 -0
  18. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/countries.json +0 -0
  19. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/custom_collections.json +0 -0
  20. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/customer_address.json +0 -0
  21. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/customer_journey_summary.json +0 -0
  22. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/customers.json +0 -0
  23. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/discount_codes.json +0 -0
  24. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/disputes.json +0 -0
  25. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/draft_orders.json +0 -0
  26. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/fulfillment_orders.json +0 -0
  27. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/fulfillments.json +0 -0
  28. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/inventory_items.json +0 -0
  29. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/inventory_levels.json +0 -0
  30. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/locations.json +0 -0
  31. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_articles.json +0 -0
  32. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_blogs.json +0 -0
  33. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_collections.json +0 -0
  34. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_customers.json +0 -0
  35. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_draft_orders.json +0 -0
  36. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_locations.json +0 -0
  37. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_orders.json +0 -0
  38. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_pages.json +0 -0
  39. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_product_images.json +0 -0
  40. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_product_variants.json +0 -0
  41. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_products.json +0 -0
  42. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_shops.json +0 -0
  43. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_smart_collections.json +0 -0
  44. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/order_agreements.json +0 -0
  45. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/order_refunds.json +0 -0
  46. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/order_risks.json +0 -0
  47. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/orders.json +0 -0
  48. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/pages.json +0 -0
  49. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/price_rules.json +0 -0
  50. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/product_images.json +0 -0
  51. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/product_variants.json +0 -0
  52. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/products.json +0 -0
  53. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/profile_location_groups.json +0 -0
  54. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/shop.json +0 -0
  55. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/smart_collections.json +0 -0
  56. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/tender_transactions.json +0 -0
  57. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/transactions.json +0 -0
  58. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/scopes.py +0 -0
  59. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/__init__.py +0 -0
  60. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/exceptions.py +0 -0
  61. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/query.py +0 -0
  62. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/record.py +0 -0
  63. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/retry.py +0 -0
  64. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/status.py +0 -0
  65. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/tools.py +0 -0
  66. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/source.py +0 -0
  67. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/spec.json +0 -0
  68. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/transform.py +0 -0
  69. {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/utils.py +0 -0
PKG-INFO +3 -3

@@ -1,7 +1,8 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.1
 Name: airbyte-source-shopify
-Version: 3.0.8.dev202507101541
+Version: 3.0.9rc1
 Summary: Source CDK implementation for Shopify.
+Home-page: https://airbyte.com
 License: ELv2
 Author: Airbyte
 Author-email: contact@airbyte.io
@@ -15,7 +16,6 @@ Requires-Dist: graphql-query (>=1,<2)
 Requires-Dist: pendulum (>=2.1.2,<3.0.0)
 Requires-Dist: sgqlc (==16.3)
 Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/shopify
-Project-URL: Homepage, https://airbyte.com
 Project-URL: Repository, https://github.com/airbytehq/airbyte
 Description-Content-Type: text/markdown
 
pyproject.toml +7 -10

@@ -1,24 +1,19 @@
 [build-system]
-requires = [
-    "poetry-core>=1.0.0",
-]
+requires = [ "poetry-core>=1.0.0",]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-version = "3.0.8.dev202507101541"
+version = "3.0.9-rc.1"
 name = "airbyte-source-shopify"
 description = "Source CDK implementation for Shopify."
-authors = [
-    "Airbyte <contact@airbyte.io>",
-]
+authors = [ "Airbyte <contact@airbyte.io>",]
 license = "ELv2"
 readme = "README.md"
 documentation = "https://docs.airbyte.com/integrations/sources/shopify"
 homepage = "https://airbyte.com"
 repository = "https://github.com/airbytehq/airbyte"
-packages = [
-    { include = "source_shopify" },
-]
+[[tool.poetry.packages]]
+include = "source_shopify"
 
 [tool.poetry.dependencies]
 python = "^3.10,<3.12"
@@ -38,5 +33,7 @@ freezegun = "^1.4.0"
 
 [tool.poe]
 include = [
+    # Shared tasks definition file(s) can be imported here.
+    # Run `poe` or `poe --help` to see the list of available tasks.
     "${POE_GIT_DIR}/poe-tasks/poetry-connector-tasks.toml",
 ]
source_shopify/shopify_graphql/bulk/job.py +25 -160

@@ -5,7 +5,6 @@
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
-from json import loads
 from time import sleep, time
 from typing import Any, Final, Iterable, List, Mapping, Optional
 
@@ -66,7 +65,7 @@ class ShopifyBulkManager:
 
     # currents: _job_id, _job_state, _job_created_at, _job_self_canceled
     _job_id: Optional[str] = field(init=False, default=None)
-    _job_state: Optional[str] = field(init=False, default=None)  # this string is based on ShopifyBulkJobStatus
+    _job_state: str | None = field(init=False, default=None)  # this string is based on ShopifyBulkJobStatus
     # completed and saved Bulk Job result filename
     _job_result_filename: Optional[str] = field(init=False, default=None)
     # date-time when the Bulk Job was created on the server
@@ -84,9 +83,7 @@ class ShopifyBulkManager:
     # the flag to adjust the next slice from the checkpointed cursor vaue
     _job_adjust_slice_from_checkpoint: bool = field(init=False, default=False)
     # keeps the last checkpointed cursor value for supported streams
-    _job_last_checkpoint_cursor_value: Optional[str] = field(init=False, default=None)
-    # stores extracted cursor from INTERNAL_SERVER_ERROR recovery (temporary storage)
-    _job_extracted_checkpoint_cursor: Optional[str] = field(init=False, default=None)
+    _job_last_checkpoint_cursor_value: str | None = field(init=False, default=None)
 
     # expand slice factor
     _job_size_expand_factor: int = field(init=False, default=2)
@@ -217,8 +214,6 @@ class ShopifyBulkManager:
         self._log_job_msg_count = 0
         # set the running job object count to default
         self._job_last_rec_count = 0
-        # clear any extracted cursor from INTERNAL_SERVER_ERROR recovery
-        self._job_extracted_checkpoint_cursor = None
 
     def _set_checkpointing(self) -> None:
         # set the flag to adjust the next slice from the checkpointed cursor value
@@ -318,24 +313,6 @@ class ShopifyBulkManager:
             # fetch the collected records from CANCELED Job on checkpointing
             self._job_result_filename = self._job_get_result(response)
 
-            # Special handling: For FAILED jobs with INTERNAL_SERVER_ERROR, extract the last processed cursor
-            if response:
-                parsed_response = response.json().get("data", {}).get("node", {}) if response else {}
-                error_code = parsed_response.get("errorCode")
-                if error_code == "INTERNAL_SERVER_ERROR":
-                    last_cursor = self._extract_last_cursor_from_partial_data(response)
-                    if last_cursor:
-                        # Check if this cursor would cause a collision before storing it
-                        if self._checkpoint_cursor_has_collision(last_cursor):
-                            # Skip cursor extraction to avoid collision
-                            pass
-                        else:
-                            # Store the extracted cursor for later use (don't set it yet to avoid collision)
-                            self._job_extracted_checkpoint_cursor = last_cursor
-        else:
-            # Not processing data due to insufficient records or checkpointing disabled
-            pass
-
     def _job_update_state(self, response: Optional[requests.Response] = None) -> None:
         if response:
             self._job_state = response.json().get("data", {}).get("node", {}).get("status")
@@ -386,26 +363,7 @@ class ShopifyBulkManager:
     def _on_completed_job(self, response: Optional[requests.Response] = None) -> None:
         self._job_result_filename = self._job_get_result(response)
 
-    def _on_failed_job(self, response: requests.Response) -> Optional[AirbyteTracedException]:
-        # Special handling for FAILED jobs with INTERNAL_SERVER_ERROR that support checkpointing
-        parsed_response = response.json().get("data", {}).get("node", {}) if response else {}
-        error_code = parsed_response.get("errorCode")
-
-        if error_code == "INTERNAL_SERVER_ERROR" and self._supports_checkpointing:
-            LOGGER.info(
-                f"Stream: `{self.http_client.name}`, BULK Job: `{self._job_id}` failed with INTERNAL_SERVER_ERROR. Waiting for partial data availability..."
-            )
-            # For INTERNAL_SERVER_ERROR specifically, wait and retry to check if partial data becomes available
-            partial_response = self._wait_for_partial_data_on_failure()
-            if partial_response:
-                # Use the updated response that may contain partialDataUrl
-                response = partial_response
-                # Update the job state with the new response to ensure _job_last_rec_count is set correctly
-                self._job_update_state(response)
-                # For INTERNAL_SERVER_ERROR with partial data, extract cursor and treat as checkpointable
-                self._job_get_checkpointed_result(response)
-                return None  # Don't raise exception, we recovered the data
-
+    def _on_failed_job(self, response: requests.Response) -> AirbyteTracedException | None:
         if not self._supports_checkpointing:
             raise ShopifyBulkExceptions.BulkJobFailed(
                 f"The BULK Job: `{self._job_id}` exited with {self._job_state}, details: {response.text}",
@@ -415,102 +373,6 @@ class ShopifyBulkManager:
         # we leverage the checkpointing in this case.
         self._job_get_checkpointed_result(response)
 
-    def _wait_for_partial_data_on_failure(self) -> Optional[requests.Response]:
-        """
-        Wait for partial data to become available when a BULK job fails with INTERNAL_SERVER_ERROR.
-
-        This method is specifically designed for INTERNAL_SERVER_ERROR cases where
-        Shopify's BULK API may make partial data available (via partialDataUrl)
-        after a short wait, even though the job initially failed.
-
-        Returns:
-            Optional[requests.Response]: Updated response with potential partialDataUrl, or None if no data
-        """
-        max_wait_attempts = 10  # Maximum number of wait attempts
-        wait_interval = 10  # Wait 10 seconds between checks
-
-        for attempt in range(max_wait_attempts):
-            sleep(wait_interval)
-
-            # Check job status again to see if partial data is now available
-            try:
-                _, response = self.http_client.send_request(
-                    http_method="POST",
-                    url=self.base_url,
-                    json={"query": ShopifyBulkTemplates.status(self._job_id)},
-                    request_kwargs={},
-                )
-
-                parsed_response = response.json().get("data", {}).get("node", {}) if response else {}
-                partial_data_url = parsed_response.get("partialDataUrl")
-                object_count = parsed_response.get("objectCount", "0")
-
-                # Only stop waiting if we actually have a partialDataUrl - objectCount alone is not sufficient
-                if partial_data_url and int(object_count) > 0:
-                    LOGGER.info(f"Stream: `{self.http_client.name}`, partial data available after wait. Object count: {object_count}")
-                    return response
-                elif int(object_count) > 0:
-                    # objectCount available but no partialDataUrl yet - continue waiting
-                    continue
-
-            except Exception as e:
-                # Error during partial data check - continue waiting
-                continue
-
-        LOGGER.warning(f"Stream: `{self.http_client.name}`, no partial data became available after {max_wait_attempts} attempts")
-        return None
-
-    def _extract_last_cursor_from_partial_data(self, response: Optional[requests.Response]) -> Optional[str]:
-        """
-        Extract the last processed cursor value from partial data for INTERNAL_SERVER_ERROR recovery.
-
-        This method retrieves partial data from a failed INTERNAL_SERVER_ERROR job and extracts
-        the updatedAt value of the last record, which can be used to resume processing from that point.
-        Only used in INTERNAL_SERVER_ERROR scenarios with checkpointing support.
-
-        Args:
-            response: The response containing partial data information
-
-        Returns:
-            Optional[str]: The cursor value of the last processed record, or None if unavailable
-        """
-        if not response:
-            return None
-
-        try:
-            parsed_response = response.json().get("data", {}).get("node", {})
-            partial_data_url = parsed_response.get("partialDataUrl")
-
-            if not partial_data_url:
-                return None
-
-            # Download the partial data
-            _, partial_response = self.http_client.send_request(http_method="GET", url=partial_data_url, request_kwargs={"stream": True})
-            partial_response.raise_for_status()
-
-            last_record = None
-            # Read through the JSONL data to find the last record
-            for line in partial_response.iter_lines(decode_unicode=True):
-                if line and line.strip() and line.strip() != END_OF_FILE:
-                    try:
-                        record = loads(line)
-                        # Look for the main record types (Order, Product, etc.)
-                        if record.get("__typename") in ["Order", "Product", "Customer", "FulfillmentOrder"]:
-                            last_record = record
-                    except Exception:
-                        continue
-
-            # Extract the updatedAt cursor from the last record
-            if last_record and "updatedAt" in last_record:
-                cursor_value = last_record["updatedAt"]
-                return cursor_value
-
-        except Exception as e:
-            # Failed to extract cursor from partial data
-            pass
-
-        return None
-
     def _on_timeout_job(self, **kwargs) -> AirbyteTracedException:
         raise ShopifyBulkExceptions.BulkJobTimout(
             f"The BULK Job: `{self._job_id}` exited with {self._job_state}, please reduce the `GraphQL BULK Date Range in Days` in SOURCES > Your Shopify Source > SETTINGS.",
@@ -665,42 +527,45 @@ class ShopifyBulkManager:
         step = self._job_size if self._job_size else self._job_size_min
         return slice_start.add(days=step)
 
-    def _adjust_slice_end(self, slice_end: datetime, checkpointed_cursor: Optional[str] = None) -> datetime:
+    def _adjust_slice_end(
+        self, slice_end: datetime, checkpointed_cursor: Optional[str] = None, filter_checkpointed_cursor: Optional[str] = None
+    ) -> datetime:
         """
-        Choose between the existing `slice_end` value or `checkpointed_cursor` value, if provided.
+        Choose between the existing `slice_end` value or `checkpointed_cursor` value or `filter_checkpointed_cursor` value, if provided.
 
         Optionally: raises the `transient` error if the checkpoint collision occurs.
+
+        Note: filter_checkpointed_cursor is only used when cursor field is ID for streams like Customer Address etc.
+        This method should return a datetime from last checkpointed value to adjust slice end, when cursor value is ID (int type)
+        method gets end datetime from filter_checkpointed_cursor, which is value from filter field from last record.
+        See https://github.com/airbytehq/oncall/issues/9052 for more details.
         """
 
         if checkpointed_cursor:
-            # Check for collision and provide more context in the error
             if self._checkpoint_cursor_has_collision(checkpointed_cursor):
-                # For INTERNAL_SERVER_ERROR recovery, if the cursor is the same, we might need to skip ahead slightly
-                # This can happen if the failure occurred right at the boundary of what was already processed
-                if hasattr(self, "_job_extracted_checkpoint_cursor") and self._job_extracted_checkpoint_cursor == checkpointed_cursor:
-                    pass  # Collision from INTERNAL_SERVER_ERROR recovery at boundary
-
                 raise ShopifyBulkExceptions.BulkJobCheckpointCollisionError(
-                    f"The stream: `{self.http_client.name}` checkpoint collision is detected. Current cursor: {self._job_last_checkpoint_cursor_value}, New cursor: {checkpointed_cursor}. Try to increase the `BULK Job checkpoint (rows collected)` to the bigger value. The stream will be synced again during the next sync attempt."
+                    f"The stream: `{self.http_client.name}` checkpoint collision is detected. Try to increase the `BULK Job checkpoint (rows collected)` to the bigger value. The stream will be synced again during the next sync attempt."
                 )
             # set the checkpointed cursor value
             self._set_last_checkpoint_cursor_value(checkpointed_cursor)
-            return pdm.parse(checkpointed_cursor)
+            if isinstance(checkpointed_cursor, str):
+                return pdm.parse(checkpointed_cursor)
+            if isinstance(checkpointed_cursor, int):
+                return pdm.parse(filter_checkpointed_cursor)
 
         return slice_end
 
-    def get_adjusted_job_end(self, slice_start: datetime, slice_end: datetime, checkpointed_cursor: Optional[str] = None) -> datetime:
+    def get_adjusted_job_end(
+        self,
+        slice_start: datetime,
+        slice_end: datetime,
+        checkpointed_cursor: Optional[str] = None,
+        filter_checkpointed_cursor: Optional[str] = None,
+    ) -> datetime:
         if self._job_adjust_slice_from_checkpoint:
             # set the checkpointing to default, before the next slice is emitted, to avoid inf.loop
             self._reset_checkpointing()
-            # Clear the extracted cursor after use to avoid reusing it
-            if self._job_extracted_checkpoint_cursor:
-                extracted_cursor = self._job_extracted_checkpoint_cursor
-                self._job_extracted_checkpoint_cursor = None
-                cursor_to_use = extracted_cursor
-            else:
-                cursor_to_use = checkpointed_cursor or self._job_last_checkpoint_cursor_value
-            return self._adjust_slice_end(slice_end, cursor_to_use)
+            return self._adjust_slice_end(slice_end, checkpointed_cursor, filter_checkpointed_cursor)
 
         if self._is_long_running_job:
             self._job_size_reduce_next()
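
The docstring added to `_adjust_slice_end` above describes the new branching: a string cursor (a datetime such as `updatedAt`) is parsed and used directly as the next slice boundary, while an integer cursor (an ID, as for streams like Customer Address) falls back to the datetime taken from the filter field of the last checkpointed record. The following standalone sketch is not part of the package; the function name and sample values are hypothetical, and `pendulum` is the connector's declared date dependency. It only illustrates the selection logic; the real method also records the cursor via `_set_last_checkpoint_cursor_value` and can raise a checkpoint-collision error.

# Illustrative sketch only (not from the diff): cursor-type branching for the slice end.
from datetime import datetime
from typing import Optional, Union

import pendulum as pdm  # date parsing dependency declared in pyproject.toml


def adjust_slice_end(
    slice_end: datetime,
    checkpointed_cursor: Optional[Union[str, int]] = None,
    filter_checkpointed_cursor: Optional[str] = None,
) -> datetime:
    # No checkpoint recorded yet: keep the originally planned slice end.
    if checkpointed_cursor is None:
        return slice_end
    # Cursor field is a datetime string (e.g. `updatedAt`): resume from it directly.
    if isinstance(checkpointed_cursor, str):
        return pdm.parse(checkpointed_cursor)
    # Cursor field is an integer ID: the ID is not a point in time, so resume from
    # the filter-field datetime of the last checkpointed record instead.
    return pdm.parse(filter_checkpointed_cursor)


planned_end = pdm.parse("2025-07-10T00:00:00Z")
print(adjust_slice_end(planned_end, checkpointed_cursor="2025-07-09T12:00:00Z"))
print(adjust_slice_end(planned_end, checkpointed_cursor=123456789, filter_checkpointed_cursor="2025-07-09T06:00:00Z"))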
source_shopify/streams/base_streams.py +18 -3

@@ -178,6 +178,11 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
     # Setting the check point interval to the limit of the records output
     state_checkpoint_interval = 250
 
+    def __init__(self, config: Dict):
+        super().__init__(config)
+        # _filter_checkpointed_cursor used to checkpoint streams with cursor field - ID in job.get_adjusted_job_end
+        self._filter_checkpointed_cursor = None
+
     @property
     def filter_by_state_checkpoint(self) -> bool:
         """
@@ -216,7 +221,12 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
                 params[self.filter_field] = stream_state.get(self.cursor_field)
         return params
 
-    def track_checkpoint_cursor(self, record_value: Union[str, int]) -> None:
+    def track_checkpoint_cursor(self, record_value: Union[str, int], filter_record_value: Optional[str] = None) -> None:
+        """
+        Tracks _checkpoint_cursor value (values from cursor field) and _filter_checkpointed_cursor value (value from filter field).
+        _filter_checkpointed_cursor value is only used when cursor field is ID for streams like Customer Address etc.
+        When after canceled/failed job source tries to adjust stream slice (see ShopifyBulkManager._adjust_slice_end()).
+        """
         if self.filter_by_state_checkpoint:
             # set checkpoint cursor
             if not self._checkpoint_cursor:
@@ -225,6 +235,10 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
             if str(record_value) >= str(self._checkpoint_cursor):
                 self._checkpoint_cursor = record_value
 
+            if filter_record_value:
+                if not self._filter_checkpointed_cursor or str(filter_record_value) >= str(self._filter_checkpointed_cursor):
+                    self._filter_checkpointed_cursor = filter_record_value
+
     def should_checkpoint(self, index: int) -> bool:
         return self.filter_by_state_checkpoint and index >= self.state_checkpoint_interval
 
@@ -242,7 +256,8 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
         for index, record in enumerate(records_slice, 1):
             if self.cursor_field in record:
                 record_value = record.get(self.cursor_field, self.default_state_comparison_value)
-                self.track_checkpoint_cursor(record_value)
+                filter_record_value = record.get(self.filter_field) if self.filter_field else None
+                self.track_checkpoint_cursor(record_value, filter_record_value)
                 if record_value:
                     if record_value >= state_value:
                         yield record
@@ -815,7 +830,7 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
                 self.emit_slice_message(start, slice_end)
                 yield {"start": start.to_rfc3339_string(), "end": slice_end.to_rfc3339_string()}
                 # increment the end of the slice or reduce the next slice
-                start = self.job_manager.get_adjusted_job_end(start, slice_end, self._checkpoint_cursor)
+                start = self.job_manager.get_adjusted_job_end(start, slice_end, self._checkpoint_cursor, self._filter_checkpointed_cursor)
         else:
             # for the streams that don't support filtering
             yield {}
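
The `track_checkpoint_cursor` docstring in the hunks above describes keeping two values per record: the cursor-field value, and, when a filter field is configured, the filter-field value that `get_adjusted_job_end` later falls back to for integer ID cursors. A standalone sketch of that bookkeeping, detached from the stream classes and using hypothetical record fields, could look like this:

# Illustrative sketch only (not from the diff): two-cursor bookkeeping while filtering records.
from typing import Optional, Union


class CheckpointTracker:
    """Hypothetical stand-in for the stream's checkpoint bookkeeping."""

    def __init__(self) -> None:
        self.checkpoint_cursor: Optional[Union[str, int]] = None
        self.filter_checkpointed_cursor: Optional[str] = None

    def track(self, record_value: Union[str, int], filter_record_value: Optional[str] = None) -> None:
        # keep the greatest cursor-field value seen so far (string comparison, as in the diff)
        if self.checkpoint_cursor is None or str(record_value) >= str(self.checkpoint_cursor):
            self.checkpoint_cursor = record_value
        # separately keep the greatest filter-field value; used when the cursor field is an integer ID
        if filter_record_value and (
            self.filter_checkpointed_cursor is None or str(filter_record_value) >= str(self.filter_checkpointed_cursor)
        ):
            self.filter_checkpointed_cursor = filter_record_value


tracker = CheckpointTracker()
for record in [
    {"id": 101, "updated_at": "2025-07-09T10:00:00Z"},
    {"id": 102, "updated_at": "2025-07-09T11:00:00Z"},
]:
    tracker.track(record["id"], record["updated_at"])

print(tracker.checkpoint_cursor, tracker.filter_checkpointed_cursor)  # 102 2025-07-09T11:00:00Z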
source_shopify/streams/streams.py +7 -0

@@ -4,6 +4,7 @@
 
 
 import logging
+import sys
 from typing import Any, Iterable, Mapping, MutableMapping, Optional
 
 import requests
@@ -138,6 +139,12 @@ class MetafieldProducts(IncrementalShopifyGraphQlBulkStream):
     parent_stream_class = Products
     bulk_query: MetafieldProduct = MetafieldProduct
 
+    state_checkpoint_interval = sys.maxsize
+
+    @property
+    def filter_by_state_checkpoint(self) -> bool:
+        return True
+
 
 class ProductImages(IncrementalShopifyGraphQlBulkStream):
     parent_stream_class = Products