airbyte-source-shopify 3.1.2.dev202601082220__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-shopify
3
- Version: 3.1.2.dev202601082220
3
+ Version: 3.2.0
4
4
  Summary: Source CDK implementation for Shopify.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -7,6 +7,7 @@ source_shopify/schemas/abandoned_checkouts.json,sha256=nlYpb8VOQCO8wuHWM8gWhlzzj
7
7
  source_shopify/schemas/articles.json,sha256=tGnbM4lI2jA8joevDsrd-mQJksO2lLG3dIJGpxeuoYs,2548
8
8
  source_shopify/schemas/balance_transactions.json,sha256=RAU7duUHTWS7nI0pochhTZt575ksPwc51Qmcv4XL39o,2135
9
9
  source_shopify/schemas/blogs.json,sha256=ciBS_3eCf4UJUaB0DPCVadeJR4W6ndq7N0JwykXp0RY,2151
10
+ source_shopify/schemas/collection_products.json,sha256=ycOpp5egXg3nEWpGy8IWFW6ekDXP4fgqdk_trKjvo7Q,1102
10
11
  source_shopify/schemas/collections.json,sha256=2iJMCyAn_yeMKsQVt7jGR3_u3N3CA8QQ6179QvRuwqY,1889
11
12
  source_shopify/schemas/collects.json,sha256=dOX0_O7meWELWHYQG_MWqGkWLelAoiIlPtDXuxz9ig8,1173
12
13
  source_shopify/schemas/countries.json,sha256=fdJPrd8tQEzzonkunm0hvMbqZeXixPaphHd3PSt8g58,1783
@@ -50,22 +51,22 @@ source_shopify/schemas/shop.json,sha256=vEGiTvEYX7qnMq06MRVBycqih49h49xjTNC6gJux
50
51
  source_shopify/schemas/smart_collections.json,sha256=kv7dINsvgzJ0RyKfFNKjU0apdNDXwQaHfnNZfQsshcU,2009
51
52
  source_shopify/schemas/tender_transactions.json,sha256=U8fdT-eflycEPzYSpBDiB0lp9wxmJHgioHTrICflh78,2006
52
53
  source_shopify/schemas/transactions.json,sha256=vbwscH3UcAtbSsC70mBka4oNaFR4S3S6IFBmzR7t37U,10226
53
- source_shopify/scopes.py,sha256=78f9QL3PJZ9UDx1gIWzNwx5fYJE9OB3vPi9RahB_kFw,6533
54
+ source_shopify/scopes.py,sha256=R6f7t211n_nrh0T897u72ODo_hI6mzPC1NbN0vq-upI,6579
54
55
  source_shopify/shopify_graphql/bulk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
56
  source_shopify/shopify_graphql/bulk/exceptions.py,sha256=4dj7Za4xIfwL-zf8joT9svF_RSoGlE3GviMiIl1e1rs,2532
56
57
  source_shopify/shopify_graphql/bulk/job.py,sha256=c3Cg70_Io9jTD-rU-5MvjHaPmJCtcpeqEYnRtFECGOo,28673
57
- source_shopify/shopify_graphql/bulk/query.py,sha256=D8rnI1SDw50-Gt18lt7YwwNNdsbVMbBfxZa9xVJZbto,130981
58
+ source_shopify/shopify_graphql/bulk/query.py,sha256=GlwUwZoDkHyUYzfEr7RhVENhS2aMta6OMPYod2wcQbA,134819
58
59
  source_shopify/shopify_graphql/bulk/record.py,sha256=X6VGngugv7a_S8UEeDo121BkdCVLj5nWlHK76A21kyo,16898
59
60
  source_shopify/shopify_graphql/bulk/retry.py,sha256=R5rSJJE8D5zcj6mN-OmmNO2aFZEIdjAlWclDDVW5KPI,2626
60
61
  source_shopify/shopify_graphql/bulk/status.py,sha256=RmuQ2XsYL3iRCpVGxea9F1wXGmbwasDCSXjaTyL4LMA,328
61
62
  source_shopify/shopify_graphql/bulk/tools.py,sha256=nUQ2ZmPTKJNJdfLToR6KJtLKcJFCChSifkAOvwg0Vss,4065
62
- source_shopify/source.py,sha256=oikoM-VPNk62zlmeAQR59PMxfuXq2s42N7zaqLM6_lo,8575
63
- source_shopify/spec.json,sha256=vwEY5T3IryqSne0cRcJO53FaLnApuKOKRRS6yQJABpo,6667
64
- source_shopify/streams/base_streams.py,sha256=xYuyH6YjxZYl2x8CsdIYl3AqxBtz5xF_Oioatn5V-Rs,43979
65
- source_shopify/streams/streams.py,sha256=Ro0JXboCQHpfdI5VsLR4TseSL077GiFMZ-gOOnCCqtc,19356
63
+ source_shopify/source.py,sha256=_eDPHsRDlpwPWdUzG1FPEyf6uDAHRm8Vt2n6VDfyAhE,8879
64
+ source_shopify/spec.json,sha256=ITYWiQ-NrI5VISk5qmUQhp9ChUE2FV18d8xzVzPwvAg,6144
65
+ source_shopify/streams/base_streams.py,sha256=k_4uLaLADLRTUcSmP8uA_830uuzRvnqUaCVGcb0Zpd8,42625
66
+ source_shopify/streams/streams.py,sha256=I68Li0EPR9acj3Jhk7qd-zWhESkPHRzLA_b-LmbTjsE,20044
66
67
  source_shopify/transform.py,sha256=mn0htL812_90zc_YszGQa0hHcIZQpYYdmk8IqpZm5TI,4685
67
68
  source_shopify/utils.py,sha256=DSqEchu-MQJ7zust7CNfqOkGIv9OSR-5UUsuD-bsDa8,16224
68
- airbyte_source_shopify-3.1.2.dev202601082220.dist-info/METADATA,sha256=WtXZxV5MKHMrSSdUoChJ0kB6TrDYmET6oChVWm0LrIo,5313
69
- airbyte_source_shopify-3.1.2.dev202601082220.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
70
- airbyte_source_shopify-3.1.2.dev202601082220.dist-info/entry_points.txt,sha256=SyTwKSsPk9MCdPf01saWpnp8hcmZOgBssVcSIvMbBeQ,57
71
- airbyte_source_shopify-3.1.2.dev202601082220.dist-info/RECORD,,
69
+ airbyte_source_shopify-3.2.0.dist-info/METADATA,sha256=_hASusIiIv3XMGAUhHCfT_7HDZvoxnregmL6vPadIuc,5297
70
+ airbyte_source_shopify-3.2.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
71
+ airbyte_source_shopify-3.2.0.dist-info/entry_points.txt,sha256=SyTwKSsPk9MCdPf01saWpnp8hcmZOgBssVcSIvMbBeQ,57
72
+ airbyte_source_shopify-3.2.0.dist-info/RECORD,,
@@ -0,0 +1,35 @@
1
+ {
2
+ "type": "object",
3
+ "additionalProperties": true,
4
+ "properties": {
5
+ "collection_id": {
6
+ "description": "The unique identifier for the collection.",
7
+ "type": ["null", "integer"]
8
+ },
9
+ "collection_admin_graphql_api_id": {
10
+ "description": "The Admin GraphQL API ID for the collection.",
11
+ "type": ["null", "string"]
12
+ },
13
+ "collection_handle": {
14
+ "description": "The handle (URL-friendly name) for the collection.",
15
+ "type": ["null", "string"]
16
+ },
17
+ "collection_updated_at": {
18
+ "description": "The date and time when the collection was last updated.",
19
+ "type": ["null", "string"],
20
+ "format": "date-time"
21
+ },
22
+ "product_id": {
23
+ "description": "The unique identifier for the product.",
24
+ "type": ["null", "integer"]
25
+ },
26
+ "product_admin_graphql_api_id": {
27
+ "description": "The Admin GraphQL API ID for the product.",
28
+ "type": ["null", "string"]
29
+ },
30
+ "shop_url": {
31
+ "description": "The URL of the shop associated with this collection-product association.",
32
+ "type": ["null", "string"]
33
+ }
34
+ }
35
+ }
source_shopify/scopes.py CHANGED
@@ -44,6 +44,7 @@ SCOPES_MAPPING: Mapping[str, set[str]] = {
44
44
  "MetafieldProductVariants": ("read_products",),
45
45
  "CustomCollections": ("read_products",),
46
46
  "Collects": ("read_products",),
47
+ "CollectionProducts": ("read_products",),
47
48
  "ProductVariants": ("read_products", "read_inventory"),
48
49
  "MetafieldCollections": ("read_products",),
49
50
  "SmartCollections": ("read_products",),
@@ -952,6 +952,114 @@ class Collection(ShopifyBulkQuery):
952
952
  yield record
953
953
 
954
954
 
955
+ class CollectionProduct(ShopifyBulkQuery):
956
+ """
957
+ Returns the products associated with each collection, including both custom collections
958
+ and smart collections. This provides all product<>collection associations, not just
959
+ manually associated products (which is what the Collects REST API provides).
960
+
961
+ {
962
+ collections(query: "updated_at:>='2023-02-07T00:00:00+00:00' AND updated_at:<='2023-12-04T00:00:00+00:00'", sortKey: UPDATED_AT) {
963
+ edges {
964
+ node {
965
+ __typename
966
+ id
967
+ handle
968
+ updatedAt
969
+ products {
970
+ edges {
971
+ node {
972
+ __typename
973
+ id
974
+ }
975
+ }
976
+ }
977
+ }
978
+ }
979
+ }
980
+ }
981
+ """
982
+
983
+ query_name = "collections"
984
+ sort_key = "UPDATED_AT"
985
+
986
+ products_fields: List[Field] = [
987
+ Field(
988
+ name="edges",
989
+ fields=[
990
+ Field(
991
+ name="node",
992
+ fields=[
993
+ "__typename",
994
+ "id",
995
+ ],
996
+ )
997
+ ],
998
+ )
999
+ ]
1000
+
1001
+ query_nodes: List[Field] = [
1002
+ "__typename",
1003
+ "id",
1004
+ Field(name="handle"),
1005
+ Field(name="updatedAt"),
1006
+ Field(name="products", fields=products_fields),
1007
+ ]
1008
+
1009
+ record_composition = {
1010
+ "new_record": "Collection",
1011
+ "record_components": ["Product"],
1012
+ }
1013
+
1014
+ def _process_product_components(self, products: List[dict]) -> List[dict]:
1015
+ """
1016
+ Process product components to resolve IDs from string to int and preserve the original ID.
1017
+
1018
+ Args:
1019
+ products: List of product dictionaries with string IDs
1020
+
1021
+ Returns:
1022
+ List of processed product dictionaries with both id (int) and admin_graphql_api_id (str)
1023
+ """
1024
+ for product in products:
1025
+ # Save the original string ID before resolving
1026
+ product["admin_graphql_api_id"] = product.get("id")
1027
+ # Resolve the ID from string to int
1028
+ product["id"] = self.tools.resolve_str_id(product.get("id"))
1029
+ return products
1030
+
1031
+ def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]:
1032
+ """
1033
+ Process collection records and yield one record per collection-product association.
1034
+ """
1035
+ record_components = record.get("record_components", {})
1036
+ products = record_components.get("Product", [])
1037
+
1038
+ # Get collection info - id is already resolved to int, admin_graphql_api_id has the string version
1039
+ collection_id = record.get("id")
1040
+ collection_admin_graphql_api_id = record.get("admin_graphql_api_id")
1041
+ collection_handle = record.get("handle")
1042
+ collection_updated_at = self.tools.from_iso8601_to_rfc3339(record, "updatedAt")
1043
+
1044
+ if products:
1045
+ # Process products to resolve their IDs
1046
+ products = self._process_product_components(products)
1047
+
1048
+ for product in products:
1049
+ product_id = product.get("id")
1050
+ product_admin_graphql_api_id = product.get("admin_graphql_api_id")
1051
+
1052
+ yield {
1053
+ "collection_id": collection_id,
1054
+ "collection_admin_graphql_api_id": collection_admin_graphql_api_id,
1055
+ "collection_handle": collection_handle,
1056
+ "collection_updated_at": collection_updated_at,
1057
+ "product_id": product_id,
1058
+ "product_admin_graphql_api_id": product_admin_graphql_api_id,
1059
+ "shop_url": self.config.get("shop"),
1060
+ }
1061
+
1062
+
955
1063
  class CustomerAddresses(ShopifyBulkQuery):
956
1064
  """
957
1065
  {
source_shopify/source.py CHANGED
@@ -11,6 +11,7 @@ from requests.exceptions import ConnectionError, RequestException, SSLError
11
11
  from airbyte_cdk.models import FailureType, SyncMode
12
12
  from airbyte_cdk.sources import AbstractSource
13
13
  from airbyte_cdk.sources.streams import Stream
14
+ from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException
14
15
  from airbyte_cdk.utils import AirbyteTracedException
15
16
 
16
17
  from .auth import MissingAccessTokenError, ShopifyAuthenticator
@@ -20,6 +21,7 @@ from .streams.streams import (
20
21
  Articles,
21
22
  BalanceTransactions,
22
23
  Blogs,
24
+ CollectionProducts,
23
25
  Collections,
24
26
  Collects,
25
27
  Countries,
@@ -108,6 +110,8 @@ class ConnectionCheckTest:
108
110
  return False, self.describe_error("index_error", shop_name, response)
109
111
  except MissingAccessTokenError:
110
112
  return False, self.describe_error("missing_token_error")
113
+ except (BaseBackoffException, AirbyteTracedException) as error:
114
+ return False, self.describe_error("connection_error", shop_name) or str(error)
111
115
 
112
116
  def get_shop_id(self) -> str:
113
117
  """
@@ -178,6 +182,7 @@ class SourceShopify(AbstractSource):
178
182
  Articles(config),
179
183
  BalanceTransactions(config),
180
184
  Blogs(config),
185
+ CollectionProducts(config),
181
186
  Collections(config),
182
187
  Collects(config),
183
188
  CustomCollections(config),
source_shopify/spec.json CHANGED
@@ -119,15 +119,6 @@
119
119
  "default": 100000,
120
120
  "minimum": 15000,
121
121
  "maximum": 1000000
122
- },
123
- "lookback_window_in_days": {
124
- "type": "integer",
125
- "title": "Lookback Window (in days)",
126
- "description": "The number of days to look back for records that may have been missed due to race conditions or late-arriving data. This is useful for ensuring data completeness in incremental syncs. Setting this to 1 or more will re-fetch records from the specified number of days before the last sync state.",
127
- "default": 0,
128
- "minimum": 0,
129
- "maximum": 30,
130
- "order": 7
131
122
  }
132
123
  }
133
124
  },
@@ -209,24 +209,6 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
209
209
  current_state_value = current_stream_state.get(self.cursor_field) or self.default_state_comparison_value
210
210
  return {self.cursor_field: max(last_record_value, current_state_value)}
211
211
 
212
- def _apply_lookback_window(self, state_value: str) -> str:
213
- """
214
- Apply the lookback window to the state value by subtracting the configured number of days.
215
- This helps capture records that may have been missed due to race conditions or late-arriving data.
216
- """
217
- lookback_days = self.config.get("lookback_window_in_days", 0)
218
- if lookback_days > 0:
219
- state_datetime = pdm.parse(state_value)
220
- adjusted_datetime = state_datetime.subtract(days=lookback_days)
221
- # Ensure we don't go before the configured start_date
222
- start_date = self.config.get("start_date")
223
- if start_date:
224
- start_datetime = pdm.parse(start_date)
225
- if adjusted_datetime < start_datetime:
226
- adjusted_datetime = start_datetime
227
- return adjusted_datetime.to_rfc3339_string()
228
- return state_value
229
-
230
212
  @stream_state_cache.cache_stream_state
231
213
  def request_params(
232
214
  self, stream_state: Optional[Mapping[str, Any]] = None, next_page_token: Optional[Mapping[str, Any]] = None, **kwargs
@@ -236,11 +218,7 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
236
218
  if not next_page_token:
237
219
  params["order"] = f"{self.order_field} asc"
238
220
  if stream_state:
239
- state_value = stream_state.get(self.cursor_field)
240
- # Apply lookback window to go back N days from the state
241
- if state_value and self.filter_field != "since_id":
242
- state_value = self._apply_lookback_window(state_value)
243
- params[self.filter_field] = state_value
221
+ params[self.filter_field] = stream_state.get(self.cursor_field)
244
222
  return params
245
223
 
246
224
  def track_checkpoint_cursor(self, record_value: Union[str, int], filter_record_value: Optional[str] = None) -> None:
@@ -844,9 +822,6 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
844
822
  def stream_slices(self, stream_state: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
845
823
  if self.filter_field:
846
824
  state = self._get_state_value(stream_state)
847
- # Apply lookback window only when we have a stream state (not on first sync)
848
- if stream_state:
849
- state = self._apply_lookback_window(state)
850
825
  start = pdm.parse(state)
851
826
  end = pdm.now()
852
827
  while start < end:
@@ -10,6 +10,7 @@ from typing import Any, Iterable, Mapping, MutableMapping, Optional
10
10
  import requests
11
11
  from source_shopify.shopify_graphql.bulk.query import (
12
12
  Collection,
13
+ CollectionProduct,
13
14
  CustomerAddresses,
14
15
  CustomerJourney,
15
16
  DeliveryProfile,
@@ -323,6 +324,21 @@ class MetafieldCollections(IncrementalShopifyGraphQlBulkStream):
323
324
  bulk_query: MetafieldCollection = MetafieldCollection
324
325
 
325
326
 
327
+ class CollectionProducts(IncrementalShopifyGraphQlBulkStream):
328
+ """
329
+ Stream that returns all products associated with each collection, including both
330
+ custom collections and smart collections. Unlike the Collects stream which only
331
+ returns manually associated products, this stream returns all products that belong
332
+ to a collection (including those matched by smart collection rules).
333
+
334
+ https://shopify.dev/docs/api/admin-graphql/latest/objects/Collection#field-Collection.fields.products
335
+ """
336
+
337
+ bulk_query: CollectionProduct = CollectionProduct
338
+ cursor_field = "collection_updated_at"
339
+ primary_key = ["collection_id", "product_id"]
340
+
341
+
326
342
  class BalanceTransactions(IncrementalShopifyStream):
327
343
  """
328
344
  PaymentsTransactions stream does not support Incremental Refresh based on datetime fields, only `since_id` is supported: