airbyte-source-shopify 3.3.0__tar.gz → 3.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/PKG-INFO +1 -1
  2. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/pyproject.toml +1 -1
  3. airbyte_source_shopify-3.3.2/source_shopify/shopify_graphql/bulk/external_sort.py +140 -0
  4. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/streams/base_streams.py +30 -13
  5. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/utils.py +1 -1
  6. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/README.md +0 -0
  7. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/__init__.py +0 -0
  8. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/auth.py +0 -0
  9. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/config_migrations.py +0 -0
  10. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/http_request.py +0 -0
  11. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/run.py +0 -0
  12. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/abandoned_checkouts.json +0 -0
  13. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/articles.json +0 -0
  14. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/balance_transactions.json +0 -0
  15. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/blogs.json +0 -0
  16. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/collection_products.json +0 -0
  17. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/collections.json +0 -0
  18. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/collects.json +0 -0
  19. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/countries.json +0 -0
  20. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/custom_collections.json +0 -0
  21. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/customer_address.json +0 -0
  22. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/customer_journey_summary.json +0 -0
  23. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/customers.json +0 -0
  24. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/deleted_products.json +0 -0
  25. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/discount_codes.json +0 -0
  26. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/disputes.json +0 -0
  27. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/draft_orders.json +0 -0
  28. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/fulfillment_orders.json +0 -0
  29. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/fulfillments.json +0 -0
  30. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/inventory_items.json +0 -0
  31. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/inventory_levels.json +0 -0
  32. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/locations.json +0 -0
  33. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_articles.json +0 -0
  34. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_blogs.json +0 -0
  35. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_collections.json +0 -0
  36. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_customers.json +0 -0
  37. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_draft_orders.json +0 -0
  38. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_locations.json +0 -0
  39. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_orders.json +0 -0
  40. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_pages.json +0 -0
  41. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_product_images.json +0 -0
  42. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_product_variants.json +0 -0
  43. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_products.json +0 -0
  44. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_shops.json +0 -0
  45. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/metafield_smart_collections.json +0 -0
  46. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/order_agreements.json +0 -0
  47. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/order_refunds.json +0 -0
  48. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/order_risks.json +0 -0
  49. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/orders.json +0 -0
  50. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/pages.json +0 -0
  51. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/price_rules.json +0 -0
  52. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/product_images.json +0 -0
  53. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/product_variants.json +0 -0
  54. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/products.json +0 -0
  55. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/profile_location_groups.json +0 -0
  56. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/shop.json +0 -0
  57. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/smart_collections.json +0 -0
  58. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/tender_transactions.json +0 -0
  59. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/schemas/transactions.json +0 -0
  60. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/scopes.py +0 -0
  61. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/__init__.py +0 -0
  62. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/exceptions.py +0 -0
  63. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/job.py +0 -0
  64. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/query.py +0 -0
  65. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/record.py +0 -0
  66. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/retry.py +0 -0
  67. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/status.py +0 -0
  68. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/shopify_graphql/bulk/tools.py +0 -0
  69. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/source.py +0 -0
  70. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/spec.json +0 -0
  71. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/streams/streams.py +0 -0
  72. {airbyte_source_shopify-3.3.0 → airbyte_source_shopify-3.3.2}/source_shopify/transform.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-shopify
3
- Version: 3.3.0
3
+ Version: 3.3.2
4
4
  Summary: Source CDK implementation for Shopify.
5
5
  Home-page: https://airbyte.com
6
6
  License: ELv2
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
3
3
  build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
- version = "3.3.0"
6
+ version = "3.3.2"
7
7
  name = "airbyte-source-shopify"
8
8
  description = "Source CDK implementation for Shopify."
9
9
  authors = [ "Airbyte <contact@airbyte.io>",]
@@ -0,0 +1,140 @@
1
+ #
2
+ # Copyright (c) 2026 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ """Disk-backed external merge sort used by the Shopify bulk GraphQL streams.
6
+
7
+ Large bulk GraphQL result sets (for example, the metafield streams) used to be
8
+ globally sorted in memory with Python's builtin `sorted()` before emission,
9
+ which materialized every record at once and caused out-of-memory failures on
10
+ large slices. `external_stable_sort` replaces that in-memory sort with a
11
+ stable, streaming, disk-backed sort: records are accumulated into fixed-size
12
+ chunks, each chunk is sorted in memory, chunks that exceed the in-memory
13
+ budget are spilled as sorted runs to temp files, and the final output is a
14
+ k-way heap merge across those runs.
15
+
16
+ The sort is **stable**: when two records have equal sort keys, the record that
17
+ arrived first in the input iterable is emitted first in the output. That
18
+ matches the prior `sorted()` behavior and preserves the existing checkpoint
19
+ and state semantics for the Shopify bulk streams.
20
+
21
+ When the entire input fits in a single chunk, the implementation skips the
22
+ spill-and-merge path entirely and sorts in place, so small slices pay no disk
23
+ cost.
24
+ """
25
+
26
+ import heapq
27
+ import json
28
+ import logging
29
+ import os
30
+ import tempfile
31
+ from typing import Any, Callable, Iterable, Iterator, List, Mapping, Optional
32
+
33
+
34
+ logger = logging.getLogger("airbyte")
35
+
36
+
37
+ DEFAULT_SORT_CHUNK_SIZE = 50_000
38
+
39
+
40
def external_stable_sort(
    records: Iterable[Mapping[str, Any]],
    key_fn: Callable[[Mapping[str, Any]], Any],
    chunk_size: int = DEFAULT_SORT_CHUNK_SIZE,
    tmp_dir: Optional[str] = None,
) -> Iterator[Mapping[str, Any]]:
    """Stably sort `records` by `key_fn` using bounded memory.

    Records are pulled lazily from `records` and buffered as
    `(key_fn(record), ordinal, record)` triples, where `ordinal` is a
    monotonically increasing insertion counter that breaks ties so records
    with equal keys keep their input order.

    A full buffer is spilled to disk only once a further record arrives, so
    any input of up to and including `chunk_size` records is sorted purely in
    memory and never touches the disk. For larger inputs every chunk
    (including the first one and the final partial one) is written as a sorted
    JSON-lines run file and the runs are streamed back through a k-way heap
    merge.

    Spilled triples are round-tripped through `json`, so on the spill path the
    keys and records must be JSON-serializable and are yielded as their
    JSON-decoded equivalents (for example, tuples come back as lists).

    This is a generator function: no work is done, and `chunk_size` is not
    validated, until the first item is requested.

    Args:
        records: Input records; consumed exactly once.
        key_fn: Computes the sort key for a record. Keys must be mutually
            comparable.
        chunk_size: Maximum number of records buffered in memory per chunk.
        tmp_dir: Directory for spill files; `None` uses the `tempfile`
            default.

    Yields:
        The records in ascending `(key, input order)` order.

    Raises:
        ValueError: If `chunk_size` is not a positive integer (raised when
            iteration starts).
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    chunk: List[Any] = []
    run_paths: List[str] = []
    ordinal = 0

    try:
        for record in records:
            if len(chunk) >= chunk_size:
                # Spill only when one more record proves the input does not
                # fit in a single chunk; an input of exactly `chunk_size`
                # records therefore stays on the in-memory fast path.
                _spill_chunk(chunk, tmp_dir, run_paths)
                chunk = []
            chunk.append((key_fn(record), ordinal, record))
            ordinal += 1

        if not run_paths:
            # Fast path: the whole input fit in one in-memory chunk, so the
            # spill-and-merge pipeline is skipped entirely.
            chunk.sort(key=_sort_key)
            for _, _, record in chunk:
                yield record
            return

        if chunk:
            # Always true here (a spill is immediately followed by an append);
            # the guard is kept so an empty run file can never be written.
            _spill_chunk(chunk, tmp_dir, run_paths)

        logger.info(f"External sort spilled {len(run_paths)} run file(s) covering {ordinal} record(s); merging.")
        yield from _merge_runs(run_paths)
    finally:
        # Runs on normal completion, on generator close, and when an
        # exception propagates, so spill files never outlive the sort.
        _cleanup_runs(run_paths)
91
+
92
+
93
+ def _sort_key(item: Any) -> Any:
94
+ # item is (key, ordinal, record). Tuples compare lexicographically, so the
95
+ # ordinal acts as a stable secondary key for equal primary keys.
96
+ return (item[0], item[1])
97
+
98
+
99
def _spill_chunk(chunk: List[Any], tmp_dir: Optional[str], run_paths: List[str]) -> None:
    """Sort `chunk` in place and persist it as one sorted JSON-lines run file.

    Every line of the run file is a compact JSON array `[key, ordinal, record]`.
    The temp file's path is recorded in `run_paths` before the first write, so
    a failure while encoding or writing still leaves the path registered for
    the caller's cleanup.
    """
    chunk.sort(key=_sort_key)

    handle, path = tempfile.mkstemp(dir=tmp_dir, prefix="shopify_bulk_sort_", suffix=".jsonl")
    # Register first, write second: see the docstring.
    run_paths.append(path)

    with os.fdopen(handle, "w", encoding="utf-8") as out:
        for sort_value, position, payload in chunk:
            encoded = json.dumps([sort_value, position, payload], separators=(",", ":"))
            out.write(encoded + "\n")
113
+
114
+
115
+ def _iter_run(path: str) -> Iterator[Any]:
116
+ """Yield `(key, ordinal, record)` triples from a spilled run file."""
117
+ with open(path, "r", encoding="utf-8") as fh:
118
+ for line in fh:
119
+ key, ord_value, record = json.loads(line)
120
+ yield (key, ord_value, record)
121
+
122
+
123
def _merge_runs(run_paths: List[str]) -> Iterator[Mapping[str, Any]]:
    """Stream the merged output of all run files in sorted order.

    Each run file is read lazily and the runs are combined with
    `heapq.merge`, ordered by `(key, ordinal)`.

    The per-run iterators are closed explicitly when this generator finishes,
    is closed early, or is unwound by an exception. That releases the
    underlying file handles deterministically instead of relying on garbage
    collection, so the files are no longer open when the caller removes them.

    Args:
        run_paths: Paths of the sorted run files to merge.

    Yields:
        The record of each merged `(key, ordinal, record)` triple.
    """
    run_iters = [_iter_run(path) for path in run_paths]
    try:
        for _, _, record in heapq.merge(*run_iters, key=_sort_key):
            yield record
    finally:
        # Closing a generator that is unstarted, suspended or already
        # finished is always safe; for a suspended one it exits the `with`
        # block inside `_iter_run`, which closes the file.
        for run_iter in run_iters:
            run_iter.close()
128
+
129
+
130
+ def _cleanup_runs(run_paths: List[str]) -> None:
131
+ """Best-effort removal of spilled run files. Missing files are ignored."""
132
+ for path in run_paths:
133
+ try:
134
+ os.unlink(path)
135
+ except FileNotFoundError:
136
+ # Already removed (for example, by tempfile cleanup on container
137
+ # shutdown). Best-effort cleanup is intentionally silent here.
138
+ pass
139
+ except OSError as exc:
140
+ logger.warning(f"Failed to remove external-sort spill file `{path}`: {exc}")
@@ -1,5 +1,5 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2026 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
5
 
@@ -14,6 +14,7 @@ import pendulum as pdm
14
14
  import requests
15
15
  from requests.exceptions import RequestException
16
16
  from source_shopify.http_request import ShopifyErrorHandler
17
+ from source_shopify.shopify_graphql.bulk.external_sort import DEFAULT_SORT_CHUNK_SIZE, external_stable_sort
17
18
  from source_shopify.shopify_graphql.bulk.job import ShopifyBulkManager
18
19
  from source_shopify.shopify_graphql.bulk.query import DeliveryZoneList, ShopifyBulkQuery
19
20
  from source_shopify.transform import DataTypeEnforcer
@@ -835,22 +836,38 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
835
836
  # for the streams that don't support filtering
836
837
  yield {}
837
838
 
839
+ # Chunk size used by the disk-backed external sort in `sort_output_asc`.
840
+ # Exposed as a class attribute so tests and subclasses can tune it.
841
+ sort_chunk_size: int = DEFAULT_SORT_CHUNK_SIZE
842
+
838
843
  def sort_output_asc(self, non_sorted_records: Iterable[Mapping[str, Any]] = None) -> Iterable[Mapping[str, Any]]:
844
+ """Emit `non_sorted_records` in ascending `cursor_field` order.
845
+
846
+ The previous implementation relied on `sorted(...)` which fully
847
+ materialized the input iterable in memory. For large bulk GraphQL
848
+ slices (notably the metafield streams, where sorting is applied at the
849
+ parent-entity level but records are emitted at the child level) that
850
+ materialization triggered out-of-memory failures in production syncs.
851
+
852
+ This implementation preserves the prior ascending, stable ordering and
853
+ downstream checkpoint/state semantics while bounding peak memory via a
854
+ disk-backed external merge sort. Inputs that fit in a single in-memory
855
+ chunk take the fast path with no disk spill.
839
856
  """
840
- Apply sorting for collected records, to guarantee the `ASC` output.
841
- This handles the STATE and CHECKPOINTING correctly, for the `incremental` streams.
842
- """
843
- if non_sorted_records:
844
- if not self.cursor_field:
845
- yield from non_sorted_records
846
- else:
847
- yield from sorted(
848
- non_sorted_records,
849
- key=lambda x: x.get(self.cursor_field) if x.get(self.cursor_field) else self.default_state_comparison_value,
850
- )
851
- else:
857
+ if not non_sorted_records:
852
858
  # always return an empty iterable, if no records
853
859
  return []
860
+ if not self.cursor_field:
861
+ return non_sorted_records
862
+ return external_stable_sort(
863
+ non_sorted_records,
864
+ key_fn=self._sort_key_for_record,
865
+ chunk_size=self.sort_chunk_size,
866
+ )
867
+
868
+ def _sort_key_for_record(self, record: Mapping[str, Any]) -> Any:
869
+ value = record.get(self.cursor_field)
870
+ return value if value else self.default_state_comparison_value
854
871
 
855
872
  def read_records(
856
873
  self,
@@ -334,7 +334,7 @@ class LimitReducingErrorHandler(HttpStatusErrorHandler):
334
334
  """
335
335
 
336
336
  def __init__(self, max_retries: int, error_mapping: dict):
337
- super().__init__(logger=None, max_retries=max_retries, error_mapping=error_mapping)
337
+ super().__init__(logger=LOGGER, max_retries=max_retries, error_mapping=error_mapping)
338
338
 
339
339
  def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
340
340
  if isinstance(response_or_exception, requests.Response):