airbyte-source-shopify 2.5.6__tar.gz → 2.5.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/PKG-INFO +1 -1
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/pyproject.toml +1 -1
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/job.py +12 -13
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/query.py +7 -48
- airbyte_source_shopify-2.5.8/source_shopify/shopify_graphql/bulk/record.py +382 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/retry.py +8 -8
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/streams/base_streams.py +35 -15
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/utils.py +7 -6
- airbyte_source_shopify-2.5.6/source_shopify/shopify_graphql/bulk/record.py +0 -157
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/README.md +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/__init__.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/auth.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/config_migrations.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/http_request.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/run.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/abandoned_checkouts.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/articles.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/balance_transactions.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/blogs.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/collections.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/collects.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/countries.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/custom_collections.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customer_address.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customer_journey_summary.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customer_saved_search.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customers.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/discount_codes.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/disputes.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/draft_orders.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/fulfillment_orders.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/fulfillments.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/inventory_items.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/inventory_levels.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/locations.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_articles.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_blogs.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_collections.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_customers.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_draft_orders.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_locations.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_orders.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_pages.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_product_images.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_product_variants.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_products.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_shops.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_smart_collections.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/order_agreements.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/order_refunds.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/order_risks.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/orders.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/pages.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/price_rules.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/product_images.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/product_variants.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/products.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/products_graph_ql.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/shop.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/smart_collections.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/tender_transactions.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/transactions.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/scopes.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/__init__.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/exceptions.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/status.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/tools.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/graphql.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/schema.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/source.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/spec.json +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/streams/streams.py +0 -0
- {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/transform.py +0 -0
{airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/job.py
CHANGED

@@ -2,7 +2,6 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #

-import logging
 from dataclasses import dataclass, field
 from datetime import datetime
 from time import sleep, time

@@ -12,7 +11,7 @@ import pendulum as pdm
 import requests
 from airbyte_cdk.sources.streams.http import HttpClient
 from requests.exceptions import JSONDecodeError
-from source_shopify.utils import ApiTypeEnum
+from source_shopify.utils import LOGGER, ApiTypeEnum
 from source_shopify.utils import ShopifyRateLimiter as limiter

 from .exceptions import AirbyteTracedException, ShopifyBulkExceptions

@@ -32,8 +31,8 @@ class ShopifyBulkManager:
     job_size: float
     job_checkpoint_interval: int

-
-
+    parent_stream_name: Optional[str] = None
+    parent_stream_cursor: Optional[str] = None

     # 10Mb chunk size to save the file
     _retrieve_chunk_size: Final[int] = 1024 * 1024 * 10

@@ -94,7 +93,7 @@ class ShopifyBulkManager:
         # how many records should be collected before we use the checkpoining
         self._job_checkpoint_interval = self.job_checkpoint_interval
         # define Record Producer instance
-        self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query)
+        self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query, self.parent_stream_name, self.parent_stream_cursor)

     @property
     def _tools(self) -> BulkTools:

@@ -251,9 +250,9 @@ class ShopifyBulkManager:
     def _log_state(self, message: Optional[str] = None) -> None:
         pattern = f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {self._job_state}"
         if message:
-
+            LOGGER.info(f"{pattern}. {message}.")
         else:
-
+            LOGGER.info(pattern)

     def _job_get_result(self, response: Optional[requests.Response] = None) -> Optional[str]:
         parsed_response = response.json().get("data", {}).get("node", {}) if response else None

@@ -309,13 +308,13 @@ class ShopifyBulkManager:
             sleep(self._job_check_interval)

     def _cancel_on_long_running_job(self) -> None:
-
+        LOGGER.info(
             f"Stream: `{self.http_client.name}` the BULK Job: {self._job_id} runs longer than expected ({self._job_max_elapsed_time} sec). Retry with the reduced `Slice Size` after self-cancelation."
         )
         self._job_cancel()

     def _cancel_on_checkpointing(self) -> None:
-
+        LOGGER.info(f"Stream: `{self.http_client.name}`, checkpointing after >= `{self._job_checkpoint_interval}` rows collected.")
         # set the flag to adjust the next slice from the checkpointed cursor value
         self._job_cancel()

@@ -434,7 +433,7 @@ class ShopifyBulkManager:
             return True
         return False

-    @bulk_retry_on_exception(
+    @bulk_retry_on_exception()
     def _job_check_state(self) -> None:
         while not self._job_completed():
             if self._job_canceled():

@@ -444,7 +443,7 @@ class ShopifyBulkManager:
         else:
             self._job_track_running()

-    @bulk_retry_on_exception(
+    @bulk_retry_on_exception()
     def create_job(self, stream_slice: Mapping[str, str], filter_field: str) -> None:
         if stream_slice:
             query = self.query.get(filter_field, stream_slice["start"], stream_slice["end"])

@@ -484,7 +483,7 @@ class ShopifyBulkManager:
         self._job_id = bulk_response.get("id")
         self._job_created_at = bulk_response.get("createdAt")
         self._job_state = ShopifyBulkJobStatus.CREATED.value
-
+        LOGGER.info(f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {ShopifyBulkJobStatus.CREATED.value}")

     def job_size_normalize(self, start: datetime, end: datetime) -> datetime:
         # adjust slice size when it's bigger than the loop point when it should end,

@@ -522,7 +521,7 @@ class ShopifyBulkManager:
             final_message = final_message + lines_collected_message

         # emit final Bulk job status message
-
+        LOGGER.info(f"{final_message}")

     def _process_bulk_results(self) -> Iterable[Mapping[str, Any]]:
         if self._job_result_filename:
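Note: the practical effect of the `job.py` changes above is that `ShopifyBulkManager` now carries the substream's parent identifiers and hands them straight to the record producer. A minimal, self-contained sketch of that wiring with stand-in classes (illustrative names and values, not the connector's real constructors):

# Stand-in classes (not the real ShopifyBulkManager / ShopifyBulkRecord) showing how the
# new optional dataclass fields are forwarded in __post_init__, as in the diff above.
from dataclasses import dataclass
from typing import Optional


@dataclass
class RecordProducer:  # plays the role of ShopifyBulkRecord
    query: str
    parent_stream_name: Optional[str] = None
    parent_stream_cursor: Optional[str] = None


@dataclass
class Manager:  # plays the role of ShopifyBulkManager
    query: str
    parent_stream_name: Optional[str] = None
    parent_stream_cursor: Optional[str] = None

    def __post_init__(self) -> None:
        # mirrors: ShopifyBulkRecord(self.query, self.parent_stream_name, self.parent_stream_cursor)
        self.record_producer = RecordProducer(self.query, self.parent_stream_name, self.parent_stream_cursor)


manager = Manager(query="<bulk query>", parent_stream_name="products", parent_stream_cursor="updated_at")
print(manager.record_producer.parent_stream_name)  # products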
{airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/query.py
CHANGED

@@ -80,17 +80,7 @@ class ShopifyBulkTemplates:
 @dataclass
 class ShopifyBulkQuery:
     config: Mapping[str, Any]
-
-    parent_stream_cursor: Optional[str] = None
-
-    @property
-    def has_parent_stream(self) -> bool:
-        return True if self.parent_stream_name and self.parent_stream_cursor else False
-
-    @property
-    def parent_cursor_key(self) -> Optional[str]:
-        if self.has_parent_stream:
-            return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
+    parent_stream_cursor_alias: Optional[str] = None

     @property
     def shop_id(self) -> int:

@@ -143,38 +133,12 @@
         """
         return ["__typename", "id"]

-    def 
-        if self.
+    def inject_parent_cursor_field(self, nodes: List[Field], key: str = "updatedAt", index: int = 2) -> List[Field]:
+        if self.parent_stream_cursor_alias:
             # inject parent cursor key as alias to the `updatedAt` parent cursor field
-            nodes.insert(index, Field(name=
-
+            nodes.insert(index, Field(name=key, alias=self.parent_stream_cursor_alias))
         return nodes

-    def _add_parent_record_state(self, record: MutableMapping[str, Any], items: List[dict], to_rfc3339: bool = False) -> List[dict]:
-        """
-        Adds a parent cursor value to each item in the list.
-
-        This method iterates over a list of dictionaries and adds a new key-value pair to each dictionary.
-        The key is the value of `self.query_name`, and the value is another dictionary with a single key "updated_at"
-        and the provided `parent_cursor_value`.
-
-        Args:
-            items (List[dict]): A list of dictionaries to which the parent cursor value will be added.
-            parent_cursor_value (str): The value to be set for the "updated_at" key in the nested dictionary.
-
-        Returns:
-            List[dict]: The modified list of dictionaries with the added parent cursor values.
-        """
-
-        if self.has_parent_stream:
-            parent_cursor_value: Optional[str] = record.get(self.parent_cursor_key, None)
-            parent_state = self.tools._datetime_str_to_rfc3339(parent_cursor_value) if to_rfc3339 and parent_cursor_value else None
-
-            for item in items:
-                item[self.parent_stream_name] = {self.parent_stream_cursor: parent_state}
-
-        return items
-
     def get(self, filter_field: Optional[str] = None, start: Optional[str] = None, end: Optional[str] = None) -> str:
         # define filter query string, if passed
         filter_query = f"{filter_field}:>='{start}' AND {filter_field}:<='{end}'" if filter_field else None

@@ -339,7 +303,7 @@ class Metafield(ShopifyBulkQuery):
         elif isinstance(self.type.value, str):
             nodes = [*nodes, metafield_node]

-        nodes = self.
+        nodes = self.inject_parent_cursor_field(nodes)

         return nodes

@@ -372,9 +336,6 @@ class Metafield(ShopifyBulkQuery):
         else:
             metafields = record_components.get("Metafield", [])
             if len(metafields) > 0:
-                if self.has_parent_stream:
-                    # add parent state to each metafield
-                    metafields = self._add_parent_record_state(record, metafields, to_rfc3339=True)
                 yield from self._process_components(metafields)


@@ -637,7 +598,7 @@ class MetafieldProductImage(Metafield):
         media_node = self.get_edge_node("media", media_fields)

         fields: List[Field] = ["__typename", "id", media_node]
-        fields = self.
+        fields = self.inject_parent_cursor_field(fields)

         return fields

@@ -2422,7 +2383,7 @@ class ProductImage(ShopifyBulkQuery):

     @property
     def query_nodes(self) -> List[Field]:
-        return self.
+        return self.inject_parent_cursor_field(self.nodes)

     def _process_component(self, entity: List[dict]) -> List[dict]:
         for item in entity:

@@ -2499,8 +2460,6 @@ class ProductImage(ShopifyBulkQuery):

         # add the product_id to each `Image`
         record["images"] = self._add_product_id(record.get("images", []), record.get("id"))
-        # add the product cursor to each `Image`
-        record["images"] = self._add_parent_record_state(record, record.get("images", []), to_rfc3339=True)
         record["images"] = self._merge_with_media(record_components)
         record.pop("record_components")
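Note: `inject_parent_cursor_field` replaces the removed `_add_parent_record_state` approach: instead of stamping parent state onto child items after the fact, the bulk query itself now selects the parent's `updatedAt` under a predictable alias. A small, self-contained sketch of the aliasing idea (plain strings instead of the connector's `Field` builder; the field names are illustrative):

# Illustration only: the connector builds GraphQL selections with Field objects; plain strings
# are used here to show the alias that inject_parent_cursor_field adds near the top of the node.
from typing import List, Optional


def inject_parent_cursor_field(nodes: List[str], alias: Optional[str], key: str = "updatedAt", index: int = 2) -> List[str]:
    # `alias` corresponds to parent_stream_cursor_alias, e.g. "products_updated_at"
    if alias:
        nodes.insert(index, f"{alias}: {key}")
    return nodes


nodes = ["__typename", "id", "metafields"]
print(inject_parent_cursor_field(nodes, alias="products_updated_at"))
# ['__typename', 'id', 'products_updated_at: updatedAt', 'metafields']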
airbyte_source_shopify-2.5.8/source_shopify/shopify_graphql/bulk/record.py
ADDED
@@ -0,0 +1,382 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


from dataclasses import dataclass, field
from functools import cached_property
from io import TextIOWrapper
from json import loads
from os import remove
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union

from source_shopify.utils import LOGGER

from .exceptions import ShopifyBulkExceptions
from .query import ShopifyBulkQuery
from .tools import END_OF_FILE, BulkTools


@dataclass
class ShopifyBulkRecord:
    """
    ShopifyBulkRecord is a class designed to handle the processing of bulk records from Shopify's GraphQL API.

    Attributes:
        query (ShopifyBulkQuery): The query object associated with the bulk record.
        parent_stream_name (Optional[str]): The name of the parent stream, if any.
        parent_stream_cursor (Optional[str]): The cursor of the parent stream, if any.
        buffer (List[MutableMapping[str, Any]]): A buffer to store records before processing.
        composition (Optional[Mapping[str, Any]]): The composition of the record, derived from the query.
        record_process_components (Optional[Callable[[MutableMapping], MutableMapping]]): A callable to process record components.
        components (List[str]): A list of components derived from the record composition.
        _parent_stream_cursor_value (Optional[str | int]): The current value of the parent stream cursor.
        record_composed (int): The count of records composed.

    Methods:
        __post_init__(): Initializes additional attributes after the object is created.
        tools(): Returns an instance of BulkTools.
        has_parent_stream(): Checks if the record has a parent stream.
        parent_cursor_key(): Returns the key for the parent cursor if a parent stream exists.
        check_type(record, types): Checks if the record's type matches the given type(s).
        _parse_parent_state_value(value): Parses the parent state value and converts it to the appropriate format.
        _set_parent_state_value(value): Sets the parent state value by parsing the provided value and updating the parent stream cursor value.
        _track_parent_cursor(record): Tracks the cursor value from the parent stream if it exists and updates the parent state.
        get_parent_stream_state(): Retrieves the state of the parent stream if it exists.
        record_new(record): Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
        record_new_component(record): Processes a new record by extracting its component type and adding it to the appropriate placeholder in the components list.
        component_prepare(record): Prepares the given record by initializing a "record_components" dictionary.
        buffer_flush(): Flushes the buffer by processing each record in the buffer.
        record_compose(record): Processes a given record and yields buffered records if certain conditions are met.
        process_line(jsonl_file): Processes a JSON Lines (jsonl) file and yields records.
        record_resolve_id(record): Resolves and updates the 'id' field in the given record.
        produce_records(filename): Reads the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
        read_file(filename, remove_file): Reads a file and produces records from it.
    """

    query: ShopifyBulkQuery
    parent_stream_name: Optional[str] = None
    parent_stream_cursor: Optional[str] = None

    # default buffer
    buffer: List[MutableMapping[str, Any]] = field(init=False, default_factory=list)

    def __post_init__(self) -> None:
        self.composition: Optional[Mapping[str, Any]] = self.query.record_composition
        self.record_process_components: Optional[Callable[[MutableMapping], MutableMapping]] = self.query.record_process_components
        self.components: List[str] = self.composition.get("record_components", []) if self.composition else []
        # We track the parent state for BULK substreams outside of the main CDK methods,
        # to be able to update the moving parent state when there are no substream records to emit.
        self._parent_stream_cursor_value: Optional[str | int] = None
        # how many records composed
        self.record_composed: int = 0

    @cached_property
    def tools(self) -> BulkTools:
        return BulkTools()

    @cached_property
    def has_parent_stream(self) -> bool:
        return True if self.parent_stream_name and self.parent_stream_cursor else False

    @cached_property
    def parent_cursor_key(self) -> Optional[str]:
        if self.has_parent_stream:
            return f"{self.parent_stream_name}_{self.parent_stream_cursor}"

    @staticmethod
    def check_type(record: Mapping[str, Any], types: Union[List[str], str]) -> bool:
        """
        Check if the record's type matches the given type(s).

        Args:
            record (Mapping[str, Any]): The record to check, expected to have a "__typename" key.
            types (Union[List[str], str]): The type(s) to check against. Can be a single type (str) or a list of types (List[str]).

        Returns:
            bool: True if the record's type matches one of the given types, False otherwise.
        """

        record_type = record.get("__typename")
        if isinstance(types, list):
            return any(record_type == t for t in types)
        else:
            return record_type == types

    def _parse_parent_state_value(self, value: str | int) -> str | int:
        """
        Parses the parent state value and converts it to the appropriate format.

        If the value is a string, it converts it to RFC 3339 datetime format using the `_datetime_str_to_rfc3339` method.
        If the value is an integer, it returns the value as is.

        Args:
            value (str | int): The parent state value to be parsed.

        Returns:
            str | int: The parsed parent state value in the appropriate format.
        """

        if isinstance(value, str):
            return self.tools._datetime_str_to_rfc3339(value)
        elif isinstance(value, int):
            return value

    def _set_parent_state_value(self, value: str | int) -> None:
        """
        Sets the parent state value by parsing the provided value and updating the
        parent stream cursor value. If the parent stream cursor value is already set,
        it updates it to the maximum of the current and parsed values.

        Args:
            value (str | int): The value to be parsed and set as the parent state value.
        """

        parsed_value = self._parse_parent_state_value(value)
        if not self._parent_stream_cursor_value:
            self._parent_stream_cursor_value = parsed_value
        else:
            self._parent_stream_cursor_value = max(self._parent_stream_cursor_value, parsed_value)

    def _track_parent_cursor(self, record: MutableMapping[str, Any]) -> None:
        """
        Tracks the cursor value from the parent stream if it exists and updates the parent state.

        Args:
            record (MutableMapping[str, Any]): The record from which to extract the parent cursor value.

        Returns:
            None
        """

        if self.has_parent_stream:
            cursor_value: Optional[str | int] = record.get(self.parent_cursor_key, None)
            if cursor_value:
                self._set_parent_state_value(cursor_value)

    def get_parent_stream_state(self) -> Optional[Union[str, Mapping[str, Any]]]:
        """
        Retrieve the state of the parent stream if it exists.

        Returns:
            Optional[Union[str, Mapping[str, Any]]]: A dictionary containing the parent stream cursor and its value
            if the parent stream exists and has a cursor value, otherwise None.
        """

        if self.has_parent_stream and self._parent_stream_cursor_value:
            return {self.parent_stream_cursor: self._parent_stream_cursor_value}

    def record_new(self, record: MutableMapping[str, Any]) -> None:
        """
        Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.

        Args:
            record (MutableMapping[str, Any]): The record to be processed.
        """

        record = self.component_prepare(record)
        record.pop("__typename")
        self.buffer.append(record)

    def record_new_component(self, record: MutableMapping[str, Any]) -> None:
        """
        Processes a new record by extracting its component type and adding it to the appropriate
        placeholder in the components list.

        Args:
            record (MutableMapping[str, Any]): The record to be processed.
                It is expected to contain a "__typename" key which indicates the component type.
        """

        component = record.get("__typename")
        record.pop("__typename")
        # add component to its placeholder in the components list
        self.buffer[-1]["record_components"][component].append(record)

    def component_prepare(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        """
        Prepares the given record by initializing a "record_components" dictionary.

        If the instance has components, this method will add a "record_components" key to the record,
        with each component as a key and an empty list as its value.

        Args:
            record (MutableMapping[str, Any]): The record to be prepared.

        Returns:
            MutableMapping[str, Any]: The updated record with initialized "record_components".
        """

        if self.components:
            record["record_components"] = {}
            for component in self.components:
                record["record_components"][component] = []
        return record

    def buffer_flush(self) -> Iterable[Mapping[str, Any]]:
        """
        Flushes the buffer by processing each record in the buffer.

        For each record in the buffer:
        - Tracks the parent state using `_track_parent_cursor`.
        - Resolves the record ID from `str` to `int` using `record_resolve_id`.
        - Processes record components using `record_process_components`.

        Yields:
            Iterable[Mapping[str, Any]]: Processed records from the buffer.

        After processing, the buffer is cleared.
        """

        if len(self.buffer) > 0:
            for record in self.buffer:
                # track the parent state
                self._track_parent_cursor(record)
                # resolve id from `str` to `int`
                record = self.record_resolve_id(record)
                # process record components
                yield from self.record_process_components(record)
            # clean the buffer
            self.buffer.clear()

    def record_compose(self, record: Mapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]:
        """
        Processes a given record and yields buffered records if certain conditions are met.

        Args:
            record (Mapping[str, Any]): The record to be processed.

        Returns:
            Optional[Iterable[MutableMapping[str, Any]]]: An iterable of buffered records if conditions are met, otherwise None.

        The method performs the following steps:
        1. Checks if the record matches the type specified in the "new_record" composition.
           - If it matches, it yields any buffered records from previous iterations and registers the new record.
        2. Checks if the record matches any of the specified components.
           - If it matches, it registers the new component record.

        Step 1: register the new record by it's `__typename`
        Step 2: check for `components` by their `__typename` and add to the placeholder
        Step 3: repeat until the `<END_OF_FILE>`.
        """

        if self.check_type(record, self.composition.get("new_record")):
            # emit from previous iteration, if present
            yield from self.buffer_flush()
            # register the record
            self.record_new(record)
        # components check
        elif self.check_type(record, self.components):
            self.record_new_component(record)

    def process_line(self, jsonl_file: TextIOWrapper) -> Iterable[MutableMapping[str, Any]]:
        """
        Processes a JSON Lines (jsonl) file and yields records.

        Args:
            jsonl_file (TextIOWrapper): A file-like object containing JSON Lines data.

        Yields:
            Iterable[MutableMapping[str, Any]]: An iterable of dictionaries representing the processed records.

        The method reads each line from the provided jsonl_file. It exits the loop when it encounters the <end_of_file> marker.
        For non-empty lines, it parses the JSON content and yields the resulting records. Finally, it emits any remaining
        records in the buffer.
        """

        for line in jsonl_file:
            if line == END_OF_FILE:
                break
            elif line != "":
                yield from self.record_compose(loads(line))

        # emit what's left in the buffer, typically last record
        yield from self.buffer_flush()

    def record_resolve_id(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        """
        Resolves and updates the 'id' field in the given record.

        This method extracts the 'id' from the record, checks if it is a string,
        and if so, assigns it to 'admin_graphql_api_id' in the record. It then
        resolves the string 'id' to an integer using the 'resolve_str_id' method
        from the 'tools' attribute and updates the 'id' field in the record.

        Args:
            record (MutableMapping[str, Any]): The record containing the 'id' field to be resolved.
                Example:
                    { "Id": "gid://shopify/Order/19435458986123"}

        Returns:
            MutableMapping[str, Any]: The updated record with the resolved 'id' field.
                Example:
                    { "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
        """

        id = record.get("id")
        if id and isinstance(id, str):
            record["admin_graphql_api_id"] = id
            record["id"] = self.tools.resolve_str_id(id)
        return record

    def produce_records(self, filename: str) -> Iterable[MutableMapping[str, Any]]:
        """
        Produce records from a JSON Lines (jsonl) file.

        This method reads a JSON Lines file, processes each line, converts the field names to snake_case,
        and yields each processed record. It also keeps track of the number of records processed.

        Args:
            filename (str): The path to the JSON Lines file.

        Yields:
            MutableMapping[str, Any]: A dictionary representing a processed record with field names in snake_case.
        """

        with open(filename, "r") as jsonl_file:
            # reset the counter
            self.record_composed = 0

            for record in self.process_line(jsonl_file):
                yield self.tools.fields_names_to_snake_case(record)
                self.record_composed += 1

    def read_file(self, filename: str, remove_file: Optional[bool] = True) -> Iterable[Mapping[str, Any]]:
        """
        Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.

        Args:
            filename (str): The name of the file to read.
            remove_file (Optional[bool]): Flag indicating whether to remove the file after reading. Defaults to True.

        Example:
            Note: typically the `filename` is taken from the `result_url` string provided in the response.

            `bulk-4039263649981.jsonl` :
            - the `4039263649981` is the `id` of the COMPLETED BULK Jobw with `result_url`

        Yields:
            Iterable[Mapping[str, Any]]: An iterable of records produced from the file.

        Raises:
            ShopifyBulkExceptions.BulkRecordProduceError: If an error occurs while producing records from the file.

        Logs:
            Logs an info message if the file removal fails.
        """

        try:
            # produce records from saved result
            yield from self.produce_records(filename)
        except Exception as e:
            raise ShopifyBulkExceptions.BulkRecordProduceError(
                f"An error occured while producing records from BULK Job result. Trace: {repr(e)}.",
            )
        finally:
            # removing the tmp file, if requested
            if remove_file and filename:
                try:
                    remove(filename)
                except Exception as e:
                    LOGGER.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
                    pass
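Note: the key addition in the new `record.py` is that the producer tracks the parent cursor while flushing the buffer, so the parent state can advance even when a parent record yields no substream rows. A self-contained sketch of that max-value tracking (the real implementation normalizes string cursors to RFC 3339 via `BulkTools` and also accepts integer cursors; the sample rows below are illustrative):

# Self-contained sketch of the parent-state tracking performed by ShopifyBulkRecord.
from typing import Any, Dict, List, Mapping, Optional, Union


def track_parent_state(rows: List[Mapping[str, Any]], parent_cursor_key: str, parent_cursor: str) -> Optional[Dict[str, Any]]:
    latest: Optional[Union[str, int]] = None
    for row in rows:
        # the aliased parent cursor, e.g. "products_updated_at", injected by the bulk query
        value = row.get(parent_cursor_key)
        if value is not None:
            # the connector compares normalized RFC 3339 strings (or ints) the same way, via max()
            latest = value if latest is None else max(latest, value)
    return {parent_cursor: latest} if latest is not None else None


rows = [
    {"id": 1, "products_updated_at": "2024-05-01T10:00:00+00:00"},
    {"id": 2},  # a row without the aliased cursor simply does not move the state
    {"id": 3, "products_updated_at": "2024-05-02T08:30:00+00:00"},
]
print(track_parent_state(rows, "products_updated_at", "updated_at"))
# {'updated_at': '2024-05-02T08:30:00+00:00'}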
{airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/retry.py
CHANGED

@@ -1,10 +1,11 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.

-import logging
 from functools import wraps
 from time import sleep
 from typing import Any, Callable, Final, Optional, Tuple, Type

+from source_shopify.utils import LOGGER
+
 from .exceptions import ShopifyBulkExceptions

 BULK_RETRY_ERRORS: Final[Tuple] = (

@@ -13,11 +14,10 @@ BULK_RETRY_ERRORS: Final[Tuple] = (
 )


-def bulk_retry_on_exception(
+def bulk_retry_on_exception(more_exceptions: Optional[Tuple[Type[Exception], ...]] = None) -> Callable:
     """
     A decorator to retry a function when specified exceptions are raised.

-    :param logger: Number of times to retry.
     :param more_exceptions: A tuple of exception types to catch.
     """

@@ -31,26 +31,26 @@ def bulk_retry_on_exception(logger: logging.Logger, more_exceptions: Optional[Tu
             except BULK_RETRY_ERRORS or more_exceptions as ex:
                 current_retries += 1
                 if current_retries > self._job_max_retries:
-
+                    LOGGER.error("Exceeded retry limit. Giving up.")
                     raise
                 else:
-
+                    LOGGER.warning(
                         f"Stream `{self.http_client.name}`: {ex}. Retrying {current_retries}/{self._job_max_retries} after {self._job_backoff_time} seconds."
                     )
                     sleep(self._job_backoff_time)
             except ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError:
                 if self._concurrent_attempt == self._concurrent_max_retry:
                     message = f"The BULK Job couldn't be created at this time, since another job is running."
-
+                    LOGGER.error(message)
                     raise ShopifyBulkExceptions.BulkJobConcurrentError(message)

                 self._concurrent_attempt += 1
-
+                LOGGER.warning(
                     f"Stream: `{self.http_client.name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, attempt: {self._concurrent_attempt}.",
                 )
                 sleep(self._concurrent_interval)
             except ShopifyBulkExceptions.BulkJobRedirectToOtherShopError:
-
+                LOGGER.warning(
                     f"Stream: `{self.http_client.name}`, the `shop name` differs from the provided in `input configuration`. Switching to the `{self._tools.shop_name_from_url(self.base_url)}`.",
                 )
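Note: `bulk_retry_on_exception` no longer receives a logger; it is a decorator factory that must be invoked, even with no arguments, which is why the call sites in `job.py` changed to `@bulk_retry_on_exception()`. A simplified, self-contained sketch of that calling convention (not the connector's implementation, which additionally handles the Shopify concurrency and shop-redirect errors shown above):

# Simplified stand-in for the new decorator-factory shape; it is not the connector's code.
import logging
from functools import wraps
from time import sleep
from typing import Callable, Optional, Tuple, Type

LOGGER = logging.getLogger("airbyte")


def retry_on_exception(more_exceptions: Optional[Tuple[Type[Exception], ...]] = None,
                       max_retries: int = 3, backoff: float = 0.0) -> Callable:
    retryable = (ConnectionError,) + (more_exceptions or ())

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            attempts = 0
            while True:
                try:
                    return func(*args, **kwargs)
                except retryable as ex:
                    attempts += 1
                    if attempts > max_retries:
                        LOGGER.error("Exceeded retry limit. Giving up.")
                        raise
                    LOGGER.warning(f"{ex!r}. Retrying {attempts}/{max_retries} after {backoff} seconds.")
                    sleep(backoff)
        return wrapper
    return decorator


@retry_on_exception()  # note the parentheses: the factory is always called, as in `@bulk_retry_on_exception()`
def flaky() -> str:
    return "ok"


print(flaky())  # ok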
{airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/streams/base_streams.py
CHANGED

@@ -644,12 +644,14 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
         self.job_manager: ShopifyBulkManager = ShopifyBulkManager(
             http_client=self.bulk_http_client,
             base_url=f"{self.url_base}{self.path()}",
-            query=self.bulk_query(config, self.
+            query=self.bulk_query(config, self.parent_stream_query_cursor_alias),
             job_termination_threshold=float(config.get("job_termination_threshold", 3600)),
             # overide the default job slice size, if provided (it's auto-adjusted, later on)
             job_size=config.get("bulk_window_in_days", 30.0),
             # provide the job checkpoint interval value, default value is 200k lines collected
             job_checkpoint_interval=config.get("job_checkpoint_interval", 200_000),
+            parent_stream_name=self.parent_stream_name,
+            parent_stream_cursor=self.parent_stream_cursor,
         )

     @property

@@ -670,20 +672,25 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
         """
         return self.parent_stream_class(self.config) if self.parent_stream_class else None

-    @
+    @cached_property
     def parent_stream_name(self) -> Optional[str]:
         """
         Returns the parent stream name, if the substream has a `parent_stream_class` dependency.
         """
         return self.parent_stream.name if self.parent_stream_class else None

-    @
+    @cached_property
     def parent_stream_cursor(self) -> Optional[str]:
         """
         Returns the parent stream cursor, if the substream has a `parent_stream_class` dependency.
         """
         return self.parent_stream.cursor_field if self.parent_stream_class else None

+    @cached_property
+    def parent_stream_query_cursor_alias(self) -> Optional[str]:
+        if self.parent_stream_name and self.parent_stream_cursor:
+            return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
+
     @property
     @abstractmethod
     def bulk_query(self) -> ShopifyBulkQuery:

@@ -713,7 +720,9 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
         return None

     def get_updated_state(
-        self,
+        self,
+        current_stream_state: MutableMapping[str, Any],
+        latest_record: Mapping[str, Any],
     ) -> MutableMapping[str, Any]:
         """UPDATING THE STATE OBJECT:
         Stream: CustomerAddress

@@ -728,29 +737,40 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
             }
         }
         """
+
         updated_state = super().get_updated_state(current_stream_state, latest_record)
+
         if self.parent_stream_class:
-
-
-
-            )
+            # the default way of getting the parent stream state is to use the value from the RecordProducer,
+            # since the parent record could be present but no substream's records are present to emit,
+            # the parent state is tracked for each parent record processed, thus updated regardless having substream records.
+            tracked_parent_state = self.job_manager.record_producer.get_parent_stream_state()
+            # fallback to the record level to search for the parent cursor or use the stream cursor value
+            parent_state = tracked_parent_state if tracked_parent_state else self._get_parent_state_from_record(latest_record)
             # add parent_stream_state to `updated_state`
-            updated_state[self.
+            updated_state[self.parent_stream_name] = parent_state
+
         return updated_state

+    def _get_parent_state_from_record(self, latest_record: Mapping[str, Any]) -> MutableMapping[str, Any]:
+        parent_state = latest_record.get(self.parent_stream_name, {})
+        parent_state_value = parent_state.get(self.parent_stream_cursor) if parent_state else latest_record.get(self.parent_stream_cursor)
+        parent_state[self.parent_stream_cursor] = parent_state_value
+        return parent_state
+
     def _get_stream_cursor_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[str]:
         if stream_state:
             return stream_state.get(self.cursor_field, self.default_state_comparison_value)
         else:
             return self.config.get("start_date")

-    def
+    def _get_stream_state_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[str]:
         if stream_state:
             if self.parent_stream_class:
                 # get parent stream state from the stream_state object.
-                parent_state = stream_state.get(self.
+                parent_state = stream_state.get(self.parent_stream_name, {})
                 if parent_state:
-                    return parent_state.get(self.
+                    return parent_state.get(self.parent_stream_cursor, self.default_state_comparison_value)
                 else:
                     # use the streams cursor value, if no parent state available
                     return self._get_stream_cursor_value(stream_state)

@@ -760,9 +780,9 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
         else:
             return self.config.get("start_date")

-    def
+    def _get_state_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[Union[str, int]]:
         if stream_state:
-            return self.
+            return self._get_stream_state_value(stream_state)
         else:
             # for majority of cases we fallback to start_date, otherwise.
             return self.config.get("start_date")

@@ -785,7 +805,7 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
     @stream_state_cache.cache_stream_state
     def stream_slices(self, stream_state: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
         if self.filter_field:
-            state = self.
+            state = self._get_state_value(stream_state)
             start = pdm.parse(state)
             end = pdm.now()
             while start < end:
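Note: `get_updated_state` now prefers the parent state tracked by the record producer and only falls back to whatever the latest record carries. A self-contained sketch of that precedence (function and variable names are illustrative):

# Illustrative stand-alone version of the new precedence in get_updated_state / _get_parent_state_from_record.
from typing import Any, Mapping, MutableMapping, Optional


def merge_parent_state(
    updated_state: MutableMapping[str, Any],
    tracked_parent_state: Optional[Mapping[str, Any]],
    latest_record: Mapping[str, Any],
    parent_stream_name: str,
    parent_stream_cursor: str,
) -> MutableMapping[str, Any]:
    if tracked_parent_state:
        # preferred: the state tracked by the record producer while reading the bulk result
        parent_state = dict(tracked_parent_state)
    else:
        # fallback: the nested parent object on the record, then the record's own cursor field
        parent_state = dict(latest_record.get(parent_stream_name, {}) or {})
        parent_state[parent_stream_cursor] = parent_state.get(parent_stream_cursor) or latest_record.get(parent_stream_cursor)
    updated_state[parent_stream_name] = parent_state
    return updated_state


state = {"updated_at": "2024-05-01T00:00:00+00:00"}
record = {"id": 123, "updated_at": "2024-05-02T00:00:00+00:00"}
print(merge_parent_state(state, {"updated_at": "2024-05-03T00:00:00+00:00"}, record, "products", "updated_at"))
# {'updated_at': '2024-05-01T00:00:00+00:00', 'products': {'updated_at': '2024-05-03T00:00:00+00:00'}}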
{airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/utils.py
CHANGED

@@ -7,13 +7,16 @@ import enum
 import logging
 from functools import wraps
 from time import sleep
-from typing import Any, Callable, Dict, List, Mapping, Optional
+from typing import Any, Callable, Dict, Final, List, Mapping, Optional

 import requests
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import ErrorResolution, ResponseAction
 from airbyte_cdk.utils import AirbyteTracedException

+# default logger instance
+LOGGER: Final[logging.Logger] = logging.getLogger("airbyte")
+

 class ShopifyNonRetryableErrors:
     """Holds the errors classification and messaging scenarios."""

@@ -112,8 +115,6 @@ class ShopifyRateLimiter:
     on_mid_load: float = 1.5
     on_high_load: float = 5.0

-    logger = logging.getLogger("airbyte")
-
     log_message_count = 0
     log_message_frequency = 3

@@ -124,7 +125,7 @@ class ShopifyRateLimiter:
         if ShopifyRateLimiter.log_message_count < ShopifyRateLimiter.log_message_frequency:
             ShopifyRateLimiter.log_message_count += 1
         else:
-
+            LOGGER.info(message)
             ShopifyRateLimiter.log_message_count = 0

     def get_response_from_args(*args) -> Optional[requests.Response]:

@@ -138,8 +139,8 @@ class ShopifyRateLimiter:
         Define wait_time based on load conditions.

         :: load - represents how close we are to being throttled
-
-
+            - 0.5 is half way through our allowance
+            - 1 indicates that all of the allowance is used and the api will start rejecting calls
         :: threshold - is the % cutoff for the rate_limits/load
         :: wait_time - time to wait between each request in seconds
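Note: the per-class loggers are replaced by a single module-level logger in `source_shopify/utils.py`, which the other modules import. A minimal reproduction of the pattern:

import logging
from typing import Final

logging.basicConfig(level=logging.INFO)

# shared, module-level logger, as added to source_shopify/utils.py
LOGGER: Final[logging.Logger] = logging.getLogger("airbyte")

LOGGER.info("Stream: `products`, the BULK Job: `gid://shopify/BulkOperation/1` is CREATED")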
airbyte_source_shopify-2.5.6/source_shopify/shopify_graphql/bulk/record.py
REMOVED
@@ -1,157 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import logging
from dataclasses import dataclass, field
from io import TextIOWrapper
from json import loads
from os import remove
from typing import Any, Callable, Final, Iterable, List, Mapping, MutableMapping, Optional, Union

from .exceptions import ShopifyBulkExceptions
from .query import ShopifyBulkQuery
from .tools import END_OF_FILE, BulkTools


@dataclass
class ShopifyBulkRecord:
    query: ShopifyBulkQuery

    # default buffer
    buffer: List[MutableMapping[str, Any]] = field(init=False, default_factory=list)

    # default logger
    logger: Final[logging.Logger] = logging.getLogger("airbyte")

    def __post_init__(self) -> None:
        self.composition: Optional[Mapping[str, Any]] = self.query.record_composition
        self.record_process_components: Optional[Callable[[MutableMapping], MutableMapping]] = self.query.record_process_components
        self.components: List[str] = self.composition.get("record_components", []) if self.composition else []
        # how many records composed
        self.record_composed: int = 0

    @property
    def tools(self) -> BulkTools:
        return BulkTools()

    @staticmethod
    def check_type(record: Mapping[str, Any], types: Union[List[str], str]) -> bool:
        record_type = record.get("__typename")
        if isinstance(types, list):
            return any(record_type == t for t in types)
        else:
            return record_type == types

    def record_new(self, record: MutableMapping[str, Any]) -> None:
        record = self.component_prepare(record)
        record.pop("__typename")
        self.buffer.append(record)

    def record_new_component(self, record: MutableMapping[str, Any]) -> None:
        component = record.get("__typename")
        record.pop("__typename")
        # add component to its placeholder in the components list
        self.buffer[-1]["record_components"][component].append(record)

    def component_prepare(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        if self.components:
            record["record_components"] = {}
            for component in self.components:
                record["record_components"][component] = []
        return record

    def buffer_flush(self) -> Iterable[Mapping[str, Any]]:
        if len(self.buffer) > 0:
            for record in self.buffer:
                # resolve id from `str` to `int`
                record = self.record_resolve_id(record)
                # process record components
                yield from self.record_process_components(record)
            # clean the buffer
            self.buffer.clear()

    def record_compose(self, record: Mapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]:
        """
        Step 1: register the new record by it's `__typename`
        Step 2: check for `components` by their `__typename` and add to the placeholder
        Step 3: repeat until the `<END_OF_FILE>`.
        """
        if self.check_type(record, self.composition.get("new_record")):
            # emit from previous iteration, if present
            yield from self.buffer_flush()
            # register the record
            self.record_new(record)
        # components check
        elif self.check_type(record, self.components):
            self.record_new_component(record)

    def process_line(self, jsonl_file: TextIOWrapper) -> Iterable[MutableMapping[str, Any]]:
        # process the json lines
        for line in jsonl_file:
            # we exit from the loop when receive <end_of_file> (file ends)
            if line == END_OF_FILE:
                break
            elif line != "":
                yield from self.record_compose(loads(line))

        # emit what's left in the buffer, typically last record
        yield from self.buffer_flush()

    def record_resolve_id(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        """
        The ids are fetched in the format of: " gid://shopify/Order/<Id> "
        Input:
            { "Id": "gid://shopify/Order/19435458986123"}
        We need to extract the actual id from the string instead.
        Output:
            { "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
        """
        # save the actual api id to the `admin_graphql_api_id`
        # while resolving the `id` in `record_resolve_id`,
        # we re-assign the original id like `"gid://shopify/Order/19435458986123"`,
        # into `admin_graphql_api_id` have the ability to identify the record oigin correctly in subsequent actions.
        # IF NOT `id` field is provided by the query results, we should return composed record `as is`.
        id = record.get("id")
        if id and isinstance(id, str):
            record["admin_graphql_api_id"] = id
            # extracting the int(id) and reassign
            record["id"] = self.tools.resolve_str_id(id)
        return record

    def produce_records(self, filename: str) -> Iterable[MutableMapping[str, Any]]:
        """
        Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
        The filename example: `bulk-4039263649981.jsonl`,
        where `4039263649981` is the `id` of the COMPLETED BULK Jobw with `result_url`.
        Note: typically the `filename` is taken from the `result_url` string provided in the response.
        """

        with open(filename, "r") as jsonl_file:
            # reset the counter
            self.record_composed = 0

            for record in self.process_line(jsonl_file):
                yield self.tools.fields_names_to_snake_case(record)
                self.record_composed += 1

    def read_file(self, filename: str, remove_file: Optional[bool] = True) -> Iterable[Mapping[str, Any]]:
        try:
            # produce records from saved result
            yield from self.produce_records(filename)
        except Exception as e:
            raise ShopifyBulkExceptions.BulkRecordProduceError(
                f"An error occured while producing records from BULK Job result. Trace: {repr(e)}.",
            )
        finally:
            # removing the tmp file, if requested
            if remove_file and filename:
                try:
                    remove(filename)
                except Exception as e:
                    self.logger.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
                    # we should pass here, if the file wasn't removed , it's either:
                    # - doesn't exist
                    # - will be dropped with the container shut down.
                    pass

All remaining files listed with +0 -0 in the summary above are unchanged between 2.5.6 and 2.5.8; only the package version directory in their paths was renamed.