airbyte-source-shopify 3.0.8.dev202507101541__tar.gz → 3.0.9rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/PKG-INFO +3 -3
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/pyproject.toml +7 -10
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/job.py +25 -160
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/streams/base_streams.py +18 -3
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/streams/streams.py +7 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/README.md +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/__init__.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/auth.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/config_migrations.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/http_request.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/run.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/abandoned_checkouts.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/articles.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/balance_transactions.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/blogs.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/collections.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/collects.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/countries.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/custom_collections.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/customer_address.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/customer_journey_summary.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/customers.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/discount_codes.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/disputes.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/draft_orders.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/fulfillment_orders.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/fulfillments.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/inventory_items.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/inventory_levels.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/locations.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_articles.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_blogs.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_collections.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_customers.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_draft_orders.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_locations.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_orders.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_pages.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_product_images.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_product_variants.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_products.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_shops.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/metafield_smart_collections.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/order_agreements.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/order_refunds.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/order_risks.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/orders.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/pages.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/price_rules.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/product_images.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/product_variants.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/products.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/profile_location_groups.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/shop.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/smart_collections.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/tender_transactions.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/schemas/transactions.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/scopes.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/__init__.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/exceptions.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/query.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/record.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/retry.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/status.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/shopify_graphql/bulk/tools.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/source.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/spec.json +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/transform.py +0 -0
- {airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/source_shopify/utils.py +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: airbyte-source-shopify
|
|
3
|
-
Version: 3.0.8.dev202507101541
|
|
3
|
+
Version: 3.0.9rc1
|
|
4
4
|
Summary: Source CDK implementation for Shopify.
|
|
5
|
+
Home-page: https://airbyte.com
|
|
5
6
|
License: ELv2
|
|
6
7
|
Author: Airbyte
|
|
7
8
|
Author-email: contact@airbyte.io
|
|
@@ -15,7 +16,6 @@ Requires-Dist: graphql-query (>=1,<2)
|
|
|
15
16
|
Requires-Dist: pendulum (>=2.1.2,<3.0.0)
|
|
16
17
|
Requires-Dist: sgqlc (==16.3)
|
|
17
18
|
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/shopify
|
|
18
|
-
Project-URL: Homepage, https://airbyte.com
|
|
19
19
|
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
|
{airbyte_source_shopify-3.0.8.dev202507101541 → airbyte_source_shopify-3.0.9rc1}/pyproject.toml
RENAMED
|
@@ -1,24 +1,19 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = [
|
|
3
|
-
"poetry-core>=1.0.0",
|
|
4
|
-
]
|
|
2
|
+
requires = [ "poetry-core>=1.0.0",]
|
|
5
3
|
build-backend = "poetry.core.masonry.api"
|
|
6
4
|
|
|
7
5
|
[tool.poetry]
|
|
8
|
-
version = "3.0.
|
|
6
|
+
version = "3.0.9-rc.1"
|
|
9
7
|
name = "airbyte-source-shopify"
|
|
10
8
|
description = "Source CDK implementation for Shopify."
|
|
11
|
-
authors = [
|
|
12
|
-
"Airbyte <contact@airbyte.io>",
|
|
13
|
-
]
|
|
9
|
+
authors = [ "Airbyte <contact@airbyte.io>",]
|
|
14
10
|
license = "ELv2"
|
|
15
11
|
readme = "README.md"
|
|
16
12
|
documentation = "https://docs.airbyte.com/integrations/sources/shopify"
|
|
17
13
|
homepage = "https://airbyte.com"
|
|
18
14
|
repository = "https://github.com/airbytehq/airbyte"
|
|
19
|
-
packages
|
|
20
|
-
|
|
21
|
-
]
|
|
15
|
+
[[tool.poetry.packages]]
|
|
16
|
+
include = "source_shopify"
|
|
22
17
|
|
|
23
18
|
[tool.poetry.dependencies]
|
|
24
19
|
python = "^3.10,<3.12"
|
|
@@ -38,5 +33,7 @@ freezegun = "^1.4.0"
|
|
|
38
33
|
|
|
39
34
|
[tool.poe]
|
|
40
35
|
include = [
|
|
36
|
+
# Shared tasks definition file(s) can be imported here.
|
|
37
|
+
# Run `poe` or `poe --help` to see the list of available tasks.
|
|
41
38
|
"${POE_GIT_DIR}/poe-tasks/poetry-connector-tasks.toml",
|
|
42
39
|
]
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from enum import Enum
|
|
8
|
-
from json import loads
|
|
9
8
|
from time import sleep, time
|
|
10
9
|
from typing import Any, Final, Iterable, List, Mapping, Optional
|
|
11
10
|
|
|
@@ -66,7 +65,7 @@ class ShopifyBulkManager:
|
|
|
66
65
|
|
|
67
66
|
# currents: _job_id, _job_state, _job_created_at, _job_self_canceled
|
|
68
67
|
_job_id: Optional[str] = field(init=False, default=None)
|
|
69
|
-
_job_state:
|
|
68
|
+
_job_state: str | None = field(init=False, default=None) # this string is based on ShopifyBulkJobStatus
|
|
70
69
|
# completed and saved Bulk Job result filename
|
|
71
70
|
_job_result_filename: Optional[str] = field(init=False, default=None)
|
|
72
71
|
# date-time when the Bulk Job was created on the server
|
|
@@ -84,9 +83,7 @@ class ShopifyBulkManager:
|
|
|
84
83
|
# the flag to adjust the next slice from the checkpointed cursor vaue
|
|
85
84
|
_job_adjust_slice_from_checkpoint: bool = field(init=False, default=False)
|
|
86
85
|
# keeps the last checkpointed cursor value for supported streams
|
|
87
|
-
_job_last_checkpoint_cursor_value:
|
|
88
|
-
# stores extracted cursor from INTERNAL_SERVER_ERROR recovery (temporary storage)
|
|
89
|
-
_job_extracted_checkpoint_cursor: Optional[str] = field(init=False, default=None)
|
|
86
|
+
_job_last_checkpoint_cursor_value: str | None = field(init=False, default=None)
|
|
90
87
|
|
|
91
88
|
# expand slice factor
|
|
92
89
|
_job_size_expand_factor: int = field(init=False, default=2)
|
|
@@ -217,8 +214,6 @@ class ShopifyBulkManager:
|
|
|
217
214
|
self._log_job_msg_count = 0
|
|
218
215
|
# set the running job object count to default
|
|
219
216
|
self._job_last_rec_count = 0
|
|
220
|
-
# clear any extracted cursor from INTERNAL_SERVER_ERROR recovery
|
|
221
|
-
self._job_extracted_checkpoint_cursor = None
|
|
222
217
|
|
|
223
218
|
def _set_checkpointing(self) -> None:
|
|
224
219
|
# set the flag to adjust the next slice from the checkpointed cursor value
|
|
@@ -318,24 +313,6 @@ class ShopifyBulkManager:
|
|
|
318
313
|
# fetch the collected records from CANCELED Job on checkpointing
|
|
319
314
|
self._job_result_filename = self._job_get_result(response)
|
|
320
315
|
|
|
321
|
-
# Special handling: For FAILED jobs with INTERNAL_SERVER_ERROR, extract the last processed cursor
|
|
322
|
-
if response:
|
|
323
|
-
parsed_response = response.json().get("data", {}).get("node", {}) if response else {}
|
|
324
|
-
error_code = parsed_response.get("errorCode")
|
|
325
|
-
if error_code == "INTERNAL_SERVER_ERROR":
|
|
326
|
-
last_cursor = self._extract_last_cursor_from_partial_data(response)
|
|
327
|
-
if last_cursor:
|
|
328
|
-
# Check if this cursor would cause a collision before storing it
|
|
329
|
-
if self._checkpoint_cursor_has_collision(last_cursor):
|
|
330
|
-
# Skip cursor extraction to avoid collision
|
|
331
|
-
pass
|
|
332
|
-
else:
|
|
333
|
-
# Store the extracted cursor for later use (don't set it yet to avoid collision)
|
|
334
|
-
self._job_extracted_checkpoint_cursor = last_cursor
|
|
335
|
-
else:
|
|
336
|
-
# Not processing data due to insufficient records or checkpointing disabled
|
|
337
|
-
pass
|
|
338
|
-
|
|
339
316
|
def _job_update_state(self, response: Optional[requests.Response] = None) -> None:
|
|
340
317
|
if response:
|
|
341
318
|
self._job_state = response.json().get("data", {}).get("node", {}).get("status")
|
|
@@ -386,26 +363,7 @@ class ShopifyBulkManager:
|
|
|
386
363
|
def _on_completed_job(self, response: Optional[requests.Response] = None) -> None:
|
|
387
364
|
self._job_result_filename = self._job_get_result(response)
|
|
388
365
|
|
|
389
|
-
def _on_failed_job(self, response: requests.Response) ->
|
|
390
|
-
# Special handling for FAILED jobs with INTERNAL_SERVER_ERROR that support checkpointing
|
|
391
|
-
parsed_response = response.json().get("data", {}).get("node", {}) if response else {}
|
|
392
|
-
error_code = parsed_response.get("errorCode")
|
|
393
|
-
|
|
394
|
-
if error_code == "INTERNAL_SERVER_ERROR" and self._supports_checkpointing:
|
|
395
|
-
LOGGER.info(
|
|
396
|
-
f"Stream: `{self.http_client.name}`, BULK Job: `{self._job_id}` failed with INTERNAL_SERVER_ERROR. Waiting for partial data availability..."
|
|
397
|
-
)
|
|
398
|
-
# For INTERNAL_SERVER_ERROR specifically, wait and retry to check if partial data becomes available
|
|
399
|
-
partial_response = self._wait_for_partial_data_on_failure()
|
|
400
|
-
if partial_response:
|
|
401
|
-
# Use the updated response that may contain partialDataUrl
|
|
402
|
-
response = partial_response
|
|
403
|
-
# Update the job state with the new response to ensure _job_last_rec_count is set correctly
|
|
404
|
-
self._job_update_state(response)
|
|
405
|
-
# For INTERNAL_SERVER_ERROR with partial data, extract cursor and treat as checkpointable
|
|
406
|
-
self._job_get_checkpointed_result(response)
|
|
407
|
-
return None # Don't raise exception, we recovered the data
|
|
408
|
-
|
|
366
|
+
def _on_failed_job(self, response: requests.Response) -> AirbyteTracedException | None:
|
|
409
367
|
if not self._supports_checkpointing:
|
|
410
368
|
raise ShopifyBulkExceptions.BulkJobFailed(
|
|
411
369
|
f"The BULK Job: `{self._job_id}` exited with {self._job_state}, details: {response.text}",
|
|
@@ -415,102 +373,6 @@ class ShopifyBulkManager:
|
|
|
415
373
|
# we leverage the checkpointing in this case.
|
|
416
374
|
self._job_get_checkpointed_result(response)
|
|
417
375
|
|
|
418
|
-
def _wait_for_partial_data_on_failure(self) -> Optional[requests.Response]:
|
|
419
|
-
"""
|
|
420
|
-
Wait for partial data to become available when a BULK job fails with INTERNAL_SERVER_ERROR.
|
|
421
|
-
|
|
422
|
-
This method is specifically designed for INTERNAL_SERVER_ERROR cases where
|
|
423
|
-
Shopify's BULK API may make partial data available (via partialDataUrl)
|
|
424
|
-
after a short wait, even though the job initially failed.
|
|
425
|
-
|
|
426
|
-
Returns:
|
|
427
|
-
Optional[requests.Response]: Updated response with potential partialDataUrl, or None if no data
|
|
428
|
-
"""
|
|
429
|
-
max_wait_attempts = 10 # Maximum number of wait attempts
|
|
430
|
-
wait_interval = 10 # Wait 10 seconds between checks
|
|
431
|
-
|
|
432
|
-
for attempt in range(max_wait_attempts):
|
|
433
|
-
sleep(wait_interval)
|
|
434
|
-
|
|
435
|
-
# Check job status again to see if partial data is now available
|
|
436
|
-
try:
|
|
437
|
-
_, response = self.http_client.send_request(
|
|
438
|
-
http_method="POST",
|
|
439
|
-
url=self.base_url,
|
|
440
|
-
json={"query": ShopifyBulkTemplates.status(self._job_id)},
|
|
441
|
-
request_kwargs={},
|
|
442
|
-
)
|
|
443
|
-
|
|
444
|
-
parsed_response = response.json().get("data", {}).get("node", {}) if response else {}
|
|
445
|
-
partial_data_url = parsed_response.get("partialDataUrl")
|
|
446
|
-
object_count = parsed_response.get("objectCount", "0")
|
|
447
|
-
|
|
448
|
-
# Only stop waiting if we actually have a partialDataUrl - objectCount alone is not sufficient
|
|
449
|
-
if partial_data_url and int(object_count) > 0:
|
|
450
|
-
LOGGER.info(f"Stream: `{self.http_client.name}`, partial data available after wait. Object count: {object_count}")
|
|
451
|
-
return response
|
|
452
|
-
elif int(object_count) > 0:
|
|
453
|
-
# objectCount available but no partialDataUrl yet - continue waiting
|
|
454
|
-
continue
|
|
455
|
-
|
|
456
|
-
except Exception as e:
|
|
457
|
-
# Error during partial data check - continue waiting
|
|
458
|
-
continue
|
|
459
|
-
|
|
460
|
-
LOGGER.warning(f"Stream: `{self.http_client.name}`, no partial data became available after {max_wait_attempts} attempts")
|
|
461
|
-
return None
|
|
462
|
-
|
|
463
|
-
def _extract_last_cursor_from_partial_data(self, response: Optional[requests.Response]) -> Optional[str]:
|
|
464
|
-
"""
|
|
465
|
-
Extract the last processed cursor value from partial data for INTERNAL_SERVER_ERROR recovery.
|
|
466
|
-
|
|
467
|
-
This method retrieves partial data from a failed INTERNAL_SERVER_ERROR job and extracts
|
|
468
|
-
the updatedAt value of the last record, which can be used to resume processing from that point.
|
|
469
|
-
Only used in INTERNAL_SERVER_ERROR scenarios with checkpointing support.
|
|
470
|
-
|
|
471
|
-
Args:
|
|
472
|
-
response: The response containing partial data information
|
|
473
|
-
|
|
474
|
-
Returns:
|
|
475
|
-
Optional[str]: The cursor value of the last processed record, or None if unavailable
|
|
476
|
-
"""
|
|
477
|
-
if not response:
|
|
478
|
-
return None
|
|
479
|
-
|
|
480
|
-
try:
|
|
481
|
-
parsed_response = response.json().get("data", {}).get("node", {})
|
|
482
|
-
partial_data_url = parsed_response.get("partialDataUrl")
|
|
483
|
-
|
|
484
|
-
if not partial_data_url:
|
|
485
|
-
return None
|
|
486
|
-
|
|
487
|
-
# Download the partial data
|
|
488
|
-
_, partial_response = self.http_client.send_request(http_method="GET", url=partial_data_url, request_kwargs={"stream": True})
|
|
489
|
-
partial_response.raise_for_status()
|
|
490
|
-
|
|
491
|
-
last_record = None
|
|
492
|
-
# Read through the JSONL data to find the last record
|
|
493
|
-
for line in partial_response.iter_lines(decode_unicode=True):
|
|
494
|
-
if line and line.strip() and line.strip() != END_OF_FILE:
|
|
495
|
-
try:
|
|
496
|
-
record = loads(line)
|
|
497
|
-
# Look for the main record types (Order, Product, etc.)
|
|
498
|
-
if record.get("__typename") in ["Order", "Product", "Customer", "FulfillmentOrder"]:
|
|
499
|
-
last_record = record
|
|
500
|
-
except Exception:
|
|
501
|
-
continue
|
|
502
|
-
|
|
503
|
-
# Extract the updatedAt cursor from the last record
|
|
504
|
-
if last_record and "updatedAt" in last_record:
|
|
505
|
-
cursor_value = last_record["updatedAt"]
|
|
506
|
-
return cursor_value
|
|
507
|
-
|
|
508
|
-
except Exception as e:
|
|
509
|
-
# Failed to extract cursor from partial data
|
|
510
|
-
pass
|
|
511
|
-
|
|
512
|
-
return None
|
|
513
|
-
|
|
514
376
|
def _on_timeout_job(self, **kwargs) -> AirbyteTracedException:
|
|
515
377
|
raise ShopifyBulkExceptions.BulkJobTimout(
|
|
516
378
|
f"The BULK Job: `{self._job_id}` exited with {self._job_state}, please reduce the `GraphQL BULK Date Range in Days` in SOURCES > Your Shopify Source > SETTINGS.",
|
|
@@ -665,42 +527,45 @@ class ShopifyBulkManager:
|
|
|
665
527
|
step = self._job_size if self._job_size else self._job_size_min
|
|
666
528
|
return slice_start.add(days=step)
|
|
667
529
|
|
|
668
|
-
def _adjust_slice_end(
|
|
530
|
+
def _adjust_slice_end(
|
|
531
|
+
self, slice_end: datetime, checkpointed_cursor: Optional[str] = None, filter_checkpointed_cursor: Optional[str] = None
|
|
532
|
+
) -> datetime:
|
|
669
533
|
"""
|
|
670
|
-
Choose between the existing `slice_end` value or `checkpointed_cursor` value, if provided.
|
|
534
|
+
Choose between the existing `slice_end` value or `checkpointed_cursor` value or `filter_checkpointed_cursor` value, if provided.
|
|
671
535
|
|
|
672
536
|
Optionally: raises the `transient` error if the checkpoint collision occurs.
|
|
537
|
+
|
|
538
|
+
Note: filter_checkpointed_cursor is only used when cursor field is ID for streams like Customer Address etc.
|
|
539
|
+
This method should return a datetime from last checkpointed value to adjust slice end, when cursor value is ID (int type)
|
|
540
|
+
method gets end datetime from filter_checkpointed_cursor, which is value from filter field from last record.
|
|
541
|
+
See https://github.com/airbytehq/oncall/issues/9052 for more details.
|
|
673
542
|
"""
|
|
674
543
|
|
|
675
544
|
if checkpointed_cursor:
|
|
676
|
-
# Check for collision and provide more context in the error
|
|
677
545
|
if self._checkpoint_cursor_has_collision(checkpointed_cursor):
|
|
678
|
-
# For INTERNAL_SERVER_ERROR recovery, if the cursor is the same, we might need to skip ahead slightly
|
|
679
|
-
# This can happen if the failure occurred right at the boundary of what was already processed
|
|
680
|
-
if hasattr(self, "_job_extracted_checkpoint_cursor") and self._job_extracted_checkpoint_cursor == checkpointed_cursor:
|
|
681
|
-
pass # Collision from INTERNAL_SERVER_ERROR recovery at boundary
|
|
682
|
-
|
|
683
546
|
raise ShopifyBulkExceptions.BulkJobCheckpointCollisionError(
|
|
684
|
-
f"The stream: `{self.http_client.name}` checkpoint collision is detected.
|
|
547
|
+
f"The stream: `{self.http_client.name}` checkpoint collision is detected. Try to increase the `BULK Job checkpoint (rows collected)` to the bigger value. The stream will be synced again during the next sync attempt."
|
|
685
548
|
)
|
|
686
549
|
# set the checkpointed cursor value
|
|
687
550
|
self._set_last_checkpoint_cursor_value(checkpointed_cursor)
|
|
688
|
-
|
|
551
|
+
if isinstance(checkpointed_cursor, str):
|
|
552
|
+
return pdm.parse(checkpointed_cursor)
|
|
553
|
+
if isinstance(checkpointed_cursor, int):
|
|
554
|
+
return pdm.parse(filter_checkpointed_cursor)
|
|
689
555
|
|
|
690
556
|
return slice_end
|
|
691
557
|
|
|
692
|
-
def get_adjusted_job_end(
|
|
558
|
+
def get_adjusted_job_end(
|
|
559
|
+
self,
|
|
560
|
+
slice_start: datetime,
|
|
561
|
+
slice_end: datetime,
|
|
562
|
+
checkpointed_cursor: Optional[str] = None,
|
|
563
|
+
filter_checkpointed_cursor: Optional[str] = None,
|
|
564
|
+
) -> datetime:
|
|
693
565
|
if self._job_adjust_slice_from_checkpoint:
|
|
694
566
|
# set the checkpointing to default, before the next slice is emitted, to avoid inf.loop
|
|
695
567
|
self._reset_checkpointing()
|
|
696
|
-
|
|
697
|
-
if self._job_extracted_checkpoint_cursor:
|
|
698
|
-
extracted_cursor = self._job_extracted_checkpoint_cursor
|
|
699
|
-
self._job_extracted_checkpoint_cursor = None
|
|
700
|
-
cursor_to_use = extracted_cursor
|
|
701
|
-
else:
|
|
702
|
-
cursor_to_use = checkpointed_cursor or self._job_last_checkpoint_cursor_value
|
|
703
|
-
return self._adjust_slice_end(slice_end, cursor_to_use)
|
|
568
|
+
return self._adjust_slice_end(slice_end, checkpointed_cursor, filter_checkpointed_cursor)
|
|
704
569
|
|
|
705
570
|
if self._is_long_running_job:
|
|
706
571
|
self._job_size_reduce_next()
|
|
@@ -178,6 +178,11 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
|
|
|
178
178
|
# Setting the check point interval to the limit of the records output
|
|
179
179
|
state_checkpoint_interval = 250
|
|
180
180
|
|
|
181
|
+
def __init__(self, config: Dict):
|
|
182
|
+
super().__init__(config)
|
|
183
|
+
# _filter_checkpointed_cursor used to checkpoint streams with cursor field - ID in job.get_adjusted_job_end
|
|
184
|
+
self._filter_checkpointed_cursor = None
|
|
185
|
+
|
|
181
186
|
@property
|
|
182
187
|
def filter_by_state_checkpoint(self) -> bool:
|
|
183
188
|
"""
|
|
@@ -216,7 +221,12 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
|
|
|
216
221
|
params[self.filter_field] = stream_state.get(self.cursor_field)
|
|
217
222
|
return params
|
|
218
223
|
|
|
219
|
-
def track_checkpoint_cursor(self, record_value: Union[str, int]) -> None:
|
|
224
|
+
def track_checkpoint_cursor(self, record_value: Union[str, int], filter_record_value: Optional[str] = None) -> None:
|
|
225
|
+
"""
|
|
226
|
+
Tracks _checkpoint_cursor value (values from cursor field) and _filter_checkpointed_cursor value (value from filter field).
|
|
227
|
+
_filter_checkpointed_cursor value is only used when cursor field is ID for streams like Customer Address etc.
|
|
228
|
+
When after canceled/failed job source tries to adjust stream slice (see ShopifyBulkManager._adjust_slice_end()).
|
|
229
|
+
"""
|
|
220
230
|
if self.filter_by_state_checkpoint:
|
|
221
231
|
# set checkpoint cursor
|
|
222
232
|
if not self._checkpoint_cursor:
|
|
@@ -225,6 +235,10 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
|
|
|
225
235
|
if str(record_value) >= str(self._checkpoint_cursor):
|
|
226
236
|
self._checkpoint_cursor = record_value
|
|
227
237
|
|
|
238
|
+
if filter_record_value:
|
|
239
|
+
if not self._filter_checkpointed_cursor or str(filter_record_value) >= str(self._filter_checkpointed_cursor):
|
|
240
|
+
self._filter_checkpointed_cursor = filter_record_value
|
|
241
|
+
|
|
228
242
|
def should_checkpoint(self, index: int) -> bool:
|
|
229
243
|
return self.filter_by_state_checkpoint and index >= self.state_checkpoint_interval
|
|
230
244
|
|
|
@@ -242,7 +256,8 @@ class IncrementalShopifyStream(ShopifyStream, ABC):
|
|
|
242
256
|
for index, record in enumerate(records_slice, 1):
|
|
243
257
|
if self.cursor_field in record:
|
|
244
258
|
record_value = record.get(self.cursor_field, self.default_state_comparison_value)
|
|
245
|
-
self.track_checkpoint_cursor(record_value)
|
|
259
|
+
filter_record_value = record.get(self.filter_field) if self.filter_field else None
|
|
260
|
+
self.track_checkpoint_cursor(record_value, filter_record_value)
|
|
246
261
|
if record_value:
|
|
247
262
|
if record_value >= state_value:
|
|
248
263
|
yield record
|
|
@@ -815,7 +830,7 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
|
|
|
815
830
|
self.emit_slice_message(start, slice_end)
|
|
816
831
|
yield {"start": start.to_rfc3339_string(), "end": slice_end.to_rfc3339_string()}
|
|
817
832
|
# increment the end of the slice or reduce the next slice
|
|
818
|
-
start = self.job_manager.get_adjusted_job_end(start, slice_end, self._checkpoint_cursor)
|
|
833
|
+
start = self.job_manager.get_adjusted_job_end(start, slice_end, self._checkpoint_cursor, self._filter_checkpointed_cursor)
|
|
819
834
|
else:
|
|
820
835
|
# for the streams that don't support filtering
|
|
821
836
|
yield {}
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
+
import sys
|
|
7
8
|
from typing import Any, Iterable, Mapping, MutableMapping, Optional
|
|
8
9
|
|
|
9
10
|
import requests
|
|
@@ -138,6 +139,12 @@ class MetafieldProducts(IncrementalShopifyGraphQlBulkStream):
|
|
|
138
139
|
parent_stream_class = Products
|
|
139
140
|
bulk_query: MetafieldProduct = MetafieldProduct
|
|
140
141
|
|
|
142
|
+
state_checkpoint_interval = sys.maxsize
|
|
143
|
+
|
|
144
|
+
@property
|
|
145
|
+
def filter_by_state_checkpoint(self) -> bool:
|
|
146
|
+
return True
|
|
147
|
+
|
|
141
148
|
|
|
142
149
|
class ProductImages(IncrementalShopifyGraphQlBulkStream):
|
|
143
150
|
parent_stream_class = Products
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|