airbyte-source-shopify 2.4.14.dev202407181247__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/METADATA +4 -4
- {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/RECORD +25 -27
- {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/WHEEL +1 -1
- source_shopify/auth.py +0 -1
- source_shopify/config_migrations.py +4 -1
- source_shopify/http_request.py +4 -2
- source_shopify/schemas/countries.json +7 -19
- source_shopify/schemas/customer_journey_summary.json +228 -148
- source_shopify/schemas/deleted_products.json +27 -0
- source_shopify/schemas/orders.json +38 -0
- source_shopify/schemas/product_variants.json +26 -8
- source_shopify/schemas/profile_location_groups.json +10 -0
- source_shopify/scopes.py +7 -6
- source_shopify/shopify_graphql/bulk/exceptions.py +6 -1
- source_shopify/shopify_graphql/bulk/job.py +173 -65
- source_shopify/shopify_graphql/bulk/query.py +440 -88
- source_shopify/shopify_graphql/bulk/record.py +260 -29
- source_shopify/shopify_graphql/bulk/retry.py +12 -12
- source_shopify/shopify_graphql/bulk/tools.py +17 -2
- source_shopify/source.py +6 -10
- source_shopify/spec.json +11 -5
- source_shopify/streams/base_streams.py +181 -54
- source_shopify/streams/streams.py +211 -58
- source_shopify/utils.py +47 -12
- source_shopify/schemas/customer_saved_search.json +0 -32
- source_shopify/schemas/products_graph_ql.json +0 -123
- source_shopify/shopify_graphql/graphql.py +0 -64
- source_shopify/shopify_graphql/schema.py +0 -29442
- {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/entry_points.txt +0 -0
|
@@ -3,12 +3,14 @@
|
|
|
3
3
|
#
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
import logging
|
|
7
6
|
from dataclasses import dataclass, field
|
|
7
|
+
from functools import cached_property
|
|
8
8
|
from io import TextIOWrapper
|
|
9
9
|
from json import loads
|
|
10
10
|
from os import remove
|
|
11
|
-
from typing import Any, Callable,
|
|
11
|
+
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union
|
|
12
|
+
|
|
13
|
+
from source_shopify.utils import LOGGER
|
|
12
14
|
|
|
13
15
|
from .exceptions import ShopifyBulkExceptions
|
|
14
16
|
from .query import ShopifyBulkQuery
|
|
@@ -17,43 +19,194 @@ from .tools import END_OF_FILE, BulkTools
|
|
|
17
19
|
|
|
18
20
|
@dataclass
|
|
19
21
|
class ShopifyBulkRecord:
|
|
22
|
+
"""
|
|
23
|
+
ShopifyBulkRecord is a class designed to handle the processing of bulk records from Shopify's GraphQL API.
|
|
24
|
+
|
|
25
|
+
Attributes:
|
|
26
|
+
query (ShopifyBulkQuery): The query object associated with the bulk record.
|
|
27
|
+
parent_stream_name (Optional[str]): The name of the parent stream, if any.
|
|
28
|
+
parent_stream_cursor (Optional[str]): The cursor of the parent stream, if any.
|
|
29
|
+
buffer (List[MutableMapping[str, Any]]): A buffer to store records before processing.
|
|
30
|
+
composition (Optional[Mapping[str, Any]]): The composition of the record, derived from the query.
|
|
31
|
+
record_process_components (Optional[Callable[[MutableMapping], MutableMapping]]): A callable to process record components.
|
|
32
|
+
components (List[str]): A list of components derived from the record composition.
|
|
33
|
+
_parent_stream_cursor_value (Optional[str | int]): The current value of the parent stream cursor.
|
|
34
|
+
record_composed (int): The count of records composed.
|
|
35
|
+
|
|
36
|
+
Methods:
|
|
37
|
+
__post_init__(): Initializes additional attributes after the object is created.
|
|
38
|
+
tools(): Returns an instance of BulkTools.
|
|
39
|
+
has_parent_stream(): Checks if the record has a parent stream.
|
|
40
|
+
parent_cursor_key(): Returns the key for the parent cursor if a parent stream exists.
|
|
41
|
+
check_type(record, types): Checks if the record's type matches the given type(s).
|
|
42
|
+
_parse_parent_state_value(value): Parses the parent state value and converts it to the appropriate format.
|
|
43
|
+
_set_parent_state_value(value): Sets the parent state value by parsing the provided value and updating the parent stream cursor value.
|
|
44
|
+
_track_parent_cursor(record): Tracks the cursor value from the parent stream if it exists and updates the parent state.
|
|
45
|
+
get_parent_stream_state(): Retrieves the state of the parent stream if it exists.
|
|
46
|
+
record_new(record): Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
|
|
47
|
+
record_new_component(record): Processes a new record by extracting its component type and adding it to the appropriate placeholder in the components list.
|
|
48
|
+
component_prepare(record): Prepares the given record by initializing a "record_components" dictionary.
|
|
49
|
+
buffer_flush(): Flushes the buffer by processing each record in the buffer.
|
|
50
|
+
record_compose(record): Processes a given record and yields buffered records if certain conditions are met.
|
|
51
|
+
process_line(jsonl_file): Processes a JSON Lines (jsonl) file and yields records.
|
|
52
|
+
record_resolve_id(record): Resolves and updates the 'id' field in the given record.
|
|
53
|
+
produce_records(filename): Reads the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
|
|
54
|
+
read_file(filename, remove_file): Reads a file and produces records from it.
|
|
55
|
+
"""
|
|
56
|
+
|
|
20
57
|
query: ShopifyBulkQuery
|
|
58
|
+
parent_stream_name: Optional[str] = None
|
|
59
|
+
parent_stream_cursor: Optional[str] = None
|
|
21
60
|
|
|
22
61
|
# default buffer
|
|
23
62
|
buffer: List[MutableMapping[str, Any]] = field(init=False, default_factory=list)
|
|
24
63
|
|
|
25
|
-
# default logger
|
|
26
|
-
logger: Final[logging.Logger] = logging.getLogger("airbyte")
|
|
27
|
-
|
|
28
64
|
def __post_init__(self) -> None:
|
|
29
65
|
self.composition: Optional[Mapping[str, Any]] = self.query.record_composition
|
|
30
66
|
self.record_process_components: Optional[Callable[[MutableMapping], MutableMapping]] = self.query.record_process_components
|
|
31
67
|
self.components: List[str] = self.composition.get("record_components", []) if self.composition else []
|
|
68
|
+
# We track the parent state for BULK substreams outside of the main CDK methods,
|
|
69
|
+
# to be able to update the moving parent state when there are no substream records to emit.
|
|
70
|
+
self._parent_stream_cursor_value: Optional[str | int] = None
|
|
71
|
+
# how many records composed
|
|
72
|
+
self.record_composed: int = 0
|
|
32
73
|
|
|
33
|
-
@
|
|
74
|
+
@cached_property
|
|
34
75
|
def tools(self) -> BulkTools:
|
|
35
76
|
return BulkTools()
|
|
36
77
|
|
|
78
|
+
@cached_property
|
|
79
|
+
def has_parent_stream(self) -> bool:
|
|
80
|
+
return True if self.parent_stream_name and self.parent_stream_cursor else False
|
|
81
|
+
|
|
82
|
+
@cached_property
|
|
83
|
+
def parent_cursor_key(self) -> Optional[str]:
|
|
84
|
+
if self.has_parent_stream:
|
|
85
|
+
return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
|
|
86
|
+
|
|
37
87
|
@staticmethod
|
|
38
88
|
def check_type(record: Mapping[str, Any], types: Union[List[str], str]) -> bool:
|
|
89
|
+
"""
|
|
90
|
+
Check if the record's type matches the given type(s).
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
record (Mapping[str, Any]): The record to check, expected to have a "__typename" key.
|
|
94
|
+
types (Union[List[str], str]): The type(s) to check against. Can be a single type (str) or a list of types (List[str]).
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
bool: True if the record's type matches one of the given types, False otherwise.
|
|
98
|
+
"""
|
|
99
|
+
|
|
39
100
|
record_type = record.get("__typename")
|
|
40
101
|
if isinstance(types, list):
|
|
41
102
|
return any(record_type == t for t in types)
|
|
42
103
|
else:
|
|
43
104
|
return record_type == types
|
|
44
105
|
|
|
106
|
+
def _parse_parent_state_value(self, value: str | int) -> str | int:
|
|
107
|
+
"""
|
|
108
|
+
Parses the parent state value and converts it to the appropriate format.
|
|
109
|
+
|
|
110
|
+
If the value is a string, it converts it to RFC 3339 datetime format using the `_datetime_str_to_rfc3339` method.
|
|
111
|
+
If the value is an integer, it returns the value as is.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
value (str | int): The parent state value to be parsed.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
str | int: The parsed parent state value in the appropriate format.
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
if isinstance(value, str):
|
|
121
|
+
return self.tools._datetime_str_to_rfc3339(value)
|
|
122
|
+
elif isinstance(value, int):
|
|
123
|
+
return value
|
|
124
|
+
|
|
125
|
+
def _set_parent_state_value(self, value: str | int) -> None:
|
|
126
|
+
"""
|
|
127
|
+
Sets the parent state value by parsing the provided value and updating the
|
|
128
|
+
parent stream cursor value. If the parent stream cursor value is already set,
|
|
129
|
+
it updates it to the maximum of the current and parsed values.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
value (str | int): The value to be parsed and set as the parent state value.
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
parsed_value = self._parse_parent_state_value(value)
|
|
136
|
+
if not self._parent_stream_cursor_value:
|
|
137
|
+
self._parent_stream_cursor_value = parsed_value
|
|
138
|
+
else:
|
|
139
|
+
self._parent_stream_cursor_value = max(self._parent_stream_cursor_value, parsed_value)
|
|
140
|
+
|
|
141
|
+
def _track_parent_cursor(self, record: MutableMapping[str, Any]) -> None:
|
|
142
|
+
"""
|
|
143
|
+
Tracks the cursor value from the parent stream if it exists and updates the parent state.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
record (MutableMapping[str, Any]): The record from which to extract the parent cursor value.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
None
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
if self.has_parent_stream:
|
|
153
|
+
cursor_value: Optional[str | int] = record.get(self.parent_cursor_key, None)
|
|
154
|
+
if cursor_value:
|
|
155
|
+
self._set_parent_state_value(cursor_value)
|
|
156
|
+
|
|
157
|
+
def get_parent_stream_state(self) -> Optional[Union[str, Mapping[str, Any]]]:
|
|
158
|
+
"""
|
|
159
|
+
Retrieve the state of the parent stream if it exists.
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
Optional[Union[str, Mapping[str, Any]]]: A dictionary containing the parent stream cursor and its value
|
|
163
|
+
if the parent stream exists and has a cursor value, otherwise None.
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
if self.has_parent_stream and self._parent_stream_cursor_value:
|
|
167
|
+
return {self.parent_stream_cursor: self._parent_stream_cursor_value}
|
|
168
|
+
|
|
45
169
|
def record_new(self, record: MutableMapping[str, Any]) -> None:
|
|
170
|
+
"""
|
|
171
|
+
Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
record (MutableMapping[str, Any]): The record to be processed.
|
|
175
|
+
"""
|
|
176
|
+
|
|
46
177
|
record = self.component_prepare(record)
|
|
47
178
|
record.pop("__typename")
|
|
48
179
|
self.buffer.append(record)
|
|
49
180
|
|
|
50
181
|
def record_new_component(self, record: MutableMapping[str, Any]) -> None:
|
|
182
|
+
"""
|
|
183
|
+
Processes a new record by extracting its component type and adding it to the appropriate
|
|
184
|
+
placeholder in the components list.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
record (MutableMapping[str, Any]): The record to be processed.
|
|
188
|
+
It is expected to contain a "__typename" key which indicates the component type.
|
|
189
|
+
"""
|
|
190
|
+
|
|
51
191
|
component = record.get("__typename")
|
|
52
192
|
record.pop("__typename")
|
|
53
193
|
# add component to its placeholder in the components list
|
|
54
194
|
self.buffer[-1]["record_components"][component].append(record)
|
|
55
195
|
|
|
56
196
|
def component_prepare(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
|
|
197
|
+
"""
|
|
198
|
+
Prepares the given record by initializing a "record_components" dictionary.
|
|
199
|
+
|
|
200
|
+
If the instance has components, this method will add a "record_components" key to the record,
|
|
201
|
+
with each component as a key and an empty list as its value.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
record (MutableMapping[str, Any]): The record to be prepared.
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
MutableMapping[str, Any]: The updated record with initialized "record_components".
|
|
208
|
+
"""
|
|
209
|
+
|
|
57
210
|
if self.components:
|
|
58
211
|
record["record_components"] = {}
|
|
59
212
|
for component in self.components:
|
|
@@ -61,8 +214,24 @@ class ShopifyBulkRecord:
|
|
|
61
214
|
return record
|
|
62
215
|
|
|
63
216
|
def buffer_flush(self) -> Iterable[Mapping[str, Any]]:
|
|
217
|
+
"""
|
|
218
|
+
Flushes the buffer by processing each record in the buffer.
|
|
219
|
+
|
|
220
|
+
For each record in the buffer:
|
|
221
|
+
- Tracks the parent state using `_track_parent_cursor`.
|
|
222
|
+
- Resolves the record ID from `str` to `int` using `record_resolve_id`.
|
|
223
|
+
- Processes record components using `record_process_components`.
|
|
224
|
+
|
|
225
|
+
Yields:
|
|
226
|
+
Iterable[Mapping[str, Any]]: Processed records from the buffer.
|
|
227
|
+
|
|
228
|
+
After processing, the buffer is cleared.
|
|
229
|
+
"""
|
|
230
|
+
|
|
64
231
|
if len(self.buffer) > 0:
|
|
65
232
|
for record in self.buffer:
|
|
233
|
+
# track the parent state
|
|
234
|
+
self._track_parent_cursor(record)
|
|
66
235
|
# resolve id from `str` to `int`
|
|
67
236
|
record = self.record_resolve_id(record)
|
|
68
237
|
# process record components
|
|
@@ -72,10 +241,25 @@ class ShopifyBulkRecord:
|
|
|
72
241
|
|
|
73
242
|
def record_compose(self, record: Mapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]:
|
|
74
243
|
"""
|
|
244
|
+
Processes a given record and yields buffered records if certain conditions are met.
|
|
245
|
+
|
|
246
|
+
Args:
|
|
247
|
+
record (Mapping[str, Any]): The record to be processed.
|
|
248
|
+
|
|
249
|
+
Returns:
|
|
250
|
+
Optional[Iterable[MutableMapping[str, Any]]]: An iterable of buffered records if conditions are met, otherwise None.
|
|
251
|
+
|
|
252
|
+
The method performs the following steps:
|
|
253
|
+
1. Checks if the record matches the type specified in the "new_record" composition.
|
|
254
|
+
- If it matches, it yields any buffered records from previous iterations and registers the new record.
|
|
255
|
+
2. Checks if the record matches any of the specified components.
|
|
256
|
+
- If it matches, it registers the new component record.
|
|
257
|
+
|
|
75
258
|
Step 1: register the new record by its `__typename`
|
|
76
259
|
Step 2: check for `components` by their `__typename` and add to the placeholder
|
|
77
260
|
Step 3: repeat until the `<END_OF_FILE>`.
|
|
78
261
|
"""
|
|
262
|
+
|
|
79
263
|
if self.check_type(record, self.composition.get("new_record")):
|
|
80
264
|
# emit from previous iteration, if present
|
|
81
265
|
yield from self.buffer_flush()
|
|
@@ -86,9 +270,21 @@ class ShopifyBulkRecord:
|
|
|
86
270
|
self.record_new_component(record)
|
|
87
271
|
|
|
88
272
|
def process_line(self, jsonl_file: TextIOWrapper) -> Iterable[MutableMapping[str, Any]]:
|
|
89
|
-
|
|
273
|
+
"""
|
|
274
|
+
Processes a JSON Lines (jsonl) file and yields records.
|
|
275
|
+
|
|
276
|
+
Args:
|
|
277
|
+
jsonl_file (TextIOWrapper): A file-like object containing JSON Lines data.
|
|
278
|
+
|
|
279
|
+
Yields:
|
|
280
|
+
Iterable[MutableMapping[str, Any]]: An iterable of dictionaries representing the processed records.
|
|
281
|
+
|
|
282
|
+
The method reads each line from the provided jsonl_file. It exits the loop when it encounters the <end_of_file> marker.
|
|
283
|
+
For non-empty lines, it parses the JSON content and yields the resulting records. Finally, it emits any remaining
|
|
284
|
+
records in the buffer.
|
|
285
|
+
"""
|
|
286
|
+
|
|
90
287
|
for line in jsonl_file:
|
|
91
|
-
# we exit from the loop when receive <end_of_file> (file ends)
|
|
92
288
|
if line == END_OF_FILE:
|
|
93
289
|
break
|
|
94
290
|
elif line != "":
|
|
@@ -99,38 +295,76 @@ class ShopifyBulkRecord:
|
|
|
99
295
|
|
|
100
296
|
def record_resolve_id(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
|
|
101
297
|
"""
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
298
|
+
Resolves and updates the 'id' field in the given record.
|
|
299
|
+
|
|
300
|
+
This method extracts the 'id' from the record, checks if it is a string,
|
|
301
|
+
and if so, assigns it to 'admin_graphql_api_id' in the record. It then
|
|
302
|
+
resolves the string 'id' to an integer using the 'resolve_str_id' method
|
|
303
|
+
from the 'tools' attribute and updates the 'id' field in the record.
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
record (MutableMapping[str, Any]): The record containing the 'id' field to be resolved.
|
|
307
|
+
Example:
|
|
308
|
+
{ "id": "gid://shopify/Order/19435458986123"}
|
|
309
|
+
|
|
310
|
+
Returns:
|
|
311
|
+
MutableMapping[str, Any]: The updated record with the resolved 'id' field.
|
|
312
|
+
Example:
|
|
313
|
+
{ "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
|
|
314
|
+
"""
|
|
315
|
+
|
|
114
316
|
id = record.get("id")
|
|
115
317
|
if id and isinstance(id, str):
|
|
116
318
|
record["admin_graphql_api_id"] = id
|
|
117
|
-
# extracting the int(id) and reassign
|
|
118
319
|
record["id"] = self.tools.resolve_str_id(id)
|
|
119
320
|
return record
|
|
120
321
|
|
|
121
322
|
def produce_records(self, filename: str) -> Iterable[MutableMapping[str, Any]]:
|
|
122
323
|
"""
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
324
|
+
Produce records from a JSON Lines (jsonl) file.
|
|
325
|
+
|
|
326
|
+
This method reads a JSON Lines file, processes each line, converts the field names to snake_case,
|
|
327
|
+
and yields each processed record. It also keeps track of the number of records processed.
|
|
328
|
+
|
|
329
|
+
Args:
|
|
330
|
+
filename (str): The path to the JSON Lines file.
|
|
331
|
+
|
|
332
|
+
Yields:
|
|
333
|
+
MutableMapping[str, Any]: A dictionary representing a processed record with field names in snake_case.
|
|
127
334
|
"""
|
|
128
335
|
|
|
129
336
|
with open(filename, "r") as jsonl_file:
|
|
337
|
+
# reset the counter
|
|
338
|
+
self.record_composed = 0
|
|
339
|
+
|
|
130
340
|
for record in self.process_line(jsonl_file):
|
|
131
341
|
yield self.tools.fields_names_to_snake_case(record)
|
|
342
|
+
self.record_composed += 1
|
|
132
343
|
|
|
133
344
|
def read_file(self, filename: str, remove_file: Optional[bool] = True) -> Iterable[Mapping[str, Any]]:
|
|
345
|
+
"""
|
|
346
|
+
Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
|
|
347
|
+
|
|
348
|
+
Args:
|
|
349
|
+
filename (str): The name of the file to read.
|
|
350
|
+
remove_file (Optional[bool]): Flag indicating whether to remove the file after reading. Defaults to True.
|
|
351
|
+
|
|
352
|
+
Example:
|
|
353
|
+
Note: typically the `filename` is taken from the `result_url` string provided in the response.
|
|
354
|
+
|
|
355
|
+
`bulk-4039263649981.jsonl` :
|
|
356
|
+
- the `4039263649981` is the `id` of the COMPLETED BULK Job with `result_url`
|
|
357
|
+
|
|
358
|
+
Yields:
|
|
359
|
+
Iterable[Mapping[str, Any]]: An iterable of records produced from the file.
|
|
360
|
+
|
|
361
|
+
Raises:
|
|
362
|
+
ShopifyBulkExceptions.BulkRecordProduceError: If an error occurs while producing records from the file.
|
|
363
|
+
|
|
364
|
+
Logs:
|
|
365
|
+
Logs an info message if the file removal fails.
|
|
366
|
+
"""
|
|
367
|
+
|
|
134
368
|
try:
|
|
135
369
|
# produce records from saved result
|
|
136
370
|
yield from self.produce_records(filename)
|
|
@@ -144,8 +378,5 @@ class ShopifyBulkRecord:
|
|
|
144
378
|
try:
|
|
145
379
|
remove(filename)
|
|
146
380
|
except Exception as e:
|
|
147
|
-
|
|
148
|
-
# we should pass here, if the file wasn't removed , it's either:
|
|
149
|
-
# - doesn't exist
|
|
150
|
-
# - will be dropped with the container shut down.
|
|
381
|
+
LOGGER.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
|
|
151
382
|
pass
|
|
@@ -1,23 +1,24 @@
|
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
from functools import wraps
|
|
5
4
|
from time import sleep
|
|
6
5
|
from typing import Any, Callable, Final, Optional, Tuple, Type
|
|
7
6
|
|
|
7
|
+
from source_shopify.utils import LOGGER
|
|
8
|
+
|
|
8
9
|
from .exceptions import ShopifyBulkExceptions
|
|
9
10
|
|
|
11
|
+
|
|
10
12
|
BULK_RETRY_ERRORS: Final[Tuple] = (
|
|
11
13
|
ShopifyBulkExceptions.BulkJobBadResponse,
|
|
12
14
|
ShopifyBulkExceptions.BulkJobError,
|
|
13
15
|
)
|
|
14
16
|
|
|
15
17
|
|
|
16
|
-
def bulk_retry_on_exception(
|
|
18
|
+
def bulk_retry_on_exception(more_exceptions: Optional[Tuple[Type[Exception], ...]] = None) -> Callable:
|
|
17
19
|
"""
|
|
18
20
|
A decorator to retry a function when specified exceptions are raised.
|
|
19
21
|
|
|
20
|
-
:param logger: Number of times to retry.
|
|
21
22
|
:param more_exceptions: A tuple of exception types to catch.
|
|
22
23
|
"""
|
|
23
24
|
|
|
@@ -31,29 +32,28 @@ def bulk_retry_on_exception(logger: logging.Logger, more_exceptions: Optional[Tu
|
|
|
31
32
|
except BULK_RETRY_ERRORS or more_exceptions as ex:
|
|
32
33
|
current_retries += 1
|
|
33
34
|
if current_retries > self._job_max_retries:
|
|
34
|
-
|
|
35
|
+
LOGGER.error("Exceeded retry limit. Giving up.")
|
|
35
36
|
raise
|
|
36
37
|
else:
|
|
37
|
-
|
|
38
|
-
f"Stream `{self.http_client.
|
|
38
|
+
LOGGER.warning(
|
|
39
|
+
f"Stream `{self.http_client.name}`: {ex}. Retrying {current_retries}/{self._job_max_retries} after {self._job_backoff_time} seconds."
|
|
39
40
|
)
|
|
40
41
|
sleep(self._job_backoff_time)
|
|
41
42
|
except ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError:
|
|
42
43
|
if self._concurrent_attempt == self._concurrent_max_retry:
|
|
43
44
|
message = f"The BULK Job couldn't be created at this time, since another job is running."
|
|
44
|
-
|
|
45
|
+
LOGGER.error(message)
|
|
45
46
|
raise ShopifyBulkExceptions.BulkJobConcurrentError(message)
|
|
46
47
|
|
|
47
48
|
self._concurrent_attempt += 1
|
|
48
|
-
|
|
49
|
-
f"Stream: `{self.http_client.
|
|
49
|
+
LOGGER.warning(
|
|
50
|
+
f"Stream: `{self.http_client.name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, attempt: {self._concurrent_attempt}.",
|
|
50
51
|
)
|
|
51
52
|
sleep(self._concurrent_interval)
|
|
52
53
|
except ShopifyBulkExceptions.BulkJobRedirectToOtherShopError:
|
|
53
|
-
|
|
54
|
-
f"Stream: `{self.http_client.
|
|
54
|
+
LOGGER.warning(
|
|
55
|
+
f"Stream: `{self.http_client.name}`, the `shop name` differs from the provided in `input configuration`. Switching to the `{self._tools.shop_name_from_url(self.base_url)}`.",
|
|
55
56
|
)
|
|
56
|
-
self._switch_base_url()
|
|
57
57
|
|
|
58
58
|
return wrapper
|
|
59
59
|
|
|
@@ -11,6 +11,7 @@ import pendulum as pdm
|
|
|
11
11
|
|
|
12
12
|
from .exceptions import ShopifyBulkExceptions
|
|
13
13
|
|
|
14
|
+
|
|
14
15
|
# default end line tag
|
|
15
16
|
END_OF_FILE: str = "<end_of_file>"
|
|
16
17
|
BULK_PARENT_KEY: str = "__parentId"
|
|
@@ -53,7 +54,21 @@ class BulkTools:
|
|
|
53
54
|
)
|
|
54
55
|
|
|
55
56
|
@staticmethod
|
|
56
|
-
def
|
|
57
|
+
def shop_name_from_url(url: str) -> str:
|
|
58
|
+
match = re.search(r"https://(.*?)(\.myshopify)", url)
|
|
59
|
+
if match:
|
|
60
|
+
return match.group(1)
|
|
61
|
+
else:
|
|
62
|
+
# safety net, if there is an error parsing url,
|
|
63
|
+
# or no match is found
|
|
64
|
+
return url
|
|
65
|
+
|
|
66
|
+
@staticmethod
|
|
67
|
+
def _datetime_str_to_rfc3339(value: str) -> str:
|
|
68
|
+
return pdm.parse(value).to_rfc3339_string()
|
|
69
|
+
|
|
70
|
+
@staticmethod
|
|
71
|
+
def from_iso8601_to_rfc3339(record: Mapping[str, Any], field: str) -> Optional[str]:
|
|
57
72
|
"""
|
|
58
73
|
Converts date-time as follows:
|
|
59
74
|
Input: "2023-01-01T15:00:00Z"
|
|
@@ -63,7 +78,7 @@ class BulkTools:
|
|
|
63
78
|
# some fields that are expected to be resolved as ids might not be populated for the particular `RECORD`,
|
|
64
79
|
# we should return `None` to make the field `null` in the output as the result of the transformation.
|
|
65
80
|
target_value = record.get(field)
|
|
66
|
-
return
|
|
81
|
+
return BulkTools._datetime_str_to_rfc3339(target_value) if target_value else record.get(field)
|
|
67
82
|
|
|
68
83
|
def fields_names_to_snake_case(self, dict_input: Optional[Mapping[str, Any]] = None) -> Optional[MutableMapping[str, Any]]:
|
|
69
84
|
# transforming record field names from camel to snake case, leaving the `__parent_id` relation in place
|
source_shopify/source.py
CHANGED
|
@@ -6,11 +6,12 @@
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import Any, List, Mapping, Tuple
|
|
8
8
|
|
|
9
|
+
from requests.exceptions import ConnectionError, RequestException, SSLError
|
|
10
|
+
|
|
9
11
|
from airbyte_cdk.models import FailureType, SyncMode
|
|
10
12
|
from airbyte_cdk.sources import AbstractSource
|
|
11
13
|
from airbyte_cdk.sources.streams import Stream
|
|
12
14
|
from airbyte_cdk.utils import AirbyteTracedException
|
|
13
|
-
from requests.exceptions import ConnectionError, RequestException, SSLError
|
|
14
15
|
|
|
15
16
|
from .auth import MissingAccessTokenError, ShopifyAuthenticator
|
|
16
17
|
from .scopes import ShopifyScopes
|
|
@@ -26,7 +27,7 @@ from .streams.streams import (
|
|
|
26
27
|
CustomerAddress,
|
|
27
28
|
CustomerJourneySummary,
|
|
28
29
|
Customers,
|
|
29
|
-
|
|
30
|
+
DeletedProducts,
|
|
30
31
|
DiscountCodes,
|
|
31
32
|
Disputes,
|
|
32
33
|
DraftOrders,
|
|
@@ -56,8 +57,8 @@ from .streams.streams import (
|
|
|
56
57
|
PriceRules,
|
|
57
58
|
ProductImages,
|
|
58
59
|
Products,
|
|
59
|
-
ProductsGraphQl,
|
|
60
60
|
ProductVariants,
|
|
61
|
+
ProfileLocationGroups,
|
|
61
62
|
Shop,
|
|
62
63
|
SmartCollections,
|
|
63
64
|
TenderTransactions,
|
|
@@ -131,10 +132,6 @@ class SourceShopify(AbstractSource):
|
|
|
131
132
|
def continue_sync_on_stream_failure(self) -> bool:
|
|
132
133
|
return True
|
|
133
134
|
|
|
134
|
-
@property
|
|
135
|
-
def raise_exception_on_missing_stream(self) -> bool:
|
|
136
|
-
return False
|
|
137
|
-
|
|
138
135
|
@staticmethod
|
|
139
136
|
def get_shop_name(config) -> str:
|
|
140
137
|
split_pattern = ".myshopify.com"
|
|
@@ -215,15 +212,14 @@ class SourceShopify(AbstractSource):
|
|
|
215
212
|
PriceRules(config),
|
|
216
213
|
ProductImages(config),
|
|
217
214
|
Products(config),
|
|
218
|
-
|
|
215
|
+
DeletedProducts(config),
|
|
219
216
|
ProductVariants(config),
|
|
220
217
|
Shop(config),
|
|
221
218
|
SmartCollections(config),
|
|
222
219
|
TenderTransactions(config),
|
|
223
220
|
self.select_transactions_stream(config),
|
|
224
|
-
CustomerSavedSearch(config),
|
|
225
221
|
CustomerAddress(config),
|
|
226
|
-
Countries(config),
|
|
222
|
+
Countries(config=config, parent=ProfileLocationGroups(config)),
|
|
227
223
|
]
|
|
228
224
|
|
|
229
225
|
return [
|
source_shopify/spec.json
CHANGED
|
@@ -98,20 +98,26 @@
|
|
|
98
98
|
"description": "Defines which API type (REST/BULK) to use to fetch `Transactions` data. If you are a `Shopify Plus` user, leave the default value to speed up the fetch.",
|
|
99
99
|
"default": false
|
|
100
100
|
},
|
|
101
|
+
"job_product_variants_include_pres_prices": {
|
|
102
|
+
"type": "boolean",
|
|
103
|
+
"title": "Add `Presentment prices` to Product Variants",
|
|
104
|
+
"description": "If enabled, the `Product Variants` stream attempts to include `Presentment prices` field (may affect the performance).",
|
|
105
|
+
"default": true
|
|
106
|
+
},
|
|
101
107
|
"job_termination_threshold": {
|
|
102
108
|
"type": "integer",
|
|
103
109
|
"title": "BULK Job termination threshold",
|
|
104
110
|
"description": "The max time in seconds, after which the single BULK Job should be `CANCELED` and retried. The bigger the value the longer the BULK Job is allowed to run.",
|
|
105
|
-
"default":
|
|
111
|
+
"default": 7200,
|
|
106
112
|
"minimum": 3600,
|
|
107
113
|
"maximum": 21600
|
|
108
114
|
},
|
|
109
115
|
"job_checkpoint_interval": {
|
|
110
116
|
"type": "integer",
|
|
111
|
-
"title": "BULK Job checkpoint (
|
|
112
|
-
"description": "The threshold, after which the single BULK Job should be
|
|
113
|
-
"default":
|
|
114
|
-
"minimum":
|
|
117
|
+
"title": "BULK Job checkpoint (rows collected)",
|
|
118
|
+
"description": "The threshold, after which the single BULK Job should be checkpointed (min: 15k, max: 1M)",
|
|
119
|
+
"default": 100000,
|
|
120
|
+
"minimum": 15000,
|
|
115
121
|
"maximum": 1000000
|
|
116
122
|
}
|
|
117
123
|
}
|