airbyte-source-shopify 2.4.14.dev202407181247__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/METADATA +4 -4
  2. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/RECORD +25 -27
  3. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/WHEEL +1 -1
  4. source_shopify/auth.py +0 -1
  5. source_shopify/config_migrations.py +4 -1
  6. source_shopify/http_request.py +4 -2
  7. source_shopify/schemas/countries.json +7 -19
  8. source_shopify/schemas/customer_journey_summary.json +228 -148
  9. source_shopify/schemas/deleted_products.json +27 -0
  10. source_shopify/schemas/orders.json +38 -0
  11. source_shopify/schemas/product_variants.json +26 -8
  12. source_shopify/schemas/profile_location_groups.json +10 -0
  13. source_shopify/scopes.py +7 -6
  14. source_shopify/shopify_graphql/bulk/exceptions.py +6 -1
  15. source_shopify/shopify_graphql/bulk/job.py +173 -65
  16. source_shopify/shopify_graphql/bulk/query.py +440 -88
  17. source_shopify/shopify_graphql/bulk/record.py +260 -29
  18. source_shopify/shopify_graphql/bulk/retry.py +12 -12
  19. source_shopify/shopify_graphql/bulk/tools.py +17 -2
  20. source_shopify/source.py +6 -10
  21. source_shopify/spec.json +11 -5
  22. source_shopify/streams/base_streams.py +181 -54
  23. source_shopify/streams/streams.py +211 -58
  24. source_shopify/utils.py +47 -12
  25. source_shopify/schemas/customer_saved_search.json +0 -32
  26. source_shopify/schemas/products_graph_ql.json +0 -123
  27. source_shopify/shopify_graphql/graphql.py +0 -64
  28. source_shopify/shopify_graphql/schema.py +0 -29442
  29. {airbyte_source_shopify-2.4.14.dev202407181247.dist-info → airbyte_source_shopify-3.1.0.dist-info}/entry_points.txt +0 -0
source_shopify/shopify_graphql/bulk/record.py CHANGED
@@ -3,12 +3,14 @@
  #


- import logging
  from dataclasses import dataclass, field
+ from functools import cached_property
  from io import TextIOWrapper
  from json import loads
  from os import remove
- from typing import Any, Callable, Final, Iterable, List, Mapping, MutableMapping, Optional, Union
+ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union
+
+ from source_shopify.utils import LOGGER

  from .exceptions import ShopifyBulkExceptions
  from .query import ShopifyBulkQuery
@@ -17,43 +19,194 @@ from .tools import END_OF_FILE, BulkTools

  @dataclass
  class ShopifyBulkRecord:
+     """
+     ShopifyBulkRecord is a class designed to handle the processing of bulk records from Shopify's GraphQL API.
+
+     Attributes:
+         query (ShopifyBulkQuery): The query object associated with the bulk record.
+         parent_stream_name (Optional[str]): The name of the parent stream, if any.
+         parent_stream_cursor (Optional[str]): The cursor of the parent stream, if any.
+         buffer (List[MutableMapping[str, Any]]): A buffer to store records before processing.
+         composition (Optional[Mapping[str, Any]]): The composition of the record, derived from the query.
+         record_process_components (Optional[Callable[[MutableMapping], MutableMapping]]): A callable to process record components.
+         components (List[str]): A list of components derived from the record composition.
+         _parent_stream_cursor_value (Optional[str | int]): The current value of the parent stream cursor.
+         record_composed (int): The count of records composed.
+
+     Methods:
+         __post_init__(): Initializes additional attributes after the object is created.
+         tools(): Returns an instance of BulkTools.
+         has_parent_stream(): Checks if the record has a parent stream.
+         parent_cursor_key(): Returns the key for the parent cursor if a parent stream exists.
+         check_type(record, types): Checks if the record's type matches the given type(s).
+         _parse_parent_state_value(value): Parses the parent state value and converts it to the appropriate format.
+         _set_parent_state_value(value): Sets the parent state value by parsing the provided value and updating the parent stream cursor value.
+         _track_parent_cursor(record): Tracks the cursor value from the parent stream if it exists and updates the parent state.
+         get_parent_stream_state(): Retrieves the state of the parent stream if it exists.
+         record_new(record): Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
+         record_new_component(record): Processes a new record by extracting its component type and adding it to the appropriate placeholder in the components list.
+         component_prepare(record): Prepares the given record by initializing a "record_components" dictionary.
+         buffer_flush(): Flushes the buffer by processing each record in the buffer.
+         record_compose(record): Processes a given record and yields buffered records if certain conditions are met.
+         process_line(jsonl_file): Processes a JSON Lines (jsonl) file and yields records.
+         record_resolve_id(record): Resolves and updates the 'id' field in the given record.
+         produce_records(filename): Reads the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
+         read_file(filename, remove_file): Reads a file and produces records from it.
+     """
+
      query: ShopifyBulkQuery
+     parent_stream_name: Optional[str] = None
+     parent_stream_cursor: Optional[str] = None

      # default buffer
      buffer: List[MutableMapping[str, Any]] = field(init=False, default_factory=list)

-     # default logger
-     logger: Final[logging.Logger] = logging.getLogger("airbyte")
-
      def __post_init__(self) -> None:
          self.composition: Optional[Mapping[str, Any]] = self.query.record_composition
          self.record_process_components: Optional[Callable[[MutableMapping], MutableMapping]] = self.query.record_process_components
          self.components: List[str] = self.composition.get("record_components", []) if self.composition else []
+         # We track the parent state for BULK substreams outside of the main CDK methods,
+         # to be able to update the moving parent state when there are no substream records to emit.
+         self._parent_stream_cursor_value: Optional[str | int] = None
+         # how many records composed
+         self.record_composed: int = 0

-     @property
+     @cached_property
      def tools(self) -> BulkTools:
          return BulkTools()

+     @cached_property
+     def has_parent_stream(self) -> bool:
+         return True if self.parent_stream_name and self.parent_stream_cursor else False
+
+     @cached_property
+     def parent_cursor_key(self) -> Optional[str]:
+         if self.has_parent_stream:
+             return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
+
      @staticmethod
      def check_type(record: Mapping[str, Any], types: Union[List[str], str]) -> bool:
+         """
+         Check if the record's type matches the given type(s).
+
+         Args:
+             record (Mapping[str, Any]): The record to check, expected to have a "__typename" key.
+             types (Union[List[str], str]): The type(s) to check against. Can be a single type (str) or a list of types (List[str]).
+
+         Returns:
+             bool: True if the record's type matches one of the given types, False otherwise.
+         """
+
          record_type = record.get("__typename")
          if isinstance(types, list):
              return any(record_type == t for t in types)
          else:
              return record_type == types

+     def _parse_parent_state_value(self, value: str | int) -> str | int:
+         """
+         Parses the parent state value and converts it to the appropriate format.
+
+         If the value is a string, it converts it to RFC 3339 datetime format using the `_datetime_str_to_rfc3339` method.
+         If the value is an integer, it returns the value as is.
+
+         Args:
+             value (str | int): The parent state value to be parsed.
+
+         Returns:
+             str | int: The parsed parent state value in the appropriate format.
+         """
+
+         if isinstance(value, str):
+             return self.tools._datetime_str_to_rfc3339(value)
+         elif isinstance(value, int):
+             return value
+
+     def _set_parent_state_value(self, value: str | int) -> None:
+         """
+         Sets the parent state value by parsing the provided value and updating the
+         parent stream cursor value. If the parent stream cursor value is already set,
+         it updates it to the maximum of the current and parsed values.
+
+         Args:
+             value (str | int): The value to be parsed and set as the parent state value.
+         """
+
+         parsed_value = self._parse_parent_state_value(value)
+         if not self._parent_stream_cursor_value:
+             self._parent_stream_cursor_value = parsed_value
+         else:
+             self._parent_stream_cursor_value = max(self._parent_stream_cursor_value, parsed_value)
+
+     def _track_parent_cursor(self, record: MutableMapping[str, Any]) -> None:
+         """
+         Tracks the cursor value from the parent stream if it exists and updates the parent state.
+
+         Args:
+             record (MutableMapping[str, Any]): The record from which to extract the parent cursor value.
+
+         Returns:
+             None
+         """
+
+         if self.has_parent_stream:
+             cursor_value: Optional[str | int] = record.get(self.parent_cursor_key, None)
+             if cursor_value:
+                 self._set_parent_state_value(cursor_value)
+
+     def get_parent_stream_state(self) -> Optional[Union[str, Mapping[str, Any]]]:
+         """
+         Retrieve the state of the parent stream if it exists.
+
+         Returns:
+             Optional[Union[str, Mapping[str, Any]]]: A dictionary containing the parent stream cursor and its value
+             if the parent stream exists and has a cursor value, otherwise None.
+         """
+
+         if self.has_parent_stream and self._parent_stream_cursor_value:
+             return {self.parent_stream_cursor: self._parent_stream_cursor_value}
+
      def record_new(self, record: MutableMapping[str, Any]) -> None:
+         """
+         Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
+
+         Args:
+             record (MutableMapping[str, Any]): The record to be processed.
+         """
+
          record = self.component_prepare(record)
          record.pop("__typename")
          self.buffer.append(record)

      def record_new_component(self, record: MutableMapping[str, Any]) -> None:
+         """
+         Processes a new record by extracting its component type and adding it to the appropriate
+         placeholder in the components list.
+
+         Args:
+             record (MutableMapping[str, Any]): The record to be processed.
+                 It is expected to contain a "__typename" key which indicates the component type.
+         """
+
          component = record.get("__typename")
          record.pop("__typename")
          # add component to its placeholder in the components list
          self.buffer[-1]["record_components"][component].append(record)

      def component_prepare(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
+         """
+         Prepares the given record by initializing a "record_components" dictionary.
+
+         If the instance has components, this method will add a "record_components" key to the record,
+         with each component as a key and an empty list as its value.
+
+         Args:
+             record (MutableMapping[str, Any]): The record to be prepared.
+
+         Returns:
+             MutableMapping[str, Any]: The updated record with initialized "record_components".
+         """
+
          if self.components:
              record["record_components"] = {}
              for component in self.components:
@@ -61,8 +214,24 @@ class ShopifyBulkRecord:
          return record

      def buffer_flush(self) -> Iterable[Mapping[str, Any]]:
+         """
+         Flushes the buffer by processing each record in the buffer.
+
+         For each record in the buffer:
+         - Tracks the parent state using `_track_parent_cursor`.
+         - Resolves the record ID from `str` to `int` using `record_resolve_id`.
+         - Processes record components using `record_process_components`.
+
+         Yields:
+             Iterable[Mapping[str, Any]]: Processed records from the buffer.
+
+         After processing, the buffer is cleared.
+         """
+
          if len(self.buffer) > 0:
              for record in self.buffer:
+                 # track the parent state
+                 self._track_parent_cursor(record)
                  # resolve id from `str` to `int`
                  record = self.record_resolve_id(record)
                  # process record components
@@ -72,10 +241,25 @@ class ShopifyBulkRecord:

      def record_compose(self, record: Mapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]:
          """
+         Processes a given record and yields buffered records if certain conditions are met.
+
+         Args:
+             record (Mapping[str, Any]): The record to be processed.
+
+         Returns:
+             Optional[Iterable[MutableMapping[str, Any]]]: An iterable of buffered records if conditions are met, otherwise None.
+
+         The method performs the following steps:
+         1. Checks if the record matches the type specified in the "new_record" composition.
+            - If it matches, it yields any buffered records from previous iterations and registers the new record.
+         2. Checks if the record matches any of the specified components.
+            - If it matches, it registers the new component record.
+
          Step 1: register the new record by it's `__typename`
          Step 2: check for `components` by their `__typename` and add to the placeholder
          Step 3: repeat until the `<END_OF_FILE>`.
          """
+
          if self.check_type(record, self.composition.get("new_record")):
              # emit from previous iteration, if present
              yield from self.buffer_flush()
@@ -86,9 +270,21 @@ class ShopifyBulkRecord:
              self.record_new_component(record)

      def process_line(self, jsonl_file: TextIOWrapper) -> Iterable[MutableMapping[str, Any]]:
-         # process the json lines
+         """
+         Processes a JSON Lines (jsonl) file and yields records.
+
+         Args:
+             jsonl_file (TextIOWrapper): A file-like object containing JSON Lines data.
+
+         Yields:
+             Iterable[MutableMapping[str, Any]]: An iterable of dictionaries representing the processed records.
+
+         The method reads each line from the provided jsonl_file. It exits the loop when it encounters the <end_of_file> marker.
+         For non-empty lines, it parses the JSON content and yields the resulting records. Finally, it emits any remaining
+         records in the buffer.
+         """
+
          for line in jsonl_file:
-             # we exit from the loop when receive <end_of_file> (file ends)
              if line == END_OF_FILE:
                  break
              elif line != "":
@@ -99,38 +295,76 @@ class ShopifyBulkRecord:

      def record_resolve_id(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
          """
-         The ids are fetched in the format of: " gid://shopify/Order/<Id> "
-         Input:
-             { "Id": "gid://shopify/Order/19435458986123"}
-         We need to extract the actual id from the string instead.
-         Output:
-             { "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
-         """
-         # save the actual api id to the `admin_graphql_api_id`
-         # while resolving the `id` in `record_resolve_id`,
-         # we re-assign the original id like `"gid://shopify/Order/19435458986123"`,
-         # into `admin_graphql_api_id` have the ability to identify the record oigin correctly in subsequent actions.
-         # IF NOT `id` field is provided by the query results, we should return composed record `as is`.
+         Resolves and updates the 'id' field in the given record.
+
+         This method extracts the 'id' from the record, checks if it is a string,
+         and if so, assigns it to 'admin_graphql_api_id' in the record. It then
+         resolves the string 'id' to an integer using the 'resolve_str_id' method
+         from the 'tools' attribute and updates the 'id' field in the record.
+
+         Args:
+             record (MutableMapping[str, Any]): The record containing the 'id' field to be resolved.
+                 Example:
+                     { "Id": "gid://shopify/Order/19435458986123"}
+
+         Returns:
+             MutableMapping[str, Any]: The updated record with the resolved 'id' field.
+                 Example:
+                     { "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
+         """
+
          id = record.get("id")
          if id and isinstance(id, str):
              record["admin_graphql_api_id"] = id
-             # extracting the int(id) and reassign
              record["id"] = self.tools.resolve_str_id(id)
          return record

      def produce_records(self, filename: str) -> Iterable[MutableMapping[str, Any]]:
          """
-         Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
-         The filename example: `bulk-4039263649981.jsonl`,
-         where `4039263649981` is the `id` of the COMPLETED BULK Jobw with `result_url`.
-         Note: typically the `filename` is taken from the `result_url` string provided in the response.
+         Produce records from a JSON Lines (jsonl) file.
+
+         This method reads a JSON Lines file, processes each line, converts the field names to snake_case,
+         and yields each processed record. It also keeps track of the number of records processed.
+
+         Args:
+             filename (str): The path to the JSON Lines file.
+
+         Yields:
+             MutableMapping[str, Any]: A dictionary representing a processed record with field names in snake_case.
          """

          with open(filename, "r") as jsonl_file:
+             # reset the counter
+             self.record_composed = 0
+
              for record in self.process_line(jsonl_file):
                  yield self.tools.fields_names_to_snake_case(record)
+                 self.record_composed += 1

      def read_file(self, filename: str, remove_file: Optional[bool] = True) -> Iterable[Mapping[str, Any]]:
+         """
+         Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
+
+         Args:
+             filename (str): The name of the file to read.
+             remove_file (Optional[bool]): Flag indicating whether to remove the file after reading. Defaults to True.
+
+         Example:
+             Note: typically the `filename` is taken from the `result_url` string provided in the response.
+
+             `bulk-4039263649981.jsonl` :
+             - the `4039263649981` is the `id` of the COMPLETED BULK Jobw with `result_url`
+
+         Yields:
+             Iterable[Mapping[str, Any]]: An iterable of records produced from the file.
+
+         Raises:
+             ShopifyBulkExceptions.BulkRecordProduceError: If an error occurs while producing records from the file.
+
+         Logs:
+             Logs an info message if the file removal fails.
+         """
+
          try:
              # produce records from saved result
              yield from self.produce_records(filename)
@@ -144,8 +378,5 @@ class ShopifyBulkRecord:
              try:
                  remove(filename)
              except Exception as e:
-                 self.logger.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
-                 # we should pass here, if the file wasn't removed , it's either:
-                 # - doesn't exist
-                 # - will be dropped with the container shut down.
+                 LOGGER.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
                  pass
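
The new `parent_stream_name` / `parent_stream_cursor` fields and `get_parent_stream_state()` let BULK substreams keep advancing the parent cursor even when a job yields no substream records. A minimal usage sketch (not part of the published diff; the query object, stream names, and result file below are hypothetical placeholders):

```python
# Hypothetical illustration of the new parent-state tracking in ShopifyBulkRecord.
producer = ShopifyBulkRecord(
    query=some_bulk_query,              # a ShopifyBulkQuery instance (assumed available)
    parent_stream_name="customers",     # hypothetical parent stream
    parent_stream_cursor="updated_at",  # hypothetical parent cursor field
)

# `_track_parent_cursor()` reads the `customers_updated_at` value from each buffered
# record during `buffer_flush()`, keeping the max value seen so far.
for record in producer.read_file("bulk-job-result.jsonl"):
    ...  # emit the record downstream

# Even if no substream records were produced, the tracked max can checkpoint the parent:
state = producer.get_parent_stream_state()  # e.g. {"updated_at": "2024-08-01T00:00:00+00:00"}
```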
source_shopify/shopify_graphql/bulk/retry.py CHANGED
@@ -1,23 +1,24 @@
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.

- import logging
  from functools import wraps
  from time import sleep
  from typing import Any, Callable, Final, Optional, Tuple, Type

+ from source_shopify.utils import LOGGER
+
  from .exceptions import ShopifyBulkExceptions

+
  BULK_RETRY_ERRORS: Final[Tuple] = (
      ShopifyBulkExceptions.BulkJobBadResponse,
      ShopifyBulkExceptions.BulkJobError,
  )


- def bulk_retry_on_exception(logger: logging.Logger, more_exceptions: Optional[Tuple[Type[Exception], ...]] = None) -> Callable:
+ def bulk_retry_on_exception(more_exceptions: Optional[Tuple[Type[Exception], ...]] = None) -> Callable:
      """
      A decorator to retry a function when specified exceptions are raised.

-     :param logger: Number of times to retry.
      :param more_exceptions: A tuple of exception types to catch.
      """

@@ -31,29 +32,28 @@ def bulk_retry_on_exception(logger: logging.Logger, more_exceptions: Optional[Tu
                  except BULK_RETRY_ERRORS or more_exceptions as ex:
                      current_retries += 1
                      if current_retries > self._job_max_retries:
-                         logger.error("Exceeded retry limit. Giving up.")
+                         LOGGER.error("Exceeded retry limit. Giving up.")
                          raise
                      else:
-                         logger.warning(
-                             f"Stream `{self.http_client._name}`: {ex}. Retrying {current_retries}/{self._job_max_retries} after {self._job_backoff_time} seconds."
+                         LOGGER.warning(
+                             f"Stream `{self.http_client.name}`: {ex}. Retrying {current_retries}/{self._job_max_retries} after {self._job_backoff_time} seconds."
                          )
                          sleep(self._job_backoff_time)
                  except ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError:
                      if self._concurrent_attempt == self._concurrent_max_retry:
                          message = f"The BULK Job couldn't be created at this time, since another job is running."
-                         logger.error(message)
+                         LOGGER.error(message)
                          raise ShopifyBulkExceptions.BulkJobConcurrentError(message)

                      self._concurrent_attempt += 1
-                     logger.warning(
-                         f"Stream: `{self.http_client._name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, attempt: {self._concurrent_attempt}.",
+                     LOGGER.warning(
+                         f"Stream: `{self.http_client.name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, attempt: {self._concurrent_attempt}.",
                      )
                      sleep(self._concurrent_interval)
                  except ShopifyBulkExceptions.BulkJobRedirectToOtherShopError:
-                     logger.warning(
-                         f"Stream: `{self.http_client._name}`, the `shop name` differs from the provided by the User: `{self.base_url}`. Switching to the `{self._new_base_url}`.",
+                     LOGGER.warning(
+                         f"Stream: `{self.http_client.name}`, the `shop name` differs from the provided in `input configuration`. Switching to the `{self._tools.shop_name_from_url(self.base_url)}`.",
                      )
-                     self._switch_base_url()

      return wrapper
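
With the `logger` parameter gone, the decorator is now applied with only the optional `more_exceptions` tuple and logs through the shared `LOGGER`; retry and backoff settings still come from attributes of the decorated instance (`_job_max_retries`, `_job_backoff_time`, the `_concurrent_*` counters, `http_client.name`). A hedged sketch, not taken from the diff, of how a BULK job manager method is expected to apply it:

```python
# Hypothetical sketch only; the class and its attribute values are illustrative.
from source_shopify.shopify_graphql.bulk.retry import bulk_retry_on_exception


class HypotheticalJobManager:
    _job_max_retries = 6        # read by the wrapper when a retryable BULK error is raised
    _job_backoff_time = 5
    _concurrent_attempt = 0     # read when the BULK concurrency limit is hit
    _concurrent_max_retry = 120
    _concurrent_interval = 30

    @bulk_retry_on_exception()  # no logger argument anymore
    def job_check_state(self) -> None:
        ...  # any call that may raise one of BULK_RETRY_ERRORS
```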
source_shopify/shopify_graphql/bulk/tools.py CHANGED
@@ -11,6 +11,7 @@ import pendulum as pdm

  from .exceptions import ShopifyBulkExceptions

+
  # default end line tag
  END_OF_FILE: str = "<end_of_file>"
  BULK_PARENT_KEY: str = "__parentId"
@@ -53,7 +54,21 @@ class BulkTools:
          )

      @staticmethod
-     def from_iso8601_to_rfc3339(record: Mapping[str, Any], field: str) -> Mapping[str, Any]:
+     def shop_name_from_url(url: str) -> str:
+         match = re.search(r"https://(.*?)(\.myshopify)", url)
+         if match:
+             return match.group(1)
+         else:
+             # safety net, if there is an error parsing url,
+             # on no match is found
+             return url
+
+     @staticmethod
+     def _datetime_str_to_rfc3339(value: str) -> str:
+         return pdm.parse(value).to_rfc3339_string()
+
+     @staticmethod
+     def from_iso8601_to_rfc3339(record: Mapping[str, Any], field: str) -> Optional[str]:
          """
          Converts date-time as follows:
          Input: "2023-01-01T15:00:00Z"
@@ -63,7 +78,7 @@ class BulkTools:
          # some fields that expected to be resolved as ids, might not be populated for the particular `RECORD`,
          # we should return `None` to make the field `null` in the output as the result of the transformation.
          target_value = record.get(field)
-         return pdm.parse(target_value).to_rfc3339_string() if target_value else record.get(field)
+         return BulkTools._datetime_str_to_rfc3339(target_value) if target_value else record.get(field)

      def fields_names_to_snake_case(self, dict_input: Optional[Mapping[str, Any]] = None) -> Optional[MutableMapping[str, Any]]:
          # transforming record field names from camel to snake case, leaving the `__parent_id` relation in place
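
The new `shop_name_from_url` helper backs the redirect handling shown in `retry.py` above, and `_datetime_str_to_rfc3339` centralizes the pendulum conversion reused by `from_iso8601_to_rfc3339` and the parent-state tracking in `record.py`. Expected behavior, sketched with illustrative inputs (the URLs below are made-up examples, not from the diff):

```python
from source_shopify.shopify_graphql.bulk.tools import BulkTools

# extracts the shop handle between "https://" and ".myshopify"
BulkTools.shop_name_from_url("https://example-shop.myshopify.com/admin/api/graphql.json")
# -> "example-shop"

# safety net: returns the input unchanged when the pattern does not match
BulkTools.shop_name_from_url("not-a-shop-url")
# -> "not-a-shop-url"

# normalizes an ISO 8601 string to RFC 3339
BulkTools._datetime_str_to_rfc3339("2023-01-01T15:00:00Z")
# -> "2023-01-01T15:00:00+00:00"
```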
source_shopify/source.py CHANGED
@@ -6,11 +6,12 @@
  import logging
  from typing import Any, List, Mapping, Tuple

+ from requests.exceptions import ConnectionError, RequestException, SSLError
+
  from airbyte_cdk.models import FailureType, SyncMode
  from airbyte_cdk.sources import AbstractSource
  from airbyte_cdk.sources.streams import Stream
  from airbyte_cdk.utils import AirbyteTracedException
- from requests.exceptions import ConnectionError, RequestException, SSLError

  from .auth import MissingAccessTokenError, ShopifyAuthenticator
  from .scopes import ShopifyScopes
@@ -26,7 +27,7 @@ from .streams.streams import (
      CustomerAddress,
      CustomerJourneySummary,
      Customers,
-     CustomerSavedSearch,
+     DeletedProducts,
      DiscountCodes,
      Disputes,
      DraftOrders,
@@ -56,8 +57,8 @@ from .streams.streams import (
      PriceRules,
      ProductImages,
      Products,
-     ProductsGraphQl,
      ProductVariants,
+     ProfileLocationGroups,
      Shop,
      SmartCollections,
      TenderTransactions,
@@ -131,10 +132,6 @@ class SourceShopify(AbstractSource):
      def continue_sync_on_stream_failure(self) -> bool:
          return True

-     @property
-     def raise_exception_on_missing_stream(self) -> bool:
-         return False
-
      @staticmethod
      def get_shop_name(config) -> str:
          split_pattern = ".myshopify.com"
@@ -215,15 +212,14 @@ class SourceShopify(AbstractSource):
              PriceRules(config),
              ProductImages(config),
              Products(config),
-             ProductsGraphQl(config),
+             DeletedProducts(config),
              ProductVariants(config),
              Shop(config),
              SmartCollections(config),
              TenderTransactions(config),
              self.select_transactions_stream(config),
-             CustomerSavedSearch(config),
              CustomerAddress(config),
-             Countries(config),
+             Countries(config=config, parent=ProfileLocationGroups(config)),
          ]

          return [
source_shopify/spec.json CHANGED
@@ -98,20 +98,26 @@
        "description": "Defines which API type (REST/BULK) to use to fetch `Transactions` data. If you are a `Shopify Plus` user, leave the default value to speed up the fetch.",
        "default": false
      },
+     "job_product_variants_include_pres_prices": {
+       "type": "boolean",
+       "title": "Add `Presentment prices` to Product Variants",
+       "description": "If enabled, the `Product Variants` stream attempts to include `Presentment prices` field (may affect the performance).",
+       "default": true
+     },
      "job_termination_threshold": {
        "type": "integer",
        "title": "BULK Job termination threshold",
        "description": "The max time in seconds, after which the single BULK Job should be `CANCELED` and retried. The bigger the value the longer the BULK Job is allowed to run.",
-       "default": 3600,
+       "default": 7200,
        "minimum": 3600,
        "maximum": 21600
      },
      "job_checkpoint_interval": {
        "type": "integer",
-       "title": "BULK Job checkpoint (lines collected)",
-       "description": "The threshold, after which the single BULK Job should be `CANCELED` and checkpointed.",
-       "default": 200000,
-       "minimum": 50000,
+       "title": "BULK Job checkpoint (rows collected)",
+       "description": "The threshold, after which the single BULK Job should be checkpointed (min: 15k, max: 1M)",
+       "default": 100000,
+       "minimum": 15000,
        "maximum": 1000000
      }
    }
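
Taken together, the spec changes add one Product Variants option and retune the BULK job thresholds. A hedged config fragment showing only the affected fields (everything else required by the source config is omitted):

```python
# Illustrative fragment only; required fields such as shop, credentials, and start_date are omitted.
config_fragment = {
    "job_product_variants_include_pres_prices": True,  # new option, default True
    "job_termination_threshold": 7200,                 # default raised from 3600 (min 3600, max 21600)
    "job_checkpoint_interval": 100000,                 # default lowered from 200000; min now 15000, max 1000000
}
```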