airbyte-source-shopify 2.5.6__tar.gz → 2.5.8__tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (73)
  1. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/PKG-INFO +1 -1
  2. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/pyproject.toml +1 -1
  3. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/job.py +12 -13
  4. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/query.py +7 -48
  5. airbyte_source_shopify-2.5.8/source_shopify/shopify_graphql/bulk/record.py +382 -0
  6. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/retry.py +8 -8
  7. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/streams/base_streams.py +35 -15
  8. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/utils.py +7 -6
  9. airbyte_source_shopify-2.5.6/source_shopify/shopify_graphql/bulk/record.py +0 -157
  10. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/README.md +0 -0
  11. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/__init__.py +0 -0
  12. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/auth.py +0 -0
  13. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/config_migrations.py +0 -0
  14. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/http_request.py +0 -0
  15. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/run.py +0 -0
  16. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/abandoned_checkouts.json +0 -0
  17. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/articles.json +0 -0
  18. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/balance_transactions.json +0 -0
  19. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/blogs.json +0 -0
  20. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/collections.json +0 -0
  21. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/collects.json +0 -0
  22. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/countries.json +0 -0
  23. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/custom_collections.json +0 -0
  24. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customer_address.json +0 -0
  25. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customer_journey_summary.json +0 -0
  26. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customer_saved_search.json +0 -0
  27. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/customers.json +0 -0
  28. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/discount_codes.json +0 -0
  29. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/disputes.json +0 -0
  30. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/draft_orders.json +0 -0
  31. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/fulfillment_orders.json +0 -0
  32. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/fulfillments.json +0 -0
  33. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/inventory_items.json +0 -0
  34. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/inventory_levels.json +0 -0
  35. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/locations.json +0 -0
  36. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_articles.json +0 -0
  37. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_blogs.json +0 -0
  38. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_collections.json +0 -0
  39. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_customers.json +0 -0
  40. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_draft_orders.json +0 -0
  41. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_locations.json +0 -0
  42. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_orders.json +0 -0
  43. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_pages.json +0 -0
  44. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_product_images.json +0 -0
  45. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_product_variants.json +0 -0
  46. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_products.json +0 -0
  47. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_shops.json +0 -0
  48. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/metafield_smart_collections.json +0 -0
  49. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/order_agreements.json +0 -0
  50. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/order_refunds.json +0 -0
  51. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/order_risks.json +0 -0
  52. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/orders.json +0 -0
  53. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/pages.json +0 -0
  54. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/price_rules.json +0 -0
  55. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/product_images.json +0 -0
  56. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/product_variants.json +0 -0
  57. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/products.json +0 -0
  58. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/products_graph_ql.json +0 -0
  59. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/shop.json +0 -0
  60. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/smart_collections.json +0 -0
  61. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/tender_transactions.json +0 -0
  62. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/schemas/transactions.json +0 -0
  63. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/scopes.py +0 -0
  64. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/__init__.py +0 -0
  65. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/exceptions.py +0 -0
  66. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/status.py +0 -0
  67. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/bulk/tools.py +0 -0
  68. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/graphql.py +0 -0
  69. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/shopify_graphql/schema.py +0 -0
  70. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/source.py +0 -0
  71. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/spec.json +0 -0
  72. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/streams/streams.py +0 -0
  73. {airbyte_source_shopify-2.5.6 → airbyte_source_shopify-2.5.8}/source_shopify/transform.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: airbyte-source-shopify
- Version: 2.5.6
+ Version: 2.5.8
  Summary: Source CDK implementation for Shopify.
  Home-page: https://airbyte.com
  License: ELv2
@@ -5,7 +5,7 @@ requires = [
  build-backend = "poetry.core.masonry.api"

  [tool.poetry]
- version = "2.5.6"
+ version = "2.5.8"
  name = "airbyte-source-shopify"
  description = "Source CDK implementation for Shopify."
  authors = [
@@ -2,7 +2,6 @@
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #

- import logging
  from dataclasses import dataclass, field
  from datetime import datetime
  from time import sleep, time
@@ -12,7 +11,7 @@ import pendulum as pdm
  import requests
  from airbyte_cdk.sources.streams.http import HttpClient
  from requests.exceptions import JSONDecodeError
- from source_shopify.utils import ApiTypeEnum
+ from source_shopify.utils import LOGGER, ApiTypeEnum
  from source_shopify.utils import ShopifyRateLimiter as limiter

  from .exceptions import AirbyteTracedException, ShopifyBulkExceptions
@@ -32,8 +31,8 @@ class ShopifyBulkManager:
  job_size: float
  job_checkpoint_interval: int

- # default logger
- logger: Final[logging.Logger] = logging.getLogger("airbyte")
+ parent_stream_name: Optional[str] = None
+ parent_stream_cursor: Optional[str] = None

  # 10Mb chunk size to save the file
  _retrieve_chunk_size: Final[int] = 1024 * 1024 * 10
@@ -94,7 +93,7 @@ class ShopifyBulkManager:
  # how many records should be collected before we use the checkpoining
  self._job_checkpoint_interval = self.job_checkpoint_interval
  # define Record Producer instance
- self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query)
+ self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query, self.parent_stream_name, self.parent_stream_cursor)

  @property
  def _tools(self) -> BulkTools:
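For orientation, the record producer is now constructed with the parent stream's name and cursor and derives the `<parent_stream_name>_<parent_stream_cursor>` key it later reads off each record. A minimal, self-contained sketch of that key derivation (class and variable names here are illustrative, not the connector's exact code):

from dataclasses import dataclass
from typing import Optional


@dataclass
class RecordProducer:
    # stand-in for ShopifyBulkRecord's parent-stream attributes
    parent_stream_name: Optional[str] = None
    parent_stream_cursor: Optional[str] = None

    @property
    def parent_cursor_key(self) -> Optional[str]:
        # only substreams with a parent get a cursor key
        if self.parent_stream_name and self.parent_stream_cursor:
            return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
        return None


producer = RecordProducer("products", "updated_at")
print(producer.parent_cursor_key)  # products_updated_at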
@@ -251,9 +250,9 @@ class ShopifyBulkManager:
  def _log_state(self, message: Optional[str] = None) -> None:
  pattern = f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {self._job_state}"
  if message:
- self.logger.info(f"{pattern}. {message}.")
+ LOGGER.info(f"{pattern}. {message}.")
  else:
- self.logger.info(pattern)
+ LOGGER.info(pattern)

  def _job_get_result(self, response: Optional[requests.Response] = None) -> Optional[str]:
  parsed_response = response.json().get("data", {}).get("node", {}) if response else None
@@ -309,13 +308,13 @@ class ShopifyBulkManager:
  sleep(self._job_check_interval)

  def _cancel_on_long_running_job(self) -> None:
- self.logger.info(
+ LOGGER.info(
  f"Stream: `{self.http_client.name}` the BULK Job: {self._job_id} runs longer than expected ({self._job_max_elapsed_time} sec). Retry with the reduced `Slice Size` after self-cancelation."
  )
  self._job_cancel()

  def _cancel_on_checkpointing(self) -> None:
- self.logger.info(f"Stream: `{self.http_client.name}`, checkpointing after >= `{self._job_checkpoint_interval}` rows collected.")
+ LOGGER.info(f"Stream: `{self.http_client.name}`, checkpointing after >= `{self._job_checkpoint_interval}` rows collected.")
  # set the flag to adjust the next slice from the checkpointed cursor value
  self._job_cancel()

@@ -434,7 +433,7 @@ class ShopifyBulkManager:
  return True
  return False

- @bulk_retry_on_exception(logger)
+ @bulk_retry_on_exception()
  def _job_check_state(self) -> None:
  while not self._job_completed():
  if self._job_canceled():
@@ -444,7 +443,7 @@ class ShopifyBulkManager:
  else:
  self._job_track_running()

- @bulk_retry_on_exception(logger)
+ @bulk_retry_on_exception()
  def create_job(self, stream_slice: Mapping[str, str], filter_field: str) -> None:
  if stream_slice:
  query = self.query.get(filter_field, stream_slice["start"], stream_slice["end"])
@@ -484,7 +483,7 @@ class ShopifyBulkManager:
  self._job_id = bulk_response.get("id")
  self._job_created_at = bulk_response.get("createdAt")
  self._job_state = ShopifyBulkJobStatus.CREATED.value
- self.logger.info(f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {ShopifyBulkJobStatus.CREATED.value}")
+ LOGGER.info(f"Stream: `{self.http_client.name}`, the BULK Job: `{self._job_id}` is {ShopifyBulkJobStatus.CREATED.value}")

  def job_size_normalize(self, start: datetime, end: datetime) -> datetime:
  # adjust slice size when it's bigger than the loop point when it should end,
@@ -522,7 +521,7 @@ class ShopifyBulkManager:
  final_message = final_message + lines_collected_message

  # emit final Bulk job status message
- self.logger.info(f"{final_message}")
+ LOGGER.info(f"{final_message}")

  def _process_bulk_results(self) -> Iterable[Mapping[str, Any]]:
  if self._job_result_filename:
@@ -80,17 +80,7 @@ class ShopifyBulkTemplates:
  @dataclass
  class ShopifyBulkQuery:
  config: Mapping[str, Any]
- parent_stream_name: Optional[str] = None
- parent_stream_cursor: Optional[str] = None
-
- @property
- def has_parent_stream(self) -> bool:
- return True if self.parent_stream_name and self.parent_stream_cursor else False
-
- @property
- def parent_cursor_key(self) -> Optional[str]:
- if self.has_parent_stream:
- return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
+ parent_stream_cursor_alias: Optional[str] = None

  @property
  def shop_id(self) -> int:
@@ -143,38 +133,12 @@ class ShopifyBulkQuery:
  """
  return ["__typename", "id"]

- def _inject_parent_cursor_field(self, nodes: List[Field], key: str = "updatedAt", index: int = 2) -> List[Field]:
- if self.has_parent_stream:
+ def inject_parent_cursor_field(self, nodes: List[Field], key: str = "updatedAt", index: int = 2) -> List[Field]:
+ if self.parent_stream_cursor_alias:
  # inject parent cursor key as alias to the `updatedAt` parent cursor field
- nodes.insert(index, Field(name="updatedAt", alias=self.parent_cursor_key))
-
+ nodes.insert(index, Field(name=key, alias=self.parent_stream_cursor_alias))
  return nodes

- def _add_parent_record_state(self, record: MutableMapping[str, Any], items: List[dict], to_rfc3339: bool = False) -> List[dict]:
- """
- Adds a parent cursor value to each item in the list.
-
- This method iterates over a list of dictionaries and adds a new key-value pair to each dictionary.
- The key is the value of `self.query_name`, and the value is another dictionary with a single key "updated_at"
- and the provided `parent_cursor_value`.
-
- Args:
- items (List[dict]): A list of dictionaries to which the parent cursor value will be added.
- parent_cursor_value (str): The value to be set for the "updated_at" key in the nested dictionary.
-
- Returns:
- List[dict]: The modified list of dictionaries with the added parent cursor values.
- """
-
- if self.has_parent_stream:
- parent_cursor_value: Optional[str] = record.get(self.parent_cursor_key, None)
- parent_state = self.tools._datetime_str_to_rfc3339(parent_cursor_value) if to_rfc3339 and parent_cursor_value else None
-
- for item in items:
- item[self.parent_stream_name] = {self.parent_stream_cursor: parent_state}
-
- return items
-
  def get(self, filter_field: Optional[str] = None, start: Optional[str] = None, end: Optional[str] = None) -> str:
  # define filter query string, if passed
  filter_query = f"{filter_field}:>='{start}' AND {filter_field}:<='{end}'" if filter_field else None
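For context, the renamed `inject_parent_cursor_field` puts the parent's cursor onto the child query by aliasing the parent's `updatedAt` field to the `parent_stream_cursor_alias` passed in from the stream. A small, self-contained sketch of that aliasing, using a stand-in `Field` type rather than the connector's real query-builder class:

from dataclasses import dataclass
from typing import List, Optional, Union


@dataclass
class Field:
    # stand-in for the query-builder Field(name=..., alias=...) used above
    name: str
    alias: Optional[str] = None


def inject_parent_cursor_field(
    nodes: List[Union[str, Field]], alias: Optional[str], key: str = "updatedAt", index: int = 2
) -> List[Union[str, Field]]:
    # inject the aliased parent cursor only when the substream has a parent
    if alias:
        nodes.insert(index, Field(name=key, alias=alias))
    return nodes


# e.g. a metafield substream whose parent is `products` with cursor `updated_at`
nodes = inject_parent_cursor_field(["__typename", "id"], alias="products_updated_at")
print(nodes)  # ['__typename', 'id', Field(name='updatedAt', alias='products_updated_at')]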
@@ -339,7 +303,7 @@ class Metafield(ShopifyBulkQuery):
  elif isinstance(self.type.value, str):
  nodes = [*nodes, metafield_node]

- nodes = self._inject_parent_cursor_field(nodes)
+ nodes = self.inject_parent_cursor_field(nodes)

  return nodes

@@ -372,9 +336,6 @@
  else:
  metafields = record_components.get("Metafield", [])
  if len(metafields) > 0:
- if self.has_parent_stream:
- # add parent state to each metafield
- metafields = self._add_parent_record_state(record, metafields, to_rfc3339=True)
  yield from self._process_components(metafields)

@@ -637,7 +598,7 @@ class MetafieldProductImage(Metafield):
  media_node = self.get_edge_node("media", media_fields)

  fields: List[Field] = ["__typename", "id", media_node]
- fields = self._inject_parent_cursor_field(fields)
+ fields = self.inject_parent_cursor_field(fields)

  return fields

@@ -2422,7 +2383,7 @@ class ProductImage(ShopifyBulkQuery):

  @property
  def query_nodes(self) -> List[Field]:
- return self._inject_parent_cursor_field(self.nodes)
+ return self.inject_parent_cursor_field(self.nodes)

  def _process_component(self, entity: List[dict]) -> List[dict]:
  for item in entity:
@@ -2499,8 +2460,6 @@

  # add the product_id to each `Image`
  record["images"] = self._add_product_id(record.get("images", []), record.get("id"))
- # add the product cursor to each `Image`
- record["images"] = self._add_parent_record_state(record, record.get("images", []), to_rfc3339=True)
  record["images"] = self._merge_with_media(record_components)
  record.pop("record_components")

@@ -0,0 +1,382 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ from dataclasses import dataclass, field
7
+ from functools import cached_property
8
+ from io import TextIOWrapper
9
+ from json import loads
10
+ from os import remove
11
+ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union
12
+
13
+ from source_shopify.utils import LOGGER
14
+
15
+ from .exceptions import ShopifyBulkExceptions
16
+ from .query import ShopifyBulkQuery
17
+ from .tools import END_OF_FILE, BulkTools
18
+
19
+
20
+ @dataclass
21
+ class ShopifyBulkRecord:
22
+ """
23
+ ShopifyBulkRecord is a class designed to handle the processing of bulk records from Shopify's GraphQL API.
24
+
25
+ Attributes:
26
+ query (ShopifyBulkQuery): The query object associated with the bulk record.
27
+ parent_stream_name (Optional[str]): The name of the parent stream, if any.
28
+ parent_stream_cursor (Optional[str]): The cursor of the parent stream, if any.
29
+ buffer (List[MutableMapping[str, Any]]): A buffer to store records before processing.
30
+ composition (Optional[Mapping[str, Any]]): The composition of the record, derived from the query.
31
+ record_process_components (Optional[Callable[[MutableMapping], MutableMapping]]): A callable to process record components.
32
+ components (List[str]): A list of components derived from the record composition.
33
+ _parent_stream_cursor_value (Optional[str | int]): The current value of the parent stream cursor.
34
+ record_composed (int): The count of records composed.
35
+
36
+ Methods:
37
+ __post_init__(): Initializes additional attributes after the object is created.
38
+ tools(): Returns an instance of BulkTools.
39
+ has_parent_stream(): Checks if the record has a parent stream.
40
+ parent_cursor_key(): Returns the key for the parent cursor if a parent stream exists.
41
+ check_type(record, types): Checks if the record's type matches the given type(s).
42
+ _parse_parent_state_value(value): Parses the parent state value and converts it to the appropriate format.
43
+ _set_parent_state_value(value): Sets the parent state value by parsing the provided value and updating the parent stream cursor value.
44
+ _track_parent_cursor(record): Tracks the cursor value from the parent stream if it exists and updates the parent state.
45
+ get_parent_stream_state(): Retrieves the state of the parent stream if it exists.
46
+ record_new(record): Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
47
+ record_new_component(record): Processes a new record by extracting its component type and adding it to the appropriate placeholder in the components list.
48
+ component_prepare(record): Prepares the given record by initializing a "record_components" dictionary.
49
+ buffer_flush(): Flushes the buffer by processing each record in the buffer.
50
+ record_compose(record): Processes a given record and yields buffered records if certain conditions are met.
51
+ process_line(jsonl_file): Processes a JSON Lines (jsonl) file and yields records.
52
+ record_resolve_id(record): Resolves and updates the 'id' field in the given record.
53
+ produce_records(filename): Reads the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
54
+ read_file(filename, remove_file): Reads a file and produces records from it.
55
+ """
56
+
57
+ query: ShopifyBulkQuery
58
+ parent_stream_name: Optional[str] = None
59
+ parent_stream_cursor: Optional[str] = None
60
+
61
+ # default buffer
62
+ buffer: List[MutableMapping[str, Any]] = field(init=False, default_factory=list)
63
+
64
+ def __post_init__(self) -> None:
65
+ self.composition: Optional[Mapping[str, Any]] = self.query.record_composition
66
+ self.record_process_components: Optional[Callable[[MutableMapping], MutableMapping]] = self.query.record_process_components
67
+ self.components: List[str] = self.composition.get("record_components", []) if self.composition else []
68
+ # We track the parent state for BULK substreams outside of the main CDK methods,
69
+ # to be able to update the moving parent state when there are no substream records to emit.
70
+ self._parent_stream_cursor_value: Optional[str | int] = None
71
+ # how many records composed
72
+ self.record_composed: int = 0
73
+
74
+ @cached_property
75
+ def tools(self) -> BulkTools:
76
+ return BulkTools()
77
+
78
+ @cached_property
79
+ def has_parent_stream(self) -> bool:
80
+ return True if self.parent_stream_name and self.parent_stream_cursor else False
81
+
82
+ @cached_property
83
+ def parent_cursor_key(self) -> Optional[str]:
84
+ if self.has_parent_stream:
85
+ return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
86
+
87
+ @staticmethod
88
+ def check_type(record: Mapping[str, Any], types: Union[List[str], str]) -> bool:
89
+ """
90
+ Check if the record's type matches the given type(s).
91
+
92
+ Args:
93
+ record (Mapping[str, Any]): The record to check, expected to have a "__typename" key.
94
+ types (Union[List[str], str]): The type(s) to check against. Can be a single type (str) or a list of types (List[str]).
95
+
96
+ Returns:
97
+ bool: True if the record's type matches one of the given types, False otherwise.
98
+ """
99
+
100
+ record_type = record.get("__typename")
101
+ if isinstance(types, list):
102
+ return any(record_type == t for t in types)
103
+ else:
104
+ return record_type == types
105
+
106
+ def _parse_parent_state_value(self, value: str | int) -> str | int:
107
+ """
108
+ Parses the parent state value and converts it to the appropriate format.
109
+
110
+ If the value is a string, it converts it to RFC 3339 datetime format using the `_datetime_str_to_rfc3339` method.
111
+ If the value is an integer, it returns the value as is.
112
+
113
+ Args:
114
+ value (str | int): The parent state value to be parsed.
115
+
116
+ Returns:
117
+ str | int: The parsed parent state value in the appropriate format.
118
+ """
119
+
120
+ if isinstance(value, str):
121
+ return self.tools._datetime_str_to_rfc3339(value)
122
+ elif isinstance(value, int):
123
+ return value
124
+
125
+ def _set_parent_state_value(self, value: str | int) -> None:
126
+ """
127
+ Sets the parent state value by parsing the provided value and updating the
128
+ parent stream cursor value. If the parent stream cursor value is already set,
129
+ it updates it to the maximum of the current and parsed values.
130
+
131
+ Args:
132
+ value (str | int): The value to be parsed and set as the parent state value.
133
+ """
134
+
135
+ parsed_value = self._parse_parent_state_value(value)
136
+ if not self._parent_stream_cursor_value:
137
+ self._parent_stream_cursor_value = parsed_value
138
+ else:
139
+ self._parent_stream_cursor_value = max(self._parent_stream_cursor_value, parsed_value)
140
+
141
+ def _track_parent_cursor(self, record: MutableMapping[str, Any]) -> None:
142
+ """
143
+ Tracks the cursor value from the parent stream if it exists and updates the parent state.
144
+
145
+ Args:
146
+ record (MutableMapping[str, Any]): The record from which to extract the parent cursor value.
147
+
148
+ Returns:
149
+ None
150
+ """
151
+
152
+ if self.has_parent_stream:
153
+ cursor_value: Optional[str | int] = record.get(self.parent_cursor_key, None)
154
+ if cursor_value:
155
+ self._set_parent_state_value(cursor_value)
156
+
157
+ def get_parent_stream_state(self) -> Optional[Union[str, Mapping[str, Any]]]:
158
+ """
159
+ Retrieve the state of the parent stream if it exists.
160
+
161
+ Returns:
162
+ Optional[Union[str, Mapping[str, Any]]]: A dictionary containing the parent stream cursor and its value
163
+ if the parent stream exists and has a cursor value, otherwise None.
164
+ """
165
+
166
+ if self.has_parent_stream and self._parent_stream_cursor_value:
167
+ return {self.parent_stream_cursor: self._parent_stream_cursor_value}
168
+
169
+ def record_new(self, record: MutableMapping[str, Any]) -> None:
170
+ """
171
+ Processes a new record by preparing it, removing the "__typename" key, and appending it to the buffer.
172
+
173
+ Args:
174
+ record (MutableMapping[str, Any]): The record to be processed.
175
+ """
176
+
177
+ record = self.component_prepare(record)
178
+ record.pop("__typename")
179
+ self.buffer.append(record)
180
+
181
+ def record_new_component(self, record: MutableMapping[str, Any]) -> None:
182
+ """
183
+ Processes a new record by extracting its component type and adding it to the appropriate
184
+ placeholder in the components list.
185
+
186
+ Args:
187
+ record (MutableMapping[str, Any]): The record to be processed.
188
+ It is expected to contain a "__typename" key which indicates the component type.
189
+ """
190
+
191
+ component = record.get("__typename")
192
+ record.pop("__typename")
193
+ # add component to its placeholder in the components list
194
+ self.buffer[-1]["record_components"][component].append(record)
195
+
196
+ def component_prepare(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
197
+ """
198
+ Prepares the given record by initializing a "record_components" dictionary.
199
+
200
+ If the instance has components, this method will add a "record_components" key to the record,
201
+ with each component as a key and an empty list as its value.
202
+
203
+ Args:
204
+ record (MutableMapping[str, Any]): The record to be prepared.
205
+
206
+ Returns:
207
+ MutableMapping[str, Any]: The updated record with initialized "record_components".
208
+ """
209
+
210
+ if self.components:
211
+ record["record_components"] = {}
212
+ for component in self.components:
213
+ record["record_components"][component] = []
214
+ return record
215
+
216
+ def buffer_flush(self) -> Iterable[Mapping[str, Any]]:
217
+ """
218
+ Flushes the buffer by processing each record in the buffer.
219
+
220
+ For each record in the buffer:
221
+ - Tracks the parent state using `_track_parent_cursor`.
222
+ - Resolves the record ID from `str` to `int` using `record_resolve_id`.
223
+ - Processes record components using `record_process_components`.
224
+
225
+ Yields:
226
+ Iterable[Mapping[str, Any]]: Processed records from the buffer.
227
+
228
+ After processing, the buffer is cleared.
229
+ """
230
+
231
+ if len(self.buffer) > 0:
232
+ for record in self.buffer:
233
+ # track the parent state
234
+ self._track_parent_cursor(record)
235
+ # resolve id from `str` to `int`
236
+ record = self.record_resolve_id(record)
237
+ # process record components
238
+ yield from self.record_process_components(record)
239
+ # clean the buffer
240
+ self.buffer.clear()
241
+
242
+ def record_compose(self, record: Mapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]:
243
+ """
244
+ Processes a given record and yields buffered records if certain conditions are met.
245
+
246
+ Args:
247
+ record (Mapping[str, Any]): The record to be processed.
248
+
249
+ Returns:
250
+ Optional[Iterable[MutableMapping[str, Any]]]: An iterable of buffered records if conditions are met, otherwise None.
251
+
252
+ The method performs the following steps:
253
+ 1. Checks if the record matches the type specified in the "new_record" composition.
254
+ - If it matches, it yields any buffered records from previous iterations and registers the new record.
255
+ 2. Checks if the record matches any of the specified components.
256
+ - If it matches, it registers the new component record.
257
+
258
+ Step 1: register the new record by it's `__typename`
259
+ Step 2: check for `components` by their `__typename` and add to the placeholder
260
+ Step 3: repeat until the `<END_OF_FILE>`.
261
+ """
262
+
263
+ if self.check_type(record, self.composition.get("new_record")):
264
+ # emit from previous iteration, if present
265
+ yield from self.buffer_flush()
266
+ # register the record
267
+ self.record_new(record)
268
+ # components check
269
+ elif self.check_type(record, self.components):
270
+ self.record_new_component(record)
271
+
272
+ def process_line(self, jsonl_file: TextIOWrapper) -> Iterable[MutableMapping[str, Any]]:
273
+ """
274
+ Processes a JSON Lines (jsonl) file and yields records.
275
+
276
+ Args:
277
+ jsonl_file (TextIOWrapper): A file-like object containing JSON Lines data.
278
+
279
+ Yields:
280
+ Iterable[MutableMapping[str, Any]]: An iterable of dictionaries representing the processed records.
281
+
282
+ The method reads each line from the provided jsonl_file. It exits the loop when it encounters the <end_of_file> marker.
283
+ For non-empty lines, it parses the JSON content and yields the resulting records. Finally, it emits any remaining
284
+ records in the buffer.
285
+ """
286
+
287
+ for line in jsonl_file:
288
+ if line == END_OF_FILE:
289
+ break
290
+ elif line != "":
291
+ yield from self.record_compose(loads(line))
292
+
293
+ # emit what's left in the buffer, typically last record
294
+ yield from self.buffer_flush()
295
+
296
+ def record_resolve_id(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
297
+ """
298
+ Resolves and updates the 'id' field in the given record.
299
+
300
+ This method extracts the 'id' from the record, checks if it is a string,
301
+ and if so, assigns it to 'admin_graphql_api_id' in the record. It then
302
+ resolves the string 'id' to an integer using the 'resolve_str_id' method
303
+ from the 'tools' attribute and updates the 'id' field in the record.
304
+
305
+ Args:
306
+ record (MutableMapping[str, Any]): The record containing the 'id' field to be resolved.
307
+ Example:
308
+ { "Id": "gid://shopify/Order/19435458986123"}
309
+
310
+ Returns:
311
+ MutableMapping[str, Any]: The updated record with the resolved 'id' field.
312
+ Example:
313
+ { "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
314
+ """
315
+
316
+ id = record.get("id")
317
+ if id and isinstance(id, str):
318
+ record["admin_graphql_api_id"] = id
319
+ record["id"] = self.tools.resolve_str_id(id)
320
+ return record
321
+
322
+ def produce_records(self, filename: str) -> Iterable[MutableMapping[str, Any]]:
323
+ """
324
+ Produce records from a JSON Lines (jsonl) file.
325
+
326
+ This method reads a JSON Lines file, processes each line, converts the field names to snake_case,
327
+ and yields each processed record. It also keeps track of the number of records processed.
328
+
329
+ Args:
330
+ filename (str): The path to the JSON Lines file.
331
+
332
+ Yields:
333
+ MutableMapping[str, Any]: A dictionary representing a processed record with field names in snake_case.
334
+ """
335
+
336
+ with open(filename, "r") as jsonl_file:
337
+ # reset the counter
338
+ self.record_composed = 0
339
+
340
+ for record in self.process_line(jsonl_file):
341
+ yield self.tools.fields_names_to_snake_case(record)
342
+ self.record_composed += 1
343
+
344
+ def read_file(self, filename: str, remove_file: Optional[bool] = True) -> Iterable[Mapping[str, Any]]:
345
+ """
346
+ Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
347
+
348
+ Args:
349
+ filename (str): The name of the file to read.
350
+ remove_file (Optional[bool]): Flag indicating whether to remove the file after reading. Defaults to True.
351
+
352
+ Example:
353
+ Note: typically the `filename` is taken from the `result_url` string provided in the response.
354
+
355
+ `bulk-4039263649981.jsonl` :
356
+ - the `4039263649981` is the `id` of the COMPLETED BULK Jobw with `result_url`
357
+
358
+ Yields:
359
+ Iterable[Mapping[str, Any]]: An iterable of records produced from the file.
360
+
361
+ Raises:
362
+ ShopifyBulkExceptions.BulkRecordProduceError: If an error occurs while producing records from the file.
363
+
364
+ Logs:
365
+ Logs an info message if the file removal fails.
366
+ """
367
+
368
+ try:
369
+ # produce records from saved result
370
+ yield from self.produce_records(filename)
371
+ except Exception as e:
372
+ raise ShopifyBulkExceptions.BulkRecordProduceError(
373
+ f"An error occured while producing records from BULK Job result. Trace: {repr(e)}.",
374
+ )
375
+ finally:
376
+ # removing the tmp file, if requested
377
+ if remove_file and filename:
378
+ try:
379
+ remove(filename)
380
+ except Exception as e:
381
+ LOGGER.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
382
+ pass
@@ -1,10 +1,11 @@
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.

- import logging
  from functools import wraps
  from time import sleep
  from typing import Any, Callable, Final, Optional, Tuple, Type

+ from source_shopify.utils import LOGGER
+
  from .exceptions import ShopifyBulkExceptions

  BULK_RETRY_ERRORS: Final[Tuple] = (
@@ -13,11 +14,10 @@ BULK_RETRY_ERRORS: Final[Tuple] = (
  )


- def bulk_retry_on_exception(logger: logging.Logger, more_exceptions: Optional[Tuple[Type[Exception], ...]] = None) -> Callable:
+ def bulk_retry_on_exception(more_exceptions: Optional[Tuple[Type[Exception], ...]] = None) -> Callable:
  """
  A decorator to retry a function when specified exceptions are raised.

- :param logger: Number of times to retry.
  :param more_exceptions: A tuple of exception types to catch.
  """

@@ -31,26 +31,26 @@ def bulk_retry_on_exception(logger: logging.Logger, more_exceptions: Optional[Tu
  except BULK_RETRY_ERRORS or more_exceptions as ex:
  current_retries += 1
  if current_retries > self._job_max_retries:
- logger.error("Exceeded retry limit. Giving up.")
+ LOGGER.error("Exceeded retry limit. Giving up.")
  raise
  else:
- logger.warning(
+ LOGGER.warning(
  f"Stream `{self.http_client.name}`: {ex}. Retrying {current_retries}/{self._job_max_retries} after {self._job_backoff_time} seconds."
  )
  sleep(self._job_backoff_time)
  except ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError:
  if self._concurrent_attempt == self._concurrent_max_retry:
  message = f"The BULK Job couldn't be created at this time, since another job is running."
- logger.error(message)
+ LOGGER.error(message)
  raise ShopifyBulkExceptions.BulkJobConcurrentError(message)

  self._concurrent_attempt += 1
- logger.warning(
+ LOGGER.warning(
  f"Stream: `{self.http_client.name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, attempt: {self._concurrent_attempt}.",
  )
  sleep(self._concurrent_interval)
  except ShopifyBulkExceptions.BulkJobRedirectToOtherShopError:
- logger.warning(
+ LOGGER.warning(
  f"Stream: `{self.http_client.name}`, the `shop name` differs from the provided in `input configuration`. Switching to the `{self._tools.shop_name_from_url(self.base_url)}`.",
  )

@@ -644,12 +644,14 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
  self.job_manager: ShopifyBulkManager = ShopifyBulkManager(
  http_client=self.bulk_http_client,
  base_url=f"{self.url_base}{self.path()}",
- query=self.bulk_query(config, self.parent_stream_name, self.parent_stream_cursor),
+ query=self.bulk_query(config, self.parent_stream_query_cursor_alias),
  job_termination_threshold=float(config.get("job_termination_threshold", 3600)),
  # overide the default job slice size, if provided (it's auto-adjusted, later on)
  job_size=config.get("bulk_window_in_days", 30.0),
  # provide the job checkpoint interval value, default value is 200k lines collected
  job_checkpoint_interval=config.get("job_checkpoint_interval", 200_000),
+ parent_stream_name=self.parent_stream_name,
+ parent_stream_cursor=self.parent_stream_cursor,
  )

  @property
@@ -670,20 +672,25 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
  """
  return self.parent_stream_class(self.config) if self.parent_stream_class else None

- @property
+ @cached_property
  def parent_stream_name(self) -> Optional[str]:
  """
  Returns the parent stream name, if the substream has a `parent_stream_class` dependency.
  """
  return self.parent_stream.name if self.parent_stream_class else None

- @property
+ @cached_property
  def parent_stream_cursor(self) -> Optional[str]:
  """
  Returns the parent stream cursor, if the substream has a `parent_stream_class` dependency.
  """
  return self.parent_stream.cursor_field if self.parent_stream_class else None

+ @cached_property
+ def parent_stream_query_cursor_alias(self) -> Optional[str]:
+ if self.parent_stream_name and self.parent_stream_cursor:
+ return f"{self.parent_stream_name}_{self.parent_stream_cursor}"
+
  @property
  @abstractmethod
  def bulk_query(self) -> ShopifyBulkQuery:
@@ -713,7 +720,9 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
  return None

  def get_updated_state(
- self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]
+ self,
+ current_stream_state: MutableMapping[str, Any],
+ latest_record: Mapping[str, Any],
  ) -> MutableMapping[str, Any]:
  """UPDATING THE STATE OBJECT:
  Stream: CustomerAddress
@@ -728,29 +737,40 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
  }
  }
  """
+
  updated_state = super().get_updated_state(current_stream_state, latest_record)
+
  if self.parent_stream_class:
- parent_state = latest_record.get(self.parent_stream.name, {})
- parent_state_value = (
- parent_state.get(self.parent_stream.cursor_field) if parent_state else latest_record.get(self.parent_stream.cursor_field)
- )
+ # the default way of getting the parent stream state is to use the value from the RecordProducer,
+ # since the parent record could be present but no substream's records are present to emit,
+ # the parent state is tracked for each parent record processed, thus updated regardless having substream records.
+ tracked_parent_state = self.job_manager.record_producer.get_parent_stream_state()
+ # fallback to the record level to search for the parent cursor or use the stream cursor value
+ parent_state = tracked_parent_state if tracked_parent_state else self._get_parent_state_from_record(latest_record)
  # add parent_stream_state to `updated_state`
- updated_state[self.parent_stream.name] = {self.parent_stream.cursor_field: parent_state_value}
+ updated_state[self.parent_stream_name] = parent_state
+
  return updated_state

+ def _get_parent_state_from_record(self, latest_record: Mapping[str, Any]) -> MutableMapping[str, Any]:
+ parent_state = latest_record.get(self.parent_stream_name, {})
+ parent_state_value = parent_state.get(self.parent_stream_cursor) if parent_state else latest_record.get(self.parent_stream_cursor)
+ parent_state[self.parent_stream_cursor] = parent_state_value
+ return parent_state
+
  def _get_stream_cursor_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[str]:
  if stream_state:
  return stream_state.get(self.cursor_field, self.default_state_comparison_value)
  else:
  return self.config.get("start_date")

- def get_stream_state_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[str]:
+ def _get_stream_state_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[str]:
  if stream_state:
  if self.parent_stream_class:
  # get parent stream state from the stream_state object.
- parent_state = stream_state.get(self.parent_stream.name, {})
+ parent_state = stream_state.get(self.parent_stream_name, {})
  if parent_state:
- return parent_state.get(self.parent_stream.cursor_field, self.default_state_comparison_value)
+ return parent_state.get(self.parent_stream_cursor, self.default_state_comparison_value)
  else:
  # use the streams cursor value, if no parent state available
  return self._get_stream_cursor_value(stream_state)
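The reworked `get_updated_state` prefers the parent cursor tracked by the record producer (so the parent state still advances when a parent record yields no substream records) and only falls back to the cursor found on the record itself. A reduced sketch of that fallback, with simplified inputs; the function and variable names here are illustrative:

from typing import Any, Mapping, MutableMapping, Optional


def resolve_parent_state(
    tracked_parent_state: Optional[Mapping[str, Any]],
    latest_record: Mapping[str, Any],
    parent_stream_name: str,
    parent_stream_cursor: str,
) -> MutableMapping[str, Any]:
    # prefer the state tracked while producing records from the BULK result
    if tracked_parent_state:
        return dict(tracked_parent_state)
    # fallback: read the cursor from the nested parent object on the record, or the record itself
    parent_state = dict(latest_record.get(parent_stream_name) or {})
    value = parent_state.get(parent_stream_cursor) or latest_record.get(parent_stream_cursor)
    parent_state[parent_stream_cursor] = value
    return parent_state


record = {"id": 1, "products": {"updated_at": "2024-01-01T00:00:00+00:00"}}
print(resolve_parent_state(None, record, "products", "updated_at"))
# {'updated_at': '2024-01-01T00:00:00+00:00'}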
@@ -760,9 +780,9 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
  else:
  return self.config.get("start_date")

- def get_state_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[Union[str, int]]:
+ def _get_state_value(self, stream_state: Optional[Mapping[str, Any]] = None) -> Optional[Union[str, int]]:
  if stream_state:
- return self.get_stream_state_value(stream_state)
+ return self._get_stream_state_value(stream_state)
  else:
  # for majority of cases we fallback to start_date, otherwise.
  return self.config.get("start_date")
@@ -785,7 +805,7 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream):
  @stream_state_cache.cache_stream_state
  def stream_slices(self, stream_state: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
  if self.filter_field:
- state = self.get_state_value(stream_state)
+ state = self._get_state_value(stream_state)
  start = pdm.parse(state)
  end = pdm.now()
  while start < end:
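The hunk above ends inside the date-windowed slicing loop; for orientation, a reduced sketch of how a resolved state value drives those slices (the connector auto-adjusts the window size, which is omitted here):

from typing import Iterable, Mapping

import pendulum as pdm


def date_window_slices(state_value: str, window_in_days: int = 30) -> Iterable[Mapping[str, str]]:
    # walk from the resolved state value to "now" in fixed-size windows
    start = pdm.parse(state_value)
    end = pdm.now()
    while start < end:
        slice_end = min(start.add(days=window_in_days), end)
        yield {"start": start.to_rfc3339_string(), "end": slice_end.to_rfc3339_string()}
        start = slice_end


for s in date_window_slices("2024-01-01T00:00:00+00:00", window_in_days=90):
    print(s)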
@@ -7,13 +7,16 @@ import enum
  import logging
  from functools import wraps
  from time import sleep
- from typing import Any, Callable, Dict, List, Mapping, Optional
+ from typing import Any, Callable, Dict, Final, List, Mapping, Optional

  import requests
  from airbyte_cdk.models import FailureType
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import ErrorResolution, ResponseAction
  from airbyte_cdk.utils import AirbyteTracedException

+ # default logger instance
+ LOGGER: Final[logging.Logger] = logging.getLogger("airbyte")
+

  class ShopifyNonRetryableErrors:
  """Holds the errors classification and messaging scenarios."""
@@ -112,8 +115,6 @@ class ShopifyRateLimiter:
  on_mid_load: float = 1.5
  on_high_load: float = 5.0

- logger = logging.getLogger("airbyte")
-
  log_message_count = 0
  log_message_frequency = 3

@@ -124,7 +125,7 @@ class ShopifyRateLimiter:
  if ShopifyRateLimiter.log_message_count < ShopifyRateLimiter.log_message_frequency:
  ShopifyRateLimiter.log_message_count += 1
  else:
- ShopifyRateLimiter.logger.info(message)
+ LOGGER.info(message)
  ShopifyRateLimiter.log_message_count = 0

  def get_response_from_args(*args) -> Optional[requests.Response]:
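With the shared `LOGGER` constant introduced in `utils.py`, the per-class logger attributes removed above are replaced by a single module-level logger that `job.py` and `retry.py` now import (`from source_shopify.utils import LOGGER`). A self-contained sketch of the same pattern, a module-level logger used inside a retry decorator, similar in spirit to `bulk_retry_on_exception()` but not the connector's exact implementation:

import logging
from functools import wraps
from time import sleep
from typing import Callable, Final, Optional, Tuple, Type

# module-level logger, mirroring LOGGER in source_shopify/utils.py
LOGGER: Final[logging.Logger] = logging.getLogger("airbyte")


def retry_on_exception(more_exceptions: Optional[Tuple[Type[Exception], ...]] = None,
                       max_retries: int = 3, backoff: float = 2.0) -> Callable:
    retriable = (ConnectionError,) + (more_exceptions or ())

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            retries = 0
            while True:
                try:
                    return func(*args, **kwargs)
                except retriable as ex:
                    retries += 1
                    if retries > max_retries:
                        LOGGER.error("Exceeded retry limit. Giving up.")
                        raise
                    LOGGER.warning(f"{ex!r}. Retrying {retries}/{max_retries} after {backoff} seconds.")
                    sleep(backoff)

        return wrapper

    return decorator


@retry_on_exception()  # note: no logger argument, unlike the pre-2.5.8 decorator
def fetch_status() -> str:
    return "COMPLETED"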
@@ -138,8 +139,8 @@ class ShopifyRateLimiter:
  Define wait_time based on load conditions.

  :: load - represents how close we are to being throttled
- - 0.5 is half way through our allowance
- - 1 indicates that all of the allowance is used and the api will start rejecting calls
+ - 0.5 is half way through our allowance
+ - 1 indicates that all of the allowance is used and the api will start rejecting calls
  :: threshold - is the % cutoff for the rate_limits/load
  :: wait_time - time to wait between each request in seconds

@@ -1,157 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
-
6
- import logging
7
- from dataclasses import dataclass, field
8
- from io import TextIOWrapper
9
- from json import loads
10
- from os import remove
11
- from typing import Any, Callable, Final, Iterable, List, Mapping, MutableMapping, Optional, Union
12
-
13
- from .exceptions import ShopifyBulkExceptions
14
- from .query import ShopifyBulkQuery
15
- from .tools import END_OF_FILE, BulkTools
16
-
17
-
18
- @dataclass
19
- class ShopifyBulkRecord:
20
- query: ShopifyBulkQuery
21
-
22
- # default buffer
23
- buffer: List[MutableMapping[str, Any]] = field(init=False, default_factory=list)
24
-
25
- # default logger
26
- logger: Final[logging.Logger] = logging.getLogger("airbyte")
27
-
28
- def __post_init__(self) -> None:
29
- self.composition: Optional[Mapping[str, Any]] = self.query.record_composition
30
- self.record_process_components: Optional[Callable[[MutableMapping], MutableMapping]] = self.query.record_process_components
31
- self.components: List[str] = self.composition.get("record_components", []) if self.composition else []
32
- # how many records composed
33
- self.record_composed: int = 0
34
-
35
- @property
36
- def tools(self) -> BulkTools:
37
- return BulkTools()
38
-
39
- @staticmethod
40
- def check_type(record: Mapping[str, Any], types: Union[List[str], str]) -> bool:
41
- record_type = record.get("__typename")
42
- if isinstance(types, list):
43
- return any(record_type == t for t in types)
44
- else:
45
- return record_type == types
46
-
47
- def record_new(self, record: MutableMapping[str, Any]) -> None:
48
- record = self.component_prepare(record)
49
- record.pop("__typename")
50
- self.buffer.append(record)
51
-
52
- def record_new_component(self, record: MutableMapping[str, Any]) -> None:
53
- component = record.get("__typename")
54
- record.pop("__typename")
55
- # add component to its placeholder in the components list
56
- self.buffer[-1]["record_components"][component].append(record)
57
-
58
- def component_prepare(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
59
- if self.components:
60
- record["record_components"] = {}
61
- for component in self.components:
62
- record["record_components"][component] = []
63
- return record
64
-
65
- def buffer_flush(self) -> Iterable[Mapping[str, Any]]:
66
- if len(self.buffer) > 0:
67
- for record in self.buffer:
68
- # resolve id from `str` to `int`
69
- record = self.record_resolve_id(record)
70
- # process record components
71
- yield from self.record_process_components(record)
72
- # clean the buffer
73
- self.buffer.clear()
74
-
75
- def record_compose(self, record: Mapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]:
76
- """
77
- Step 1: register the new record by it's `__typename`
78
- Step 2: check for `components` by their `__typename` and add to the placeholder
79
- Step 3: repeat until the `<END_OF_FILE>`.
80
- """
81
- if self.check_type(record, self.composition.get("new_record")):
82
- # emit from previous iteration, if present
83
- yield from self.buffer_flush()
84
- # register the record
85
- self.record_new(record)
86
- # components check
87
- elif self.check_type(record, self.components):
88
- self.record_new_component(record)
89
-
90
- def process_line(self, jsonl_file: TextIOWrapper) -> Iterable[MutableMapping[str, Any]]:
91
- # process the json lines
92
- for line in jsonl_file:
93
- # we exit from the loop when receive <end_of_file> (file ends)
94
- if line == END_OF_FILE:
95
- break
96
- elif line != "":
97
- yield from self.record_compose(loads(line))
98
-
99
- # emit what's left in the buffer, typically last record
100
- yield from self.buffer_flush()
101
-
102
- def record_resolve_id(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
103
- """
104
- The ids are fetched in the format of: " gid://shopify/Order/<Id> "
105
- Input:
106
- { "Id": "gid://shopify/Order/19435458986123"}
107
- We need to extract the actual id from the string instead.
108
- Output:
109
- { "id": 19435458986123, "admin_graphql_api_id": "gid://shopify/Order/19435458986123"}
110
- """
111
- # save the actual api id to the `admin_graphql_api_id`
112
- # while resolving the `id` in `record_resolve_id`,
113
- # we re-assign the original id like `"gid://shopify/Order/19435458986123"`,
114
- # into `admin_graphql_api_id` have the ability to identify the record oigin correctly in subsequent actions.
115
- # IF NOT `id` field is provided by the query results, we should return composed record `as is`.
116
- id = record.get("id")
117
- if id and isinstance(id, str):
118
- record["admin_graphql_api_id"] = id
119
- # extracting the int(id) and reassign
120
- record["id"] = self.tools.resolve_str_id(id)
121
- return record
122
-
123
- def produce_records(self, filename: str) -> Iterable[MutableMapping[str, Any]]:
124
- """
125
- Read the JSONL content saved from `job.job_retrieve_result()` line-by-line to avoid OOM.
126
- The filename example: `bulk-4039263649981.jsonl`,
127
- where `4039263649981` is the `id` of the COMPLETED BULK Jobw with `result_url`.
128
- Note: typically the `filename` is taken from the `result_url` string provided in the response.
129
- """
130
-
131
- with open(filename, "r") as jsonl_file:
132
- # reset the counter
133
- self.record_composed = 0
134
-
135
- for record in self.process_line(jsonl_file):
136
- yield self.tools.fields_names_to_snake_case(record)
137
- self.record_composed += 1
138
-
139
- def read_file(self, filename: str, remove_file: Optional[bool] = True) -> Iterable[Mapping[str, Any]]:
140
- try:
141
- # produce records from saved result
142
- yield from self.produce_records(filename)
143
- except Exception as e:
144
- raise ShopifyBulkExceptions.BulkRecordProduceError(
145
- f"An error occured while producing records from BULK Job result. Trace: {repr(e)}.",
146
- )
147
- finally:
148
- # removing the tmp file, if requested
149
- if remove_file and filename:
150
- try:
151
- remove(filename)
152
- except Exception as e:
153
- self.logger.info(f"Failed to remove the `tmp job result` file, the file doen't exist. Details: {repr(e)}.")
154
- # we should pass here, if the file wasn't removed , it's either:
155
- # - doesn't exist
156
- # - will be dropped with the container shut down.
157
- pass