ingestr-0.7.8-py3-none-any.whl → ingestr-0.8.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. See the package's page on its public registry for more details.

@@ -14,6 +14,53 @@ from .settings import DEFAULT_API_VERSION, DEFAULT_PARTNER_API_VERSION
14
14
  TOrderStatus = Literal["open", "closed", "cancelled", "any"]
15
15
 
16
16
 
17
+ def convert_datetime_fields(item: Dict[str, Any]) -> Dict[str, Any]:
18
+ """Convert timestamp fields in the item to pendulum datetime objects
19
+
20
+ The item is modified in place, including nested items.
21
+
22
+ Args:
23
+ item: The item to convert
24
+
25
+ Returns:
26
+ The same data item (for convenience)
27
+ """
28
+ fields = ["created_at", "updated_at", "createdAt", "updatedAt"]
29
+
30
+ def convert_nested(obj: Any) -> Any:
31
+ if isinstance(obj, dict):
32
+ for key, value in obj.items():
33
+ if key in fields and isinstance(value, str):
34
+ obj[key] = ensure_pendulum_datetime(value)
35
+ else:
36
+ obj[key] = convert_nested(value)
37
+ elif isinstance(obj, list):
38
+ return [convert_nested(elem) for elem in obj]
39
+ return obj
40
+
41
+ return convert_nested(item)
42
+
43
+
44
+ def remove_nodes_key(item: Any) -> Any:
45
+ """
46
+ Recursively remove the 'nodes' key from dictionaries if it's the only key and its value is an array.
47
+
48
+ Args:
49
+ item: The item to process (can be a dict, list, or any other type)
50
+
51
+ Returns:
52
+ The processed item
53
+ """
54
+ if isinstance(item, dict):
55
+ if len(item) == 1 and "nodes" in item and isinstance(item["nodes"], list):
56
+ return [remove_nodes_key(node) for node in item["nodes"]]
57
+ return {k: remove_nodes_key(v) for k, v in item.items()}
58
+ elif isinstance(item, list):
59
+ return [remove_nodes_key(element) for element in item]
60
+ else:
61
+ return item
62
+
63
+
17
64
  class ShopifyApi:
18
65
  """
19
66
  A Shopify API client that can be used to get pages of data from Shopify.
@@ -50,57 +97,38 @@ class ShopifyApi:
50
97
  """
51
98
  url = urljoin(self.shop_url, f"/admin/api/{self.api_version}/{resource}.json")
52
99
 
100
+ resource_last = resource.split("/")[-1]
101
+
53
102
  headers = {"X-Shopify-Access-Token": self.private_app_password}
54
103
  while url:
55
104
  response = requests.get(url, params=params, headers=headers)
56
105
  response.raise_for_status()
57
106
  json = response.json()
58
- # Get item list from the page
59
- yield [self._convert_datetime_fields(item) for item in json[resource]]
107
+ yield [convert_datetime_fields(item) for item in json[resource_last]]
60
108
  url = response.links.get("next", {}).get("url")
61
109
  # Query params are included in subsequent page URLs
62
110
  params = None
63
111
 
64
- def _convert_datetime_fields(self, item: Dict[str, Any]) -> Dict[str, Any]:
65
- """Convert timestamp fields in the item to pendulum datetime objects
66
-
67
- The item is modified in place.
68
112
 
69
- Args:
70
- item: The item to convert
71
-
72
- Returns:
73
- The same data item (for convenience)
74
- """
75
- fields = ["created_at", "updated_at"]
76
- for field in fields:
77
- if field in item:
78
- item[field] = ensure_pendulum_datetime(item[field])
79
- return item
80
-
81
-
82
- class ShopifyPartnerApi:
83
- """Client for Shopify Partner grapql API"""
113
+ class ShopifyGraphQLApi:
114
+ """Client for Shopify GraphQL API"""
84
115
 
85
116
  def __init__(
86
117
  self,
87
118
  access_token: str,
88
- organization_id: str,
89
119
  api_version: str = DEFAULT_PARTNER_API_VERSION,
120
+ base_url: str = "partners.shopify.com",
90
121
  ) -> None:
91
- """
92
- Args:
93
- access_token: The access token to use
94
- organization_id: The organization id to query
95
- api_version: The API version to use (e.g. 2023-01)
96
- """
97
122
  self.access_token = access_token
98
- self.organization_id = organization_id
99
123
  self.api_version = api_version
124
+ self.base_url = base_url
100
125
 
101
126
  @property
102
127
  def graphql_url(self) -> str:
103
- return f"https://partners.shopify.com/{self.organization_id}/api/{self.api_version}/graphql.json"
128
+ if self.base_url.startswith("https://"):
129
+ return f"{self.base_url}/admin/api/{self.api_version}/graphql.json"
130
+
131
+ return f"https://{self.base_url}/admin/api/{self.api_version}/graphql.json"
104
132
 
105
133
  def run_graphql_query(
106
134
  self, query: str, variables: Optional[DictStrAny] = None
@@ -130,18 +158,31 @@ class ShopifyPartnerApi:
130
158
  query: str,
131
159
  data_items_path: jsonpath.TJsonPath,
132
160
  pagination_cursor_path: jsonpath.TJsonPath,
161
+ pagination_cursor_has_next_page_path: jsonpath.TJsonPath,
133
162
  pagination_variable_name: str,
134
163
  variables: Optional[DictStrAny] = None,
135
164
  ) -> Iterable[TDataItems]:
136
165
  variables = dict(variables or {})
137
166
  while True:
138
167
  data = self.run_graphql_query(query, variables)
139
- print(data)
140
168
  data_items = jsonpath.find_values(data_items_path, data)
169
+
141
170
  if not data_items:
142
171
  break
143
- yield data_items
172
+
173
+ yield [
174
+ remove_nodes_key(convert_datetime_fields(item)) for item in data_items
175
+ ]
176
+
144
177
  cursors = jsonpath.find_values(pagination_cursor_path, data)
145
178
  if not cursors:
146
179
  break
180
+
181
+ if pagination_cursor_has_next_page_path:
182
+ has_next_page = jsonpath.find_values(
183
+ pagination_cursor_has_next_page_path, data
184
+ )
185
+ if not has_next_page or not has_next_page[0]:
186
+ break
187
+
147
188
  variables[pagination_variable_name] = cursors[-1]
ingestr/src/sources.py CHANGED
@@ -1,13 +1,15 @@
1
1
  import base64
2
2
  import csv
3
3
  import json
4
- from datetime import date
4
+ from datetime import date, datetime
5
5
  from typing import Any, Callable, Optional
6
6
  from urllib.parse import parse_qs, urlparse
7
7
 
8
8
  import dlt
9
9
 
10
+ from ingestr.src.adjust._init_ import adjust_source
10
11
  from ingestr.src.airtable import airtable_source
12
+ from ingestr.src.appsflyer._init_ import appsflyer_source
11
13
  from ingestr.src.chess import source
12
14
  from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
13
15
  from ingestr.src.google_sheets import google_spreadsheet
@@ -188,11 +190,6 @@ class ShopifySource:
188
190
  return True
189
191
 
190
192
  def dlt_source(self, uri: str, table: str, **kwargs):
191
- if kwargs.get("incremental_key"):
192
- raise ValueError(
193
- "Shopify takes care of incrementality on its own, you should not provide incremental_key"
194
- )
195
-
196
193
  source_fields = urlparse(uri)
197
194
  source_params = parse_qs(source_fields.query)
198
195
  api_key = source_params.get("api_key")
@@ -207,7 +204,19 @@ class ShopifySource:
207
204
  date_args["end_date"] = kwargs.get("interval_end")
208
205
 
209
206
  resource = None
210
- if table in ["products", "orders", "customers"]:
207
+ if table in [
208
+ "products",
209
+ "products_legacy",
210
+ "orders",
211
+ "customers",
212
+ "inventory_items",
213
+ "transactions",
214
+ "balance",
215
+ "events",
216
+ "price_rules",
217
+ "discounts",
218
+ "taxonomy",
219
+ ]:
211
220
  resource = table
212
221
  else:
213
222
  raise ValueError(
@@ -652,3 +661,76 @@ class KafkaSource:
652
661
  batch_size=int(batch_size[0]),
653
662
  batch_timeout=int(batch_timeout[0]),
654
663
  )
664
+
665
+
666
+ class AdjustSource:
667
+ def handles_incrementality(self) -> bool:
668
+ return True
669
+
670
+ def dlt_source(self, uri: str, table: str, **kwargs):
671
+ if kwargs.get("incremental_key"):
672
+ raise ValueError(
673
+ "Adjust takes care of incrementality on its own, you should not provide incremental_key"
674
+ )
675
+
676
+ source_part = urlparse(uri)
677
+ source_params = parse_qs(source_part.query)
678
+ api_key = source_params.get("api_key")
679
+
680
+ if not api_key:
681
+ raise ValueError("api_key in the URI is required to connect to Adjust")
682
+
683
+ interval_start = kwargs.get("interval_start")
684
+ interval_end = kwargs.get("interval_end")
685
+
686
+ start_date = (
687
+ interval_start.strftime("%Y-%m-%d") if interval_start else "2000-01-01"
688
+ )
689
+ end_date = (
690
+ interval_end.strftime("%Y-%m-%d")
691
+ if interval_end
692
+ else datetime.now().strftime("%Y-%m-%d")
693
+ )
694
+
695
+ Endpoint = None
696
+ if table in ["campaigns", "creatives"]:
697
+ Endpoint = table
698
+
699
+ return adjust_source(
700
+ start_date=start_date, end_date=end_date, api_key=api_key[0]
701
+ ).with_resources(Endpoint)
702
+
703
+
704
+ class AppsflyerSource:
705
+ def handles_incrementality(self) -> bool:
706
+ return True
707
+
708
+ def dlt_source(self, uri: str, table: str, **kwargs):
709
+ if kwargs.get("incremental_key"):
710
+ raise ValueError(
711
+ "Appsflyer_Source takes care of incrementality on its own, you should not provide incremental_key"
712
+ )
713
+
714
+ source_fields = urlparse(uri)
715
+ source_params = parse_qs(source_fields.query)
716
+ api_key = source_params.get("api_key")
717
+
718
+ if not api_key:
719
+ raise ValueError("api_key in the URI is required to connect to Appsflyer")
720
+
721
+ resource = None
722
+ if table in ["campaigns", "creatives"]:
723
+ resource = table
724
+ else:
725
+ raise ValueError(
726
+ f"Resource '{table}' is not supported for Appsflyer source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
727
+ )
728
+
729
+ start_date = kwargs.get("interval_start") or "2024-01-02"
730
+ end_date = kwargs.get("interval_end") or "2024-01-29"
731
+
732
+ return appsflyer_source(
733
+ api_key=api_key[0],
734
+ start_date=start_date,
735
+ end_date=end_date,
736
+ ).with_resources(resource)
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.7.8"
1
+ __version__ = "0.8.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.8
3
+ Version: 0.8.2
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -44,6 +44,7 @@ Requires-Dist: sqlalchemy==1.4.52
44
44
  Requires-Dist: stripe==10.7.0
45
45
  Requires-Dist: tqdm==4.66.2
46
46
  Requires-Dist: typer==0.12.3
47
+ Requires-Dist: types-requests==2.32.0.20240907
47
48
  Description-Content-Type: text/markdown
48
49
 
49
50
  <div align="center">
@@ -178,10 +179,18 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
178
179
  <tr>
179
180
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
180
181
  </tr>
182
+ <td>Adjust</td>
183
+ <td>✅</td>
184
+ <td>-</td>
181
185
  <tr>
182
186
  <td>Airtable</td>
183
187
  <td>✅</td>
184
188
  <td>-</td>
189
+ </tr>
190
+ <tr>
191
+ <td>AppsFlyer</td>
192
+ <td>✅</td>
193
+ <td>-</td>
185
194
  </tr>
186
195
  <tr>
187
196
  <td>Chess.com</td>
@@ -1,11 +1,15 @@
1
- ingestr/main.py,sha256=Hlcb8mUAWoGZr4ZKtQnoEhjLkjroiwx2-J86C6fN37E,17596
1
+ ingestr/main.py,sha256=U66TM57ePv-RdoAftQ0pFZx8woZUQnLepKa50C-bA5I,17655
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
4
- ingestr/src/factory.py,sha256=CTVaFeMVgZO1fC9AKOqx-Wu89l5_YL6GlmvDF-FkAew,4442
5
- ingestr/src/sources.py,sha256=BlMsajIMcu_oqmU38uqlasXz2vtN_J8yXa24NHFcwJA,22696
4
+ ingestr/src/factory.py,sha256=-_KwBpbNAegv_oXIB57klyjUb3K6e0lw_xdi5bwmarI,4645
5
+ ingestr/src/sources.py,sha256=0IGguMm85E3Rahu6zVLawoe2d4lqRY31uHuxlqCsiQc,25324
6
6
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
7
- ingestr/src/version.py,sha256=uC8wB9mRblQ0jUBAOUyCQLUQJ39MC2xybVLB_8ZsevU,22
7
+ ingestr/src/version.py,sha256=B7GiO0rd49YwtLYjvPg4lmCZEDlMTonslQKdSImaMJk,22
8
+ ingestr/src/adjust/_init_.py,sha256=_jJE3Ywvv-YyJ7ywICdht_X2Gnd1cKm6F1wAfnpXuWM,890
9
+ ingestr/src/adjust/helpers.py,sha256=kkYC3MqMHLNucuQ50klZWrvd3o8VfUysNtZTQSsKZ_c,2588
8
10
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
11
+ ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
12
+ ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
9
13
  ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
10
14
  ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
11
15
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
@@ -35,9 +39,9 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
35
39
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
40
  ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
37
41
  ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
38
- ingestr/src/shopify/__init__.py,sha256=EWjpvZz7K6Pms7uUoqqkM4Wj0XeE2NrDvVp4BNM8dPk,9163
42
+ ingestr/src/shopify/__init__.py,sha256=wFVqyvgh3tXfOmgMBbsU4U08EhdiVWuJGbCMcZWkioc,62704
39
43
  ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
40
- ingestr/src/shopify/helpers.py,sha256=OO_Tw-HwVLnRhwT3vqUWEQEEcWIS9KWE6VDDe8BCC2w,4972
44
+ ingestr/src/shopify/helpers.py,sha256=2MlqyCc7VJxpYlGAxw-bYpSuCZNfat0E5zDe1jQYVP4,6279
41
45
  ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
42
46
  ingestr/src/slack/__init__.py,sha256=UfUhkS6FnCKJeXkkJ5QrmdT5nZm5czjtomsQu_x9WUM,9987
43
47
  ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
@@ -60,8 +64,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
60
64
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
61
65
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
62
66
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
63
- ingestr-0.7.8.dist-info/METADATA,sha256=JGJ_76vC0icT_tJSYDkbtRXuc_63sgHXJYYIksTSyOE,6561
64
- ingestr-0.7.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
65
- ingestr-0.7.8.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
66
- ingestr-0.7.8.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
67
- ingestr-0.7.8.dist-info/RECORD,,
67
+ ingestr-0.8.2.dist-info/METADATA,sha256=q9yjG1V9XDZFQDRcAQodSreucMEcoRtnHbPE9EzXtFo,6755
68
+ ingestr-0.8.2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
69
+ ingestr-0.8.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
70
+ ingestr-0.8.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
71
+ ingestr-0.8.2.dist-info/RECORD,,