ingestr 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (82) hide show
  1. {ingestr-0.5.0 → ingestr-0.6.0}/Dockerfile +1 -1
  2. {ingestr-0.5.0 → ingestr-0.6.0}/PKG-INFO +6 -1
  3. {ingestr-0.5.0 → ingestr-0.6.0}/README.md +5 -0
  4. {ingestr-0.5.0 → ingestr-0.6.0}/docs/.vitepress/config.mjs +1 -0
  5. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/overview.md +5 -0
  6. ingestr-0.6.0/docs/supported-sources/shopify.md +37 -0
  7. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/snowflake.md +1 -1
  8. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/destinations.py +1 -1
  9. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/factory.py +3 -0
  10. ingestr-0.6.0/ingestr/src/shopify/__init__.py +227 -0
  11. ingestr-0.6.0/ingestr/src/shopify/exceptions.py +2 -0
  12. ingestr-0.6.0/ingestr/src/shopify/helpers.py +147 -0
  13. ingestr-0.6.0/ingestr/src/shopify/settings.py +5 -0
  14. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sources.py +38 -0
  15. ingestr-0.6.0/ingestr/src/version.py +1 -0
  16. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_append.db +0 -0
  17. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_create_replace.db +0 -0
  18. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
  19. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
  20. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_merge_with_primary_key.db +0 -0
  21. {ingestr-0.5.0 → ingestr-0.6.0}/pyproject.toml +2 -0
  22. ingestr-0.5.0/ingestr/src/version.py +0 -1
  23. {ingestr-0.5.0 → ingestr-0.6.0}/.dockerignore +0 -0
  24. {ingestr-0.5.0 → ingestr-0.6.0}/.github/workflows/deploy-docs.yml +0 -0
  25. {ingestr-0.5.0 → ingestr-0.6.0}/.github/workflows/docker.yml +0 -0
  26. {ingestr-0.5.0 → ingestr-0.6.0}/.gitignore +0 -0
  27. {ingestr-0.5.0 → ingestr-0.6.0}/LICENSE.md +0 -0
  28. {ingestr-0.5.0 → ingestr-0.6.0}/Makefile +0 -0
  29. {ingestr-0.5.0 → ingestr-0.6.0}/docs/.vitepress/theme/custom.css +0 -0
  30. {ingestr-0.5.0 → ingestr-0.6.0}/docs/.vitepress/theme/index.js +0 -0
  31. {ingestr-0.5.0 → ingestr-0.6.0}/docs/commands/example-uris.md +0 -0
  32. {ingestr-0.5.0 → ingestr-0.6.0}/docs/commands/ingest.md +0 -0
  33. {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/core-concepts.md +0 -0
  34. {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/incremental-loading.md +0 -0
  35. {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/quickstart.md +0 -0
  36. {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/telemetry.md +0 -0
  37. {ingestr-0.5.0 → ingestr-0.6.0}/docs/index.md +0 -0
  38. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/bigquery.md +0 -0
  39. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/csv.md +0 -0
  40. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/databricks.md +0 -0
  41. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/duckdb.md +0 -0
  42. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/gsheets.md +0 -0
  43. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/mongodb.md +0 -0
  44. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/mssql.md +0 -0
  45. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/mysql.md +0 -0
  46. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/notion.md +0 -0
  47. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/oracle.md +0 -0
  48. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/postgres.md +0 -0
  49. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/redshift.md +0 -0
  50. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/sap-hana.md +0 -0
  51. {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/sqlite.md +0 -0
  52. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/main.py +0 -0
  53. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/main_test.py +0 -0
  54. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/destinations_test.py +0 -0
  55. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/factory_test.py +0 -0
  56. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/README.md +0 -0
  57. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/__init__.py +0 -0
  58. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  59. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  60. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  61. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/mongodb/__init__.py +0 -0
  62. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/mongodb/helpers.py +0 -0
  63. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/__init__.py +0 -0
  64. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/helpers/__init__.py +0 -0
  65. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/helpers/client.py +0 -0
  66. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/helpers/database.py +0 -0
  67. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/settings.py +0 -0
  68. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sources_test.py +0 -0
  69. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/__init__.py +0 -0
  70. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/helpers.py +0 -0
  71. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/override.py +0 -0
  72. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/schema_types.py +0 -0
  73. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/telemetry/event.py +0 -0
  74. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  75. {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/.gitignore +0 -0
  76. {ingestr-0.5.0 → ingestr-0.6.0}/package-lock.json +0 -0
  77. {ingestr-0.5.0 → ingestr-0.6.0}/package.json +0 -0
  78. {ingestr-0.5.0 → ingestr-0.6.0}/requirements-dev.txt +0 -0
  79. {ingestr-0.5.0 → ingestr-0.6.0}/requirements.txt +0 -0
  80. {ingestr-0.5.0 → ingestr-0.6.0}/resources/demo.gif +0 -0
  81. {ingestr-0.5.0 → ingestr-0.6.0}/resources/demo.tape +0 -0
  82. {ingestr-0.5.0 → ingestr-0.6.0}/resources/ingestr.svg +0 -0
@@ -10,7 +10,7 @@ ENV VIRTUAL_ENV=/usr/local
10
10
  ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
11
11
  RUN /install.sh && rm /install.sh
12
12
 
13
- RUN /root/.cargo/bin/uv pip install --no-cache -r requirements.txt
13
+ RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
14
14
 
15
15
  COPY . /app
16
16
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -180,6 +180,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
180
180
  <td>✅</td>
181
181
  <td>❌</td>
182
182
  </tr>
183
+ <tr>
184
+ <td>Shopify</td>
185
+ <td>✅</td>
186
+ <td>❌</td>
187
+ </tr>
183
188
  </table>
184
189
 
185
190
  More to come soon!
@@ -138,6 +138,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
138
138
  <td>✅</td>
139
139
  <td>❌</td>
140
140
  </tr>
141
+ <tr>
142
+ <td>Shopify</td>
143
+ <td>✅</td>
144
+ <td>❌</td>
145
+ </tr>
141
146
  </table>
142
147
 
143
148
  More to come soon!
@@ -70,6 +70,7 @@ export default defineConfig({
70
70
  items: [
71
71
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
72
72
  { text: "Notion", link: "/supported-sources/notion.md" },
73
+ { text: "Shopify", link: "/supported-sources/shopify.md" },
73
74
  ],
74
75
  },
75
76
  ],
@@ -89,6 +89,11 @@ ingestr supports the following sources and destinations:
89
89
  <td>✅</td>
90
90
  <td>❌</td>
91
91
  </tr>
92
+ <tr>
93
+ <td>Shopify</td>
94
+ <td>✅</td>
95
+ <td>❌</td>
96
+ </tr>
92
97
  </table>
93
98
 
94
99
  More to come soon!
@@ -0,0 +1,37 @@
1
+ # Shopify
2
+ [Shopify](https://www.shopify.com/) is a comprehensive e-commerce platform that enables individuals and businesses to create online stores.
3
+
4
+ ingestr supports Shopify as a source.
5
+
6
+ ## URI Format
7
+ The URI format for Shopify is as follows:
8
+
9
+ ```plaintext
10
+ shopify://<shopify store URL>?api_key=token
11
+ ```
12
+
13
+ URI parameters:
14
+ - `shopify store URL`: the URL of the Shopify store you'd like to connect to, e.g. `myawesomestore.myshopify.com`
15
+ - `api_key`: the API key used for authentication with the Shopify API
16
+
17
+ The URI is used to connect to the Shopify API for extracting data. More details on setting up Shopify integrations can be found [here](https://shopify.dev/docs/admin-api/getting-started).
18
+
19
+ ## Setting up a Shopify Integration
20
+
21
+ Shopify requires a few steps to set up an integration; please follow the guide that dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/shopify#setup-guide).
22
+
23
+ Once you complete the guide, you should have an API key and the store name to connect to. Let's say your API key is `shpkey_12345` and the store you'd like to connect to is `my-store`. Here's a sample command that will copy the data from the Shopify store into a DuckDB database:
24
+
25
+ ```sh
26
+ ingestr ingest --source-uri 'shopify://my-store.myshopify.com?api_key=shpkey_12345' --source-table 'orders' --dest-uri duckdb:///shopify.duckdb --dest-table 'shopify.orders'
27
+ ```
28
+
29
+ The result of this command will be a table in the `shopify.duckdb` database with JSON columns.
30
+
31
+ ## Available Tables
32
+ Shopify source allows ingesting the following sources into separate tables:
33
+ - `orders`
34
+ - `customers`
35
+ - `products`
36
+
37
+ Use these as the `--source-table` parameter in the `ingestr ingest` command.
@@ -13,7 +13,7 @@ snowflake://user:password@account/dbname?warehouse=COMPUTE_WH&role=data_scientis
13
13
  URI parameters:
14
14
  - `user`: the user name to connect to the database
15
15
  - `password`: the password for the user
16
- - `account`: your Snowflake account identifier
16
+ - `account`: your Snowflake account identifier (copying it from the Snowflake interface gives you `org_name.account_name`; replace the "." with "-" when using it in the ingestr command)
17
17
  - `dbname`: the name of the database to connect to
18
18
  - `warehouse`: the name of the warehouse to use (optional)
19
19
  - `role`: the name of the role to use (optional)
@@ -98,7 +98,7 @@ class RedshiftDestination(GenericSqlDestination):
98
98
 
99
99
  class DuckDBDestination(GenericSqlDestination):
100
100
  def dlt_dest(self, uri: str, **kwargs):
101
- return dlt.destinations.duckdb(credentials=uri, **kwargs)
101
+ return dlt.destinations.duckdb(uri, **kwargs)
102
102
 
103
103
 
104
104
  class MsSQLDestination(GenericSqlDestination):
@@ -19,6 +19,7 @@ from ingestr.src.sources import (
19
19
  LocalCsvSource,
20
20
  MongoDbSource,
21
21
  NotionSource,
22
+ ShopifySource,
22
23
  SqlSource,
23
24
  )
24
25
 
@@ -91,6 +92,8 @@ class SourceDestinationFactory:
91
92
  return NotionSource()
92
93
  elif self.source_scheme == "gsheets":
93
94
  return GoogleSheetsSource()
95
+ elif self.source_scheme == "shopify":
96
+ return ShopifySource()
94
97
  else:
95
98
  raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
96
99
 
@@ -0,0 +1,227 @@
1
+ """Fetches Shopify Orders, Products, and Customers."""
2
+
3
+ from typing import Any, Dict, Iterable, Optional
4
+
5
+ import dlt
6
+ from dlt.common import jsonpath as jp
7
+ from dlt.common import pendulum
8
+ from dlt.common.time import ensure_pendulum_datetime
9
+ from dlt.common.typing import TAnyDateTime, TDataItem
10
+ from dlt.sources import DltResource
11
+
12
+ from .helpers import ShopifyApi, ShopifyPartnerApi, TOrderStatus
13
+ from .settings import (
14
+ DEFAULT_API_VERSION,
15
+ DEFAULT_ITEMS_PER_PAGE,
16
+ DEFAULT_PARTNER_API_VERSION,
17
+ FIRST_DAY_OF_MILLENNIUM,
18
+ )
19
+
20
+
21
+ @dlt.source(name="shopify", max_table_nesting=0)
22
+ def shopify_source(
23
+ private_app_password: str = dlt.secrets.value,
24
+ api_version: str = DEFAULT_API_VERSION,
25
+ shop_url: str = dlt.config.value,
26
+ start_date: TAnyDateTime = FIRST_DAY_OF_MILLENNIUM,
27
+ end_date: Optional[TAnyDateTime] = None,
28
+ created_at_min: TAnyDateTime = FIRST_DAY_OF_MILLENNIUM,
29
+ items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
30
+ order_status: TOrderStatus = "any",
31
+ ) -> Iterable[DltResource]:
32
+ """
33
+ The source for the Shopify pipeline. Available resources are products, orders, and customers.
34
+
35
+ The `start_date` argument can be used on its own or together with `end_date`. When both are provided
36
+ data is limited to items updated in that time range.
37
+ The range is "half-open", meaning elements equal to or newer than `start_date` and elements older than `end_date` are included.
38
+ All resources opt in to use the Airflow scheduler if run as an Airflow task.
39
+
40
+ Args:
41
+ private_app_password: The app password to the app on your shop.
42
+ api_version: The API version to use (e.g. 2023-01).
43
+ shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
44
+ items_per_page: The max number of items to fetch per page. Defaults to 250.
45
+ start_date: Items updated on or after this date are imported. Defaults to 2000-01-01.
46
+ If end date is not provided, this is used as the initial value for incremental loading and after the initial run, only new data will be retrieved.
47
+ Accepts any `date`/`datetime` object or a date/datetime string in ISO 8601 format.
48
+ end_date: The end time of the range for which to load data.
49
+ Should be used together with `start_date` to limit the data to items updated in that time range.
50
+ If end time is not provided, the incremental loading will be enabled and after initial run, only new data will be retrieved
51
+ created_at_min: The minimum creation date of items to import. Items created on or after this date are loaded. Defaults to 2000-01-01.
52
+ order_status: The order status to filter by. Can be 'open', 'closed', 'cancelled', or 'any'. Defaults to 'any'.
53
+
54
+ Returns:
55
+ Iterable[DltResource]: A list of DltResource objects representing the data resources.
56
+ """
57
+
58
+ # build client
59
+ client = ShopifyApi(shop_url, private_app_password, api_version)
60
+
61
+ start_date_obj = ensure_pendulum_datetime(start_date)
62
+ end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None
63
+ created_at_min_obj = ensure_pendulum_datetime(created_at_min)
64
+
65
+ # define resources
66
+ @dlt.resource(primary_key="id", write_disposition="merge")
67
+ def products(
68
+ updated_at: dlt.sources.incremental[
69
+ pendulum.DateTime
70
+ ] = dlt.sources.incremental(
71
+ "updated_at",
72
+ initial_value=start_date_obj,
73
+ end_value=end_date_obj,
74
+ allow_external_schedulers=True,
75
+ ),
76
+ created_at_min: pendulum.DateTime = created_at_min_obj,
77
+ items_per_page: int = items_per_page,
78
+ ) -> Iterable[TDataItem]:
79
+ """
80
+ The resource for products on your shop, supports incremental loading and pagination.
81
+
82
+ Args:
83
+ updated_at: The saved state of the last 'updated_at' value.
84
+
85
+ Returns:
86
+ Iterable[TDataItem]: A generator of products.
87
+ """
88
+ params = dict(
89
+ updated_at_min=updated_at.last_value.isoformat(),
90
+ limit=items_per_page,
91
+ order="updated_at asc",
92
+ created_at_min=created_at_min.isoformat(),
93
+ )
94
+ if updated_at.end_value is not None:
95
+ params["updated_at_max"] = updated_at.end_value.isoformat()
96
+ yield from client.get_pages("products", params)
97
+
98
+ @dlt.resource(primary_key="id", write_disposition="merge")
99
+ def orders(
100
+ updated_at: dlt.sources.incremental[
101
+ pendulum.DateTime
102
+ ] = dlt.sources.incremental(
103
+ "updated_at",
104
+ initial_value=start_date_obj,
105
+ end_value=end_date_obj,
106
+ allow_external_schedulers=True,
107
+ ),
108
+ created_at_min: pendulum.DateTime = created_at_min_obj,
109
+ items_per_page: int = items_per_page,
110
+ status: TOrderStatus = order_status,
111
+ ) -> Iterable[TDataItem]:
112
+ """
113
+ The resource for orders on your shop, supports incremental loading and pagination.
114
+
115
+ Args:
116
+ updated_at: The saved state of the last 'updated_at' value.
117
+
118
+ Returns:
119
+ Iterable[TDataItem]: A generator of orders.
120
+ """
121
+ params = dict(
122
+ updated_at_min=updated_at.last_value.isoformat(),
123
+ limit=items_per_page,
124
+ status=status,
125
+ order="updated_at asc",
126
+ created_at_min=created_at_min.isoformat(),
127
+ )
128
+ if updated_at.end_value is not None:
129
+ params["updated_at_max"] = updated_at.end_value.isoformat()
130
+ yield from client.get_pages("orders", params)
131
+
132
+ @dlt.resource(primary_key="id", write_disposition="merge")
133
+ def customers(
134
+ updated_at: dlt.sources.incremental[
135
+ pendulum.DateTime
136
+ ] = dlt.sources.incremental(
137
+ "updated_at",
138
+ initial_value=start_date_obj,
139
+ end_value=end_date_obj,
140
+ allow_external_schedulers=True,
141
+ ),
142
+ created_at_min: pendulum.DateTime = created_at_min_obj,
143
+ items_per_page: int = items_per_page,
144
+ ) -> Iterable[TDataItem]:
145
+ """
146
+ The resource for customers on your shop, supports incremental loading and pagination.
147
+
148
+ Args:
149
+ updated_at: The saved state of the last 'updated_at' value.
150
+
151
+ Returns:
152
+ Iterable[TDataItem]: A generator of customers.
153
+ """
154
+ params = dict(
155
+ updated_at_min=updated_at.last_value.isoformat(),
156
+ limit=items_per_page,
157
+ order="updated_at asc",
158
+ created_at_min=created_at_min.isoformat(),
159
+ )
160
+ if updated_at.end_value is not None:
161
+ params["updated_at_max"] = updated_at.end_value.isoformat()
162
+ yield from client.get_pages("customers", params)
163
+
164
+ return (products, orders, customers)
165
+
166
+
167
+ @dlt.resource
168
+ def shopify_partner_query(
169
+ query: str,
170
+ data_items_path: jp.TJsonPath,
171
+ pagination_cursor_path: jp.TJsonPath,
172
+ pagination_variable_name: str = "after",
173
+ variables: Optional[Dict[str, Any]] = None,
174
+ access_token: str = dlt.secrets.value,
175
+ organization_id: str = dlt.config.value,
176
+ api_version: str = DEFAULT_PARTNER_API_VERSION,
177
+ ) -> Iterable[TDataItem]:
178
+ """
179
+ Resource for getting paginated results from the Shopify Partner GraphQL API.
180
+
181
+ This resource will run the given GraphQL query and extract a list of data items from the result.
182
+ It will then run the query again with a pagination cursor to get the next page of results.
183
+
184
+ Example:
185
+ query = '''query Transactions($after: String) {
186
+ transactions(after: $after, first: 100) {
187
+ edges {
188
+ cursor
189
+ node {
190
+ id
191
+ }
192
+ }
193
+ }
194
+ }'''
195
+
196
+ shopify_partner_query(
197
+ query,
198
+ data_items_path="data.transactions.edges[*].node",
199
+ pagination_cursor_path="data.transactions.edges[-1].cursor",
200
+ pagination_variable_name="after",
201
+ )
202
+
203
+ Args:
204
+ query: The GraphQL query to run.
205
+ data_items_path: The JSONPath to the data items in the query result. Should resolve to array items.
206
+ pagination_cursor_path: The JSONPath to the pagination cursor in the query result, will be piped to the next query via variables.
207
+ pagination_variable_name: The name of the variable to pass the pagination cursor to.
208
+ variables: Mapping of extra variables used in the query.
209
+ access_token: The Partner API Client access token, created in the Partner Dashboard.
210
+ organization_id: Your Organization ID, found in the Partner Dashboard.
211
+ api_version: The API version to use (e.g. 2024-01). Use `unstable` for the latest version.
212
+ Returns:
213
+ Iterable[TDataItem]: A generator of the query results.
214
+ """
215
+ client = ShopifyPartnerApi(
216
+ access_token=access_token,
217
+ organization_id=organization_id,
218
+ api_version=api_version,
219
+ )
220
+
221
+ yield from client.get_graphql_pages(
222
+ query,
223
+ data_items_path=data_items_path,
224
+ pagination_cursor_path=pagination_cursor_path,
225
+ pagination_variable_name=pagination_variable_name,
226
+ variables=variables,
227
+ )
@@ -0,0 +1,2 @@
1
+ class ShopifyPartnerApiError(Exception):
2
+ pass
@@ -0,0 +1,147 @@
1
+ """Shopify source helpers"""
2
+
3
+ from typing import Any, Iterable, Literal, Optional
4
+ from urllib.parse import urljoin
5
+
6
+ from dlt.common import jsonpath
7
+ from dlt.common.time import ensure_pendulum_datetime
8
+ from dlt.common.typing import Dict, DictStrAny, TDataItems
9
+ from dlt.sources.helpers import requests
10
+
11
+ from .exceptions import ShopifyPartnerApiError
12
+ from .settings import DEFAULT_API_VERSION, DEFAULT_PARTNER_API_VERSION
13
+
14
+ TOrderStatus = Literal["open", "closed", "cancelled", "any"]
15
+
16
+
17
+ class ShopifyApi:
18
+ """
19
+ A Shopify API client that can be used to get pages of data from Shopify.
20
+ """
21
+
22
+ def __init__(
23
+ self,
24
+ shop_url: str,
25
+ private_app_password: str,
26
+ api_version: str = DEFAULT_API_VERSION,
27
+ ) -> None:
28
+ """
29
+ Args:
30
+ shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
31
+ private_app_password: The private app password to the app on your shop.
32
+ api_version: The API version to use (e.g. 2023-01)
33
+ """
34
+ self.shop_url = shop_url
35
+ self.private_app_password = private_app_password
36
+ self.api_version = api_version
37
+
38
+ def get_pages(
39
+ self, resource: str, params: Optional[Dict[str, Any]] = None
40
+ ) -> Iterable[TDataItems]:
41
+ """Get all pages from shopify using requests.
42
+ Iterates through all pages and yields each page's items.
43
+
44
+ Args:
45
+ resource: The resource to get pages for (e.g. products, orders, customers).
46
+ params: Query params to include in the request.
47
+
48
+ Yields:
49
+ List of data items from the page
50
+ """
51
+ url = urljoin(self.shop_url, f"/admin/api/{self.api_version}/{resource}.json")
52
+
53
+ headers = {"X-Shopify-Access-Token": self.private_app_password}
54
+ while url:
55
+ response = requests.get(url, params=params, headers=headers)
56
+ response.raise_for_status()
57
+ json = response.json()
58
+ # Get item list from the page
59
+ yield [self._convert_datetime_fields(item) for item in json[resource]]
60
+ url = response.links.get("next", {}).get("url")
61
+ # Query params are included in subsequent page URLs
62
+ params = None
63
+
64
+ def _convert_datetime_fields(self, item: Dict[str, Any]) -> Dict[str, Any]:
65
+ """Convert timestamp fields in the item to pendulum datetime objects
66
+
67
+ The item is modified in place.
68
+
69
+ Args:
70
+ item: The item to convert
71
+
72
+ Returns:
73
+ The same data item (for convenience)
74
+ """
75
+ fields = ["created_at", "updated_at"]
76
+ for field in fields:
77
+ if field in item:
78
+ item[field] = ensure_pendulum_datetime(item[field])
79
+ return item
80
+
81
+
82
+ class ShopifyPartnerApi:
83
+ """Client for Shopify Partner GraphQL API"""
84
+
85
+ def __init__(
86
+ self,
87
+ access_token: str,
88
+ organization_id: str,
89
+ api_version: str = DEFAULT_PARTNER_API_VERSION,
90
+ ) -> None:
91
+ """
92
+ Args:
93
+ access_token: The access token to use
94
+ organization_id: The organization id to query
95
+ api_version: The API version to use (e.g. 2023-01)
96
+ """
97
+ self.access_token = access_token
98
+ self.organization_id = organization_id
99
+ self.api_version = api_version
100
+
101
+ @property
102
+ def graphql_url(self) -> str:
103
+ return f"https://partners.shopify.com/{self.organization_id}/api/{self.api_version}/graphql.json"
104
+
105
+ def run_graphql_query(
106
+ self, query: str, variables: Optional[DictStrAny] = None
107
+ ) -> DictStrAny:
108
+ """Run a graphql query against the Shopify Partner API
109
+
110
+ Args:
111
+ query: The query to run
112
+ variables: The variables to include in the query
113
+
114
+ Returns:
115
+ The response JSON
116
+ """
117
+ headers = {"X-Shopify-Access-Token": self.access_token}
118
+ response = requests.post(
119
+ self.graphql_url,
120
+ json={"query": query, "variables": variables},
121
+ headers=headers,
122
+ )
123
+ data = response.json()
124
+ if data.get("errors"):
125
+ raise ShopifyPartnerApiError(response.text)
126
+ return data # type: ignore[no-any-return]
127
+
128
+ def get_graphql_pages(
129
+ self,
130
+ query: str,
131
+ data_items_path: jsonpath.TJsonPath,
132
+ pagination_cursor_path: jsonpath.TJsonPath,
133
+ pagination_variable_name: str,
134
+ variables: Optional[DictStrAny] = None,
135
+ ) -> Iterable[TDataItems]:
136
+ variables = dict(variables or {})
137
+ while True:
138
+ data = self.run_graphql_query(query, variables)
139
+ print(data)
140
+ data_items = jsonpath.find_values(data_items_path, data)
141
+ if not data_items:
142
+ break
143
+ yield data_items
144
+ cursors = jsonpath.find_values(pagination_cursor_path, data)
145
+ if not cursors:
146
+ break
147
+ variables[pagination_variable_name] = cursors[-1]
@@ -0,0 +1,5 @@
1
+ FIRST_DAY_OF_MILLENNIUM = "2000-01-01"
2
+ DEFAULT_API_VERSION = "2023-10"
3
+ DEFAULT_ITEMS_PER_PAGE = 250
4
+
5
+ DEFAULT_PARTNER_API_VERSION = "2024-01"
@@ -9,6 +9,7 @@ import dlt
9
9
  from ingestr.src.google_sheets import google_spreadsheet
10
10
  from ingestr.src.mongodb import mongodb_collection
11
11
  from ingestr.src.notion import notion_databases
12
+ from ingestr.src.shopify import shopify_source
12
13
  from ingestr.src.sql_database import sql_table
13
14
 
14
15
 
@@ -134,6 +135,43 @@ class NotionSource:
134
135
  )
135
136
 
136
137
 
138
+ class ShopifySource:
139
+ def dlt_source(self, uri: str, table: str, **kwargs):
140
+ if kwargs.get("incremental_key"):
141
+ raise ValueError(
142
+ "Shopify takes care of incrementality on its own, you should not provide incremental_key"
143
+ )
144
+
145
+ # shopify://shop_url?api_key=private_app_password
146
+
147
+ source_fields = urlparse(uri)
148
+ source_params = parse_qs(source_fields.query)
149
+ api_key = source_params.get("api_key")
150
+ if not api_key:
151
+ raise ValueError("api_key in the URI is required to connect to Shopify")
152
+
153
+ date_args = {}
154
+ if kwargs.get("interval_start"):
155
+ date_args["start_date"] = kwargs.get("interval_start")
156
+
157
+ if kwargs.get("interval_end"):
158
+ date_args["end_date"] = kwargs.get("interval_end")
159
+
160
+ resource = None
161
+ if table in ["products", "orders", "customers"]:
162
+ resource = table
163
+ else:
164
+ raise ValueError(
165
+ f"Table name '{table}' is not supported for Shopify source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
166
+ )
167
+
168
+ return shopify_source(
169
+ private_app_password=api_key[0],
170
+ shop_url=f"https://{source_fields.netloc}",
171
+ **date_args,
172
+ ).with_resources(resource)
173
+
174
+
137
175
  class GoogleSheetsSource:
138
176
  table_builder: Callable
139
177
 
@@ -0,0 +1 @@
1
+ __version__ = "0.6.0"
@@ -66,6 +66,7 @@ exclude = [
66
66
  'src/sql_database/.*',
67
67
  'src/mongodb/.*',
68
68
  'src/google_sheets/.*',
69
+ 'src/shopify/.*',
69
70
  ]
70
71
 
71
72
  [[tool.mypy.overrides]]
@@ -73,6 +74,7 @@ module = [
73
74
  "ingestr.src.sql_database.*",
74
75
  "ingestr.src.mongodb.*",
75
76
  "ingestr.src.google_sheets.*",
77
+ "ingestr.src.shopify.*",
76
78
  ]
77
79
  follow_imports = "skip"
78
80
 
@@ -1 +0,0 @@
1
- __version__ = "0.5.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes