ingestr 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.5.0 → ingestr-0.6.0}/Dockerfile +1 -1
- {ingestr-0.5.0 → ingestr-0.6.0}/PKG-INFO +6 -1
- {ingestr-0.5.0 → ingestr-0.6.0}/README.md +5 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/overview.md +5 -0
- ingestr-0.6.0/docs/supported-sources/shopify.md +37 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/snowflake.md +1 -1
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/destinations.py +1 -1
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/factory.py +3 -0
- ingestr-0.6.0/ingestr/src/shopify/__init__.py +227 -0
- ingestr-0.6.0/ingestr/src/shopify/exceptions.py +2 -0
- ingestr-0.6.0/ingestr/src/shopify/helpers.py +147 -0
- ingestr-0.6.0/ingestr/src/shopify/settings.py +5 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sources.py +38 -0
- ingestr-0.6.0/ingestr/src/version.py +1 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_append.db +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_create_replace.db +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/test_merge_with_primary_key.db +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/pyproject.toml +2 -0
- ingestr-0.5.0/ingestr/src/version.py +0 -1
- {ingestr-0.5.0 → ingestr-0.6.0}/.dockerignore +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/.github/workflows/docker.yml +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/.gitignore +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/LICENSE.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/Makefile +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/commands/example-uris.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/commands/ingest.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/index.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/main.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/main_test.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/destinations_test.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/factory_test.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sources_test.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/package-lock.json +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/package.json +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/requirements-dev.txt +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/requirements.txt +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/resources/demo.gif +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/resources/demo.tape +0 -0
- {ingestr-0.5.0 → ingestr-0.6.0}/resources/ingestr.svg +0 -0
|
@@ -10,7 +10,7 @@ ENV VIRTUAL_ENV=/usr/local
|
|
|
10
10
|
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
|
|
11
11
|
RUN /install.sh && rm /install.sh
|
|
12
12
|
|
|
13
|
-
RUN /root/.cargo/bin/uv pip install --no-cache -r requirements.txt
|
|
13
|
+
RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
|
|
14
14
|
|
|
15
15
|
COPY . /app
|
|
16
16
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -180,6 +180,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
180
180
|
<td>✅</td>
|
|
181
181
|
<td>❌</td>
|
|
182
182
|
</tr>
|
|
183
|
+
<tr>
|
|
184
|
+
<td>Shopify</td>
|
|
185
|
+
<td>✅</td>
|
|
186
|
+
<td>❌</td>
|
|
187
|
+
</tr>
|
|
183
188
|
</table>
|
|
184
189
|
|
|
185
190
|
More to come soon!
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Shopify
|
|
2
|
+
[Shopify](https://www.shopify.com/) is a comprehensive e-commerce platform that enables individuals and businesses to create online stores.
|
|
3
|
+
|
|
4
|
+
ingestr supports Shopify as a source.
|
|
5
|
+
|
|
6
|
+
## URI Format
|
|
7
|
+
The URI format for Shopify is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
shopify://<shopify store URL>?api_key=token
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
URI parameters:
|
|
14
|
+
- `shopify store URL`: the URL of the Shopify store you'd like to connect to, e.g. `myawesomestore.myshopify.com`
|
|
15
|
+
- `api_key`: the API key used for authentication with the Shopify API
|
|
16
|
+
|
|
17
|
+
The URI is used to connect to the Shopify API for extracting data. More details on setting up Shopify integrations can be found [here](https://shopify.dev/docs/admin-api/getting-started).
|
|
18
|
+
|
|
19
|
+
## Setting up a Shopify Integration
|
|
20
|
+
|
|
21
|
+
Shopify requires a few steps to set up an integration; please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/shopify#setup-guide).
|
|
22
|
+
|
|
23
|
+
Once you complete the guide, you should have an API key and the name of the store to connect to. Let's say your API key is `shpkey_12345` and the store you'd like to connect to is `my-store`. Here's a sample command that will copy the data from the Shopify store into a DuckDB database:
|
|
24
|
+
|
|
25
|
+
```sh
|
|
26
|
+
ingestr ingest --source-uri 'shopify://my-store.myshopify.com?api_key=shpkey_12345' --source-table 'orders' --dest-uri duckdb:///shopify.duckdb --dest-table 'shopify.orders'
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
The result of this command will be a table in the `shopify.duckdb` database with JSON columns.
|
|
30
|
+
|
|
31
|
+
## Available Tables
|
|
32
|
+
The Shopify source allows ingesting the following resources into separate tables:
|
|
33
|
+
- `orders`
|
|
34
|
+
- `customers`
|
|
35
|
+
- `products`
|
|
36
|
+
|
|
37
|
+
Use these as the `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -13,7 +13,7 @@ snowflake://user:password@account/dbname?warehouse=COMPUTE_WH&role=data_scientis
|
|
|
13
13
|
URI parameters:
|
|
14
14
|
- `user`: the user name to connect to the database
|
|
15
15
|
- `password`: the password for the user
|
|
16
|
-
- `account`: your Snowflake account identifier
|
|
16
|
+
- `account`: your Snowflake account identifier (copying it from the Snowflake interface gives you `org_name.account_name`; replace the "." with "-" in the ingestr command)
|
|
17
17
|
- `dbname`: the name of the database to connect to
|
|
18
18
|
- `warehouse`: the name of the warehouse to use (optional)
|
|
19
19
|
- `role`: the name of the role to use (optional)
|
|
@@ -98,7 +98,7 @@ class RedshiftDestination(GenericSqlDestination):
|
|
|
98
98
|
|
|
99
99
|
class DuckDBDestination(GenericSqlDestination):
|
|
100
100
|
def dlt_dest(self, uri: str, **kwargs):
|
|
101
|
-
return dlt.destinations.duckdb(
|
|
101
|
+
return dlt.destinations.duckdb(uri, **kwargs)
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
class MsSQLDestination(GenericSqlDestination):
|
|
@@ -19,6 +19,7 @@ from ingestr.src.sources import (
|
|
|
19
19
|
LocalCsvSource,
|
|
20
20
|
MongoDbSource,
|
|
21
21
|
NotionSource,
|
|
22
|
+
ShopifySource,
|
|
22
23
|
SqlSource,
|
|
23
24
|
)
|
|
24
25
|
|
|
@@ -91,6 +92,8 @@ class SourceDestinationFactory:
|
|
|
91
92
|
return NotionSource()
|
|
92
93
|
elif self.source_scheme == "gsheets":
|
|
93
94
|
return GoogleSheetsSource()
|
|
95
|
+
elif self.source_scheme == "shopify":
|
|
96
|
+
return ShopifySource()
|
|
94
97
|
else:
|
|
95
98
|
raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
|
|
96
99
|
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""Fetches Shopify Orders and Products."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Iterable, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
from dlt.common import jsonpath as jp
|
|
7
|
+
from dlt.common import pendulum
|
|
8
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
9
|
+
from dlt.common.typing import TAnyDateTime, TDataItem
|
|
10
|
+
from dlt.sources import DltResource
|
|
11
|
+
|
|
12
|
+
from .helpers import ShopifyApi, ShopifyPartnerApi, TOrderStatus
|
|
13
|
+
from .settings import (
|
|
14
|
+
DEFAULT_API_VERSION,
|
|
15
|
+
DEFAULT_ITEMS_PER_PAGE,
|
|
16
|
+
DEFAULT_PARTNER_API_VERSION,
|
|
17
|
+
FIRST_DAY_OF_MILLENNIUM,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dlt.source(name="shopify", max_table_nesting=0)
|
|
22
|
+
def shopify_source(
|
|
23
|
+
private_app_password: str = dlt.secrets.value,
|
|
24
|
+
api_version: str = DEFAULT_API_VERSION,
|
|
25
|
+
shop_url: str = dlt.config.value,
|
|
26
|
+
start_date: TAnyDateTime = FIRST_DAY_OF_MILLENNIUM,
|
|
27
|
+
end_date: Optional[TAnyDateTime] = None,
|
|
28
|
+
created_at_min: TAnyDateTime = FIRST_DAY_OF_MILLENNIUM,
|
|
29
|
+
items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
|
|
30
|
+
order_status: TOrderStatus = "any",
|
|
31
|
+
) -> Iterable[DltResource]:
|
|
32
|
+
"""
|
|
33
|
+
The source for the Shopify pipeline. Available resources are products, orders, and customers.
|
|
34
|
+
|
|
35
|
+
`start_time` argument can be used on its own or together with `end_time`. When both are provided
|
|
36
|
+
data is limited to items updated in that time range.
|
|
37
|
+
The range is "half-open", meaning elements equal and newer than `start_time` and elements older than `end_time` are included.
|
|
38
|
+
All resources opt-in to use Airflow scheduler if run as Airflow task
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
private_app_password: The app password to the app on your shop.
|
|
42
|
+
api_version: The API version to use (e.g. 2023-01).
|
|
43
|
+
shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
|
|
44
|
+
items_per_page: The max number of items to fetch per page. Defaults to 250.
|
|
45
|
+
start_date: Items updated on or after this date are imported. Defaults to 2000-01-01.
|
|
46
|
+
If end date is not provided, this is used as the initial value for incremental loading and after the initial run, only new data will be retrieved.
|
|
47
|
+
Accepts any `date`/`datetime` object or a date/datetime string in ISO 8601 format.
|
|
48
|
+
end_time: The end time of the range for which to load data.
|
|
49
|
+
Should be used together with `start_date` to limit the data to items updated in that time range.
|
|
50
|
+
If end time is not provided, the incremental loading will be enabled and after initial run, only new data will be retrieved
|
|
51
|
+
created_at_min: The minimum creation date of items to import. Items created on or after this date are loaded. Defaults to 2000-01-01.
|
|
52
|
+
order_status: The order status to filter by. Can be 'open', 'closed', 'cancelled', or 'any'. Defaults to 'any'.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
Iterable[DltResource]: A list of DltResource objects representing the data resources.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# build client
|
|
59
|
+
client = ShopifyApi(shop_url, private_app_password, api_version)
|
|
60
|
+
|
|
61
|
+
start_date_obj = ensure_pendulum_datetime(start_date)
|
|
62
|
+
end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None
|
|
63
|
+
created_at_min_obj = ensure_pendulum_datetime(created_at_min)
|
|
64
|
+
|
|
65
|
+
# define resources
|
|
66
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
|
|
67
|
+
def products(
|
|
68
|
+
updated_at: dlt.sources.incremental[
|
|
69
|
+
pendulum.DateTime
|
|
70
|
+
] = dlt.sources.incremental(
|
|
71
|
+
"updated_at",
|
|
72
|
+
initial_value=start_date_obj,
|
|
73
|
+
end_value=end_date_obj,
|
|
74
|
+
allow_external_schedulers=True,
|
|
75
|
+
),
|
|
76
|
+
created_at_min: pendulum.DateTime = created_at_min_obj,
|
|
77
|
+
items_per_page: int = items_per_page,
|
|
78
|
+
) -> Iterable[TDataItem]:
|
|
79
|
+
"""
|
|
80
|
+
The resource for products on your shop, supports incremental loading and pagination.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
updated_at: The saved state of the last 'updated_at' value.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
Iterable[TDataItem]: A generator of products.
|
|
87
|
+
"""
|
|
88
|
+
params = dict(
|
|
89
|
+
updated_at_min=updated_at.last_value.isoformat(),
|
|
90
|
+
limit=items_per_page,
|
|
91
|
+
order="updated_at asc",
|
|
92
|
+
created_at_min=created_at_min.isoformat(),
|
|
93
|
+
)
|
|
94
|
+
if updated_at.end_value is not None:
|
|
95
|
+
params["updated_at_max"] = updated_at.end_value.isoformat()
|
|
96
|
+
yield from client.get_pages("products", params)
|
|
97
|
+
|
|
98
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
|
|
99
|
+
def orders(
|
|
100
|
+
updated_at: dlt.sources.incremental[
|
|
101
|
+
pendulum.DateTime
|
|
102
|
+
] = dlt.sources.incremental(
|
|
103
|
+
"updated_at",
|
|
104
|
+
initial_value=start_date_obj,
|
|
105
|
+
end_value=end_date_obj,
|
|
106
|
+
allow_external_schedulers=True,
|
|
107
|
+
),
|
|
108
|
+
created_at_min: pendulum.DateTime = created_at_min_obj,
|
|
109
|
+
items_per_page: int = items_per_page,
|
|
110
|
+
status: TOrderStatus = order_status,
|
|
111
|
+
) -> Iterable[TDataItem]:
|
|
112
|
+
"""
|
|
113
|
+
The resource for orders on your shop, supports incremental loading and pagination.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
updated_at: The saved state of the last 'updated_at' value.
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
Iterable[TDataItem]: A generator of orders.
|
|
120
|
+
"""
|
|
121
|
+
params = dict(
|
|
122
|
+
updated_at_min=updated_at.last_value.isoformat(),
|
|
123
|
+
limit=items_per_page,
|
|
124
|
+
status=status,
|
|
125
|
+
order="updated_at asc",
|
|
126
|
+
created_at_min=created_at_min.isoformat(),
|
|
127
|
+
)
|
|
128
|
+
if updated_at.end_value is not None:
|
|
129
|
+
params["updated_at_max"] = updated_at.end_value.isoformat()
|
|
130
|
+
yield from client.get_pages("orders", params)
|
|
131
|
+
|
|
132
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
|
|
133
|
+
def customers(
|
|
134
|
+
updated_at: dlt.sources.incremental[
|
|
135
|
+
pendulum.DateTime
|
|
136
|
+
] = dlt.sources.incremental(
|
|
137
|
+
"updated_at",
|
|
138
|
+
initial_value=start_date_obj,
|
|
139
|
+
end_value=end_date_obj,
|
|
140
|
+
allow_external_schedulers=True,
|
|
141
|
+
),
|
|
142
|
+
created_at_min: pendulum.DateTime = created_at_min_obj,
|
|
143
|
+
items_per_page: int = items_per_page,
|
|
144
|
+
) -> Iterable[TDataItem]:
|
|
145
|
+
"""
|
|
146
|
+
The resource for customers on your shop, supports incremental loading and pagination.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
updated_at: The saved state of the last 'updated_at' value.
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
Iterable[TDataItem]: A generator of customers.
|
|
153
|
+
"""
|
|
154
|
+
params = dict(
|
|
155
|
+
updated_at_min=updated_at.last_value.isoformat(),
|
|
156
|
+
limit=items_per_page,
|
|
157
|
+
order="updated_at asc",
|
|
158
|
+
created_at_min=created_at_min.isoformat(),
|
|
159
|
+
)
|
|
160
|
+
if updated_at.end_value is not None:
|
|
161
|
+
params["updated_at_max"] = updated_at.end_value.isoformat()
|
|
162
|
+
yield from client.get_pages("customers", params)
|
|
163
|
+
|
|
164
|
+
return (products, orders, customers)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@dlt.resource
|
|
168
|
+
def shopify_partner_query(
|
|
169
|
+
query: str,
|
|
170
|
+
data_items_path: jp.TJsonPath,
|
|
171
|
+
pagination_cursor_path: jp.TJsonPath,
|
|
172
|
+
pagination_variable_name: str = "after",
|
|
173
|
+
variables: Optional[Dict[str, Any]] = None,
|
|
174
|
+
access_token: str = dlt.secrets.value,
|
|
175
|
+
organization_id: str = dlt.config.value,
|
|
176
|
+
api_version: str = DEFAULT_PARTNER_API_VERSION,
|
|
177
|
+
) -> Iterable[TDataItem]:
|
|
178
|
+
"""
|
|
179
|
+
Resource for getting paginated results from the Shopify Partner GraphQL API.
|
|
180
|
+
|
|
181
|
+
This resource will run the given GraphQL query and extract a list of data items from the result.
|
|
182
|
+
It will then run the query again with a pagination cursor to get the next page of results.
|
|
183
|
+
|
|
184
|
+
Example:
|
|
185
|
+
query = '''query Transactions($after: String) {
|
|
186
|
+
transactions(after: $after, first: 100) {
|
|
187
|
+
edges {
|
|
188
|
+
cursor
|
|
189
|
+
node {
|
|
190
|
+
id
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}'''
|
|
195
|
+
|
|
196
|
+
partner_query_pages(
|
|
197
|
+
query,
|
|
198
|
+
data_items_path="data.transactions.edges[*].node",
|
|
199
|
+
pagination_cursor_path="data.transactions.edges[-1].cursor",
|
|
200
|
+
pagination_variable_name="after",
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
query: The GraphQL query to run.
|
|
205
|
+
data_items_path: The JSONPath to the data items in the query result. Should resolve to array items.
|
|
206
|
+
pagination_cursor_path: The JSONPath to the pagination cursor in the query result, will be piped to the next query via variables.
|
|
207
|
+
pagination_variable_name: The name of the variable to pass the pagination cursor to.
|
|
208
|
+
variables: Mapping of extra variables used in the query.
|
|
209
|
+
access_token: The Partner API Client access token, created in the Partner Dashboard.
|
|
210
|
+
organization_id: Your Organization ID, found in the Partner Dashboard.
|
|
211
|
+
api_version: The API version to use (e.g. 2024-01). Use `unstable` for the latest version.
|
|
212
|
+
Returns:
|
|
213
|
+
Iterable[TDataItem]: A generator of the query results.
|
|
214
|
+
"""
|
|
215
|
+
client = ShopifyPartnerApi(
|
|
216
|
+
access_token=access_token,
|
|
217
|
+
organization_id=organization_id,
|
|
218
|
+
api_version=api_version,
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
yield from client.get_graphql_pages(
|
|
222
|
+
query,
|
|
223
|
+
data_items_path=data_items_path,
|
|
224
|
+
pagination_cursor_path=pagination_cursor_path,
|
|
225
|
+
pagination_variable_name=pagination_variable_name,
|
|
226
|
+
variables=variables,
|
|
227
|
+
)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Shopify source helpers"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Iterable, Literal, Optional
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
|
|
6
|
+
from dlt.common import jsonpath
|
|
7
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
8
|
+
from dlt.common.typing import Dict, DictStrAny, TDataItems
|
|
9
|
+
from dlt.sources.helpers import requests
|
|
10
|
+
|
|
11
|
+
from .exceptions import ShopifyPartnerApiError
|
|
12
|
+
from .settings import DEFAULT_API_VERSION, DEFAULT_PARTNER_API_VERSION
|
|
13
|
+
|
|
14
|
+
TOrderStatus = Literal["open", "closed", "cancelled", "any"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ShopifyApi:
|
|
18
|
+
"""
|
|
19
|
+
A Shopify API client that can be used to get pages of data from Shopify.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
shop_url: str,
|
|
25
|
+
private_app_password: str,
|
|
26
|
+
api_version: str = DEFAULT_API_VERSION,
|
|
27
|
+
) -> None:
|
|
28
|
+
"""
|
|
29
|
+
Args:
|
|
30
|
+
shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
|
|
31
|
+
private_app_password: The private app password to the app on your shop.
|
|
32
|
+
api_version: The API version to use (e.g. 2023-01)
|
|
33
|
+
"""
|
|
34
|
+
self.shop_url = shop_url
|
|
35
|
+
self.private_app_password = private_app_password
|
|
36
|
+
self.api_version = api_version
|
|
37
|
+
|
|
38
|
+
def get_pages(
|
|
39
|
+
self, resource: str, params: Optional[Dict[str, Any]] = None
|
|
40
|
+
) -> Iterable[TDataItems]:
|
|
41
|
+
"""Get all pages from shopify using requests.
|
|
42
|
+
Iterates through all pages and yield each page items.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
resource: The resource to get pages for (e.g. products, orders, customers).
|
|
46
|
+
params: Query params to include in the request.
|
|
47
|
+
|
|
48
|
+
Yields:
|
|
49
|
+
List of data items from the page
|
|
50
|
+
"""
|
|
51
|
+
url = urljoin(self.shop_url, f"/admin/api/{self.api_version}/{resource}.json")
|
|
52
|
+
|
|
53
|
+
headers = {"X-Shopify-Access-Token": self.private_app_password}
|
|
54
|
+
while url:
|
|
55
|
+
response = requests.get(url, params=params, headers=headers)
|
|
56
|
+
response.raise_for_status()
|
|
57
|
+
json = response.json()
|
|
58
|
+
# Get item list from the page
|
|
59
|
+
yield [self._convert_datetime_fields(item) for item in json[resource]]
|
|
60
|
+
url = response.links.get("next", {}).get("url")
|
|
61
|
+
# Query params are included in subsequent page URLs
|
|
62
|
+
params = None
|
|
63
|
+
|
|
64
|
+
def _convert_datetime_fields(self, item: Dict[str, Any]) -> Dict[str, Any]:
|
|
65
|
+
"""Convert timestamp fields in the item to pendulum datetime objects
|
|
66
|
+
|
|
67
|
+
The item is modified in place.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
item: The item to convert
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
The same data item (for convenience)
|
|
74
|
+
"""
|
|
75
|
+
fields = ["created_at", "updated_at"]
|
|
76
|
+
for field in fields:
|
|
77
|
+
if field in item:
|
|
78
|
+
item[field] = ensure_pendulum_datetime(item[field])
|
|
79
|
+
return item
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ShopifyPartnerApi:
|
|
83
|
+
"""Client for Shopify Partner grapql API"""
|
|
84
|
+
|
|
85
|
+
def __init__(
|
|
86
|
+
self,
|
|
87
|
+
access_token: str,
|
|
88
|
+
organization_id: str,
|
|
89
|
+
api_version: str = DEFAULT_PARTNER_API_VERSION,
|
|
90
|
+
) -> None:
|
|
91
|
+
"""
|
|
92
|
+
Args:
|
|
93
|
+
access_token: The access token to use
|
|
94
|
+
organization_id: The organization id to query
|
|
95
|
+
api_version: The API version to use (e.g. 2023-01)
|
|
96
|
+
"""
|
|
97
|
+
self.access_token = access_token
|
|
98
|
+
self.organization_id = organization_id
|
|
99
|
+
self.api_version = api_version
|
|
100
|
+
|
|
101
|
+
@property
|
|
102
|
+
def graphql_url(self) -> str:
|
|
103
|
+
return f"https://partners.shopify.com/{self.organization_id}/api/{self.api_version}/graphql.json"
|
|
104
|
+
|
|
105
|
+
def run_graphql_query(
|
|
106
|
+
self, query: str, variables: Optional[DictStrAny] = None
|
|
107
|
+
) -> DictStrAny:
|
|
108
|
+
"""Run a graphql query against the Shopify Partner API
|
|
109
|
+
|
|
110
|
+
Args:
|
|
111
|
+
query: The query to run
|
|
112
|
+
variables: The variables to include in the query
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
The response JSON
|
|
116
|
+
"""
|
|
117
|
+
headers = {"X-Shopify-Access-Token": self.access_token}
|
|
118
|
+
response = requests.post(
|
|
119
|
+
self.graphql_url,
|
|
120
|
+
json={"query": query, "variables": variables},
|
|
121
|
+
headers=headers,
|
|
122
|
+
)
|
|
123
|
+
data = response.json()
|
|
124
|
+
if data.get("errors"):
|
|
125
|
+
raise ShopifyPartnerApiError(response.text)
|
|
126
|
+
return data # type: ignore[no-any-return]
|
|
127
|
+
|
|
128
|
+
def get_graphql_pages(
|
|
129
|
+
self,
|
|
130
|
+
query: str,
|
|
131
|
+
data_items_path: jsonpath.TJsonPath,
|
|
132
|
+
pagination_cursor_path: jsonpath.TJsonPath,
|
|
133
|
+
pagination_variable_name: str,
|
|
134
|
+
variables: Optional[DictStrAny] = None,
|
|
135
|
+
) -> Iterable[TDataItems]:
|
|
136
|
+
variables = dict(variables or {})
|
|
137
|
+
while True:
|
|
138
|
+
data = self.run_graphql_query(query, variables)
|
|
139
|
+
print(data)
|
|
140
|
+
data_items = jsonpath.find_values(data_items_path, data)
|
|
141
|
+
if not data_items:
|
|
142
|
+
break
|
|
143
|
+
yield data_items
|
|
144
|
+
cursors = jsonpath.find_values(pagination_cursor_path, data)
|
|
145
|
+
if not cursors:
|
|
146
|
+
break
|
|
147
|
+
variables[pagination_variable_name] = cursors[-1]
|
|
@@ -9,6 +9,7 @@ import dlt
|
|
|
9
9
|
from ingestr.src.google_sheets import google_spreadsheet
|
|
10
10
|
from ingestr.src.mongodb import mongodb_collection
|
|
11
11
|
from ingestr.src.notion import notion_databases
|
|
12
|
+
from ingestr.src.shopify import shopify_source
|
|
12
13
|
from ingestr.src.sql_database import sql_table
|
|
13
14
|
|
|
14
15
|
|
|
@@ -134,6 +135,43 @@ class NotionSource:
|
|
|
134
135
|
)
|
|
135
136
|
|
|
136
137
|
|
|
138
|
+
class ShopifySource:
|
|
139
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
140
|
+
if kwargs.get("incremental_key"):
|
|
141
|
+
raise ValueError(
|
|
142
|
+
"Shopify takes care of incrementality on its own, you should not provide incremental_key"
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# shopify://shop_url?api_key=private_app_password
|
|
146
|
+
|
|
147
|
+
source_fields = urlparse(uri)
|
|
148
|
+
source_params = parse_qs(source_fields.query)
|
|
149
|
+
api_key = source_params.get("api_key")
|
|
150
|
+
if not api_key:
|
|
151
|
+
raise ValueError("api_key in the URI is required to connect to Shopify")
|
|
152
|
+
|
|
153
|
+
date_args = {}
|
|
154
|
+
if kwargs.get("interval_start"):
|
|
155
|
+
date_args["start_date"] = kwargs.get("interval_start")
|
|
156
|
+
|
|
157
|
+
if kwargs.get("interval_end"):
|
|
158
|
+
date_args["end_date"] = kwargs.get("interval_end")
|
|
159
|
+
|
|
160
|
+
resource = None
|
|
161
|
+
if table in ["products", "orders", "customers"]:
|
|
162
|
+
resource = table
|
|
163
|
+
else:
|
|
164
|
+
raise ValueError(
|
|
165
|
+
f"Table name '{table}' is not supported for Shopify source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
return shopify_source(
|
|
169
|
+
private_app_password=api_key[0],
|
|
170
|
+
shop_url=f"https://{source_fields.netloc}",
|
|
171
|
+
**date_args,
|
|
172
|
+
).with_resources(resource)
|
|
173
|
+
|
|
174
|
+
|
|
137
175
|
class GoogleSheetsSource:
|
|
138
176
|
table_builder: Callable
|
|
139
177
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.6.0"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -66,6 +66,7 @@ exclude = [
|
|
|
66
66
|
'src/sql_database/.*',
|
|
67
67
|
'src/mongodb/.*',
|
|
68
68
|
'src/google_sheets/.*',
|
|
69
|
+
'src/shopify/.*',
|
|
69
70
|
]
|
|
70
71
|
|
|
71
72
|
[[tool.mypy.overrides]]
|
|
@@ -73,6 +74,7 @@ module = [
|
|
|
73
74
|
"ingestr.src.sql_database.*",
|
|
74
75
|
"ingestr.src.mongodb.*",
|
|
75
76
|
"ingestr.src.google_sheets.*",
|
|
77
|
+
"ingestr.src.shopify.*",
|
|
76
78
|
]
|
|
77
79
|
follow_imports = "skip"
|
|
78
80
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.5.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|