ingestr 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr has been flagged as possibly problematic by the registry.
- ingestr/main.py +16 -3
- ingestr/main_test.py +287 -7
- ingestr/src/destinations.py +1 -1
- ingestr/src/factory.py +9 -0
- ingestr/src/gorgias/__init__.py +587 -0
- ingestr/src/gorgias/helpers.py +149 -0
- ingestr/src/gorgias/helpers_test.py +45 -0
- ingestr/src/shopify/__init__.py +227 -0
- ingestr/src/shopify/exceptions.py +2 -0
- ingestr/src/shopify/helpers.py +147 -0
- ingestr/src/shopify/settings.py +5 -0
- ingestr/src/sources.py +133 -3
- ingestr/src/version.py +1 -1
- ingestr/testdata/create_replace.csv +21 -0
- ingestr/testdata/delete_insert_expected.csv +6 -0
- ingestr/testdata/delete_insert_part1.csv +5 -0
- ingestr/testdata/delete_insert_part2.csv +6 -0
- ingestr/testdata/merge_expected.csv +5 -0
- ingestr/testdata/merge_part1.csv +4 -0
- ingestr/testdata/merge_part2.csv +5 -0
- {ingestr-0.5.1.dist-info → ingestr-0.6.1.dist-info}/METADATA +8 -2
- {ingestr-0.5.1.dist-info → ingestr-0.6.1.dist-info}/RECORD +25 -16
- {ingestr-0.5.1.dist-info → ingestr-0.6.1.dist-info}/WHEEL +1 -1
- ingestr/testdata/test_append.db +0 -0
- ingestr/testdata/test_create_replace.db +0 -0
- ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
- ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
- ingestr/testdata/test_merge_with_primary_key.db +0 -0
- {ingestr-0.5.1.dist-info → ingestr-0.6.1.dist-info}/entry_points.txt +0 -0
- {ingestr-0.5.1.dist-info → ingestr-0.6.1.dist-info}/licenses/LICENSE.md +0 -0

ingestr/src/gorgias/helpers.py
@@ -0,0 +1,149 @@
+"""Gorgias source helpers"""
+
+from typing import Any, Iterable, Optional, Tuple
+
+from dlt.common.pendulum import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import Dict, TDataItems
+from dlt.sources.helpers import requests
+from pyrate_limiter import Duration, Limiter, Rate
+from requests.auth import HTTPBasicAuth
+
+
+def get_max_datetime_from_datetime_fields(
+    item: Dict[str, Any],
+) -> Tuple[str, Optional[pendulum.DateTime]]:
+    """Get the maximum datetime from any field that ends with _datetime"""
+
+    max_field_name = None
+    max_field_value = None
+    for field in item:
+        if field.endswith("_datetime") and item[field] is not None:
+            dt = ensure_pendulum_datetime(item[field])
+            if not max_field_name or dt > max_field_value:
+                max_field_name = field
+                max_field_value = dt
+
+    return max_field_name, max_field_value
+
+
+def convert_datetime_fields(item: Dict[str, Any]) -> Dict[str, Any]:
+    for field in item:
+        if field.endswith("_datetime") and item[field] is not None:
+            item[field] = ensure_pendulum_datetime(item[field])
+
+    if "updated_datetime" not in item:
+        _, max_datetime = get_max_datetime_from_datetime_fields(item)
+        item["updated_datetime"] = max_datetime
+
+    return item
+
+
+def find_latest_timestamp_from_page(
+    items: list[Dict[str, Any]],
+) -> Optional[Dict[str, Any]]:
+    latest_time = None
+    for item in items:
+        _, max_field_value = get_max_datetime_from_datetime_fields(item)
+        if not latest_time or ensure_pendulum_datetime(max_field_value) > latest_time:
+            latest_time = max_field_value
+
+    return latest_time
+
+
+class GorgiasApi:
+    """
+    A Gorgias API client that can be used to get pages of data from Gorgias.
+    """
+
+    def __init__(
+        self,
+        domain: str,
+        email: str,
+        api_key: str,
+    ) -> None:
+        """
+        Args:
+            domain: The domain of your Gorgias account.
+            email: The email associated with your Gorgias account.
+            api_key: The API key for accessing the Gorgias API.
+        """
+        self.domain = domain
+        self.email = email
+        self.api_key = api_key
+
+    def get_pages(
+        self,
+        resource: str,
+        params: Optional[Dict[str, Any]] = None,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+    ) -> Iterable[TDataItems]:
+        """Get all pages from Gorgias using requests.
+        Iterates through all pages and yield each page items.
+
+        Args:
+            resource: The resource to get pages for (e.g. products, orders, customers).
+            params: Query params to include in the request.
+
+        Yields:
+            List of data items from the page
+        """
+        url = f"https://{self.domain}.gorgias.com/api/{resource}"
+        rate = Rate(2, Duration.SECOND)
+        limiter = Limiter(rate, raise_when_fail=False)
+
+        start_date_obj = ensure_pendulum_datetime(start_date) if start_date else None
+
+        if not params:
+            params = {}
+
+        params["limit"] = 100
+        if "order_by" not in params:
+            params["order_by"] = "updated_datetime:desc"
+
+        while True:
+            limiter.try_acquire(f"gorgias-{self.domain}")
+            response = requests.get(
+                url, params=params, auth=HTTPBasicAuth(self.email, self.api_key)
+            )
+            response.raise_for_status()
+            if len(response.json()["data"]) == 0:
+                break
+
+            json = response.json()
+
+            items = self.__filter_items_in_range(json["data"], start_date, end_date)
+            if len(items) > 0:
+                yield items
+
+            # if there is no cursor, yield the items first and then break the loop
+            cursor = json.get("meta", {}).get("next_cursor")
+            params["cursor"] = cursor
+            if not cursor:
+                break
+
+            if start_date_obj:
+                max_datetime = find_latest_timestamp_from_page(json["data"])
+                if start_date_obj > ensure_pendulum_datetime(max_datetime):
+                    break
+
+    def __filter_items_in_range(
+        self,
+        items: list[Dict[str, Any]],
+        start_date: Optional[str],
+        end_date: Optional[str],
+    ) -> list[Dict[str, Any]]:
+        start_date_obj = ensure_pendulum_datetime(start_date) if start_date else None
+        end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None
+
+        filtered = []
+        for item in items:
+            converted_item = convert_datetime_fields(item)
+            if start_date_obj and item["updated_datetime"] < start_date_obj:
+                continue
+            if end_date_obj and item["updated_datetime"] > end_date_obj:
+                continue
+            filtered.append(converted_item)
+
+        return filtered
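
The client above pages through a resource ordered by updated_datetime descending and stops early once an entire page falls before start_date. A minimal sketch of driving it directly; the credentials are placeholders and "tickets" is used only as an illustrative resource name (the resources actually exposed are wired up in the new ingestr/src/gorgias/__init__.py, not shown in full here):

    from ingestr.src.gorgias.helpers import GorgiasApi

    # Placeholder credentials; "tickets" is an assumed example resource.
    client = GorgiasApi(domain="my-shop", email="me@example.com", api_key="...")

    # get_pages yields one list of items per API page, already filtered to the
    # requested window, with *_datetime fields converted to pendulum objects.
    for page in client.get_pages(
        "tickets",
        start_date="2024-06-01T00:00:00+00:00",
        end_date="2024-07-01T00:00:00+00:00",
    ):
        for item in page:
            print(item["id"], item["updated_datetime"])
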

ingestr/src/gorgias/helpers_test.py
@@ -0,0 +1,45 @@
+from dlt.common.pendulum import pendulum
+
+from .helpers import convert_datetime_fields, find_latest_timestamp_from_page
+
+
+def test_convert_datetime_fields():
+    item = {
+        "key1": "val1",
+        "created_datetime": "2024-06-20T07:39:36.514848+00:00",
+        "sent_datetime": "2024-06-20T07:40:20.166593+00:00",
+        "should_send_datetime": "2024-06-20T07:39:37.514848+00:00",
+    }
+
+    actual = convert_datetime_fields(item)
+
+    assert actual == {
+        "key1": "val1",
+        "created_datetime": pendulum.datetime(2024, 6, 20, 7, 39, 36, 514848, tz="UTC"),
+        "sent_datetime": pendulum.datetime(2024, 6, 20, 7, 40, 20, 166593, tz="UTC"),
+        "should_send_datetime": pendulum.datetime(
+            2024, 6, 20, 7, 39, 37, 514848, tz="UTC"
+        ),
+        "updated_datetime": pendulum.datetime(2024, 6, 20, 7, 40, 20, 166593, tz="UTC"),
+    }
+
+
+def test_find_latest_timestamp_from_page():
+    items = [
+        {
+            "key1": "val1",
+            "created_datetime": "2024-06-20T07:39:36.514848+00:00",
+            "sent_datetime": "2024-06-20T07:40:20.166593+00:00",
+            "should_send_datetime": "2024-06-20T07:39:37.514848+00:00",
+        },
+        {
+            "key1": "val2",
+            "created_datetime": "2024-06-20T07:39:36.514848+00:00",
+            "sent_datetime": "2024-06-20T07:40:21.123123+00:00",
+            "should_send_datetime": "2024-06-20T07:39:37.514848+00:00",
+        },
+    ]
+
+    actual = find_latest_timestamp_from_page(items)
+
+    assert actual == pendulum.datetime(2024, 6, 20, 7, 40, 21, 123123, tz="UTC")
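
These tests pin down the backfill behaviour: convert_datetime_fields parses every *_datetime string and, when updated_datetime is absent, fills it with the latest of those values. A tiny sketch of the same behaviour on a record with a null field (values made up for illustration):

    from ingestr.src.gorgias.helpers import convert_datetime_fields

    item = {
        "id": 1,
        "created_datetime": "2024-06-20T07:39:36+00:00",
        "closed_datetime": None,  # None-valued *_datetime fields are skipped
    }
    converted = convert_datetime_fields(item)

    # updated_datetime is backfilled from the latest non-null *_datetime field,
    # here the created_datetime value as a pendulum DateTime.
    print(converted["updated_datetime"])
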

ingestr/src/shopify/__init__.py
@@ -0,0 +1,227 @@
+"""Fetches Shopify Orders and Products."""
+
+from typing import Any, Dict, Iterable, Optional
+
+import dlt
+from dlt.common import jsonpath as jp
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TAnyDateTime, TDataItem
+from dlt.sources import DltResource
+
+from .helpers import ShopifyApi, ShopifyPartnerApi, TOrderStatus
+from .settings import (
+    DEFAULT_API_VERSION,
+    DEFAULT_ITEMS_PER_PAGE,
+    DEFAULT_PARTNER_API_VERSION,
+    FIRST_DAY_OF_MILLENNIUM,
+)
+
+
+@dlt.source(name="shopify", max_table_nesting=0)
+def shopify_source(
+    private_app_password: str = dlt.secrets.value,
+    api_version: str = DEFAULT_API_VERSION,
+    shop_url: str = dlt.config.value,
+    start_date: TAnyDateTime = FIRST_DAY_OF_MILLENNIUM,
+    end_date: Optional[TAnyDateTime] = None,
+    created_at_min: TAnyDateTime = FIRST_DAY_OF_MILLENNIUM,
+    items_per_page: int = DEFAULT_ITEMS_PER_PAGE,
+    order_status: TOrderStatus = "any",
+) -> Iterable[DltResource]:
+    """
+    The source for the Shopify pipeline. Available resources are products, orders, and customers.
+
+    `start_time` argument can be used on its own or together with `end_time`. When both are provided
+    data is limited to items updated in that time range.
+    The range is "half-open", meaning elements equal and newer than `start_time` and elements older than `end_time` are included.
+    All resources opt-in to use Airflow scheduler if run as Airflow task
+
+    Args:
+        private_app_password: The app password to the app on your shop.
+        api_version: The API version to use (e.g. 2023-01).
+        shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
+        items_per_page: The max number of items to fetch per page. Defaults to 250.
+        start_date: Items updated on or after this date are imported. Defaults to 2000-01-01.
+            If end date is not provided, this is used as the initial value for incremental loading and after the initial run, only new data will be retrieved.
+            Accepts any `date`/`datetime` object or a date/datetime string in ISO 8601 format.
+        end_time: The end time of the range for which to load data.
+            Should be used together with `start_date` to limit the data to items updated in that time range.
+            If end time is not provided, the incremental loading will be enabled and after initial run, only new data will be retrieved
+        created_at_min: The minimum creation date of items to import. Items created on or after this date are loaded. Defaults to 2000-01-01.
+        order_status: The order status to filter by. Can be 'open', 'closed', 'cancelled', or 'any'. Defaults to 'any'.
+
+    Returns:
+        Iterable[DltResource]: A list of DltResource objects representing the data resources.
+    """
+
+    # build client
+    client = ShopifyApi(shop_url, private_app_password, api_version)
+
+    start_date_obj = ensure_pendulum_datetime(start_date)
+    end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None
+    created_at_min_obj = ensure_pendulum_datetime(created_at_min)
+
+    # define resources
+    @dlt.resource(primary_key="id", write_disposition="merge")
+    def products(
+        updated_at: dlt.sources.incremental[
+            pendulum.DateTime
+        ] = dlt.sources.incremental(
+            "updated_at",
+            initial_value=start_date_obj,
+            end_value=end_date_obj,
+            allow_external_schedulers=True,
+        ),
+        created_at_min: pendulum.DateTime = created_at_min_obj,
+        items_per_page: int = items_per_page,
+    ) -> Iterable[TDataItem]:
+        """
+        The resource for products on your shop, supports incremental loading and pagination.
+
+        Args:
+            updated_at: The saved state of the last 'updated_at' value.
+
+        Returns:
+            Iterable[TDataItem]: A generator of products.
+        """
+        params = dict(
+            updated_at_min=updated_at.last_value.isoformat(),
+            limit=items_per_page,
+            order="updated_at asc",
+            created_at_min=created_at_min.isoformat(),
+        )
+        if updated_at.end_value is not None:
+            params["updated_at_max"] = updated_at.end_value.isoformat()
+        yield from client.get_pages("products", params)
+
+    @dlt.resource(primary_key="id", write_disposition="merge")
+    def orders(
+        updated_at: dlt.sources.incremental[
+            pendulum.DateTime
+        ] = dlt.sources.incremental(
+            "updated_at",
+            initial_value=start_date_obj,
+            end_value=end_date_obj,
+            allow_external_schedulers=True,
+        ),
+        created_at_min: pendulum.DateTime = created_at_min_obj,
+        items_per_page: int = items_per_page,
+        status: TOrderStatus = order_status,
+    ) -> Iterable[TDataItem]:
+        """
+        The resource for orders on your shop, supports incremental loading and pagination.
+
+        Args:
+            updated_at: The saved state of the last 'updated_at' value.
+
+        Returns:
+            Iterable[TDataItem]: A generator of orders.
+        """
+        params = dict(
+            updated_at_min=updated_at.last_value.isoformat(),
+            limit=items_per_page,
+            status=status,
+            order="updated_at asc",
+            created_at_min=created_at_min.isoformat(),
+        )
+        if updated_at.end_value is not None:
+            params["updated_at_max"] = updated_at.end_value.isoformat()
+        yield from client.get_pages("orders", params)
+
+    @dlt.resource(primary_key="id", write_disposition="merge")
+    def customers(
+        updated_at: dlt.sources.incremental[
+            pendulum.DateTime
+        ] = dlt.sources.incremental(
+            "updated_at",
+            initial_value=start_date_obj,
+            end_value=end_date_obj,
+            allow_external_schedulers=True,
+        ),
+        created_at_min: pendulum.DateTime = created_at_min_obj,
+        items_per_page: int = items_per_page,
+    ) -> Iterable[TDataItem]:
+        """
+        The resource for customers on your shop, supports incremental loading and pagination.
+
+        Args:
+            updated_at: The saved state of the last 'updated_at' value.
+
+        Returns:
+            Iterable[TDataItem]: A generator of customers.
+        """
+        params = dict(
+            updated_at_min=updated_at.last_value.isoformat(),
+            limit=items_per_page,
+            order="updated_at asc",
+            created_at_min=created_at_min.isoformat(),
+        )
+        if updated_at.end_value is not None:
+            params["updated_at_max"] = updated_at.end_value.isoformat()
+        yield from client.get_pages("customers", params)
+
+    return (products, orders, customers)
+
+
+@dlt.resource
+def shopify_partner_query(
+    query: str,
+    data_items_path: jp.TJsonPath,
+    pagination_cursor_path: jp.TJsonPath,
+    pagination_variable_name: str = "after",
+    variables: Optional[Dict[str, Any]] = None,
+    access_token: str = dlt.secrets.value,
+    organization_id: str = dlt.config.value,
+    api_version: str = DEFAULT_PARTNER_API_VERSION,
+) -> Iterable[TDataItem]:
+    """
+    Resource for getting paginated results from the Shopify Partner GraphQL API.
+
+    This resource will run the given GraphQL query and extract a list of data items from the result.
+    It will then run the query again with a pagination cursor to get the next page of results.
+
+    Example:
+        query = '''query Transactions($after: String) {
+            transactions(after: $after, first: 100) {
+                edges {
+                    cursor
+                    node {
+                        id
+                    }
+                }
+            }
+        }'''
+
+        partner_query_pages(
+            query,
+            data_items_path="data.transactions.edges[*].node",
+            pagination_cursor_path="data.transactions.edges[-1].cursor",
+            pagination_variable_name="after",
+        )
+
+    Args:
+        query: The GraphQL query to run.
+        data_items_path: The JSONPath to the data items in the query result. Should resolve to array items.
+        pagination_cursor_path: The JSONPath to the pagination cursor in the query result, will be piped to the next query via variables.
+        pagination_variable_name: The name of the variable to pass the pagination cursor to.
+        variables: Mapping of extra variables used in the query.
+        access_token: The Partner API Client access token, created in the Partner Dashboard.
+        organization_id: Your Organization ID, found in the Partner Dashboard.
+        api_version: The API version to use (e.g. 2024-01). Use `unstable` for the latest version.
+    Returns:
+        Iterable[TDataItem]: A generator of the query results.
+    """
+    client = ShopifyPartnerApi(
+        access_token=access_token,
+        organization_id=organization_id,
+        api_version=api_version,
+    )
+
+    yield from client.get_graphql_pages(
+        query,
+        data_items_path=data_items_path,
+        pagination_cursor_path=pagination_cursor_path,
+        pagination_variable_name=pagination_variable_name,
+        variables=variables,
+    )
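
shopify_source exposes products, orders, and customers as merge-disposition resources with updated_at incremental cursors. A minimal sketch of running it through a dlt pipeline; the shop URL, app password, and DuckDB destination are placeholder assumptions, not values taken from this release:

    import dlt

    from ingestr.src.shopify import shopify_source

    pipeline = dlt.pipeline(
        pipeline_name="shopify_demo",
        destination="duckdb",
        dataset_name="shopify_data",
    )

    source = shopify_source(
        shop_url="https://my-shop.myshopify.com",
        private_app_password="shpat_placeholder",
        start_date="2024-06-01",
    ).with_resources("products", "orders")

    # The updated_at incremental state is stored with the pipeline, so repeated
    # runs only fetch items updated since the previous load.
    load_info = pipeline.run(source)
    print(load_info)
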

ingestr/src/shopify/helpers.py
@@ -0,0 +1,147 @@
+"""Shopify source helpers"""
+
+from typing import Any, Iterable, Literal, Optional
+from urllib.parse import urljoin
+
+from dlt.common import jsonpath
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import Dict, DictStrAny, TDataItems
+from dlt.sources.helpers import requests
+
+from .exceptions import ShopifyPartnerApiError
+from .settings import DEFAULT_API_VERSION, DEFAULT_PARTNER_API_VERSION
+
+TOrderStatus = Literal["open", "closed", "cancelled", "any"]
+
+
+class ShopifyApi:
+    """
+    A Shopify API client that can be used to get pages of data from Shopify.
+    """
+
+    def __init__(
+        self,
+        shop_url: str,
+        private_app_password: str,
+        api_version: str = DEFAULT_API_VERSION,
+    ) -> None:
+        """
+        Args:
+            shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
+            private_app_password: The private app password to the app on your shop.
+            api_version: The API version to use (e.g. 2023-01)
+        """
+        self.shop_url = shop_url
+        self.private_app_password = private_app_password
+        self.api_version = api_version
+
+    def get_pages(
+        self, resource: str, params: Optional[Dict[str, Any]] = None
+    ) -> Iterable[TDataItems]:
+        """Get all pages from shopify using requests.
+        Iterates through all pages and yield each page items.
+
+        Args:
+            resource: The resource to get pages for (e.g. products, orders, customers).
+            params: Query params to include in the request.
+
+        Yields:
+            List of data items from the page
+        """
+        url = urljoin(self.shop_url, f"/admin/api/{self.api_version}/{resource}.json")
+
+        headers = {"X-Shopify-Access-Token": self.private_app_password}
+        while url:
+            response = requests.get(url, params=params, headers=headers)
+            response.raise_for_status()
+            json = response.json()
+            # Get item list from the page
+            yield [self._convert_datetime_fields(item) for item in json[resource]]
+            url = response.links.get("next", {}).get("url")
+            # Query params are included in subsequent page URLs
+            params = None
+
+    def _convert_datetime_fields(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert timestamp fields in the item to pendulum datetime objects
+
+        The item is modified in place.
+
+        Args:
+            item: The item to convert
+
+        Returns:
+            The same data item (for convenience)
+        """
+        fields = ["created_at", "updated_at"]
+        for field in fields:
+            if field in item:
+                item[field] = ensure_pendulum_datetime(item[field])
+        return item
+
+
+class ShopifyPartnerApi:
+    """Client for Shopify Partner grapql API"""
+
+    def __init__(
+        self,
+        access_token: str,
+        organization_id: str,
+        api_version: str = DEFAULT_PARTNER_API_VERSION,
+    ) -> None:
+        """
+        Args:
+            access_token: The access token to use
+            organization_id: The organization id to query
+            api_version: The API version to use (e.g. 2023-01)
+        """
+        self.access_token = access_token
+        self.organization_id = organization_id
+        self.api_version = api_version
+
+    @property
+    def graphql_url(self) -> str:
+        return f"https://partners.shopify.com/{self.organization_id}/api/{self.api_version}/graphql.json"
+
+    def run_graphql_query(
+        self, query: str, variables: Optional[DictStrAny] = None
+    ) -> DictStrAny:
+        """Run a graphql query against the Shopify Partner API
+
+        Args:
+            query: The query to run
+            variables: The variables to include in the query
+
+        Returns:
+            The response JSON
+        """
+        headers = {"X-Shopify-Access-Token": self.access_token}
+        response = requests.post(
+            self.graphql_url,
+            json={"query": query, "variables": variables},
+            headers=headers,
+        )
+        data = response.json()
+        if data.get("errors"):
+            raise ShopifyPartnerApiError(response.text)
+        return data  # type: ignore[no-any-return]
+
+    def get_graphql_pages(
+        self,
+        query: str,
+        data_items_path: jsonpath.TJsonPath,
+        pagination_cursor_path: jsonpath.TJsonPath,
+        pagination_variable_name: str,
+        variables: Optional[DictStrAny] = None,
+    ) -> Iterable[TDataItems]:
+        variables = dict(variables or {})
+        while True:
+            data = self.run_graphql_query(query, variables)
+            print(data)
+            data_items = jsonpath.find_values(data_items_path, data)
+            if not data_items:
+                break
+            yield data_items
+            cursors = jsonpath.find_values(pagination_cursor_path, data)
+            if not cursors:
+                break
+            variables[pagination_variable_name] = cursors[-1]