ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin_max/__init__.py +6 -4
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +37 -10
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +508 -27
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +107 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +15 -8
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +2933 -245
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.13.dist-info/RECORD +0 -115
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
4
|
+
|
|
5
|
+
import aiohttp
|
|
6
|
+
import pendulum
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _make_request(
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Make a REST API request to RevenueCat API v2 with rate limiting.

    Args:
        api_key: RevenueCat secret API key, sent as a Bearer token.
        endpoint: Path appended to the v2 API base, e.g. "/projects".
        params: Optional query-string parameters.
        max_retries: Number of retries after the first attempt.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: If the request still fails
            (network error or HTTP error status) after all retries.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, headers=headers, params=params or {})

            # Handle rate limiting (429 Too Many Requests).
            if response.status_code == 429:
                if attempt < max_retries:
                    # Prefer the server-provided Retry-After; fall back to
                    # exponential backoff (5, 10, 20 seconds). The header may
                    # also be an HTTP-date, which int() cannot parse — treat
                    # that the same as a missing header instead of crashing.
                    retry_after = response.headers.get("Retry-After")
                    try:
                        wait_time = int(retry_after)
                    except (TypeError, ValueError):
                        wait_time = (2**attempt) * 5
                    time.sleep(wait_time)
                    continue

            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException:
            if attempt < max_retries:
                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
                time.sleep(wait_time)
                continue
            raise

    # Unreachable: the final loop iteration always returns or raises.
    # Kept as a guard so a future edit to the loop cannot fall through
    # and silently return None.
    raise RuntimeError("retry loop exited without returning or raising")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _paginate(
    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[Dict[str, Any]]:
    """Yield successive pages of items from a paginated RevenueCat endpoint.

    Each yielded value is the raw ``items`` list from one API page. Follows
    the cursor encoded in the ``next_page`` URL until it is exhausted.
    """
    query = params.copy() if params is not None else {}
    query["limit"] = 1000

    while True:
        page = _make_request(api_key, endpoint, query)

        page_items = page.get("items")
        if page_items is not None:
            yield page_items

        if "next_page" not in page:
            break

        next_url = page["next_page"]
        if not next_url or "starting_after=" not in next_url:
            # No usable cursor — this was the last page.
            break

        # Carry the starting_after cursor from the next_page URL forward.
        query["starting_after"] = next_url.split("starting_after=")[1].split("&")[0]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def convert_timestamps_to_iso(
    record: Dict[str, Any], timestamp_fields: List[str]
) -> Dict[str, Any]:
    """Convert millisecond epoch timestamps in *record* to ISO-8601 strings.

    Mutates *record* in place and also returns it for convenience. Fields
    that are absent or ``None`` are left untouched.
    """
    for name in timestamp_fields:
        value = record.get(name)
        if value is None:
            continue
        # The API reports epoch milliseconds; pendulum expects seconds.
        record[name] = pendulum.from_timestamp(value / 1000).to_iso8601_string()
    return record
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _make_request_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Make an async REST API request to RevenueCat API v2 with rate limiting.

    Args:
        session: Shared aiohttp session used for the request.
        api_key: RevenueCat secret API key, sent as a Bearer token.
        endpoint: Path appended to the v2 API base.
        params: Optional query-string parameters.
        max_retries: Number of retries after the first attempt.

    Returns:
        The decoded JSON response body.

    Raises:
        aiohttp.ClientError: If the request still fails after all retries.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            async with session.get(
                url, headers=headers, params=params or {}
            ) as response:
                # Handle rate limiting (429 Too Many Requests).
                if response.status == 429:
                    if attempt < max_retries:
                        # Prefer the server-provided Retry-After; fall back to
                        # exponential backoff (5, 10, 20 seconds). A missing or
                        # non-integer header must not crash the retry loop.
                        retry_after = response.headers.get("Retry-After")
                        try:
                            wait_time = int(retry_after)
                        except (TypeError, ValueError):
                            wait_time = (2**attempt) * 5
                        await asyncio.sleep(wait_time)
                        continue

                response.raise_for_status()
                return await response.json()

        except aiohttp.ClientError:
            if attempt < max_retries:
                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
                await asyncio.sleep(wait_time)
                continue
            raise

    # Unreachable: the final loop iteration always returns or raises. The
    # previous fallback here issued an *extra* HTTP request; raise instead
    # so a future edit to the loop cannot fall through silently.
    raise RuntimeError("retry loop exited without returning or raising")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
async def _paginate_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Collect every item from a paginated RevenueCat endpoint into one list.

    Unlike the synchronous ``_paginate``, this accumulates all pages and
    returns a single flat list of items.
    """
    collected: List[Dict[str, Any]] = []
    query = params.copy() if params is not None else {}
    query["limit"] = 1000

    while True:
        page = await _make_request_async(session, api_key, endpoint, query)

        page_items = page.get("items")
        if page_items is not None:
            collected.extend(page_items)

        if "next_page" not in page:
            break

        next_url = page["next_page"]
        if not next_url or "starting_after=" not in next_url:
            # No usable cursor — this was the last page.
            break

        # Carry the starting_after cursor from the next_page URL forward.
        query["starting_after"] = next_url.split("starting_after=")[1].split("&")[0]

    return collected
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
async def process_customer_with_nested_resources_async(
    session: aiohttp.ClientSession,
    api_key: str,
    project_id: str,
    customer: Dict[str, Any],
) -> Dict[str, Any]:
    """Enrich a customer record with its nested subscriptions and purchases.

    Fetches each nested resource concurrently (one request chain per
    resource) and converts its timestamp fields to ISO-8601 in place.
    The *customer* dict itself is mutated and returned.
    """
    customer_id = customer["id"]
    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])
    # (resource name, timestamp fields to convert on each fetched item)
    nested_resources = [
        ("subscriptions", ["purchased_at", "expires_at", "grace_period_expires_at"]),
        ("purchases", ["purchased_at", "expires_at"]),
    ]

    async def fetch_and_convert(resource_name, timestamp_fields):
        # Only fetch when the customer payload did not already include
        # this resource; each coroutine writes a distinct key, so the
        # concurrent mutations of `customer` below do not collide.
        if resource_name not in customer or customer[resource_name] is None:
            endpoint = f"/projects/{project_id}/customers/{customer_id}/{resource_name}"
            customer[resource_name] = await _paginate_async(session, api_key, endpoint)
        if (
            timestamp_fields
            and resource_name in customer
            and customer[resource_name] is not None
        ):
            for item in customer[resource_name]:
                # Mutates each item in place; return value intentionally unused.
                convert_timestamps_to_iso(item, timestamp_fields)

    # Fetch both nested resources concurrently.
    await asyncio.gather(
        *[
            fetch_and_convert(resource_name, timestamp_fields)
            for resource_name, timestamp_fields in nested_resources
        ]
    )

    return customer
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def create_project_resource(
    resource_name: str,
    api_key: str,
    project_id: Optional[str] = None,
    timestamp_fields: Optional[List[str]] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Helper function to create DLT resources for project-dependent endpoints.

    Args:
        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
        api_key: RevenueCat API key
        project_id: RevenueCat project ID
        timestamp_fields: List of timestamp fields to convert to ISO format
            (defaults to ["created_at", "updated_at"])

    Returns:
        Iterator of resource data

    Raises:
        ValueError: If *project_id* is not provided.
    """
    # NOTE: parameters that default to None are annotated Optional[...] —
    # the previous plain `str = None` / `List[str] = None` annotations were
    # invalid implicit-Optionals under PEP 484.
    if project_id is None:
        raise ValueError(f"project_id is required for {resource_name} resource")

    endpoint = f"/projects/{project_id}/{resource_name}"
    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]

    for item in _paginate(api_key, endpoint):
        item = convert_timestamps_to_iso(item, default_timestamp_fields)
        yield item
|
|
@@ -13,6 +13,8 @@ def salesforce_source(
|
|
|
13
13
|
username: str,
|
|
14
14
|
password: str,
|
|
15
15
|
token: str,
|
|
16
|
+
domain: str,
|
|
17
|
+
custom_object: str = None,
|
|
16
18
|
) -> Iterable[DltResource]:
|
|
17
19
|
"""
|
|
18
20
|
Retrieves data from Salesforce using the Salesforce API.
|
|
@@ -26,7 +28,7 @@ def salesforce_source(
|
|
|
26
28
|
DltResource: Data resources from Salesforce.
|
|
27
29
|
"""
|
|
28
30
|
|
|
29
|
-
client = Salesforce(username, password, token)
|
|
31
|
+
client = Salesforce(username, password, token, domain=domain)
|
|
30
32
|
|
|
31
33
|
# define resources
|
|
32
34
|
@dlt.resource(write_disposition="replace")
|
|
@@ -37,7 +39,7 @@ def salesforce_source(
|
|
|
37
39
|
def user_role() -> Iterable[TDataItem]:
|
|
38
40
|
yield get_records(client, "UserRole")
|
|
39
41
|
|
|
40
|
-
@dlt.resource(write_disposition="merge")
|
|
42
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
41
43
|
def opportunity(
|
|
42
44
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
43
45
|
"SystemModstamp", initial_value=None
|
|
@@ -47,7 +49,7 @@ def salesforce_source(
|
|
|
47
49
|
client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
|
|
48
50
|
)
|
|
49
51
|
|
|
50
|
-
@dlt.resource(write_disposition="merge")
|
|
52
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
51
53
|
def opportunity_line_item(
|
|
52
54
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
53
55
|
"SystemModstamp", initial_value=None
|
|
@@ -57,7 +59,7 @@ def salesforce_source(
|
|
|
57
59
|
client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
|
|
58
60
|
)
|
|
59
61
|
|
|
60
|
-
@dlt.resource(write_disposition="merge")
|
|
62
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
61
63
|
def opportunity_contact_role(
|
|
62
64
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
63
65
|
"SystemModstamp", initial_value=None
|
|
@@ -70,7 +72,7 @@ def salesforce_source(
|
|
|
70
72
|
"SystemModstamp",
|
|
71
73
|
)
|
|
72
74
|
|
|
73
|
-
@dlt.resource(write_disposition="merge")
|
|
75
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
74
76
|
def account(
|
|
75
77
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
76
78
|
"LastModifiedDate", initial_value=None
|
|
@@ -92,7 +94,7 @@ def salesforce_source(
|
|
|
92
94
|
def campaign() -> Iterable[TDataItem]:
|
|
93
95
|
yield get_records(client, "Campaign")
|
|
94
96
|
|
|
95
|
-
@dlt.resource(write_disposition="merge")
|
|
97
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
96
98
|
def campaign_member(
|
|
97
99
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
98
100
|
"SystemModstamp", initial_value=None
|
|
@@ -114,7 +116,7 @@ def salesforce_source(
|
|
|
114
116
|
def pricebook_entry() -> Iterable[TDataItem]:
|
|
115
117
|
yield get_records(client, "PricebookEntry")
|
|
116
118
|
|
|
117
|
-
@dlt.resource(write_disposition="merge")
|
|
119
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
118
120
|
def task(
|
|
119
121
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
120
122
|
"SystemModstamp", initial_value=None
|
|
@@ -122,7 +124,7 @@ def salesforce_source(
|
|
|
122
124
|
) -> Iterable[TDataItem]:
|
|
123
125
|
yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")
|
|
124
126
|
|
|
125
|
-
@dlt.resource(write_disposition="merge")
|
|
127
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
126
128
|
def event(
|
|
127
129
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
128
130
|
"SystemModstamp", initial_value=None
|
|
@@ -130,6 +132,10 @@ def salesforce_source(
|
|
|
130
132
|
) -> Iterable[TDataItem]:
|
|
131
133
|
yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")
|
|
132
134
|
|
|
135
|
+
@dlt.resource(write_disposition="replace")
|
|
136
|
+
def custom() -> Iterable[TDataItem]:
|
|
137
|
+
yield get_records(client, custom_object)
|
|
138
|
+
|
|
133
139
|
return (
|
|
134
140
|
user,
|
|
135
141
|
user_role,
|
|
@@ -146,4 +152,5 @@ def salesforce_source(
|
|
|
146
152
|
pricebook_entry,
|
|
147
153
|
task,
|
|
148
154
|
event,
|
|
155
|
+
custom,
|
|
149
156
|
)
|
ingestr/src/shopify/__init__.py
CHANGED
|
@@ -669,7 +669,7 @@ def shopify_source(
|
|
|
669
669
|
params["updated_at_max"] = updated_at.end_value.isoformat()
|
|
670
670
|
yield from client.get_pages("customers", params)
|
|
671
671
|
|
|
672
|
-
@dlt.resource(primary_key="id", write_disposition="
|
|
672
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
|
|
673
673
|
def events(
|
|
674
674
|
created_at: dlt.sources.incremental[
|
|
675
675
|
pendulum.DateTime
|
|
@@ -1690,16 +1690,6 @@ query discountNodes($after: String, $query: String, $first: Int) {
|
|
|
1690
1690
|
"nullable": True,
|
|
1691
1691
|
"description": "The category of the product from Shopify's Standard Product Taxonomy.",
|
|
1692
1692
|
},
|
|
1693
|
-
"combinedListing": {
|
|
1694
|
-
"data_type": "json",
|
|
1695
|
-
"nullable": True,
|
|
1696
|
-
"description": "A special product type that combines separate products into a single product listing.",
|
|
1697
|
-
},
|
|
1698
|
-
"combinedListingRole": {
|
|
1699
|
-
"data_type": "json",
|
|
1700
|
-
"nullable": True,
|
|
1701
|
-
"description": "The role of the product in a combined listing.",
|
|
1702
|
-
},
|
|
1703
1693
|
"compareAtPriceRange": {
|
|
1704
1694
|
"data_type": "json",
|
|
1705
1695
|
"nullable": True,
|
|
@@ -1841,12 +1831,6 @@ query products($after: String, $query: String, $first: Int) {
|
|
|
1841
1831
|
category {
|
|
1842
1832
|
id
|
|
1843
1833
|
}
|
|
1844
|
-
combinedListing {
|
|
1845
|
-
parentProduct {
|
|
1846
|
-
id
|
|
1847
|
-
}
|
|
1848
|
-
}
|
|
1849
|
-
combinedListingRole
|
|
1850
1834
|
compareAtPriceRange {
|
|
1851
1835
|
maxVariantCompareAtPrice {
|
|
1852
1836
|
amount
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import smartsheet # type: ignore
|
|
5
|
+
from dlt.extract import DltResource
|
|
6
|
+
from smartsheet.models.enums import ColumnType # type: ignore
|
|
7
|
+
from smartsheet.models.sheet import Sheet # type: ignore
|
|
8
|
+
|
|
9
|
+
# Maps Smartsheet column types to dlt data-type names used as column hints.
# Column types not listed here are omitted from the generated schema and
# fall back to dlt's own type inference.
TYPE_MAPPING = {
    ColumnType.TEXT_NUMBER: "text",
    ColumnType.DATE: "date",
    ColumnType.DATETIME: "timestamp",
    ColumnType.CONTACT_LIST: "text",
    ColumnType.CHECKBOX: "bool",
    ColumnType.PICKLIST: "text",
    ColumnType.DURATION: "text",
    ColumnType.PREDECESSOR: "text",
    ColumnType.ABSTRACT_DATETIME: "timestamp",
    ColumnType.MULTI_CONTACT_LIST: "text",
    ColumnType.MULTI_PICKLIST: "text",
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
def smartsheet_source(
    access_token: str,
    sheet_id: str,
) -> Iterable[DltResource]:
    """
    A DLT source for Smartsheet.

    Loads a single sheet as one resource named after the sheet
    (``sheet_<name>``, lowercased, spaces replaced with underscores),
    with column type hints derived from the Smartsheet column types.

    Args:
        access_token: The Smartsheet API access token.
        sheet_id: The ID of the sheet to load (numeric string).

    Returns:
        An iterable of DLT resources.
    """

    # Initialize Smartsheet client
    smartsheet_client = smartsheet.Smartsheet(access_token)
    smartsheet_client.errors_as_exceptions(True)

    # The SDK expects sheet_id to be an int
    sheet_id_int = int(sheet_id)
    # Sanitize the sheet name to be a valid resource name
    # We get objectValue to ensure `name` attribute is populated for the sheet
    sheet_details = smartsheet_client.Sheets.get_sheet(
        sheet_id_int, include=["objectValue"]
    )
    sheet_name = sheet_details.name
    resource_name = f"sheet_{sheet_name.replace(' ', '_').lower()}"
    # NOTE(review): the sheet is fetched a second time here (without
    # objectValue) for the row data — confirm whether one fetch could serve
    # both purposes before changing.
    sheet = smartsheet_client.Sheets.get_sheet(sheet_id_int)

    yield dlt.resource(
        _get_sheet_data(sheet),
        name=resource_name,
        columns=_generate_type_hints(sheet),
        write_disposition="replace",
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _get_sheet_data(sheet: Sheet):
    """Yield one dict per sheet row, keyed by column title plus ``_row_id``."""

    titles = [column.title for column in sheet.columns]
    for row in sheet.rows:
        record = {"_row_id": row.id}
        # Cells arrive in column order, so pair each with its column title.
        record.update(
            (titles[idx], cell.value) for idx, cell in enumerate(row.cells)
        )
        yield record
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _generate_type_hints(sheet: Sheet):
    """Build a dlt column-schema dict for every column whose Smartsheet type
    has an entry in TYPE_MAPPING; unmapped columns are omitted entirely."""
    hints = {}
    for column in sheet.columns:
        column_type = column.type.value
        if column_type not in TYPE_MAPPING:
            continue
        hints[column.title] = {
            "data_type": TYPE_MAPPING.get(column_type),
            "nullable": True,
        }
    return hints
|