ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
4
|
+
|
|
5
|
+
import aiohttp
|
|
6
|
+
import pendulum
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
# Base URL for RevenueCat's REST API v2; all endpoint paths in this module are appended to it.
REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _make_request(
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """Make a GET request to RevenueCat API v2 with rate-limit handling and retries.

    Args:
        api_key: RevenueCat secret API key (sent as a Bearer token).
        endpoint: Path appended to ``REVENUECAT_API_BASE`` (e.g. "/projects/...").
        params: Optional query parameters.
        max_retries: Number of retries after the initial attempt.
        timeout: Per-request timeout in seconds; without it a stalled
            connection would block the pipeline indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: when all attempts fail
            (includes HTTPError for non-2xx responses, Timeout, etc.).
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            response = requests.get(
                url, headers=headers, params=params or {}, timeout=timeout
            )

            # Handle rate limiting (429 Too Many Requests): honor Retry-After
            # when present, otherwise back off exponentially (5, 10, 20 s).
            if response.status_code == 429 and attempt < max_retries:
                retry_after = response.headers.get("Retry-After")
                wait_time = int(retry_after) if retry_after else (2**attempt) * 5
                time.sleep(wait_time)
                continue

            # A 429 on the final attempt falls through here and raises,
            # which is then re-raised by the except branch below.
            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException:
            if attempt < max_retries:
                time.sleep((2**attempt) * 2)  # 2, 4, 8 seconds
                continue
            raise

    # Every final loop iteration either returns or raises, so this is unreachable;
    # kept as a guard instead of the previous dead re-request/re-parse code.
    raise RuntimeError("retry loop exited without returning or raising")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _paginate(
    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[Dict[str, Any]]:
    """Yield individual records from a paginated RevenueCat API v2 endpoint.

    Fix: the previous implementation yielded each page's *list* of items,
    contradicting the ``Iterator[Dict[str, Any]]`` return type and causing
    ``create_project_resource`` to run its per-record timestamp conversion on
    lists (a silent no-op). Records are now yielded one at a time, matching
    the flattened behavior of ``_paginate_async``.

    Args:
        api_key: RevenueCat secret API key.
        endpoint: Endpoint path, e.g. "/projects/{id}/products".
        params: Optional extra query parameters; ``limit`` is forced to 1000.

    Yields:
        One record dict at a time, across all pages.
    """
    current_params = dict(params) if params is not None else {}
    current_params["limit"] = 1000

    while True:
        data = _make_request(api_key, endpoint, current_params)

        items = data.get("items")
        if items is not None:
            yield from items

        if "next_page" not in data:
            break

        # The API returns a full next-page URL; we only need its
        # starting_after cursor to continue with our own params.
        next_page_url = data["next_page"]
        if next_page_url and "starting_after=" in next_page_url:
            cursor = next_page_url.split("starting_after=")[1].split("&")[0]
            current_params["starting_after"] = cursor
        else:
            break
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def convert_timestamps_to_iso(
    record: Dict[str, Any], timestamp_fields: List[str]
) -> Dict[str, Any]:
    """Convert millisecond-epoch fields of *record* to ISO-8601 strings, in place.

    Fields that are missing from the record or set to None are left untouched.
    Returns the same (mutated) record for convenience.
    """
    for name in timestamp_fields:
        raw = record.get(name)
        if raw is None:
            continue
        record[name] = pendulum.from_timestamp(raw / 1000).to_iso8601_string()
    return record
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _make_request_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Make an async GET request to RevenueCat API v2 with rate-limit handling.

    Mirrors ``_make_request`` but uses the shared aiohttp session. Fix: the
    previous "all retries failed" tail after the loop was unreachable (every
    final attempt returns or raises inside the loop) and, worse, would have
    issued an extra duplicate network request; it has been removed.

    Args:
        session: Shared aiohttp client session (timeouts are configured on it).
        api_key: RevenueCat secret API key (sent as a Bearer token).
        endpoint: Path appended to ``REVENUECAT_API_BASE``.
        params: Optional query parameters.
        max_retries: Number of retries after the initial attempt.

    Returns:
        The decoded JSON response body.

    Raises:
        aiohttp.ClientError: when all attempts fail.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            async with session.get(
                url, headers=headers, params=params or {}
            ) as response:
                # Handle rate limiting (429): honor Retry-After when present,
                # otherwise back off exponentially (5, 10, 20 s).
                if response.status == 429 and attempt < max_retries:
                    retry_after = response.headers.get("Retry-After")
                    wait_time = int(retry_after) if retry_after else (2**attempt) * 5
                    await asyncio.sleep(wait_time)
                    continue

                # A 429 on the final attempt falls through here and raises,
                # which is then re-raised by the except branch below.
                response.raise_for_status()
                return await response.json()

        except aiohttp.ClientError:
            if attempt < max_retries:
                await asyncio.sleep((2**attempt) * 2)  # 2, 4, 8 seconds
                continue
            raise

    # Unreachable: every final iteration returns or raises.
    raise RuntimeError("retry loop exited without returning or raising")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
async def _paginate_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Fetch every record from a paginated RevenueCat endpoint into one list.

    Unlike the synchronous ``_paginate`` this is eager: it walks all pages and
    returns the accumulated records, which lets callers fan out with
    ``asyncio.gather``.
    """
    collected: List[Dict[str, Any]] = []
    query = dict(params) if params is not None else {}
    query["limit"] = 1000

    while True:
        page = await _make_request_async(session, api_key, endpoint, query)

        # Accumulate this page's records.
        page_items = page.get("items")
        if page_items is not None:
            collected.extend(page_items)

        if "next_page" not in page:
            break

        # Pull the starting_after cursor out of the next-page URL.
        next_url = page["next_page"]
        if not (next_url and "starting_after=" in next_url):
            break
        query["starting_after"] = next_url.split("starting_after=")[1].split("&")[0]

    return collected
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
async def process_customer_with_nested_resources_async(
    session: aiohttp.ClientSession,
    api_key: str,
    project_id: str,
    customer: Dict[str, Any],
) -> Dict[str, Any]:
    """Enrich one customer record with its nested subscriptions and purchases.

    Timestamps on the customer and on each nested record are converted from
    millisecond epochs to ISO-8601 in place. Both nested collections are
    fetched concurrently.
    """
    customer_id = customer["id"]
    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])

    plan = [
        ("subscriptions", ["purchased_at", "expires_at", "grace_period_expires_at"]),
        ("purchases", ["purchased_at", "expires_at"]),
    ]

    async def _load_one(name: str, ts_fields: List[str]) -> None:
        # Only hit the API when the payload doesn't already carry the collection.
        if customer.get(name) is None:
            endpoint = f"/projects/{project_id}/customers/{customer_id}/{name}"
            customer[name] = await _paginate_async(session, api_key, endpoint)
        records = customer.get(name)
        if ts_fields and records is not None:
            for rec in records:
                convert_timestamps_to_iso(rec, ts_fields)

    await asyncio.gather(*(_load_one(name, fields) for name, fields in plan))

    return customer
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def create_project_resource(
    resource_name: str,
    api_key: str,
    project_id: Optional[str] = None,
    timestamp_fields: Optional[List[str]] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Helper function to create DLT resources for project-dependent endpoints.

    Fix: ``project_id`` and ``timestamp_fields`` were annotated ``str`` /
    ``List[str]`` while defaulting to ``None``; annotations corrected to
    ``Optional`` (no behavior change).

    Args:
        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
        api_key: RevenueCat API key
        project_id: RevenueCat project ID
        timestamp_fields: List of timestamp fields to convert to ISO format;
            defaults to ["created_at", "updated_at"] when not given.

    Returns:
        Iterator of resource data

    Raises:
        ValueError: if ``project_id`` is not provided (raised lazily, on first
            iteration, since this is a generator).
    """
    if project_id is None:
        raise ValueError(f"project_id is required for {resource_name} resource")

    endpoint = f"/projects/{project_id}/{resource_name}"
    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]

    for item in _paginate(api_key, endpoint):
        yield convert_timestamps_to_iso(item, default_timestamp_fields)
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.common.typing import TDataItem
|
|
5
|
+
from dlt.sources import DltResource, incremental
|
|
6
|
+
from simple_salesforce import Salesforce
|
|
7
|
+
|
|
8
|
+
from .helpers import get_records
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dlt.source(name="salesforce")
def salesforce_source(
    username: str,
    password: str,
    token: str,
    domain: str,
    custom_object: str = None,  # NOTE(review): annotated str but defaults to None; Optional[str] is the intended type
) -> Iterable[DltResource]:
    """
    Retrieves data from Salesforce using the Salesforce API.

    Reference objects (users, contacts, products, ...) are fully re-loaded on
    every run (write_disposition="replace"); high-volume objects
    (opportunities, tasks, events, ...) are loaded incrementally on their
    SystemModstamp / LastModifiedDate column and merged by primary key.

    Args:
        username (str): The username for authentication.
        password (str): The password for authentication.
        token (str): The security token for authentication.
        domain (str): Salesforce domain to authenticate against (passed
            straight to simple_salesforce, e.g. "login" or "test").
        custom_object (str, optional): API name of a custom sObject served by
            the `custom` resource. The `custom` resource is always part of the
            returned tuple; invoking it with custom_object=None would fail, so
            presumably callers only select it when a name was given — TODO
            confirm against the caller in sources.py.

    Yields:
        DltResource: Data resources from Salesforce.
    """

    client = Salesforce(username, password, token, domain=domain)

    # define resources
    @dlt.resource(write_disposition="replace")
    def user() -> Iterable[TDataItem]:
        yield get_records(client, "User")

    @dlt.resource(write_disposition="replace")
    def user_role() -> Iterable[TDataItem]:
        yield get_records(client, "UserRole")

    # initial_value=None: the first run has no lower bound and loads all rows;
    # subsequent runs resume from the last SystemModstamp seen.
    @dlt.resource(write_disposition="merge", primary_key="id")
    def opportunity(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
        )

    @dlt.resource(write_disposition="merge", primary_key="id")
    def opportunity_line_item(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
        )

    @dlt.resource(write_disposition="merge", primary_key="id")
    def opportunity_contact_role(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client,
            "OpportunityContactRole",
            last_timestamp.last_value,
            "SystemModstamp",
        )

    # NOTE(review): Account is cursored on LastModifiedDate while the other
    # incremental resources use SystemModstamp — presumably intentional; confirm.
    @dlt.resource(write_disposition="merge", primary_key="id")
    def account(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "LastModifiedDate", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "Account", last_timestamp.last_value, "LastModifiedDate"
        )

    @dlt.resource(write_disposition="replace")
    def contact() -> Iterable[TDataItem]:
        yield get_records(client, "Contact")

    @dlt.resource(write_disposition="replace")
    def lead() -> Iterable[TDataItem]:
        yield get_records(client, "Lead")

    @dlt.resource(write_disposition="replace")
    def campaign() -> Iterable[TDataItem]:
        yield get_records(client, "Campaign")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def campaign_member(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "CampaignMember", last_timestamp.last_value, "SystemModstamp"
        )

    # "Product2" / "Pricebook2" are the actual Salesforce API object names.
    @dlt.resource(write_disposition="replace")
    def product() -> Iterable[TDataItem]:
        yield get_records(client, "Product2")

    @dlt.resource(write_disposition="replace")
    def pricebook() -> Iterable[TDataItem]:
        yield get_records(client, "Pricebook2")

    @dlt.resource(write_disposition="replace")
    def pricebook_entry() -> Iterable[TDataItem]:
        yield get_records(client, "PricebookEntry")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def task(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def event(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")

    # Loads the caller-specified custom sObject; only usable when
    # custom_object was provided (see docstring note above).
    @dlt.resource(write_disposition="replace")
    def custom() -> Iterable[TDataItem]:
        yield get_records(client, custom_object)

    return (
        user,
        user_role,
        opportunity,
        opportunity_line_item,
        opportunity_contact_role,
        account,
        contact,
        lead,
        campaign,
        campaign_member,
        product,
        pricebook,
        pricebook_entry,
        task,
        event,
        custom,
    )
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Salesforce source helpers"""
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, Optional
|
|
4
|
+
|
|
5
|
+
import pendulum
|
|
6
|
+
from dlt.common.typing import TDataItem
|
|
7
|
+
from simple_salesforce import Salesforce
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_records(
    sf: Salesforce,
    sobject: str,
    last_state: Optional[str] = None,
    replication_key: Optional[str] = None,
) -> Iterable[TDataItem]:
    """
    Retrieves records from Salesforce for a specified sObject via the Bulk API.

    Args:
        sf (Salesforce): An instance of the Salesforce API client.
        sobject (str): The name of the sObject to retrieve records from.
        last_state (str, optional): The last known state for incremental loading. Defaults to None.
        replication_key (str, optional): The replication key for incremental loading. Defaults to None.

    Yields:
        Dict[TDataItem]: A dictionary representing a record from the Salesforce sObject.
    """

    # Get all fields for the sobject
    desc = getattr(sf, sobject).describe()
    # Salesforce returns compound fields as separate fields, so we need to filter them out.
    # "Name" is removed from the exclusion set so it is still queried even on
    # objects where it is marked compound — presumably intentional; confirm.
    compound_fields = {
        f["compoundFieldName"]
        for f in desc["fields"]
        if f["compoundFieldName"] is not None
    } - {"Name"}
    # Salesforce returns datetime fields as timestamps, so we need to convert them.
    # Only "datetime"-typed fields are collected; plain "date" fields pass through as-is.
    date_fields = {
        f["name"] for f in desc["fields"] if f["type"] in ("datetime",) and f["name"]
    }
    # If no fields are specified, use all fields except compound fields
    fields = [f["name"] for f in desc["fields"] if f["name"] not in compound_fields]

    # Generate a predicate to filter records by the replication key.
    # last_state is interpolated unquoted — correct for SOQL datetime literals
    # (e.g. SystemModstamp), which take no quotes.
    predicate, order_by, n_records = "", "", 0
    if replication_key:
        if last_state:
            predicate = f"WHERE {replication_key} > {last_state}"
        order_by = f"ORDER BY {replication_key} ASC"
    query = f"SELECT {', '.join(fields)} FROM {sobject} {predicate} {order_by}"

    # Query all records in batches; lazy_operation streams page by page.
    for page in getattr(sf.bulk, sobject).query_all(query, lazy_operation=True):
        for record in page:
            # Strip out the attributes field
            record.pop("attributes", None)
            for field in date_fields:
                # Convert Salesforce timestamps (epoch milliseconds) to ISO 8601
                if record.get(field):
                    record[field] = pendulum.from_timestamp(
                        record[field] / 1000,
                    ).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        # Records were mutated in place above; emit the whole page.
        yield from page
        # Running total of emitted records; currently not used after the loop.
        n_records += len(page)
|
ingestr/src/shopify/__init__.py
CHANGED
|
@@ -669,7 +669,7 @@ def shopify_source(
|
|
|
669
669
|
params["updated_at_max"] = updated_at.end_value.isoformat()
|
|
670
670
|
yield from client.get_pages("customers", params)
|
|
671
671
|
|
|
672
|
-
@dlt.resource(primary_key="id", write_disposition="
|
|
672
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
|
|
673
673
|
def events(
|
|
674
674
|
created_at: dlt.sources.incremental[
|
|
675
675
|
pendulum.DateTime
|
|
@@ -1690,16 +1690,6 @@ query discountNodes($after: String, $query: String, $first: Int) {
|
|
|
1690
1690
|
"nullable": True,
|
|
1691
1691
|
"description": "The category of the product from Shopify's Standard Product Taxonomy.",
|
|
1692
1692
|
},
|
|
1693
|
-
"combinedListing": {
|
|
1694
|
-
"data_type": "json",
|
|
1695
|
-
"nullable": True,
|
|
1696
|
-
"description": "A special product type that combines separate products into a single product listing.",
|
|
1697
|
-
},
|
|
1698
|
-
"combinedListingRole": {
|
|
1699
|
-
"data_type": "json",
|
|
1700
|
-
"nullable": True,
|
|
1701
|
-
"description": "The role of the product in a combined listing.",
|
|
1702
|
-
},
|
|
1703
1693
|
"compareAtPriceRange": {
|
|
1704
1694
|
"data_type": "json",
|
|
1705
1695
|
"nullable": True,
|
|
@@ -1841,12 +1831,6 @@ query products($after: String, $query: String, $first: Int) {
|
|
|
1841
1831
|
category {
|
|
1842
1832
|
id
|
|
1843
1833
|
}
|
|
1844
|
-
combinedListing {
|
|
1845
|
-
parentProduct {
|
|
1846
|
-
id
|
|
1847
|
-
}
|
|
1848
|
-
}
|
|
1849
|
-
combinedListingRole
|
|
1850
1834
|
compareAtPriceRange {
|
|
1851
1835
|
maxVariantCompareAtPrice {
|
|
1852
1836
|
amount
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import smartsheet # type: ignore
|
|
5
|
+
from dlt.extract import DltResource
|
|
6
|
+
from smartsheet.models.enums import ColumnType # type: ignore
|
|
7
|
+
from smartsheet.models.sheet import Sheet # type: ignore
|
|
8
|
+
|
|
9
|
+
# Map Smartsheet column types to dlt data-type names. Column types absent from
# this mapping are skipped when building schema hints (see _generate_type_hints)
# and left for dlt to infer from the data.
TYPE_MAPPING = {
    ColumnType.TEXT_NUMBER: "text",
    ColumnType.DATE: "date",
    ColumnType.DATETIME: "timestamp",
    ColumnType.CONTACT_LIST: "text",
    ColumnType.CHECKBOX: "bool",
    ColumnType.PICKLIST: "text",
    ColumnType.DURATION: "text",
    ColumnType.PREDECESSOR: "text",
    ColumnType.ABSTRACT_DATETIME: "timestamp",
    ColumnType.MULTI_CONTACT_LIST: "text",
    ColumnType.MULTI_PICKLIST: "text",
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
def smartsheet_source(
    access_token: str,
    sheet_id: str,
) -> Iterable[DltResource]:
    """
    A DLT source exposing a single Smartsheet sheet as one resource.

    Args:
        access_token: The Smartsheet API access token.
        sheet_id: The ID of the sheet to load.

    Returns:
        An iterable of DLT resources (one full-refresh resource per sheet).
    """
    client = smartsheet.Smartsheet(access_token)
    client.errors_as_exceptions(True)

    # The SDK expects the sheet id as an int.
    numeric_id = int(sheet_id)

    # First fetch (with objectValue) is used only to read the sheet's display
    # name, which is sanitized into a valid resource name.
    # NOTE(review): the sheet is fetched twice — once for the name and once for
    # the data; confirm whether a single call could serve both.
    named_sheet = client.Sheets.get_sheet(numeric_id, include=["objectValue"])
    resource_name = f"sheet_{named_sheet.name.replace(' ', '_').lower()}"

    sheet = client.Sheets.get_sheet(numeric_id)

    yield dlt.resource(
        _get_sheet_data(sheet),
        name=resource_name,
        columns=_generate_type_hints(sheet),
        write_disposition="replace",
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _get_sheet_data(sheet: Sheet):
    """Yield one dict per sheet row, keyed by column title, with the row id under ``_row_id``."""
    titles = [column.title for column in sheet.columns]
    for row in sheet.rows:
        record = {"_row_id": row.id}
        # Cells arrive in column order, so pair them with titles positionally.
        record.update(
            (titles[idx], cell.value) for idx, cell in enumerate(row.cells)
        )
        yield record
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _generate_type_hints(sheet: Sheet):
    """Build dlt column schema hints for the sheet; unmapped column types are omitted."""
    hints = {}
    for column in sheet.columns:
        kind = column.type.value
        if kind in TYPE_MAPPING:
            hints[column.title] = {
                "data_type": TYPE_MAPPING.get(kind),
                "nullable": True,
            }
    return hints
|