ingestr 0.13.75__py3-none-any.whl → 0.14.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +22 -3
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +169 -1
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +26 -23
- ingestr/src/facebook_ads/helpers.py +47 -1
- ingestr/src/factory.py +48 -0
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +9 -0
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -163
- ingestr/src/frankfurter/helpers.py +3 -3
- ingestr/src/freshdesk/__init__.py +25 -8
- ingestr/src/freshdesk/freshdesk_client.py +40 -5
- ingestr/src/fundraiseup/__init__.py +49 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +6 -4
- ingestr/src/google_analytics/__init__.py +1 -1
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/hubspot/__init__.py +6 -12
- ingestr/src/influxdb/__init__.py +1 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/klaviyo/__init__.py +5 -5
- ingestr/src/linear/__init__.py +553 -116
- ingestr/src/linear/helpers.py +77 -38
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +5 -2
- ingestr/src/mongodb/helpers.py +384 -10
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +15 -8
- ingestr/src/shopify/__init__.py +1 -1
- ingestr/src/smartsheets/__init__.py +33 -5
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/sources.py +1418 -54
- ingestr/src/stripe_analytics/__init__.py +2 -19
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/tests/unit/test_smartsheets.py +6 -9
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/METADATA +24 -12
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/RECORD +79 -37
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/WHEEL +0 -0
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Constants and field lists used by the PlusVibeAI source."""

# Earliest date requested from the PlusVibeAI API when no start date is given.
DEFAULT_START_DATE = "2020-01-01"

# Timeout, in seconds, applied to every PlusVibeAI API request.
REQUEST_TIMEOUT = 300

# Page size used by default when paginating API responses.
DEFAULT_PAGE_SIZE = 100

# Upper bound on the page size (tune if the API limits change).
MAX_PAGE_SIZE = 1000

# Path prefix shared by all PlusVibeAI API endpoints.
API_BASE_PATH = "/api/v1"

# Fields requested for campaign records.
CAMPAIGN_FIELDS = (
    # Identity & ownership
    "id",
    "camp_name",
    "parent_camp_id",
    "campaign_type",
    "organization_id",
    "workspace_id",
    "status",
    # Lifecycle timestamps
    "created_at",
    "modified_at",
    "last_lead_sent",
    "last_paused_at_bounced",
    # Sending configuration
    "tags",
    "template_id",
    "email_accounts",
    "daily_limit",
    "interval_limit_in_min",
    "send_priority",
    "send_as_txt",
    # Tracking & behavior toggles
    "is_emailopened_tracking",
    "is_unsubscribed_link",
    "exclude_ooo",
    "is_acc_based_sending",
    "send_risky_email",
    "unsub_blocklist",
    "other_email_acc",
    "is_esp_match",
    "stop_on_lead_replied",
    # Bounce handling
    "is_pause_on_bouncerate",
    "bounce_rate_limit",
    "is_paused_at_bounced",
    # Scheduling
    "schedule",
    "first_wait_time",
    "camp_st_date",
    "camp_end_date",
    # Events & sequences
    "events",
    "sequences",
    "sequence_steps",
    "camp_emails",
    # Lead counters
    "lead_count",
    "completed_lead_count",
    "lead_contacted_count",
    # Delivery / engagement counters
    "sent_count",
    "opened_count",
    "unique_opened_count",
    "replied_count",
    "bounced_count",
    "unsubscribed_count",
    # Reply sentiment counters
    "positive_reply_count",
    "negative_reply_count",
    "neutral_reply_count",
    # Daily and business metrics
    "email_sent_today",
    "opportunity_val",
    "open_rate",
    "replied_rate",
    # User-defined data
    "custom_fields",
)

# Fields requested for lead records.
LEAD_FIELDS = (
    # Identity & ownership
    "_id",
    "organization_id",
    "campaign_id",
    "workspace_id",
    # Progress through the campaign
    "is_completed",
    "current_step",
    "status",
    "label",
    # Sending email account
    "email_account_id",
    "email_acc_name",
    # Parent campaign
    "camp_name",
    # Lifecycle timestamps
    "created_at",
    "modified_at",
    "last_sent_at",
    # Engagement counters
    "sent_step",
    "replied_count",
    "opened_count",
    # Deliverability (MX) verification
    "is_mx",
    "mx",
    # Contact details
    "email",
    "first_name",
    "last_name",
    "phone_number",
    # Postal address
    "address_line",
    "city",
    "state",
    "country",
    "country_code",
    # Employment details
    "job_title",
    "department",
    "company_name",
    "company_website",
    "industry",
    # Social profiles
    "linkedin_person_url",
    "linkedin_company_url",
    # Sequence length
    "total_steps",
    # Bounce diagnostics
    "bounce_msg",
)

# Fields requested for email-account records.
EMAIL_ACCOUNT_FIELDS = (
    # Identity & state
    "_id",
    "email",
    "status",
    "warmup_status",
    # Lifecycle timestamps
    "timestamp_created",
    "timestamp_updated",
    # Nested configuration object; kept whole rather than flattened.
    "payload",
    # For reference, the payload object carries (not requested separately):
    # - name (first_name, last_name)
    # - warmup (limit, warmup_custom_words, warmup_signature, advanced, increment, reply_rate)
    # - imap_host, imap_port
    # - smtp_host, smtp_port
    # - daily_limit, sending_gap
    # - reply_to, custom_domain, signature
    # - tags, cmps
    # - analytics (health_scores, reply_rates, daily_counters)
)

# Fields requested for email (message) records.
EMAIL_FIELDS = (
    # Identity & state
    "id",
    "message_id",
    "is_unread",
    # Associated lead / campaign
    "lead",
    "lead_id",
    "campaign_id",
    # Sender
    "from_address_email",
    "from_address_json",
    # Subject and body
    "subject",
    "content_preview",
    "body",
    # Headers and routing metadata
    "headers",
    "label",
    "thread_id",
    "eaccount",
    # Recipients (to / cc / bcc)
    "to_address_email_list",
    "to_address_json",
    "cc_address_email_list",
    "cc_address_json",
    "bcc_address_email_list",
    # Lifecycle timestamps
    "timestamp_created",
    "source_modified_at",
)

# Fields requested for blocklist entries.
BLOCKLIST_FIELDS = (
    # Identity
    "_id",
    "workspace_id",
    "value",
    "created_by_label",
    # Lifecycle timestamps
    "created_at",
)

# Fields requested for webhook records.
WEBHOOK_FIELDS = (
    # Identity
    "_id",
    "workspace_id",
    "org_id",
    "url",
    "name",
    "secret",
    # Trigger configuration
    "camp_ids",
    "evt_types",
    "status",
    "integration_type",
    # Delivery options
    "ignore_ooo",
    "ignore_automatic",
    # Lifecycle timestamps
    "created_at",
    "modified_at",
    "last_run",
    # Most recent delivery responses
    "last_resp",
    "last_recv_resp",
    # Audit trail
    "created_by",
    "modified_by",
)

# Fields requested for tag records.
TAG_FIELDS = (
    # Identity & presentation
    "_id",
    "workspace_id",
    "org_id",
    "name",
    "color",
    "description",
    "status",
    # Lifecycle timestamps
    "created_at",
    "modified_at",
)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from typing import Any, Dict, Iterable, Iterator, Optional

import aiohttp
import dlt

from .helpers import (
    _make_request,
    _paginate,
    convert_timestamps_to_iso,
    create_project_resource,
    process_customer_with_nested_resources_async,
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dlt.source(name="revenuecat", max_table_nesting=0)
def revenuecat_source(
    api_key: str,
    project_id: Optional[str] = None,
) -> Iterable[dlt.sources.DltResource]:
    """
    RevenueCat source for extracting data from RevenueCat API v2.

    Args:
        api_key: RevenueCat API v2 secret key with Bearer token format
        project_id: RevenueCat project ID (required for customers, products,
            entitlements, offerings, subscriptions, purchases)

    Returns:
        Iterable of DLT resources for customers, products, entitlements,
        offerings, purchases, subscriptions, and projects
    """

    @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
    def projects() -> Iterator[Dict[str, Any]]:
        """Get list of projects."""
        data = _make_request(api_key, "/projects")
        if "items" in data:
            for project in data["items"]:
                # API returns epoch-millisecond timestamps; normalize to ISO-8601.
                yield convert_timestamps_to_iso(project, ["created_at"])

    @dlt.resource(
        name="customer_ids",
        write_disposition="replace",
        selected=False,  # feeder resource for the `customers` transformer only
        parallelized=True,
    )
    def customer_ids():
        """Yield pages of raw customer records to feed the `customers` transformer."""
        if project_id is None:
            raise ValueError("project_id is required for customers resource")

        yield _paginate(api_key, f"/projects/{project_id}/customers")

    @dlt.transformer(
        data_from=customer_ids, write_disposition="replace", parallelized=True
    )
    async def customers(customer_batch) -> Iterator[Dict[str, Any]]:
        """Enrich each incoming customer with its subscriptions and purchases.

        Nested resources are fetched concurrently over one shared aiohttp
        session. (Parameter renamed from `customers` to avoid shadowing the
        transformer's own name.)
        """
        async with aiohttp.ClientSession() as session:
            for customer in customer_batch:
                yield await process_customer_with_nested_resources_async(
                    session, api_key, project_id, customer
                )

    # Create project-dependent resources dynamically.
    project_resources = []
    resource_names = ["products", "entitlements", "offerings"]

    for resource_name in resource_names:

        @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
        def create_resource(resource_name=resource_name) -> Iterator[Dict[str, Any]]:
            """Get list of project resource."""
            # `resource_name` is bound as a default argument to avoid the
            # classic late-binding-closure bug inside this loop.
            yield from create_project_resource(resource_name, api_key, project_id)

        # Set the function name for better identification.
        create_resource.__name__ = resource_name
        project_resources.append(create_resource)

    return [
        projects,
        customer_ids,
        customers,
        *project_resources,
    ]
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
4
|
+
|
|
5
|
+
import aiohttp
|
|
6
|
+
import pendulum
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
# Base URL for every RevenueCat REST API v2 endpoint used in this module.
REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _make_request(
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """Make a REST API request to RevenueCat API v2 with rate limiting.

    Args:
        api_key: RevenueCat API v2 secret key (sent as a Bearer token).
        endpoint: Path appended to ``REVENUECAT_API_BASE`` (e.g. ``"/projects"``).
        params: Optional query parameters.
        max_retries: Number of retries after the first attempt.
        timeout: Per-request timeout in seconds. Without one, a stalled
            connection would hang the pipeline indefinitely (requests has
            no default timeout).

    Returns:
        Parsed JSON response body.

    Raises:
        requests.exceptions.RequestException: When every attempt fails.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            response = requests.get(
                url, headers=headers, params=params or {}, timeout=timeout
            )

            # Handle rate limiting (429 Too Many Requests).
            if response.status_code == 429:
                if attempt < max_retries:
                    # Honor the Retry-After header when present, otherwise
                    # fall back to exponential backoff: 5, 10, 20 seconds.
                    retry_after = response.headers.get("Retry-After")
                    wait_time = int(retry_after) if retry_after else (2**attempt) * 5
                    time.sleep(wait_time)
                    continue

            # HTTPError raised here is a RequestException subclass, so non-2xx
            # responses (including a final-attempt 429) are retried/raised by
            # the handler below.
            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException:
            if attempt < max_retries:
                time.sleep((2**attempt) * 2)  # 2, 4, 8 seconds
                continue
            raise

    # Defensive fallback: in practice the final loop iteration always returns
    # or raises before reaching this point.
    response.raise_for_status()
    return response.json()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _paginate(
    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[Dict[str, Any]]:
    """Yield whole pages (lists of records) from a paginated RevenueCat endpoint.

    Follows the cursor embedded in each response's ``next_page`` URL until the
    API stops returning one. Note that each yielded value is a page, not a
    single record.
    """
    query: Dict[str, Any] = {} if params is None else params.copy()
    query["limit"] = 1000  # request the largest page the API allows

    while True:
        page = _make_request(api_key, endpoint, query)

        page_items = page.get("items")
        if page_items is not None:
            yield page_items

        if "next_page" not in page:
            break

        # The cursor lives in the next_page URL as a starting_after parameter.
        next_url = page["next_page"]
        if not (next_url and "starting_after=" in next_url):
            break
        query["starting_after"] = next_url.split("starting_after=")[1].split("&")[0]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def convert_timestamps_to_iso(
    record: Dict[str, Any], timestamp_fields: List[str]
) -> Dict[str, Any]:
    """Convert millisecond epoch timestamps on *record* to ISO-8601 strings.

    Mutates *record* in place and returns it. Fields that are absent or None
    are left untouched.
    """
    for name in timestamp_fields:
        value = record.get(name)
        if value is None:
            continue
        # RevenueCat reports timestamps in milliseconds; pendulum expects seconds.
        record[name] = pendulum.from_timestamp(value / 1000).to_iso8601_string()
    return record
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _make_request_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Make an async REST API request to RevenueCat API v2 with rate limiting.

    Async mirror of `_make_request`, using the shared aiohttp session so that
    many requests can be in flight concurrently.

    Args:
        session: Shared aiohttp client session.
        api_key: RevenueCat API v2 secret key (sent as a Bearer token).
        endpoint: Path appended to REVENUECAT_API_BASE.
        params: Optional query parameters.
        max_retries: Number of retries after the first attempt.

    Returns:
        Parsed JSON response body.
    """
    auth_header = f"Bearer {api_key}"

    headers = {"Authorization": auth_header, "Content-Type": "application/json"}

    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            async with session.get(
                url, headers=headers, params=params or {}
            ) as response:
                # Handle rate limiting (429 Too Many Requests)
                if response.status == 429:
                    if attempt < max_retries:
                        # Wait based on Retry-After header or exponential backoff
                        retry_after = response.headers.get("Retry-After")
                        if retry_after:
                            wait_time = int(retry_after)
                        else:
                            wait_time = (2**attempt) * 5  # 5, 10, 20 seconds

                        await asyncio.sleep(wait_time)
                        continue

                # raise_for_status raises aiohttp.ClientResponseError (a
                # ClientError subclass), so non-2xx responses — including a
                # final-attempt 429 — are handled by the except clause below.
                response.raise_for_status()
                return await response.json()

        except aiohttp.ClientError:
            if attempt < max_retries:
                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
                await asyncio.sleep(wait_time)
                continue
            raise

        # If we get here, all retries failed. NOTE(review): in practice the
        # final attempt above appears to always return or raise, making this
        # one extra request a defensive fallback — confirm before relying on it.
    async with session.get(url, headers=headers, params=params or {}) as response:
        response.raise_for_status()
        return await response.json()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
async def _paginate_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Collect every record from a paginated RevenueCat endpoint.

    Unlike the sync `_paginate`, which yields pages lazily, this gathers all
    records across pages into one flat list and returns it.
    """
    collected: List[Dict[str, Any]] = []
    query: Dict[str, Any] = {} if params is None else params.copy()
    query["limit"] = 1000  # request the largest page the API allows

    while True:
        data = await _make_request_async(session, api_key, endpoint, query)

        # Flatten the current page's records into the result.
        page_items = data.get("items")
        if page_items is not None:
            collected.extend(page_items)

        if "next_page" not in data:
            break

        # The cursor lives in the next_page URL as a starting_after parameter.
        next_url = data["next_page"]
        if not (next_url and "starting_after=" in next_url):
            break
        query["starting_after"] = next_url.split("starting_after=")[1].split("&")[0]

    return collected
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
async def process_customer_with_nested_resources_async(
    session: aiohttp.ClientSession,
    api_key: str,
    project_id: str,
    customer: Dict[str, Any],
) -> Dict[str, Any]:
    """Attach subscriptions and purchases to a customer record.

    Normalizes the customer's own timestamps, then fetches the two nested
    resources concurrently (via asyncio.gather) and stores each under its
    resource name on the customer dict. Already-populated nested resources
    are not re-fetched, but their timestamps are still normalized. The
    customer dict is mutated in place and returned.
    """
    customer_id = customer["id"]
    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])
    # (resource name, millisecond timestamp fields to normalize on each item)
    nested_resources = [
        ("subscriptions", ["purchased_at", "expires_at", "grace_period_expires_at"]),
        ("purchases", ["purchased_at", "expires_at"]),
    ]

    async def fetch_and_convert(resource_name, timestamp_fields):
        # Fetch only if the customer record doesn't already carry this resource.
        if resource_name not in customer or customer[resource_name] is None:
            endpoint = f"/projects/{project_id}/customers/{customer_id}/{resource_name}"
            customer[resource_name] = await _paginate_async(session, api_key, endpoint)
        if (
            timestamp_fields
            and resource_name in customer
            and customer[resource_name] is not None
        ):
            # convert_timestamps_to_iso mutates each item in place.
            for item in customer[resource_name]:
                convert_timestamps_to_iso(item, timestamp_fields)

    # Both nested fetches run concurrently; each writes to a distinct key.
    await asyncio.gather(
        *[
            fetch_and_convert(resource_name, timestamp_fields)
            for resource_name, timestamp_fields in nested_resources
        ]
    )

    return customer
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def create_project_resource(
    resource_name: str,
    api_key: str,
    project_id: Optional[str] = None,
    timestamp_fields: Optional[List[str]] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Helper function to create DLT resources for project-dependent endpoints.

    Args:
        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
        api_key: RevenueCat API key
        project_id: RevenueCat project ID
        timestamp_fields: List of timestamp fields to convert to ISO format

    Returns:
        Iterator of individual resource records with normalized timestamps

    Raises:
        ValueError: If project_id is not provided.
    """
    if project_id is None:
        raise ValueError(f"project_id is required for {resource_name} resource")

    endpoint = f"/projects/{project_id}/{resource_name}"
    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]

    # BUGFIX: _paginate yields whole pages (lists of records), not single
    # records. The previous code passed an entire page to
    # convert_timestamps_to_iso, whose `field in record` check silently
    # matched nothing on a list, so timestamps were never converted.
    # Iterate pages, then the records inside each page.
    for page in _paginate(api_key, endpoint):
        for item in page:
            yield convert_timestamps_to_iso(item, default_timestamp_fields)
|
|
@@ -13,6 +13,8 @@ def salesforce_source(
|
|
|
13
13
|
username: str,
|
|
14
14
|
password: str,
|
|
15
15
|
token: str,
|
|
16
|
+
domain: str,
|
|
17
|
+
custom_object: str = None,
|
|
16
18
|
) -> Iterable[DltResource]:
|
|
17
19
|
"""
|
|
18
20
|
Retrieves data from Salesforce using the Salesforce API.
|
|
@@ -26,7 +28,7 @@ def salesforce_source(
|
|
|
26
28
|
DltResource: Data resources from Salesforce.
|
|
27
29
|
"""
|
|
28
30
|
|
|
29
|
-
client = Salesforce(username, password, token)
|
|
31
|
+
client = Salesforce(username, password, token, domain=domain)
|
|
30
32
|
|
|
31
33
|
# define resources
|
|
32
34
|
@dlt.resource(write_disposition="replace")
|
|
@@ -37,7 +39,7 @@ def salesforce_source(
|
|
|
37
39
|
def user_role() -> Iterable[TDataItem]:
|
|
38
40
|
yield get_records(client, "UserRole")
|
|
39
41
|
|
|
40
|
-
@dlt.resource(write_disposition="merge")
|
|
42
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
41
43
|
def opportunity(
|
|
42
44
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
43
45
|
"SystemModstamp", initial_value=None
|
|
@@ -47,7 +49,7 @@ def salesforce_source(
|
|
|
47
49
|
client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
|
|
48
50
|
)
|
|
49
51
|
|
|
50
|
-
@dlt.resource(write_disposition="merge")
|
|
52
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
51
53
|
def opportunity_line_item(
|
|
52
54
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
53
55
|
"SystemModstamp", initial_value=None
|
|
@@ -57,7 +59,7 @@ def salesforce_source(
|
|
|
57
59
|
client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
|
|
58
60
|
)
|
|
59
61
|
|
|
60
|
-
@dlt.resource(write_disposition="merge")
|
|
62
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
61
63
|
def opportunity_contact_role(
|
|
62
64
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
63
65
|
"SystemModstamp", initial_value=None
|
|
@@ -70,7 +72,7 @@ def salesforce_source(
|
|
|
70
72
|
"SystemModstamp",
|
|
71
73
|
)
|
|
72
74
|
|
|
73
|
-
@dlt.resource(write_disposition="merge")
|
|
75
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
74
76
|
def account(
|
|
75
77
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
76
78
|
"LastModifiedDate", initial_value=None
|
|
@@ -92,7 +94,7 @@ def salesforce_source(
|
|
|
92
94
|
def campaign() -> Iterable[TDataItem]:
|
|
93
95
|
yield get_records(client, "Campaign")
|
|
94
96
|
|
|
95
|
-
@dlt.resource(write_disposition="merge")
|
|
97
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
96
98
|
def campaign_member(
|
|
97
99
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
98
100
|
"SystemModstamp", initial_value=None
|
|
@@ -114,7 +116,7 @@ def salesforce_source(
|
|
|
114
116
|
def pricebook_entry() -> Iterable[TDataItem]:
|
|
115
117
|
yield get_records(client, "PricebookEntry")
|
|
116
118
|
|
|
117
|
-
@dlt.resource(write_disposition="merge")
|
|
119
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
118
120
|
def task(
|
|
119
121
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
120
122
|
"SystemModstamp", initial_value=None
|
|
@@ -122,7 +124,7 @@ def salesforce_source(
|
|
|
122
124
|
) -> Iterable[TDataItem]:
|
|
123
125
|
yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")
|
|
124
126
|
|
|
125
|
-
@dlt.resource(write_disposition="merge")
|
|
127
|
+
@dlt.resource(write_disposition="merge", primary_key="id")
|
|
126
128
|
def event(
|
|
127
129
|
last_timestamp: incremental[str] = dlt.sources.incremental(
|
|
128
130
|
"SystemModstamp", initial_value=None
|
|
@@ -130,6 +132,10 @@ def salesforce_source(
|
|
|
130
132
|
) -> Iterable[TDataItem]:
|
|
131
133
|
yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")
|
|
132
134
|
|
|
135
|
+
@dlt.resource(write_disposition="replace")
|
|
136
|
+
def custom() -> Iterable[TDataItem]:
|
|
137
|
+
yield get_records(client, custom_object)
|
|
138
|
+
|
|
133
139
|
return (
|
|
134
140
|
user,
|
|
135
141
|
user_role,
|
|
@@ -146,4 +152,5 @@ def salesforce_source(
|
|
|
146
152
|
pricebook_entry,
|
|
147
153
|
task,
|
|
148
154
|
event,
|
|
155
|
+
custom,
|
|
149
156
|
)
|
ingestr/src/shopify/__init__.py
CHANGED
|
@@ -669,7 +669,7 @@ def shopify_source(
|
|
|
669
669
|
params["updated_at_max"] = updated_at.end_value.isoformat()
|
|
670
670
|
yield from client.get_pages("customers", params)
|
|
671
671
|
|
|
672
|
-
@dlt.resource(primary_key="id", write_disposition="
|
|
672
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
|
|
673
673
|
def events(
|
|
674
674
|
created_at: dlt.sources.incremental[
|
|
675
675
|
pendulum.DateTime
|