ingestr 0.13.84__py3-none-any.whl → 0.13.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +4 -0
- ingestr/src/fluxx/__init__.py +5725 -0
- ingestr/src/fluxx/helpers.py +216 -0
- ingestr/src/frankfurter/__init__.py +121 -123
- ingestr/src/frankfurter/helpers.py +4 -4
- ingestr/src/linear/__init__.py +10 -9
- ingestr/src/linear/helpers.py +20 -10
- ingestr/src/revenuecat/__init__.py +103 -0
- ingestr/src/revenuecat/helpers.py +262 -0
- ingestr/src/sources.py +132 -13
- ingestr/src/stripe_analytics/__init__.py +1 -18
- {ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/METADATA +1 -1
- {ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/RECORD +17 -13
- {ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/WHEEL +0 -0
- {ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/revenuecat/__init__.py
ADDED
@@ -0,0 +1,103 @@
+import asyncio
+from typing import Any, Dict, Iterable, Iterator
+
+import aiohttp
+import dlt
+
+from .helpers import (
+    _make_request,
+    _paginate,
+    convert_timestamps_to_iso,
+    process_customer_with_nested_resources_async,
+)
+
+
+@dlt.source(name="revenuecat", max_table_nesting=0)
+def revenuecat_source(
+    api_key: str,
+    project_id: str = None,
+) -> Iterable[dlt.sources.DltResource]:
+    """
+    RevenueCat source for extracting data from RevenueCat API v2.
+
+    Args:
+        api_key: RevenueCat API v2 secret key with Bearer token format
+        project_id: RevenueCat project ID (required for customers, products, subscriptions, purchases)
+
+    Returns:
+        Iterable of DLT resources for customers, products, purchases, subscriptions, and projects
+    """
+
+    @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
+    def projects() -> Iterator[Dict[str, Any]]:
+        """Get list of projects."""
+        # Get projects list
+        data = _make_request(api_key, "/projects")
+        if "items" in data:
+            for project in data["items"]:
+                project = convert_timestamps_to_iso(project, ["created_at"])
+                yield project
+
+    @dlt.resource(
+        name="customers", primary_key="id", write_disposition="merge", parallelized=True
+    )
+    def customers() -> Iterator[Dict[str, Any]]:
+        """Get list of customers with nested purchases and subscriptions."""
+        if project_id is None:
+            raise ValueError("project_id is required for customers resource")
+        endpoint = f"/projects/{project_id}/customers"
+
+        async def process_customer_batch(customer_batch):
+            """Process a batch of customers with async operations."""
+            async with aiohttp.ClientSession() as session:
+                tasks = []
+                for customer in customer_batch:
+                    task = process_customer_with_nested_resources_async(
+                        session, api_key, project_id, customer
+                    )
+                    tasks.append(task)
+
+                return await asyncio.gather(*tasks)
+
+        def process_customers_sync():
+            """Process customers in batches using asyncio."""
+            batch_size = 50  # Conservative batch size due to 60 req/min rate limit
+            current_batch = []
+
+            for customer in _paginate(api_key, endpoint):
+                current_batch.append(customer)
+
+                if len(current_batch) >= batch_size:
+                    # Process the batch asynchronously
+                    processed_customers = asyncio.run(
+                        process_customer_batch(current_batch)
+                    )
+                    for processed_customer in processed_customers:
+                        yield processed_customer
+                    current_batch = []
+
+            # Process any remaining customers in the final batch
+            if current_batch:
+                processed_customers = asyncio.run(process_customer_batch(current_batch))
+                for processed_customer in processed_customers:
+                    yield processed_customer
+
+        # Yield each processed customer
+        yield from process_customers_sync()
+
+    @dlt.resource(name="products", primary_key="id", write_disposition="merge")
+    def products() -> Iterator[Dict[str, Any]]:
+        """Get list of products."""
+        if project_id is None:
+            raise ValueError("project_id is required for products resource")
+        endpoint = f"/projects/{project_id}/products"
+
+        for product in _paginate(api_key, endpoint):
+            product = convert_timestamps_to_iso(product, ["created_at", "updated_at"])
+            yield product
+
+    return [
+        projects,
+        customers,
+        products,
+    ]
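Note on usage: the file above is a standard dlt source, so it can be exercised directly, independent of the ingestr CLI. A minimal sketch; the pipeline name, destination, and credential values are placeholders, not part of this release:

    import dlt
    from ingestr.src.revenuecat import revenuecat_source

    pipeline = dlt.pipeline(
        pipeline_name="revenuecat_demo",  # placeholder
        destination="duckdb",             # placeholder
        dataset_name="revenuecat",
    )
    # Select a single resource, mirroring what RevenueCatSource.dlt_source
    # does further below in sources.py via .with_resources(table).
    info = pipeline.run(
        revenuecat_source(api_key="sk_...", project_id="proj_...").with_resources(
            "customers"
        )
    )
    print(info)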
ingestr/src/revenuecat/helpers.py
ADDED
@@ -0,0 +1,262 @@
+import asyncio
+import time
+from typing import Any, Dict, Iterator, List, Optional
+
+import aiohttp
+import pendulum
+import requests
+
+REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
+
+
+def _make_request(
+    api_key: str,
+    endpoint: str,
+    params: Optional[Dict[str, Any]] = None,
+    max_retries: int = 3,
+) -> Dict[str, Any]:
+    """Make a REST API request to RevenueCat API v2 with rate limiting."""
+    auth_header = f"Bearer {api_key}"
+
+    headers = {"Authorization": auth_header, "Content-Type": "application/json"}
+
+    url = f"{REVENUECAT_API_BASE}{endpoint}"
+
+    for attempt in range(max_retries + 1):
+        try:
+            response = requests.get(url, headers=headers, params=params or {})
+
+            # Handle rate limiting (429 Too Many Requests)
+            if response.status_code == 429:
+                if attempt < max_retries:
+                    # Wait based on Retry-After header or exponential backoff
+                    retry_after = response.headers.get("Retry-After")
+                    if retry_after:
+                        wait_time = int(retry_after)
+                    else:
+                        wait_time = (2**attempt) * 5  # 5, 10, 20 seconds
+
+                    time.sleep(wait_time)
+                    continue
+
+            response.raise_for_status()
+            return response.json()
+
+        except requests.exceptions.RequestException:
+            if attempt < max_retries:
+                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
+                time.sleep(wait_time)
+                continue
+            raise
+
+    # If we get here, all retries failed
+    response.raise_for_status()
+    return response.json()
+
+
+def _paginate(
+    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
+) -> Iterator[Dict[str, Any]]:
+    """Paginate through RevenueCat API results."""
+    current_params = params.copy() if params is not None else {}
+    current_params["limit"] = 1000
+
+    while True:
+        data = _make_request(api_key, endpoint, current_params)
+
+        # Yield items from the current page
+        if "items" in data and data["items"] is not None:
+            for item in data["items"]:
+                yield item
+
+        # Check if there's a next page
+        if "next_page" not in data:
+            break
+
+        # Extract starting_after parameter from next_page URL
+        next_page_url = data["next_page"]
+        if next_page_url and "starting_after=" in next_page_url:
+            starting_after = next_page_url.split("starting_after=")[1].split("&")[0]
+            current_params["starting_after"] = starting_after
+        else:
+            break
+
+
+def convert_timestamps_to_iso(
+    record: Dict[str, Any], timestamp_fields: List[str]
+) -> Dict[str, Any]:
+    """Convert timestamp fields from milliseconds to ISO format."""
+    for field in timestamp_fields:
+        if field in record and record[field] is not None:
+            # Convert from milliseconds timestamp to ISO datetime string
+            timestamp_ms = record[field]
+            dt = pendulum.from_timestamp(timestamp_ms / 1000)
+            record[field] = dt.to_iso8601_string()
+
+    return record
+
+
+async def _make_request_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    endpoint: str,
+    params: Optional[Dict[str, Any]] = None,
+    max_retries: int = 3,
+) -> Dict[str, Any]:
+    """Make an async REST API request to RevenueCat API v2 with rate limiting."""
+    auth_header = f"Bearer {api_key}"
+
+    headers = {"Authorization": auth_header, "Content-Type": "application/json"}
+
+    url = f"{REVENUECAT_API_BASE}{endpoint}"
+
+    for attempt in range(max_retries + 1):
+        try:
+            async with session.get(
+                url, headers=headers, params=params or {}
+            ) as response:
+                # Handle rate limiting (429 Too Many Requests)
+                if response.status == 429:
+                    if attempt < max_retries:
+                        # Wait based on Retry-After header or exponential backoff
+                        retry_after = response.headers.get("Retry-After")
+                        if retry_after:
+                            wait_time = int(retry_after)
+                        else:
+                            wait_time = (2**attempt) * 5  # 5, 10, 20 seconds
+
+                        await asyncio.sleep(wait_time)
+                        continue
+
+                response.raise_for_status()
+                return await response.json()
+
+        except aiohttp.ClientError:
+            if attempt < max_retries:
+                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
+                await asyncio.sleep(wait_time)
+                continue
+            raise
+
+    # If we get here, all retries failed
+    async with session.get(url, headers=headers, params=params or {}) as response:
+        response.raise_for_status()
+        return await response.json()
+
+
+async def _paginate_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    endpoint: str,
+    params: Optional[Dict[str, Any]] = None,
+) -> List[Dict[str, Any]]:
+    """Paginate through RevenueCat API results asynchronously."""
+    items = []
+    current_params = params.copy() if params is not None else {}
+    current_params["limit"] = 1000
+
+    while True:
+        data = await _make_request_async(session, api_key, endpoint, current_params)
+
+        # Collect items from the current page
+        if "items" in data and data["items"] is not None:
+            items.extend(data["items"])
+
+        # Check if there's a next page
+        if "next_page" not in data:
+            break
+
+        # Extract starting_after parameter from next_page URL
+        next_page_url = data["next_page"]
+        if next_page_url and "starting_after=" in next_page_url:
+            starting_after = next_page_url.split("starting_after=")[1].split("&")[0]
+            current_params["starting_after"] = starting_after
+        else:
+            break
+
+    return items
+
+
+async def fetch_and_process_nested_resource_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    project_id: str,
+    customer_id: str,
+    customer: Dict[str, Any],
+    resource_name: str,
+    timestamp_fields: Optional[List[str]] = None,
+) -> None:
+    """
+    Fetch and process any nested resource for a customer asynchronously.
+
+    Args:
+        session: aiohttp ClientSession
+        api_key: RevenueCat API key
+        project_id: Project ID
+        customer_id: Customer ID
+        customer: Customer data dictionary to modify
+        resource_name: Name of the nested resource (e.g., 'purchases', 'subscriptions', 'events')
+        timestamp_fields: List of timestamp fields to convert to ISO format
+    """
+    # If resource not included in customer data, fetch separately
+    if resource_name not in customer or customer[resource_name] is None:
+        endpoint = f"/projects/{project_id}/customers/{customer_id}/{resource_name}"
+        customer[resource_name] = await _paginate_async(session, api_key, endpoint)
+
+    # Convert timestamps if fields specified
+    if (
+        timestamp_fields
+        and resource_name in customer
+        and customer[resource_name] is not None
+    ):
+        for item in customer[resource_name]:
+            convert_timestamps_to_iso(item, timestamp_fields)
+
+
+async def process_customer_with_nested_resources_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    project_id: str,
+    customer: Dict[str, Any],
+) -> Dict[str, Any]:
+    """
+    Process a customer and fetch nested resources concurrently.
+
+    Args:
+        session: aiohttp ClientSession
+        api_key: RevenueCat API key
+        project_id: Project ID
+        customer: Customer data to process
+
+    Returns:
+        Customer data with nested resources populated
+    """
+    customer_id = customer["id"]
+
+    # Convert customer timestamps
+    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])
+
+    # Define nested resources to fetch concurrently
+    nested_resources = [
+        ("subscriptions", ["purchased_at", "expires_at", "grace_period_expires_at"]),
+        ("purchases", ["purchased_at", "expires_at"]),
+    ]
+
+    # Create concurrent tasks for fetching nested resources
+    tasks = []
+    for resource_name, timestamp_fields in nested_resources:
+        task = fetch_and_process_nested_resource_async(
+            session,
+            api_key,
+            project_id,
+            customer_id,
+            customer,
+            resource_name,
+            timestamp_fields,
+        )
+        tasks.append(task)
+
+    # Wait for all nested resources to be fetched
+    await asyncio.gather(*tasks)
+
+    return customer
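Two helper behaviors above are worth pinning down: convert_timestamps_to_iso mutates the record in place (and also returns it), and both paginators pull the starting_after cursor out of next_page with plain string splitting rather than URL parsing. A quick illustration with sample values (not real API output):

    import pendulum

    record = {"id": "cus_1", "first_seen_at": 1700000000000}  # milliseconds
    record["first_seen_at"] = pendulum.from_timestamp(
        1700000000000 / 1000
    ).to_iso8601_string()
    print(record["first_seen_at"])  # 2023-11-14T22:13:20+00:00

    # Cursor extraction as done in _paginate/_paginate_async; this relies on
    # "starting_after=" appearing literally in the next_page URL.
    next_page = "/v2/projects/p1/customers?starting_after=cus_42&limit=1000"
    print(next_page.split("starting_after=")[1].split("&")[0])  # cus_42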
ingestr/src/sources.py
CHANGED
@@ -1054,9 +1054,10 @@ class FacebookAdsSource:
         )
 
         # Validate breakdown type against available options from settings
-        import typing
 
-        from ingestr.src.facebook_ads.helpers import parse_insights_table_to_source_kwargs
+        from ingestr.src.facebook_ads.helpers import (
+            parse_insights_table_to_source_kwargs,
+        )
 
         source_kwargs = {
             "access_token": access_token[0],
@@ -2689,13 +2690,13 @@ class FrankfurterSource:
 
         if kwargs.get("interval_start"):
            start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
-            if kwargs.get("interval_end"):
-                end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
-            else:
-                end_date = pendulum.now()
         else:
-            start_date = pendulum.
-
+            start_date = pendulum.yesterday()
+
+        if kwargs.get("interval_end"):
+            end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
+        else:
+            end_date = None
 
         validate_dates(start_date=start_date, end_date=end_date)
 
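The reordering above makes end_date independent of whether interval_start was supplied: start_date now defaults to yesterday, and end_date is left as None for the source to handle unless interval_end is given. A minimal restatement of the new branching, substituting pendulum.parse for dlt's ensure_pendulum_datetime:

    import pendulum

    def resolve_window(kwargs: dict):
        if kwargs.get("interval_start"):
            start_date = pendulum.parse(str(kwargs["interval_start"]))
        else:
            start_date = pendulum.yesterday()
        if kwargs.get("interval_end"):
            end_date = pendulum.parse(str(kwargs["interval_end"]))
        else:
            end_date = None  # left for the Frankfurter source to default
        return start_date, end_date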
@@ -3224,6 +3225,76 @@ class PinterestSource:
         ).with_resources(table)
 
 
+class FluxxSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Fluxx takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        # Parse URI: fluxx://instance?client_id=xxx&client_secret=xxx
+        parsed_uri = urlparse(uri)
+        source_params = parse_qs(parsed_uri.query)
+
+        instance = parsed_uri.hostname
+        if not instance:
+            raise ValueError(
+                "Instance is required in the URI (e.g., fluxx://mycompany.preprod)"
+            )
+
+        client_id = source_params.get("client_id")
+        if not client_id:
+            raise ValueError("client_id in the URI is required to connect to Fluxx")
+
+        client_secret = source_params.get("client_secret")
+        if not client_secret:
+            raise ValueError("client_secret in the URI is required to connect to Fluxx")
+
+        # Parse date parameters
+        start_date = kwargs.get("interval_start")
+        if start_date:
+            start_date = ensure_pendulum_datetime(start_date)
+
+        end_date = kwargs.get("interval_end")
+        if end_date:
+            end_date = ensure_pendulum_datetime(end_date)
+
+        # Import Fluxx source
+        from ingestr.src.fluxx import fluxx_source
+
+        # Parse table specification for custom column selection
+        # Format: "resource_name:field1,field2,field3" or "resource_name"
+        resources = None
+        custom_fields = {}
+
+        if table:
+            # Handle single resource with custom fields or multiple resources
+            if ":" in table and table.count(":") == 1:
+                # Single resource with custom fields: "grant_request:id,name,amount"
+                resource_name, field_list = table.split(":", 1)
+                resource_name = resource_name.strip()
+                fields = [f.strip() for f in field_list.split(",")]
+                resources = [resource_name]
+                custom_fields[resource_name] = fields
+            else:
+                # Multiple resources or single resource without custom fields
+                # Support comma-separated list: "grant_request,user"
+                resources = [r.strip() for r in table.split(",")]
+
+        return fluxx_source(
+            instance=instance,
+            client_id=client_id[0],
+            client_secret=client_secret[0],
+            start_date=start_date,
+            end_date=end_date,
+            resources=resources,
+            custom_fields=custom_fields,
+        )
+
+
 class LinearSource:
     def handles_incrementality(self) -> bool:
         return True
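The table specification accepted by FluxxSource therefore has two shapes: a single resource with an optional column list after ":", or a comma-separated list of resources. A standalone restatement of that parsing branch, with illustrative resource names:

    def parse_table_spec(table: str):
        resources, custom_fields = None, {}
        if table:
            if ":" in table and table.count(":") == 1:
                resource_name, field_list = table.split(":", 1)
                resource_name = resource_name.strip()
                resources = [resource_name]
                custom_fields[resource_name] = [f.strip() for f in field_list.split(",")]
            else:
                resources = [r.strip() for r in table.split(",")]
        return resources, custom_fields

    print(parse_table_spec("grant_request:id,name,amount"))
    # (['grant_request'], {'grant_request': ['id', 'name', 'amount']})
    print(parse_table_spec("grant_request,user"))
    # (['grant_request', 'user'], {})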
@@ -3241,11 +3312,11 @@ class LinearSource:
             raise MissingValueError("api_key", "Linear")
 
         if table not in [
-           "issues",
-           "projects",
-           "teams",
-           "users",
-           "workflow_states",
+            "issues",
+            "projects",
+            "teams",
+            "users",
+            "workflow_states",
             "cycles",
             "attachments",
             "comments",
@@ -3282,6 +3353,54 @@ class LinearSource:
         ).with_resources(table)
 
 
+class RevenueCatSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "RevenueCat takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "RevenueCat")
+
+        project_id = params.get("project_id")
+        if project_id is None and table != "projects":
+            raise MissingValueError("project_id", "RevenueCat")
+
+        if table not in [
+            "customers",
+            "products",
+            "subscriptions",
+            "purchases",
+            "projects",
+        ]:
+            raise UnsupportedResourceError(table, "RevenueCat")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.datetime(2020, 1, 1).in_tz("UTC")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+        from ingestr.src.revenuecat import revenuecat_source
+
+        return revenuecat_source(
+            api_key=api_key[0],
+            project_id=project_id[0] if project_id is not None else None,
+        ).with_resources(table)
+
+
 class ZoomSource:
     def handles_incrementality(self) -> bool:
         return True
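Since parse_qs returns every query parameter as a list, the [0] indexing above is how single values are unwrapped. Assuming the URI scheme registered for this connector in factory.py is revenuecat:// (the factory.py hunk is not shown in this diff), the parsing behaves like:

    from urllib.parse import parse_qs, urlparse

    uri = "revenuecat://?api_key=sk_xxx&project_id=proj_xxx"  # placeholder values
    params = parse_qs(urlparse(uri).query)
    print(params)                # {'api_key': ['sk_xxx'], 'project_id': ['proj_xxx']}
    print(params["api_key"][0])  # sk_xxx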
ingestr/src/stripe_analytics/__init__.py
CHANGED
@@ -101,23 +101,6 @@ def incremental_stripe_source(
     initial_start_date: Optional[DateTime] = None,
     end_date: Optional[DateTime] = None,
 ) -> Iterable[DltResource]:
-    """
-    As Stripe API does not include the "updated" key in its responses,
-    we are only able to perform incremental downloads from endpoints where all objects are uneditable.
-    This source yields the resources with incremental loading based on "append" mode.
-    You will load only the newest data without duplicating and without downloading a huge amount of data each time.
-
-    Args:
-        endpoints (tuple): A tuple of endpoint names to retrieve data from. Defaults to Stripe API endpoints with uneditable data.
-        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
-        initial_start_date (Optional[DateTime]): An optional parameter that specifies the initial value for dlt.sources.incremental.
-            If parameter is not None, then load only data that were created after initial_start_date on the first run.
-            Defaults to None. Format: datetime(YYYY, MM, DD).
-        end_date (Optional[DateTime]): An optional end date to limit the data retrieved.
-            Defaults to None. Format: datetime(YYYY, MM, DD).
-    Returns:
-        Iterable[DltResource]: Resources with only that data has not yet been loaded.
-    """
     stripe.api_key = stripe_secret_key
     stripe.api_version = "2022-11-15"
     start_date_unix = (
@@ -142,6 +125,6 @@ def incremental_stripe_source(
         yield dlt.resource(
             incremental_resource,
             name=endpoint,
-            write_disposition="append",
+            write_disposition="merge",
             primary_key="id",
         )(endpoint)
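The behavioral change here is the write disposition: moving from append-style loading (as the deleted docstring described) to merge with a primary key means re-ingested Stripe objects update existing rows instead of duplicating them. In generic dlt terms (a sketch, not this module's code):

    import dlt

    @dlt.resource(name="event", write_disposition="merge", primary_key="id")
    def events():
        # A row whose "id" matches a previously loaded row is merged
        # (upserted) at the destination rather than appended again.
        yield {"id": "evt_1", "type": "charge.succeeded"}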
{ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.84
+Version: 0.13.86
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/RECORD
CHANGED
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
 ingestr/main.py,sha256=qoWHNcHh0-xVnyQxbQ-SKuTxPb1RNV3ENkCpqO7CLrk,26694
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/buildinfo.py,sha256=Sau1WKfATfGbfhYBf36HIMjBxy3Ri3NHPH1bcv0qOvU,21
 ingestr/src/destinations.py,sha256=M2Yni6wiWcrvZ8EPJemidqxN156l0rehgCc7xuil7mo,22840
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=hC5E_XgrgTHMqwqPc6ihUYvRGTGMTzdPfQhrgPyD0tY,6945
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=CMXQRJlbHcGwKtrD-nt_ov-UlAn5UOQe08cdc7Wzel4,125068
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -49,8 +49,10 @@ ingestr/src/facebook_ads/utils.py,sha256=ES2ylPoW3j3fjp6OMUgp21n1cG1OktXsmWWMk5v
 ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
-ingestr/src/frankfurter/__init__.py,sha256=
-ingestr/src/frankfurter/helpers.py,sha256=
+ingestr/src/fluxx/__init__.py,sha256=Ei8BE0KAEzpadJT9RO5-8zMA7LvnIPhNPDKF4EyBcLo,328980
+ingestr/src/fluxx/helpers.py,sha256=dCNgvMMTSEO4LNp6luNZ-XrV4NPW-_OUfmp0k3jFhuc,6602
+ingestr/src/frankfurter/__init__.py,sha256=z98RblQx1ab2GFowDq4l5xdnv-sLb41MPGitH-y2ahc,5242
+ingestr/src/frankfurter/helpers.py,sha256=tEtx9VU7IchRmtKRIEq_r8MclNVs8vL4E_RjGW2ZSh0,1504
 ingestr/src/freshdesk/__init__.py,sha256=ukyorgCNsW_snzsYBDsr3Q0WB8f-to9Fk0enqHHFQlk,3087
 ingestr/src/freshdesk/freshdesk_client.py,sha256=1nFf0K4MQ0KZbWwk4xSbYHaykVqmPLfN39miOFDpWVc,4385
 ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
@@ -85,8 +87,8 @@ ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0
 ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
-ingestr/src/linear/__init__.py,sha256=
-ingestr/src/linear/helpers.py,sha256=
+ingestr/src/linear/__init__.py,sha256=rufjwhLip7RK6j2DpFzCRQEvA_oOqgPEEdREJkc53_U,12295
+ingestr/src/linear/helpers.py,sha256=J9lTuu8rHHM3YTA082_wfvByW6Teie4_44eYaVmDBhQ,3683
 ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
 ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
@@ -111,6 +113,8 @@ ingestr/src/pipedrive/helpers/__init__.py,sha256=UX1K_qnGXB0ShtnBOfp2XuVbK8RRoCK
 ingestr/src/pipedrive/helpers/custom_fields_munger.py,sha256=rZ4AjdITHfJE2NNomCR7vMBS1KnWpEGVF6fADwsIHUE,4488
 ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl3HhE3-6eA,3358
 ingestr/src/quickbooks/__init__.py,sha256=cZUuVCOTGPHTscRj6i0DytO63_fWF-4ieMxoU4PcyTg,3727
+ingestr/src/revenuecat/__init__.py,sha256=HrI4Ht8PWTHiBYphAO26tK-2S-z1FuSIq97wu7erPIw,3785
+ingestr/src/revenuecat/helpers.py,sha256=ntdorpAdPoPBcga1fifFeAl07rKZ-CnF5u5QiFdHbW8,8664
 ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
 ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
 ingestr/src/shopify/__init__.py,sha256=dp6Ybk5LIKA5suzVt923v5LzHz5rMUuDfhjTNPqSjAc,62603
@@ -125,7 +129,7 @@ ingestr/src/solidgate/__init__.py,sha256=Ts83j-JSnFsFuF4tDhVOfZKg7H0-bIpfn3kg1ZO
 ingestr/src/solidgate/helpers.py,sha256=mAsW_1hpD7ab3Y2vw8fxHi4yD3aT1geLdIYZ7ycyxBc,5690
 ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
-ingestr/src/stripe_analytics/__init__.py,sha256=
+ingestr/src/stripe_analytics/__init__.py,sha256=smTK8aqRsGnSdGdQAyyuItWT_k9CPKdlnTGh5DRCcDg,5144
 ingestr/src/stripe_analytics/helpers.py,sha256=KGtRcSrhKEqzJ3AWpgDV2o4cuBFaIwu2Gc1KgvVWTtg,11764
 ingestr/src/stripe_analytics/settings.py,sha256=xt1-ljwP4nLTNUa8l3KwFbtK8FtQHgHpzGF5uPKfRsw,2246
 ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
@@ -153,8 +157,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.84.dist-info/METADATA,sha256=
-ingestr-0.13.84.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ingestr-0.13.84.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
-ingestr-0.13.84.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
-ingestr-0.13.84.dist-info/RECORD,,
+ingestr-0.13.86.dist-info/METADATA,sha256=EYqj1B1PK2F2EGHKmzuoxvQRSdXZThmlL0UutcFxzeo,15182
+ingestr-0.13.86.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.86.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.86.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.86.dist-info/RECORD,,
{ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/WHEEL
File without changes
{ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.84.dist-info → ingestr-0.13.86.dist-info}/licenses/LICENSE.md
File without changes