ingestr 0.13.85__py3-none-any.whl → 0.13.87__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of ingestr might be problematic.

@@ -101,7 +101,7 @@ def mongodb_collection(
     write_disposition: Optional[str] = dlt.config.value,
     parallel: Optional[bool] = False,
     limit: Optional[int] = None,
-    chunk_size: Optional[int] = 10000,
+    chunk_size: Optional[int] = 1000,
     data_item_format: Optional[TDataItemFormat] = "object",
     filter_: Optional[Dict[str, Any]] = None,
     projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
@@ -518,21 +518,42 @@ class CollectionAggregationLoader(CollectionLoader):
         if limit and limit > 0:
             pipeline.append({"$limit": limit})

-        print("pipeline", pipeline)
-        # Execute aggregation
-        cursor = self.collection.aggregate(pipeline, allowDiskUse=True)
-
-        # Process results in chunks
-        while docs_slice := list(islice(cursor, self.chunk_size)):
-            res = map_nested_in_place(convert_mongo_objs, docs_slice)
-            print("res", res)
-            if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
-                yield dlt.mark.with_hints(
-                    res,
-                    dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
-                )
-            else:
-                yield res
+        # Add maxTimeMS to prevent hanging
+        cursor = self.collection.aggregate(
+            pipeline,
+            allowDiskUse=True,
+            batchSize=min(self.chunk_size, 101),
+            maxTimeMS=30000  # 30 second timeout
+        )
+
+        docs_buffer = []
+        try:
+            for doc in cursor:
+                docs_buffer.append(doc)
+
+                if len(docs_buffer) >= self.chunk_size:
+                    res = map_nested_in_place(convert_mongo_objs, docs_buffer)
+                    if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
+                        yield dlt.mark.with_hints(
+                            res,
+                            dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
+                        )
+                    else:
+                        yield res
+                    docs_buffer = []
+
+            # Yield any remaining documents
+            if docs_buffer:
+                res = map_nested_in_place(convert_mongo_objs, docs_buffer)
+                if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
+                    yield dlt.mark.with_hints(
+                        res,
+                        dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
+                    )
+                else:
+                    yield res
+        finally:
+            cursor.close()


 class CollectionAggregationLoaderParallel(CollectionAggregationLoader):
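The rewritten loader above swaps the islice-based chunking for an explicit buffer and passes batchSize and maxTimeMS to the aggregation. A minimal, standalone pymongo sketch of the same pattern; the connection string, database, and collection names are placeholders, not part of this package:

from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")   # placeholder URI
collection = client["demo_db"]["demo_collection"]   # placeholder names
chunk_size = 1000

cursor = collection.aggregate(
    [{"$match": {}}],
    allowDiskUse=True,
    batchSize=min(chunk_size, 101),  # cap how many documents the server returns per batch
    maxTimeMS=30000,                 # fail the aggregation server-side after 30 seconds
)
docs_buffer = []
try:
    for doc in cursor:
        docs_buffer.append(doc)
        if len(docs_buffer) >= chunk_size:
            print(f"processing {len(docs_buffer)} documents")  # stand-in for yielding a chunk
            docs_buffer = []
    if docs_buffer:
        print(f"processing final {len(docs_buffer)} documents")
finally:
    cursor.close()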
@@ -0,0 +1,108 @@
+import asyncio
+from typing import Any, Dict, Iterable, Iterator
+
+import aiohttp
+import dlt
+
+from .helpers import (
+    _make_request,
+    _paginate,
+    convert_timestamps_to_iso,
+    process_customer_with_nested_resources_async,
+    create_project_resource,
+)
+
+
+
+@dlt.source(name="revenuecat", max_table_nesting=0)
+def revenuecat_source(
+    api_key: str,
+    project_id: str = None,
+) -> Iterable[dlt.sources.DltResource]:
+    """
+    RevenueCat source for extracting data from RevenueCat API v2.
+
+    Args:
+        api_key: RevenueCat API v2 secret key with Bearer token format
+        project_id: RevenueCat project ID (required for customers, products, entitlements, offerings, subscriptions, purchases)
+
+    Returns:
+        Iterable of DLT resources for customers, products, entitlements, offerings, purchases, subscriptions, and projects
+    """
+
+    @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
+    def projects() -> Iterator[Dict[str, Any]]:
+        """Get list of projects."""
+        # Get projects list
+        data = _make_request(api_key, "/projects")
+        if "items" in data:
+            for project in data["items"]:
+                project = convert_timestamps_to_iso(project, ["created_at"])
+                yield project
+
+    @dlt.resource(
+        name="customers", primary_key="id", write_disposition="merge", parallelized=True
+    )
+    def customers() -> Iterator[Dict[str, Any]]:
+        """Get list of customers with nested purchases and subscriptions."""
+        if project_id is None:
+            raise ValueError("project_id is required for customers resource")
+        endpoint = f"/projects/{project_id}/customers"
+
+        async def process_customer_batch(customer_batch):
+            """Process a batch of customers with async operations."""
+            async with aiohttp.ClientSession() as session:
+                tasks = []
+                for customer in customer_batch:
+                    task = process_customer_with_nested_resources_async(
+                        session, api_key, project_id, customer
+                    )
+                    tasks.append(task)
+
+                return await asyncio.gather(*tasks)
+
+        def process_customers_sync():
+            """Process customers in batches using asyncio."""
+            batch_size = 50  # Conservative batch size due to 60 req/min rate limit
+            current_batch = []
+
+            for customer in _paginate(api_key, endpoint):
+                current_batch.append(customer)
+
+                if len(current_batch) >= batch_size:
+                    # Process the batch asynchronously
+                    processed_customers = asyncio.run(
+                        process_customer_batch(current_batch)
+                    )
+                    for processed_customer in processed_customers:
+                        yield processed_customer
+                    current_batch = []
+
+            # Process any remaining customers in the final batch
+            if current_batch:
+                processed_customers = asyncio.run(process_customer_batch(current_batch))
+                for processed_customer in processed_customers:
+                    yield processed_customer
+
+        # Yield each processed customer
+        yield from process_customers_sync()
+
+    # Create project-dependent resources dynamically
+    project_resources = []
+    resource_names = ["products", "entitlements", "offerings"]
+
+    for resource_name in resource_names:
+        @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
+        def create_resource(resource_name=resource_name) -> Iterator[Dict[str, Any]]:
+            """Get list of project resource."""
+            yield from create_project_resource(resource_name, api_key, project_id)
+
+        # Set the function name for better identification
+        create_resource.__name__ = resource_name
+        project_resources.append(create_resource)
+
+    return [
+        projects,
+        customers,
+        *project_resources,
+    ]
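Further down, this diff registers the module as a RevenueCatSource in ingestr/src/sources.py and imports it as ingestr.src.revenuecat. Outside of ingestr, the same source can be consumed directly with dlt; a minimal sketch under that assumption, with placeholder credentials and a local DuckDB destination:

import dlt

from ingestr.src.revenuecat import revenuecat_source

pipeline = dlt.pipeline(
    pipeline_name="revenuecat_demo",
    destination="duckdb",
    dataset_name="revenuecat",
)
# The api_key and project_id values are placeholders for a real API v2 secret key and project ID.
source = revenuecat_source(api_key="sk_placeholder", project_id="proj_placeholder")
info = pipeline.run(source.with_resources("projects", "products"))
print(info)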
@@ -0,0 +1,291 @@
+import asyncio
+import time
+from typing import Any, Dict, Iterator, List, Optional
+
+import aiohttp
+import pendulum
+import requests
+
+REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
+
+
+def _make_request(
+    api_key: str,
+    endpoint: str,
+    params: Optional[Dict[str, Any]] = None,
+    max_retries: int = 3,
+) -> Dict[str, Any]:
+    """Make a REST API request to RevenueCat API v2 with rate limiting."""
+    auth_header = f"Bearer {api_key}"
+
+    headers = {"Authorization": auth_header, "Content-Type": "application/json"}
+
+    url = f"{REVENUECAT_API_BASE}{endpoint}"
+
+    for attempt in range(max_retries + 1):
+        try:
+            response = requests.get(url, headers=headers, params=params or {})
+
+            # Handle rate limiting (429 Too Many Requests)
+            if response.status_code == 429:
+                if attempt < max_retries:
+                    # Wait based on Retry-After header or exponential backoff
+                    retry_after = response.headers.get("Retry-After")
+                    if retry_after:
+                        wait_time = int(retry_after)
+                    else:
+                        wait_time = (2**attempt) * 5  # 5, 10, 20 seconds
+
+                    time.sleep(wait_time)
+                    continue
+
+            response.raise_for_status()
+            return response.json()
+
+        except requests.exceptions.RequestException:
+            if attempt < max_retries:
+                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
+                time.sleep(wait_time)
+                continue
+            raise
+
+    # If we get here, all retries failed
+    response.raise_for_status()
+    return response.json()
+
+
+def _paginate(
+    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
+) -> Iterator[Dict[str, Any]]:
+    """Paginate through RevenueCat API results."""
+    current_params = params.copy() if params is not None else {}
+    current_params["limit"] = 1000
+
+    while True:
+        data = _make_request(api_key, endpoint, current_params)
+
+        # Yield items from the current page
+        if "items" in data and data["items"] is not None:
+            for item in data["items"]:
+                yield item
+
+        # Check if there's a next page
+        if "next_page" not in data:
+            break
+
+        # Extract starting_after parameter from next_page URL
+        next_page_url = data["next_page"]
+        if next_page_url and "starting_after=" in next_page_url:
+            starting_after = next_page_url.split("starting_after=")[1].split("&")[0]
+            current_params["starting_after"] = starting_after
+        else:
+            break
+
+
+def convert_timestamps_to_iso(
+    record: Dict[str, Any], timestamp_fields: List[str]
+) -> Dict[str, Any]:
+    """Convert timestamp fields from milliseconds to ISO format."""
+    for field in timestamp_fields:
+        if field in record and record[field] is not None:
+            # Convert from milliseconds timestamp to ISO datetime string
+            timestamp_ms = record[field]
+            dt = pendulum.from_timestamp(timestamp_ms / 1000)
+            record[field] = dt.to_iso8601_string()
+
+    return record
+
+
+async def _make_request_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    endpoint: str,
+    params: Optional[Dict[str, Any]] = None,
+    max_retries: int = 3,
+) -> Dict[str, Any]:
+    """Make an async REST API request to RevenueCat API v2 with rate limiting."""
+    auth_header = f"Bearer {api_key}"
+
+    headers = {"Authorization": auth_header, "Content-Type": "application/json"}
+
+    url = f"{REVENUECAT_API_BASE}{endpoint}"
+
+    for attempt in range(max_retries + 1):
+        try:
+            async with session.get(
+                url, headers=headers, params=params or {}
+            ) as response:
+                # Handle rate limiting (429 Too Many Requests)
+                if response.status == 429:
+                    if attempt < max_retries:
+                        # Wait based on Retry-After header or exponential backoff
+                        retry_after = response.headers.get("Retry-After")
+                        if retry_after:
+                            wait_time = int(retry_after)
+                        else:
+                            wait_time = (2**attempt) * 5  # 5, 10, 20 seconds
+
+                        await asyncio.sleep(wait_time)
+                        continue
+
+                response.raise_for_status()
+                return await response.json()
+
+        except aiohttp.ClientError:
+            if attempt < max_retries:
+                wait_time = (2**attempt) * 2  # 2, 4, 8 seconds
+                await asyncio.sleep(wait_time)
+                continue
+            raise
+
+    # If we get here, all retries failed
+    async with session.get(url, headers=headers, params=params or {}) as response:
+        response.raise_for_status()
+        return await response.json()
+
+
+async def _paginate_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    endpoint: str,
+    params: Optional[Dict[str, Any]] = None,
+) -> List[Dict[str, Any]]:
+    """Paginate through RevenueCat API results asynchronously."""
+    items = []
+    current_params = params.copy() if params is not None else {}
+    current_params["limit"] = 1000
+
+    while True:
+        data = await _make_request_async(session, api_key, endpoint, current_params)
+
+        # Collect items from the current page
+        if "items" in data and data["items"] is not None:
+            items.extend(data["items"])
+
+        # Check if there's a next page
+        if "next_page" not in data:
+            break
+
+        # Extract starting_after parameter from next_page URL
+        next_page_url = data["next_page"]
+        if next_page_url and "starting_after=" in next_page_url:
+            starting_after = next_page_url.split("starting_after=")[1].split("&")[0]
+            current_params["starting_after"] = starting_after
+        else:
+            break
+
+    return items
+
+
+async def fetch_and_process_nested_resource_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    project_id: str,
+    customer_id: str,
+    customer: Dict[str, Any],
+    resource_name: str,
+    timestamp_fields: Optional[List[str]] = None,
+) -> None:
+    """
+    Fetch and process any nested resource for a customer asynchronously.
+
+    Args:
+        session: aiohttp ClientSession
+        api_key: RevenueCat API key
+        project_id: Project ID
+        customer_id: Customer ID
+        customer: Customer data dictionary to modify
+        resource_name: Name of the nested resource (e.g., 'purchases', 'subscriptions', 'events')
+        timestamp_fields: List of timestamp fields to convert to ISO format
+    """
+    # If resource not included in customer data, fetch separately
+    if resource_name not in customer or customer[resource_name] is None:
+        endpoint = f"/projects/{project_id}/customers/{customer_id}/{resource_name}"
+        customer[resource_name] = await _paginate_async(session, api_key, endpoint)
+
+    # Convert timestamps if fields specified
+    if (
+        timestamp_fields
+        and resource_name in customer
+        and customer[resource_name] is not None
+    ):
+        for item in customer[resource_name]:
+            convert_timestamps_to_iso(item, timestamp_fields)
+
+
+async def process_customer_with_nested_resources_async(
+    session: aiohttp.ClientSession,
+    api_key: str,
+    project_id: str,
+    customer: Dict[str, Any],
+) -> Dict[str, Any]:
+    """
+    Process a customer and fetch nested resources concurrently.
+
+    Args:
+        session: aiohttp ClientSession
+        api_key: RevenueCat API key
+        project_id: Project ID
+        customer: Customer data to process
+
+    Returns:
+        Customer data with nested resources populated
+    """
+    customer_id = customer["id"]
+
+    # Convert customer timestamps
+    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])
+
+    # Define nested resources to fetch concurrently
+    nested_resources = [
+        ("subscriptions", ["purchased_at", "expires_at", "grace_period_expires_at"]),
+        ("purchases", ["purchased_at", "expires_at"]),
+    ]
+
+    # Create concurrent tasks for fetching nested resources
+    tasks = []
+    for resource_name, timestamp_fields in nested_resources:
+        task = fetch_and_process_nested_resource_async(
+            session,
+            api_key,
+            project_id,
+            customer_id,
+            customer,
+            resource_name,
+            timestamp_fields,
+        )
+        tasks.append(task)
+
+    # Wait for all nested resources to be fetched
+    await asyncio.gather(*tasks)
+
+    return customer
+
+
+def create_project_resource(
+    resource_name: str,
+    api_key: str,
+    project_id: str = None,
+    timestamp_fields: List[str] = None,
+) -> Iterator[Dict[str, Any]]:
+    """
+    Helper function to create DLT resources for project-dependent endpoints.
+
+    Args:
+        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
+        api_key: RevenueCat API key
+        project_id: RevenueCat project ID
+        timestamp_fields: List of timestamp fields to convert to ISO format
+
+    Returns:
+        Iterator of resource data
+    """
+    if project_id is None:
+        raise ValueError(f"project_id is required for {resource_name} resource")
+
+    endpoint = f"/projects/{project_id}/{resource_name}"
+    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]
+
+    for item in _paginate(api_key, endpoint):
+        item = convert_timestamps_to_iso(item, default_timestamp_fields)
+        yield item
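These helpers convert RevenueCat's epoch-millisecond timestamps to ISO-8601 strings before loading. A small usage sketch, assuming the file ships as ingestr.src.revenuecat.helpers (the relative import in the source module earlier in this diff suggests that layout):

from ingestr.src.revenuecat.helpers import convert_timestamps_to_iso

record = {"id": "cust_1", "first_seen_at": 1700000000000}  # epoch milliseconds
record = convert_timestamps_to_iso(record, ["first_seen_at"])
print(record["first_seen_at"])  # ISO-8601 string for 2023-11-14 22:13:20 UTC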
@@ -669,7 +669,7 @@ def shopify_source(
             params["updated_at_max"] = updated_at.end_value.isoformat()
         yield from client.get_pages("customers", params)

-    @dlt.resource(primary_key="id", write_disposition="append")
+    @dlt.resource(primary_key="id", write_disposition="merge")
     def events(
         created_at: dlt.sources.incremental[
             pendulum.DateTime
ingestr/src/sources.py CHANGED
@@ -1054,9 +1054,10 @@ class FacebookAdsSource:
             )

         # Validate breakdown type against available options from settings
-        import typing

-        from ingestr.src.facebook_ads.helpers import parse_insights_table_to_source_kwargs
+        from ingestr.src.facebook_ads.helpers import (
+            parse_insights_table_to_source_kwargs,
+        )

         source_kwargs = {
             "access_token": access_token[0],
@@ -2689,13 +2690,13 @@ class FrankfurterSource:

         if kwargs.get("interval_start"):
             start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
-            if kwargs.get("interval_end"):
-                end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
-            else:
-                end_date = pendulum.now()
         else:
-            start_date = pendulum.now()
-            end_date = pendulum.now()
+            start_date = pendulum.yesterday()
+
+        if kwargs.get("interval_end"):
+            end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
+        else:
+            end_date = None

         validate_dates(start_date=start_date, end_date=end_date)

@@ -3224,6 +3225,76 @@ class PinterestSource:
         ).with_resources(table)


+class FluxxSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Fluxx takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        # Parse URI: fluxx://instance?client_id=xxx&client_secret=xxx
+        parsed_uri = urlparse(uri)
+        source_params = parse_qs(parsed_uri.query)
+
+        instance = parsed_uri.hostname
+        if not instance:
+            raise ValueError(
+                "Instance is required in the URI (e.g., fluxx://mycompany.preprod)"
+            )
+
+        client_id = source_params.get("client_id")
+        if not client_id:
+            raise ValueError("client_id in the URI is required to connect to Fluxx")
+
+        client_secret = source_params.get("client_secret")
+        if not client_secret:
+            raise ValueError("client_secret in the URI is required to connect to Fluxx")
+
+        # Parse date parameters
+        start_date = kwargs.get("interval_start")
+        if start_date:
+            start_date = ensure_pendulum_datetime(start_date)
+
+        end_date = kwargs.get("interval_end")
+        if end_date:
+            end_date = ensure_pendulum_datetime(end_date)
+
+        # Import Fluxx source
+        from ingestr.src.fluxx import fluxx_source
+
+        # Parse table specification for custom column selection
+        # Format: "resource_name:field1,field2,field3" or "resource_name"
+        resources = None
+        custom_fields = {}
+
+        if table:
+            # Handle single resource with custom fields or multiple resources
+            if ":" in table and table.count(":") == 1:
+                # Single resource with custom fields: "grant_request:id,name,amount"
+                resource_name, field_list = table.split(":", 1)
+                resource_name = resource_name.strip()
+                fields = [f.strip() for f in field_list.split(",")]
+                resources = [resource_name]
+                custom_fields[resource_name] = fields
+            else:
+                # Multiple resources or single resource without custom fields
+                # Support comma-separated list: "grant_request,user"
+                resources = [r.strip() for r in table.split(",")]
+
+        return fluxx_source(
+            instance=instance,
+            client_id=client_id[0],
+            client_secret=client_secret[0],
+            start_date=start_date,
+            end_date=end_date,
+            resources=resources,
+            custom_fields=custom_fields,
+        )
+
+
 class LinearSource:
     def handles_incrementality(self) -> bool:
         return True
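The new FluxxSource accepts the table argument either as a single resource, a comma-separated list of resources, or a single resource followed by a colon and a field list. A self-contained sketch mirroring the parsing logic above, using the illustrative resource names from its comments:

def parse_fluxx_table(table: str):
    # Mirrors FluxxSource.dlt_source: "name:field1,field2" selects custom fields,
    # while "name1,name2" selects multiple resources with default fields.
    custom_fields = {}
    if ":" in table and table.count(":") == 1:
        resource_name, field_list = table.split(":", 1)
        resource_name = resource_name.strip()
        custom_fields[resource_name] = [f.strip() for f in field_list.split(",")]
        return [resource_name], custom_fields
    return [r.strip() for r in table.split(",")], custom_fields

print(parse_fluxx_table("grant_request:id,name,amount"))
# (['grant_request'], {'grant_request': ['id', 'name', 'amount']})
print(parse_fluxx_table("grant_request,user"))
# (['grant_request', 'user'], {})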
@@ -3241,11 +3312,11 @@ class LinearSource:
             raise MissingValueError("api_key", "Linear")

         if table not in [
-                "issues",
-                "projects",
-                "teams",
-                "users",
-                "workflow_states",
+            "issues",
+            "projects",
+            "teams",
+            "users",
+            "workflow_states",
             "cycles",
             "attachments",
             "comments",
@@ -3282,6 +3353,56 @@ class LinearSource:
         ).with_resources(table)


+class RevenueCatSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "RevenueCat takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "RevenueCat")
+
+        project_id = params.get("project_id")
+        if project_id is None and table != "projects":
+            raise MissingValueError("project_id", "RevenueCat")
+
+        if table not in [
+            "customers",
+            "products",
+            "entitlements",
+            "offerings",
+            "subscriptions",
+            "purchases",
+            "projects",
+        ]:
+            raise UnsupportedResourceError(table, "RevenueCat")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.datetime(2020, 1, 1).in_tz("UTC")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+        from ingestr.src.revenuecat import revenuecat_source
+
+        return revenuecat_source(
+            api_key=api_key[0],
+            project_id=project_id[0] if project_id is not None else None,
+        ).with_resources(table)
+
+
 class ZoomSource:
     def handles_incrementality(self) -> bool:
         return True
@@ -101,23 +101,6 @@ def incremental_stripe_source(
     initial_start_date: Optional[DateTime] = None,
     end_date: Optional[DateTime] = None,
 ) -> Iterable[DltResource]:
-    """
-    As Stripe API does not include the "updated" key in its responses,
-    we are only able to perform incremental downloads from endpoints where all objects are uneditable.
-    This source yields the resources with incremental loading based on "append" mode.
-    You will load only the newest data without duplicating and without downloading a huge amount of data each time.
-
-    Args:
-        endpoints (tuple): A tuple of endpoint names to retrieve data from. Defaults to Stripe API endpoints with uneditable data.
-        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
-        initial_start_date (Optional[DateTime]): An optional parameter that specifies the initial value for dlt.sources.incremental.
-            If parameter is not None, then load only data that were created after initial_start_date on the first run.
-            Defaults to None. Format: datetime(YYYY, MM, DD).
-        end_date (Optional[DateTime]): An optional end date to limit the data retrieved.
-            Defaults to None. Format: datetime(YYYY, MM, DD).
-    Returns:
-        Iterable[DltResource]: Resources with only that data has not yet been loaded.
-    """
     stripe.api_key = stripe_secret_key
     stripe.api_version = "2022-11-15"
     start_date_unix = (
@@ -142,6 +125,6 @@ def incremental_stripe_source(
         yield dlt.resource(
             incremental_resource,
             name=endpoint,
-            write_disposition="append",
+            write_disposition="merge",
             primary_key="id",
         )(endpoint)
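The append-to-merge switches for the Shopify events resource and the incremental Stripe resources mean repeated loads now upsert on the declared primary_key instead of accumulating duplicate rows. A minimal, self-contained dlt sketch of that behavior; the resource name and rows are illustrative only:

import dlt

@dlt.resource(name="events_demo", primary_key="id", write_disposition="merge")
def events_demo():
    # On a re-run, rows with an already-loaded "id" replace the earlier versions
    # instead of being appended again.
    yield [{"id": 1, "type": "order_created"}, {"id": 2, "type": "order_paid"}]

pipeline = dlt.pipeline(
    pipeline_name="merge_demo", destination="duckdb", dataset_name="demo"
)
print(pipeline.run(events_demo()))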