ingestr 0.13.86__py3-none-any.whl → 0.13.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -282,6 +282,13 @@ def ingest(
282
282
  envvar=["STAGING_BUCKET", "INGESTR_STAGING_BUCKET"],
283
283
  ),
284
284
  ] = None, # type: ignore
285
+ mask: Annotated[
286
+ Optional[list[str]],
287
+ typer.Option(
288
+ help="Column masking configuration in format 'column:algorithm[:param]'. Can be specified multiple times.",
289
+ envvar=["MASK", "INGESTR_MASK"],
290
+ ),
291
+ ] = [], # type: ignore
285
292
  ):
286
293
  import hashlib
287
294
  import tempfile
@@ -302,6 +309,7 @@ def ingest(
302
309
  from ingestr.src.filters import (
303
310
  cast_set_to_list,
304
311
  cast_spanner_types,
312
+ create_masking_filter,
305
313
  handle_mysql_empty_dates,
306
314
  )
307
315
  from ingestr.src.sources import MongoDbSource
@@ -562,6 +570,10 @@ def ingest(
562
570
  if factory.source_scheme.startswith("spanner"):
563
571
  resource.for_each(dlt_source, lambda x: x.add_map(cast_spanner_types))
564
572
 
573
+ if mask:
574
+ masking_filter = create_masking_filter(mask)
575
+ resource.for_each(dlt_source, lambda x: x.add_map(masking_filter))
576
+
565
577
  if yield_limit:
566
578
  resource.for_each(dlt_source, lambda x: x.add_limit(yield_limit))
567
579
 
@@ -38,6 +38,7 @@ def app_store(
38
38
  name=resource.name,
39
39
  primary_key=resource.primary_key,
40
40
  columns=resource.columns,
41
+ write_disposition="merge",
41
42
  )(client, app_ids, resource.report_name, start_date, end_date)
42
43
 
43
44
 
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.86"
1
+ version = "v0.13.88"
@@ -75,7 +75,7 @@ def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
75
75
 
76
76
 
77
77
  @dlt.resource(
78
- write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
78
+ write_disposition="replace", columns={"end_time": {"data_type": "timestamp"}}
79
79
  )
80
80
  def players_games(
81
81
  players: List[str], start_month: str = None, end_month: str = None
ingestr/src/filters.py CHANGED
@@ -51,3 +51,12 @@ def table_adapter_exclude_columns(cols: list[str]):
51
51
  table._columns.remove(col) # type: ignore
52
52
 
53
53
  return excluder
54
+
55
+
56
def create_masking_filter(mask_configs: list[str]):
    """Return a per-item callable that applies the requested column masks.

    Args:
        mask_configs: Mask specifications as accepted by
            ``create_masking_mapper``. An empty (or otherwise falsy) value
            means "no masking".

    Returns:
        The mapper built by ``create_masking_mapper`` when at least one
        configuration is given; otherwise a pass-through callable that
        returns its argument unchanged.
    """
    # Imported lazily, inside the function, exactly as the original does.
    from ingestr.src.masking import create_masking_mapper

    if mask_configs:
        return create_masking_mapper(mask_configs)

    def passthrough(item):
        # Nothing to mask: hand the item back untouched.
        return item

    return passthrough
@@ -14,14 +14,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
14
14
  )
15
15
  def frankfurter_source(
16
16
  start_date: TAnyDateTime,
17
- end_date: TAnyDateTime|None,
17
+ end_date: TAnyDateTime | None,
18
18
  base_currency: str,
19
19
  ) -> Any:
20
20
  """
21
21
  A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
22
22
  various types of data: currencies, latest rates, historical rates.
23
23
  """
24
-
25
24
 
26
25
  @dlt.resource(
27
26
  write_disposition="replace",
@@ -36,7 +35,6 @@ def frankfurter_source(
36
35
  for currency_code, currency_name in currencies_data.items():
37
36
  yield {"currency_code": currency_code, "currency_name": currency_name}
38
37
 
39
-
40
38
  @dlt.resource(
41
39
  write_disposition="merge",
42
40
  columns={
@@ -81,7 +79,6 @@ def frankfurter_source(
81
79
  "base_currency": base_currency,
82
80
  }
83
81
 
84
-
85
82
  @dlt.resource(
86
83
  write_disposition="merge",
87
84
  columns={
@@ -93,13 +90,13 @@ def frankfurter_source(
93
90
  primary_key=("date", "currency_code", "base_currency"),
94
91
  )
95
92
  def exchange_rates(
96
- date_time = dlt.sources.incremental(
97
- "date",
98
- initial_value=start_date,
99
- end_value=end_date,
100
- range_start="closed",
101
- range_end="closed",
102
- )
93
+ date_time=dlt.sources.incremental(
94
+ "date",
95
+ initial_value=start_date,
96
+ end_value=end_date,
97
+ range_start="closed",
98
+ range_end="closed",
99
+ ),
103
100
  ) -> Iterator[dict]:
104
101
  """
105
102
  Fetches exchange rates for a specified date range.
@@ -115,9 +112,9 @@ def frankfurter_source(
115
112
  end_date = date_time.end_value
116
113
  else:
117
114
  end_date = pendulum.now()
118
-
115
+
119
116
  # Ensure start_date.last_value is a pendulum.DateTime object
120
- start_date_obj = ensure_pendulum_datetime(start_date) # type: ignore
117
+ start_date_obj = ensure_pendulum_datetime(start_date) # type: ignore
121
118
  start_date_str = start_date_obj.format("YYYY-MM-DD")
122
119
 
123
120
  # Ensure end_date is a pendulum.DateTime object
@@ -158,4 +155,3 @@ def frankfurter_source(
158
155
  }
159
156
 
160
157
  return currencies, latest, exchange_rates
161
-
@@ -16,9 +16,9 @@ def get_path_with_retry(path: str) -> StrAny:
16
16
  return get_url_with_retry(f"{FRANKFURTER_API_URL}{path}")
17
17
 
18
18
 
19
- def validate_dates(start_date: datetime, end_date: datetime|None) -> None:
19
+ def validate_dates(start_date: datetime, end_date: datetime | None) -> None:
20
20
  current_date = pendulum.now()
21
-
21
+
22
22
  # Check if start_date is in the futurep
23
23
  if start_date > current_date:
24
24
  raise ValueError("Interval-start cannot be in the future.")
@@ -30,7 +30,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
30
30
  start_date_obj = ensure_pendulum_datetime(start_date)
31
31
  client = KlaviyoClient(api_key)
32
32
 
33
- @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
33
+ @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
34
34
  def events(
35
35
  datetime=dlt.sources.incremental(
36
36
  "datetime",
@@ -135,7 +135,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
135
135
  ) -> Iterable[TDataItem]:
136
136
  yield from client.fetch_catalog_item(create_client(), updated.start_value)
137
137
 
138
- @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
138
+ @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
139
139
  def forms(
140
140
  updated_at=dlt.sources.incremental(
141
141
  "updated_at",
@@ -162,7 +162,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
162
162
  ) -> Iterable[TDataItem]:
163
163
  yield from client.fetch_lists(create_client(), updated.start_value)
164
164
 
165
- @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
165
+ @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
166
166
  def images(
167
167
  updated_at=dlt.sources.incremental(
168
168
  "updated_at",
@@ -188,7 +188,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
188
188
  ) -> Iterable[TDataItem]:
189
189
  yield from client.fetch_segments(create_client(), updated.start_value)
190
190
 
191
- @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
191
+ @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
192
192
  def flows(
193
193
  updated=dlt.sources.incremental(
194
194
  "updated",
@@ -203,7 +203,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
203
203
  for start, end in intervals:
204
204
  yield lambda s=start, e=end: client.fetch_flows(create_client(), s, e)
205
205
 
206
- @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
206
+ @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
207
207
  def templates(
208
208
  updated=dlt.sources.incremental(
209
209
  "updated",
ingestr/src/masking.py ADDED
@@ -0,0 +1,344 @@
1
+ import hashlib
2
+ import hmac
3
+ import random
4
+ import re
5
+ import string
6
+ import uuid
7
+ from datetime import date, datetime, timedelta
8
+ from typing import Any, Callable, Dict, Optional, Tuple, Union
9
+
10
+
11
class MaskingEngine:
    """Turn ``column:algorithm[:param]`` specifications into masking callables.

    ``parse_mask_config`` splits a specification into its parts and
    ``get_masking_function`` maps an algorithm name (plus optional parameter)
    to a one-argument callable that masks a single value.

    The engine is stateful: the tokenization algorithms remember which token
    was issued for which value so that repeated values map to the same token
    for the lifetime of the instance.
    """

    def __init__(self):
        # str(value) -> UUID token issued by _tokenize_uuid.
        self.token_cache: Dict[str, Union[str, int]] = {}
        # str(value) -> integer token issued by _tokenize_sequential. Kept
        # apart from token_cache so that a value tokenized by both algorithms
        # never gets a token of the wrong kind back.
        self.sequential_cache: Dict[str, int] = {}
        # Last integer handed out by _tokenize_sequential.
        self.sequential_counter = 0

    def parse_mask_config(self, config: str) -> Tuple[str, str, Optional[str]]:
        """Split a mask specification into ``(column, algorithm, param)``.

        Only the first two colons act as separators, so the parameter itself
        may contain ``:`` (for example an HMAC key or a fixed replacement
        text).

        Args:
            config: Specification of the form ``column:algorithm[:param]``.

        Returns:
            A ``(column, algorithm, param)`` tuple; ``param`` is ``None`` when
            the specification has no third part.

        Raises:
            ValueError: If ``config`` does not contain at least a column and
                an algorithm.
        """
        parts = config.split(":", 2)
        if len(parts) < 2:
            raise ValueError(
                f"Invalid mask configuration: {config}. Expected format: 'column:algorithm[:param]'"
            )
        param = parts[2] if len(parts) == 3 else None
        return parts[0], parts[1], param

    @staticmethod
    def _int_param(algorithm: str, param: Optional[str], default: int) -> int:
        """Parse an integer parameter, using ``default`` when it is missing or empty."""
        if not param:
            return default
        try:
            return int(param)
        except ValueError as err:
            raise ValueError(
                f"Invalid parameter for masking algorithm '{algorithm}': "
                f"{param!r}. Expected an integer."
            ) from err

    def get_masking_function(
        self, algorithm: str, param: Optional[str] = None
    ) -> Callable:
        """Return the one-argument masking callable for ``algorithm``.

        Args:
            algorithm: Algorithm name; matched case-insensitively.
            param: Optional algorithm parameter taken from the specification.

        Returns:
            A callable that takes one value and returns its masked form.

        Raises:
            ValueError: If the algorithm is unknown, or if a numeric parameter
                cannot be parsed or is outside the range the algorithm can
                work with. Rejecting these up front avoids per-row failures.
        """
        algorithm = algorithm.lower()

        # Hash-based masking
        if algorithm == "hash" or algorithm == "sha256":
            return self._hash_sha256
        elif algorithm == "md5":
            return self._hash_md5
        elif algorithm == "hmac":
            # NOTE(review): without a param every run falls back to the same
            # constant key, which gives no secrecy over a plain hash.
            return lambda x: self._hash_hmac(x, param or "default-key")

        # Format-preserving masking
        elif algorithm == "email":
            return self._mask_email
        elif algorithm == "phone":
            return self._mask_phone
        elif algorithm == "credit_card":
            return self._mask_credit_card
        elif algorithm == "ssn":
            return self._mask_ssn

        # Redaction strategies
        elif algorithm == "redact":
            return lambda x: "REDACTED"
        elif algorithm == "stars":
            return lambda x: "*" * len(str(x)) if x else ""
        elif algorithm == "fixed":
            return lambda x: param or "MASKED"
        elif algorithm == "random":
            return self._random_replace

        # Partial masking
        elif algorithm == "partial":
            chars = self._int_param(algorithm, param, 2)
            if chars < 0:
                raise ValueError(
                    f"Invalid parameter for masking algorithm 'partial': {param!r}. "
                    "Expected a non-negative integer."
                )
            return lambda x: self._partial_mask(x, chars)
        elif algorithm == "first_letter":
            return self._first_letter_mask

        # Tokenization
        elif algorithm == "uuid":
            return self._tokenize_uuid
        elif algorithm == "sequential":
            return self._tokenize_sequential

        # Numeric masking
        elif algorithm == "round":
            precision = self._int_param(algorithm, param, 10)
            if precision == 0:
                # A zero precision would divide by zero on every row.
                raise ValueError(
                    "Invalid parameter for masking algorithm 'round': "
                    "precision must not be 0."
                )
            return lambda x: self._round_number(x, precision)
        elif algorithm == "range":
            bucket_size = self._int_param(algorithm, param, 100)
            if bucket_size <= 0:
                raise ValueError(
                    f"Invalid parameter for masking algorithm 'range': {param!r}. "
                    "Expected a positive integer."
                )
            return lambda x: self._range_mask(x, bucket_size)
        elif algorithm == "noise":
            try:
                noise_level = float(param) if param else 0.1
            except ValueError as err:
                raise ValueError(
                    f"Invalid parameter for masking algorithm 'noise': {param!r}. "
                    "Expected a number."
                ) from err
            return lambda x: self._add_noise(x, noise_level)

        # Date masking
        elif algorithm == "date_shift":
            max_days = self._int_param(algorithm, param, 30)
            if max_days < 0:
                # random.randint(-n, n) raises for negative n.
                raise ValueError(
                    f"Invalid parameter for masking algorithm 'date_shift': {param!r}. "
                    "Expected a non-negative integer."
                )
            return lambda x: self._date_shift(x, max_days)
        elif algorithm == "year_only":
            return self._year_only
        elif algorithm == "month_year":
            return self._month_year

        else:
            raise ValueError(f"Unknown masking algorithm: {algorithm}")

    # Hash functions
    def _hash_sha256(self, value: Any) -> Optional[str]:
        """Return the SHA-256 hex digest of ``str(value)``; ``None`` stays ``None``."""
        if value is None:
            return None
        return hashlib.sha256(str(value).encode()).hexdigest()

    def _hash_md5(self, value: Any) -> Optional[str]:
        """Return the MD5 hex digest of ``str(value)``; ``None`` stays ``None``."""
        if value is None:
            return None
        return hashlib.md5(str(value).encode()).hexdigest()

    def _hash_hmac(self, value: Any, key: str) -> Optional[str]:
        """Return the HMAC-SHA256 hex digest of ``str(value)`` under ``key``."""
        if value is None:
            return None
        return hmac.new(key.encode(), str(value).encode(), hashlib.sha256).hexdigest()

    # Format-preserving masks
    def _mask_email(self, value: Any) -> Any:
        """Mask the local part of an e-mail address, keeping the domain.

        Local parts longer than two characters keep their first and last
        character. Values without ``@`` fall back to ``_partial_mask`` with
        two visible characters per side. Falsy values are returned unchanged.
        """
        if not value:
            return value
        email_str = str(value)
        if "@" not in email_str:
            return self._partial_mask(email_str, 2)

        local, domain = email_str.split("@", 1)
        if len(local) <= 2:
            masked_local = "*" * len(local)
        else:
            masked_local = local[0] + "*" * (len(local) - 2) + local[-1]
        return f"{masked_local}@{domain}"

    def _mask_phone(self, value: Any) -> Any:
        """Mask a phone number, keeping only its first three digits.

        Non-digits are stripped first. Numbers with fewer than ten digits are
        replaced by one star per digit. Falsy values are returned unchanged.
        """
        if not value:
            return value
        digits = re.sub(r"\D", "", str(value))
        if len(digits) < 10:
            return "*" * len(digits)
        return digits[:3] + "-***-****"

    def _mask_credit_card(self, value: Any) -> Any:
        """Mask all but the last four digits of a card number.

        Non-digits are stripped first. Numbers with fewer than twelve digits
        are masked completely. Falsy values are returned unchanged.
        """
        if not value:
            return value
        cc_str = re.sub(r"\D", "", str(value))
        if len(cc_str) < 12:
            return "*" * len(cc_str)
        return "*" * (len(cc_str) - 4) + cc_str[-4:]

    def _mask_ssn(self, value: Any) -> Any:
        """Mask a nine-digit SSN as ``***-**-NNNN``.

        Non-digits are stripped first. Anything that is not exactly nine
        digits is masked completely. Falsy values are returned unchanged.
        """
        if not value:
            return value
        ssn_str = re.sub(r"\D", "", str(value))
        if len(ssn_str) != 9:
            return "*" * len(ssn_str)
        return "***-**-" + ssn_str[-4:]

    # Partial masking
    def _partial_mask(self, value: Any, chars_to_show: int) -> Any:
        """Keep ``chars_to_show`` characters at each end and star out the rest.

        The value is masked completely when it is too short to hide anything,
        or when ``chars_to_show`` is not positive. Falsy values are returned
        unchanged.
        """
        if not value:
            return value
        val_str = str(value)
        # chars_to_show <= 0 must be handled explicitly: val_str[-0:] is the
        # whole string, which would append the unmasked value to the stars.
        if chars_to_show <= 0 or len(val_str) <= chars_to_show * 2:
            return "*" * len(val_str)
        return (
            val_str[:chars_to_show]
            + "*" * (len(val_str) - chars_to_show * 2)
            + val_str[-chars_to_show:]
        )

    def _first_letter_mask(self, value: Any) -> Any:
        """Keep the first character and star out the rest.

        Single-character strings and falsy values are returned as they are.
        """
        if not value:
            return value
        val_str = str(value)
        if len(val_str) <= 1:
            return val_str
        return val_str[0] + "*" * (len(val_str) - 1)

    # Random replacement
    def _random_replace(self, value: Any) -> Any:
        """Replace a value with a random one of a similar kind.

        Ints become a random positive int with the same number of digits,
        floats a random float in ``[0, 2 * abs(value)]``, strings a random
        alphanumeric string of the same length. Any other non-``None`` value
        is returned as ``str(value)``.
        """
        if value is None:
            return value

        if isinstance(value, (int, float)):
            # Generate random number in similar range
            if isinstance(value, int):
                magnitude = len(str(abs(value)))
                return random.randint(10 ** (magnitude - 1), 10**magnitude - 1)
            else:
                return random.uniform(0, abs(value) * 2)
        elif isinstance(value, str):
            # Generate random string of same length
            return "".join(
                random.choices(string.ascii_letters + string.digits, k=len(value))
            )
        else:
            return str(value)

    # Tokenization
    def _tokenize_uuid(self, value: Any) -> Optional[str]:
        """Return a random UUID token that stays stable per distinct ``str(value)``."""
        if value is None:
            return None
        val_str = str(value)
        if val_str not in self.token_cache:
            self.token_cache[val_str] = str(uuid.uuid4())
        return str(self.token_cache[val_str])

    def _tokenize_sequential(self, value: Any) -> Optional[int]:
        """Return an increasing integer token, stable per distinct ``str(value)``."""
        if value is None:
            return None
        val_str = str(value)
        if val_str not in self.sequential_cache:
            self.sequential_counter += 1
            self.sequential_cache[val_str] = self.sequential_counter
        return self.sequential_cache[val_str]

    # Numeric masking
    def _round_number(self, value: Any, precision: int) -> Any:
        """Round a number to the nearest multiple of ``precision``.

        Values that cannot be converted or rounded (non-numeric text, NaN,
        infinities, ints too large for a float) are returned unchanged.
        """
        if value is None:
            return value
        try:
            num = float(value)
            return round(num / precision) * precision
        except (ValueError, TypeError, OverflowError):
            return value

    def _range_mask(self, value: Any, bucket_size: int) -> Any:
        """Replace a number with the ``"lower-upper"`` bucket that contains it.

        Values that cannot be converted to a finite number are returned
        unchanged.
        """
        if value is None:
            return value
        try:
            num = float(value)
            lower = int(num // bucket_size) * bucket_size
            upper = lower + bucket_size
            return f"{lower}-{upper}"
        except (ValueError, TypeError, OverflowError):
            return value

    def _add_noise(self, value: Any, noise_level: float) -> Any:
        """Perturb a number by up to ``noise_level`` times its magnitude.

        Int inputs produce int outputs (truncated). Values that cannot be
        converted to a float are returned unchanged.
        """
        if value is None:
            return value
        try:
            num = float(value)
            noise = random.uniform(-noise_level, noise_level) * abs(num)
            result = num + noise
            if isinstance(value, int):
                return int(result)
            return result
        except (ValueError, TypeError, OverflowError):
            return value

    # Date masking
    def _date_shift(self, value: Any, max_days: int) -> Any:
        """Shift a date by a random whole number of days in ``[-max_days, max_days]``.

        ``date``/``datetime`` objects are shifted directly. Other values are
        parsed with ``dateutil``; string inputs come back formatted as
        ``YYYY-MM-DD``. Values that cannot be parsed are returned unchanged.
        """
        if value is None:
            return value

        if isinstance(value, (date, datetime)):
            shift_days = random.randint(-max_days, max_days)
            return value + timedelta(days=shift_days)

        # Try to parse string dates
        try:
            from dateutil import parser  # type: ignore

            dt = parser.parse(str(value))
            shift_days = random.randint(-max_days, max_days)
            result = dt + timedelta(days=shift_days)
            if isinstance(value, str):
                return result.strftime("%Y-%m-%d")
            return result
        except Exception:
            # Best effort: anything that is not a recognizable date passes through.
            return value

    def _year_only(self, value: Any) -> Any:
        """Reduce a date (object or parseable text) to its year as an int.

        Values that cannot be parsed are returned unchanged.
        """
        if value is None:
            return value

        if isinstance(value, (date, datetime)):
            return value.year

        # Try to parse string dates
        try:
            from dateutil import parser  # type: ignore

            dt = parser.parse(str(value))
            return dt.year
        except Exception:
            return value

    def _month_year(self, value: Any) -> Any:
        """Reduce a date (object or parseable text) to a ``YYYY-MM`` string.

        Values that cannot be parsed are returned unchanged.
        """
        if value is None:
            return value

        if isinstance(value, (date, datetime)):
            return f"{value.year}-{value.month:02d}"

        # Try to parse string dates
        try:
            from dateutil import parser  # type: ignore

            dt = parser.parse(str(value))
            return f"{dt.year}-{dt.month:02d}"
        except Exception:
            return value
301
+
302
+
303
def create_masking_mapper(mask_configs: list[str]) -> Callable:
    """Build a callable that applies the configured column masks to one item.

    Each entry of ``mask_configs`` is parsed with
    ``MaskingEngine.parse_mask_config`` and resolved with
    ``MaskingEngine.get_masking_function``. If the same column appears more
    than once, the last entry wins.

    Args:
        mask_configs: Specifications of the form ``column:algorithm[:param]``.

    Returns:
        A function taking one data item. PyArrow tables are masked via a
        pandas round trip and returned as a new table; dicts are masked in
        place and returned; any other item is returned untouched.

    Raises:
        ValueError: Propagated from the engine when a specification is
            malformed or names an unknown algorithm.
    """
    engine = MaskingEngine()

    # Parse all configurations
    masks = {}
    for config in mask_configs:
        column, algorithm, param = engine.parse_mask_config(config)
        masks[column] = engine.get_masking_function(algorithm, param)

    def apply_masks(data: Any) -> Any:
        # Only the import itself is guarded. Wrapping the whole Arrow branch
        # would also swallow an ImportError raised by to_pandas() (pandas
        # missing) and silently return the table unmasked.
        try:
            import pyarrow as pa  # type: ignore
        except ImportError:
            pa = None

        # Handle PyArrow tables
        if pa is not None and isinstance(data, pa.Table):
            # Convert to pandas for easier manipulation
            df = data.to_pandas()

            # Apply masks to each column
            for column, mask_func in masks.items():
                if column in df.columns:
                    df[column] = df[column].apply(mask_func)

            # Convert back to PyArrow table
            return pa.Table.from_pandas(df)

        # Handle dictionaries (original behavior)
        if isinstance(data, dict):
            for column, mask_func in masks.items():
                if column in data:
                    try:
                        data[column] = mask_func(data[column])
                    except Exception as e:
                        # NOTE(review): best effort - on failure the original
                        # value stays in place unmasked; only a warning is
                        # printed.
                        print(f"Warning: Failed to mask column {column}: {e}")
            return data

        # Return as-is if not a supported type
        return data

    return apply_masks
@@ -101,7 +101,7 @@ def mongodb_collection(
101
101
  write_disposition: Optional[str] = dlt.config.value,
102
102
  parallel: Optional[bool] = False,
103
103
  limit: Optional[int] = None,
104
- chunk_size: Optional[int] = 10000,
104
+ chunk_size: Optional[int] = 1000,
105
105
  data_item_format: Optional[TDataItemFormat] = "object",
106
106
  filter_: Optional[Dict[str, Any]] = None,
107
107
  projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
@@ -518,21 +518,46 @@ class CollectionAggregationLoader(CollectionLoader):
518
518
  if limit and limit > 0:
519
519
  pipeline.append({"$limit": limit})
520
520
 
521
- print("pipeline", pipeline)
522
- # Execute aggregation
523
- cursor = self.collection.aggregate(pipeline, allowDiskUse=True)
521
+ # Add maxTimeMS to prevent hanging
522
+ cursor = self.collection.aggregate(
523
+ pipeline,
524
+ allowDiskUse=True,
525
+ batchSize=min(self.chunk_size, 101),
526
+ maxTimeMS=30000, # 30 second timeout
527
+ )
524
528
 
525
- # Process results in chunks
526
- while docs_slice := list(islice(cursor, self.chunk_size)):
527
- res = map_nested_in_place(convert_mongo_objs, docs_slice)
528
- print("res", res)
529
- if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
530
- yield dlt.mark.with_hints(
531
- res,
532
- dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
533
- )
534
- else:
535
- yield res
529
+ docs_buffer = []
530
+ try:
531
+ for doc in cursor:
532
+ docs_buffer.append(doc)
533
+
534
+ if len(docs_buffer) >= self.chunk_size:
535
+ res = map_nested_in_place(convert_mongo_objs, docs_buffer)
536
+ if (
537
+ len(res) > 0
538
+ and "_id" in res[0]
539
+ and isinstance(res[0]["_id"], dict)
540
+ ):
541
+ yield dlt.mark.with_hints(
542
+ res,
543
+ dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
544
+ )
545
+ else:
546
+ yield res
547
+ docs_buffer = []
548
+
549
+ # Yield any remaining documents
550
+ if docs_buffer:
551
+ res = map_nested_in_place(convert_mongo_objs, docs_buffer)
552
+ if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
553
+ yield dlt.mark.with_hints(
554
+ res,
555
+ dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
556
+ )
557
+ else:
558
+ yield res
559
+ finally:
560
+ cursor.close()
536
561
 
537
562
 
538
563
  class CollectionAggregationLoaderParallel(CollectionAggregationLoader):
@@ -8,6 +8,7 @@ from .helpers import (
8
8
  _make_request,
9
9
  _paginate,
10
10
  convert_timestamps_to_iso,
11
+ create_project_resource,
11
12
  process_customer_with_nested_resources_async,
12
13
  )
13
14
 
@@ -22,10 +23,10 @@ def revenuecat_source(
22
23
 
23
24
  Args:
24
25
  api_key: RevenueCat API v2 secret key with Bearer token format
25
- project_id: RevenueCat project ID (required for customers, products, subscriptions, purchases)
26
+ project_id: RevenueCat project ID (required for customers, products, entitlements, offerings, subscriptions, purchases)
26
27
 
27
28
  Returns:
28
- Iterable of DLT resources for customers, products, purchases, subscriptions, and projects
29
+ Iterable of DLT resources for customers, products, entitlements, offerings, purchases, subscriptions, and projects
29
30
  """
30
31
 
31
32
  @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
@@ -85,19 +86,23 @@ def revenuecat_source(
85
86
  # Yield each processed customer
86
87
  yield from process_customers_sync()
87
88
 
88
- @dlt.resource(name="products", primary_key="id", write_disposition="merge")
89
- def products() -> Iterator[Dict[str, Any]]:
90
- """Get list of products."""
91
- if project_id is None:
92
- raise ValueError("project_id is required for products resource")
93
- endpoint = f"/projects/{project_id}/products"
89
+ # Create project-dependent resources dynamically
90
+ project_resources = []
91
+ resource_names = ["products", "entitlements", "offerings"]
92
+
93
+ for resource_name in resource_names:
94
+
95
+ @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
96
+ def create_resource(resource_name=resource_name) -> Iterator[Dict[str, Any]]:
97
+ """Get list of project resource."""
98
+ yield from create_project_resource(resource_name, api_key, project_id)
94
99
 
95
- for product in _paginate(api_key, endpoint):
96
- product = convert_timestamps_to_iso(product, ["created_at", "updated_at"])
97
- yield product
100
+ # Set the function name for better identification
101
+ create_resource.__name__ = resource_name
102
+ project_resources.append(create_resource)
98
103
 
99
104
  return [
100
105
  projects,
101
106
  customers,
102
- products,
107
+ *project_resources,
103
108
  ]
@@ -260,3 +260,32 @@ async def process_customer_with_nested_resources_async(
260
260
  await asyncio.gather(*tasks)
261
261
 
262
262
  return customer
263
+
264
+
265
def create_project_resource(
    resource_name: str,
    api_key: str,
    project_id: str = None,
    timestamp_fields: List[str] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Yield the items of a project-scoped endpoint with timestamps converted.

    Every item returned by ``_paginate`` for ``/projects/{project_id}/{resource_name}``
    is passed through ``convert_timestamps_to_iso`` before being yielded.

    Args:
        resource_name: Last path segment of the endpoint (e.g., 'products', 'entitlements', 'offerings')
        api_key: API key handed to ``_paginate``
        project_id: Project ID used to build the endpoint path
        timestamp_fields: Fields to convert; when empty or omitted,
            ``created_at`` and ``updated_at`` are used

    Returns:
        Iterator of converted items

    Raises:
        ValueError: If ``project_id`` is None. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    if project_id is None:
        raise ValueError(f"project_id is required for {resource_name} resource")

    fields = timestamp_fields if timestamp_fields else ["created_at", "updated_at"]
    path = f"/projects/{project_id}/{resource_name}"

    yield from (
        convert_timestamps_to_iso(record, fields)
        for record in _paginate(api_key, path)
    )
@@ -669,7 +669,7 @@ def shopify_source(
669
669
  params["updated_at_max"] = updated_at.end_value.isoformat()
670
670
  yield from client.get_pages("customers", params)
671
671
 
672
- @dlt.resource(primary_key="id", write_disposition="append")
672
+ @dlt.resource(primary_key="id", write_disposition="merge")
673
673
  def events(
674
674
  created_at: dlt.sources.incremental[
675
675
  pendulum.DateTime
ingestr/src/sources.py CHANGED
@@ -3377,6 +3377,8 @@ class RevenueCatSource:
3377
3377
  if table not in [
3378
3378
  "customers",
3379
3379
  "products",
3380
+ "entitlements",
3381
+ "offerings",
3380
3382
  "subscriptions",
3381
3383
  "purchases",
3382
3384
  "projects",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.86
3
+ Version: 0.13.88
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -1,17 +1,18 @@
1
1
  ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
2
- ingestr/main.py,sha256=qoWHNcHh0-xVnyQxbQ-SKuTxPb1RNV3ENkCpqO7CLrk,26694
2
+ ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=Sau1WKfATfGbfhYBf36HIMjBxy3Ri3NHPH1bcv0qOvU,21
5
+ ingestr/src/buildinfo.py,sha256=HKIWe5l7QAN_f0qXt18bMVKJYb_guRTpX7gXDtwcRlc,21
6
6
  ingestr/src/destinations.py,sha256=M2Yni6wiWcrvZ8EPJemidqxN156l0rehgCc7xuil7mo,22840
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
8
  ingestr/src/factory.py,sha256=hC5E_XgrgTHMqwqPc6ihUYvRGTGMTzdPfQhrgPyD0tY,6945
9
- ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
9
+ ingestr/src/filters.py,sha256=0n0sNAVG_f-B_1r7lW5iNtw9z_G1bxWzPaiL1i6tnbU,1665
10
10
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
+ ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
12
13
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
13
14
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
14
- ingestr/src/sources.py,sha256=CMXQRJlbHcGwKtrD-nt_ov-UlAn5UOQe08cdc7Wzel4,125068
15
+ ingestr/src/sources.py,sha256=YtqbkrF_z5n6Ccmj6kiYgjGMPL08r_1vc9YOvNhXlcw,125121
15
16
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
16
17
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
17
18
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -22,7 +23,7 @@ ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1Rl
22
23
  ingestr/src/applovin_max/__init__.py,sha256=fxXqsIibJarp5NOGe08G964HftwLDymTtYS_LqPJht4,3315
23
24
  ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
24
25
  ingestr/src/appsflyer/client.py,sha256=E6xPW4KlbBnQZ0K4eq2Xgb3AmGrtrzIX9bX8EnQr-D4,3615
25
- ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
26
+ ingestr/src/appstore/__init__.py,sha256=np8AkAIVZPnJt2pjHYgzEX9UhbxseMW9MKVnJ8qowUA,4781
26
27
  ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
27
28
  ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
28
29
  ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
@@ -33,7 +34,7 @@ ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_Xyw
33
34
  ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
34
35
  ingestr/src/attio/__init__.py,sha256=CLejJjp5vGkt6r18nfNNZ-Xjc1SZgQ5IlcBW5XFQR90,3243
35
36
  ingestr/src/attio/helpers.py,sha256=fCySmG5E6Iyh3Nm9a-HGbHNedxPH_2_otXYMTQsCibw,2185
36
- ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
37
+ ingestr/src/chess/__init__.py,sha256=mvMLZdexSgDAHIk7Ps18sOrCVGCYKq35PrG2Etgj_P8,6813
37
38
  ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
38
39
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
39
40
  ingestr/src/clickup/__init__.py,sha256=uvfAqNturT4bMvU4NS3E8BdL6nvDFzNuh7bMlih8HJk,2547
@@ -51,8 +52,8 @@ ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-
51
52
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
52
53
  ingestr/src/fluxx/__init__.py,sha256=Ei8BE0KAEzpadJT9RO5-8zMA7LvnIPhNPDKF4EyBcLo,328980
53
54
  ingestr/src/fluxx/helpers.py,sha256=dCNgvMMTSEO4LNp6luNZ-XrV4NPW-_OUfmp0k3jFhuc,6602
54
- ingestr/src/frankfurter/__init__.py,sha256=z98RblQx1ab2GFowDq4l5xdnv-sLb41MPGitH-y2ahc,5242
55
- ingestr/src/frankfurter/helpers.py,sha256=tEtx9VU7IchRmtKRIEq_r8MclNVs8vL4E_RjGW2ZSh0,1504
55
+ ingestr/src/frankfurter/__init__.py,sha256=aeyiv1jwcwblV5OeqG81vFcJo_Wc1bUlDwzdE4gnQiw,5246
56
+ ingestr/src/frankfurter/helpers.py,sha256=SpRr992OcSf7IDI5y-ToUdO6m6sGpqFz59LTY0ojchI,1502
56
57
  ingestr/src/freshdesk/__init__.py,sha256=ukyorgCNsW_snzsYBDsr3Q0WB8f-to9Fk0enqHHFQlk,3087
57
58
  ingestr/src/freshdesk/freshdesk_client.py,sha256=1nFf0K4MQ0KZbWwk4xSbYHaykVqmPLfN39miOFDpWVc,4385
58
59
  ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
@@ -84,7 +85,7 @@ ingestr/src/kafka/__init__.py,sha256=QUHsGmdv5_E-3z0GDHXvbk39puwuGDBsyYSDhvbA89E
84
85
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
85
86
  ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
86
87
  ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0s,3152
87
- ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
88
+ ingestr/src/klaviyo/__init__.py,sha256=Tg5EqAgsEK8xM5RO2im8vFMzPGc7yDpSCUkprGjMooI,7870
88
89
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
89
90
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
90
91
  ingestr/src/linear/__init__.py,sha256=rufjwhLip7RK6j2DpFzCRQEvA_oOqgPEEdREJkc53_U,12295
@@ -94,8 +95,8 @@ ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffW
94
95
  ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
95
96
  ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
96
97
  ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
97
- ingestr/src/mongodb/__init__.py,sha256=5KNdR2mxJoHSOU1pt-FIJNg9HT4aHPwl6mI31xPBQLA,7487
98
- ingestr/src/mongodb/helpers.py,sha256=VMGKkSN6FIQ4l-4TUqoc-Ou7r52_zPXuLF33ZN23B_I,30881
98
+ ingestr/src/mongodb/__init__.py,sha256=wu3KJ3VH5FF67gctJqm4T3ZTdBOQam1u6xuFBohq7bs,7486
99
+ ingestr/src/mongodb/helpers.py,sha256=TmEbQ-Rz5ajxmaMgZa7nrI13-L7Z_ClbFCFPnmPIrgE,31739
99
100
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
100
101
  ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
101
102
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -113,11 +114,11 @@ ingestr/src/pipedrive/helpers/__init__.py,sha256=UX1K_qnGXB0ShtnBOfp2XuVbK8RRoCK
113
114
  ingestr/src/pipedrive/helpers/custom_fields_munger.py,sha256=rZ4AjdITHfJE2NNomCR7vMBS1KnWpEGVF6fADwsIHUE,4488
114
115
  ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl3HhE3-6eA,3358
115
116
  ingestr/src/quickbooks/__init__.py,sha256=cZUuVCOTGPHTscRj6i0DytO63_fWF-4ieMxoU4PcyTg,3727
116
- ingestr/src/revenuecat/__init__.py,sha256=HrI4Ht8PWTHiBYphAO26tK-2S-z1FuSIq97wu7erPIw,3785
117
- ingestr/src/revenuecat/helpers.py,sha256=ntdorpAdPoPBcga1fifFeAl07rKZ-CnF5u5QiFdHbW8,8664
117
+ ingestr/src/revenuecat/__init__.py,sha256=5HbyZuEOekkbeeT72sM_bnGygSyYdmd_vczfAUz7xoM,4029
118
+ ingestr/src/revenuecat/helpers.py,sha256=CYU6l79kplnfL87GfdxyGeEBrBSWEZfGP0GyjPHuVDk,9619
118
119
  ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
119
120
  ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
120
- ingestr/src/shopify/__init__.py,sha256=dp6Ybk5LIKA5suzVt923v5LzHz5rMUuDfhjTNPqSjAc,62603
121
+ ingestr/src/shopify/__init__.py,sha256=RzSSG93g-Qlkz6TAxi1XasFDdxxtVXIo53ZTtjGczW4,62602
121
122
  ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
122
123
  ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
123
124
  ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
@@ -157,8 +158,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
157
158
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
158
159
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
159
160
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
160
- ingestr-0.13.86.dist-info/METADATA,sha256=EYqj1B1PK2F2EGHKmzuoxvQRSdXZThmlL0UutcFxzeo,15182
161
- ingestr-0.13.86.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
162
- ingestr-0.13.86.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
163
- ingestr-0.13.86.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
164
- ingestr-0.13.86.dist-info/RECORD,,
161
+ ingestr-0.13.88.dist-info/METADATA,sha256=IypTsrgDspKt59K01ip36dHQYNCqAkj4ROGhuoj1kGk,15182
162
+ ingestr-0.13.88.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
163
+ ingestr-0.13.88.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
164
+ ingestr-0.13.88.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
165
+ ingestr-0.13.88.dist-info/RECORD,,