PyPI - ingestr - Versions diffs - 0.13.86__py3-none-any.whl → 0.13.88__py3-none-any.whl - Mend

ingestr 0.13.86__py3-none-any.whl → 0.13.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ingestr might be problematic. Click here for more details.

Files changed (20)

ingestr/main.py +12 -0
ingestr/src/appstore/__init__.py +1 -0
ingestr/src/buildinfo.py +1 -1
ingestr/src/chess/__init__.py +1 -1
ingestr/src/filters.py +9 -0
ingestr/src/frankfurter/__init__.py +10 -14
ingestr/src/frankfurter/helpers.py +2 -2
ingestr/src/klaviyo/__init__.py +5 -5
ingestr/src/masking.py +344 -0
ingestr/src/mongodb/__init__.py +1 -1
ingestr/src/mongodb/helpers.py +39 -14
ingestr/src/revenuecat/__init__.py +17 -12
ingestr/src/revenuecat/helpers.py +29 -0
ingestr/src/shopify/__init__.py +1 -1
ingestr/src/sources.py +2 -0
{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/METADATA +1 -1
{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/RECORD +20 -19
{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/WHEEL +0 -0
{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/entry_points.txt +0 -0
{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/licenses/LICENSE.md +0 -0

ingestr/main.py CHANGED Viewed

@@ -282,6 +282,13 @@ def ingest(
             envvar=["STAGING_BUCKET", "INGESTR_STAGING_BUCKET"],
         ),
     ] = None,  # type: ignore
+    mask: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Column masking configuration in format 'column:algorithm[:param]'. Can be specified multiple times.",
+            envvar=["MASK", "INGESTR_MASK"],
+        ),
+    ] = [],  # type: ignore
 ):
     import hashlib
     import tempfile
@@ -302,6 +309,7 @@ def ingest(
     from ingestr.src.filters import (
         cast_set_to_list,
         cast_spanner_types,
+        create_masking_filter,
         handle_mysql_empty_dates,
     )
     from ingestr.src.sources import MongoDbSource
@@ -562,6 +570,10 @@ def ingest(
         if factory.source_scheme.startswith("spanner"):
             resource.for_each(dlt_source, lambda x: x.add_map(cast_spanner_types))
+        if mask:
+            masking_filter = create_masking_filter(mask)
+            resource.for_each(dlt_source, lambda x: x.add_map(masking_filter))
         if yield_limit:
             resource.for_each(dlt_source, lambda x: x.add_limit(yield_limit))

ingestr/src/appstore/__init__.py CHANGED Viewed

@@ -38,6 +38,7 @@ def app_store(
             name=resource.name,
             primary_key=resource.primary_key,
             columns=resource.columns,
+            write_disposition="merge",
         )(client, app_ids, resource.report_name, start_date, end_date)

ingestr/src/buildinfo.py CHANGED Viewed

@@ -1 +1 @@
-version = "v0.13.86"
+version = "v0.13.88"

ingestr/src/chess/__init__.py CHANGED Viewed

@@ -75,7 +75,7 @@ def players_archives(players: List[str]) -> Iterator[List[TDataItem]]:
 @dlt.resource(
-    write_disposition="append", columns={"end_time": {"data_type": "timestamp"}}
+    write_disposition="replace", columns={"end_time": {"data_type": "timestamp"}}
 )
 def players_games(
     players: List[str], start_month: str = None, end_month: str = None

ingestr/src/filters.py CHANGED Viewed

@@ -51,3 +51,12 @@ def table_adapter_exclude_columns(cols: list[str]):
             table._columns.remove(col)  # type: ignore
     return excluder
+def create_masking_filter(mask_configs: list[str]):
+    from ingestr.src.masking import create_masking_mapper
+    if not mask_configs:
+        return lambda x: x
+    return create_masking_mapper(mask_configs)

ingestr/src/frankfurter/__init__.py CHANGED Viewed

@@ -14,14 +14,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
 )
 def frankfurter_source(
     start_date: TAnyDateTime,
-    end_date: TAnyDateTime|None,
+    end_date: TAnyDateTime | None,
     base_currency: str,
 ) -> Any:
     """
     A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
     various types of data: currencies, latest rates, historical rates.
     """
     @dlt.resource(
         write_disposition="replace",
@@ -36,7 +35,6 @@ def frankfurter_source(
         for currency_code, currency_name in currencies_data.items():
             yield {"currency_code": currency_code, "currency_name": currency_name}
     @dlt.resource(
         write_disposition="merge",
         columns={
@@ -81,7 +79,6 @@ def frankfurter_source(
                 "base_currency": base_currency,
             }
     @dlt.resource(
         write_disposition="merge",
         columns={
@@ -93,13 +90,13 @@ def frankfurter_source(
         primary_key=("date", "currency_code", "base_currency"),
     )
     def exchange_rates(
-        date_time = dlt.sources.incremental(
-        "date",
-        initial_value=start_date,
-        end_value=end_date,
-        range_start="closed",
-        range_end="closed",
-    )
+        date_time=dlt.sources.incremental(
+            "date",
+            initial_value=start_date,
+            end_value=end_date,
+            range_start="closed",
+            range_end="closed",
+        ),
     ) -> Iterator[dict]:
         """
         Fetches exchange rates for a specified date range.
@@ -115,9 +112,9 @@ def frankfurter_source(
             end_date = date_time.end_value
         else:
             end_date = pendulum.now()
         # Ensure start_date.last_value is a pendulum.DateTime object
-        start_date_obj = ensure_pendulum_datetime(start_date) # type: ignore
+        start_date_obj = ensure_pendulum_datetime(start_date)  # type: ignore
         start_date_str = start_date_obj.format("YYYY-MM-DD")
         # Ensure end_date is a pendulum.DateTime object
@@ -158,4 +155,3 @@ def frankfurter_source(
                 }
     return currencies, latest, exchange_rates

ingestr/src/frankfurter/helpers.py CHANGED Viewed

@@ -16,9 +16,9 @@ def get_path_with_retry(path: str) -> StrAny:
     return get_url_with_retry(f"{FRANKFURTER_API_URL}{path}")
-def validate_dates(start_date: datetime, end_date: datetime|None) -> None:
+def validate_dates(start_date: datetime, end_date: datetime | None) -> None:
     current_date = pendulum.now()
     # Check if start_date is in the futurep
     if start_date > current_date:
         raise ValueError("Interval-start cannot be in the future.")

ingestr/src/klaviyo/__init__.py CHANGED Viewed

@@ -30,7 +30,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
     start_date_obj = ensure_pendulum_datetime(start_date)
     client = KlaviyoClient(api_key)
-    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def events(
         datetime=dlt.sources.incremental(
             "datetime",
@@ -135,7 +135,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
     ) -> Iterable[TDataItem]:
         yield from client.fetch_catalog_item(create_client(), updated.start_value)
-    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def forms(
         updated_at=dlt.sources.incremental(
             "updated_at",
@@ -162,7 +162,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
     ) -> Iterable[TDataItem]:
         yield from client.fetch_lists(create_client(), updated.start_value)
-    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def images(
         updated_at=dlt.sources.incremental(
             "updated_at",
@@ -188,7 +188,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
     ) -> Iterable[TDataItem]:
         yield from client.fetch_segments(create_client(), updated.start_value)
-    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def flows(
         updated=dlt.sources.incremental(
             "updated",
@@ -203,7 +203,7 @@ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResour
         for start, end in intervals:
             yield lambda s=start, e=end: client.fetch_flows(create_client(), s, e)
-    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
     def templates(
         updated=dlt.sources.incremental(
             "updated",

ingestr/src/masking.py ADDED Viewed

@@ -0,0 +1,344 @@
+import hashlib
+import hmac
+import random
+import re
+import string
+import uuid
+from datetime import date, datetime, timedelta
+from typing import Any, Callable, Dict, Optional, Tuple, Union
+class MaskingEngine:
+    def __init__(self):
+        self.token_cache: Dict[str, Union[str, int]] = {}
+        self.sequential_counter = 0
+    def parse_mask_config(self, config: str) -> Tuple[str, str, Optional[str]]:
+        parts = config.split(":")
+        if len(parts) == 2:
+            return parts[0], parts[1], None
+        elif len(parts) == 3:
+            return parts[0], parts[1], parts[2]
+        else:
+            raise ValueError(
+                f"Invalid mask configuration: {config}. Expected format: 'column:algorithm[:param]'"
+            )
+    def get_masking_function(
+        self, algorithm: str, param: Optional[str] = None
+    ) -> Callable:
+        algorithm = algorithm.lower()
+        # Hash-based masking
+        if algorithm == "hash" or algorithm == "sha256":
+            return self._hash_sha256
+        elif algorithm == "md5":
+            return self._hash_md5
+        elif algorithm == "hmac":
+            return lambda x: self._hash_hmac(x, param or "default-key")
+        # Format-preserving masking
+        elif algorithm == "email":
+            return self._mask_email
+        elif algorithm == "phone":
+            return self._mask_phone
+        elif algorithm == "credit_card":
+            return self._mask_credit_card
+        elif algorithm == "ssn":
+            return self._mask_ssn
+        # Redaction strategies
+        elif algorithm == "redact":
+            return lambda x: "REDACTED"
+        elif algorithm == "stars":
+            return lambda x: "*" * len(str(x)) if x else ""
+        elif algorithm == "fixed":
+            return lambda x: param or "MASKED"
+        elif algorithm == "random":
+            return self._random_replace
+        # Partial masking
+        elif algorithm == "partial":
+            chars = int(param) if param else 2
+            return lambda x: self._partial_mask(x, chars)
+        elif algorithm == "first_letter":
+            return self._first_letter_mask
+        # Tokenization
+        elif algorithm == "uuid":
+            return self._tokenize_uuid
+        elif algorithm == "sequential":
+            return self._tokenize_sequential
+        # Numeric masking
+        elif algorithm == "round":
+            precision = int(param) if param else 10
+            return lambda x: self._round_number(x, precision)
+        elif algorithm == "range":
+            bucket_size = int(param) if param else 100
+            return lambda x: self._range_mask(x, bucket_size)
+        elif algorithm == "noise":
+            noise_level = float(param) if param else 0.1
+            return lambda x: self._add_noise(x, noise_level)
+        # Date masking
+        elif algorithm == "date_shift":
+            max_days = int(param) if param else 30
+            return lambda x: self._date_shift(x, max_days)
+        elif algorithm == "year_only":
+            return self._year_only
+        elif algorithm == "month_year":
+            return self._month_year
+        else:
+            raise ValueError(f"Unknown masking algorithm: {algorithm}")
+    # Hash functions
+    def _hash_sha256(self, value: Any) -> Optional[str]:
+        if value is None:
+            return None
+        return hashlib.sha256(str(value).encode()).hexdigest()
+    def _hash_md5(self, value: Any) -> Optional[str]:
+        if value is None:
+            return None
+        return hashlib.md5(str(value).encode()).hexdigest()
+    def _hash_hmac(self, value: Any, key: str) -> Optional[str]:
+        if value is None:
+            return None
+        return hmac.new(key.encode(), str(value).encode(), hashlib.sha256).hexdigest()
+    # Format-preserving masks
+    def _mask_email(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        email_str = str(value)
+        if "@" not in email_str:
+            return self._partial_mask(email_str, 2)
+        local, domain = email_str.split("@", 1)
+        if len(local) <= 2:
+            masked_local = "*" * len(local)
+        else:
+            masked_local = local[0] + "*" * (len(local) - 2) + local[-1]
+        return f"{masked_local}@{domain}"
+    def _mask_phone(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        phone_str = re.sub(r"\D", "", str(value))
+        if len(phone_str) < 10:
+            return "*" * len(phone_str)
+        # Keep country code and area code, mask the rest
+        if len(phone_str) >= 10:
+            return phone_str[:3] + "-***-****"
+        return phone_str
+    def _mask_credit_card(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        cc_str = re.sub(r"\D", "", str(value))
+        if len(cc_str) < 12:
+            return "*" * len(cc_str)
+        return "*" * (len(cc_str) - 4) + cc_str[-4:]
+    def _mask_ssn(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        ssn_str = re.sub(r"\D", "", str(value))
+        if len(ssn_str) != 9:
+            return "*" * len(ssn_str)
+        return "***-**-" + ssn_str[-4:]
+    # Partial masking
+    def _partial_mask(self, value: Any, chars_to_show: int) -> Any:
+        if value is None or not value:
+            return value
+        val_str = str(value)
+        if len(val_str) <= chars_to_show * 2:
+            return "*" * len(val_str)
+        return (
+            val_str[:chars_to_show]
+            + "*" * (len(val_str) - chars_to_show * 2)
+            + val_str[-chars_to_show:]
+        )
+    def _first_letter_mask(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        val_str = str(value)
+        if len(val_str) <= 1:
+            return val_str
+        return val_str[0] + "*" * (len(val_str) - 1)
+    # Random replacement
+    def _random_replace(self, value: Any) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (int, float)):
+            # Generate random number in similar range
+            if isinstance(value, int):
+                magnitude = len(str(abs(value)))
+                return random.randint(10 ** (magnitude - 1), 10**magnitude - 1)
+            else:
+                return random.uniform(0, abs(value) * 2)
+        elif isinstance(value, str):
+            # Generate random string of same length
+            return "".join(
+                random.choices(string.ascii_letters + string.digits, k=len(value))
+            )
+        else:
+            return str(value)
+    # Tokenization
+    def _tokenize_uuid(self, value: Any) -> Optional[str]:
+        if value is None:
+            return None
+        val_str = str(value)
+        if val_str not in self.token_cache:
+            self.token_cache[val_str] = str(uuid.uuid4())
+        return str(self.token_cache[val_str])
+    def _tokenize_sequential(self, value: Any) -> Optional[int]:
+        if value is None:
+            return None
+        val_str = str(value)
+        if val_str not in self.token_cache:
+            self.sequential_counter += 1
+            self.token_cache[val_str] = self.sequential_counter
+        return int(self.token_cache[val_str])
+    # Numeric masking
+    def _round_number(self, value: Any, precision: int) -> Any:
+        if value is None:
+            return value
+        try:
+            num = float(value)
+            return round(num / precision) * precision
+        except (ValueError, TypeError):
+            return value
+    def _range_mask(self, value: Any, bucket_size: int) -> Any:
+        if value is None:
+            return value
+        try:
+            num = float(value)
+            lower = int(num // bucket_size) * bucket_size
+            upper = lower + bucket_size
+            return f"{lower}-{upper}"
+        except (ValueError, TypeError):
+            return value
+    def _add_noise(self, value: Any, noise_level: float) -> Any:
+        if value is None:
+            return value
+        try:
+            num = float(value)
+            noise = random.uniform(-noise_level, noise_level) * abs(num)
+            result = num + noise
+            if isinstance(value, int):
+                return int(result)
+            return result
+        except (ValueError, TypeError):
+            return value
+    # Date masking
+    def _date_shift(self, value: Any, max_days: int) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (date, datetime)):
+            shift_days = random.randint(-max_days, max_days)
+            return value + timedelta(days=shift_days)
+        # Try to parse string dates
+        try:
+            from dateutil import parser  # type: ignore
+            dt = parser.parse(str(value))
+            shift_days = random.randint(-max_days, max_days)
+            result = dt + timedelta(days=shift_days)
+            if isinstance(value, str):
+                return result.strftime("%Y-%m-%d")
+            return result
+        except Exception:
+            return value
+    def _year_only(self, value: Any) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (date, datetime)):
+            return value.year
+        # Try to parse string dates
+        try:
+            from dateutil import parser
+            dt = parser.parse(str(value))
+            return dt.year
+        except Exception:
+            return value
+    def _month_year(self, value: Any) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (date, datetime)):
+            return f"{value.year}-{value.month:02d}"
+        # Try to parse string dates
+        try:
+            from dateutil import parser
+            dt = parser.parse(str(value))
+            return f"{dt.year}-{dt.month:02d}"
+        except Exception:
+            return value
+def create_masking_mapper(mask_configs: list[str]) -> Callable:
+    engine = MaskingEngine()
+    # Parse all configurations
+    masks = {}
+    for config in mask_configs:
+        column, algorithm, param = engine.parse_mask_config(config)
+        masks[column] = engine.get_masking_function(algorithm, param)
+    def apply_masks(data: Any) -> Any:
+        # Handle PyArrow tables
+        try:
+            import pyarrow as pa  # type: ignore
+            if isinstance(data, pa.Table):
+                # Convert to pandas for easier manipulation
+                df = data.to_pandas()
+                # Apply masks to each column
+                for column, mask_func in masks.items():
+                    if column in df.columns:
+                        df[column] = df[column].apply(mask_func)
+                # Convert back to PyArrow table
+                return pa.Table.from_pandas(df)
+        except ImportError:
+            pass
+        # Handle dictionaries (original behavior)
+        if isinstance(data, dict):
+            for column, mask_func in masks.items():
+                if column in data:
+                    try:
+                        data[column] = mask_func(data[column])
+                    except Exception as e:
+                        print(f"Warning: Failed to mask column {column}: {e}")
+            return data
+        # Return as-is if not a supported type
+        return data
+    return apply_masks

ingestr/src/mongodb/__init__.py CHANGED Viewed

@@ -101,7 +101,7 @@ def mongodb_collection(
     write_disposition: Optional[str] = dlt.config.value,
     parallel: Optional[bool] = False,
     limit: Optional[int] = None,
-    chunk_size: Optional[int] = 10000,
+    chunk_size: Optional[int] = 1000,
     data_item_format: Optional[TDataItemFormat] = "object",
     filter_: Optional[Dict[str, Any]] = None,
     projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,

ingestr/src/mongodb/helpers.py CHANGED Viewed

@@ -518,21 +518,46 @@ class CollectionAggregationLoader(CollectionLoader):
         if limit and limit > 0:
             pipeline.append({"$limit": limit})
-        print("pipeline", pipeline)
-        # Execute aggregation
-        cursor = self.collection.aggregate(pipeline, allowDiskUse=True)
+        # Add maxTimeMS to prevent hanging
+        cursor = self.collection.aggregate(
+            pipeline,
+            allowDiskUse=True,
+            batchSize=min(self.chunk_size, 101),
+            maxTimeMS=30000,  # 30 second timeout
+        )
-        # Process results in chunks
-        while docs_slice := list(islice(cursor, self.chunk_size)):
-            res = map_nested_in_place(convert_mongo_objs, docs_slice)
-            print("res", res)
-            if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
-                yield dlt.mark.with_hints(
-                    res,
-                    dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
-                )
-            else:
-                yield res
+        docs_buffer = []
+        try:
+            for doc in cursor:
+                docs_buffer.append(doc)
+                if len(docs_buffer) >= self.chunk_size:
+                    res = map_nested_in_place(convert_mongo_objs, docs_buffer)
+                    if (
+                        len(res) > 0
+                        and "_id" in res[0]
+                        and isinstance(res[0]["_id"], dict)
+                    ):
+                        yield dlt.mark.with_hints(
+                            res,
+                            dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
+                        )
+                    else:
+                        yield res
+                    docs_buffer = []
+            # Yield any remaining documents
+            if docs_buffer:
+                res = map_nested_in_place(convert_mongo_objs, docs_buffer)
+                if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
+                    yield dlt.mark.with_hints(
+                        res,
+                        dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
+                    )
+                else:
+                    yield res
+        finally:
+            cursor.close()
 class CollectionAggregationLoaderParallel(CollectionAggregationLoader):

ingestr/src/revenuecat/__init__.py CHANGED Viewed

@@ -8,6 +8,7 @@ from .helpers import (
     _make_request,
     _paginate,
     convert_timestamps_to_iso,
+    create_project_resource,
     process_customer_with_nested_resources_async,
 )
@@ -22,10 +23,10 @@ def revenuecat_source(
     Args:
         api_key: RevenueCat API v2 secret key with Bearer token format
-        project_id: RevenueCat project ID (required for customers, products, subscriptions, purchases)
+        project_id: RevenueCat project ID (required for customers, products, entitlements, offerings, subscriptions, purchases)
     Returns:
-        Iterable of DLT resources for customers, products, purchases, subscriptions, and projects
+        Iterable of DLT resources for customers, products, entitlements, offerings, purchases, subscriptions, and projects
     """
     @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
@@ -85,19 +86,23 @@ def revenuecat_source(
         # Yield each processed customer
         yield from process_customers_sync()
-    @dlt.resource(name="products", primary_key="id", write_disposition="merge")
-    def products() -> Iterator[Dict[str, Any]]:
-        """Get list of products."""
-        if project_id is None:
-            raise ValueError("project_id is required for products resource")
-        endpoint = f"/projects/{project_id}/products"
+    # Create project-dependent resources dynamically
+    project_resources = []
+    resource_names = ["products", "entitlements", "offerings"]
+    for resource_name in resource_names:
+        @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
+        def create_resource(resource_name=resource_name) -> Iterator[Dict[str, Any]]:
+            """Get list of project resource."""
+            yield from create_project_resource(resource_name, api_key, project_id)
-        for product in _paginate(api_key, endpoint):
-            product = convert_timestamps_to_iso(product, ["created_at", "updated_at"])
-            yield product
+        # Set the function name for better identification
+        create_resource.__name__ = resource_name
+        project_resources.append(create_resource)
     return [
         projects,
         customers,
-        products,
+        *project_resources,
     ]

ingestr/src/revenuecat/helpers.py CHANGED Viewed

@@ -260,3 +260,32 @@ async def process_customer_with_nested_resources_async(
     await asyncio.gather(*tasks)
     return customer
+def create_project_resource(
+    resource_name: str,
+    api_key: str,
+    project_id: str = None,
+    timestamp_fields: List[str] = None,
+) -> Iterator[Dict[str, Any]]:
+    """
+    Helper function to create DLT resources for project-dependent endpoints.
+    Args:
+        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
+        api_key: RevenueCat API key
+        project_id: RevenueCat project ID
+        timestamp_fields: List of timestamp fields to convert to ISO format
+    Returns:
+        Iterator of resource data
+    """
+    if project_id is None:
+        raise ValueError(f"project_id is required for {resource_name} resource")
+    endpoint = f"/projects/{project_id}/{resource_name}"
+    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]
+    for item in _paginate(api_key, endpoint):
+        item = convert_timestamps_to_iso(item, default_timestamp_fields)
+        yield item

ingestr/src/shopify/__init__.py CHANGED Viewed

@@ -669,7 +669,7 @@ def shopify_source(
             params["updated_at_max"] = updated_at.end_value.isoformat()
         yield from client.get_pages("customers", params)
-    @dlt.resource(primary_key="id", write_disposition="append")
+    @dlt.resource(primary_key="id", write_disposition="merge")
     def events(
         created_at: dlt.sources.incremental[
             pendulum.DateTime

ingestr/src/sources.py CHANGED Viewed

@@ -3377,6 +3377,8 @@ class RevenueCatSource:
         if table not in [
             "customers",
             "products",
+            "entitlements",
+            "offerings",
             "subscriptions",
             "purchases",
             "projects",

{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.86
+Version: 0.13.88
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues

{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,18 @@
 ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
-ingestr/main.py,sha256=qoWHNcHh0-xVnyQxbQ-SKuTxPb1RNV3ENkCpqO7CLrk,26694
+ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=Sau1WKfATfGbfhYBf36HIMjBxy3Ri3NHPH1bcv0qOvU,21
+ingestr/src/buildinfo.py,sha256=HKIWe5l7QAN_f0qXt18bMVKJYb_guRTpX7gXDtwcRlc,21
 ingestr/src/destinations.py,sha256=M2Yni6wiWcrvZ8EPJemidqxN156l0rehgCc7xuil7mo,22840
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
 ingestr/src/factory.py,sha256=hC5E_XgrgTHMqwqPc6ihUYvRGTGMTzdPfQhrgPyD0tY,6945
-ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
+ingestr/src/filters.py,sha256=0n0sNAVG_f-B_1r7lW5iNtw9z_G1bxWzPaiL1i6tnbU,1665
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
+ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=CMXQRJlbHcGwKtrD-nt_ov-UlAn5UOQe08cdc7Wzel4,125068
+ingestr/src/sources.py,sha256=YtqbkrF_z5n6Ccmj6kiYgjGMPL08r_1vc9YOvNhXlcw,125121
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -22,7 +23,7 @@ ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1Rl
 ingestr/src/applovin_max/__init__.py,sha256=fxXqsIibJarp5NOGe08G964HftwLDymTtYS_LqPJht4,3315
 ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
 ingestr/src/appsflyer/client.py,sha256=E6xPW4KlbBnQZ0K4eq2Xgb3AmGrtrzIX9bX8EnQr-D4,3615
-ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
+ingestr/src/appstore/__init__.py,sha256=np8AkAIVZPnJt2pjHYgzEX9UhbxseMW9MKVnJ8qowUA,4781
 ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
 ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
 ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
@@ -33,7 +34,7 @@ ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_Xyw
 ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
 ingestr/src/attio/__init__.py,sha256=CLejJjp5vGkt6r18nfNNZ-Xjc1SZgQ5IlcBW5XFQR90,3243
 ingestr/src/attio/helpers.py,sha256=fCySmG5E6Iyh3Nm9a-HGbHNedxPH_2_otXYMTQsCibw,2185
-ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
+ingestr/src/chess/__init__.py,sha256=mvMLZdexSgDAHIk7Ps18sOrCVGCYKq35PrG2Etgj_P8,6813
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
 ingestr/src/clickup/__init__.py,sha256=uvfAqNturT4bMvU4NS3E8BdL6nvDFzNuh7bMlih8HJk,2547
@@ -51,8 +52,8 @@ ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
 ingestr/src/fluxx/__init__.py,sha256=Ei8BE0KAEzpadJT9RO5-8zMA7LvnIPhNPDKF4EyBcLo,328980
 ingestr/src/fluxx/helpers.py,sha256=dCNgvMMTSEO4LNp6luNZ-XrV4NPW-_OUfmp0k3jFhuc,6602
-ingestr/src/frankfurter/__init__.py,sha256=z98RblQx1ab2GFowDq4l5xdnv-sLb41MPGitH-y2ahc,5242
-ingestr/src/frankfurter/helpers.py,sha256=tEtx9VU7IchRmtKRIEq_r8MclNVs8vL4E_RjGW2ZSh0,1504
+ingestr/src/frankfurter/__init__.py,sha256=aeyiv1jwcwblV5OeqG81vFcJo_Wc1bUlDwzdE4gnQiw,5246
+ingestr/src/frankfurter/helpers.py,sha256=SpRr992OcSf7IDI5y-ToUdO6m6sGpqFz59LTY0ojchI,1502
 ingestr/src/freshdesk/__init__.py,sha256=ukyorgCNsW_snzsYBDsr3Q0WB8f-to9Fk0enqHHFQlk,3087
 ingestr/src/freshdesk/freshdesk_client.py,sha256=1nFf0K4MQ0KZbWwk4xSbYHaykVqmPLfN39miOFDpWVc,4385
 ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
@@ -84,7 +85,7 @@ ingestr/src/kafka/__init__.py,sha256=QUHsGmdv5_E-3z0GDHXvbk39puwuGDBsyYSDhvbA89E
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
 ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0s,3152
-ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
+ingestr/src/klaviyo/__init__.py,sha256=Tg5EqAgsEK8xM5RO2im8vFMzPGc7yDpSCUkprGjMooI,7870
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
 ingestr/src/linear/__init__.py,sha256=rufjwhLip7RK6j2DpFzCRQEvA_oOqgPEEdREJkc53_U,12295
@@ -94,8 +95,8 @@ ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffW
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
 ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
 ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
-ingestr/src/mongodb/__init__.py,sha256=5KNdR2mxJoHSOU1pt-FIJNg9HT4aHPwl6mI31xPBQLA,7487
-ingestr/src/mongodb/helpers.py,sha256=VMGKkSN6FIQ4l-4TUqoc-Ou7r52_zPXuLF33ZN23B_I,30881
+ingestr/src/mongodb/__init__.py,sha256=wu3KJ3VH5FF67gctJqm4T3ZTdBOQam1u6xuFBohq7bs,7486
+ingestr/src/mongodb/helpers.py,sha256=TmEbQ-Rz5ajxmaMgZa7nrI13-L7Z_ClbFCFPnmPIrgE,31739
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
 ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -113,11 +114,11 @@ ingestr/src/pipedrive/helpers/__init__.py,sha256=UX1K_qnGXB0ShtnBOfp2XuVbK8RRoCK
 ingestr/src/pipedrive/helpers/custom_fields_munger.py,sha256=rZ4AjdITHfJE2NNomCR7vMBS1KnWpEGVF6fADwsIHUE,4488
 ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl3HhE3-6eA,3358
 ingestr/src/quickbooks/__init__.py,sha256=cZUuVCOTGPHTscRj6i0DytO63_fWF-4ieMxoU4PcyTg,3727
-ingestr/src/revenuecat/__init__.py,sha256=HrI4Ht8PWTHiBYphAO26tK-2S-z1FuSIq97wu7erPIw,3785
-ingestr/src/revenuecat/helpers.py,sha256=ntdorpAdPoPBcga1fifFeAl07rKZ-CnF5u5QiFdHbW8,8664
+ingestr/src/revenuecat/__init__.py,sha256=5HbyZuEOekkbeeT72sM_bnGygSyYdmd_vczfAUz7xoM,4029
+ingestr/src/revenuecat/helpers.py,sha256=CYU6l79kplnfL87GfdxyGeEBrBSWEZfGP0GyjPHuVDk,9619
 ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
 ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
-ingestr/src/shopify/__init__.py,sha256=dp6Ybk5LIKA5suzVt923v5LzHz5rMUuDfhjTNPqSjAc,62603
+ingestr/src/shopify/__init__.py,sha256=RzSSG93g-Qlkz6TAxi1XasFDdxxtVXIo53ZTtjGczW4,62602
 ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
 ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
 ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
@@ -157,8 +158,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.86.dist-info/METADATA,sha256=EYqj1B1PK2F2EGHKmzuoxvQRSdXZThmlL0UutcFxzeo,15182
-ingestr-0.13.86.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ingestr-0.13.86.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
-ingestr-0.13.86.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
-ingestr-0.13.86.dist-info/RECORD,,
+ingestr-0.13.88.dist-info/METADATA,sha256=IypTsrgDspKt59K01ip36dHQYNCqAkj4ROGhuoj1kGk,15182
+ingestr-0.13.88.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.88.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.88.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.88.dist-info/RECORD,,

{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/WHEEL RENAMED Viewed

File without changes

{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ingestr-0.13.86.dist-info → ingestr-0.13.88.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

ingestr 0.13.86__py3-none-any.whl → 0.13.88__py3-none-any.whl

Potentially problematic release.

ingestr 0.13.86py3-none-any.whl → 0.13.88py3-none-any.whl