PyPI - ingestr - Versions diffs - 0.13.87__py3-none-any.whl → 0.13.89__py3-none-any.whl - Mend

ingestr-0.13.87-py3-none-any.whl → ingestr-0.13.89-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ingestr might be problematic. Click here for more details.

Files changed (20) hide show

ingestr/main.py +12 -0
ingestr/src/applovin/__init__.py +1 -1
ingestr/src/asana_source/__init__.py +1 -1
ingestr/src/buildinfo.py +1 -1
ingestr/src/destinations.py +37 -2
ingestr/src/filesystem/__init__.py +8 -3
ingestr/src/filters.py +9 -0
ingestr/src/frankfurter/__init__.py +10 -14
ingestr/src/frankfurter/helpers.py +2 -2
ingestr/src/masking.py +344 -0
ingestr/src/mongodb/helpers.py +11 -7
ingestr/src/revenuecat/__init__.py +4 -4
ingestr/src/revenuecat/helpers.py +4 -4
ingestr/src/salesforce/__init__.py +9 -8
ingestr/src/sources.py +1 -0
{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/METADATA +2 -2
{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/RECORD +20 -19
{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/WHEEL +0 -0
{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/entry_points.txt +0 -0
{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/licenses/LICENSE.md +0 -0

ingestr/main.py CHANGED Viewed

@@ -282,6 +282,13 @@ def ingest(
             envvar=["STAGING_BUCKET", "INGESTR_STAGING_BUCKET"],
         ),
     ] = None,  # type: ignore
+    mask: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Column masking configuration in format 'column:algorithm[:param]'. Can be specified multiple times.",
+            envvar=["MASK", "INGESTR_MASK"],
+        ),
+    ] = [],  # type: ignore
 ):
     import hashlib
     import tempfile
@@ -302,6 +309,7 @@ def ingest(
     from ingestr.src.filters import (
         cast_set_to_list,
         cast_spanner_types,
+        create_masking_filter,
         handle_mysql_empty_dates,
     )
     from ingestr.src.sources import MongoDbSource
@@ -562,6 +570,10 @@ def ingest(
         if factory.source_scheme.startswith("spanner"):
             resource.for_each(dlt_source, lambda x: x.add_map(cast_spanner_types))
+        if mask:
+            masking_filter = create_masking_filter(mask)
+            resource.for_each(dlt_source, lambda x: x.add_map(masking_filter))
         if yield_limit:
             resource.for_each(dlt_source, lambda x: x.add_limit(yield_limit))

ingestr/src/applovin/__init__.py CHANGED Viewed

@@ -224,7 +224,7 @@ def resource(
 def custom_report_from_spec(spec: str) -> EndpointResource:
     parts = spec.split(":")
     if len(parts) != 4:
-        raise InvalidCustomReportError()
+        raise InvalidCustomReportError()
     _, endpoint, report, dims = parts
     report_type = ReportType(report.strip())

ingestr/src/asana_source/__init__.py CHANGED Viewed

@@ -182,7 +182,7 @@ def tasks(
 @dlt.transformer(
     data_from=tasks,
-    write_disposition="append",
+    write_disposition="replace",
 )
 @dlt.defer
 def stories(

ingestr/src/buildinfo.py CHANGED Viewed

@@ -1 +1 @@
-version = "v0.13.87"
+version = "v0.13.89"

ingestr/src/destinations.py CHANGED Viewed

@@ -25,7 +25,20 @@ from ingestr.src.loader import load_dlt_file
 class GenericSqlDestination:
     def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
-        table_fields = table.split(".")
+        if uri.startswith("databricks://"):
+            p = urlparse(uri)
+            q = parse_qs(p.query)
+            schema = q.get("schema", [None])[0]
+            if not schema:
+                raise ValueError("Databricks requires schema in the URI.")
+            res = {
+                "dataset_name": schema,
+                "table_name": table,
+            }
+            return res
+        table_fields = table.split(".")
         if len(table_fields) != 2:
             raise ValueError("Table name must be in the format <schema>.<table>")
@@ -270,8 +283,30 @@ class MsSQLDestination(GenericSqlDestination):
 class DatabricksDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-        return dlt.destinations.databricks(credentials=uri, **kwargs)
+        p = urlparse(uri)
+        q = parse_qs(p.query)
+        access_token = p.password
+        server_hostname = p.hostname
+        http_path = q.get("http_path", [None])[0]
+        catalog = q.get("catalog", [None])[0]
+        schema = q.get("schema", [None])[0]
+        creds = {
+            "access_token": access_token,
+            "server_hostname": server_hostname,
+            "http_path": http_path,
+            "catalog": catalog,
+            "schema": schema,
+        }
+        return dlt.destinations.databricks(
+            credentials=creds,
+            **kwargs,
+        )
 class SynapseDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):

ingestr/src/filesystem/__init__.py CHANGED Viewed

@@ -37,9 +37,14 @@ def readers(
         file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
     """
     filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
-    filesystem_resource.apply_hints(
-        incremental=dlt.sources.incremental("modification_date"),
-    )
+    # NOTE: incremental support is disabled until we can figure out
+    #       how to support incremental loads per matching file, rather
+    #       than a blanket threshold.
+    #
+    # filesystem_resource.apply_hints(
+    #     incremental=dlt.sources.incremental("modification_date"),
+    # )
     return (
         filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
         filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),

ingestr/src/filters.py CHANGED Viewed

@@ -51,3 +51,12 @@ def table_adapter_exclude_columns(cols: list[str]):
             table._columns.remove(col)  # type: ignore
     return excluder
+def create_masking_filter(mask_configs: list[str]):
+    from ingestr.src.masking import create_masking_mapper
+    if not mask_configs:
+        return lambda x: x
+    return create_masking_mapper(mask_configs)

ingestr/src/frankfurter/__init__.py CHANGED Viewed

@@ -14,14 +14,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
 )
 def frankfurter_source(
     start_date: TAnyDateTime,
-    end_date: TAnyDateTime|None,
+    end_date: TAnyDateTime | None,
     base_currency: str,
 ) -> Any:
     """
     A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
     various types of data: currencies, latest rates, historical rates.
     """
     @dlt.resource(
         write_disposition="replace",
@@ -36,7 +35,6 @@ def frankfurter_source(
         for currency_code, currency_name in currencies_data.items():
             yield {"currency_code": currency_code, "currency_name": currency_name}
     @dlt.resource(
         write_disposition="merge",
         columns={
@@ -81,7 +79,6 @@ def frankfurter_source(
                 "base_currency": base_currency,
             }
     @dlt.resource(
         write_disposition="merge",
         columns={
@@ -93,13 +90,13 @@ def frankfurter_source(
         primary_key=("date", "currency_code", "base_currency"),
     )
     def exchange_rates(
-        date_time = dlt.sources.incremental(
-        "date",
-        initial_value=start_date,
-        end_value=end_date,
-        range_start="closed",
-        range_end="closed",
-    )
+        date_time=dlt.sources.incremental(
+            "date",
+            initial_value=start_date,
+            end_value=end_date,
+            range_start="closed",
+            range_end="closed",
+        ),
     ) -> Iterator[dict]:
         """
         Fetches exchange rates for a specified date range.
@@ -115,9 +112,9 @@ def frankfurter_source(
             end_date = date_time.end_value
         else:
             end_date = pendulum.now()
         # Ensure start_date.last_value is a pendulum.DateTime object
-        start_date_obj = ensure_pendulum_datetime(start_date) # type: ignore
+        start_date_obj = ensure_pendulum_datetime(start_date)  # type: ignore
         start_date_str = start_date_obj.format("YYYY-MM-DD")
         # Ensure end_date is a pendulum.DateTime object
@@ -158,4 +155,3 @@ def frankfurter_source(
                 }
     return currencies, latest, exchange_rates

ingestr/src/frankfurter/helpers.py CHANGED Viewed

@@ -16,9 +16,9 @@ def get_path_with_retry(path: str) -> StrAny:
     return get_url_with_retry(f"{FRANKFURTER_API_URL}{path}")
-def validate_dates(start_date: datetime, end_date: datetime|None) -> None:
+def validate_dates(start_date: datetime, end_date: datetime | None) -> None:
     current_date = pendulum.now()
     # Check if start_date is in the futurep
     if start_date > current_date:
         raise ValueError("Interval-start cannot be in the future.")

ingestr/src/masking.py ADDED Viewed

@@ -0,0 +1,344 @@
+import hashlib
+import hmac
+import random
+import re
+import string
+import uuid
+from datetime import date, datetime, timedelta
+from typing import Any, Callable, Dict, Optional, Tuple, Union
+class MaskingEngine:
+    def __init__(self):
+        self.token_cache: Dict[str, Union[str, int]] = {}
+        self.sequential_counter = 0
+    def parse_mask_config(self, config: str) -> Tuple[str, str, Optional[str]]:
+        parts = config.split(":")
+        if len(parts) == 2:
+            return parts[0], parts[1], None
+        elif len(parts) == 3:
+            return parts[0], parts[1], parts[2]
+        else:
+            raise ValueError(
+                f"Invalid mask configuration: {config}. Expected format: 'column:algorithm[:param]'"
+            )
+    def get_masking_function(
+        self, algorithm: str, param: Optional[str] = None
+    ) -> Callable:
+        algorithm = algorithm.lower()
+        # Hash-based masking
+        if algorithm == "hash" or algorithm == "sha256":
+            return self._hash_sha256
+        elif algorithm == "md5":
+            return self._hash_md5
+        elif algorithm == "hmac":
+            return lambda x: self._hash_hmac(x, param or "default-key")
+        # Format-preserving masking
+        elif algorithm == "email":
+            return self._mask_email
+        elif algorithm == "phone":
+            return self._mask_phone
+        elif algorithm == "credit_card":
+            return self._mask_credit_card
+        elif algorithm == "ssn":
+            return self._mask_ssn
+        # Redaction strategies
+        elif algorithm == "redact":
+            return lambda x: "REDACTED"
+        elif algorithm == "stars":
+            return lambda x: "*" * len(str(x)) if x else ""
+        elif algorithm == "fixed":
+            return lambda x: param or "MASKED"
+        elif algorithm == "random":
+            return self._random_replace
+        # Partial masking
+        elif algorithm == "partial":
+            chars = int(param) if param else 2
+            return lambda x: self._partial_mask(x, chars)
+        elif algorithm == "first_letter":
+            return self._first_letter_mask
+        # Tokenization
+        elif algorithm == "uuid":
+            return self._tokenize_uuid
+        elif algorithm == "sequential":
+            return self._tokenize_sequential
+        # Numeric masking
+        elif algorithm == "round":
+            precision = int(param) if param else 10
+            return lambda x: self._round_number(x, precision)
+        elif algorithm == "range":
+            bucket_size = int(param) if param else 100
+            return lambda x: self._range_mask(x, bucket_size)
+        elif algorithm == "noise":
+            noise_level = float(param) if param else 0.1
+            return lambda x: self._add_noise(x, noise_level)
+        # Date masking
+        elif algorithm == "date_shift":
+            max_days = int(param) if param else 30
+            return lambda x: self._date_shift(x, max_days)
+        elif algorithm == "year_only":
+            return self._year_only
+        elif algorithm == "month_year":
+            return self._month_year
+        else:
+            raise ValueError(f"Unknown masking algorithm: {algorithm}")
+    # Hash functions
+    def _hash_sha256(self, value: Any) -> Optional[str]:
+        if value is None:
+            return None
+        return hashlib.sha256(str(value).encode()).hexdigest()
+    def _hash_md5(self, value: Any) -> Optional[str]:
+        if value is None:
+            return None
+        return hashlib.md5(str(value).encode()).hexdigest()
+    def _hash_hmac(self, value: Any, key: str) -> Optional[str]:
+        if value is None:
+            return None
+        return hmac.new(key.encode(), str(value).encode(), hashlib.sha256).hexdigest()
+    # Format-preserving masks
+    def _mask_email(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        email_str = str(value)
+        if "@" not in email_str:
+            return self._partial_mask(email_str, 2)
+        local, domain = email_str.split("@", 1)
+        if len(local) <= 2:
+            masked_local = "*" * len(local)
+        else:
+            masked_local = local[0] + "*" * (len(local) - 2) + local[-1]
+        return f"{masked_local}@{domain}"
+    def _mask_phone(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        phone_str = re.sub(r"\D", "", str(value))
+        if len(phone_str) < 10:
+            return "*" * len(phone_str)
+        # Keep country code and area code, mask the rest
+        if len(phone_str) >= 10:
+            return phone_str[:3] + "-***-****"
+        return phone_str
+    def _mask_credit_card(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        cc_str = re.sub(r"\D", "", str(value))
+        if len(cc_str) < 12:
+            return "*" * len(cc_str)
+        return "*" * (len(cc_str) - 4) + cc_str[-4:]
+    def _mask_ssn(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        ssn_str = re.sub(r"\D", "", str(value))
+        if len(ssn_str) != 9:
+            return "*" * len(ssn_str)
+        return "***-**-" + ssn_str[-4:]
+    # Partial masking
+    def _partial_mask(self, value: Any, chars_to_show: int) -> Any:
+        if value is None or not value:
+            return value
+        val_str = str(value)
+        if len(val_str) <= chars_to_show * 2:
+            return "*" * len(val_str)
+        return (
+            val_str[:chars_to_show]
+            + "*" * (len(val_str) - chars_to_show * 2)
+            + val_str[-chars_to_show:]
+        )
+    def _first_letter_mask(self, value: Any) -> Any:
+        if value is None or not value:
+            return value
+        val_str = str(value)
+        if len(val_str) <= 1:
+            return val_str
+        return val_str[0] + "*" * (len(val_str) - 1)
+    # Random replacement
+    def _random_replace(self, value: Any) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (int, float)):
+            # Generate random number in similar range
+            if isinstance(value, int):
+                magnitude = len(str(abs(value)))
+                return random.randint(10 ** (magnitude - 1), 10**magnitude - 1)
+            else:
+                return random.uniform(0, abs(value) * 2)
+        elif isinstance(value, str):
+            # Generate random string of same length
+            return "".join(
+                random.choices(string.ascii_letters + string.digits, k=len(value))
+            )
+        else:
+            return str(value)
+    # Tokenization
+    def _tokenize_uuid(self, value: Any) -> Optional[str]:
+        if value is None:
+            return None
+        val_str = str(value)
+        if val_str not in self.token_cache:
+            self.token_cache[val_str] = str(uuid.uuid4())
+        return str(self.token_cache[val_str])
+    def _tokenize_sequential(self, value: Any) -> Optional[int]:
+        if value is None:
+            return None
+        val_str = str(value)
+        if val_str not in self.token_cache:
+            self.sequential_counter += 1
+            self.token_cache[val_str] = self.sequential_counter
+        return int(self.token_cache[val_str])
+    # Numeric masking
+    def _round_number(self, value: Any, precision: int) -> Any:
+        if value is None:
+            return value
+        try:
+            num = float(value)
+            return round(num / precision) * precision
+        except (ValueError, TypeError):
+            return value
+    def _range_mask(self, value: Any, bucket_size: int) -> Any:
+        if value is None:
+            return value
+        try:
+            num = float(value)
+            lower = int(num // bucket_size) * bucket_size
+            upper = lower + bucket_size
+            return f"{lower}-{upper}"
+        except (ValueError, TypeError):
+            return value
+    def _add_noise(self, value: Any, noise_level: float) -> Any:
+        if value is None:
+            return value
+        try:
+            num = float(value)
+            noise = random.uniform(-noise_level, noise_level) * abs(num)
+            result = num + noise
+            if isinstance(value, int):
+                return int(result)
+            return result
+        except (ValueError, TypeError):
+            return value
+    # Date masking
+    def _date_shift(self, value: Any, max_days: int) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (date, datetime)):
+            shift_days = random.randint(-max_days, max_days)
+            return value + timedelta(days=shift_days)
+        # Try to parse string dates
+        try:
+            from dateutil import parser  # type: ignore
+            dt = parser.parse(str(value))
+            shift_days = random.randint(-max_days, max_days)
+            result = dt + timedelta(days=shift_days)
+            if isinstance(value, str):
+                return result.strftime("%Y-%m-%d")
+            return result
+        except Exception:
+            return value
+    def _year_only(self, value: Any) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (date, datetime)):
+            return value.year
+        # Try to parse string dates
+        try:
+            from dateutil import parser
+            dt = parser.parse(str(value))
+            return dt.year
+        except Exception:
+            return value
+    def _month_year(self, value: Any) -> Any:
+        if value is None:
+            return value
+        if isinstance(value, (date, datetime)):
+            return f"{value.year}-{value.month:02d}"
+        # Try to parse string dates
+        try:
+            from dateutil import parser
+            dt = parser.parse(str(value))
+            return f"{dt.year}-{dt.month:02d}"
+        except Exception:
+            return value
+def create_masking_mapper(mask_configs: list[str]) -> Callable:
+    engine = MaskingEngine()
+    # Parse all configurations
+    masks = {}
+    for config in mask_configs:
+        column, algorithm, param = engine.parse_mask_config(config)
+        masks[column] = engine.get_masking_function(algorithm, param)
+    def apply_masks(data: Any) -> Any:
+        # Handle PyArrow tables
+        try:
+            import pyarrow as pa  # type: ignore
+            if isinstance(data, pa.Table):
+                # Convert to pandas for easier manipulation
+                df = data.to_pandas()
+                # Apply masks to each column
+                for column, mask_func in masks.items():
+                    if column in df.columns:
+                        df[column] = df[column].apply(mask_func)
+                # Convert back to PyArrow table
+                return pa.Table.from_pandas(df)
+        except ImportError:
+            pass
+        # Handle dictionaries (original behavior)
+        if isinstance(data, dict):
+            for column, mask_func in masks.items():
+                if column in data:
+                    try:
+                        data[column] = mask_func(data[column])
+                    except Exception as e:
+                        print(f"Warning: Failed to mask column {column}: {e}")
+            return data
+        # Return as-is if not a supported type
+        return data
+    return apply_masks

ingestr/src/mongodb/helpers.py CHANGED Viewed

@@ -520,20 +520,24 @@ class CollectionAggregationLoader(CollectionLoader):
         # Add maxTimeMS to prevent hanging
         cursor = self.collection.aggregate(
-            pipeline,
-            allowDiskUse=True,
+            pipeline,
+            allowDiskUse=True,
             batchSize=min(self.chunk_size, 101),
-            maxTimeMS=30000  # 30 second timeout
+            maxTimeMS=30000,  # 30 second timeout
         )
         docs_buffer = []
         try:
             for doc in cursor:
                 docs_buffer.append(doc)
                 if len(docs_buffer) >= self.chunk_size:
                     res = map_nested_in_place(convert_mongo_objs, docs_buffer)
-                    if len(res) > 0 and "_id" in res[0] and isinstance(res[0]["_id"], dict):
+                    if (
+                        len(res) > 0
+                        and "_id" in res[0]
+                        and isinstance(res[0]["_id"], dict)
+                    ):
                         yield dlt.mark.with_hints(
                             res,
                             dlt.mark.make_hints(columns={"_id": {"data_type": "json"}}),
@@ -541,7 +545,7 @@ class CollectionAggregationLoader(CollectionLoader):
                     else:
                         yield res
                     docs_buffer = []
             # Yield any remaining documents
             if docs_buffer:
                 res = map_nested_in_place(convert_mongo_objs, docs_buffer)

ingestr/src/revenuecat/__init__.py CHANGED Viewed

@@ -8,12 +8,11 @@ from .helpers import (
     _make_request,
     _paginate,
     convert_timestamps_to_iso,
-    process_customer_with_nested_resources_async,
     create_project_resource,
+    process_customer_with_nested_resources_async,
 )
 @dlt.source(name="revenuecat", max_table_nesting=0)
 def revenuecat_source(
     api_key: str,
@@ -90,13 +89,14 @@ def revenuecat_source(
     # Create project-dependent resources dynamically
     project_resources = []
     resource_names = ["products", "entitlements", "offerings"]
     for resource_name in resource_names:
         @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
         def create_resource(resource_name=resource_name) -> Iterator[Dict[str, Any]]:
             """Get list of project resource."""
             yield from create_project_resource(resource_name, api_key, project_id)
         # Set the function name for better identification
         create_resource.__name__ = resource_name
         project_resources.append(create_resource)

ingestr/src/revenuecat/helpers.py CHANGED Viewed

@@ -270,22 +270,22 @@ def create_project_resource(
 ) -> Iterator[Dict[str, Any]]:
     """
     Helper function to create DLT resources for project-dependent endpoints.
     Args:
         resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
         api_key: RevenueCat API key
         project_id: RevenueCat project ID
         timestamp_fields: List of timestamp fields to convert to ISO format
     Returns:
         Iterator of resource data
     """
     if project_id is None:
         raise ValueError(f"project_id is required for {resource_name} resource")
     endpoint = f"/projects/{project_id}/{resource_name}"
     default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]
     for item in _paginate(api_key, endpoint):
         item = convert_timestamps_to_iso(item, default_timestamp_fields)
         yield item

ingestr/src/salesforce/__init__.py CHANGED Viewed

@@ -13,6 +13,7 @@ def salesforce_source(
     username: str,
     password: str,
     token: str,
+    domain: str,
 ) -> Iterable[DltResource]:
     """
     Retrieves data from Salesforce using the Salesforce API.
@@ -26,7 +27,7 @@ def salesforce_source(
         DltResource: Data resources from Salesforce.
     """
-    client = Salesforce(username, password, token)
+    client = Salesforce(username, password, token, domain=domain)
     # define resources
     @dlt.resource(write_disposition="replace")
@@ -37,7 +38,7 @@ def salesforce_source(
     def user_role() -> Iterable[TDataItem]:
         yield get_records(client, "UserRole")
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def opportunity(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "SystemModstamp", initial_value=None
@@ -47,7 +48,7 @@ def salesforce_source(
             client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
         )
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def opportunity_line_item(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "SystemModstamp", initial_value=None
@@ -57,7 +58,7 @@ def salesforce_source(
             client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
         )
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def opportunity_contact_role(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "SystemModstamp", initial_value=None
@@ -70,7 +71,7 @@ def salesforce_source(
             "SystemModstamp",
         )
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def account(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "LastModifiedDate", initial_value=None
@@ -92,7 +93,7 @@ def salesforce_source(
     def campaign() -> Iterable[TDataItem]:
         yield get_records(client, "Campaign")
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def campaign_member(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "SystemModstamp", initial_value=None
@@ -114,7 +115,7 @@ def salesforce_source(
     def pricebook_entry() -> Iterable[TDataItem]:
         yield get_records(client, "PricebookEntry")
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def task(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "SystemModstamp", initial_value=None
@@ -122,7 +123,7 @@ def salesforce_source(
     ) -> Iterable[TDataItem]:
         yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")
-    @dlt.resource(write_disposition="merge")
+    @dlt.resource(write_disposition="merge", primary_key="id")
     def event(
         last_timestamp: incremental[str] = dlt.sources.incremental(
             "SystemModstamp", initial_value=None

ingestr/src/sources.py CHANGED Viewed

@@ -2515,6 +2515,7 @@ class SalesforceSource:
             "username": params.get("username", [None])[0],
             "password": params.get("password", [None])[0],
             "token": params.get("token", [None])[0],
+            "domain": params.get("domain", [None])[0],
         }
         for k, v in creds.items():
             if v is None:

{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.87
+Version: 0.13.89
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -42,7 +42,7 @@ Requires-Dist: confluent-kafka==2.8.0
 Requires-Dist: crate==2.0.0
 Requires-Dist: cryptography==44.0.2
 Requires-Dist: curlify==2.2.1
-Requires-Dist: databricks-sql-connector==2.9.3
+Requires-Dist: databricks-sql-connector==4.0.5
 Requires-Dist: databricks-sqlalchemy==1.0.2
 Requires-Dist: dataclasses-json==0.6.7
 Requires-Dist: decorator==5.2.1

{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/RECORD RENAMED Viewed

@@ -1,24 +1,25 @@
 ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
-ingestr/main.py,sha256=qoWHNcHh0-xVnyQxbQ-SKuTxPb1RNV3ENkCpqO7CLrk,26694
+ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=1rVIau-By8hnJDfUNsHKA3_2BZoQHq9yn0wY8bWtb3U,21
-ingestr/src/destinations.py,sha256=M2Yni6wiWcrvZ8EPJemidqxN156l0rehgCc7xuil7mo,22840
+ingestr/src/buildinfo.py,sha256=YfKg385xjqfTlxcN3drjtO_64R31p1f28goiV70TrQY,21
+ingestr/src/destinations.py,sha256=QNT2rm91cZmY1_Zyj4VnbI14qGmZOUQOQUg9xUTVVYs,23799
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
 ingestr/src/factory.py,sha256=hC5E_XgrgTHMqwqPc6ihUYvRGTGMTzdPfQhrgPyD0tY,6945
-ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
+ingestr/src/filters.py,sha256=0n0sNAVG_f-B_1r7lW5iNtw9z_G1bxWzPaiL1i6tnbU,1665
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
+ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=YtqbkrF_z5n6Ccmj6kiYgjGMPL08r_1vc9YOvNhXlcw,125121
+ingestr/src/sources.py,sha256=MM_-6ZmIwFLS_L4kBkwJJc0XDyjDyHUkxMMnQaRfuRA,125176
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
 ingestr/src/adjust/adjust_helpers.py,sha256=IHSS94A7enOWkZ8cP5iW3RdYt0Xl3qZGAmDc1Xy4qkI,3802
 ingestr/src/airtable/__init__.py,sha256=XzRsS39xszUlh_s7P1_zq5v8vLfjz3m-NtTPaa8TTZU,2818
-ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
+ingestr/src/applovin/__init__.py,sha256=Y02ysL2vRiDoP3uN9ven2OVcH9zTd8PbmIjqIHds4zU,6985
 ingestr/src/applovin_max/__init__.py,sha256=fxXqsIibJarp5NOGe08G964HftwLDymTtYS_LqPJht4,3315
 ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
 ingestr/src/appsflyer/client.py,sha256=E6xPW4KlbBnQZ0K4eq2Xgb3AmGrtrzIX9bX8EnQr-D4,3615
@@ -28,7 +29,7 @@ ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2M
 ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
 ingestr/src/appstore/resources.py,sha256=DJxnNrBohVV0uSeruGV-N_e7UHSlhMhjhYNYdBuqECU,5375
 ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0,2848
-ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
+ingestr/src/asana_source/__init__.py,sha256=p9p89e62Qd3YmrrCCkIclswciSX51pBOMCuT7Ukeq2I,8184
 ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
 ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
 ingestr/src/attio/__init__.py,sha256=CLejJjp5vGkt6r18nfNNZ-Xjc1SZgQ5IlcBW5XFQR90,3243
@@ -46,13 +47,13 @@ ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfE
 ingestr/src/facebook_ads/helpers.py,sha256=c-WG008yU_zIdhFwljtqE2jfjVYuaVoNKldxcnJN3U4,9761
 ingestr/src/facebook_ads/settings.py,sha256=Bsic8RcmH-NfEZ7r_NGospTCmwISK9XaMT5y2NZirtg,4938
 ingestr/src/facebook_ads/utils.py,sha256=ES2ylPoW3j3fjp6OMUgp21n1cG1OktXsmWWMk5vBW_I,1590
-ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
+ingestr/src/filesystem/__init__.py,sha256=42YAOHQxZ7TkTXC1eeaLUJpjqJ3l7DH7C8j927pV4pc,4353
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
 ingestr/src/fluxx/__init__.py,sha256=Ei8BE0KAEzpadJT9RO5-8zMA7LvnIPhNPDKF4EyBcLo,328980
 ingestr/src/fluxx/helpers.py,sha256=dCNgvMMTSEO4LNp6luNZ-XrV4NPW-_OUfmp0k3jFhuc,6602
-ingestr/src/frankfurter/__init__.py,sha256=z98RblQx1ab2GFowDq4l5xdnv-sLb41MPGitH-y2ahc,5242
-ingestr/src/frankfurter/helpers.py,sha256=tEtx9VU7IchRmtKRIEq_r8MclNVs8vL4E_RjGW2ZSh0,1504
+ingestr/src/frankfurter/__init__.py,sha256=aeyiv1jwcwblV5OeqG81vFcJo_Wc1bUlDwzdE4gnQiw,5246
+ingestr/src/frankfurter/helpers.py,sha256=SpRr992OcSf7IDI5y-ToUdO6m6sGpqFz59LTY0ojchI,1502
 ingestr/src/freshdesk/__init__.py,sha256=ukyorgCNsW_snzsYBDsr3Q0WB8f-to9Fk0enqHHFQlk,3087
 ingestr/src/freshdesk/freshdesk_client.py,sha256=1nFf0K4MQ0KZbWwk4xSbYHaykVqmPLfN39miOFDpWVc,4385
 ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
@@ -95,7 +96,7 @@ ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnb
 ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
 ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
 ingestr/src/mongodb/__init__.py,sha256=wu3KJ3VH5FF67gctJqm4T3ZTdBOQam1u6xuFBohq7bs,7486
-ingestr/src/mongodb/helpers.py,sha256=JyZvi93_WFUowctEqOdYHNnVOWXcDdAhae-25W3jvLA,31680
+ingestr/src/mongodb/helpers.py,sha256=TmEbQ-Rz5ajxmaMgZa7nrI13-L7Z_ClbFCFPnmPIrgE,31739
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
 ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -113,9 +114,9 @@ ingestr/src/pipedrive/helpers/__init__.py,sha256=UX1K_qnGXB0ShtnBOfp2XuVbK8RRoCK
 ingestr/src/pipedrive/helpers/custom_fields_munger.py,sha256=rZ4AjdITHfJE2NNomCR7vMBS1KnWpEGVF6fADwsIHUE,4488
 ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl3HhE3-6eA,3358
 ingestr/src/quickbooks/__init__.py,sha256=cZUuVCOTGPHTscRj6i0DytO63_fWF-4ieMxoU4PcyTg,3727
-ingestr/src/revenuecat/__init__.py,sha256=2UBEkIPlsuJKq0TYR-LSsLk2F4ubcQ6g_H4Fw1I8zDQ,4041
-ingestr/src/revenuecat/helpers.py,sha256=QpgszejLEBsn9Km-DNTidPPnapBesDCnweg1IPOgoRw,9635
-ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
+ingestr/src/revenuecat/__init__.py,sha256=5HbyZuEOekkbeeT72sM_bnGygSyYdmd_vczfAUz7xoM,4029
+ingestr/src/revenuecat/helpers.py,sha256=CYU6l79kplnfL87GfdxyGeEBrBSWEZfGP0GyjPHuVDk,9619
+ingestr/src/salesforce/__init__.py,sha256=HVHY8pDngB498B6g6KDzwq-q2KPU4PxuEd9Y_8tDDFs,4716
 ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
 ingestr/src/shopify/__init__.py,sha256=RzSSG93g-Qlkz6TAxi1XasFDdxxtVXIo53ZTtjGczW4,62602
 ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
@@ -157,8 +158,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.87.dist-info/METADATA,sha256=JVXcUZ0Q-y74Les20MSztmp0u_d6Y0M1XHpW6z8v-T4,15182
-ingestr-0.13.87.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ingestr-0.13.87.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
-ingestr-0.13.87.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
-ingestr-0.13.87.dist-info/RECORD,,
+ingestr-0.13.89.dist-info/METADATA,sha256=EfmN2TdrNG9oBfU-U78YNWe9tSZr6smMS5kynvFUBZE,15182
+ingestr-0.13.89.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.89.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.89.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.89.dist-info/RECORD,,

{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/WHEEL RENAMED Viewed

File without changes

{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ingestr-0.13.87.dist-info → ingestr-0.13.89.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

ingestr 0.13.87__py3-none-any.whl → 0.13.89__py3-none-any.whl

Potentially problematic release.

ingestr 0.13.87__py3-none-any.whl → 0.13.89__py3-none-any.whl