ingestr 0.13.9__py3-none-any.whl → 0.13.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ingestr might be problematic.

ingestr/src/applovin/__init__.py CHANGED
@@ -1,10 +1,10 @@
- from datetime import datetime, timezone, timedelta
+ from datetime import datetime, timedelta, timezone
  from enum import Enum
  from typing import Dict, List, Optional
- from requests import Response

  import dlt
  from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
+ from requests import Response


  class InvalidCustomReportError(Exception):
@@ -13,9 +13,11 @@ class InvalidCustomReportError(Exception):
  "Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}"
  )

+
  class ClientError(Exception):
  pass

+
  TYPE_HINTS = {
  "application_is_hidden": {"data_type": "bool"},
  "average_cpa": {"data_type": "double"},
@@ -119,7 +121,6 @@ def applovin_source(
  end_date: Optional[str],
  custom: Optional[str],
  ):
-
  backfill = False
  if end_date is None:
  backfill = True
@@ -127,7 +128,7 @@ def applovin_source(
  # use the greatest of yesterday and start_date
  end_date = max(
  datetime.now(timezone.utc) - timedelta(days=1),
- datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
+ datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc),
  ).strftime("%Y-%m-%d")

  config: RESTAPIConfig = {
@@ -157,7 +158,7 @@ def applovin_source(
  "paginator": "single_page",
  "response_actions": [
  http_error_handler,
- ]
+ ],
  },
  },
  "resources": [
@@ -177,8 +178,7 @@ def applovin_source(
  "advertiser-probabilistic-report",
  "probabilisticReport",
  exclude(
- REPORT_SCHEMA[ReportType.ADVERTISER],
- PROBABILISTIC_REPORT_EXCLUDE
+ REPORT_SCHEMA[ReportType.ADVERTISER], PROBABILISTIC_REPORT_EXCLUDE
  ),
  ReportType.ADVERTISER,
  ),
@@ -256,6 +256,7 @@ def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
  def build_type_hints(cols: List[str]) -> dict:
  return {col: TYPE_HINTS[col] for col in cols if col in TYPE_HINTS}

+
  def http_error_handler(resp: Response):
  if not resp.ok:
  raise ClientError(f"HTTP Status {resp.status_code}: {resp.text}")
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
- version = "v0.13.9"
+ version = "v0.13.11"
ingestr/src/factory.py CHANGED
@@ -42,12 +42,14 @@ from ingestr.src.sources import (
  MongoDbSource,
  NotionSource,
  S3Source,
+ SalesforceSource,
  ShopifySource,
  SlackSource,
  SqlSource,
  StripeAnalyticsSource,
  TikTokSource,
  ZendeskSource,
+ PersonioSource,
  )

  SQL_SOURCE_SCHEMES = [
@@ -136,6 +138,8 @@ class SourceDestinationFactory:
  "linkedinads": LinkedInAdsSource,
  "applovin": AppLovinSource,
  "applovinmax": ApplovinMaxSource,
+ "salesforce": SalesforceSource,
+ "personio": PersonioSource,
  }
  destinations: Dict[str, Type[DestinationProtocol]] = {
  "bigquery": BigQueryDestination,
ingestr/src/personio/__init__.py ADDED
@@ -0,0 +1,331 @@
+ """Fetches Personio Employees, Absences, Attendances."""
+
+ from typing import Iterable, Optional
+
+ import dlt
+ from dlt.common import pendulum
+ from dlt.common.time import ensure_pendulum_datetime
+ from dlt.common.typing import TAnyDateTime, TDataItem
+ from dlt.sources import DltResource
+
+ from .helpers import PersonioAPI
+
+
+ @dlt.source(name="personio", max_table_nesting=0)
+ def personio_source(
+ start_date: TAnyDateTime,
+ end_date: Optional[TAnyDateTime] = None,
+ client_id: str = dlt.secrets.value,
+ client_secret: str = dlt.secrets.value,
+ items_per_page: int = 200,
+ ) -> Iterable[DltResource]:
+ """
+ The source for the Personio pipeline. Available resources are employees, absences, and attendances.
+
+ Args:
+ client_id: The client ID of your app.
+ client_secret: The client secret of your app.
+ items_per_page: The max number of items to fetch per page. Defaults to 200.
+ Returns:
+ Iterable: A list of DltResource objects representing the data resources.
+ """
+
+ client = PersonioAPI(client_id, client_secret)
+
+ @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
+ def employees(
+ updated_at: dlt.sources.incremental[
+ pendulum.DateTime
+ ] = dlt.sources.incremental(
+ "last_modified_at", initial_value=None, allow_external_schedulers=True
+ ),
+ items_per_page: int = items_per_page,
+ ) -> Iterable[TDataItem]:
+ """
+ The resource for employees, supports incremental loading and pagination.
+
+ Args:
+ updated_at: The saved state of the last 'last_modified_at' value.
+ items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+ Returns:
+ Iterable: A generator of employees.
+ """
+
+ def convert_item(item: TDataItem) -> TDataItem:
+ """Converts an employee item."""
+ attributes = item.get("attributes", {})
+ output = {}
+ for value in attributes.values():
+ name = value["universal_id"]
+ if not name:
+ label: str = value["label"].replace(" ", "_")
+ name = label.lower()
+
+ if value["type"] == "date" and value["value"]:
+ output[name] = ensure_pendulum_datetime(value["value"])
+ else:
+ output[name] = value["value"]
+ return output
+
+ if updated_at.last_value:
+ last_value = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
+ else:
+ last_value = None
+
+ params = {"limit": items_per_page, "updated_since": last_value}
+
+ pages = client.get_pages("company/employees", params=params)
+ for page in pages:
+ yield [convert_item(item) for item in page]
+
+ @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+ def absence_types(items_per_page: int = items_per_page) -> Iterable[TDataItem]:
+ """
+ The resource for absence types (time-off-types), supports pagination.
+
+ Args:
+ items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+ Returns:
+ Iterable: A generator of absences.
+ """
+
+ pages = client.get_pages(
+ "company/time-off-types", params={"limit": items_per_page}
+ )
+
+ for page in pages:
+ yield [item.get("attributes", {}) for item in page]
+
+ @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
+ def absences(
+ updated_at: dlt.sources.incremental[
+ pendulum.DateTime
+ ] = dlt.sources.incremental(
+ "updated_at", initial_value=None, allow_external_schedulers=True
+ ),
+ items_per_page: int = items_per_page,
+ ) -> Iterable[TDataItem]:
+ """
+ The resource for absence (time-offs), supports incremental loading and pagination.
+
+ Args:
+ updated_at: The saved state of the last 'updated_at' value.
+ items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+ Returns:
+ Iterable: A generator of absences.
+ """
+ if updated_at.last_value:
+ updated_iso = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
+ else:
+ updated_iso = None
+
+ params = {
+ "limit": items_per_page,
+ "updated_since": updated_iso,
+ }
+
+ def convert_item(item: TDataItem) -> TDataItem:
+ output = item.get("attributes", {})
+ output["created_at"] = ensure_pendulum_datetime(output["created_at"])
+ output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
+ return output
+
+ pages = client.get_pages(
+ "company/time-offs",
+ params=params,
+ offset_by_page=True,
+ )
+
+ for page in pages:
+ yield [convert_item(item) for item in page]
+
+ @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
+ def attendances(
+ start_date: TAnyDateTime = start_date,
+ end_date: Optional[TAnyDateTime] = end_date,
+ updated_at: dlt.sources.incremental[
+ pendulum.DateTime
+ ] = dlt.sources.incremental(
+ "updated_at", initial_value=None, allow_external_schedulers=True
+ ),
+ items_per_page: int = items_per_page,
+ ) -> Iterable[TDataItem]:
+ """
+ The resource for attendances, supports incremental loading and pagination.
+
+ Args:
+ start_date: The start date to fetch attendances from.
+ end_date: The end date to fetch attendances from. Defaults to now.
+ updated_at: The saved state of the last 'updated_at' value.
+ items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+ Returns:
+ Iterable: A generator of attendances.
+ """
+
+ end_date = end_date or pendulum.now()
+ if updated_at.last_value:
+ updated_iso = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
+ else:
+ updated_iso = None
+
+ params = {
+ "limit": items_per_page,
+ "start_date": ensure_pendulum_datetime(start_date).to_date_string(),
+ "end_date": ensure_pendulum_datetime(end_date).to_date_string(),
+ "updated_from": updated_iso,
+ "includePending": True,
+ }
+ pages = client.get_pages(
+ "company/attendances",
+ params=params,
+ )
+
+ def convert_item(item: TDataItem) -> TDataItem:
+ """Converts an attendance item."""
+ output = dict(id=item["id"], **item.get("attributes"))
+ output["date"] = ensure_pendulum_datetime(output["date"]).date()
+ output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
+ return output
+
+ for page in pages:
+ yield [convert_item(item) for item in page]
+
+ @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+ def projects() -> Iterable[TDataItem]:
+ """
+ The resource for projects.
+
+ Returns:
+ Iterable: A generator of projects.
+ """
+
+ pages = client.get_pages("company/attendances/projects")
+
+ def convert_item(item: TDataItem) -> TDataItem:
+ """Converts an attendance item."""
+ output = dict(id=item["id"], **item.get("attributes"))
+ output["created_at"] = ensure_pendulum_datetime(output["created_at"])
+ output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
+ return output
+
+ for page in pages:
+ yield [convert_item(item) for item in page]
+
+ @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+ def document_categories() -> Iterable[TDataItem]:
+ """
+ The resource for document_categories.
+
+ Returns:
+ Iterable: A generator of document_categories.
+ """
+
+ pages = client.get_pages("company/document-categories")
+
+ def convert_item(item: TDataItem) -> TDataItem:
+ """Converts an document_categories item."""
+ output = dict(id=item["id"], **item.get("attributes"))
+ return output
+
+ for page in pages:
+ yield [convert_item(item) for item in page]
+
+ @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+ def custom_reports_list() -> Iterable[TDataItem]:
+ """
+ The resource for custom_reports.
+
+ Returns:
+ Iterable: A generator of custom_reports.
+ """
+
+ pages = client.get_pages("company/custom-reports/reports")
+
+ for page in pages:
+ yield [item.get("attributes", {}) for item in page]
+
+ @dlt.transformer(
+ data_from=employees,
+ write_disposition="merge",
+ primary_key=["employee_id", "id"],
+ )
+ @dlt.defer
+ def employees_absences_balance(employees_item: TDataItem) -> Iterable[TDataItem]:
+ """
+ The transformer for employees_absences_balance.
+
+ Args:
+ employees_item: The employee data.
+
+ Returns:
+ Iterable: A generator of employees_absences_balance for each employee.
+ """
+ for employee in employees_item:
+ employee_id = employee["id"]
+ pages = client.get_pages(
+ f"company/employees/{employee_id}/absences/balance",
+ )
+
+ for page in pages:
+ yield [dict(employee_id=employee_id, **i) for i in page]
+
+ @dlt.transformer(
+ data_from=custom_reports_list,
+ write_disposition="merge",
+ primary_key=["report_id", "item_id"],
+ )
+ @dlt.defer
+ def custom_reports(
+ custom_reports_item: TDataItem, items_per_page: int = items_per_page
+ ) -> Iterable[TDataItem]:
+ """
+ The transformer for custom reports, supports pagination.
+
+ Args:
+ custom_reports_item: The custom_report data.
+ items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+ Returns:
+ Iterable: A generator of employees_absences_balance for each employee.
+ """
+
+ def convert_item(item: TDataItem, report_id: str) -> TDataItem:
+ """Converts an employee item."""
+ attributes = item.pop("attributes")
+ output = dict(report_id=report_id, item_id=list(item.values())[0])
+ for value in attributes:
+ name = value["attribute_id"]
+ if value["data_type"] == "date" and value["value"]:
+ output[name] = ensure_pendulum_datetime(value["value"])
+ else:
+ output[name] = value["value"]
+ return output
+
+ for custom_report in custom_reports_item:
+ report_id = custom_report["id"]
+ pages = client.get_pages(
+ f"company/custom-reports/reports/{report_id}",
+ params={"limit": items_per_page},
+ offset_by_page=True,
+ )
+
+ for page in pages:
+ for report in page:
+ report_items = report.get("attributes", {}).get("items", [])
+ yield [convert_item(item, report_id) for item in report_items]
+
+ return (
+ employees,
+ absence_types,
+ absences,
+ attendances,
+ projects,
+ document_categories,
+ employees_absences_balance,
+ custom_reports_list,
+ custom_reports,
+ )
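The file above only defines the dlt source; a minimal sketch of loading it could look like the following (the pipeline name, duckdb destination, and credential placeholders are illustrative and not part of the package):

    import dlt
    from ingestr.src.personio import personio_source

    pipeline = dlt.pipeline(
        pipeline_name="personio_demo",   # illustrative name
        destination="duckdb",            # any dlt destination would do
        dataset_name="personio_raw",
    )
    source = personio_source(
        start_date="2018-01-01",
        client_id="<client_id>",         # placeholder credentials
        client_secret="<client_secret>",
    ).with_resources("employees", "absences")
    print(pipeline.run(source))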
ingestr/src/personio/helpers.py ADDED
@@ -0,0 +1,85 @@
+ """Personio source helpers"""
+ from typing import Any, Iterable, Optional
+ from urllib.parse import urljoin
+
+ from dlt.common.typing import Dict, TDataItems
+ from dlt.sources.helpers import requests
+
+
+ class PersonioAPI:
+ """A Personio API client."""
+
+ base_url = "https://api.personio.de/v1/"
+
+ def __init__(self, client_id: str, client_secret: str) -> None:
+ """
+ Args:
+ client_id: The client ID of your app.
+ client_secret: The client secret of your app.
+ """
+ self.client_id = client_id
+ self.client_secret = client_secret
+ self.access_token = self.get_token()
+
+ def get_token(self) -> str:
+ """Get an access token from Personio.
+
+ Returns:
+ The access token.
+ """
+ headers = {"Content-Type": "application/json", "Accept": "application/json"}
+ data = {"client_id": self.client_id, "client_secret": self.client_secret}
+ url = urljoin(self.base_url, "auth")
+ response = requests.request("POST", url, headers=headers, json=data)
+ json_response = response.json()
+ token: str = json_response["data"]["token"]
+ return token
+
+ def get_pages(
+ self,
+ resource: str,
+ params: Optional[Dict[str, Any]] = None,
+ offset_by_page: bool = False,
+ ) -> Iterable[TDataItems]:
+ """Get all pages from Personio using requests.
+
+ Args:
+ resource: The resource to get pages for (e.g. employees, absences, attendances).
+ params: The parameters for the resource.
+ offset_by_page (bool): If True, offset increases by 1 per page; else, increases by page_size.
+
+ Yields:
+ List of data items from the page
+ """
+ params = params or {}
+ headers = {"Authorization": f"Bearer {self.access_token}"}
+ params.update({"offset": int(offset_by_page), "page": int(offset_by_page)})
+ url = urljoin(self.base_url, resource)
+ starts_from_zero = False
+ while True:
+ response = requests.get(url, headers=headers, params=params)
+ json_response = response.json()
+ # Get an item list from the page
+ yield json_response["data"]
+
+ metadata = json_response.get("metadata")
+ if not metadata:
+ break
+
+ total_pages = metadata.get("total_pages")
+ current_page = metadata.get("current_page")
+ if current_page == 0:
+ starts_from_zero = True
+
+ if (
+ current_page >= (total_pages - int(starts_from_zero))
+ or not json_response["data"]
+ ):
+ break
+
+ if offset_by_page:
+ params["offset"] += 1
+ params["page"] += 1
+ else:
+ params["offset"] += params["limit"]
+ params["page"] += 1
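In isolation, the pagination loop of this client can be driven like so; the credentials are placeholders and the endpoint name simply reuses one from the source above:

    from ingestr.src.personio.helpers import PersonioAPI

    api = PersonioAPI("<client_id>", "<client_secret>")  # placeholder credentials
    for page in api.get_pages("company/employees", params={"limit": 200}):
        print(len(page))  # each yielded page is a list of employee records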
ingestr/src/salesforce/__init__.py ADDED
@@ -0,0 +1,149 @@
+ from typing import Iterable
+
+ import dlt
+ from dlt.common.typing import TDataItem
+ from dlt.sources import DltResource, incremental
+ from simple_salesforce import Salesforce
+
+ from .helpers import get_records
+
+
+ @dlt.source(name="salesforce")
+ def salesforce_source(
+ username: str,
+ password: str,
+ token: str,
+ ) -> Iterable[DltResource]:
+ """
+ Retrieves data from Salesforce using the Salesforce API.
+
+ Args:
+ username (str): The username for authentication.
+ password (str): The password for authentication.
+ token (str): The security token for authentication.
+
+ Yields:
+ DltResource: Data resources from Salesforce.
+ """
+
+ client = Salesforce(username, password, token)
+
+ # define resources
+ @dlt.resource(write_disposition="replace")
+ def user() -> Iterable[TDataItem]:
+ yield get_records(client, "User")
+
+ @dlt.resource(write_disposition="replace")
+ def user_role() -> Iterable[TDataItem]:
+ yield get_records(client, "UserRole")
+
+ @dlt.resource(write_disposition="merge")
+ def opportunity(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "SystemModstamp", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(
+ client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
+ )
+
+ @dlt.resource(write_disposition="merge")
+ def opportunity_line_item(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "SystemModstamp", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(
+ client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
+ )
+
+ @dlt.resource(write_disposition="merge")
+ def opportunity_contact_role(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "SystemModstamp", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(
+ client,
+ "OpportunityContactRole",
+ last_timestamp.last_value,
+ "SystemModstamp",
+ )
+
+ @dlt.resource(write_disposition="merge")
+ def account(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "LastModifiedDate", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(
+ client, "Account", last_timestamp.last_value, "LastModifiedDate"
+ )
+
+ @dlt.resource(write_disposition="replace")
+ def contact() -> Iterable[TDataItem]:
+ yield get_records(client, "Contact")
+
+ @dlt.resource(write_disposition="replace")
+ def lead() -> Iterable[TDataItem]:
+ yield get_records(client, "Lead")
+
+ @dlt.resource(write_disposition="replace")
+ def campaign() -> Iterable[TDataItem]:
+ yield get_records(client, "Campaign")
+
+ @dlt.resource(write_disposition="merge")
+ def campaign_member(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "SystemModstamp", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(
+ client, "CampaignMember", last_timestamp.last_value, "SystemModstamp"
+ )
+
+ @dlt.resource(write_disposition="replace")
+ def product() -> Iterable[TDataItem]:
+ yield get_records(client, "Product2")
+
+ @dlt.resource(write_disposition="replace")
+ def pricebook() -> Iterable[TDataItem]:
+ yield get_records(client, "Pricebook2")
+
+ @dlt.resource(write_disposition="replace")
+ def pricebook_entry() -> Iterable[TDataItem]:
+ yield get_records(client, "PricebookEntry")
+
+ @dlt.resource(write_disposition="merge")
+ def task(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "SystemModstamp", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")
+
+ @dlt.resource(write_disposition="merge")
+ def event(
+ last_timestamp: incremental[str] = dlt.sources.incremental(
+ "SystemModstamp", initial_value=None
+ ),
+ ) -> Iterable[TDataItem]:
+ yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")
+
+ return (
+ user,
+ user_role,
+ opportunity,
+ opportunity_line_item,
+ opportunity_contact_role,
+ account,
+ contact,
+ lead,
+ campaign,
+ campaign_member,
+ product,
+ pricebook,
+ pricebook_entry,
+ task,
+ event,
+ )
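A comparable sketch for loading the Salesforce source (the credentials are placeholders and duckdb is only an example destination; a real run needs a Salesforce username, password, and security token):

    import dlt
    from ingestr.src.salesforce import salesforce_source

    pipeline = dlt.pipeline(
        pipeline_name="salesforce_demo",
        destination="duckdb",
        dataset_name="salesforce_raw",
    )
    source = salesforce_source(
        username="<username>",          # placeholder credentials
        password="<password>",
        token="<security_token>",
    ).with_resources("opportunity", "account")
    print(pipeline.run(source))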
ingestr/src/salesforce/helpers.py ADDED
@@ -0,0 +1,64 @@
+ """Salesforce source helpers"""
+
+ from typing import Iterable, Optional
+
+ import pendulum
+ from dlt.common.typing import TDataItem
+ from simple_salesforce import Salesforce
+
+
+ def get_records(
+ sf: Salesforce,
+ sobject: str,
+ last_state: Optional[str] = None,
+ replication_key: Optional[str] = None,
+ ) -> Iterable[TDataItem]:
+ """
+ Retrieves records from Salesforce for a specified sObject.
+
+ Args:
+ sf (Salesforce): An instance of the Salesforce API client.
+ sobject (str): The name of the sObject to retrieve records from.
+ last_state (str, optional): The last known state for incremental loading. Defaults to None.
+ replication_key (str, optional): The replication key for incremental loading. Defaults to None.
+
+ Yields:
+ Dict[TDataItem]: A dictionary representing a record from the Salesforce sObject.
+ """
+
+ # Get all fields for the sobject
+ desc = getattr(sf, sobject).describe()
+ # Salesforce returns compound fields as separate fields, so we need to filter them out
+ compound_fields = {
+ f["compoundFieldName"]
+ for f in desc["fields"]
+ if f["compoundFieldName"] is not None
+ } - {"Name"}
+ # Salesforce returns datetime fields as timestamps, so we need to convert them
+ date_fields = {
+ f["name"] for f in desc["fields"] if f["type"] in ("datetime",) and f["name"]
+ }
+ # If no fields are specified, use all fields except compound fields
+ fields = [f["name"] for f in desc["fields"] if f["name"] not in compound_fields]
+
+ # Generate a predicate to filter records by the replication key
+ predicate, order_by, n_records = "", "", 0
+ if replication_key:
+ if last_state:
+ predicate = f"WHERE {replication_key} > {last_state}"
+ order_by = f"ORDER BY {replication_key} ASC"
+ query = f"SELECT {', '.join(fields)} FROM {sobject} {predicate} {order_by}"
+
+ # Query all records in batches
+ for page in getattr(sf.bulk, sobject).query_all(query, lazy_operation=True):
+ for record in page:
+ # Strip out the attributes field
+ record.pop("attributes", None)
+ for field in date_fields:
+ # Convert Salesforce timestamps to ISO 8601
+ if record.get(field):
+ record[field] = pendulum.from_timestamp(
+ record[field] / 1000,
+ ).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+ yield from page
+ n_records += len(page)
ingestr/src/sources.py CHANGED
@@ -15,7 +15,7 @@ from typing import (
  Optional,
  Union,
  )
- from urllib.parse import ParseResult, parse_qs, quote, urlparse
+ from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse

  import dlt
  import gcsfs # type: ignore
@@ -83,6 +83,8 @@ from ingestr.src.linkedin_ads.dimension_time_enum import (
  )
  from ingestr.src.mongodb import mongodb_collection
  from ingestr.src.notion import notion_databases
+ from ingestr.src.personio import personio_source
+ from ingestr.src.salesforce import salesforce_source
  from ingestr.src.shopify import shopify_source
  from ingestr.src.slack import slack_source
  from ingestr.src.sql_database.callbacks import (
@@ -134,10 +136,46 @@ class SqlSource:
  if uri.startswith("mysql://"):
  uri = uri.replace("mysql://", "mysql+pymysql://")

+ # clickhouse://<username>:<password>@<host>:<port>?secure=<secure>
  if uri.startswith("clickhouse://"):
- uri = uri.replace("clickhouse://", "clickhouse+native://")
- if "secure=" not in uri:
- uri += "?secure=1"
+ parsed_uri = urlparse(uri)
+
+ username = parsed_uri.username
+ if not username:
+ raise ValueError(
+ "A username is required to connect to the ClickHouse database."
+ )
+
+ password = parsed_uri.password
+ if not password:
+ raise ValueError(
+ "A password is required to authenticate with the ClickHouse database."
+ )
+
+ host = parsed_uri.hostname
+ if not host:
+ raise ValueError(
+ "The hostname or IP address of the ClickHouse server is required to establish a connection."
+ )
+
+ port = parsed_uri.port
+ if not port:
+ raise ValueError(
+ "The TCP port of the ClickHouse server is required to establish a connection."
+ )
+
+ query_params = parse_qs(parsed_uri.query)
+
+ if "http_port" in query_params:
+ del query_params["http_port"]
+
+ if "secure" not in query_params:
+ query_params["secure"] = ["1"]
+
+ uri = parsed_uri._replace(
+ scheme="clickhouse+native",
+ query=urlencode(query_params, doseq=True),
+ ).geturl()

  query_adapters = []
  if kwargs.get("sql_limit"):
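To illustrate what the new ClickHouse branch does, the rewrite can be reproduced in isolation; this snippet only mirrors the logic above with a made-up URI and is not an API exposed by ingestr:

    from urllib.parse import parse_qs, urlencode, urlparse

    uri = "clickhouse://user:pass@localhost:9000?http_port=8443"
    parsed = urlparse(uri)
    query = parse_qs(parsed.query)
    query.pop("http_port", None)       # http_port is stripped
    query.setdefault("secure", ["1"])  # secure=1 is added when missing
    print(parsed._replace(
        scheme="clickhouse+native",
        query=urlencode(query, doseq=True),
    ).geturl())
    # clickhouse+native://user:pass@localhost:9000?secure=1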
@@ -1753,7 +1791,7 @@ class AppLovinSource:
  def dlt_source(self, uri: str, table: str, **kwargs):
  if kwargs.get("incremental_key") is not None:
  raise ValueError(
- "Google Ads takes care of incrementality on its own, you should not provide incremental_key"
+ "Applovin takes care of incrementality on its own, you should not provide incremental_key"
  )

  parsed_uri = urlparse(uri)
@@ -1833,3 +1871,78 @@ class ApplovinMaxSource:
  api_key=api_key[0],
  application=application[0],
  ).with_resources(table)
+
+
+ class SalesforceSource:
+ def handles_incrementality(self) -> bool:
+ return True
+
+ def dlt_source(self, uri: str, table: str, **kwargs):
+ if kwargs.get("incremental_key"):
+ raise ValueError(
+ "Salesforce takes care of incrementality on its own, you should not provide incremental_key"
+ )
+
+ params = parse_qs(urlparse(uri).query)
+ creds = {
+ "username": params.get("username", [None])[0],
+ "password": params.get("password", [None])[0],
+ "token": params.get("token", [None])[0],
+ }
+ for k, v in creds.items():
+ if v is None:
+ raise MissingValueError(k, "Salesforce")
+
+ src = salesforce_source(**creds) # type: ignore
+
+ if table not in src.resources:
+ raise UnsupportedResourceError(table, "Salesforce")
+
+ return src.with_resources(table)
+
+
+ class PersonioSource:
+ def handles_incrementality(self) -> bool:
+ return True
+
+ # applovin://?client_id=123&client_secret=123
+ def dlt_source(self, uri: str, table: str, **kwargs):
+ parsed_uri = urlparse(uri)
+ params = parse_qs(parsed_uri.query)
+
+ client_id = params.get("client_id")
+ client_secret = params.get("client_secret")
+
+ interval_start = kwargs.get("interval_start")
+ interval_end = kwargs.get("interval_end")
+
+ interval_start_date = (
+ interval_start if interval_start is not None else "2018-01-01"
+ )
+
+ interval_end_date = (
+ interval_end.strftime("%Y-%m-%d") if interval_end is not None else None
+ )
+
+ if client_id is None:
+ raise MissingValueError("client_id", "Personio")
+ if client_secret is None:
+ raise MissingValueError("client_secret", "Personio")
+ if table not in [
+ "employees",
+ "absences",
+ "absence_types",
+ "attendances",
+ "projects",
+ "document_categories",
+ "employees_absences_balance",
+ "custom_reports_list",
+ ]:
+ raise UnsupportedResourceError(table, "Personio")
+
+ return personio_source(
+ client_id=client_id[0],
+ client_secret=client_secret[0],
+ start_date=interval_start_date,
+ end_date=interval_end_date,
+ ).with_resources(table)
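A rough sketch of exercising the two new source classes directly; the URIs and table names are illustrative, they only run against real credentials, and in normal use these values arrive via the ingestr command line rather than a script:

    from ingestr.src.sources import PersonioSource, SalesforceSource

    # Illustrative URIs with placeholder credentials.
    personio = PersonioSource().dlt_source(
        "personio://?client_id=<id>&client_secret=<secret>", "employees"
    )
    salesforce = SalesforceSource().dlt_source(
        "salesforce://?username=<user>&password=<pass>&token=<token>", "opportunity"
    )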
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ingestr
- Version: 0.13.9
+ Version: 0.13.11
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -18,42 +18,43 @@ Requires-Dist: asana==3.2.3
  Requires-Dist: clickhouse-connect==0.8.14
  Requires-Dist: clickhouse-driver==0.2.9
  Requires-Dist: clickhouse-sqlalchemy==0.2.7
- Requires-Dist: confluent-kafka>=2.6.1
+ Requires-Dist: confluent-kafka>=2.8.0
  Requires-Dist: databricks-sql-connector==2.9.3
  Requires-Dist: dataclasses-json==0.6.7
- Requires-Dist: dlt==1.5.0
- Requires-Dist: duckdb-engine==0.13.5
- Requires-Dist: duckdb==1.1.3
+ Requires-Dist: dlt==1.6.1
+ Requires-Dist: duckdb-engine==0.15.0
+ Requires-Dist: duckdb==1.2.0
  Requires-Dist: facebook-business==20.0.0
  Requires-Dist: flatten-json==0.1.14
  Requires-Dist: gcsfs==2024.10.0
  Requires-Dist: google-ads==25.1.0
- Requires-Dist: google-analytics-data==0.18.16
+ Requires-Dist: google-analytics-data==0.18.17
  Requires-Dist: google-api-python-client==2.130.0
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
- Requires-Dist: mysql-connector-python==9.1.0
+ Requires-Dist: mysql-connector-python==9.2.0
  Requires-Dist: pendulum==3.0.0
  Requires-Dist: psutil==6.1.1
  Requires-Dist: psycopg2-binary==2.9.10
  Requires-Dist: py-machineid==0.6.0
  Requires-Dist: pyairtable==2.3.3
  Requires-Dist: pyarrow==18.1.0
- Requires-Dist: pyathena==3.9.0
- Requires-Dist: pymongo==4.10.1
+ Requires-Dist: pyathena==3.12.2
+ Requires-Dist: pymongo==4.11.1
  Requires-Dist: pymysql==1.1.1
  Requires-Dist: pyrate-limiter==3.7.0
  Requires-Dist: redshift-connector==2.1.5
  Requires-Dist: rich==13.9.4
  Requires-Dist: rudder-sdk-python==2.1.4
  Requires-Dist: s3fs==2024.10.0
+ Requires-Dist: simple-salesforce==1.12.6
  Requires-Dist: snowflake-sqlalchemy==1.6.1
- Requires-Dist: sqlalchemy-bigquery==1.12.0
+ Requires-Dist: sqlalchemy-bigquery==1.12.1
  Requires-Dist: sqlalchemy-hana==2.0.0
  Requires-Dist: sqlalchemy-redshift==0.8.14
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
  Requires-Dist: sqlalchemy==1.4.52
  Requires-Dist: stripe==10.7.0
- Requires-Dist: tqdm==4.67.0
+ Requires-Dist: tqdm==4.67.1
  Requires-Dist: typer==0.13.1
  Requires-Dist: types-requests==2.32.0.20240907
  Provides-Extra: odbc
@@ -161,6 +162,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
  <td>✅</td>
  <td>✅</td>
  </tr>
+ <tr>
+ <td>DynamoDB</td>
+ <td>✅</td>
+ <td>-</td>
+ </tr>
  <tr>
  <td>Local CSV file</td>
  <td>✅</td>
@@ -247,11 +253,6 @@ Pull requests are welcome. However, please open an issue first to discuss what y
  <td>✅</td>
  <td>-</td>
  </tr>
- <tr>
- <td>DynamoDB</td>
- <td>✅</td>
- <td>-</td>
- </tr>
  <tr>
  <td>Facebook Ads</td>
  <td>✅</td>
@@ -301,12 +302,22 @@ Pull requests are welcome. However, please open an issue first to discuss what y
  <td>Notion</td>
  <td>✅</td>
  <td>-</td>
+ </tr>
+ <tr>
+ <td>Personio</td>
+ <td>✅</td>
+ <td>-</td>
  </tr>
  <tr>
  <td>S3</td>
  <td>✅</td>
  <td>-</td>
  </tr>
+ <tr>
+ <td>Salesforce</td>
+ <td>✅</td>
+ <td>-</td>
+ </tr>
  <tr>
  <td>Shopify</td>
  <td>✅</td>
@@ -1,20 +1,20 @@
  ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
  ingestr/src/blob.py,sha256=LtEZWoUhm5i2aKerdgEpLtNCf3fdhGGMM4td-LRZVbY,1407
- ingestr/src/buildinfo.py,sha256=gK4juI0DAKgzAPnkZE1wP2N3AmMh6EZjH3gXGTAxWlc,20
+ ingestr/src/buildinfo.py,sha256=PnFKBMVizeXpYaYJ6rkY9m_oU0QCJzbLAOJyEQ8gyRg,21
  ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
- ingestr/src/factory.py,sha256=XYwjy5dfG5mLIU1v-mS17Kwl0cxSs3MG7NtgPPwZ_0U,5009
+ ingestr/src/factory.py,sha256=dOdY4fzeQ-2dgFBGIDFD5ilxpYNfCVqQOureuWzOL-w,5127
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
- ingestr/src/sources.py,sha256=ljh__y_ZXj8NUT0v63ZAT42K1SZsEJEB88YtQHG0IXQ,64830
+ ingestr/src/sources.py,sha256=YlWokgTZoeMQ6PVb9UVU3I99R0cdhkYjEzPf5LNGs30,68582
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
- ingestr/src/applovin/__init__.py,sha256=vtmYnRKnNOSzFWQIbKGbrcu6AcBdHuhPMsNruUvEIgg,7000
+ ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
  ingestr/src/applovin_max/__init__.py,sha256=1NUOeJzRyZZQ95KEirbrlSrk-8SNc9JrlM_5pGgBgHg,2878
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
@@ -74,6 +74,10 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
  ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
+ ingestr/src/personio/__init__.py,sha256=CQ8XX8Q8BG-wgoen3emhe_r8Cx414Fux7P8jQNawWvY,11646
+ ingestr/src/personio/helpers.py,sha256=OmeMzfg4MVtpI7f75D3-9OGZb8SDsKyz0svNm1zJLTw,2900
+ ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
+ ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
  ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
  ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
  ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
@@ -104,8 +108,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
- ingestr-0.13.9.dist-info/METADATA,sha256=aPaAzUYc-2EPu4a0xtimG6l9InUxsWPJ1hFb6-qbUdQ,8956
- ingestr-0.13.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ingestr-0.13.9.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
- ingestr-0.13.9.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
- ingestr-0.13.9.dist-info/RECORD,,
+ ingestr-0.13.11.dist-info/METADATA,sha256=8vjvshEDHgAZEMt3ykbUSlEl_Ky0KtHf6p6vjT6RDGI,9171
+ ingestr-0.13.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ingestr-0.13.11.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ ingestr-0.13.11.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ ingestr-0.13.11.dist-info/RECORD,,