ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/http/readers.py ADDED
@@ -0,0 +1,114 @@
+"""Readers for HTTP file sources"""
+
+import io
+from typing import Any, Iterator, Optional
+from urllib.parse import urlparse
+
+import requests
+from dlt.sources import TDataItems
+
+
+class HttpReader:
+    """Reader for HTTP-based file sources"""
+
+    def __init__(self, url: str, file_format: Optional[str] = None):
+        self.url = url
+        self.file_format = file_format or self._infer_format(url)
+
+        if self.file_format not in ["csv", "json", "parquet"]:
+            raise ValueError(
+                f"Unsupported file format: {self.file_format}. "
+                "Supported formats: csv, json, parquet"
+            )
+
+    def _infer_format(self, url: str) -> str:
+        """Infer file format from URL extension"""
+        parsed = urlparse(url)
+        path = parsed.path.lower()
+
+        if path.endswith(".csv"):
+            return "csv"
+        elif path.endswith(".json") or path.endswith(".jsonl"):
+            return "json"
+        elif path.endswith(".parquet"):
+            return "parquet"
+        else:
+            raise ValueError(
+                f"Cannot infer file format from URL: {url}. "
+                "Please specify file_format parameter."
+            )
+
+    def _download_file(self) -> bytes:
+        """Download file from URL"""
+        response = requests.get(self.url, stream=True, timeout=30)
+        response.raise_for_status()
+        return response.content
+
+    def read_file(self, **kwargs: Any) -> Iterator[TDataItems]:
+        """Read file and yield data in chunks"""
+        content = self._download_file()
+
+        if self.file_format == "csv":
+            yield from self._read_csv(content, **kwargs)
+        elif self.file_format == "json":
+            yield from self._read_json(content, **kwargs)
+        elif self.file_format == "parquet":
+            yield from self._read_parquet(content, **kwargs)
+
+    def _read_csv(
+        self, content: bytes, chunksize: int = 10000, **pandas_kwargs: Any
+    ) -> Iterator[TDataItems]:
+        """Read CSV file with Pandas chunk by chunk"""
+        import pandas as pd  # type: ignore
+
+        kwargs = {**{"header": "infer", "chunksize": chunksize}, **pandas_kwargs}
+
+        file_obj = io.BytesIO(content)
+        for df in pd.read_csv(file_obj, **kwargs):
+            yield df.to_dict(orient="records")
+
+    def _read_json(
+        self, content: bytes, chunksize: int = 1000, **kwargs: Any
+    ) -> Iterator[TDataItems]:
+        """Read JSON or JSONL file"""
+        from dlt.common import json
+
+        file_obj = io.BytesIO(content)
+        text = file_obj.read().decode("utf-8")
+
+        # Try to detect if it's JSONL format (one JSON object per line)
+        lines = text.strip().split("\n")
+
+        if len(lines) > 1:
+            # Likely JSONL format
+            lines_chunk = []
+            for line in lines:
+                if line.strip():
+                    lines_chunk.append(json.loads(line))
+                if len(lines_chunk) >= chunksize:
+                    yield lines_chunk
+                    lines_chunk = []
+            if lines_chunk:
+                yield lines_chunk
+        else:
+            # Single JSON object or array
+            data = json.loads(text)
+            if isinstance(data, list):
+                # Chunk the list
+                for i in range(0, len(data), chunksize):
+                    yield data[i : i + chunksize]
+            else:
+                # Single object
+                yield [data]
+
+    def _read_parquet(
+        self, content: bytes, chunksize: int = 10000, **kwargs: Any
+    ) -> Iterator[TDataItems]:
+        """Read Parquet file"""
+        from pyarrow import parquet as pq  # type: ignore
+
+        file_obj = io.BytesIO(content)
+        parquet_file = pq.ParquetFile(file_obj)
+
+        for batch in parquet_file.iter_batches(batch_size=chunksize):
+            yield batch.to_pylist()
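For orientation, a minimal usage sketch of the new HttpReader (not part of the diff; the URL and chunk size below are placeholders):

# Sketch only: the URL is a placeholder, not from the diff.
from ingestr.src.http.readers import HttpReader

# The format is inferred from the ".csv" extension; pass file_format to override.
reader = HttpReader("https://example.com/data.csv")
for chunk in reader.read_file(chunksize=5000):
    # Each chunk is a list of row dicts from pandas' chunked CSV reader.
    print(len(chunk))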
ingestr/src/http_client.py ADDED
@@ -0,0 +1,24 @@
+import requests
+from dlt.sources.helpers.requests import Client
+
+
+def create_client(retry_status_codes: list[int] | None = None) -> requests.Session:
+    if retry_status_codes is None:
+        retry_status_codes = [502]
+    return Client(
+        raise_for_status=False,
+        retry_condition=retry_on_status_code(retry_status_codes),
+        request_max_attempts=12,
+        request_backoff_factor=10,
+    ).session
+
+    
+def retry_on_status_code(retry_status_codes: list[int]):
+    def retry_on_limit(
+        response: requests.Response | None, exception: BaseException | None
+    ) -> bool:
+        if response is None:
+            return False
+        return response.status_code in retry_status_codes
+
+    return retry_on_limit
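A quick sketch of how create_client might be used (the status codes and URL are illustrative, not from the diff):

# Builds a requests.Session that retries 429/502 responses, up to 12
# attempts with a backoff factor of 10 (the defaults shown in the diff).
from ingestr.src.http_client import create_client

session = create_client(retry_status_codes=[429, 502])
response = session.get("https://api.example.com/items")  # placeholder URL
print(response.status_code)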
ingestr/src/hubspot/__init__.py CHANGED
@@ -32,10 +32,16 @@ from dlt.common import pendulum
 from dlt.common.typing import TDataItems
 from dlt.sources import DltResource
 
-from .helpers import _get_property_names, fetch_data, fetch_property_history
+from .helpers import (
+    _get_property_names,
+    fetch_data,
+    fetch_data_raw,
+    fetch_property_history,
+)
 from .settings import (
     ALL,
     CRM_OBJECT_ENDPOINTS,
+    CRM_SCHEMAS_ENDPOINT,
     DEFAULT_COMPANY_PROPS,
     DEFAULT_CONTACT_PROPS,
     DEFAULT_DEAL_PROPS,
@@ -55,6 +61,7 @@ def hubspot(
     api_key: str = dlt.secrets.value,
     include_history: bool = False,
     include_custom_props: bool = True,
+    custom_object: str = None,
 ) -> Sequence[DltResource]:
     """
     A DLT source that retrieves data from the HubSpot API using the
@@ -86,7 +93,6 @@ def hubspot(
     def companies(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_COMPANY_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot companies resource"""
@@ -94,7 +100,7 @@ def hubspot(
             "company",
             api_key,
             include_history=include_history,
-            props=props,
+            props=DEFAULT_COMPANY_PROPS,
             include_custom_props=include_custom_props,
         )
 
@@ -102,7 +108,6 @@ def hubspot(
     def contacts(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_CONTACT_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot contacts resource"""
@@ -110,7 +115,7 @@ def hubspot(
             "contact",
             api_key,
             include_history,
-            props,
+            DEFAULT_CONTACT_PROPS,
             include_custom_props,
         )
 
@@ -118,7 +123,6 @@ def hubspot(
     def deals(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_DEAL_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot deals resource"""
@@ -126,7 +130,7 @@ def hubspot(
             "deal",
             api_key,
             include_history,
-            props,
+            DEFAULT_DEAL_PROPS,
             include_custom_props,
         )
 
@@ -134,7 +138,6 @@ def hubspot(
     def tickets(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_TICKET_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot tickets resource"""
@@ -142,7 +145,7 @@ def hubspot(
             "ticket",
             api_key,
             include_history,
-            props,
+            DEFAULT_TICKET_PROPS,
             include_custom_props,
         )
 
@@ -150,7 +153,6 @@ def hubspot(
     def products(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_PRODUCT_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot products resource"""
@@ -158,15 +160,21 @@ def hubspot(
             "product",
             api_key,
             include_history,
-            props,
+            DEFAULT_PRODUCT_PROPS,
             include_custom_props,
         )
 
+    @dlt.resource(name="schemas", write_disposition="merge", primary_key="id")
+    def schemas(
+        api_key: str = api_key,
+    ) -> Iterator[TDataItems]:
+        """Hubspot schemas resource"""
+        yield from fetch_data(CRM_SCHEMAS_ENDPOINT, api_key, resource_name="schemas")
+
     @dlt.resource(name="quotes", write_disposition="replace")
     def quotes(
         api_key: str = api_key,
         include_history: bool = include_history,
-        props: Sequence[str] = DEFAULT_QUOTE_PROPS,
         include_custom_props: bool = include_custom_props,
     ) -> Iterator[TDataItems]:
         """Hubspot quotes resource"""
@@ -174,11 +182,55 @@ def hubspot(
             "quote",
             api_key,
             include_history,
-            props,
+            DEFAULT_QUOTE_PROPS,
             include_custom_props,
         )
 
-    return companies, contacts, deals, tickets, products, quotes
+    @dlt.resource(write_disposition="merge", primary_key="hs_object_id")
+    def custom(
+        api_key: str = api_key,
+        custom_object_name: str = custom_object,
+    ) -> Iterator[TDataItems]:
+        custom_objects = fetch_data_raw(CRM_SCHEMAS_ENDPOINT, api_key)
+        object_type_id = None
+        associations = None
+        if ":" in custom_object_name:
+            fields = custom_object_name.split(":")
+            if len(fields) == 2:
+                custom_object_name = fields[0]
+                associations = fields[1]
+
+        custom_object_lowercase = custom_object_name.lower()
+
+        for custom_object in custom_objects["results"]:
+            if custom_object["name"].lower() == custom_object_lowercase:
+                object_type_id = custom_object["objectTypeId"]
+                break
+
+            # sometimes people use the plural name of the object type by accident, we should try to match that if we can
+            if "labels" in custom_object:
+                if custom_object_lowercase == custom_object["labels"]["plural"].lower():
+                    object_type_id = custom_object["objectTypeId"]
+                    break
+
+        if object_type_id is None:
+            raise ValueError(f"There is no such custom object as {custom_object_name}")
+        custom_object_properties = f"crm/v3/properties/{object_type_id}"
+
+        props_pages = fetch_data(custom_object_properties, api_key)
+        props = []
+        for page in props_pages:
+            props.extend([prop["name"] for prop in page])
+        props = ",".join(sorted(list(set(props))))
+
+        custom_object_endpoint = f"crm/v3/objects/{object_type_id}/?properties={props}"
+        if associations:
+            custom_object_endpoint += f"&associations={associations}"
+
+        """Hubspot custom object details resource"""
+        yield from fetch_data(custom_object_endpoint, api_key, resource_name="custom")
+
+    return companies, contacts, deals, tickets, products, quotes, schemas, custom
 
 
 def crm_objects(
@@ -199,15 +251,6 @@ def crm_objects(
 
     props = ",".join(sorted(list(set(props))))
 
-    if len(props) > 2000:
-        raise ValueError(
-            "Your request to Hubspot is too long to process. "
-            "Maximum allowed query length is 2000 symbols, while "
-            f"your list of properties `{props[:200]}`... is {len(props)} "
-            "symbols long. Use the `props` argument of the resource to "
-            "set the list of properties to extract from the endpoint."
-        )
-
    params = {"properties": props, "limit": 100}
 
    yield from fetch_data(CRM_OBJECT_ENDPOINTS[object_type], api_key, params=params)
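The new custom resource resolves a custom object's objectTypeId via the schemas endpoint and accepts an optional association list after a colon (name:associations). A hedged sketch of calling it (the custom object name "machines" and the pipeline settings are hypothetical; this assumes hubspot is a @dlt.source-decorated function, as in the upstream dlt verified source this module derives from):

import dlt

from ingestr.src.hubspot import hubspot

# "machines" is a hypothetical custom object; the ":contacts" suffix asks the
# resource to include contact associations, per the split(":") logic above.
source = hubspot(api_key="...", custom_object="machines:contacts")
pipeline = dlt.pipeline(pipeline_name="hubspot", destination="duckdb")
pipeline.run(source.with_resources("custom"))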
ingestr/src/hubspot/helpers.py CHANGED
@@ -90,7 +90,10 @@ def fetch_property_history(
 
 
 def fetch_data(
-    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
+    endpoint: str,
+    api_key: str,
+    params: Optional[Dict[str, Any]] = None,
+    resource_name: str = None,
 ) -> Iterator[List[Dict[str, Any]]]:
     """
     Fetch data from HUBSPOT endpoint using a specified API key and yield the properties of each result.
@@ -127,32 +130,50 @@ def fetch_data(
     # Parse the API response and yield the properties of each result
     # Parse the response JSON data
     _data = r.json()
+
     # Yield the properties of each result in the API response
     while _data is not None:
         if "results" in _data:
             _objects: List[Dict[str, Any]] = []
             for _result in _data["results"]:
-                _obj = _result.get("properties", _result)
-                if "id" not in _obj and "id" in _result:
-                    # Move id from properties to top level
-                    _obj["id"] = _result["id"]
-
-                if "associations" in _result:
-                    for association in _result["associations"]:
-                        __values = [
-                            {
-                                "value": _obj["hs_object_id"],
-                                f"{association}_id": __r["id"],
-                            }
-                            for __r in _result["associations"][association]["results"]
-                        ]
-
-                        # remove duplicates from list of dicts
-                        __values = [dict(t) for t in {tuple(d.items()) for d in __values}]
-
-                        _obj[association] = __values
-
-                _objects.append(_obj)
+                if resource_name == "schemas":
+                    _objects.append(
+                        {
+                            "name": _result["labels"].get("singular", ""),
+                            "objectTypeId": _result.get("objectTypeId", ""),
+                            "id": _result.get("id", ""),
+                            "fullyQualifiedName": _result.get("fullyQualifiedName", ""),
+                            "properties": _result.get("properties", ""),
+                            "createdAt": _result.get("createdAt", ""),
+                            "updatedAt": _result.get("updatedAt", ""),
+                        }
+                    )
+                else:
+                    _obj = _result.get("properties", _result)
+                    if "id" not in _obj and "id" in _result:
+                        # Move id from properties to top level
+                        _obj["id"] = _result["id"]
+
+                    if "associations" in _result:
+                        for association in _result["associations"]:
+                            __values = [
+                                {
+                                    "value": _obj["hs_object_id"],
+                                    f"{association}_id": __r["id"],
+                                }
+                                for __r in _result["associations"][association][
+                                    "results"
+                                ]
+                            ]
+
+                            # remove duplicates from list of dicts
+                            __values = [
+                                dict(t) for t in {tuple(d.items()) for d in __values}
+                            ]
+
+                            _obj[association] = __values
+
+                    _objects.append(_obj)
             yield _objects
 
         # Follow pagination links if they exist
@@ -186,3 +207,12 @@ def _get_property_names(api_key: str, object_type: str) -> List[str]:
         properties.extend([prop["name"] for prop in page])
 
     return properties
+
+
+def fetch_data_raw(
+    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
+) -> Iterator[List[Dict[str, Any]]]:
+    url = get_url(endpoint)
+    headers = _get_headers(api_key)
+    r = requests.get(url, headers=headers, params=params)
+    return r.json()
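The association handling above dedupes a list of dicts by round-tripping through tuples, since dicts are unhashable. A standalone illustration of the idiom (sample values are made up):

# Same dedup idiom as in fetch_data: each dict becomes a tuple of items,
# the set drops duplicates, and dict() rebuilds the survivors.
values = [{"value": 1, "deal_id": "a"}, {"value": 1, "deal_id": "a"}]
unique = [dict(t) for t in {tuple(d.items()) for d in values}]
print(unique)  # [{'value': 1, 'deal_id': 'a'}]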
ingestr/src/hubspot/settings.py CHANGED
@@ -5,15 +5,22 @@ from dlt.common import pendulum
 STARTDATE = pendulum.datetime(year=2000, month=1, day=1)
 
 CRM_CONTACTS_ENDPOINT = (
-    "/crm/v3/objects/contacts?associations=deals,products,tickets,quotes"
+    "/crm/v3/objects/contacts?associations=companies,deals,products,tickets,quotes"
 )
-CRM_COMPANIES_ENDPOINT = (
-    "/crm/v3/objects/companies?associations=contacts,deals,products,tickets,quotes"
+CRM_COMPANIES_ENDPOINT = "/crm/v3/objects/companies?associations=products"
+CRM_DEALS_ENDPOINT = (
+    "/crm/v3/objects/deals?associations=companies,contacts,products,tickets,quotes"
 )
-CRM_DEALS_ENDPOINT = "/crm/v3/objects/deals"
-CRM_PRODUCTS_ENDPOINT = "/crm/v3/objects/products"
-CRM_TICKETS_ENDPOINT = "/crm/v3/objects/tickets"
-CRM_QUOTES_ENDPOINT = "/crm/v3/objects/quotes"
+CRM_PRODUCTS_ENDPOINT = (
+    "/crm/v3/objects/products?associations=companies,contacts,deals,tickets,quotes"
+)
+CRM_TICKETS_ENDPOINT = (
+    "/crm/v3/objects/tickets?associations=companies,contacts,deals,products,quotes"
+)
+CRM_QUOTES_ENDPOINT = (
+    "/crm/v3/objects/quotes?associations=companies,contacts,deals,products,tickets"
+)
+CRM_SCHEMAS_ENDPOINT = "/crm/v3/schemas"
 
 CRM_OBJECT_ENDPOINTS = {
     "contact": CRM_CONTACTS_ENDPOINT,
ingestr/src/influxdb/__init__.py ADDED
@@ -0,0 +1,46 @@
+from typing import Iterable
+
+import dlt
+import pendulum
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+
+from .client import InfluxClient
+
+
+@dlt.source(max_table_nesting=0)
+def influxdb_source(
+    measurement: str,
+    host: str,
+    org: str,
+    bucket: str,
+    token: str = dlt.secrets.value,
+    secure: bool = True,
+    start_date: pendulum.DateTime = pendulum.datetime(2024, 1, 1),
+    end_date: pendulum.DateTime | None = None,
+) -> Iterable[DltResource]:
+    client = InfluxClient(
+        url=host, token=token, org=org, bucket=bucket, verify_ssl=secure
+    )
+
+    @dlt.resource(name=measurement)
+    def fetch_table(
+        timestamp=dlt.sources.incremental(
+            "time",
+            initial_value=start_date,
+            end_value=end_date,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterable[TDataItem]:
+        if timestamp.last_value is None:
+            start = start_date.isoformat()
+        else:
+            start = timestamp.last_value.isoformat()
+        if timestamp.end_value is None:
+            end = pendulum.now().isoformat()
+        else:
+            end = timestamp.end_value.isoformat()
+        yield from client.fetch_measurement(measurement, start, end)
+
+    return fetch_table
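A minimal sketch of running the new source (connection values are placeholders; only the parameter names come from the diff):

import dlt

from ingestr.src.influxdb import influxdb_source

# Hypothetical connection values for illustration.
source = influxdb_source(
    measurement="cpu",
    host="https://influx.example.com",
    org="my-org",
    bucket="metrics",
    token="...",
)
dlt.pipeline(pipeline_name="influx", destination="duckdb").run(source)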
ingestr/src/influxdb/client.py ADDED
@@ -0,0 +1,34 @@
+from typing import Any, Dict, Iterable
+
+from influxdb_client import InfluxDBClient  # type: ignore
+
+
+class InfluxClient:
+    def __init__(
+        self, url: str, token: str, org: str, bucket: str, verify_ssl: bool = True
+    ) -> None:
+        self.client = InfluxDBClient(
+            url=url, token=token, org=org, verify_ssl=verify_ssl
+        )
+        self.bucket = bucket
+
+    def fetch_measurement(
+        self, measurement: str, start: str, end: str | None = None
+    ) -> Iterable[Dict[str, Any]]:
+        query = f'from(bucket: "{self.bucket}") |> range(start: {start}'
+        if end:
+            query += f", stop: {end}"
+        query += f') |> filter(fn: (r) => r["_measurement"] == "{measurement}")'
+        query_api = self.client.query_api()
+
+        for record in query_api.query_stream(query):
+            cleaned_record = {}
+            exclude_keys = {"result", "table", "_start", "_stop"}
+            for key, value in record.values.items():
+                if key in exclude_keys:
+                    continue
+                if key.startswith("_"):
+                    cleaned_record[key[1:]] = value
+                else:
+                    cleaned_record[key] = value
+            yield cleaned_record
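fetch_measurement builds its Flux query by plain string concatenation; this snippet reproduces that construction with sample values so the resulting query can be inspected:

# Reproduces fetch_measurement's query construction; values are illustrative.
bucket, start, end, measurement = "metrics", "2024-01-01T00:00:00+00:00", None, "cpu"
query = f'from(bucket: "{bucket}") |> range(start: {start}'
if end:
    query += f", stop: {end}"
query += f') |> filter(fn: (r) => r["_measurement"] == "{measurement}")'
print(query)
# from(bucket: "metrics") |> range(start: 2024-01-01T00:00:00+00:00) |> filter(fn: (r) => r["_measurement"] == "cpu")

Since bucket and measurement are interpolated into the Flux string unescaped, the client assumes trusted configuration values.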
ingestr/src/intercom/__init__.py ADDED
@@ -0,0 +1,142 @@
+"""
+Intercom source implementation for data ingestion.
+
+This module provides DLT sources for retrieving data from Intercom API endpoints
+including contacts, companies, conversations, tickets, and more.
+"""
+
+from typing import Optional, Sequence
+
+import dlt
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TAnyDateTime
+from dlt.sources import DltResource, DltSource
+
+from .helpers import (
+    IntercomAPIClient,
+    IntercomCredentialsAccessToken,
+    TIntercomCredentials,
+    convert_datetime_to_timestamp,
+    create_resource_from_config,
+    transform_company,
+    transform_contact,
+    transform_conversation,
+)
+from .helpers import (
+    IntercomCredentialsOAuth as IntercomCredentialsOAuth,
+)
+from .settings import (
+    DEFAULT_START_DATE,
+    RESOURCE_CONFIGS,
+)
+
+
+@dlt.source(name="intercom", max_table_nesting=0)
+def intercom_source(
+    credentials: TIntercomCredentials = dlt.secrets.value,
+    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
+    end_date: Optional[TAnyDateTime] = None,
+) -> Sequence[DltResource]:
+    """
+    A DLT source that retrieves data from Intercom API.
+
+    This source provides access to various Intercom resources including contacts,
+    companies, conversations, tickets, and more. It supports incremental loading
+    for resources that track updated timestamps.
+
+    Args:
+        credentials: Intercom API credentials (AccessToken or OAuth).
+            Defaults to dlt.secrets.value.
+        start_date: The start date for incremental loading.
+            Defaults to January 1, 2020.
+        end_date: Optional end date for incremental loading.
+            If not provided, loads all data from start_date to present.
+
+    Returns:
+        Sequence of DLT resources for different Intercom endpoints.
+
+    Example:
+        >>> source = intercom_source(
+        ...     credentials=IntercomCredentialsAccessToken(
+        ...         access_token="your_token",
+        ...         region="us"
+        ...     ),
+        ...     start_date=datetime(2024, 1, 1)
+        ... )
+    """
+    # Initialize API client
+    api_client = IntercomAPIClient(credentials)
+
+    # Convert dates to pendulum and then to unix timestamps for Intercom API
+    start_date_obj = ensure_pendulum_datetime(start_date) if start_date else None
+    end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None
+
+    # Convert to unix timestamps for API compatibility
+    # Use default start date if none provided
+    if not start_date_obj:
+        from .settings import DEFAULT_START_DATE
+
+        start_date_obj = ensure_pendulum_datetime(DEFAULT_START_DATE)
+
+    start_timestamp = convert_datetime_to_timestamp(start_date_obj)
+    end_timestamp = (
+        convert_datetime_to_timestamp(end_date_obj) if end_date_obj else None
+    )
+
+    # Transform function mapping
+    transform_functions = {
+        "transform_contact": transform_contact,
+        "transform_company": transform_company,
+        "transform_conversation": transform_conversation,
+    }
+
+    # Generate all resources from configuration
+    resources = []
+    for resource_name, config in RESOURCE_CONFIGS.items():
+        resource_func = create_resource_from_config(
+            resource_name,
+            config,
+            api_client,
+            start_timestamp,
+            end_timestamp,
+            transform_functions,
+        )
+
+        # Call the resource function to get the actual resource
+        resources.append(resource_func())
+
+    return resources
+
+
+def intercom(
+    api_key: str,
+    region: str = "us",
+    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
+    end_date: Optional[TAnyDateTime] = None,
+) -> DltSource:
+    """
+    Convenience function to create Intercom source with access token.
+
+    Args:
+        api_key: Intercom API access token.
+        region: Data region (us, eu, or au). Defaults to "us".
+        start_date: Start date for incremental loading.
+        end_date: Optional end date for incremental loading.
+
+    Returns:
+        Sequence of DLT resources.
+
+    Example:
+        >>> source = intercom(
+        ...     api_key="your_access_token",
+        ...     region="us",
+        ...     start_date=datetime(2024, 1, 1)
+        ... )
+    """
+    credentials = IntercomCredentialsAccessToken(access_token=api_key, region=region)
+
+    return intercom_source(
+        credentials=credentials,
+        start_date=start_date,
+        end_date=end_date,
+    )
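A minimal sketch of the convenience entry point (token, region, and dates are placeholders):

from datetime import datetime

import dlt

from ingestr.src.intercom import intercom

# Placeholder credentials; region accepts us, eu, or au per the docstring.
source = intercom(api_key="...", region="eu", start_date=datetime(2024, 1, 1))
dlt.pipeline(pipeline_name="intercom", destination="duckdb").run(source)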