ingestr 0.13.35__py3-none-any.whl → 0.13.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +4 -0
- ingestr/src/frankfurter/__init__.py +44 -36
- ingestr/src/frankfurter/helpers.py +2 -2
- ingestr/src/freshdesk/__init__.py +72 -0
- ingestr/src/freshdesk/freshdesk_client.py +102 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/google_analytics/__init__.py +21 -3
- ingestr/src/google_analytics/helpers.py +121 -6
- ingestr/src/phantombuster/__init__.py +38 -0
- ingestr/src/phantombuster/client.py +65 -0
- ingestr/src/sources.py +120 -54
- {ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/METADATA +1 -1
- {ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/RECORD +17 -12
- {ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/WHEEL +0 -0
- {ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.35"
+version = "v0.13.37"
ingestr/src/factory.py
CHANGED
@@ -53,6 +53,8 @@ from ingestr.src.sources import (
     StripeAnalyticsSource,
     TikTokSource,
     ZendeskSource,
+    FreshdeskSource,
+    PhantombusterSource,
 )

 SQL_SOURCE_SCHEMES = [
@@ -148,6 +150,8 @@ class SourceDestinationFactory:
         "kinesis": KinesisSource,
         "pipedrive": PipedriveSource,
         "frankfurter": FrankfurterSource,
+        "freshdesk": FreshdeskSource,
+        "phantombuster": PhantombusterSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
ingestr/src/frankfurter/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Iterator
+from typing import Any, Iterator

 import dlt
 from dlt.common.pendulum import pendulum
@@ -13,25 +13,28 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
     max_table_nesting=0,
 )
 def frankfurter_source(
-
-
-    end_date: Optional[TAnyDateTime] = None,
+    start_date: TAnyDateTime,
+    end_date: TAnyDateTime,
 ) -> Any:
     """
     A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
     various types of data: currencies, latest rates, historical rates.
-
-    Returns the appropriate resource based on the provided parameters.
     """
-
-
-
+    date_time = dlt.sources.incremental(
+
+        "date",
+        initial_value=start_date,
+        end_value=end_date,
+        range_start="closed",
+        range_end="closed",
+    )

-
-
+    return (
+        currencies(),
+        latest(),
+        exchange_rates(start_date=date_time, end_date=end_date),

-
-    return exchange_rates(start_date=start_date, end_date=end_date)
+    )


 @dlt.resource(
@@ -53,13 +56,13 @@ def currencies() -> Iterator[dict]:


 @dlt.resource(
-    write_disposition="
+    write_disposition="merge",
     columns={
         "date": {"data_type": "text"},
-        "
+        "currency_code": {"data_type": "text"},
         "rate": {"data_type": "double"},
     },
-    primary_key=["date", "
+    primary_key=["date", "currency_code"],  # Composite primary key
 )
 def latest() -> Iterator[dict]:
     """
@@ -69,50 +72,54 @@ def latest() -> Iterator[dict]:
     url = "latest?"

     # Fetch data
-
+    data = get_path_with_retry(url)

     # Extract rates and base currency
-    rates =
+    rates = data["rates"]

-
-    date = pendulum.now().to_date_string()
+    date = pendulum.parse(data["date"])

     # Add the base currency (EUR) with a rate of 1.0
     yield {
         "date": date,
-        "
+        "currency_code": "EUR",
         "rate": 1.0,
     }

     # Add all currencies and their rates
-    for
+    for currency_code, rate in rates.items():
         yield {
             "date": date,
-            "
+            "currency_code": currency_code,
             "rate": rate,
         }


 @dlt.resource(
-    write_disposition="
+    write_disposition="merge",
     columns={
         "date": {"data_type": "text"},
-        "
+        "currency_code": {"data_type": "text"},
         "rate": {"data_type": "double"},
     },
-    primary_key=
+    primary_key=("date", "currency_code"),  # Composite primary key
 )
 def exchange_rates(
-    start_date: TAnyDateTime,
     end_date: TAnyDateTime,
+    start_date: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental("date"),
 ) -> Iterator[dict]:
     """
     Fetches exchange rates for a specified date range.
-    If only start_date is provided, fetches data
+    If only start_date is provided, fetches data until now.
     If both start_date and end_date are provided, fetches data for each day in the range.
     """
-
-
+    # Ensure start_date.last_value is a pendulum.DateTime object
+    start_date_obj = ensure_pendulum_datetime(start_date.last_value)  # type: ignore
+    start_date_str = start_date_obj.format("YYYY-MM-DD")
+
+    # Ensure end_date is a pendulum.DateTime object
+    end_date_obj = ensure_pendulum_datetime(end_date)
+    end_date_str = end_date_obj.format("YYYY-MM-DD")

     # Compose the URL
     url = f"{start_date_str}..{end_date_str}?"
@@ -121,22 +128,23 @@ def exchange_rates(
     data = get_path_with_retry(url)

     # Extract base currency and rates from the API response
-    base_currency = data["base"]
     rates = data["rates"]

     # Iterate over the rates dictionary (one entry per date)
     for date, daily_rates in rates.items():
+        formatted_date = pendulum.parse(date)
+
         # Add the base currency with a rate of 1.0
         yield {
-            "date":
-            "
+            "date": formatted_date,
+            "currency_code": "EUR",
             "rate": 1.0,
         }

         # Add all other currencies and their rates
-        for
+        for currency_code, rate in daily_rates.items():
             yield {
-                "date":
-                "
+                "date": formatted_date,
+                "currency_code": currency_code,
                 "rate": rate,
             }
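A note on the incremental wiring above: the source now hands exchange_rates a preconfigured dlt cursor whose start and end are both inclusive. The following is a minimal sketch of the same pattern using dlt's public incremental API; the resource name "rates" and the dates are illustrative, not part of the package.

import dlt
from dlt.common.pendulum import pendulum

@dlt.resource
def rates(
    # cursor on the "date" field; last_value starts at initial_value and advances
    date=dlt.sources.incremental("date", initial_value=pendulum.datetime(2024, 1, 1)),
):
    yield {"date": date.last_value, "rate": 1.0}

# a source can override the default cursor by passing a configured instance,
# which is what frankfurter_source now does; "closed" makes both ends inclusive
bounded = rates(
    date=dlt.sources.incremental(
        "date",
        initial_value=pendulum.datetime(2024, 1, 1),
        end_value=pendulum.datetime(2024, 1, 31),
        range_start="closed",
        range_end="closed",
    )
)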
ingestr/src/frankfurter/helpers.py
CHANGED
@@ -8,7 +8,7 @@ FRANKFURTER_API_URL = "https://api.frankfurter.dev/v1/"


 def get_url_with_retry(url: str) -> StrAny:
-    r = requests.get(url)
+    r = requests.get(url, timeout=5)
     return r.json()  # type: ignore


@@ -19,7 +19,7 @@ def get_path_with_retry(path: str) -> StrAny:
 def validate_dates(start_date: datetime, end_date: datetime) -> None:
     current_date = pendulum.now()

-    # Check if start_date is in the
+    # Check if start_date is in the futurep
     if start_date > current_date:
         raise ValueError("Interval-start cannot be in the future.")

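For reference, the date check shown in context rejects windows that start in the future. A small sketch of that failure mode, assuming the helper is importable as in the sources.py diff below:

import pendulum
from ingestr.src.frankfurter.helpers import validate_dates

# raises ValueError("Interval-start cannot be in the future.")
validate_dates(
    start_date=pendulum.now().add(days=1),
    end_date=pendulum.now().add(days=2),
)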
ingestr/src/freshdesk/__init__.py
ADDED
@@ -0,0 +1,72 @@
+"""This source uses Freshdesk API and dlt to load data such as Agents, Companies, Tickets
+etc. to the database"""
+
+from typing import Any, Dict, Generator, Iterable, List, Optional
+
+import dlt
+from dlt.sources import DltResource
+
+from .freshdesk_client import FreshdeskClient
+from .settings import DEFAULT_ENDPOINTS
+
+
+@dlt.source()
+def freshdesk_source(
+    endpoints: Optional[List[str]] = None,
+    per_page: int = 100,
+    domain: str = dlt.secrets.value,
+    api_secret_key: str = dlt.secrets.value,
+) -> Iterable[DltResource]:
+    """
+    Retrieves data from specified Freshdesk API endpoints.
+
+    This source supports pagination and incremental data loading. It fetches data from a list of
+    specified endpoints, or defaults to predefined endpoints in 'settings.py'.
+
+    Args:
+        endpoints: A list of Freshdesk API endpoints to fetch. Deafults to 'settings.py'.
+        per_page: The number of items to fetch per page, with a maximum of 100.
+        domain: The Freshdesk domain from which to fetch the data. Defaults to 'config.toml'.
+        api_secret_key: Freshdesk API key. Defaults to 'secrets.toml'.
+
+    Yields:
+        Iterable[DltResource]: Resources with data updated after the last 'updated_at'
+        timestamp for each endpoint.
+    """
+    # Instantiate FreshdeskClient with the provided domain and API key
+    freshdesk = FreshdeskClient(api_key=api_secret_key, domain=domain)
+
+    def incremental_resource(
+        endpoint: str,
+        updated_at: Optional[Any] = dlt.sources.incremental(
+            "updated_at", initial_value="2022-01-01T00:00:00Z"
+        ),
+    ) -> Generator[Dict[Any, Any], Any, None]:
+        """
+        Fetches and yields paginated data from a specified API endpoint.
+        Each page of data is fetched based on the `updated_at` timestamp
+        to ensure incremental loading.
+        """
+
+        # Retrieve the last updated timestamp to fetch only new or updated records.
+        if updated_at is not None:
+            updated_at = updated_at.last_value
+
+        # Use the FreshdeskClient instance to fetch paginated responses
+        yield from freshdesk.paginated_response(
+            endpoint=endpoint,
+            per_page=per_page,
+            updated_at=updated_at,
+        )
+
+    # Set default endpoints if not provided
+    endpoints = endpoints or DEFAULT_ENDPOINTS
+
+    # For each endpoint, create and yield a DLT resource
+    for endpoint in endpoints:
+        yield dlt.resource(
+            incremental_resource,
+            name=endpoint,
+            write_disposition="merge",
+            primary_key="id",
+        )(endpoint=endpoint)
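A minimal sketch of loading one endpoint with this new source through a dlt pipeline; the domain, API key, pipeline name, and duckdb destination below are placeholders, not values from the package.

import dlt
from ingestr.src.freshdesk import freshdesk_source

pipeline = dlt.pipeline(
    pipeline_name="freshdesk_demo",
    destination="duckdb",
    dataset_name="freshdesk_data",
)

# domain and api_secret_key are placeholders; per_page is capped at 100 by the API
source = freshdesk_source(domain="mycompany", api_secret_key="<api-key>")
info = pipeline.run(source.with_resources("tickets"))
print(info)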
ingestr/src/freshdesk/freshdesk_client.py
ADDED
@@ -0,0 +1,102 @@
+"""Freshdesk Client for making authenticated requests"""
+
+import logging
+import time
+from typing import Any, Dict, Iterable, Optional
+
+from dlt.common.typing import TDataItem
+from dlt.sources.helpers import requests
+
+
+class FreshdeskClient:
+    """
+    Client for making authenticated requests to the Freshdesk API. It incorporates API requests with
+    rate limit and pagination.
+
+    Attributes:
+        api_key (str): The API key used for authenticating requests to the Freshdesk API.
+        domain (str): The Freshdesk domain specific to the user, used in constructing the base URL.
+        base_url (str): The base URL constructed from the domain, targeting the Freshdesk API v2.
+    """
+
+    def __init__(self, api_key: str, domain: str):
+        # Initialize the FreshdeskClient instance with API key and domain.
+        # The API key is used for authentication with the Freshdesk API.
+        # The domain specifies the unique Freshdesk domain of the user.
+
+        # Store the API key provided during initialization.
+        self.api_key = api_key
+        # Store the Freshdesk domain provided during initialization.
+        self.domain = domain
+
+        # Construct the base URL for the API requests.
+        # This URL is formed by appending the domain to the standard Freshdesk API base URL format.
+        # All API requests will use this base URL as their starting point.
+        self.base_url = f"https://{domain}.freshdesk.com/api/v2"
+
+    def _request_with_rate_limit(self, url: str, **kwargs: Any) -> requests.Response:
+        """
+        Handles rate limits in HTTP requests and ensures
+        that the client doesn't exceed the limit set by the server.
+        """
+
+        while True:
+            try:
+                response = requests.get(url, **kwargs, auth=(self.api_key, "X"))
+                response.raise_for_status()
+
+                return response
+            except requests.HTTPError as e:
+                if e.response.status_code == 429:
+                    # Get the 'Retry-After' header to know how long to wait
+                    # Fallback to 60 seconds if header is missing
+                    seconds_to_wait = int(e.response.headers.get("Retry-After", 60))
+                    # Log a warning message
+                    logging.warning(
+                        "Rate limited. Waiting to retry after: %s secs", seconds_to_wait
+                    )
+
+                    # Wait for the specified number of seconds before retrying
+                    time.sleep(seconds_to_wait)
+                else:
+                    # If the error is not a rate limit (429), raise the exception to be
+                    # handled elsewhere or stop execution
+                    raise
+
+    def paginated_response(
+        self,
+        endpoint: str,
+        per_page: int,
+        updated_at: Optional[str] = None,
+    ) -> Iterable[TDataItem]:
+        """
+        Fetches a paginated response from a specified endpoint.
+
+        This method will continuously fetch data from the given endpoint,
+        page by page, until no more data is available or until it reaches data
+        updated at the specified timestamp.
+        """
+        page = 1
+        while True:
+            # Construct the URL for the specific endpoint
+            url = f"{self.base_url}/{endpoint}"
+
+            params: Dict[str, Any] = {"per_page": per_page, "page": page}
+
+            # Implement date range splitting logic here, if applicable
+            if endpoint in ["tickets", "contacts"]:
+                param_key = (
+                    "updated_since" if endpoint == "tickets" else "_updated_since"
+                )
+                if updated_at:
+                    params[param_key] = updated_at
+
+            # Handle requests with rate-limiting
+            # A maximum of 300 pages (30000 tickets) will be returned.
+            response = self._request_with_rate_limit(url, params=params)
+            data = response.json()
+
+            if not data:
+                break  # Stop if no data or max page limit reached
+            yield data
+            page += 1
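The rate-limit handling above boils down to honoring the Retry-After header on HTTP 429 and retrying. A standalone sketch of the same loop with plain requests; the URL and auth tuple are placeholders.

import time
import requests

def get_with_backoff(url: str, auth: tuple) -> requests.Response:
    while True:
        response = requests.get(url, auth=auth, timeout=30)
        if response.status_code != 429:
            # any non-rate-limit error surfaces immediately
            response.raise_for_status()
            return response
        # the server says how long to wait; fall back to 60 seconds if absent
        wait = int(response.headers.get("Retry-After", 60))
        time.sleep(wait)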
ingestr/src/freshdesk/settings.py
ADDED
@@ -0,0 +1,9 @@
+"""
+This module defines default settings for the Freshdesk integration.
+
+It specifies a list of default endpoints to be used when interacting with the Freshdesk API,
+covering common entities such as agents, companies, contacts, groups, roles, and tickets.
+"""
+
+# Define default endpoints for the Freshdesk API integration.
+DEFAULT_ENDPOINTS = ["agents", "companies", "contacts", "groups", "roles", "tickets"]
ingestr/src/google_analytics/__init__.py
CHANGED
@@ -13,9 +13,10 @@ from google.analytics.data_v1beta import BetaAnalyticsDataClient
 from google.analytics.data_v1beta.types import (
     Dimension,
     Metric,
+    MinuteRange,
 )

-from .helpers import get_report
+from .helpers import get_realtime_report, get_report


 @dlt.source(max_table_nesting=0)
@@ -29,6 +30,7 @@ def google_analytics(
     start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
     end_date: Optional[pendulum.DateTime] = None,
     rows_per_page: int = 10000,
+    minute_range_objects: List[MinuteRange] | None = None,
 ) -> List[DltResource]:
     try:
         property_id = int(property_id)
@@ -58,7 +60,7 @@ def google_analytics(
     dimensions = query["dimensions"]

     @dlt.resource(
-        name="
+        name="custom",
         merge_key=datetime_dimension,
         write_disposition="merge",
     )
@@ -87,6 +89,22 @@ def google_analytics(
         end_date=end_date,
     )

+    # real time report
+    @dlt.resource(
+        name="realtime",
+        merge_key="ingested_at",
+        write_disposition="merge",
+    )
+    def real_time_report() -> Iterator[TDataItem]:
+        yield from get_realtime_report(
+            client=client,
+            property_id=property_id,
+            dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+            metric_list=[Metric(name=metric) for metric in query["metrics"]],
+            per_page=rows_per_page,
+            minute_range_objects=minute_range_objects,
+        )
+
     # res = dlt.resource(
     #     basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
     # )(
@@ -103,4 +121,4 @@ def google_analytics(
     #     ),
     # )

-    return [basic_report]
+    return [basic_report, real_time_report]
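The new realtime resource ultimately issues a RunRealtimeReportRequest against the GA4 Data API, optionally narrowed by MinuteRange objects. A sketch of building that request directly with google-analytics-data; the property ID is a placeholder and the client picks up application-default credentials.

from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    Dimension,
    Metric,
    MinuteRange,
    RunRealtimeReportRequest,
)

client = BetaAnalyticsDataClient()  # uses application default credentials
request = RunRealtimeReportRequest(
    property="properties/123456789",  # placeholder property ID
    dimensions=[Dimension(name="country")],
    metrics=[Metric(name="activeUsers")],
    # restrict the report to activity between 5 and 0 minutes ago
    minute_ranges=[MinuteRange(name="last 5 minutes", start_minutes_ago=5, end_minutes_ago=0)],
)
response = client.run_realtime_report(request)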
ingestr/src/google_analytics/helpers.py
CHANGED
@@ -2,8 +2,10 @@
 This module contains helpers that process data and make it ready for loading into the database
 """

+import base64
 import json
 from typing import Any, Iterator, List, Union
+from urllib.parse import parse_qs, urlparse

 import proto
 from dlt.common.exceptions import MissingDependencyException
@@ -22,6 +24,8 @@ try:
         Metric,
         MetricMetadata,  # noqa: F401
         MetricType,
+        MinuteRange,
+        RunRealtimeReportRequest,
         RunReportRequest,
         RunReportResponse,
     )
@@ -52,6 +56,53 @@ def to_dict(item: Any) -> Iterator[TDataItem]:
     yield item


+def get_realtime_report(
+    client: Resource,
+    property_id: int,
+    dimension_list: List[Dimension],
+    metric_list: List[Metric],
+    per_page: int,
+    minute_range_objects: List[MinuteRange] | None = None,
+) -> Iterator[TDataItem]:
+    """
+    Gets all the possible pages of reports with the given query parameters.
+    Processes every page and yields a dictionary for every row of the report.
+
+    Args:
+        client: The Google Analytics client used to make requests.
+        property_id: A reference to the Google Analytics project.
+            More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
+        dimension_list: A list of all the dimensions requested in the query.
+        metric_list: A list of all the metrics requested in the query.
+        limit: Describes how many rows there should be per page.
+
+    Yields:
+        Generator of all rows of data in the report.
+    """
+    offset = 0
+    ingest_at = pendulum.now().to_date_string()
+
+    while True:
+        request = RunRealtimeReportRequest(
+            property=f"properties/{property_id}",
+            dimensions=dimension_list,
+            metrics=metric_list,
+            limit=per_page,
+            minute_ranges=minute_range_objects if minute_range_objects else None,
+        )
+        response = client.run_realtime_report(request)
+
+        # process request
+        processed_response_generator = process_report(
+            response=response, ingest_at=ingest_at
+        )
+        # import pdb; pdb.set_trace()
+        yield from processed_response_generator
+        offset += per_page
+        if len(response.rows) < per_page or offset > 1000000:
+            break
+
+
 def get_report(
     client: Resource,
     property_id: int,
@@ -79,10 +130,6 @@ def get_report(
         Generator of all rows of data in the report.
     """

-    print(
-        "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
-    )
-
     offset = 0
     while True:
         request = RunReportRequest(
@@ -98,9 +145,11 @@ def get_report(
                 )
             ],
         )
-        # process request
         response = client.run_report(request)
+
+        # process request
         processed_response_generator = process_report(response=response)
+
         # import pdb; pdb.set_trace()
         yield from processed_response_generator
         offset += per_page
@@ -108,7 +157,9 @@ def get_report(
             break


-def process_report(
+def process_report(
+    response: RunReportResponse, ingest_at: str | None = None
+) -> Iterator[TDataItems]:
     metrics_headers = [header.name for header in response.metric_headers]
     dimensions_headers = [header.name for header in response.dimension_headers]

@@ -131,6 +182,8 @@ def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
                 metric_type=metric_type, value=row.metric_values[i].value
             )
             response_dict[metrics_headers[i]] = metric_value
+        if ingest_at is not None:
+            response_dict["ingested_at"] = ingest_at

         unique_key = "-".join(list(response_dict.keys()))
         if unique_key not in distinct_key_combinations:
@@ -170,3 +223,65 @@ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
         return pendulum.from_format(dimension_value, "YYYYMMDDHHmm", tz="UTC")
     else:
         return dimension_value
+
+
+def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[MinuteRange]:
+    minutes_ranges = minutes_ranges.strip()
+    minutes = minutes_ranges.replace(" ", "").split(",")
+    if minutes == "":
+        raise ValueError(
+            "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+        )
+
+
+    minute_range_objects = []
+    for min_range in minutes:
+        if "-" not in min_range:
+            raise ValueError(
+                "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+            )
+        parts = min_range.split("-")
+
+        if not parts[0].isdigit() or not parts[1].isdigit():
+            raise ValueError(
+                f"Invalid input '{min_range}'. Both start and end minutes must be digits. For example: 1-2,5-6"
+            )
+
+        end_minutes_ago = int(parts[0])
+        start_minutes_ago = int(parts[1])
+        minute_range_objects.append(MinuteRange(
+            name=f"{end_minutes_ago}-{start_minutes_ago} minutes ago",
+            start_minutes_ago= start_minutes_ago,
+            end_minutes_ago=end_minutes_ago
+        ))
+
+    return minute_range_objects
+
+
+def parse_google_analytics_uri(uri: str):
+    parse_uri = urlparse(uri)
+    source_fields = parse_qs(parse_uri.query)
+    cred_path = source_fields.get("credentials_path")
+    cred_base64 = source_fields.get("credentials_base64")
+
+    if not cred_path and not cred_base64:
+        raise ValueError(
+            "credentials_path or credentials_base64 is required to connect Google Analytics"
+        )
+    credentials = {}
+    if cred_path:
+        with open(cred_path[0], "r") as f:
+            credentials = json.load(f)
+    elif cred_base64:
+        credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
+
+    property_id = source_fields.get("property_id")
+    if not property_id:
+        raise ValueError("property_id is required to connect to Google Analytics")
+
+    if (not cred_path and not cred_base64) or (not property_id):
+        raise ValueError(
+            "credentials_path or credentials_base64 and property_id are required to connect Google Analytics"
+        )
+
+    return {"credentials": credentials, "property_id": property_id[0]}
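A short usage sketch of the two new helpers above; the minute-range string, scheme prefix, file path, and property ID are illustrative. Note that in each "1-2" pair the first number is how recently the window ends and the second how far back it starts, which is why end_minutes_ago comes out smaller than start_minutes_ago.

from ingestr.src.google_analytics.helpers import (
    convert_minutes_ranges_to_minute_range_objects,
    parse_google_analytics_uri,
)

ranges = convert_minutes_ranges_to_minute_range_objects("1-2, 5-6")
for r in ranges:
    print(r.name, r.start_minutes_ago, r.end_minutes_ago)
# -> "1-2 minutes ago" 2 1, then "5-6 minutes ago" 6 5

# only the query parameters matter to the URI helper; the scheme here is illustrative
config = parse_google_analytics_uri(
    "googleanalytics://?credentials_path=/path/to/service_account.json&property_id=123456789"
)
print(config["property_id"])  # "123456789"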
ingestr/src/phantombuster/__init__.py
ADDED
@@ -0,0 +1,38 @@
+from typing import Iterable, Optional
+
+import dlt
+import pendulum
+import requests
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+from dlt.sources.helpers.requests import Client
+
+from ingestr.src.phantombuster.client import PhantombusterClient
+
+
+def retry_on_limit(
+    response: Optional[requests.Response], exception: Optional[BaseException]
+) -> bool:
+    if response is not None and response.status_code == 429:
+        return True
+    return False
+
+
+def create_client() -> requests.Session:
+    return Client(
+        raise_for_status=False,
+        retry_condition=retry_on_limit,
+        request_max_attempts=12,
+        request_backoff_factor=2,
+    ).session
+
+
+@dlt.source(max_table_nesting=0)
+def phantombuster_source(api_key: str, agent_id: str, start_date: pendulum.DateTime, end_date: pendulum.DateTime) -> Iterable[DltResource]:
+    client = PhantombusterClient(api_key)
+
+    @dlt.resource()
+    def completed_phantoms() -> Iterable[TDataItem]:
+        yield client.fetch_containers_result(create_client(), agent_id, start_date, end_date)
+
+    return completed_phantoms
ingestr/src/phantombuster/client.py
ADDED
@@ -0,0 +1,65 @@
+from typing import Union
+
+import pendulum
+import requests
+
+
+class PhantombusterClient:
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+
+    def _get_headers(self):
+        return {
+            "X-Phantombuster-Key-1": self.api_key,
+            "accept": "application/json",
+        }
+
+    def fetch_containers_result(self, session: requests.Session, agent_id: str, start_date: pendulum.DateTime, end_date: pendulum.DateTime):
+        url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
+        before_ended_at = None
+        limit = 100
+        started_at = start_date.int_timestamp * 1000 + int(start_date.microsecond / 1000)
+        ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)
+        while True:
+            params: dict[str, Union[str, int, float, bytes, None]] = {
+                "agentId": agent_id,
+                "limit": limit,
+                "mode": "finalized",
+            }
+
+            if before_ended_at:
+                params["beforeEndedAt"] = before_ended_at
+
+            response = session.get(url=url, headers=self._get_headers(), params=params)
+            data = response.json()
+            containers = data.get("containers", [])
+
+            for container in containers:
+                container_ended_at = container.get("endedAt")
+                if before_ended_at is None or before_ended_at > container["endedAt"]:
+                    before_ended_at = container["endedAt"]
+
+                if not (started_at <= container_ended_at <= ended_at):
+                    continue
+                try:
+                    result = self.fetch_result_object(session, container["id"])
+                    partition_dt = pendulum.from_timestamp(container_ended_at / 1000, tz="UTC").to_date_string()
+                    row = {"container": container, "result": result, "partition_dt": partition_dt}
+                    yield row
+
+                except requests.RequestException as e:
+                    print(f"Error fetching result for container {container['id']}: {e}")
+
+            if data["maxLimitReached"] is False:
+                break
+
+
+    def fetch_result_object(self, session: requests.Session, container_id: str):
+        result_url = (
+            "https://api.phantombuster.com/api/v2/containers/fetch-result-object"
+        )
+        params = {"id": container_id}
+        response = session.get(result_url, headers=self._get_headers(), params=params)
+        response.raise_for_status()
+
+        return response.json()
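The client compares each container's endedAt (epoch milliseconds) against the requested window, which is converted to milliseconds the same way. A small worked sketch of that conversion and filter; the timestamps are illustrative.

import pendulum

start_date = pendulum.datetime(2024, 1, 1)
end_date = pendulum.datetime(2024, 1, 31, 23, 59, 59)

# same conversion as fetch_containers_result: seconds -> milliseconds, plus the sub-second part
started_at = start_date.int_timestamp * 1000 + int(start_date.microsecond / 1000)
ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)

container_ended_at = 1705312800000  # example endedAt from the API (2024-01-15 10:00 UTC)
in_window = started_at <= container_ended_at <= ended_at
print(in_window)  # True for this example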
ingestr/src/sources.py
CHANGED
@@ -1469,48 +1469,49 @@ class GoogleAnalyticsSource:
         return True

     def dlt_source(self, uri: str, table: str, **kwargs):
-
-        source_fields = parse_qs(parse_uri.query)
-        cred_path = source_fields.get("credentials_path")
-        cred_base64 = source_fields.get("credentials_base64")
+        import ingestr.src.google_analytics.helpers as helpers

-
+        result = helpers.parse_google_analytics_uri(uri)
+        credentials = result["credentials"]
+        property_id = result["property_id"]
+
+        fields = table.split(":")
+        if len(fields) != 3 and len(fields) != 4:
             raise ValueError(
-                "
+                "Invalid table format. Expected format: <report_type>:<dimensions>:<metrics> or <report_type>:<dimensions>:<metrics>:<minute_ranges>"
             )

-
-        if
-            with open(cred_path[0], "r") as f:
-                credentials = json.load(f)
-        elif cred_base64:
-            credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
-
-        property_id = source_fields.get("property_id")
-        if not property_id:
-            raise ValueError("property_id is required to connect to Google Analytics")
-
-        fields = table.split(":")
-        if len(fields) != 3:
+        report_type = fields[0]
+        if report_type not in ["custom", "realtime"]:
             raise ValueError(
-                "Invalid
+                "Invalid report type. Expected format: <report_type>:<dimensions>:<metrics>. Available report types: custom, realtime"
             )

         dimensions = fields[1].replace(" ", "").split(",")
+        metrics = fields[2].replace(" ", "").split(",")
+
+        minute_range_objects = []
+        if len(fields) == 4:
+            minute_range_objects = helpers.convert_minutes_ranges_to_minute_range_objects(fields[3])

         datetime = ""
-
-
-
-
-
-
-
-
+        resource_name = fields[0].lower()
+        if resource_name == "custom":
+            for dimension_datetime in ["date", "dateHourMinute", "dateHour"]:
+                if dimension_datetime in dimensions:
+                    datetime = dimension_datetime
+                    break
+            else:
+                raise ValueError(
+                    "You must provide at least one dimension: [dateHour, dateHourMinute, date]"
+                )

-        metrics = fields[2].replace(" ", "").split(",")
         queries = [
-            {
+            {
+                "resource_name": resource_name,
+                "dimensions": dimensions,
+                "metrics": metrics,
+            }
         ]

         start_date = pendulum.now().subtract(days=30).start_of("day")
@@ -1524,13 +1525,14 @@ class GoogleAnalyticsSource:
         from ingestr.src.google_analytics import google_analytics

         return google_analytics(
-            property_id=property_id
+            property_id=property_id,
             start_date=start_date,
             end_date=end_date,
             datetime_dimension=datetime,
             queries=queries,
             credentials=credentials,
-
+            minute_range_objects=minute_range_objects if minute_range_objects else None,
+        ).with_resources(resource_name)


 class GitHubSource:
@@ -2173,36 +2175,100 @@ class FrankfurterSource:
         return True

     def dlt_source(self, uri: str, table: str, **kwargs):
-
-
-
-
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Frankfurter takes care of incrementality on its own, you should not provide incremental_key"
+            )

-        if
-
-
-
-                end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
-            else:
-                end_date = start_date
+        if kwargs.get("interval_start"):
+            start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
+            if kwargs.get("interval_end"):
+                end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
             else:
-                start_date = pendulum.now()
                 end_date = pendulum.now()
-            validate_dates(start_date=start_date, end_date=end_date)
-
-        # For currencies and latest tables, set start and end dates to current date
         else:
             start_date = pendulum.now()
             end_date = pendulum.now()

-
-
-            raise ValueError(
-                f"Table '{table}' is not supported for Frankfurter source."
-            )
+        from ingestr.src.frankfurter import frankfurter_source
+        from ingestr.src.frankfurter.helpers import validate_dates

-
-
+        validate_dates(start_date=start_date, end_date=end_date)
+
+        src = frankfurter_source(
             start_date=start_date,
             end_date=end_date,
         )
+
+        if table not in src.resources:
+            raise UnsupportedResourceError(table, "Frankfurter")
+
+        return src.with_resources(table)
+
+class FreshdeskSource:
+    # freshdesk://domain?api_key=<api_key>
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        domain = parsed_uri.netloc
+        query = parsed_uri.query
+        params = parse_qs(query)
+
+        if not domain:
+            raise MissingValueError("domain", "Freshdesk")
+
+        if '.' in domain:
+            domain = domain.split('.')[0]
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "Freshdesk")
+
+        if table not in ["agents", "companies", "contacts", "groups", "roles", "tickets"]:
+            raise UnsupportedResourceError(table, "Freshdesk")
+
+        from ingestr.src.freshdesk import freshdesk_source
+        return freshdesk_source(api_secret_key=api_key[0], domain=domain).with_resources(table)
+
+class PhantombusterSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        #phantombuster://?api_key=<api_key>
+        #source table = phantom_results:agent_id
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "Phantombuster")
+
+        table_fields = table.replace(" ", "").split(":")
+        table_name = table_fields[0]
+
+        agent_id = table_fields[1] if len(table_fields) > 1 else None
+
+        if table_name not in ["completed_phantoms"]:
+            raise UnsupportedResourceError(table_name, "Phantombuster")
+
+        if not agent_id:
+            raise MissingValueError("agent_id", "Phantombuster")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.parse("2018-01-01")
+
+        end_date = kwargs.get("interval_end")
+
+        #doesnot support incremental loading
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date)
+        else:
+            end_date = pendulum.now()
+
+        from ingestr.src.phantombuster import phantombuster_source
+        return phantombuster_source(api_key=api_key[0], agent_id=agent_id, start_date=start_date, end_date=end_date).with_resources(table_name)
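The Google Analytics source now accepts an optional fourth segment in the table string for realtime minute ranges. A sketch of how such a string decomposes, following the parsing shown in the diff; the dimension, metric, and range values are illustrative.

table = "realtime:country,deviceCategory:activeUsers:0-5,25-30"
fields = table.split(":")

report_type = fields[0]                                   # "realtime" or "custom"
dimensions = fields[1].replace(" ", "").split(",")        # ["country", "deviceCategory"]
metrics = fields[2].replace(" ", "").split(",")           # ["activeUsers"]
minute_ranges = fields[3] if len(fields) == 4 else None   # "0-5,25-30"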
{ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.35
+Version: 0.13.37
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/RECORD
CHANGED
@@ -2,15 +2,15 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=mRlGSqi2sHcZ2AKlwn5MqoMvFxXlSjcZxmPJr76rmRk,25187
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256
+ingestr/src/buildinfo.py,sha256=zGfudKvUvWbTMFprtyFws2zsqeHGQj08eCKTrwTnVj8,21
 ingestr/src/destinations.py,sha256=Z79f01BSmEaXnQno2IQVt4Th4dmD-BiOQXlibZJ5sTw,13180
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=Mm_Be60PFO4mUIeJLBMDVU_uyH0IeCiZ1dtNDFiDFSo,5463
 ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=9ESEgdlaSQQszpRfp-etKvfFDvvmYfCc9sBlEPJxh3Q,78809
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -42,8 +42,11 @@ ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_
 ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
-ingestr/src/frankfurter/__init__.py,sha256=
-ingestr/src/frankfurter/helpers.py,sha256=
+ingestr/src/frankfurter/__init__.py,sha256=sjxfq377-lryuFC3JswcbHBRoBjLnGLKNRTwBpDZyLw,4403
+ingestr/src/frankfurter/helpers.py,sha256=wqm087QVPcyTuMl6yj_Pl1wcuqElwcBMPz3P4773wcM,979
+ingestr/src/freshdesk/__init__.py,sha256=uFQW_cJyymxtHQiYb_xjzZAklc487L0n9GkgHgC7yAI,2618
+ingestr/src/freshdesk/freshdesk_client.py,sha256=3z5Yc008ADzRcJWtNc00PwjkLzG-RMI8jVIOOyYA-Rw,4088
+ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
 ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
 ingestr/src/github/helpers.py,sha256=rpv_3HzuOl4PQ-FUeA66pev-pgze9SaE8RUHIPYfZ_A,6759
 ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
@@ -53,8 +56,8 @@ ingestr/src/google_ads/field.py,sha256=uc8KEaYQrwgQoQPUdxIQWZxpFeZHbiV98FM0ZSael
 ingestr/src/google_ads/metrics.py,sha256=tAqpBpm-8l95oPT9cBxMWaEoDTNHVXnqUphYDHWKDiE,12099
 ingestr/src/google_ads/predicates.py,sha256=K4wTuqfmJ9ko1RKeHTBDfQO_mUADVyuRqtywBPP-72w,683
 ingestr/src/google_ads/reports.py,sha256=AVY1pPt5yaIFskQe1k5VW2Dhlux3bzewsHlDrdGEems,12686
-ingestr/src/google_analytics/__init__.py,sha256=
-ingestr/src/google_analytics/helpers.py,sha256=
+ingestr/src/google_analytics/__init__.py,sha256=8b9CBWJFrBpHVRl993Z7J01sKKbYyXEtngdfEUwqlfE,4343
+ingestr/src/google_analytics/helpers.py,sha256=bUTPp5C-k5wqq-ccEAn-asRH2CLbBS2SOs1v9wiRU6U,10087
 ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
 ingestr/src/google_sheets/__init__.py,sha256=CL0HfY74uxX8-ge0ucI0VhWMYZVAfoX7WRPBitRi-CI,6647
 ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -84,6 +87,8 @@ ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falP
 ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
 ingestr/src/personio/__init__.py,sha256=sHYpoV-rg-kA1YsflctChis0hKcTrL6mka9O0CHV4zA,11638
 ingestr/src/personio/helpers.py,sha256=EKmBN0Lf4R0lc3yqqs7D-RjoZ75E8gPcctt59xwHxrY,2901
+ingestr/src/phantombuster/__init__.py,sha256=FJJiVP0ciR48FTmXYLAasZ4JQAB1Ow4M_Hh39J6hWks,1112
+ingestr/src/phantombuster/client.py,sha256=HFJ46f_IU1NMMCA94ttoY1LBc0L7qfqeQEawczlbBvQ,2584
 ingestr/src/pipedrive/__init__.py,sha256=iRrxeMwo8_83ptgGnTFTNHV1nYvIsFfg0a3XzugPYeI,6982
 ingestr/src/pipedrive/settings.py,sha256=q119Fy4C5Ip1rMoCILX2BkHV3bwiXC_dW58KIiDUzsY,708
 ingestr/src/pipedrive/typing.py,sha256=lEMXu4hhAA3XkhVSlBUa-juqyupisd3c-qSQKxFvzoE,69
@@ -122,8 +127,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.37.dist-info/METADATA,sha256=Mmc9hAE_zCJ_b5U9hCLpJXpU0858FirZdoO-FyPuOI4,13575
+ingestr-0.13.37.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.37.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.37.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.37.dist-info/RECORD,,
{ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/WHEEL
File without changes
{ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.35.dist-info → ingestr-0.13.37.dist-info}/licenses/LICENSE.md
File without changes