ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0

ingestr/src/fundraiseup/__init__.py
ADDED

@@ -0,0 +1,95 @@
+"""Fundraiseup source for ingesting donations, events, fundraisers, recurring plans, and supporters."""
+
+from typing import Any, Dict, Generator, Iterable, TypedDict
+
+import dlt
+import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.sources import DltResource
+
+from .client import FundraiseupClient
+
+
+class DonationCursor(TypedDict):
+    id: str
+    created_at: pendulum.DateTime
+
+
+def order_by_created(record) -> DonationCursor:
+    last_value = None
+    if len(record) == 1:
+        (record,) = record
+    else:
+        record, last_value = record
+
+    cursor: DonationCursor = {
+        "id": record["id"],
+        "created_at": ensure_pendulum_datetime(record["created_at"]),
+    }
+
+    if last_value is None:
+        return cursor
+
+    return max(cursor, last_value, key=lambda v: v["created_at"])
+
+
+@dlt.source(name="fundraiseup", max_table_nesting=0)
+def fundraiseup_source(api_key: str) -> Iterable[DltResource]:
+    """
+    Return resources for Fundraiseup API.
+
+    Args:
+        api_key: API key for authentication
+
+    Returns:
+        Iterable of DLT resources
+    """
+    client = FundraiseupClient(api_key=api_key)
+
+    # Define available resources and their configurations
+    resources = {
+        "donations": {"write_disposition": "replace", "primary_key": "id"},
+        "events": {"write_disposition": "replace", "primary_key": "id"},
+        "fundraisers": {"write_disposition": "replace", "primary_key": "id"},
+        "recurring_plans": {"write_disposition": "replace", "primary_key": "id"},
+        "supporters": {"write_disposition": "replace", "primary_key": "id"},
+    }
+
+    def create_resource(resource_name: str, config: Dict[str, Any]) -> DltResource:
+        """Create a DLT resource dynamically."""
+
+        @dlt.resource(
+            name=resource_name,
+            write_disposition=config["write_disposition"],
+            primary_key=config["primary_key"],
+        )
+        def generic_resource() -> Generator[Dict[str, Any], None, None]:
+            """Generic resource that yields batches directly."""
+            for batch in client.get_paginated_data(resource_name):
+                yield batch  # type: ignore[misc]
+
+        return generic_resource()
+
+    @dlt.resource(
+        name="donations:incremental",
+        write_disposition="merge",
+        primary_key="id",
+    )
+    def donations_incremental(
+        last_record: dlt.sources.incremental[DonationCursor] = dlt.sources.incremental(
+            "$",
+            range_start="closed",
+            range_end="closed",
+            last_value_func=order_by_created,
+        ),
+    ):
+        params = {}
+        if last_record.last_value is not None:
+            params["starting_after"] = last_record.last_value["id"]
+        for batch in client.get_paginated_data("donations", params=params):
+            yield batch  # type: ignore[misc]
+
+    # Return all resources
+    return [donations_incremental] + [
+        create_resource(name, config) for name, config in resources.items()
+    ]
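
The `donations:incremental` resource above uses `"$"` as the cursor path, meaning the whole record is handed to `order_by_created`, which dlt invokes either with a 1-tuple (the first value seen) or with a `(new_record, previous_cursor)` pair. A minimal sketch of that contract, assuming ingestr 0.14.x is installed so the module above is importable:

    from ingestr.src.fundraiseup import order_by_created

    # First record seen: dlt passes a 1-tuple, so the record itself seeds the cursor.
    cursor = order_by_created(({"id": "d_1", "created_at": "2024-01-01T00:00:00Z"},))

    # Later records: dlt passes (new_record, previous_cursor); the newer created_at wins.
    cursor = order_by_created(
        ({"id": "d_2", "created_at": "2024-02-01T00:00:00Z"}, cursor)
    )
    print(cursor["id"])  # d_2 -- its created_at is later, so it becomes the cursor
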
ingestr/src/fundraiseup/client.py
ADDED

@@ -0,0 +1,81 @@
+"""Fundraiseup API Client for handling authentication and paginated requests."""
+
+from typing import Any, Dict, Iterator, Optional
+
+from ingestr.src.http_client import create_client
+
+
+class FundraiseupClient:
+    """Client for interacting with Fundraiseup API v1."""
+
+    def __init__(self, api_key: str):
+        """
+        Initialize Fundraiseup API client.
+
+        Args:
+            api_key: API key for authentication
+        """
+        self.api_key = api_key
+        self.base_url = "https://api.fundraiseup.com/v1"
+        # Use shared HTTP client with retry logic for rate limiting
+        self.client = create_client(retry_status_codes=[429, 500, 502, 503, 504])
+
+    def get_paginated_data(
+        self,
+        endpoint: str,
+        params: Optional[Dict[str, Any]] = None,
+        page_size: int = 100,
+    ) -> Iterator[list[Dict[str, Any]]]:
+        """
+        Fetch paginated data from a Fundraiseup API endpoint using cursor-based pagination.
+
+        Args:
+            endpoint: API endpoint path (e.g., "donations")
+            params: Additional query parameters
+            page_size: Number of items per page (default 100)
+
+        Yields:
+            Batches of items from the API
+        """
+        url = f"{self.base_url}/{endpoint}"
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+        if params is None:
+            params = {}
+
+        params["limit"] = page_size
+        starting_after = None
+
+        while True:
+            # Add cursor for pagination if not first page
+            if starting_after:
+                params["starting_after"] = starting_after
+
+            response = self.client.get(url=url, headers=headers, params=params)
+            response.raise_for_status()
+
+            data = response.json()
+
+            # Handle both list response and object with data array
+            if isinstance(data, list):
+                items = data
+                has_more = len(items) == page_size
+            else:
+                items = data.get("data", [])
+                has_more = data.get("has_more", False)
+
+            if not items:
+                break
+
+            yield items
+
+            # Set cursor for next page
+            if has_more and items:
+                starting_after = items[-1].get("id")
+                if not starting_after:
+                    break
+            else:
+                break
ingestr/src/github/__init__.py
CHANGED

@@ -4,13 +4,14 @@ import urllib.parse
 from typing import Iterator, Optional, Sequence

 import dlt
+import pendulum
 from dlt.common.typing import TDataItems
 from dlt.sources import DltResource

 from .helpers import get_reactions_data, get_rest_pages, get_stargazers


-@dlt.source
+@dlt.source(max_table_nesting=0)
 def github_reactions(
     owner: str,
     name: str,

@@ -67,7 +68,11 @@

 @dlt.source(max_table_nesting=0)
 def github_repo_events(
-    owner: str,
+    owner: str,
+    name: str,
+    access_token: str,
+    start_date: pendulum.DateTime,
+    end_date: Optional[pendulum.DateTime] = None,
 ) -> DltResource:
     """Gets events for repository `name` with owner `owner` incrementally.

@@ -86,11 +91,14 @@
     """

     # use naming function in table name to generate separate tables for each event
-    @dlt.resource(
+    @dlt.resource(
+        primary_key="id", table_name=lambda i: i["type"], write_disposition="merge"
+    )
     def repo_events(
         last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
             "created_at",
-            initial_value=
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
             last_value_func=max,
             range_end="closed",
             range_start="closed",

@@ -100,8 +108,35 @@
             f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
         )

+        # Get the date range from the incremental state
+        start_filter = pendulum.parse(
+            last_created_at.last_value or last_created_at.initial_value
+        )
+        end_filter = (
+            pendulum.parse(last_created_at.end_value)
+            if last_created_at.end_value
+            else pendulum.now()
+        )
+
         for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
-
+            # Filter events by date range
+            filtered_events = []
+            for event in page:
+                event_date = pendulum.parse(event["created_at"])
+
+                # Check if event is within the date range
+                if event_date >= start_filter:
+                    if end_filter is None or event_date <= end_filter:
+                        filtered_events.append(event)
+                    elif event_date > end_filter:
+                        # Skip events that are newer than our end date
+                        continue
+                else:
+                    # Events are ordered by date desc, so if we hit an older event, we can stop
+                    break
+
+            if filtered_events:
+                yield filtered_events

         # stop requesting pages if the last element was already older than initial value
         # note: incremental will skip those items anyway, we just do not want to use the api limits

@@ -114,7 +149,7 @@ def github_repo_events(
     return repo_events


-@dlt.source
+@dlt.source(max_table_nesting=0)
 def github_stargazers(
     owner: str,
     name: str,
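
With the widened signature, `github_repo_events` can be bounded to an explicit window instead of relying on a hard-coded initial value. A sketch of a bounded backfill; the owner, repo, and token below are placeholders:

    import dlt
    import pendulum

    from ingestr.src.github import github_repo_events

    pipeline = dlt.pipeline(
        pipeline_name="github_events_demo",  # illustrative
        destination="duckdb",
        dataset_name="github",
    )

    # Events outside [start_date, end_date] are filtered out before being
    # yielded, and paging stops early once events predate start_date.
    events = github_repo_events(
        owner="some-org",             # placeholder
        name="some-repo",             # placeholder
        access_token="GITHUB_TOKEN",  # placeholder
        start_date=pendulum.datetime(2024, 1, 1),
        end_date=pendulum.datetime(2024, 2, 1),
    )
    pipeline.run(events)
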
ingestr/src/github/helpers.py
CHANGED

@@ -103,9 +103,9 @@ def get_reactions_data(


 def _extract_top_connection(data: StrAny, node_type: str) -> StrAny:
-    assert (
-
-    )
+    assert isinstance(data, dict) and len(data) == 1, (
+        f"The data with list of {node_type} must be a dictionary and contain only one element"
+    )
     data = next(iter(data.values()))
     return data[node_type]  # type: ignore


@@ -158,7 +158,7 @@ def _get_graphql_pages(
         )
         items_count += len(data_items)
         print(
-            f
+            f"Got {len(data_items)}/{items_count} {node_type}s, query cost {rate_limit['cost']}, remaining credits: {rate_limit['remaining']}"
         )
         if data_items:
             yield data_items

@@ -187,7 +187,7 @@ def _get_comment_reaction(comment_ids: List[str], access_token: str) -> StrAny:
     # print(query)
     page, rate_limit = _run_graphql_query(access_token, query, {})
     print(
-        f
+        f"Got {len(page)} comments, query cost {rate_limit['cost']}, remaining credits: {rate_limit['remaining']}"
     )
     data.update(page)
     return data

ingestr/src/google_analytics/__init__.py
CHANGED

@@ -7,15 +7,16 @@ from typing import Iterator, List, Optional, Union
 import dlt
 from dlt.common import pendulum
 from dlt.common.typing import DictStrAny, TDataItem
-from dlt.
+from dlt.sources import DltResource
 from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials
 from google.analytics.data_v1beta import BetaAnalyticsDataClient
 from google.analytics.data_v1beta.types import (
     Dimension,
     Metric,
+    MinuteRange,
 )

-from .helpers import get_report
+from .helpers import get_realtime_report, get_report


 @dlt.source(max_table_nesting=0)

@@ -29,6 +30,7 @@ def google_analytics(
     start_date: Optional[pendulum.DateTime] = pendulum.datetime(2024, 1, 1),
     end_date: Optional[pendulum.DateTime] = None,
     rows_per_page: int = 10000,
+    minute_range_objects: List[MinuteRange] | None = None,
 ) -> List[DltResource]:
     try:
         property_id = int(property_id)

@@ -58,7 +60,7 @@
     dimensions = query["dimensions"]

     @dlt.resource(
-        name="
+        name="custom",
         merge_key=datetime_dimension,
         write_disposition="merge",
     )

@@ -87,6 +89,22 @@
         end_date=end_date,
     )

+    # real time report
+    @dlt.resource(
+        name="realtime",
+        merge_key="ingested_at",
+        write_disposition="merge",
+    )
+    def real_time_report() -> Iterator[TDataItem]:
+        yield from get_realtime_report(
+            client=client,
+            property_id=property_id,
+            dimension_list=[Dimension(name=dimension) for dimension in dimensions],
+            metric_list=[Metric(name=metric) for metric in query["metrics"]],
+            per_page=rows_per_page,
+            minute_range_objects=minute_range_objects,
+        )
+
     # res = dlt.resource(
     #     basic_report, name="basic_report", merge_key=datetime_dimension, write_disposition="merge"
     # )(

@@ -103,4 +121,4 @@
     #     ),
     # )

-    return [basic_report]
+    return [basic_report, real_time_report]
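
The new `realtime` resource forwards `minute_range_objects` to the Data API unchanged, so callers can scope the realtime report to trailing windows. A small sketch of constructing them, using the same field semantics as the helper code below (both bounds are expressed in "minutes ago", with `start_minutes_ago` the older bound):

    from google.analytics.data_v1beta.types import MinuteRange

    minute_range_objects = [
        MinuteRange(name="0-5 minutes ago", start_minutes_ago=5, end_minutes_ago=0),
        MinuteRange(name="10-15 minutes ago", start_minutes_ago=15, end_minutes_ago=10),
    ]
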
ingestr/src/google_analytics/helpers.py
CHANGED

@@ -2,8 +2,10 @@
 This module contains helpers that process data and make it ready for loading into the database
 """

+import base64
 import json
 from typing import Any, Iterator, List, Union
+from urllib.parse import parse_qs, urlparse

 import proto
 from dlt.common.exceptions import MissingDependencyException

@@ -22,6 +24,8 @@ try:
     Metric,
     MetricMetadata,  # noqa: F401
     MetricType,
+    MinuteRange,
+    RunRealtimeReportRequest,
     RunReportRequest,
     RunReportResponse,
 )

@@ -52,6 +56,53 @@ def to_dict(item: Any) -> Iterator[TDataItem]:
     yield item


+def get_realtime_report(
+    client: Resource,
+    property_id: int,
+    dimension_list: List[Dimension],
+    metric_list: List[Metric],
+    per_page: int,
+    minute_range_objects: List[MinuteRange] | None = None,
+) -> Iterator[TDataItem]:
+    """
+    Gets all the possible pages of reports with the given query parameters.
+    Processes every page and yields a dictionary for every row of the report.
+
+    Args:
+        client: The Google Analytics client used to make requests.
+        property_id: A reference to the Google Analytics project.
+            More info: https://developers.google.com/analytics/devguides/reporting/data/v1/property-id
+        dimension_list: A list of all the dimensions requested in the query.
+        metric_list: A list of all the metrics requested in the query.
+        limit: Describes how many rows there should be per page.
+
+    Yields:
+        Generator of all rows of data in the report.
+    """
+    offset = 0
+    ingest_at = pendulum.now().to_date_string()
+
+    while True:
+        request = RunRealtimeReportRequest(
+            property=f"properties/{property_id}",
+            dimensions=dimension_list,
+            metrics=metric_list,
+            limit=per_page,
+            minute_ranges=minute_range_objects if minute_range_objects else None,
+        )
+        response = client.run_realtime_report(request)
+
+        # process request
+        processed_response_generator = process_report(
+            response=response, ingest_at=ingest_at
+        )
+        # import pdb; pdb.set_trace()
+        yield from processed_response_generator
+        offset += per_page
+        if len(response.rows) < per_page or offset > 1000000:
+            break
+
+
 def get_report(
     client: Resource,
     property_id: int,

@@ -79,10 +130,6 @@ def get_report(
         Generator of all rows of data in the report.
     """

-    print(
-        "fetching for daterange", start_date.to_date_string(), end_date.to_date_string()
-    )
-
     offset = 0
     while True:
         request = RunReportRequest(

@@ -98,9 +145,11 @@
                 )
             ],
         )
-        # process request
         response = client.run_report(request)
+
+        # process request
         processed_response_generator = process_report(response=response)
+
         # import pdb; pdb.set_trace()
         yield from processed_response_generator
         offset += per_page

@@ -108,7 +157,9 @@
             break


-def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
+def process_report(
+    response: RunReportResponse, ingest_at: str | None = None
+) -> Iterator[TDataItems]:
     metrics_headers = [header.name for header in response.metric_headers]
     dimensions_headers = [header.name for header in response.dimension_headers]

@@ -131,6 +182,8 @@ def process_report(response: RunReportResponse) -> Iterator[TDataItems]:
                 metric_type=metric_type, value=row.metric_values[i].value
             )
             response_dict[metrics_headers[i]] = metric_value
+        if ingest_at is not None:
+            response_dict["ingested_at"] = ingest_at

         unique_key = "-".join(list(response_dict.keys()))
         if unique_key not in distinct_key_combinations:

@@ -170,3 +223,68 @@ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
         return pendulum.from_format(dimension_value, "YYYYMMDDHHmm", tz="UTC")
     else:
         return dimension_value
+
+
+def convert_minutes_ranges_to_minute_range_objects(
+    minutes_ranges: str,
+) -> List[MinuteRange]:
+    minutes_ranges = minutes_ranges.strip()
+    minutes = minutes_ranges.replace(" ", "").split(",")
+    if minutes == "":
+        raise ValueError(
+            "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+        )
+
+    minute_range_objects = []
+    for min_range in minutes:
+        if "-" not in min_range:
+            raise ValueError(
+                "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
+            )
+        parts = min_range.split("-")
+
+        if not parts[0].isdigit() or not parts[1].isdigit():
+            raise ValueError(
+                f"Invalid input '{min_range}'. Both start and end minutes must be digits. For example: 1-2,5-6"
+            )
+
+        end_minutes_ago = int(parts[0])
+        start_minutes_ago = int(parts[1])
+        minute_range_objects.append(
+            MinuteRange(
+                name=f"{end_minutes_ago}-{start_minutes_ago} minutes ago",
+                start_minutes_ago=start_minutes_ago,
+                end_minutes_ago=end_minutes_ago,
+            )
+        )
+
+    return minute_range_objects
+
+
+def parse_google_analytics_uri(uri: str):
+    parse_uri = urlparse(uri)
+    source_fields = parse_qs(parse_uri.query)
+    cred_path = source_fields.get("credentials_path")
+    cred_base64 = source_fields.get("credentials_base64")
+
+    if not cred_path and not cred_base64:
+        raise ValueError(
+            "credentials_path or credentials_base64 is required to connect Google Analytics"
+        )
+    credentials = {}
+    if cred_path:
+        with open(cred_path[0], "r") as f:
+            credentials = json.load(f)
+    elif cred_base64:
+        credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
+
+    property_id = source_fields.get("property_id")
+    if not property_id:
+        raise ValueError("property_id is required to connect to Google Analytics")
+
+    if (not cred_path and not cred_base64) or (not property_id):
+        raise ValueError(
+            "credentials_path or credentials_base64 and property_id are required to connect Google Analytics"
+        )
+
+    return {"credentials": credentials, "property_id": property_id[0]}
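
Note the ordering in `convert_minutes_ranges_to_minute_range_objects`: in an input like "1-2", the first number becomes `end_minutes_ago` (the more recent bound) and the second becomes `start_minutes_ago`. A quick check, assuming the package is installed:

    from ingestr.src.google_analytics.helpers import (
        convert_minutes_ranges_to_minute_range_objects,
    )

    ranges = convert_minutes_ranges_to_minute_range_objects("1-2, 5-6")
    print(ranges[0].name)               # "1-2 minutes ago"
    print(ranges[0].start_minutes_ago)  # 2
    print(ranges[0].end_minutes_ago)    # 1
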
ingestr/src/google_sheets/__init__.py
CHANGED

@@ -70,9 +70,9 @@ def google_spreadsheet(
         spreadsheet_id=spreadsheet_id,
         range_names=list(all_range_names),
     )
-    assert len(all_range_names) == len(
-
-    )
+    assert len(all_range_names) == len(all_range_data), (
+        "Google Sheets API must return values for all requested ranges"
+    )

     # get metadata for two first rows of each range
     # first should contain headers

@@ -126,7 +126,7 @@
         headers = get_range_headers(headers_metadata, name)
         if headers is None:
             # generate automatic headers and treat the first row as data
-            headers = [f"col_{idx+1}" for idx in range(len(headers_metadata))]
+            headers = [f"col_{idx + 1}" for idx in range(len(headers_metadata))]
             data_row_metadata = headers_metadata
             rows_data = values[0:]
             logger.warning(

ingestr/src/google_sheets/helpers/data_processing.py
CHANGED

@@ -149,12 +149,12 @@ def get_range_headers(headers_metadata: List[DictStrAny], range_name: str) -> List[str]:
             header_val = str(f"col_{idx + 1}")
         else:
             logger.warning(
-                f"In range {range_name}, header value: {header_val} at position {idx+1} is not a string!"
+                f"In range {range_name}, header value: {header_val} at position {idx + 1} is not a string!"
             )
             return None
     else:
         logger.warning(
-            f"In range {range_name}, header at position {idx+1} is not missing!"
+            f"In range {range_name}, header at position {idx + 1} is not missing!"
        )
         return None
     headers.append(header_val)