ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/stripe_analytics/__init__.py
CHANGED

@@ -7,13 +7,16 @@ import stripe
 from dlt.sources import DltResource
 from pendulum import DateTime
 
-from .helpers import
-
+from .helpers import (
+    async_parallel_pagination,
+    pagination,
+    transform_date,
+)
 
 
 @dlt.source(max_table_nesting=0)
 def stripe_source(
-    endpoints: Tuple[str, ...]
+    endpoints: Tuple[str, ...],
     stripe_secret_key: str = dlt.secrets.value,
     start_date: Optional[DateTime] = None,
     end_date: Optional[DateTime] = None,

@@ -51,32 +54,55 @@ def stripe_source(
         )(endpoint)
 
 
-@dlt.source
-def
-    endpoints: Tuple[str, ...]
+@dlt.source(max_table_nesting=0)
+def async_stripe_source(
+    endpoints: Tuple[str, ...],
     stripe_secret_key: str = dlt.secrets.value,
-
+    start_date: Optional[DateTime] = None,
     end_date: Optional[DateTime] = None,
+    max_workers: int = 4,
+    rate_limit_delay: float = 0.03,
 ) -> Iterable[DltResource]:
     """
-
-
-
-
+    ULTRA-FAST async Stripe source optimized for maximum speed and throughput.
+
+    WARNING: Returns data in RANDOM ORDER for maximum performance.
+    Uses aggressive concurrency and minimal delays to maximize API throughput.
 
     Args:
-        endpoints (
+        endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from.
         stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
-
-
-
-
-
+        start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to 2010-01-01.
+        end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Defaults to today.
+        max_workers (int): Maximum number of concurrent async tasks. Defaults to 40 for maximum speed.
+        rate_limit_delay (float): Minimal delay between requests. Defaults to 0.03 seconds.
+
     Returns:
-        Iterable[DltResource]: Resources with
+        Iterable[DltResource]: Resources with data in RANDOM ORDER (optimized for speed).
     """
     stripe.api_key = stripe_secret_key
     stripe.api_version = "2022-11-15"
+
+    async def async_stripe_resource(endpoint: str):
+        yield async_parallel_pagination(endpoint, max_workers, rate_limit_delay)
+
+    for endpoint in endpoints:
+        yield dlt.resource(
+            async_stripe_resource,
+            name=endpoint,
+            write_disposition="replace",
+        )(endpoint)
+
+
+@dlt.source(max_table_nesting=0)
+def incremental_stripe_source(
+    endpoints: Tuple[str, ...],
+    stripe_secret_key: str = dlt.secrets.value,
+    initial_start_date: Optional[DateTime] = None,
+    end_date: Optional[DateTime] = None,
+) -> Iterable[DltResource]:
+    stripe.api_key = stripe_secret_key
+    stripe.api_version = "2022-11-15"
     start_date_unix = (
         transform_date(initial_start_date) if initial_start_date is not None else -1
     )

@@ -86,17 +112,19 @@ def incremental_stripe_source(
         created: Optional[Any] = dlt.sources.incremental(
             "created",
             initial_value=start_date_unix,
+            end_value=transform_date(end_date) if end_date is not None else None,
             range_end="closed",
             range_start="closed",
         ),
     ) -> Generator[Dict[Any, Any], Any, None]:
-
-
+        yield from pagination(
+            endpoint, start_date=created.last_value, end_date=created.end_value
+        )
 
     for endpoint in endpoints:
         yield dlt.resource(
             incremental_resource,
             name=endpoint,
-            write_disposition="
+            write_disposition="merge",
             primary_key="id",
         )(endpoint)
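For orientation, here is a minimal sketch of how the reworked sources could be driven from a dlt pipeline. The pipeline/dataset names, the "Invoice"/"Charge" endpoint strings, the secret key, and the dates are illustrative placeholders, not values taken from this diff.

import dlt
import pendulum

from ingestr.src.stripe_analytics import async_stripe_source, incremental_stripe_source

# Hypothetical pipeline; any dlt destination would do.
pipeline = dlt.pipeline(
    pipeline_name="stripe_demo",
    destination="duckdb",
    dataset_name="stripe_raw",
)

# Merge-based incremental load, now bounded on both ends via end_value.
info = pipeline.run(
    incremental_stripe_source(
        endpoints=("Invoice",),                   # assumed endpoint name
        stripe_secret_key="sk_test_placeholder",  # placeholder secret
        initial_start_date=pendulum.datetime(2024, 1, 1),
        end_date=pendulum.datetime(2024, 6, 1),
    )
)
print(info)

# Full refresh through the new async source; rows arrive in random order.
pipeline.run(
    async_stripe_source(
        endpoints=("Charge",),
        stripe_secret_key="sk_test_placeholder",
        max_workers=4,
    )
)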
ingestr/src/stripe_analytics/helpers.py
CHANGED

@@ -1,6 +1,9 @@
 """Stripe analytics source helpers"""
 
-
+import asyncio
+import math
+from datetime import datetime, timedelta
+from typing import Any, Dict, Iterable, List, Optional, Union
 
 import stripe
 from dlt.common import pendulum

@@ -39,6 +42,238 @@ def pagination(
             break
 
 
+def _create_time_chunks(start_ts: int, end_ts: int, num_chunks: int) -> List[tuple]:
+    """
+    Divide a time range into equal chunks for parallel processing.
+
+    Args:
+        start_ts (int): Start timestamp
+        end_ts (int): End timestamp
+        num_chunks (int): Number of chunks to create
+
+    Returns:
+        List[tuple]: List of (chunk_start, chunk_end) timestamp pairs
+    """
+    total_duration = end_ts - start_ts
+    chunk_duration = math.ceil(total_duration / num_chunks)
+
+    chunks = []
+    current_start = start_ts
+
+    for i in range(num_chunks):
+        current_end = min(current_start + chunk_duration, end_ts)
+        if current_start < end_ts:
+            chunks.append((current_start, current_end))
+            current_start = current_end
+
+        if current_start >= end_ts:
+            break
+
+    return chunks
+
+
+def _create_adaptive_time_chunks(
+    start_ts: int, end_ts: int, max_workers: int
+) -> List[tuple]:
+    """
+    Create time chunks with adaptive sizing - larger chunks for 2010s (less data expected).
+
+    Args:
+        start_ts (int): Start timestamp
+        end_ts (int): End timestamp
+        max_workers (int): Maximum number of workers
+
+    Returns:
+        List[tuple]: List of (chunk_start, chunk_end) timestamp pairs
+    """
+    chunks = []
+
+    # Key timestamps
+    year_2020_ts = int(pendulum.datetime(2020, 1, 1).timestamp())
+    year_2015_ts = int(pendulum.datetime(2015, 1, 1).timestamp())
+
+    current_start = start_ts
+
+    # Handle 2010-2015: Large chunks (2-3 year periods)
+    if current_start < year_2015_ts:
+        chunk_end = min(year_2015_ts, end_ts)
+        if current_start < chunk_end:
+            # Split 2010-2015 into 2-3 chunks max
+            pre_2015_chunks = _create_time_chunks(
+                current_start, chunk_end, min(3, max_workers)
+            )
+            chunks.extend(pre_2015_chunks)
+            current_start = chunk_end
+
+    # Handle 2015-2020: Medium chunks (6 month to 1 year periods)
+    if current_start < year_2020_ts and current_start < end_ts:
+        chunk_end = min(year_2020_ts, end_ts)
+        if current_start < chunk_end:
+            # Split 2015-2020 into smaller chunks
+            duration_2015_2020 = chunk_end - current_start
+            years_2015_2020 = duration_2015_2020 / (365 * 24 * 60 * 60)
+            num_chunks_2015_2020 = min(
+                max_workers, max(2, int(years_2015_2020 * 2))
+            )  # ~6 months per chunk
+
+            pre_2020_chunks = _create_time_chunks(
+                current_start, chunk_end, num_chunks_2015_2020
+            )
+            chunks.extend(pre_2020_chunks)
+            current_start = chunk_end
+
+    if current_start < end_ts:
+        # Split post-2020 data into daily chunks for maximum granularity
+        current_chunk_start = current_start
+        while current_chunk_start < end_ts:
+            # Calculate end of current day
+            current_date = datetime.fromtimestamp(current_chunk_start)
+            next_day = current_date + timedelta(days=1)
+            chunk_end = min(int(next_day.timestamp()), end_ts)
+
+            chunks.append((current_chunk_start, chunk_end))
+            current_chunk_start = chunk_end
+
+    return chunks
+
+
+def _fetch_chunk_data_streaming(
+    endpoint: str, start_ts: int, end_ts: int
+) -> List[List[TDataItem]]:
+    """
+    Fetch data for a specific time chunk using sequential pagination with memory-efficient approach.
+
+    Args:
+        endpoint (str): The Stripe endpoint to fetch from
+        start_ts (int): Start timestamp for this chunk
+        end_ts (int): End timestamp for this chunk
+
+    Returns:
+        List[List[TDataItem]]: List of batches of data items
+    """
+    # For streaming, we still need to collect the chunk data to maintain order
+    # but we can optimize by not holding all data in memory at once
+    print(
+        f"Fetching chunk {datetime.fromtimestamp(start_ts).strftime('%Y-%m-%d')}-{datetime.fromtimestamp(end_ts).strftime('%Y-%m-%d')}"
+    )
+    chunk_data = []
+    batch_count = 0
+
+    for batch in pagination(endpoint, start_ts, end_ts):
+        chunk_data.append(batch)
+        print(
+            f"Processed {batch_count} batches for chunk {datetime.fromtimestamp(start_ts).strftime('%Y-%m-%d')}-{datetime.fromtimestamp(end_ts).strftime('%Y-%m-%d')}"
+        )
+        batch_count += 1
+
+    return chunk_data
+
+
+async def async_pagination(
+    endpoint: str, start_date: Optional[Any] = None, end_date: Optional[Any] = None
+) -> Iterable[TDataItem]:
+    """
+    Async version of pagination that retrieves data from an endpoint with pagination.
+
+    Args:
+        endpoint (str): The endpoint to retrieve data from.
+        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to None.
+        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to None.
+
+    Returns:
+        Iterable[TDataItem]: Data items retrieved from the endpoint.
+    """
+    starting_after = None
+    while True:
+        response = await stripe_get_data_async(
+            endpoint,
+            start_date=start_date,
+            end_date=end_date,
+            starting_after=starting_after,
+        )
+
+        if len(response["data"]) > 0:
+            starting_after = response["data"][-1]["id"]
+        yield response["data"]
+
+        if not response["has_more"]:
+            break
+
+
+async def async_parallel_pagination(
+    endpoint: str,
+    max_workers: int = 8,
+    rate_limit_delay: float = 5,
+) -> Iterable[TDataItem]:
+    """
+    ULTRA-FAST async parallel pagination - yields data in random order for maximum speed.
+    No ordering constraints - pure performance optimization.
+
+    Args:
+        endpoint (str): The endpoint to retrieve data from.
+        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to 2010-01-01 if None.
+        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to today if None.
+        max_workers (int): Maximum number of concurrent async tasks. Defaults to 8 for balanced speed/rate limit respect.
+        rate_limit_delay (float): Minimal delay between requests. Defaults to 5 seconds.
+
+    Returns:
+        Iterable[TDataItem]: Data items retrieved from the endpoint (RANDOM ORDER FOR SPEED).
+    """
+
+    start_date = pendulum.datetime(2010, 1, 1)
+    end_date = pendulum.now()
+    start_ts = transform_date(start_date)
+    end_ts = transform_date(end_date)
+
+    # Create time chunks with larger chunks for 2010s (less data expected)
+    time_chunks = _create_adaptive_time_chunks(start_ts, end_ts, max_workers)
+
+    # Use asyncio semaphore to control concurrency and respect rate limits
+    semaphore = asyncio.Semaphore(max_workers)
+
+    async def fetch_chunk_with_semaphore(chunk_start: int, chunk_end: int):
+        async with semaphore:
+            return await _fetch_chunk_data_async_fast(endpoint, chunk_start, chunk_end)
+
+    # Create all tasks
+    tasks = [
+        fetch_chunk_with_semaphore(chunk_start, chunk_end)
+        for chunk_start, chunk_end in time_chunks
+    ]
+
+    for coro in asyncio.as_completed(tasks):
+        try:
+            chunk_data = await coro
+
+            for batch in chunk_data:
+                yield batch
+
+        except Exception as exc:
+            print(f"Async chunk processing generated an exception: {exc}")
+            raise exc
+
+
+async def _fetch_chunk_data_async_fast(
+    endpoint: str, start_ts: int, end_ts: int
+) -> List[List[TDataItem]]:
+    """
+    ULTRA-FAST async chunk fetcher - no metadata overhead, direct data return.
+
+    Args:
+        endpoint (str): The Stripe endpoint to fetch from
+        start_ts (int): Start timestamp for this chunk
+        end_ts (int): End timestamp for this chunk
+
+    Returns:
+        List[List[TDataItem]]: Raw batches with zero overhead
+    """
+    chunk_data = []
+    async for batch in async_pagination(endpoint, start_ts, end_ts):
+        chunk_data.append(batch)
+
+    return chunk_data
+
+
 def transform_date(date: Union[str, DateTime, int]) -> int:
     if isinstance(date, str):
         date = pendulum.from_format(date, "%Y-%m-%dT%H:%M:%SZ")

@@ -66,3 +301,53 @@ def stripe_get_data(
         created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
     )
     return dict(resource_dict)
+
+
+async def stripe_get_data_async(
+    resource: str,
+    start_date: Optional[Any] = None,
+    end_date: Optional[Any] = None,
+    **kwargs: Any,
+) -> Dict[Any, Any]:
+    """Async version of stripe_get_data"""
+    if start_date:
+        start_date = transform_date(start_date)
+    if end_date:
+        end_date = transform_date(end_date)
+
+    if resource == "Subscription":
+        kwargs.update({"status": "all"})
+
+    import asyncio
+
+    from stripe import RateLimitError
+
+    max_retries = 50
+    retry_count = 0
+    max_wait_time_ms = 10000
+
+    while retry_count < max_retries:
+        # print(
+        #     f"Fetching {resource} from {datetime.fromtimestamp(start_date).strftime('%Y-%m-%d %H:%M:%S') if start_date else 'None'} to {datetime.fromtimestamp(end_date).strftime('%Y-%m-%d %H:%M:%S') if end_date else 'None'}, retry {retry_count} of {max_retries}",
+        #     flush=True,
+        # )
+        try:
+            resource_dict = await getattr(stripe, resource).list_async(
+                created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
+            )
+            return dict(resource_dict)
+        except RateLimitError:
+            retry_count += 1
+            if retry_count < max_retries:
+                wait_time = min(2**retry_count * 0.001, max_wait_time_ms)
+                print(
+                    f"Got rate limited, sleeping {wait_time} seconds before retrying...",
+                    flush=True,
+                )
+                await asyncio.sleep(wait_time)
+            else:
+                # Re-raise the last exception if we've exhausted retries
+                print(f"✗ Failed to fetch {resource} after {max_retries} retries")
+                raise
+
+    return dict(resource_dict)
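The heart of the new helpers is bounded concurrency over adaptive time chunks: a semaphore caps the number of in-flight chunk fetches while asyncio.as_completed streams back whichever chunk finishes first, which is exactly why the async source returns rows in random order. A self-contained sketch of that pattern with a stubbed fetcher (the toy chunk list and fake fetch are assumptions for illustration, not the package's code):

import asyncio
import random


async def fake_fetch_chunk(start_ts: int, end_ts: int) -> list:
    # Stand-in for _fetch_chunk_data_async_fast: pretend the API call takes a
    # variable amount of time and returns one "batch" for the chunk.
    await asyncio.sleep(random.random() * 0.2)
    return [{"chunk": (start_ts, end_ts)}]


async def parallel_chunks(chunks, max_workers: int = 4):
    semaphore = asyncio.Semaphore(max_workers)  # cap concurrent requests

    async def fetch_with_semaphore(start_ts, end_ts):
        async with semaphore:
            return await fake_fetch_chunk(start_ts, end_ts)

    tasks = [fetch_with_semaphore(s, e) for s, e in chunks]
    # as_completed yields results in completion order, not submission order.
    for coro in asyncio.as_completed(tasks):
        for batch in await coro:
            yield batch


async def main():
    chunks = [(i, i + 1) for i in range(10)]  # toy time chunks
    async for batch in parallel_chunks(chunks):
        print(batch)


asyncio.run(main())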
ingestr/src/stripe_analytics/settings.py
CHANGED

@@ -2,13 +2,65 @@
 
 # the most popular endpoints
 # Full list of the Stripe API endpoints you can find here: https://stripe.com/docs/api.
-ENDPOINTS =
-    "
-    "
-    "
-    "
-    "
-    "
-
-
-
+ENDPOINTS = {
+    "account": "Account",
+    "applepaydomain": "ApplePayDomain",
+    "apple_pay_domain": "ApplePayDomain",
+    "applicationfee": "ApplicationFee",
+    "application_fee": "ApplicationFee",
+    "checkoutsession": "CheckoutSession",
+    "checkout_session": "CheckoutSession",
+    "coupon": "Coupon",
+    "charge": "Charge",
+    "customer": "Customer",
+    "dispute": "Dispute",
+    "paymentintent": "PaymentIntent",
+    "payment_intent": "PaymentIntent",
+    "paymentlink": "PaymentLink",
+    "payment_link": "PaymentLink",
+    "paymentmethod": "PaymentMethod",
+    "payment_method": "PaymentMethod",
+    "paymentmethoddomain": "PaymentMethodDomain",
+    "payment_method_domain": "PaymentMethodDomain",
+    "payout": "Payout",
+    "plan": "Plan",
+    "price": "Price",
+    "product": "Product",
+    "promotioncode": "PromotionCode",
+    "promotion_code": "PromotionCode",
+    "quote": "Quote",
+    "refund": "Refund",
+    "review": "Review",
+    "setupattempt": "SetupAttempt",
+    "setup_attempt": "SetupAttempt",
+    "setupintent": "SetupIntent",
+    "setup_intent": "SetupIntent",
+    "shippingrate": "ShippingRate",
+    "shipping_rate": "ShippingRate",
+    "subscription": "Subscription",
+    "subscriptionitem": "SubscriptionItem",
+    "subscription_item": "SubscriptionItem",
+    "subscriptionschedule": "SubscriptionSchedule",
+    "subscription_schedule": "SubscriptionSchedule",
+    "transfer": "Transfer",
+    "taxcode": "TaxCode",
+    "tax_code": "TaxCode",
+    "taxid": "TaxId",
+    "tax_id": "TaxId",
+    "taxrate": "TaxRate",
+    "tax_rate": "TaxRate",
+    "topup": "Topup",
+    "top_up": "Topup",
+    "webhookendpoint": "WebhookEndpoint",
+    "webhook_endpoint": "WebhookEndpoint",
+    "invoice": "Invoice",
+    "invoiceitem": "InvoiceItem",
+    "invoice_item": "InvoiceItem",
+    "invoicelineitem": "InvoiceLineItem",
+    "invoice_line_item": "InvoiceLineItem",
+    "balancetransaction": "BalanceTransaction",
+    "balance_transaction": "BalanceTransaction",
+    "creditnote": "CreditNote",
+    "credit_note": "CreditNote",
+    "event": "Event",
+}
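The rewritten ENDPOINTS mapping accepts both collapsed and snake_case table names and resolves them to the Stripe class name that the helpers later look up with getattr(stripe, resource). A small sketch of that resolution step (the subset of the mapping and the resolve_endpoint helper are illustrative, not part of the package):

import stripe

# Illustrative subset of the ENDPOINTS mapping above.
ENDPOINTS = {
    "payment_intent": "PaymentIntent",
    "paymentintent": "PaymentIntent",
    "balance_transaction": "BalanceTransaction",
}


def resolve_endpoint(table_name: str) -> str:
    # Hypothetical helper: normalize user input before the lookup.
    key = table_name.strip().lower()
    if key not in ENDPOINTS:
        raise ValueError(f"Unsupported Stripe endpoint: {table_name}")
    return ENDPOINTS[key]


resource = resolve_endpoint("payment_intent")  # -> "PaymentIntent"
stripe_class = getattr(stripe, resource)       # the class the helpers page through
print(resource, stripe_class)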
ingestr/src/telemetry/event.py
CHANGED

@@ -1,13 +1,4 @@
 import os
-import platform
-
-import machineid
-import rudderstack.analytics as rudder_analytics  # type: ignore
-
-from ingestr.src.version import __version__  # type: ignore
-
-rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
-rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"
 
 
 def track(event_name, event_properties: dict):

@@ -16,6 +7,16 @@ def track(event_name, event_properties: dict):
     ):
         return
 
+    import platform
+
+    import machineid
+    import rudderstack.analytics as rudder_analytics  # type: ignore
+
+    from ingestr.src.version import __version__  # type: ignore
+
+    rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
+    rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"
+
     try:
         if not event_properties:
             event_properties = {}
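The telemetry change defers the platform/machineid/rudderstack imports and the write-key configuration until an event is actually about to be sent, so importing the module or running with tracking disabled skips that cost entirely. A generic sketch of the same lazy-import pattern (the flag name and the uuid stand-in are placeholders, not ingestr's actual implementation):

import os


def track(event_name: str, properties: dict | None = None) -> None:
    # Bail out before importing anything heavy when telemetry is disabled.
    if os.environ.get("TELEMETRY_DISABLED") == "1":  # placeholder flag name
        return

    # Heavy imports and client configuration happen only when an event fires.
    import uuid  # stand-in for machineid / rudderstack in the real module

    payload = {
        "event": event_name,
        "anonymous_id": str(uuid.uuid4()),
        **(properties or {}),
    }
    print("would send:", payload)  # the real code calls rudder_analytics.track(...)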
ingestr/src/tiktok_ads/__init__.py
CHANGED

@@ -112,7 +112,8 @@ def tiktok_source(
         datetime=(
             dlt.sources.incremental(
                 incremental_loading_param,
-                start_date,
+                initial_value=start_date,
+                end_value=end_date,
                 range_end="closed",
                 range_start="closed",
             )

@@ -120,15 +121,20 @@ def tiktok_source(
             else None
         ),
     ) -> Iterable[TDataItem]:
-
+        start_date_tz_adjusted = start_date.in_tz(timezone)
+        end_date_tz_adjusted = end_date.in_tz(timezone)
 
         if datetime is not None:
-
-
+            start_date_tz_adjusted = ensure_pendulum_datetime(
+                datetime.last_value
+            ).in_tz(timezone)
+            end_date_tz_adjusted = ensure_pendulum_datetime(datetime.end_value).in_tz(
+                timezone
+            )
 
         list_of_interval = find_intervals(
-            current_date=
-            end_date=
+            current_date=start_date_tz_adjusted,
+            end_date=end_date_tz_adjusted,
             interval_days=interval_days,
         )
 
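The TikTok resource now hands both initial_value and end_value to dlt.sources.incremental and reads last_value/end_value back inside the resource to bound each run. A minimal sketch of that pattern in isolation (the resource name, field, and dates are assumptions):

import dlt
import pendulum


@dlt.resource(name="events", write_disposition="merge", primary_key="id")
def events(
    updated=dlt.sources.incremental(
        "updated_at",
        initial_value=pendulum.datetime(2024, 1, 1).isoformat(),
        end_value=pendulum.datetime(2024, 2, 1).isoformat(),
        range_start="closed",
        range_end="closed",
    ),
):
    # Window boundaries come from the incremental object, not the function args.
    start, end = updated.last_value, updated.end_value
    # A real resource would query an API between start and end here.
    yield {"id": 1, "updated_at": start}

# Typically passed to pipeline.run(...); shown here only to illustrate the bounds.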
ingestr/src/trustpilot/__init__.py
ADDED

@@ -0,0 +1,48 @@
+"""Trustpilot source for ingesting reviews."""
+
+from typing import Any, Dict, Generator, Iterable
+
+import dlt
+import pendulum
+from dlt.sources import DltResource
+
+from .client import TrustpilotClient
+
+
+@dlt.source()
+def trustpilot_source(
+    business_unit_id: str,
+    start_date: str,
+    end_date: str | None,
+    api_key: str,
+    per_page: int = 1000,
+) -> Iterable[DltResource]:
+    """Return resources for Trustpilot."""
+
+    client = TrustpilotClient(api_key=api_key)
+
+    @dlt.resource(name="reviews", write_disposition="merge", primary_key="id")
+    def reviews(
+        dateTime=(
+            dlt.sources.incremental(
+                "updated_at",
+                initial_value=start_date,
+                end_value=end_date,
+                range_start="closed",
+                range_end="closed",
+            )
+        ),
+    ) -> Generator[Dict[str, Any], None, None]:
+        if end_date is None:
+            end_dt = pendulum.now(tz="UTC").isoformat()
+        else:
+            end_dt = dateTime.end_value
+        start_dt = dateTime.last_value
+        yield from client.paginated_reviews(
+            business_unit_id=business_unit_id,
+            per_page=per_page,
+            updated_since=start_dt,
+            end_date=end_dt,
+        )
+
+    yield reviews
ingestr/src/trustpilot/client.py
ADDED

@@ -0,0 +1,48 @@
+"""Simple Trustpilot API client."""
+
+from typing import Any, Dict, Iterable
+
+import pendulum
+from dlt.sources.helpers import requests
+
+
+class TrustpilotClient:
+    """Client for the Trustpilot public API."""
+
+    def __init__(self, api_key: str) -> None:
+        self.api_key = api_key
+        self.base_url = "https://api.trustpilot.com/v1"
+
+    def _get(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
+        params = dict(params)
+        params["apikey"] = self.api_key
+        response = requests.get(f"{self.base_url}{endpoint}", params=params)
+        response.raise_for_status()
+        return response.json()
+
+    def paginated_reviews(
+        self,
+        business_unit_id: str,
+        updated_since: str,
+        end_date: str,
+        per_page: int = 1000,
+    ) -> Iterable[Dict[str, Any]]:
+        page = 1
+        while True:
+            params: Dict[str, Any] = {"perPage": per_page, "page": page}
+            if updated_since:
+                params["updatedSince"] = updated_since
+            data = self._get(f"/business-units/{business_unit_id}/reviews", params)
+            reviews = data.get("reviews", data)
+            if not reviews:
+                break
+            for review in reviews:
+                end_date_dt = pendulum.parse(end_date)
+                review["updated_at"] = review["updatedAt"]
+                review_dt = pendulum.parse(review["updated_at"])
+                if review_dt > end_date_dt:  # type: ignore
+                    continue
+                yield review
+            if len(reviews) < per_page:
+                break
+            page += 1