ingestr 0.10.4__py3-none-any.whl → 0.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/src/asana_source/__init__.py +264 -0
- ingestr/src/asana_source/helpers.py +16 -0
- ingestr/src/asana_source/settings.py +144 -0
- ingestr/src/dynamodb/__init__.py +86 -0
- ingestr/src/factory.py +48 -58
- ingestr/src/sources.py +181 -4
- ingestr/src/tiktok_ads/__init__.py +106 -0
- ingestr/src/tiktok_ads/tiktok_helpers.py +112 -0
- ingestr/src/time.py +11 -0
- ingestr/src/version.py +1 -1
- ingestr/src/zendesk/__init__.py +1 -0
- {ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/METADATA +17 -5
- {ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/RECORD +16 -9
- {ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/WHEEL +1 -1
- {ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/entry_points.txt +0 -0
- {ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/asana_source/__init__.py ADDED
@@ -0,0 +1,264 @@
+"""
+This source provides data extraction from the Asana platform via their API.
+
+It defines several functions to fetch data from different parts of Asana, including
+workspaces, projects, sections, tags, tasks, stories, teams, and users. These
+functions are meant to be used as part of a data loading pipeline.
+"""
+
+import typing as t
+from typing import Any, Iterable
+
+import dlt
+from dlt.common.typing import TDataItem
+
+from .helpers import get_client
+from .settings import (
+    DEFAULT_START_DATE,
+    PROJECT_FIELDS,
+    REQUEST_TIMEOUT,
+    SECTION_FIELDS,
+    STORY_FIELDS,
+    TAG_FIELDS,
+    TASK_FIELDS,
+    TEAMS_FIELD,
+    USER_FIELDS,
+    WORKSPACE_FIELDS,
+)
+
+
+@dlt.source
+def asana_source() -> Any:  # should be Sequence[DltResource]:
+    """
+    The main function that runs all the other functions to fetch data from Asana.
+    Returns:
+        Sequence[DltResource]: A sequence of DltResource objects containing the fetched data.
+    """
+    return [
+        workspaces,
+        projects,
+        sections,
+        tags,
+        tasks,
+        stories,
+        teams,
+        users,
+    ]
+
+
+@dlt.resource(write_disposition="replace")
+def workspaces(
+    access_token: str = dlt.secrets.value, fields: Iterable[str] = WORKSPACE_FIELDS
+) -> Iterable[TDataItem]:
+    """
+    Fetches and returns a list of workspaces from Asana.
+    Args:
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of workspace fields to be retrieved from the Asana API.
+    Yields:
+        dict: The workspace data.
+    """
+    yield from get_client(access_token).workspaces.find_all(opt_fields=",".join(fields))
+
+
+@dlt.transformer(
+    data_from=workspaces,
+    write_disposition="replace",
+)
+@dlt.defer
+def projects(
+    workspace: TDataItem,
+    access_token: str = dlt.secrets.value,
+    fields: Iterable[str] = PROJECT_FIELDS,
+) -> Iterable[TDataItem]:
+    """
+    Fetches and returns a list of projects for a given workspace from Asana.
+    Args:
+        workspace (dict): The workspace data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of project fields to be retrieved from the Asana API.
+    Returns:
+        list[dict]: The project data for the given workspace.
+    """
+    return list(
+        get_client(access_token).projects.find_all(
+            workspace=workspace["gid"],
+            timeout=REQUEST_TIMEOUT,
+            opt_fields=",".join(fields),
+        )
+    )
+
+
+@dlt.transformer(
+    data_from=projects,
+    write_disposition="replace",
+)
+@dlt.defer
+def sections(
+    project_array: t.List[TDataItem],
+    access_token: str = dlt.secrets.value,
+    fields: Iterable[str] = SECTION_FIELDS,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all sections for a given project from Asana.
+    Args:
+        project_array (list): The project data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of section fields to be retrieved from the Asana API.
+    Returns:
+        list[dict]: The sections data for the given project.
+    """
+    return [
+        section
+        for project in project_array
+        for section in get_client(access_token).sections.get_sections_for_project(
+            project_gid=project["gid"],
+            timeout=REQUEST_TIMEOUT,
+            opt_fields=",".join(fields),
+        )
+    ]
+
+
+@dlt.transformer(data_from=workspaces, write_disposition="replace")
+@dlt.defer
+def tags(
+    workspace: TDataItem,
+    access_token: str = dlt.secrets.value,
+    fields: Iterable[str] = TAG_FIELDS,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all tags for a given workspace from Asana.
+    Args:
+        workspace (dict): The workspace data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of tag fields to be retrieved from the Asana API.
+    Returns:
+        list[dict]: The tags data for the given workspace.
+    """
+    return [
+        tag
+        for tag in get_client(access_token).tags.find_all(
+            workspace=workspace["gid"],
+            timeout=REQUEST_TIMEOUT,
+            opt_fields=",".join(fields),
+        )
+    ]
+
+
+@dlt.transformer(data_from=projects, write_disposition="merge", primary_key="gid")
+def tasks(
+    project_array: t.List[TDataItem],
+    access_token: str = dlt.secrets.value,
+    modified_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "modified_at", initial_value=DEFAULT_START_DATE
+    ),
+    fields: Iterable[str] = TASK_FIELDS,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all tasks for a given project from Asana.
+    Args:
+        project_array (list): The project data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        modified_at (str): The date from which to fetch modified tasks.
+        fields (Iterable[str]): The list of task fields to be retrieved from the Asana API.
+    Yields:
+        dict: The task data for the given project.
+    """
+    yield from (
+        task
+        for project in project_array
+        for task in get_client(access_token).tasks.find_all(
+            project=project["gid"],
+            timeout=REQUEST_TIMEOUT,
+            modified_since=modified_at.start_value,
+            opt_fields=",".join(fields),
+        )
+    )
+
+
+@dlt.transformer(
+    data_from=tasks,
+    write_disposition="append",
+)
+@dlt.defer
+def stories(
+    task: TDataItem,
+    access_token: str = dlt.secrets.value,
+    fields: Iterable[str] = STORY_FIELDS,
+) -> Iterable[TDataItem]:
+    """
+    Fetches stories for a task from Asana.
+    Args:
+        task (dict): The task data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of story fields to be retrieved from the Asana API.
+    Returns:
+        list[dict]: The stories data for the given task.
+    """
+    return [
+        story
+        for story in get_client(access_token).stories.get_stories_for_task(
+            task_gid=task["gid"],
+            timeout=REQUEST_TIMEOUT,
+            opt_fields=",".join(fields),
+        )
+    ]
+
+
+@dlt.transformer(
+    data_from=workspaces,
+    write_disposition="replace",
+)
+@dlt.defer
+def teams(
+    workspace: TDataItem,
+    access_token: str = dlt.secrets.value,
+    fields: Iterable[str] = TEAMS_FIELD,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all teams for a given workspace from Asana.
+    Args:
+        workspace (dict): The workspace data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of team fields to be retrieved from the Asana API.
+    Returns:
+        list[dict]: The teams data for the given workspace.
+    """
+    return [
+        team
+        for team in get_client(access_token).teams.find_by_organization(
+            organization=workspace["gid"],
+            timeout=REQUEST_TIMEOUT,
+            opt_fields=",".join(fields),
+        )
+    ]
+
+
+@dlt.transformer(
+    data_from=workspaces,
+    write_disposition="replace",
+)
+@dlt.defer
+def users(
+    workspace: TDataItem,
+    access_token: str = dlt.secrets.value,
+    fields: Iterable[str] = USER_FIELDS,
+) -> Iterable[TDataItem]:
+    """
+    Fetches all users for a given workspace from Asana.
+    Args:
+        workspace (dict): The workspace data.
+        access_token (str): The access token to authenticate the Asana API client, provided in the secrets file.
+        fields (Iterable[str]): The list of user fields to be retrieved from the Asana API.
+    Returns:
+        list[dict]: The user data for the given workspace.
+    """
+    return [
+        user
+        for user in get_client(access_token).users.find_all(
+            workspace=workspace["gid"],
+            timeout=REQUEST_TIMEOUT,
+            opt_fields=",".join(fields),
+        )
+    ]
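
For orientation, a minimal sketch of how this source would be consumed in a dlt pipeline. The pipeline name, destination, and dataset name are illustrative, and the access token is assumed to be configured under sources.asana_source.access_token in the dlt secrets (which is what AsanaSource.dlt_source in sources.py does programmatically):

import dlt

from ingestr.src.asana_source import asana_source

# hypothetical wiring; "duckdb" is chosen only for illustration
pipeline = dlt.pipeline(
    pipeline_name="asana_demo",
    destination="duckdb",
    dataset_name="asana_raw",
)

# dlt walks the transformer graph: workspaces feed projects, projects feed tasks
info = pipeline.run(asana_source().with_resources("workspaces", "projects", "tasks"))
print(info)
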
ingestr/src/asana_source/helpers.py ADDED
@@ -0,0 +1,16 @@
+"""Asana source helpers"""
+
+from asana import Client as AsanaClient
+
+
+def get_client(
+    access_token: str,
+) -> AsanaClient:
+    """
+    Returns an Asana API client.
+    Args:
+        access_token (str): The access token to authenticate the Asana API client.
+    Returns:
+        AsanaClient: The Asana API client.
+    """
+    return AsanaClient.access_token(access_token)
ingestr/src/asana_source/settings.py ADDED
@@ -0,0 +1,144 @@
+"""Asana source settings and constants"""
+
+# Default start date for Asana API requests; only tasks started after this date will be collected
+DEFAULT_START_DATE = "2010-01-01T00:00:00.000Z"
+
+# Asana API request timeout
+REQUEST_TIMEOUT = 300
+
+# List of workspace fields to be retrieved from the Asana API
+WORKSPACE_FIELDS = ("gid", "name", "is_organization", "resource_type", "email_domains")
+
+# List of project fields to be retrieved from the Asana API
+PROJECT_FIELDS = (
+    "name",
+    "gid",
+    "owner",
+    "current_status",
+    "custom_fields",
+    "default_view",
+    "due_date",
+    "due_on",
+    "is_template",
+    "created_at",
+    "modified_at",
+    "start_on",
+    "archived",
+    "public",
+    "members",
+    "followers",
+    "color",
+    "notes",
+    "icon",
+    "permalink_url",
+    "workspace",
+    "team",
+    "resource_type",
+    "current_status_update",
+    "custom_field_settings",
+    "completed",
+    "completed_at",
+    "completed_by",
+    "created_from_template",
+    "project_brief",
+)
+
+# List of section fields to be retrieved from the Asana API
+SECTION_FIELDS = (
+    "gid",
+    "resource_type",
+    "name",
+    "created_at",
+    "project",
+    "projects",
+)
+
+# List of tag fields to be retrieved from the Asana API
+TAG_FIELDS = (
+    "gid",
+    "resource_type",
+    "created_at",
+    "followers",
+    "name",
+    "color",
+    "notes",
+    "permalink_url",
+    "workspace",
+)
+
+# List of task fields to be retrieved from the Asana API
+TASK_FIELDS = (
+    "gid",
+    "resource_type",
+    "name",
+    "approval_status",
+    "assignee_status",
+    "created_at",
+    "assignee",
+    "start_on",
+    "start_at",
+    "due_on",
+    "due_at",
+    "completed",
+    "completed_at",
+    "completed_by",
+    "modified_at",
+    "dependencies",
+    "dependents",
+    "external",
+    "notes",
+    "num_subtasks",
+    "resource_subtype",
+    "followers",
+    "parent",
+    "permalink_url",
+    "tags",
+    "workspace",
+    "custom_fields",
+    "project",
+    "memberships",
+    "memberships.project.name",
+    "memberships.section.name",
+)
+
+# List of story fields to be retrieved from the Asana API
+STORY_FIELDS = (
+    "gid",
+    "resource_type",
+    "created_at",
+    "created_by",
+    "resource_subtype",
+    "text",
+    "is_pinned",
+    "assignee",
+    "dependency",
+    "follower",
+    "new_section",
+    "old_section",
+    "new_text_value",
+    "old_text_value",
+    "preview",
+    "project",
+    "source",
+    "story",
+    "tag",
+    "target",
+    "task",
+    "sticker_name",
+    "custom_field",
+    "type",
+)
+
+# List of team fields to be retrieved from the Asana API
+TEAMS_FIELD = (
+    "gid",
+    "resource_type",
+    "name",
+    "description",
+    "organization",
+    "permalink_url",
+    "visibility",
+)
+
+# List of user fields to be retrieved from the Asana API
+USER_FIELDS = ("gid", "resource_type", "name", "email", "photo", "workspaces")
ingestr/src/dynamodb/__init__.py ADDED
@@ -0,0 +1,86 @@
+from dataclasses import dataclass
+from typing import Optional
+
+import boto3
+import dlt
+from boto3.dynamodb.conditions import Attr
+from dlt.common.configuration.specs import AwsCredentials
+
+PAGINATION_KEY = "LastEvaluatedKey"
+FILTER_KEY = "FilterExpression"
+DATA_KEY = "Items"
+
+
+@dataclass
+class TableSchema:
+    primary_key: Optional[str]
+    sort_key: Optional[str]
+
+
+def parseSchema(table) -> TableSchema:
+    schema = TableSchema(None, None)
+    for key in table.key_schema:
+        match key["KeyType"]:
+            case "HASH":
+                schema.primary_key = key["AttributeName"]
+            case "RANGE":
+                schema.sort_key = key["AttributeName"]
+
+    if schema.primary_key is None:
+        raise ValueError(f"Table {table.name} has no primary key!")
+
+    return schema
+
+
+@dlt.source
+def dynamodb(
+    table_name: str,
+    credentials: AwsCredentials,
+    incremental: Optional[dlt.sources.incremental] = None,
+):
+    sesh = boto3.Session(
+        aws_access_key_id=credentials.aws_access_key_id,
+        aws_secret_access_key=credentials.aws_secret_access_key,
+        region_name=credentials.region_name,
+    )
+    db = sesh.resource("dynamodb", endpoint_url=credentials.endpoint_url)
+    table = db.Table(table_name)
+    schema = parseSchema(table)
+    resource = dlt.resource(
+        dynamodb_table,
+        primary_key=schema.primary_key,
+    )
+
+    yield resource(table, incremental)
+
+
+def dynamodb_table(
+    table,
+    incremental: Optional[dlt.sources.incremental] = None,
+):
+    args = build_scan_args(incremental)
+    scan = table.scan(**args)
+    while True:
+        yield from scan[DATA_KEY]
+        if PAGINATION_KEY not in scan:
+            break
+        scan = table.scan(ExclusiveStartKey=scan[PAGINATION_KEY], **args)
+
+
+def build_scan_args(
+    incremental: Optional[dlt.sources.incremental] = None,
+):
+    scan_args = {}
+
+    if incremental is None:
+        return scan_args
+
+    criteria = None
+    if incremental.last_value:
+        criteria = Attr(incremental.cursor_path).gte(incremental.last_value)
+    if incremental.end_value:
+        criteria = Attr(incremental.cursor_path).between(
+            incremental.last_value, incremental.end_value
+        )
+    if criteria is not None:
+        scan_args[FILTER_KEY] = criteria
+
+    return scan_args
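
The scan loop above relies on DynamoDB's pagination contract: each Scan response carries Items plus, when results are truncated, a LastEvaluatedKey that is passed back as ExclusiveStartKey on the next call. For the incremental filter, a sketch of what build_scan_args assembles, using a hypothetical cursor attribute named updated_at and illustrative bounds:

from boto3.dynamodb.conditions import Attr

# with only a start value, the scan arguments are equivalent to:
scan_args = {"FilterExpression": Attr("updated_at").gte("2024-01-01T00:00:00")}

# with both bounds, the filter becomes a between() condition:
scan_args = {
    "FilterExpression": Attr("updated_at").between(
        "2024-01-01T00:00:00", "2024-06-30T23:59:59"
    )
}
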
ingestr/src/factory.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Protocol
+from typing import Dict, Protocol, Type
 from urllib.parse import urlparse
 
 from dlt.common.destination import Destination
@@ -20,7 +20,9 @@ from ingestr.src.sources import (
     AirtableSource,
     AppsflyerSource,
     ArrowMemoryMappedSource,
+    AsanaSource,
     ChessSource,
+    DynamoDBSource,
     FacebookAdsSource,
     GoogleSheetsSource,
     GorgiasSource,
@@ -35,6 +37,7 @@ from ingestr.src.sources import (
     SlackSource,
     SqlSource,
     StripeAnalyticsSource,
+    TikTokSource,
     ZendeskSource,
 )
 
@@ -92,6 +95,46 @@ def parse_scheme_from_uri(uri: str) -> str:
 class SourceDestinationFactory:
     source_scheme: str
     destination_scheme: str
+    sources: Dict[str, Type[SourceProtocol]] = {
+        "csv": LocalCsvSource,
+        "mongodb": MongoDbSource,
+        "notion": NotionSource,
+        "gsheets": GoogleSheetsSource,
+        "shopify": ShopifySource,
+        "gorgias": GorgiasSource,
+        "chess": ChessSource,
+        "stripe": StripeAnalyticsSource,
+        "facebookads": FacebookAdsSource,
+        "slack": SlackSource,
+        "hubspot": HubspotSource,
+        "airtable": AirtableSource,
+        "klaviyo": KlaviyoSource,
+        "appsflyer": AppsflyerSource,
+        "kafka": KafkaSource,
+        "adjust": AdjustSource,
+        "zendesk": ZendeskSource,
+        "mmap": ArrowMemoryMappedSource,
+        "s3": S3Source,
+        "dynamodb": DynamoDBSource,
+        "asana": AsanaSource,
+        "tiktok": TikTokSource,
+    }
+    destinations: Dict[str, Type[DestinationProtocol]] = {
+        "bigquery": BigQueryDestination,
+        "databricks": DatabricksDestination,
+        "duckdb": DuckDBDestination,
+        "mssql": MsSQLDestination,
+        "postgres": PostgresDestination,
+        "postgresql": PostgresDestination,
+        "postgresql+psycopg2": PostgresDestination,
+        "redshift": RedshiftDestination,
+        "redshift+psycopg2": RedshiftDestination,
+        "redshift+redshift_connector": RedshiftDestination,
+        "snowflake": SnowflakeDestination,
+        "synapse": SynapseDestination,
+        "csv": CsvDestination,
+        "athena": AthenaDestination,
+    }
 
     def __init__(self, source_uri: str, destination_uri: str):
         self.source_uri = source_uri
@@ -104,67 +147,14 @@ class SourceDestinationFactory:
     def get_source(self) -> SourceProtocol:
         if self.source_scheme in SQL_SOURCE_SCHEMES:
             return SqlSource()
-        elif self.source_scheme == "csv":
-            return LocalCsvSource()
-        elif self.source_scheme == "mongodb":
-            return MongoDbSource()
-        elif self.source_scheme == "notion":
-            return NotionSource()
-        elif self.source_scheme == "gsheets":
-            return GoogleSheetsSource()
-        elif self.source_scheme == "shopify":
-            return ShopifySource()
-        elif self.source_scheme == "gorgias":
-            return GorgiasSource()
-        elif self.source_scheme == "chess":
-            return ChessSource()
-        elif self.source_scheme == "stripe":
-            return StripeAnalyticsSource()
-        elif self.source_scheme == "facebookads":
-            return FacebookAdsSource()
-        elif self.source_scheme == "slack":
-            return SlackSource()
-        elif self.source_scheme == "hubspot":
-            return HubspotSource()
-        elif self.source_scheme == "airtable":
-            return AirtableSource()
-        elif self.source_scheme == "klaviyo":
-            return KlaviyoSource()
-        elif self.source_scheme == "appsflyer":
-            return AppsflyerSource()
-        elif self.source_scheme == "kafka":
-            return KafkaSource()
-        elif self.source_scheme == "adjust":
-            return AdjustSource()
-        elif self.source_scheme == "zendesk":
-            return ZendeskSource()
-        elif self.source_scheme == "mmap":
-            return ArrowMemoryMappedSource()
-        elif self.source_scheme == "s3":
-            return S3Source()
+        elif self.source_scheme in self.sources:
+            return self.sources[self.source_scheme]()
         else:
             raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
 
     def get_destination(self) -> DestinationProtocol:
-        match = {
-            "bigquery": BigQueryDestination(),
-            "databricks": DatabricksDestination(),
-            "duckdb": DuckDBDestination(),
-            "mssql": MsSQLDestination(),
-            "postgres": PostgresDestination(),
-            "postgresql": PostgresDestination(),
-            "postgresql+psycopg2": PostgresDestination(),
-            "redshift": RedshiftDestination(),
-            "redshift+psycopg2": RedshiftDestination(),
-            "redshift+redshift_connector": RedshiftDestination(),
-            "snowflake": SnowflakeDestination(),
-            "synapse": SynapseDestination(),
-            "csv": CsvDestination(),
-            "athena": AthenaDestination(),
-        }
-
-        if self.destination_scheme in match:
-            return match[self.destination_scheme]
+        if self.destination_scheme in self.destinations:
+            return self.destinations[self.destination_scheme]()
        else:
             raise ValueError(
                 f"Unsupported destination scheme: {self.destination_scheme}"
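
The refactor replaces two long elif chains with class-level lookup tables, so registering a new connector becomes a one-line dict entry. A hedged sketch of how the factory is driven; both URIs are illustrative placeholders:

from ingestr.src.factory import SourceDestinationFactory

factory = SourceDestinationFactory(
    "dynamodb://dynamodb.eu-west-1.amazonaws.com?access_key_id=KEY&secret_access_key=SECRET",
    "duckdb:///ingestr.db",
)
source = factory.get_source()            # dict lookup, returns DynamoDBSource()
destination = factory.get_destination()  # dict lookup, returns DuckDBDestination()
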
ingestr/src/sources.py CHANGED
@@ -1,9 +1,11 @@
 import base64
 import csv
 import json
+import os
+import re
 from datetime import date
 from typing import Any, Callable, Optional
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import ParseResult, parse_qs, quote, urlparse
 
 import dlt
 import pendulum
@@ -20,7 +22,9 @@ from ingestr.src.adjust.adjust_helpers import parse_filters
 from ingestr.src.airtable import airtable_source
 from ingestr.src.appsflyer._init_ import appsflyer_source
 from ingestr.src.arrow import memory_mapped_arrow
+from ingestr.src.asana_source import asana_source
 from ingestr.src.chess import source
+from ingestr.src.dynamodb import dynamodb
 from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
 from ingestr.src.filesystem import readers
 from ingestr.src.filters import table_adapter_exclude_columns
@@ -36,6 +40,8 @@ from ingestr.src.shopify import shopify_source
 from ingestr.src.slack import slack_source
 from ingestr.src.stripe_analytics import stripe_source
 from ingestr.src.table_definition import table_string_to_dataclass
+from ingestr.src.tiktok_ads import tiktok_source
+from ingestr.src.time import isotime
 from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
 from ingestr.src.zendesk.helpers.credentials import (
     ZendeskCredentialsOAuth,
@@ -114,8 +120,6 @@ class ArrowMemoryMappedSource:
         return False
 
     def dlt_source(self, uri: str, table: str, **kwargs):
-        import os
-
         incremental = None
         if kwargs.get("incremental_key"):
             start_value = kwargs.get("interval_start")
@@ -952,7 +956,7 @@ class S3Source:
         )
 
         parsed_uri = urlparse(uri)
-        source_fields = parse_qs(parsed_uri.query)
+        source_fields = parse_qs(quote(parsed_uri.query, safe="=&"))
         access_key_id = source_fields.get("access_key_id")
         if not access_key_id:
             raise ValueError("access_key_id is required to connect to S3")
@@ -994,3 +998,176 @@ class S3Source:
         return readers(
             bucket_url=bucket_url, credentials=aws_credentials, file_glob=path_to_file
         ).with_resources(endpoint)
+
+
+class TikTokSource:
+    # tiktok://?access_token=<access_token>&advertiser_id=<advertiser_id>
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        endpoint = "custom_reports"
+
+        parsed_uri = urlparse(uri)
+        source_fields = parse_qs(parsed_uri.query)
+
+        access_token = source_fields.get("access_token")
+        if not access_token:
+            raise ValueError("access_token is required to connect to TikTok")
+
+        time_zone = source_fields.get("time_zone", ["UTC"])
+
+        advertiser_id = source_fields.get("advertiser_id")
+        if not advertiser_id:
+            raise ValueError("advertiser_id is required to connect to TikTok")
+
+        start_date = pendulum.now().subtract(days=90).in_tz(time_zone[0])
+        end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(time_zone[0])
+
+        interval_start = kwargs.get("interval_start")
+        if interval_start is not None:
+            start_date = ensure_pendulum_datetime(interval_start).in_tz(time_zone[0])
+
+        interval_end = kwargs.get("interval_end")
+        if interval_end is not None:
+            end_date = ensure_pendulum_datetime(interval_end).in_tz(time_zone[0])
+
+        page_size = kwargs.get("page_size")
+        if page_size is not None and not isinstance(page_size, int):
+            page_size = int(page_size)
+
+        if page_size is not None and page_size > 1000:
+            page_size = 1000
+
+        if table.startswith("custom:"):
+            fields = table.split(":", 3)
+            if len(fields) != 3 and len(fields) != 4:
+                raise ValueError(
+                    "Invalid TikTok custom table format. Expected format: custom:<dimensions>:<metrics> or custom:<dimensions>:<metrics>:<filters>"
+                )
+
+            dimensions = fields[1].replace(" ", "").split(",")
+            if (
+                "campaign_id" not in dimensions
+                and "advertiser_id" not in dimensions
+                and "adgroup_id" not in dimensions
+                and "ad_id" not in dimensions
+            ):
+                raise ValueError(
+                    "You must provide one ID dimension from the following options: [campaign_id, advertiser_id, adgroup_id, ad_id]"
+                )
+
+            metrics = fields[2].replace(" ", "").split(",")
+            filters = []
+            if len(fields) == 4:
+                filters = fields[3].replace(" ", "").split(",")
+            return tiktok_source(
+                start_date=start_date,
+                end_date=end_date,
+                access_token=access_token[0],
+                advertiser_id=advertiser_id[0],
+                time_zone=time_zone[0],
+                dimensions=dimensions,
+                metrics=metrics,
+                filters=filters,
+                page_size=page_size,
+            ).with_resources(endpoint)
+
+
+class AsanaSource:
+    resources = [
+        "workspaces",
+        "projects",
+        "sections",
+        "tags",
+        "tasks",
+        "stories",
+        "teams",
+        "users",
+    ]
+
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        workspace = parsed_uri.hostname
+        access_token = params.get("access_token")
+
+        if not workspace:
+            raise ValueError("workspace ID must be specified in the URI")
+
+        if not access_token:
+            raise ValueError("access_token is required for connecting to Asana")
+
+        if table not in self.resources:
+            raise ValueError(
+                f"Resource '{table}' is not supported for the Asana source yet; if you are interested in it, please create a GitHub issue at https://github.com/bruin-data/ingestr"
+            )
+
+        dlt.secrets["sources.asana_source.access_token"] = access_token[0]
+        src = asana_source()
+        src.workspaces.add_filter(lambda w: w["gid"] == workspace)
+        return src.with_resources(table)
+
+
+class DynamoDBSource:
+    AWS_ENDPOINT_PATTERN = re.compile(r".*\.(.+)\.amazonaws\.com")
+
+    def infer_aws_region(self, uri: ParseResult) -> Optional[str]:
+        # try to infer the region from the URI
+        matches = self.AWS_ENDPOINT_PATTERN.match(uri.netloc)
+        if matches is not None:
+            return matches[1]
+
+        # else obtain the region from the query string
+        region = parse_qs(uri.query).get("region")
+        if region is None:
+            return None
+        return region[0]
+
+    def get_endpoint_url(self, url: ParseResult) -> str:
+        if self.AWS_ENDPOINT_PATTERN.match(url.netloc) is not None:
+            return f"https://{url.hostname}"
+        return f"http://{url.netloc}"
+
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+
+        region = self.infer_aws_region(parsed_uri)
+        if not region:
+            raise ValueError("region is required to connect to DynamoDB")
+
+        qs = parse_qs(quote(parsed_uri.query, safe="=&"))
+        access_key = qs.get("access_key_id")
+
+        if not access_key:
+            raise ValueError("access_key_id is required to connect to DynamoDB")
+
+        secret_key = qs.get("secret_access_key")
+        if not secret_key:
+            raise ValueError("secret_access_key is required to connect to DynamoDB")
+
+        creds = AwsCredentials(
+            aws_access_key_id=access_key[0],
+            aws_secret_access_key=TSecretStrValue(secret_key[0]),
+            region_name=region,
+            endpoint_url=self.get_endpoint_url(parsed_uri),
+        )
+
+        incremental = None
+        incremental_key = kwargs.get("incremental_key")
+
+        if incremental_key:
+            incremental = dlt.sources.incremental(
+                incremental_key.strip(),
+                initial_value=isotime(kwargs.get("interval_start")),
+                end_value=isotime(kwargs.get("interval_end")),
+            )
+
+        return dynamodb(table, creds, incremental)
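
To make the custom table grammar above concrete, here is how a hypothetical table string splits; the dimension and metric names are illustrative:

table = "custom:campaign_id,stat_time_day:spend,impressions"

fields = table.split(":", 3)
# ['custom', 'campaign_id,stat_time_day', 'spend,impressions']

dimensions = fields[1].replace(" ", "").split(",")  # ['campaign_id', 'stat_time_day']
metrics = fields[2].replace(" ", "").split(",")     # ['spend', 'impressions']
# no fourth segment here, so filters stays []
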
ingestr/src/tiktok_ads/__init__.py ADDED
@@ -0,0 +1,106 @@
+from typing import Iterable, Optional
+
+import dlt
+import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+
+from .tiktok_helpers import TikTokAPI
+
+
+def find_intervals(
+    current_date: pendulum.DateTime,
+    end_date: pendulum.DateTime,
+    interval_days: int,
+):
+    intervals = []
+    while current_date <= end_date:
+        interval_end = min(current_date.add(days=interval_days), end_date)
+        intervals.append((current_date, interval_end))
+        current_date = interval_end.add(days=1)
+
+    return intervals
+
+
+def fetch_tiktok_reports(
+    tiktok_api: TikTokAPI,
+    current_date: pendulum.DateTime,
+    interval_end: pendulum.DateTime,
+    advertiser_id: str,
+    dimensions: list[str],
+    metrics: list[str],
+    filters: Optional[dict],
+) -> Iterable[TDataItem]:
+    try:
+        yield from tiktok_api.fetch_pages(
+            advertiser_id=advertiser_id,
+            start_time=current_date,
+            end_time=interval_end,
+            dimensions=dimensions,
+            metrics=metrics,
+            filters=None,
+        )
+    except Exception as e:
+        raise RuntimeError(f"Error fetching TikTok report: {e}")
+
+
+@dlt.source(max_table_nesting=0)
+def tiktok_source(
+    start_date: pendulum.DateTime,
+    end_date: pendulum.DateTime,
+    access_token: str,
+    advertiser_id: str,
+    time_zone: str,
+    page_size: int,
+    dimensions: list[str],
+    metrics: list[str],
+    filters=None,
+) -> DltResource:
+    tiktok_api = TikTokAPI(
+        access_token=access_token, time_zone=time_zone, page_size=page_size
+    )
+    incremental_loading_param = ""
+    is_incremental = False
+    interval_days = 365
+
+    if "stat_time_day" in dimensions:
+        incremental_loading_param = "stat_time_day"
+        is_incremental = True
+        interval_days = 30
+
+    if "stat_time_hour" in dimensions:
+        incremental_loading_param = "stat_time_hour"
+        is_incremental = True
+        interval_days = 0
+
+    @dlt.resource(write_disposition="merge", primary_key=dimensions)
+    def custom_reports(
+        datetime=dlt.sources.incremental(incremental_loading_param, start_date)
+        if is_incremental
+        else None,
+    ) -> Iterable[TDataItem]:
+        current_date = start_date.in_tz(time_zone)
+
+        if datetime is not None:
+            datetime_str = datetime.last_value
+            current_date = ensure_pendulum_datetime(datetime_str).in_tz(time_zone)
+
+        list_of_interval = find_intervals(
+            current_date=current_date,
+            end_date=end_date,
+            interval_days=interval_days,
+        )
+
+        for start, end in list_of_interval:
+            yield from fetch_tiktok_reports(
+                tiktok_api=tiktok_api,
+                current_date=start,
+                interval_end=end,
+                advertiser_id=advertiser_id,
+                dimensions=dimensions,
+                metrics=metrics,
+                filters=None,
+            )
+
+    return custom_reports
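
A quick worked example of find_intervals, which drives the chunked report fetches; the dates are illustrative:

import pendulum

from ingestr.src.tiktok_ads import find_intervals

start = pendulum.datetime(2024, 1, 1)
end = pendulum.datetime(2024, 3, 1)

print(find_intervals(start, end, interval_days=30))
# [(2024-01-01, 2024-01-31), (2024-02-01, 2024-03-01)]
# each window is inclusive, and the next window starts the day after the previous end
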
ingestr/src/tiktok_ads/tiktok_helpers.py ADDED
@@ -0,0 +1,112 @@
+import json
+
+import requests
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.sources.helpers.requests import Client
+
+BASE_URL = "https://business-api.tiktok.com/open_api/v1.3/report/integrated/get/"
+
+
+def retry_on_limit(
+    response: requests.Response | None, exception: BaseException | None
+) -> bool:
+    if response is None:
+        return False
+    return response.status_code == 429
+
+
+def create_client() -> requests.Session:
+    return Client(
+        request_timeout=10.0,
+        raise_for_status=False,
+        retry_condition=retry_on_limit,
+        request_max_attempts=12,
+        request_backoff_factor=2,
+    ).session
+
+
+def flat_structure(items, time_zone="UTC"):
+    for item in items:
+        if "dimensions" in item:
+            for key, value in item["dimensions"].items():
+                if key == "stat_time_day":
+                    item["stat_time_day"] = ensure_pendulum_datetime(value).in_tz(
+                        time_zone
+                    )
+                elif key == "stat_time_hour":
+                    item["stat_time_hour"] = ensure_pendulum_datetime(value).in_tz(
+                        time_zone
+                    )
+                else:
+                    item[key] = value
+            del item["dimensions"]
+
+        for key, value in item["metrics"].items():
+            item[key] = value
+        del item["metrics"]
+
+    return items
+
+
+class TikTokAPI:
+    def __init__(self, access_token, time_zone, page_size):
+        self.headers = {
+            "Access-Token": access_token,
+        }
+        self.time_zone = time_zone
+        self.page_size = page_size
+
+    def fetch_pages(
+        self, advertiser_id: str, start_time, end_time, dimensions, metrics, filters
+    ):
+        data_level_mapping = {
+            "advertiser_id": "AUCTION_ADVERTISER",
+            "campaign_id": "AUCTION_CAMPAIGN",
+            "adgroup_id": "AUCTION_ADGROUP",
+        }
+
+        data_level = "AUCTION_AD"
+        for id_dimension in dimensions:
+            if id_dimension in data_level_mapping:
+                data_level = data_level_mapping[id_dimension]
+                break
+
+        current_page = 1
+        start_time = ensure_pendulum_datetime(start_time).to_date_string()
+        end_time = ensure_pendulum_datetime(end_time).to_date_string()
+
+        self.params = {
+            "advertiser_id": advertiser_id,
+            "report_type": "BASIC",
+            "data_level": data_level,
+            "start_date": start_time,
+            "end_date": end_time,
+            "page_size": self.page_size,
+            "dimensions": json.dumps(dimensions),
+            "metrics": json.dumps(metrics),
+        }
+        client = create_client()
+        while True:
+            self.params["page"] = current_page
+            response = client.get(
+                url=BASE_URL, headers=self.headers, params=self.params
+            )
+
+            result = response.json()
+            if result.get("message") != "OK":
+                raise ValueError(result.get("message", ""))
+
+            result_data = result.get("data", {})
+            items = result_data.get("list", [])
+
+            flat_structure(items=items, time_zone=self.time_zone)
+
+            yield items
+
+            page_info = result_data.get("page_info", {})
+            total_pages = page_info.get("total_page", 1)
+
+            if current_page >= total_pages:
+                break
+
+            current_page += 1
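
For reference, what flat_structure does to a single report row; the sample values are made up:

from ingestr.src.tiktok_ads.tiktok_helpers import flat_structure

row = {
    "dimensions": {"campaign_id": "123", "stat_time_day": "2024-05-01 00:00:00"},
    "metrics": {"spend": "42.50", "impressions": "1000"},
}
flat_structure([row], time_zone="UTC")
# row is flattened in place:
# {"campaign_id": "123",
#  "stat_time_day": <pendulum DateTime 2024-05-01T00:00:00+00:00>,
#  "spend": "42.50",
#  "impressions": "1000"}
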
ingestr/src/time.py ADDED
ingestr/src/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.10.4"
+__version__ = "0.12.2"
ingestr/src/zendesk/__init__.py CHANGED

{ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/METADATA CHANGED
@@ -1,10 +1,11 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: ingestr
-Version: 0.10.4
+Version: 0.12.2
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
 Author-email: Burak Karakan <burak.karakan@getbruin.com>
+License-File: LICENSE.md
 Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Developers
@@ -13,6 +14,7 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
 Requires-Python: >=3.9
+Requires-Dist: asana==3.2.3
 Requires-Dist: confluent-kafka>=2.6.1
 Requires-Dist: databricks-sql-connector==2.9.3
 Requires-Dist: dlt==1.4.0
@@ -199,7 +201,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <tr>
 <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
 </tr>
-
+<td>Adjust</td>
 <td>✅</td>
 <td>-</td>
 <tr>
@@ -207,17 +209,27 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>-</td>
 </tr>
-
+<tr>
 <td>AppsFlyer</td>
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>Asana</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Chess.com</td>
 <td>✅</td>
 <td>-</td>
 </tr>
-
+<tr>
+<td>DynamoDB</td>
+<td>✅</td>
+<td>-</td>
+</tr>
+<tr>
 <td>Facebook Ads</td>
 <td>✅</td>
 <td>-</td>
{ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/RECORD CHANGED
@@ -1,20 +1,25 @@
 ingestr/main.py,sha256=wkU2uLMy1q8YarJ9mXNfJepeRjp6AuPDeNDOmMUt6n0,22309
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=UyE1TzTHn_V8JZno5SSYfQsho1eFYzzvOylogw4S49E,4389
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=QCyfkhLl5jgmosZUeh4BTrmqHk74Vus7zLgk_MBdPhc,41096
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
-ingestr/src/
+ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
+ingestr/src/version.py,sha256=NJQQPiZZfrBXFMqZlsia0JrhloS2PexbdxYYUs0c2Us,23
 ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
 ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
 ingestr/src/arrow/__init__.py,sha256=AgU7S9Ra3ZeeG00Mf32zxO5sgMFfRnTdOSirUJ1Pu10,2976
+ingestr/src/asana_source/__init__.py,sha256=Y4Ti_876Yong420fQ2o4A97TdgrZNlZVxlTMLyXdSjA,8116
+ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
+ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
 ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
+ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
 ingestr/src/facebook_ads/__init__.py,sha256=ZZyogV48gmhDcC3CYQEsC4qT3Q6JI9IOnMff2NS1M-A,9207
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
 ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
@@ -56,7 +61,9 @@ ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu6
 ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
 ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
-ingestr/src/
+ingestr/src/tiktok_ads/__init__.py,sha256=vJjVxEw3W1Rvc2QDQbox_8Ma0Cp1RT7iKsQ9MAv6Cgc,3036
+ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=lY7yWl_aJh5Hj-bVvt07MHvhfvXnghaGOLhGHF5gLh4,3444
+ingestr/src/zendesk/__init__.py,sha256=C7HkN195DGdOHId2_Sa_kAlcBrUmnVYZUa_tPkiyf1Q,17564
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
 ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
@@ -70,8 +77,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.
-ingestr-0.
-ingestr-0.
-ingestr-0.
-ingestr-0.
+ingestr-0.12.2.dist-info/METADATA,sha256=SAZJKqigL1ARQdv3eGX4RZVigZwYJCEcCt36lpvZtsQ,7910
+ingestr-0.12.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.12.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.12.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.12.2.dist-info/RECORD,,

{ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/entry_points.txt: File without changes
{ingestr-0.10.4.dist-info → ingestr-0.12.2.dist-info}/licenses/LICENSE.md: File without changes