ingestr 0.12.9__py3-none-any.whl → 0.12.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic; see the registry's advisory page for more details.

ingestr/src/sources.py CHANGED
@@ -3,7 +3,8 @@ import csv
3
3
  import json
4
4
  import os
5
5
  import re
6
- from datetime import date, datetime, timedelta
6
+ import tempfile
7
+ from datetime import date, datetime, timedelta, timezone
7
8
  from typing import (
8
9
  Any,
9
10
  Callable,
@@ -18,8 +19,8 @@ from urllib.parse import ParseResult, parse_qs, quote, urlparse
18
19
 
19
20
  import dlt
20
21
  import gcsfs # type: ignore
21
- import s3fs # type: ignore
22
22
  import pendulum
23
+ import s3fs # type: ignore
23
24
  from dlt.common.configuration.specs import (
24
25
  AwsCredentials,
25
26
  )
@@ -41,9 +42,11 @@ from dlt.sources.sql_database.schema_types import (
41
42
  Table,
42
43
  TTypeAdapter,
43
44
  )
45
+ from google.ads.googleads.client import GoogleAdsClient # type: ignore
44
46
  from sqlalchemy import Column
45
47
  from sqlalchemy import types as sa
46
48
 
49
+ from ingestr.src import blob
47
50
  from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
48
51
  from ingestr.src.adjust.adjust_helpers import parse_filters
49
52
  from ingestr.src.airtable import airtable_source
@@ -55,6 +58,7 @@ from ingestr.src.asana_source import asana_source
55
58
  from ingestr.src.chess import source
56
59
  from ingestr.src.dynamodb import dynamodb
57
60
  from ingestr.src.errors import (
61
+ InvalidBlobTableError,
58
62
  MissingValueError,
59
63
  UnsupportedResourceError,
60
64
  )
@@ -62,6 +66,7 @@ from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_sour
62
66
  from ingestr.src.filesystem import readers
63
67
  from ingestr.src.filters import table_adapter_exclude_columns
64
68
  from ingestr.src.github import github_reactions, github_repo_events, github_stargazers
69
+ from ingestr.src.google_ads import google_ads
65
70
  from ingestr.src.google_analytics import google_analytics
66
71
  from ingestr.src.google_sheets import google_spreadsheet
67
72
  from ingestr.src.gorgias import gorgias_source
@@ -69,6 +74,11 @@ from ingestr.src.hubspot import hubspot
69
74
  from ingestr.src.kafka import kafka_consumer
70
75
  from ingestr.src.kafka.helpers import KafkaCredentials
71
76
  from ingestr.src.klaviyo._init_ import klaviyo_source
77
+ from ingestr.src.linkedin_ads import linked_in_ads_source
78
+ from ingestr.src.linkedin_ads.dimension_time_enum import (
79
+ Dimension,
80
+ TimeGranularity,
81
+ )
72
82
  from ingestr.src.mongodb import mongodb_collection
73
83
  from ingestr.src.notion import notion_databases
74
84
  from ingestr.src.shopify import shopify_source
@@ -1090,16 +1100,11 @@ class S3Source:
1090
1100
  if not secret_access_key:
1091
1101
  raise ValueError("secret_access_key is required to connect to S3")
1092
1102
 
1093
- bucket_name = parsed_uri.hostname
1094
- if not bucket_name:
1095
- raise ValueError(
1096
- "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
1097
- )
1098
- bucket_url = f"s3://{bucket_name}"
1103
+ bucket_name, path_to_file = blob.parse_uri(parsed_uri, table)
1104
+ if not bucket_name or not path_to_file:
1105
+ raise InvalidBlobTableError("S3")
1099
1106
 
1100
- path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1101
- if not path_to_file:
1102
- raise ValueError("--source-table must be specified")
1107
+ bucket_url = f"s3://{bucket_name}/"
1103
1108
 
1104
1109
  fs = s3fs.S3FileSystem(
1105
1110
  key=access_key_id[0],
@@ -1118,9 +1123,7 @@ class S3Source:
1118
1123
  "S3 Source only supports specific formats files: csv, jsonl, parquet"
1119
1124
  )
1120
1125
 
1121
- return readers(
1122
- bucket_url, fs, path_to_file
1123
- ).with_resources(endpoint)
1126
+ return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
1124
1127
 
1125
1128
 
1126
1129
  class TikTokSource:
@@ -1327,6 +1330,7 @@ class DynamoDBSource:
1327
1330
  range_start="closed",
1328
1331
  )
1329
1332
 
1333
+ # TODO: validate the requested table before passing it to dynamodb.
1330
1334
  return dynamodb(table, creds, incremental)
1331
1335
 
1332
1336
 
@@ -1517,6 +1521,13 @@ class GCSSource:
1517
1521
 
1518
1522
  parsed_uri = urlparse(uri)
1519
1523
  params = parse_qs(parsed_uri.query)
1524
+
1525
+ bucket_name, path_to_file = blob.parse_uri(parsed_uri, table)
1526
+ if not bucket_name or not path_to_file:
1527
+ raise InvalidBlobTableError("GCS")
1528
+
1529
+ bucket_url = f"gs://{bucket_name}"
1530
+
1520
1531
  credentials_path = params.get("credentials_path")
1521
1532
  credentials_base64 = params.get("credentials_base64")
1522
1533
  credentials_available = any(
@@ -1528,17 +1539,6 @@ class GCSSource:
1528
1539
  if credentials_available is False:
1529
1540
  raise MissingValueError("credentials_path or credentials_base64", "GCS")
1530
1541
 
1531
- bucket_name = parsed_uri.hostname
1532
- if not bucket_name:
1533
- raise ValueError(
1534
- "Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
1535
- )
1536
- bucket_url = f"gs://{bucket_name}/"
1537
-
1538
- path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1539
- if not path_to_file:
1540
- raise ValueError("--source-table must be specified")
1541
-
1542
1542
  credentials = None
1543
1543
  if credentials_path:
1544
1544
  credentials = credentials_path[0]
@@ -1566,6 +1566,173 @@ class GCSSource:
1566
1566
  "GCS Source only supports specific formats files: csv, jsonl, parquet"
1567
1567
  )
1568
1568
 
1569
- return readers(
1570
- bucket_url, fs, path_to_file
1571
- ).with_resources(endpoint)
1569
+ return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
1570
+
1571
class GoogleAdsSource:
    """Source for Google Ads reports, addressed as googleads://<customer_id>?..."""

    def handles_incrementality(self) -> bool:
        # Incremental loading is managed by the google_ads source itself.
        return True

    def init_client(self, params: Dict[str, List[str]]) -> GoogleAdsClient:
        """Build a GoogleAdsClient from the URI query parameters.

        Requires ``dev_token`` plus either ``credentials_path`` or
        ``credentials_base64``. Base64-encoded credentials are decoded into a
        temporary file (the client can only load a JSON key from disk), which
        is always removed once the client has been constructed.

        Raises:
            MissingValueError: if the dev token or credentials are missing.
        """
        dev_token = params.get("dev_token")
        if dev_token is None or len(dev_token) == 0:
            raise MissingValueError("dev_token", "Google Ads")

        credentials_path = params.get("credentials_path")
        credentials_base64 = params.get("credentials_base64")
        if credentials_path is None and credentials_base64 is None:
            raise MissingValueError(
                "credentials_path or credentials_base64", "Google Ads"
            )

        path = None
        fd = None
        if credentials_path:
            path = credentials_path[0]
        else:
            # The client only accepts a key file on disk, so decode the
            # base64 payload into a temp file and clean it up afterwards.
            (fd, path) = tempfile.mkstemp(prefix="secret-")
            try:
                secret = base64.b64decode(credentials_base64[0])  # type: ignore
                os.write(fd, secret)
            except Exception:
                # Fix: don't leak the open fd and the on-disk secret if
                # decoding or writing fails.
                os.close(fd)
                os.remove(path)
                raise
            os.close(fd)

        conf = {
            "json_key_file_path": path,
            "use_proto_plus": True,
            "developer_token": dev_token[0],
        }
        try:
            client = GoogleAdsClient.load_from_dict(conf)
        finally:
            # Only remove the key file if we created it ourselves.
            if fd is not None:
                os.remove(path)

        return client

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Google Ads dlt source for ``table``.

        The customer id is taken from the URI host. When no interval is
        given, the report window defaults to the last 30 days.

        Raises:
            ValueError: if ``incremental_key`` is supplied.
            MissingValueError: if the customer id is missing.
            UnsupportedResourceError: if ``table`` is not a known resource.
        """
        if kwargs.get("incremental_key") is not None:
            raise ValueError(
                "Google Ads takes care of incrementality on its own, you should not provide incremental_key"
            )

        parsed_uri = urlparse(uri)

        customer_id = parsed_uri.hostname
        if not customer_id:
            raise MissingValueError("customer_id", "Google Ads")

        params = parse_qs(parsed_uri.query)
        client = self.init_client(params)

        # NOTE(review): interval_start may be a naive datetime while the
        # default below is UTC-aware — confirm callers always pass aware
        # datetimes before they are mixed in arithmetic.
        start_date = kwargs.get("interval_start") or datetime.now(
            tz=timezone.utc
        ) - timedelta(days=30)
        end_date = kwargs.get("interval_end")

        # most combinations of explicit start/end dates are automatically
        # handled. however, in the scenario where only the end date is
        # provided, we need to calculate the start date based on the end date.
        if (
            kwargs.get("interval_end") is not None
            and kwargs.get("interval_start") is None
        ):
            start_date = end_date - timedelta(days=30)  # type: ignore

        # A "daily:<spec>" table selects a custom daily report: the raw spec
        # is forwarded and the resource name becomes "daily_report".
        report_spec = None
        if table.startswith("daily:"):
            report_spec = table
            table = "daily_report"

        src = google_ads(
            client,
            customer_id,
            report_spec,
            start_date=start_date,
            end_date=end_date,
        )

        if table not in src.resources:
            raise UnsupportedResourceError(table, "Google Ads")

        return src.with_resources(table)
1662
+
1663
+
1664
class LinkedInAdsSource:
    """Source for LinkedIn Ads custom analytics reports."""

    def handles_incrementality(self) -> bool:
        # The custom-reports resource manages its own incremental window.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build the LinkedIn Ads dlt source from a table spec.

        The spec has the form ``custom:<dimensions>:<metrics>``. The
        dimension list must contain a pivot ("campaign", "creative" or
        "account") and a time grain ("date" for daily, "month" for monthly).
        ``dateRange`` and ``pivotValues`` are always included in the metrics.
        """
        query = parse_qs(urlparse(uri).query)

        token_values = query.get("access_token")
        if not token_values:
            raise ValueError("access_token is required to connect to LinkedIn Ads")

        raw_accounts = query.get("account_ids")
        if not raw_accounts:
            raise ValueError("account_ids is required to connect to LinkedIn Ads")
        accounts = raw_accounts[0].replace(" ", "").split(",")

        since = kwargs.get("interval_start")
        until = kwargs.get("interval_end")
        start = (
            ensure_pendulum_datetime(since).date()
            if since
            else pendulum.datetime(2018, 1, 1).date()
        )
        end = ensure_pendulum_datetime(until).date() if until else None

        parts = table.split(":")
        if len(parts) != 3:
            raise ValueError(
                "Invalid table format. Expected format: custom:<dimensions>:<metrics>"
            )

        dims = [d for d in parts[1].replace(" ", "").split(",") if d.strip()]
        if not {"campaign", "creative", "account"} & set(dims):
            raise ValueError(
                "'campaign', 'creative' or 'account' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
            )
        if not {"date", "month"} & set(dims):
            raise ValueError(
                "'date' or 'month' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
            )

        # The time-grain dimension picks the granularity and is consumed
        # here; the first remaining dimension becomes the report pivot.
        if "date" in dims:
            granularity = TimeGranularity.daily
            dims.remove("date")
        else:
            granularity = TimeGranularity.monthly
            dims.remove("month")

        pivot = Dimension[dims[0]]

        metric_list = [m for m in parts[2].replace(" ", "").split(",") if m.strip()]
        for required in ("dateRange", "pivotValues"):
            if required not in metric_list:
                metric_list.append(required)

        return linked_in_ads_source(
            start_date=start,
            end_date=end,
            access_token=token_values[0],
            account_ids=accounts,
            dimension=pivot,
            metrics=metric_list,
            time_granularity=granularity,
        ).with_resources("custom_reports")
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.12.9"
1
+ __version__ = "0.12.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.9
3
+ Version: 0.12.11
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,7 +22,9 @@ Requires-Dist: dlt==1.5.0
22
22
  Requires-Dist: duckdb-engine==0.13.5
23
23
  Requires-Dist: duckdb==1.1.3
24
24
  Requires-Dist: facebook-business==20.0.0
25
+ Requires-Dist: flatten-json==0.1.14
25
26
  Requires-Dist: gcsfs==2024.10.0
27
+ Requires-Dist: google-ads==25.1.0
26
28
  Requires-Dist: google-analytics-data==0.18.16
27
29
  Requires-Dist: google-api-python-client==2.130.0
28
30
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
@@ -1,13 +1,14 @@
1
1
  ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
+ ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
3
4
  ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
4
- ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
5
- ingestr/src/factory.py,sha256=oNF9dovovLG34xLgRZ5fbyA_XSHxEuTW27s1cb35KDM,4622
5
+ ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
6
+ ingestr/src/factory.py,sha256=D__Oy029z6y2OsAUMGab5K5ZmYhRXxDbD_SDc21b9Eo,4746
6
7
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
7
- ingestr/src/sources.py,sha256=JoO-IQ_eB4Ia1fC1GWs6N74l9A3tXQT-Fj0uNBiSI_Y,55978
8
+ ingestr/src/sources.py,sha256=jIq1qVj8_uOVbdrVuvs2uHkrLydd1i8XHMx5vhPVqAo,61682
8
9
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
9
10
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
10
- ingestr/src/version.py,sha256=FSGqM7DffUSCa5R2rqVlNo-yNzBd6cgAXS1_0tElLy0,23
11
+ ingestr/src/version.py,sha256=92OWM_xUUgc7wxFngCUAzVKFahsSWsF4UXOgDEn2uVI,24
11
12
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
12
13
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
13
14
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -37,6 +38,11 @@ ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e
37
38
  ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
38
39
  ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
39
40
  ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
41
+ ingestr/src/google_ads/__init__.py,sha256=bH0TtnRWcOUESezpvoA7VEUHAq_0ITGQeX4GGVBfl1I,3725
42
+ ingestr/src/google_ads/field.py,sha256=uc8KEaYQrwgQoQPUdxIQWZxpFeZHbiV98FM0ZSaelS0,69
43
+ ingestr/src/google_ads/metrics.py,sha256=tAqpBpm-8l95oPT9cBxMWaEoDTNHVXnqUphYDHWKDiE,12099
44
+ ingestr/src/google_ads/predicates.py,sha256=K4wTuqfmJ9ko1RKeHTBDfQO_mUADVyuRqtywBPP-72w,683
45
+ ingestr/src/google_ads/reports.py,sha256=AVY1pPt5yaIFskQe1k5VW2Dhlux3bzewsHlDrdGEems,12686
40
46
  ingestr/src/google_analytics/__init__.py,sha256=8Evpmoy464YpNbCI_NmvFHIzWCu7J7SjJw-RrPZ6AL8,3674
41
47
  ingestr/src/google_analytics/helpers.py,sha256=vLmFyQ_IEJEK5LlxBJQeJw0VHaE5gRRZdBa54U72CaQ,5965
42
48
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
@@ -54,6 +60,9 @@ ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,
54
60
  ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
55
61
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
56
62
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
63
+ ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
64
+ ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
65
+ ingestr/src/linkedin_ads/helpers.py,sha256=6jSIp4DF0iUafJWU3Y7DbIJGKRH6hrx4S7zCTDOjNuE,4528
57
66
  ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
58
67
  ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
59
68
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -91,8 +100,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
91
100
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
92
101
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
93
102
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
94
- ingestr-0.12.9.dist-info/METADATA,sha256=p7RGcw0cnHPU93RLIPWOkMtj36Ax9BnA7bPSKIQ3pfg,8056
95
- ingestr-0.12.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
96
- ingestr-0.12.9.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
97
- ingestr-0.12.9.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
98
- ingestr-0.12.9.dist-info/RECORD,,
103
+ ingestr-0.12.11.dist-info/METADATA,sha256=fxNa7pb3GLEvLuUjHSOviflBwIBJto0ck1PyQp893jU,8127
104
+ ingestr-0.12.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
105
+ ingestr-0.12.11.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
106
+ ingestr-0.12.11.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
107
+ ingestr-0.12.11.dist-info/RECORD,,