ingestr 0.12.9__tar.gz → 0.12.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.12.9 → ingestr-0.12.11}/PKG-INFO +3 -1
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/.vitepress/config.mjs +2 -0
- ingestr-0.12.11/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/gcs.md +10 -6
- ingestr-0.12.11/docs/supported-sources/google-ads.md +134 -0
- ingestr-0.12.11/docs/supported-sources/linkedin_ads.md +109 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/s3.md +9 -6
- ingestr-0.12.11/ingestr/src/blob.py +49 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/errors.py +8 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/factory.py +4 -0
- ingestr-0.12.11/ingestr/src/google_ads/__init__.py +116 -0
- ingestr-0.12.11/ingestr/src/google_ads/field.py +2 -0
- ingestr-0.12.11/ingestr/src/google_ads/metrics.py +240 -0
- ingestr-0.12.11/ingestr/src/google_ads/predicates.py +23 -0
- ingestr-0.12.11/ingestr/src/google_ads/reports.py +380 -0
- ingestr-0.12.11/ingestr/src/linkedin_ads/__init__.py +63 -0
- ingestr-0.12.11/ingestr/src/linkedin_ads/dimension_time_enum.py +12 -0
- ingestr-0.12.11/ingestr/src/linkedin_ads/helpers.py +148 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/sources.py +195 -28
- ingestr-0.12.11/ingestr/src/version.py +1 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/requirements.txt +3 -0
- ingestr-0.12.9/ingestr/src/version.py +0 -1
- {ingestr-0.12.9 → ingestr-0.12.11}/.dockerignore +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.github/workflows/tests.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.gitignore +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.gitleaksignore +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.python-version +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/.vale.ini +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/Dockerfile +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/LICENSE.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/Makefile +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/README.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/commands/example-uris.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/commands/ingest.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/index.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/media/athena.png +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/media/github.png +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/media/tiktok.png +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/github.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/main.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/.gitignore +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/destinations.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/filters.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/time.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/package-lock.json +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/package.json +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/pyproject.toml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/requirements-dev.txt +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/resources/demo.gif +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/resources/demo.tape +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/resources/ingestr.svg +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Colons.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Gender.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Headings.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Latin.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Parens.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Passive.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Periods.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Slang.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Units.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/We.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/Will.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/WordList.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/meta.json +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/Google/vocab.txt +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.12.9 → ingestr-0.12.11}/styles/config/vocabularies/bruin/accept.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.9
|
|
3
|
+
Version: 0.12.11
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -22,7 +22,9 @@ Requires-Dist: dlt==1.5.0
|
|
|
22
22
|
Requires-Dist: duckdb-engine==0.13.5
|
|
23
23
|
Requires-Dist: duckdb==1.1.3
|
|
24
24
|
Requires-Dist: facebook-business==20.0.0
|
|
25
|
+
Requires-Dist: flatten-json==0.1.14
|
|
25
26
|
Requires-Dist: gcsfs==2024.10.0
|
|
27
|
+
Requires-Dist: google-ads==25.1.0
|
|
26
28
|
Requires-Dist: google-analytics-data==0.18.16
|
|
27
29
|
Requires-Dist: google-api-python-client==2.130.0
|
|
28
30
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
@@ -104,11 +104,13 @@ export default defineConfig({
|
|
|
104
104
|
},
|
|
105
105
|
{ text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
|
|
106
106
|
{ text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
|
|
107
|
+
{ text: "Google Ads", link: "/supported-sources/google-ads.md" },
|
|
107
108
|
{ text: "GitHub", link: "/supported-sources/github.md" },
|
|
108
109
|
{ text: "Google Sheets", link: "/supported-sources/gsheets.md" },
|
|
109
110
|
{ text: "Gorgias", link: "/supported-sources/gorgias.md" },
|
|
110
111
|
{ text: "HubSpot", link: "/supported-sources/hubspot.md" },
|
|
111
112
|
{ text: "Klaviyo", link: "/supported-sources/klaviyo.md" },
|
|
113
|
+
{ text: "LinkedIn Ads", link: "/supported-sources/linkedin_ads.md" },
|
|
112
114
|
{ text: "Notion", link: "/supported-sources/notion.md" },
|
|
113
115
|
{ text: "S3", link: "/supported-sources/s3.md" },
|
|
114
116
|
{ text: "Shopify", link: "/supported-sources/shopify.md" },
|
|
Binary file
|
|
@@ -7,14 +7,18 @@
|
|
|
7
7
|
The URI format for Google Cloud Storage is as follows:
|
|
8
8
|
|
|
9
9
|
```plaintext
|
|
10
|
-
gs
|
|
10
|
+
gs://?credentials_path=/path/to/service-account.json
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
URI parameters:
|
|
14
14
|
|
|
15
|
-
- `bucket_name`: The name of the bucket
|
|
16
15
|
- `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
|
|
17
16
|
|
|
17
|
+
The `--source-table` must be in the format:
|
|
18
|
+
```
|
|
19
|
+
{bucket name}/{file glob}
|
|
20
|
+
```
|
|
21
|
+
|
|
18
22
|
## Setting up a GCS Integration
|
|
19
23
|
|
|
20
24
|
To use Google Cloud Storage source in `ingestr`, you will need:
|
|
@@ -29,7 +33,7 @@ For more information on how to create a Service Account or it's keys, see [Creat
|
|
|
29
33
|
Let's assume that:
|
|
30
34
|
* Service account key is available in the current directory, under the filename `service_account.json`.
|
|
31
35
|
* The bucket you want to load data from is called `my-org-bucket`
|
|
32
|
-
* The source file is available at
|
|
36
|
+
* The source file is available at `data/latest/dump.csv`
|
|
33
37
|
* The data needs to be saved in a DuckDB database called `local.db`
|
|
34
38
|
* The destination table name will be `public.latest_dump`
|
|
35
39
|
|
|
@@ -37,8 +41,8 @@ You can run the following command line to achieve this:
|
|
|
37
41
|
|
|
38
42
|
```sh
|
|
39
43
|
ingestr ingest \
|
|
40
|
-
--source-uri "gs
|
|
41
|
-
--source-table "/data/latest/dump.csv" \
|
|
44
|
+
--source-uri "gs://?credentials_path=$PWD/service_account.json" \
|
|
45
|
+
--source-table "my-org-bucket/data/latest/dump.csv" \
|
|
42
46
|
--dest-uri "duckdb:///local.db" \
|
|
43
47
|
--dest-table "public.latest_dump"
|
|
44
48
|
```
|
|
@@ -53,7 +57,7 @@ ingestr ingest \
|
|
|
53
57
|
`ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
|
|
54
58
|
This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
|
|
55
59
|
|
|
56
|
-
Below are some examples of path patterns, each path pattern is
|
|
60
|
+
Below are some examples of path patterns. Each path pattern is a glob that you can specify after the bucket name:
|
|
57
61
|
|
|
58
62
|
- `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
|
|
59
63
|
- `*.csv`: Retrieves all the CSV files from the first level of a folder.
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# Google Ads
|
|
2
|
+
[Google Ads](https://ads.google.com/), formerly known as Google Adwords, is an online advertising platform developed by Google, where advertisers bid to display brief advertisements, service offerings, product listings, and videos to web users. It can place ads in the results of search engines like Google Search (the Google Search Network), mobile apps, videos, and on non-search websites.
|
|
3
|
+
|
|
4
|
+
## URI format
|
|
5
|
+
|
|
6
|
+
The URI format for Google Ads is as follows:
|
|
7
|
+
```plaintext
|
|
8
|
+
googleads://<customer_id>?credentials_path=/path/to/service-account.json&dev_token=<dev_token>
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
URI parameters:
|
|
12
|
+
|
|
13
|
+
- `customer_id`: Customer ID of the Google Ads account to use.
|
|
14
|
+
- `credentials_path`: path to the service account JSON file.
|
|
15
|
+
- `dev_token`: [developer token](https://developers.google.com/google-ads/api/docs/get-started/dev-token) to use for accessing the account.
|
|
16
|
+
|
|
17
|
+
> [!NOTE]
|
|
18
|
+
> You may specify credentials using `credentials_base64` instead of `credentials_path`.
|
|
19
|
+
> The value of this parameter is the base64 encoded contents of the
|
|
20
|
+
> service account json file. However, we don't recommend using this
|
|
21
|
+
> parameter, unless you're integrating ingestr into another system.
|
|
22
|
+
## Setting up a Google Ads integration
|
|
23
|
+
|
|
24
|
+
### Prerequisites
|
|
25
|
+
* A Google cloud [service account](https://cloud.google.com/iam/docs/service-account-overview)
|
|
26
|
+
* A Google Ads [developer token](https://developers.google.com/google-ads/api/docs/get-started/dev-token)
|
|
27
|
+
* A Google Ads account
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
### Obtaining necessary credentials
|
|
31
|
+
|
|
32
|
+
You can use the [Google Cloud IAM Console](https://cloud.google.com/security/products/iam) to create a service account for ingesting data from Google Ads. Make sure to enable Google Ads API in your console.
|
|
33
|
+
|
|
34
|
+
Next, you need to add your service account user to your Google Ads account. See [Google Developers Docs](https://developers.google.com/google-ads/api/docs/oauth/service-accounts) for exact steps.
|
|
35
|
+
|
|
36
|
+
Finally, you need to obtain a Google Ads Developer Token. A developer token lets your app connect to the Google Ads API. Each developer token is assigned an API access level, which controls the number of API calls you can make per day as well as the environment to which you can make calls. See [Google Ads docs](https://developers.google.com/google-ads/api/docs/get-started/dev-token) for more information on how to obtain this token.
|
|
37
|
+
|
|
38
|
+
You also need the 10-digit customer id of the account you're making API calls to. This is displayed in the Google Ads web interface in the form 123-456-7890. In this case, your customer id would be `1234567890`.
|
|
39
|
+
|
|
40
|
+
### Example
|
|
41
|
+
|
|
42
|
+
Let's say we want to ingest information about campaigns (on a daily interval) and save them to a table `public.campaigns` in a DuckDB database called `adverts.db`.
|
|
43
|
+
|
|
44
|
+
For this example, we'll assume that:
|
|
45
|
+
* The service account JSON file is located in the current directory and is named `svc_account.json`
|
|
46
|
+
* customer id is `1234567890`
|
|
47
|
+
* the developer token is `dev-token-spec-1`
|
|
48
|
+
|
|
49
|
+
You can run the following to achieve this:
|
|
50
|
+
```sh
|
|
51
|
+
ingestr ingest \
|
|
52
|
+
--source-uri "googleads://1234567890?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
|
|
53
|
+
--source-table "campaign_report_daily" \
|
|
54
|
+
--dest-uri "duckdb://./adverts.db" \
|
|
55
|
+
--dest-table "public.campaigns"
|
|
56
|
+
```
|
|
57
|
+
## Tables
|
|
58
|
+
|
|
59
|
+
| Name | Description |
|
|
60
|
+
|------------------|-------------------------------------------------------------------------|
|
|
61
|
+
| `account_report_daily` | Provides daily metrics aggregated at the account level. |
|
|
62
|
+
| `campaign_report_daily` | Provides daily metrics aggregated at the campaign level. |
|
|
63
|
+
| `ad_group_report_daily` | Provides daily metrics aggregated at the ad group level. |
|
|
64
|
+
| `ad_report_daily` | Provides daily metrics aggregated at the ad level. |
|
|
65
|
+
| `audience_report_daily` | Provides daily metrics aggregated at the audience level. |
|
|
66
|
+
| `keyword_report_daily` | Provides daily metrics aggregated at the keyword level. |
|
|
67
|
+
| `click_report_daily` | Provides daily metrics on clicks. |
|
|
68
|
+
| `landing_page_report_daily` | Provides daily metrics on landing page performance. |
|
|
69
|
+
| `search_keyword_report_daily` | Provides daily metrics on search keywords. |
|
|
70
|
+
| `search_term_report_daily` | Provides daily metrics on search terms. |
|
|
71
|
+
| `lead_form_submission_data_report_daily` | Provides daily metrics on lead form submissions. |
|
|
72
|
+
| `local_services_lead_report_daily` | Provides daily metrics on local services leads. |
|
|
73
|
+
| `local_services_lead_conversations_report_daily` | Provides daily metrics on local services lead conversations. |
|
|
74
|
+
|
|
75
|
+
## Custom Reports
|
|
76
|
+
`googleads` source supports custom reports. You can pass a custom report definition to `--source-table` and it will dynamically create a report for you. These reports are aggregated at a daily interval.
|
|
77
|
+
|
|
78
|
+
The format of a custom report looks like the following:
|
|
79
|
+
```
|
|
80
|
+
daily:{resource_name}:{dimensions}:{metrics}
|
|
81
|
+
```
|
|
82
|
+
Where:
|
|
83
|
+
* `{resource_name}` is a [Google Ads Resource](https://developers.google.com/google-ads/api/fields/v18/overview_query_builder#list-of-all-resources).
|
|
84
|
+
* `{dimensions}` is a comma separated list of the Resource's attribute fields, or fields of [attributed resources](https://developers.google.com/google-ads/api/docs/query/overview).
|
|
85
|
+
* `{metrics}` is a comma separated list of the Resource's [metrics](https://developers.google.com/google-ads/api/fields/v18/metrics). Note that the `metrics.` prefix is optional.
|
|
86
|
+
|
|
87
|
+
Notes:
|
|
88
|
+
* `{dimensions}` and `{metrics}` are optional. If you don't need them, you can leave their respective segment blank.
|
|
89
|
+
* `segments` are currently not supported as dimensions.
|
|
90
|
+
* `segments.date` is automatically added to all custom reports.
|
|
91
|
+
|
|
92
|
+
### Custom Report Example
|
|
93
|
+
For this example, we will ingest data from `ad_group_ad_asset_view`.
|
|
94
|
+
We want to obtain the following info:
|
|
95
|
+
**dimensions**
|
|
96
|
+
* ad_group.id
|
|
97
|
+
* campaign.id
|
|
98
|
+
* customer.id
|
|
99
|
+
**metrics**
|
|
100
|
+
* metrics.clicks
|
|
101
|
+
* metrics.conversions
|
|
102
|
+
* metrics.impressions
|
|
103
|
+
|
|
104
|
+
To achieve this, we pass a `daily` report specification to `ingestr` source table as follows:
|
|
105
|
+
```sh
|
|
106
|
+
ingestr ingest \
|
|
107
|
+
--source-uri "googleads://1234567890?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
|
|
108
|
+
--source-table "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:clicks,conversions,impressions" \
|
|
109
|
+
--dest-uri "duckdb:///custom.db" \
|
|
110
|
+
--dest-table "public.report"
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Notice the lack of `metrics.` prefix in the metrics segment. Please note that `--dest-table` is mandatory when creating
|
|
114
|
+
a custom report.
|
|
115
|
+
|
|
116
|
+
**Without Metrics**
|
|
117
|
+
|
|
118
|
+
Here's an example of the above report, without any associated metrics:
|
|
119
|
+
```sh
|
|
120
|
+
ingestr ingest \
|
|
121
|
+
--source-uri "googleads://1234567890?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
|
|
122
|
+
--source-table "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:" \
|
|
123
|
+
--dest-uri "duckdb:///custom.db" \
--dest-table "public.report"
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Without Dimensions**
|
|
127
|
+
|
|
128
|
+
Here's an example of the above report, without any associated dimensions:
|
|
129
|
+
```sh
|
|
130
|
+
ingestr ingest \
|
|
131
|
+
--source-uri "googleads://1234567890?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
|
|
132
|
+
--source-table "daily:ad_group_ad_asset_view::clicks,conversions,impressions" \
|
|
133
|
+
--dest-uri "duckdb:///custom.db" \
--dest-table "public.report"
|
|
134
|
+
```
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# LinkedIn Ads
|
|
2
|
+
LinkedIn Ads is a platform that allows businesses and marketers to create, manage, and analyze advertising campaigns.
|
|
3
|
+
|
|
4
|
+
Ingestr supports LinkedIn Ads as a source.
|
|
5
|
+
|
|
6
|
+
## URI format
|
|
7
|
+
The URI format for LinkedIn Ads as a source is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
linkedinads://?access_token=<access_token>&account_ids=<account_ids>
|
|
11
|
+
```
|
|
12
|
+
## URI parameters:
|
|
13
|
+
- `access_token`(required): It is used for authentication and is necessary to access data and reports through the LinkedIn Ads API. The access token lets your app access data using the permissions you set in the Developer App for your LinkedIn account.
|
|
14
|
+
- `account_ids`(required): The comma-separated list of Ad Account IDs specifies the LinkedIn Ad Accounts for which you want to retrieve data. These IDs uniquely identify the LinkedIn Ad Accounts associated with a company, business, or individual, depending on the ownership of the Ad Accounts. They are required to fetch data for campaigns, creatives, and other related resources.
|
|
15
|
+
|
|
16
|
+
LinkedIn Ads requires an `access_token` and `account_ids` to retrieve reports from the [LinkedIn Ads API](https://learn.microsoft.com/en-us/linkedin/marketing/integrations/ads-reporting/ads-reporting?view=li-lms-2024-11&tabs=http#analytics-finder). Please follow these steps to obtain the `access_token` and `account_ids`.
|
|
17
|
+
|
|
18
|
+
### Create a LinkedIn developer application to obtain an access token
|
|
19
|
+
1. Log in to LinkedIn with a [developer account](https://www.linkedin.com/developers)
|
|
20
|
+
2. Click Create App. Please fill out:
|
|
21
|
+
- App Name
|
|
22
|
+
- Your company's LinkedIn page
|
|
23
|
+
- Your company's privacy policy URL
|
|
24
|
+
- Your company logo
|
|
25
|
+
- Accept the terms and click "Create App"
|
|
26
|
+
3. To verify your app:
|
|
27
|
+
- Go to "Settings" tab
|
|
28
|
+
- Find "App Settings"
|
|
29
|
+
- Click "Verify" under Company
|
|
30
|
+
- Click "Generate URL"
|
|
31
|
+
- Send this URL to your Page Admin
|
|
32
|
+
- Click "I'm done" and open the URL in a new tab to verify, if you are the admin of your page.
|
|
33
|
+
- Go to the "Products" tab, and click "Request access" for the Advertising API. Approval takes a few minutes, after which you need to fill out a form with your company name, website, and other details.
|
|
34
|
+
|
|
35
|
+
#### Authorize your app and obtain access token
|
|
36
|
+
1. Go to the "Auth" tab
|
|
37
|
+
4. Click "OAuth 2.0 tools" which is on top right corner of the page [link](https://www.linkedin.com/developers/tools/oauth)
|
|
38
|
+
5. Click "Create token"
|
|
39
|
+
6. Choose these permissions:
|
|
40
|
+
- `r_ads`
|
|
41
|
+
- `r_ads_reporting`
|
|
42
|
+
7. Click "Request access token"
|
|
43
|
+
8. You will be redirected to an authorization page. Use your LinkedIn credentials to log in and authorize your app and obtain your Access Token and Refresh Token. Copy the Access Token.
|
|
44
|
+
|
|
45
|
+
> [!NOTE]
|
|
46
|
+
> Access tokens last for 2 months. After they expire, you'll need to make new ones using
|
|
47
|
+
> [LinkedIn's Token Generator](https://www.linkedin.com/developers/tools/oauth/token-generator).
|
|
48
|
+
|
|
49
|
+
To find the Ad Account IDs, the ad account owner can refer to the detailed instructions provided in this [guide](https://www.linkedin.com/help/linkedin/answer/a424270/find-linkedin-ads-account-details?lang=en).
|
|
50
|
+
|
|
51
|
+
## Table: Custom Reports
|
|
52
|
+
Custom reports allow you to retrieve data based on specific dimensions and metrics.
|
|
53
|
+
|
|
54
|
+
Custom Table Format:
|
|
55
|
+
```
|
|
56
|
+
custom:<dimensions>:<metrics>
|
|
57
|
+
```
|
|
58
|
+
### Parameters:
|
|
59
|
+
- `dimensions`(required): A comma-separated list of dimensions is required. It must include at least one of the following: `campaign`, `account`, or `creative`, along with one time-based dimension, either `date` or `month`.
|
|
60
|
+
- `date`: group the data in your report by day
|
|
61
|
+
- `month`: group the data in your report by month
|
|
62
|
+
- `metrics`(required): A comma-separated list of [metrics](https://learn.microsoft.com/en-us/linkedin/marketing/integrations/ads-reporting/ads-reporting?view=li-lms-2024-11&tabs=http#metrics-available) to retrieve.
|
|
63
|
+
|
|
64
|
+
> [!NOTE]
|
|
65
|
+
> By default, ingestr fetches data from January 1, 2018 to today's date. You can specify a custom date range using the `--interval-start` and `--interval-end` parameters.
|
|
66
|
+
|
|
67
|
+
### Example
|
|
68
|
+
|
|
69
|
+
Retrieve data for campaign with `account_ids` id_123 and id_456:
|
|
70
|
+
```sh
|
|
71
|
+
ingestr ingest \
|
|
72
|
+
--source-uri "linkedinads://?access_token=token_123&account_ids=id_123,id_456" \
|
|
73
|
+
--source-table 'custom:campaign,date:impressions,clicks' \
|
|
74
|
+
--dest-uri 'duckdb:///linkedin.duckdb' \
|
|
75
|
+
--dest-table 'dest.campaign'
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
The applied parameters for the report are:
|
|
79
|
+
- dimensions: `campaign`, `date`
|
|
80
|
+
- metrics: `impressions`, `clicks`
|
|
81
|
+
|
|
82
|
+
Retrieve data for creative with `account_ids` id_123 and id_456 for the date range from 2024-10-15 to 2024-12-31:
|
|
83
|
+
```sh
|
|
84
|
+
ingestr ingest \
|
|
85
|
+
--source-uri "linkedinads://?access_token=token_123&account_ids=id_123,id_456" \
|
|
86
|
+
--source-table 'custom:creative,month:impressions,shares,videoCompletions' \
|
|
87
|
+
--dest-uri 'duckdb:///linkedin.duckdb' \
|
|
88
|
+
--dest-table 'dest.creative' \
|
|
89
|
+
--interval-start '2024-10-15' \
|
|
90
|
+
--interval-end '2024-12-31'
|
|
91
|
+
```
|
|
92
|
+
The applied parameters for the report are:
|
|
93
|
+
- dimensions: `creative`, `month`
|
|
94
|
+
- metrics: `shares`, `impressions`, `videoCompletions`
|
|
95
|
+
|
|
96
|
+
```sh
|
|
97
|
+
ingestr ingest \
|
|
98
|
+
--source-uri "linkedinads://?access_token=token_123&account_ids=id_123,id_456" \
|
|
99
|
+
--source-table 'custom:account,month:totalEngagements,impressions' \
|
|
100
|
+
--dest-uri 'duckdb:///linkedin.duckdb' \
|
|
101
|
+
--dest-table 'dest.account'
|
|
102
|
+
```
|
|
103
|
+
The applied parameters for the report are:
|
|
104
|
+
- dimensions: `account`, `month`
|
|
105
|
+
- metrics: `totalEngagements`, `impressions`
|
|
106
|
+
|
|
107
|
+
This command will retrieve data and save it to the destination table in the DuckDB database.
|
|
108
|
+
|
|
109
|
+
<img alt="linkedin_ads_img" src="../media/linkedin_ads.png"/>
|
|
@@ -9,15 +9,18 @@ ingestr supports S3 as a source.
|
|
|
9
9
|
The URI format for S3 is as follows:
|
|
10
10
|
|
|
11
11
|
```plaintext
|
|
12
|
-
s3
|
|
12
|
+
s3://?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
|
|
13
13
|
```
|
|
14
14
|
|
|
15
15
|
URI parameters:
|
|
16
16
|
|
|
17
|
-
- `bucket_name`: The name of the bucket
|
|
18
|
-
- `path_to_files`: The relative path from the root of the bucket. You can find this from the S3 URI. For example, if your S3 URI is `s3://mybucket/students/students_details.csv`, then your bucket name is `mybucket` and `path_to_files` is `students/students_details.csv`.
|
|
19
17
|
- `access_key_id` and `secret_access_key` : Used for accessing S3 bucket.
|
|
20
18
|
|
|
19
|
+
The `--source-table` must be in the format:
|
|
20
|
+
```
|
|
21
|
+
{bucket name}/{file glob}
|
|
22
|
+
```
|
|
23
|
+
|
|
21
24
|
## Setting up a S3 Integration
|
|
22
25
|
|
|
23
26
|
S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. Please follow the guide on dltHub to [obtain credentials](https://dlthub.com/docs/dlt-ecosystem/verified-sources/filesystem/basic#get-credentials). Once you've completed the guide, you should have an `access_key_id` and `secret_access_key`. From the S3 URI, you can extract the `bucket_name` and `path_to_files`
|
|
@@ -26,15 +29,15 @@ For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCt
|
|
|
26
29
|
|
|
27
30
|
```sh
|
|
28
31
|
ingestr ingest \
|
|
29
|
-
--source-uri 's3
|
|
30
|
-
--source-table '/students/students_details.csv' \
|
|
32
|
+
--source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
|
|
33
|
+
--source-table 'my_bucket/students/students_details.csv' \
|
|
31
34
|
--dest-uri duckdb:///s3.duckdb \
|
|
32
35
|
--dest-table 'dest.students_details'
|
|
33
36
|
```
|
|
34
37
|
|
|
35
38
|
The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
|
|
36
39
|
|
|
37
|
-
Below are some examples of path patterns, each path pattern is a
|
|
40
|
+
Below are some examples of path patterns, each path pattern is a glob you can specify after the bucket name:
|
|
38
41
|
|
|
39
42
|
- `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
|
|
40
43
|
- `*.csv`: Retrieves all the CSV files from the first level of a folder.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import Tuple, TypeAlias
|
|
3
|
+
from urllib.parse import ParseResult
|
|
4
|
+
|
|
5
|
+
BucketName: TypeAlias = str
|
|
6
|
+
FileGlob: TypeAlias = str
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
10
|
+
"""
|
|
11
|
+
parse the URI of a blob storage and
|
|
12
|
+
return the bucket name and the file glob.
|
|
13
|
+
|
|
14
|
+
Supports the following Forms:
|
|
15
|
+
- uri: "gs://"
|
|
16
|
+
table: "bucket-name/file-glob"
|
|
17
|
+
- uri: gs://bucket-name/file-glob
|
|
18
|
+
table: None
|
|
19
|
+
- uri: "gs://bucket-name"
|
|
20
|
+
table: "file-glob"
|
|
21
|
+
|
|
22
|
+
The first form is the prefered method. Other forms are supported
|
|
23
|
+
for backward compatibility, but discouraged.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
table = table.strip()
|
|
27
|
+
host = uri.netloc.strip()
|
|
28
|
+
|
|
29
|
+
if table == "":
|
|
30
|
+
warnings.warn(
|
|
31
|
+
f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
|
|
32
|
+
DeprecationWarning,
|
|
33
|
+
stacklevel=2,
|
|
34
|
+
)
|
|
35
|
+
return host, uri.path.lstrip("/")
|
|
36
|
+
|
|
37
|
+
if host != "":
|
|
38
|
+
warnings.warn(
|
|
39
|
+
f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
|
|
40
|
+
DeprecationWarning,
|
|
41
|
+
stacklevel=2,
|
|
42
|
+
)
|
|
43
|
+
return host, table.lstrip("/")
|
|
44
|
+
|
|
45
|
+
parts = table.lstrip("/").split("/", maxsplit=1)
|
|
46
|
+
if len(parts) != 2:
|
|
47
|
+
return "", parts[0]
|
|
48
|
+
|
|
49
|
+
return parts[0], parts[1]
|
|
@@ -8,3 +8,11 @@ class UnsupportedResourceError(Exception):
|
|
|
8
8
|
super().__init__(
|
|
9
9
|
f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
10
10
|
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InvalidBlobTableError(Exception):
    """
    Raised when the source table of a blob-storage source is not in the
    ``{bucket-name}/{file glob}`` format.
    """

    def __init__(self, source):
        # `source` is interpolated into the message to identify which
        # blob-storage source rejected the table.
        super().__init__(
            # The two literals are concatenated; the first must end the
            # sentence so the message does not run together.
            f"Invalid source table for {source}. "
            "Ensure that the table is in the format {bucket-name}/{file glob}"
        )
|
|
@@ -27,12 +27,14 @@ from ingestr.src.sources import (
|
|
|
27
27
|
FacebookAdsSource,
|
|
28
28
|
GCSSource,
|
|
29
29
|
GitHubSource,
|
|
30
|
+
GoogleAdsSource,
|
|
30
31
|
GoogleAnalyticsSource,
|
|
31
32
|
GoogleSheetsSource,
|
|
32
33
|
GorgiasSource,
|
|
33
34
|
HubspotSource,
|
|
34
35
|
KafkaSource,
|
|
35
36
|
KlaviyoSource,
|
|
37
|
+
LinkedInAdsSource,
|
|
36
38
|
LocalCsvSource,
|
|
37
39
|
MongoDbSource,
|
|
38
40
|
NotionSource,
|
|
@@ -124,8 +126,10 @@ class SourceDestinationFactory:
|
|
|
124
126
|
"asana": AsanaSource,
|
|
125
127
|
"tiktok": TikTokSource,
|
|
126
128
|
"googleanalytics": GoogleAnalyticsSource,
|
|
129
|
+
"googleads": GoogleAdsSource,
|
|
127
130
|
"appstore": AppleAppStoreSource,
|
|
128
131
|
"gs": GCSSource,
|
|
132
|
+
"linkedinads": LinkedInAdsSource,
|
|
129
133
|
}
|
|
130
134
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
131
135
|
"bigquery": BigQueryDestination,
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from typing import Any, Iterator, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
import proto # type: ignore
|
|
7
|
+
from dlt.common.exceptions import MissingDependencyException
|
|
8
|
+
from dlt.common.typing import TDataItem
|
|
9
|
+
from dlt.sources import DltResource
|
|
10
|
+
from flatten_json import flatten # type: ignore
|
|
11
|
+
from googleapiclient.discovery import Resource # type: ignore
|
|
12
|
+
|
|
13
|
+
from . import field
|
|
14
|
+
from .metrics import dlt_metrics_schema
|
|
15
|
+
from .predicates import date_predicate
|
|
16
|
+
from .reports import BUILTIN_REPORTS, Report
|
|
17
|
+
|
|
18
|
+
# The Google Ads client library is an optional dependency; fail with an
# actionable error if it is not installed.
try:
    from google.ads.googleads.client import GoogleAdsClient  # type: ignore
except ImportError:
    # The first argument names the component that needs the dependency; it
    # previously said "Requests-OAuthlib", which is unrelated to this source.
    raise MissingDependencyException("Google Ads API Client", ["google-ads"])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
def google_ads(
    client: GoogleAdsClient,
    customer_id: str,
    report_spec: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
) -> Iterator[DltResource]:
    """
    dlt source exposing Google Ads daily reports as resources.

    Yields one resource per entry in BUILTIN_REPORTS, named after the
    report. When `report_spec` is given, a resource named "daily_report"
    built from that spec is yielded first. Every resource uses the "merge"
    write disposition, the report's primary keys, and a column schema
    derived from the report's metrics.

    :param client: Google Ads client passed through to `daily_report`.
    :param customer_id: customer id passed through to `daily_report`.
    :param report_spec: optional custom report specification, parsed with
        `Report().from_spec`.
    :param start_date: lower bound of the `segments_date` incremental
        cursor (inclusive). Required despite the `None` default.
    :param end_date: optional upper bound of the cursor (inclusive).
    :raises ValueError: if `start_date` is None.
    """
    # The signature allows None, but the cursor needs an initial value;
    # fail with a clear message instead of an AttributeError on `.date()`.
    if start_date is None:
        raise ValueError("start_date is required to build the Google Ads date range")

    date_range = dlt.sources.incremental(
        "segments_date",
        initial_value=start_date.date(),  # type: ignore
        end_value=end_date.date() if end_date is not None else None,  # type: ignore
        range_start="closed",
        range_end="closed",
    )
    if report_spec is not None:
        custom_report = Report().from_spec(report_spec)
        yield dlt.resource(
            daily_report,
            name="daily_report",
            write_disposition="merge",
            primary_key=custom_report.primary_keys(),
            columns=dlt_metrics_schema(custom_report.metrics),
        )(client, customer_id, custom_report, date_range)

    # Built-in reports are always exposed, in addition to any custom report.
    for report_name, report in BUILTIN_REPORTS.items():
        yield dlt.resource(
            daily_report,
            name=report_name,
            write_disposition="merge",
            primary_key=report.primary_keys(),
            columns=dlt_metrics_schema(report.metrics),
        )(client, customer_id, report, date_range)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def daily_report(
    client: Resource,
    customer_id: str,
    report: Report,
    date: dlt.sources.incremental[date],
) -> Iterator[TDataItem]:
    """
    Stream the rows of one report from the "GoogleAdsService" service.

    The query selects the report's dimensions, metrics and segments from
    `report.resource`, filtered by a predicate on `segments.date` built
    from the cursor's `last_value` and `end_value`. When
    `report.unfilterable` is True, the WHERE clause is cut off the query.

    Each result row is converted to a dict, list-valued metrics are merged,
    and the result is flattened. A `segments_date` value, if present, is
    parsed from "%Y-%m-%d" into a `date`. Only keys matching the selected
    fields (mapped through `field.to_column`) are yielded.

    :param client: object providing `get_service`.
    :param customer_id: customer id passed to `search_stream`.
    :param report: report definition supplying fields, resource and the
        `unfilterable` flag.
    :param date: incremental cursor supplying the date bounds.
    :return: an iterator of flattened row dicts.
    """
    service = client.get_service("GoogleAdsService")
    selected = report.dimensions + report.metrics + report.segments
    date_filter = date_predicate("segments.date", date.last_value, date.end_value)  # type:ignore
    gaql = f"""
        SELECT
            {", ".join(selected)}
        FROM
            {report.resource}
        WHERE
            {date_filter}
    """
    if report.unfilterable is True:
        # Keep only the text before the WHERE keyword.
        gaql = gaql[: gaql.index("WHERE", 0)]

    columns = {field.to_column(name) for name in selected}
    for batch in service.search_stream(customer_id=customer_id, query=gaql):
        for row in batch.results:
            record = flatten(merge_lists(to_dict(row)))
            if "segments_date" in record:
                parsed = datetime.strptime(record["segments_date"], "%Y-%m-%d")
                record["segments_date"] = parsed.date()
            yield {key: value for key, value in record.items() if key in columns}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def to_dict(item: Any) -> TDataItem:
    """
    Convert a message into plain Python data via a JSON round trip.

    The item is serialized with `proto.Message.to_json`, asking it to keep
    proto field names, not to use integers for enums, and not to include
    default-valued fields. The JSON text is then parsed with `json.loads`.

    :param item: the message to convert.
    :return: the parsed JSON document.
    """
    serialized = proto.Message.to_json(
        item,
        preserving_proto_field_name=True,
        use_integers_for_enums=False,
        including_default_value_fields=False,
    )
    return json.loads(serialized)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def merge_lists(item: dict) -> dict:
    """
    Collapse list-valued metrics into comma-separated strings.

    Every value under ``item["metrics"]`` that is a list is replaced, in
    place, by its elements joined with ",". Other values and all keys
    outside "metrics" are left untouched.

    :param item: a row dict; modified in place.
    :return: the same `item` object.
    """
    # A missing or null "metrics" entry means there is nothing to merge.
    metrics = item.get("metrics") or {}
    # Stringify elements so lists of non-string values (e.g. numbers) do not
    # make str.join raise TypeError; string elements are unaffected.
    joined = {
        name: ",".join(map(str, value))
        for name, value in metrics.items()
        if isinstance(value, list)
    }
    if joined:
        metrics.update(joined)
    return item
|