ingestr 0.12.10__tar.gz → 0.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ingestr-0.12.10 → ingestr-0.13.0}/PKG-INFO +6 -1
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/.vitepress/config.mjs +2 -0
- ingestr-0.13.0/docs/media/clickhouse_img.png +0 -0
- ingestr-0.13.0/docs/supported-sources/clickhouse.md +31 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/gcs.md +10 -6
- ingestr-0.13.0/docs/supported-sources/google-ads.md +134 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/s3.md +9 -6
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/main.py +1 -1
- ingestr-0.13.0/ingestr/src/blob.py +49 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/destinations.py +68 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/errors.py +8 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/factory.py +5 -0
- ingestr-0.13.0/ingestr/src/google_ads/__init__.py +116 -0
- ingestr-0.13.0/ingestr/src/google_ads/field.py +2 -0
- ingestr-0.13.0/ingestr/src/google_ads/metrics.py +240 -0
- ingestr-0.13.0/ingestr/src/google_ads/predicates.py +23 -0
- ingestr-0.13.0/ingestr/src/google_ads/reports.py +380 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/sources.py +114 -28
- ingestr-0.13.0/ingestr/src/version.py +1 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/package-lock.json +1015 -422
- {ingestr-0.12.10 → ingestr-0.13.0}/package.json +1 -1
- {ingestr-0.12.10 → ingestr-0.13.0}/pyproject.toml +2 -1
- {ingestr-0.12.10 → ingestr-0.13.0}/requirements.txt +5 -0
- ingestr-0.12.10/ingestr/src/version.py +0 -1
- {ingestr-0.12.10 → ingestr-0.13.0}/.dockerignore +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.github/workflows/tests.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.gitignore +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.gitleaksignore +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.python-version +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/.vale.ini +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/Dockerfile +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/LICENSE.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/Makefile +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/README.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/commands/example-uris.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/commands/ingest.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/index.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/media/athena.png +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/media/github.png +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/media/tiktok.png +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/github.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/.gitignore +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/filters.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/time.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/requirements-dev.txt +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/resources/demo.gif +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/resources/demo.tape +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/resources/ingestr.svg +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Colons.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Gender.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Headings.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Latin.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Parens.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Passive.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Periods.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Slang.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Units.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/We.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/Will.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/WordList.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/meta.json +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/Google/vocab.txt +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.12.10 → ingestr-0.13.0}/styles/config/vocabularies/bruin/accept.txt +0 -0

{ingestr-0.12.10 → ingestr-0.13.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.12.10
+Version: 0.13.0
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -15,6 +15,9 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
 Requires-Python: >=3.9
 Requires-Dist: asana==3.2.3
+Requires-Dist: clickhouse-connect==0.8.14
+Requires-Dist: clickhouse-driver==0.2.9
+Requires-Dist: clickhouse-sqlalchemy==0.2.7
 Requires-Dist: confluent-kafka>=2.6.1
 Requires-Dist: databricks-sql-connector==2.9.3
 Requires-Dist: dataclasses-json==0.6.7
@@ -22,7 +25,9 @@ Requires-Dist: dlt==1.5.0
 Requires-Dist: duckdb-engine==0.13.5
 Requires-Dist: duckdb==1.1.3
 Requires-Dist: facebook-business==20.0.0
+Requires-Dist: flatten-json==0.1.14
 Requires-Dist: gcsfs==2024.10.0
+Requires-Dist: google-ads==25.1.0
 Requires-Dist: google-analytics-data==0.18.16
 Requires-Dist: google-api-python-client==2.130.0
 Requires-Dist: google-cloud-bigquery-storage==2.24.0

{ingestr-0.12.10 → ingestr-0.13.0}/docs/.vitepress/config.mjs

@@ -59,6 +59,7 @@ export default defineConfig({
         items: [
           { text: "AWS Athena", link: "/supported-sources/athena.md" },
           { text: "AWS Redshift", link: "/supported-sources/redshift.md" },
+          { text: "ClickHouse", link: "/supported-sources/clickhouse.md" },
           { text: "Databricks", link: "/supported-sources/databricks.md" },
           { text: "DuckDB", link: "/supported-sources/duckdb.md" },
           {
@@ -104,6 +105,7 @@ export default defineConfig({
           },
           { text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
           { text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
+          { text: "Google Ads", link: "/supported-sources/google-ads.md" },
           { text: "GitHub", link: "/supported-sources/github.md" },
           { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
           { text: "Gorgias", link: "/supported-sources/gorgias.md" },

ingestr-0.13.0/docs/media/clickhouse_img.png

Binary file

ingestr-0.13.0/docs/supported-sources/clickhouse.md

@@ -0,0 +1,31 @@
+# ClickHouse
+ClickHouse is a fast, open-source, column-oriented database management system that allows for high-performance data ingestion and querying.
+
+Ingestr supports ClickHouse as a destination.
+
+## URI format
+The URI format for ClickHouse as a destination is as follows:
+
+```plaintext
+clickhouse://<username>:<password>@<host>:<port>?http_port=<http_port>
+```
+## URI parameters:
+- `username` (required): The username used to authenticate with the ClickHouse server.
+- `password` (required): The password used to authenticate the provided username.
+- `host` (required): The hostname or IP address of the ClickHouse server where the database is hosted.
+- `port` (required): The TCP port number used by the ClickHouse server.
+- `http_port` (optional): The port number to use when connecting to the ClickHouse server's HTTP interface. Make sure your ClickHouse server is configured to accept HTTP connections on the port specified by `http_port`. By default, ClickHouse uses port 8123.
+
+ClickHouse requires a `username`, `password`, `host` and `port` to connect to the ClickHouse server. For more information, read [here](https://dlthub.com/docs/dlt-ecosystem/destinations/clickhouse#2-setup-clickhouse-database). Once you've completed the guide, you should have all the above-mentioned credentials.
+
+```
+ingestr ingest \
+    --source-uri "stripe://?api_key=key123" \
+    --source-table 'event' \
+    --dest-uri "clickhouse://user_123:pass123@localhost:9000" \
+    --dest-table 'stripe.event'
+```
+
+This is a sample command that will copy the data from the Stripe source into ClickHouse.
+
+<img alt="clickhouse_img" src="../media/clickhouse_img.png" />

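The new ClickHouse page stops at the sample command, so here is a small, hedged sketch (not part of the package) for sanity-checking the credentials and `http_port` described above using `clickhouse-connect`, one of the dependencies pinned in this release. The host, port and credentials are the placeholder values from the sample command.

```python
# Connectivity check only; values are the placeholders from the docs above.
import clickhouse_connect

client = clickhouse_connect.get_client(
    host="localhost",
    port=8123,            # the HTTP interface, i.e. the http_port URI parameter
    username="user_123",
    password="pass123",
)
print(client.command("SELECT version()"))  # prints the server version if reachable
```
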
{ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/gcs.md

@@ -7,14 +7,18 @@
 The URI format for Google Cloud Storage is as follows:
 
 ```plaintext
-gs
+gs://?credentials_path=/path/to/service-account.json>
 ```
 
 URI parameters:
 
-- `bucket_name`: The name of the bucket
 - `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
 
+The `--source-table` must be in the format:
+```
+{bucket name}/{file glob}
+```
+
 ## Setting up a GCS Integration
 
 To use Google Cloud Storage source in `ingestr`, you will need:
@@ -29,7 +33,7 @@ For more information on how to create a Service Account or it's keys, see [Creat
 Let's assume that:
 * Service account key is available in the current directory, under the filename `service_account.json`.
 * The bucket you want to load data from is called `my-org-bucket`
-* The source file is available at
+* The source file is available at `data/latest/dump.csv`
 * The data needs to be saved in a DuckDB database called `local.db`
 * The destination table name will be `public.latest_dump`
 
@@ -37,8 +41,8 @@ You can run the following command line to achieve this:
 
 ```sh
 ingestr ingest \
-    --source-uri "gs
-    --source-table "/data/latest/dump.csv" \
+    --source-uri "gs://?credentials_path=$PWD/service_account.json" \
+    --source-table "my-org-bucket/data/latest/dump.csv" \
     --dest-uri "duckdb:///local.db" \
     --dest-table "public.latest_dump"
 ```
@@ -53,7 +57,7 @@ ingestr ingest \
 `ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
 This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
 
-Below are some examples of path patterns, each path pattern is
+Below are some examples of path patterns, each path pattern is a glob you can specify after the bucket name:
 
 - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
 - `*.csv`: Retrieves all the CSV files from the first level of a folder.

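Since the GCS page now expects `--source-table` to carry `{bucket name}/{file glob}`, a quick way to preview what a glob would match is to query the bucket directly with `gcsfs` (pinned in PKG-INFO above). This sketch is illustrative only and reuses the example bucket, pattern and key file from the docs.

```python
# Preview which objects a source-table glob would pick up before running ingestr.
import gcsfs

fs = gcsfs.GCSFileSystem(token="service_account.json")  # service-account key file
print(fs.glob("my-org-bucket/data/latest/*.csv"))        # bucket + glob, as in --source-table
```
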
ingestr-0.13.0/docs/supported-sources/google-ads.md

@@ -0,0 +1,134 @@
+# Google Ads
+[Google Ads](https://ads.google.com/), formerly known as Google Adwords, is an online advertising platform developed by Google, where advertisers bid to display brief advertisements, service offerings, product listings, and videos to web users. It can place ads in the results of search engines like Google Search (the Google Search Network), mobile apps, videos, and on non-search websites.
+
+## URI format
+
+The URI format for Google Ads is as follows:
+```plaintext
+googleads://<customer_id>?credentials_path=/path/to/service-account.json&dev_token=<dev_token>
+```
+
+URI parameters:
+
+- `customer_id`: Customer ID of the Google Ads account to use.
+- `credentials_path`: path to the service account JSON file.
+- `dev_token`: [developer token](https://developers.google.com/google-ads/api/docs/get-started/dev-token) to use for accessing the account.
+
+> [!NOTE]
+> You may specify credentials using `credentials_base64` instead of `credentials_path`.
+> The value of this parameter is the base64-encoded contents of the
+> service account JSON file. However, we don't recommend using this
+> parameter, unless you're integrating ingestr into another system.
+## Setting up a Google Ads integration
+
+### Prerequisites
+* A Google Cloud [service account](https://cloud.google.com/iam/docs/service-account-overview)
+* A Google Ads [developer token](https://developers.google.com/google-ads/api/docs/get-started/dev-token)
+* A Google Ads account
+
+
+### Obtaining the necessary credentials
+
+You can use the [Google Cloud IAM Console](https://cloud.google.com/security/products/iam) to create a service account for ingesting data from Google Ads. Make sure to enable the Google Ads API in your console.
+
+Next, you need to add your service account user to your Google Ads account. See the [Google Developers docs](https://developers.google.com/google-ads/api/docs/oauth/service-accounts) for the exact steps.
+
+Finally, you need to obtain a Google Ads developer token. The developer token lets your app connect to the Google Ads API. Each developer token is assigned an API access level which controls the number of API calls you can make per day as well as the environment to which you can make calls. See the [Google Ads docs](https://developers.google.com/google-ads/api/docs/get-started/dev-token) for more information on how to obtain this token.
+
+You also need the 10-digit customer ID of the account you're making API calls to. This is displayed in the Google Ads web interface in the form 123-456-7890; in this case, your customer ID would be `1234567890`.
+
+### Example
+
+Let's say we want to ingest information about campaigns (on a daily interval) and save them to a table `public.campaigns` in a DuckDB database called `adverts.db`.
+
+For this example, we'll assume that:
+* The service account JSON file is located in the current directory and is named `svc_account.json`
+* The customer ID is `1234567890`
+* The developer token is `dev-token-spec-1`
+
+You can run the following to achieve this:
+```sh
+ingestr ingest \
+    --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
+    --source-table "campaign_report_daily" \
+    --dest-uri "duckdb://./adverts.db" \
+    --dest-table "public.campaigns"
+```
+## Tables
+
+| Name | Description |
+|------|-------------|
+| `account_report_daily` | Provides daily metrics aggregated at the account level. |
+| `campaign_report_daily` | Provides daily metrics aggregated at the campaign level. |
+| `ad_group_report_daily` | Provides daily metrics aggregated at the ad group level. |
+| `ad_report_daily` | Provides daily metrics aggregated at the ad level. |
+| `audience_report_daily` | Provides daily metrics aggregated at the audience level. |
+| `keyword_report_daily` | Provides daily metrics aggregated at the keyword level. |
+| `click_report_daily` | Provides daily metrics on clicks. |
+| `landing_page_report_daily` | Provides daily metrics on landing page performance. |
+| `search_keyword_report_daily` | Provides daily metrics on search keywords. |
+| `search_term_report_daily` | Provides daily metrics on search terms. |
+| `lead_form_submission_data_report_daily` | Provides daily metrics on lead form submissions. |
+| `local_services_lead_report_daily` | Provides daily metrics on local services leads. |
+| `local_services_lead_conversations_report_daily` | Provides daily metrics on local services lead conversations. |
+
+## Custom Reports
+The `googleads` source supports custom reports. You can pass a custom report definition to `--source-table` and it will dynamically create a report for you. These reports are aggregated at a daily interval.
+
+The format of a custom report looks like the following:
+```
+daily:{resource_name}:{dimensions}:{metrics}
+```
+Where:
+* `{resource_name}` is a [Google Ads resource](https://developers.google.com/google-ads/api/fields/v18/overview_query_builder#list-of-all-resources).
+* `{dimensions}` is a comma-separated list of the resource's attribute fields, or fields of [attributed resources](https://developers.google.com/google-ads/api/docs/query/overview).
+* `{metrics}` is a comma-separated list of the resource's [metrics](https://developers.google.com/google-ads/api/fields/v18/metrics). Note that the `metrics.` prefix is optional.
+
+Notes:
+* `{dimensions}` and `{metrics}` are optional. If you don't need them, you can leave their respective segment blank.
+* `segments` are currently not supported as dimensions.
+* `segments.date` is automatically added to all custom reports.
+
+### Custom Report Example
+For this example, we will ingest data from `ad_group_ad_asset_view`.
+We want to obtain the following info:
+**dimensions**
+* ad_group.id
+* campaign.id
+* customer.id
+**metrics**
+* metrics.clicks
+* metrics.conversions
+* metrics.impressions
+
+To achieve this, we pass a `daily` report specification to the `ingestr` source table as follows:
+```sh
+ingestr ingest \
+    --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
+    --source-table "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:clicks,conversions,impressions" \
+    --dest-uri "duckdb:///custom.db" \
+    --dest-table "public.report"
+```
+
+Notice the lack of the `metrics.` prefix in the metrics segment. Please note that `--dest-table` is mandatory when creating
+a custom report.
+
+**Without Metrics**
+
+Here's an example of the above report, without any associated metrics:
+```sh
+ingestr ingest \
+    --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
+    --source-table "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:" \
+    --dest-uri "duckdb:///custom.db" \
+```
+
+**Without Dimensions**
+
+Here's an example of the above report, without any associated dimensions:
+```sh
+ingestr ingest \
+    --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
+    --source-table "daily:ad_group_ad_asset_view::clicks,conversions,impressions" \
+    --dest-uri "duckdb:///custom.db" \
+```

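The custom report spec above is parsed by `Report.from_spec` in `ingestr/src/google_ads/reports.py`, which is not shown in this diff; the rough decomposition below only illustrates the documented `daily:{resource_name}:{dimensions}:{metrics}` shape and may differ from the real parser.

```python
# Illustrative decomposition of the documented spec format; not the actual parser.
def split_report_spec(spec: str) -> dict:
    interval, resource, dimensions, metrics = spec.split(":", maxsplit=3)
    return {
        "interval": interval,                                   # e.g. "daily"
        "resource": resource,                                   # e.g. "ad_group_ad_asset_view"
        "dimensions": [d for d in dimensions.split(",") if d],  # may be empty
        # the "metrics." prefix is optional in the spec
        "metrics": [m if m.startswith("metrics.") else f"metrics.{m}"
                    for m in metrics.split(",") if m],
    }

print(split_report_spec(
    "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:clicks,conversions,impressions"
))
```
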
{ingestr-0.12.10 → ingestr-0.13.0}/docs/supported-sources/s3.md

@@ -9,15 +9,18 @@ ingestr supports S3 as a source.
 The URI format for S3 is as follows:
 
 ```plaintext
-s3
+s3://?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
 ```
 
 URI parameters:
 
-- `bucket_name`: The name of the bucket
-- `path_to_files`: The relative path from the root of the bucket. You can find this from the S3 URI. For example, if your S3 URI is `s3://mybucket/students/students_details.csv`, then your bucket name is `mybucket` and `path_to_files` is `students/students_details.csv`.
 - `access_key_id` and `secret_access_key`: Used for accessing the S3 bucket.
 
+The `--source-table` must be in the format:
+```
+{bucket name}/{file glob}
+```
+
 ## Setting up a S3 Integration
 
 S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. Please follow the guide on dltHub to [obtain credentials](https://dlthub.com/docs/dlt-ecosystem/verified-sources/filesystem/basic#get-credentials). Once you've completed the guide, you should have an `access_key_id` and `secret_access_key`. From the S3 URI, you can extract the `bucket_name` and `path_to_files`
@@ -26,15 +29,15 @@ For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCt
 
 ```sh
 ingestr ingest \
-    --source-uri 's3
-    --source-table '/students/students_details.csv' \
+    --source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
+    --source-table 'my_bucket/students/students_details.csv' \
     --dest-uri duckdb:///s3.duckdb \
     --dest-table 'dest.students_details'
 ```
 
 The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
 
-Below are some examples of path patterns, each path pattern is a
+Below are some examples of path patterns, each path pattern is a glob you can specify after the bucket name:
 
 - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
 - `*.csv`: Retrieves all the CSV files from the first level of a folder.

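As with GCS, it can help to preview which objects a glob matches before ingesting. The sketch below uses `s3fs`, which is not pinned in this diff and is assumed to be available through the filesystem extras; the bucket, keys and pattern are the placeholder values from the docs.

```python
# Preview objects matching a source-table glob; s3fs availability is an assumption.
import s3fs

fs = s3fs.S3FileSystem(key="AKC3YOW7E", secret="XCtkpL5B")
print(fs.glob("my_bucket/students/*.csv"))  # bucket + glob, as in --source-table
```
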
{ingestr-0.12.10 → ingestr-0.13.0}/ingestr/main.py

@@ -451,7 +451,7 @@ def ingest(
         pipelines_dir = tempfile.mkdtemp()
         is_pipelines_dir_temp = True
 
-    dlt_dest = destination.dlt_dest(uri=dest_uri)
+    dlt_dest = destination.dlt_dest(uri=dest_uri, dest_table=dest_table)
     validate_loader_file_format(dlt_dest, loader_file_format)
 
     if partition_by:

ingestr-0.13.0/ingestr/src/blob.py

@@ -0,0 +1,49 @@
+import warnings
+from typing import Tuple, TypeAlias
+from urllib.parse import ParseResult
+
+BucketName: TypeAlias = str
+FileGlob: TypeAlias = str
+
+
+def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
+    """
+    parse the URI of a blob storage and
+    return the bucket name and the file glob.
+
+    Supports the following forms:
+    - uri: "gs://"
+      table: "bucket-name/file-glob"
+    - uri: "gs://bucket-name/file-glob"
+      table: None
+    - uri: "gs://bucket-name"
+      table: "file-glob"
+
+    The first form is the preferred method. Other forms are supported
+    for backward compatibility, but discouraged.
+    """
+
+    table = table.strip()
+    host = uri.netloc.strip()
+
+    if table == "":
+        warnings.warn(
+            f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return host, uri.path.lstrip("/")
+
+    if host != "":
+        warnings.warn(
+            f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return host, table.lstrip("/")
+
+    parts = table.lstrip("/").split("/", maxsplit=1)
+    if len(parts) != 2:
+        return "", parts[0]
+
+    return parts[0], parts[1]

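A minimal sketch of how the three accepted forms resolve with the `parse_uri` helper added above. The `ingestr.src.blob` import path is assumed from the package layout in the file list, and the expected tuples are derived from the code itself.

```python
from urllib.parse import urlparse

from ingestr.src.blob import parse_uri  # path assumed from ingestr/src/blob.py

# Preferred form: bucket and glob both live in --source-table.
print(parse_uri(urlparse("gs://"), "my-org-bucket/data/latest/*.csv"))
# -> ('my-org-bucket', 'data/latest/*.csv')

# Deprecated: bucket in the URI, glob in the table (emits a DeprecationWarning).
print(parse_uri(urlparse("gs://my-org-bucket"), "data/latest/*.csv"))
# -> ('my-org-bucket', 'data/latest/*.csv')

# Deprecated: everything in the URI, empty table (emits a DeprecationWarning).
print(parse_uri(urlparse("gs://my-org-bucket/data/latest/*.csv"), ""))
# -> ('my-org-bucket', 'data/latest/*.csv')
```
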
{ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/destinations.py

@@ -9,6 +9,9 @@ from urllib.parse import parse_qs, quote, urlparse
 
 import dlt
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.destinations.impl.clickhouse.configuration import (
+    ClickHouseCredentials,
+)
 
 
 class GenericSqlDestination:
@@ -261,3 +264,68 @@ class AthenaDestination:
 
     def post_load(self):
         pass
+
+
+class ClickhouseDestination:
+    def dlt_dest(self, uri: str, **kwargs):
+        parsed_uri = urlparse(uri)
+
+        if "dest_table" in kwargs:
+            table = kwargs["dest_table"]
+            database = table.split(".")[0]
+        else:
+            database = parsed_uri.path.lstrip("/")
+
+        username = parsed_uri.username
+        if not username:
+            raise ValueError(
+                "A username is required to connect to the ClickHouse database."
+            )
+
+        password = parsed_uri.password
+        if not password:
+            raise ValueError(
+                "A password is required to authenticate with the ClickHouse database."
+            )
+
+        host = parsed_uri.hostname
+        if not host:
+            raise ValueError(
+                "The hostname or IP address of the ClickHouse server is required to establish a connection."
+            )
+
+        port = parsed_uri.port
+        if not port:
+            raise ValueError(
+                "The TCP port of the ClickHouse server is required to establish a connection."
+            )
+
+        query_params = parse_qs(parsed_uri.query)
+        http_port = (
+            int(query_params["http_port"][0]) if "http_port" in query_params else 8123
+        )
+
+        credentials = ClickHouseCredentials(
+            {
+                "host": host,
+                "port": port,
+                "username": username,
+                "password": password,
+                "database": database,
+                "http_port": http_port,
+                "secure": 0,
+            }
+        )
+
+        return dlt.destinations.clickhouse(credentials=credentials)
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
+        table_fields = table.split(".")
+        if len(table_fields) != 2:
+            raise ValueError("Table name must be in the format <schema>.<table>")
+        return {
+            "table_name": table_fields[-1],
+        }
+
+    def post_load(self):
+        pass

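A short sketch of how the new `ClickhouseDestination` resolves a URI and destination table, assuming the `ingestr.src.destinations` import path used elsewhere in this diff and that dlt's ClickHouse extra is installed; nothing here connects to a server.

```python
from ingestr.src.destinations import ClickhouseDestination

dest = ClickhouseDestination()

# dest_table decides the target database: "stripe.event" -> database "stripe".
dlt_dest = dest.dlt_dest(
    uri="clickhouse://user_123:pass123@localhost:9000?http_port=8123",
    dest_table="stripe.event",
)

# Run params only carry the bare table name; the schema part became the database.
print(dest.dlt_run_params(
    uri="clickhouse://user_123:pass123@localhost:9000",
    table="stripe.event",
))  # -> {'table_name': 'event'}
```
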
{ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/errors.py

@@ -8,3 +8,11 @@ class UnsupportedResourceError(Exception):
         super().__init__(
             f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
         )
+
+
+class InvalidBlobTableError(Exception):
+    def __init__(self, source):
+        super().__init__(
+            f"Invalid source table for {source} "
+            "Ensure that the table is in the format {bucket-name}/{file glob}"
+        )

{ingestr-0.12.10 → ingestr-0.13.0}/ingestr/src/factory.py

@@ -6,6 +6,7 @@ from dlt.common.destination import Destination
 from ingestr.src.destinations import (
     AthenaDestination,
     BigQueryDestination,
+    ClickhouseDestination,
     CsvDestination,
     DatabricksDestination,
     DuckDBDestination,
@@ -27,6 +28,7 @@ from ingestr.src.sources import (
     FacebookAdsSource,
     GCSSource,
     GitHubSource,
+    GoogleAdsSource,
     GoogleAnalyticsSource,
     GoogleSheetsSource,
     GorgiasSource,
@@ -125,6 +127,7 @@ class SourceDestinationFactory:
         "asana": AsanaSource,
         "tiktok": TikTokSource,
         "googleanalytics": GoogleAnalyticsSource,
+        "googleads": GoogleAdsSource,
         "appstore": AppleAppStoreSource,
         "gs": GCSSource,
         "linkedinads": LinkedInAdsSource,
@@ -144,6 +147,8 @@ class SourceDestinationFactory:
         "synapse": SynapseDestination,
         "csv": CsvDestination,
         "athena": AthenaDestination,
+        "clickhouse+native": ClickhouseDestination,
+        "clickhouse": ClickhouseDestination,
     }
 
     def __init__(self, source_uri: str, destination_uri: str):

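The factory's dispatch logic is not part of this diff, but the mapping added above suggests the destination class is looked up by URI scheme. A rough, illustrative lookup:

```python
# Rough illustration only; the real dispatch lives in SourceDestinationFactory.
from urllib.parse import urlparse

destinations = {
    "clickhouse+native": "ClickhouseDestination",
    "clickhouse": "ClickhouseDestination",
    # ...the other entries from the mapping above
}

scheme = urlparse("clickhouse://user_123:pass123@localhost:9000").scheme
print(destinations.get(scheme))  # -> 'ClickhouseDestination'
```
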
ingestr-0.13.0/ingestr/src/google_ads/__init__.py

@@ -0,0 +1,116 @@
+import json
+from datetime import date, datetime
+from typing import Any, Iterator, Optional
+
+import dlt
+import proto  # type: ignore
+from dlt.common.exceptions import MissingDependencyException
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+from flatten_json import flatten  # type: ignore
+from googleapiclient.discovery import Resource  # type: ignore
+
+from . import field
+from .metrics import dlt_metrics_schema
+from .predicates import date_predicate
+from .reports import BUILTIN_REPORTS, Report
+
+try:
+    from google.ads.googleads.client import GoogleAdsClient  # type: ignore
+except ImportError:
+    raise MissingDependencyException("Requests-OAuthlib", ["google-ads"])
+
+
+@dlt.source
+def google_ads(
+    client: GoogleAdsClient,
+    customer_id: str,
+    report_spec: Optional[str] = None,
+    start_date: Optional[datetime] = None,
+    end_date: Optional[datetime] = None,
+) -> Iterator[DltResource]:
+    date_range = dlt.sources.incremental(
+        "segments_date",
+        initial_value=start_date.date(),  # type: ignore
+        end_value=end_date.date() if end_date is not None else None,  # type: ignore
+        range_start="closed",
+        range_end="closed",
+    )
+    if report_spec is not None:
+        custom_report = Report().from_spec(report_spec)
+        yield dlt.resource(
+            daily_report,
+            name="daily_report",
+            write_disposition="merge",
+            primary_key=custom_report.primary_keys(),
+            columns=dlt_metrics_schema(custom_report.metrics),
+        )(client, customer_id, custom_report, date_range)
+
+    for report_name, report in BUILTIN_REPORTS.items():
+        yield dlt.resource(
+            daily_report,
+            name=report_name,
+            write_disposition="merge",
+            primary_key=report.primary_keys(),
+            columns=dlt_metrics_schema(report.metrics),
+        )(client, customer_id, report, date_range)
+
+
+def daily_report(
+    client: Resource,
+    customer_id: str,
+    report: Report,
+    date: dlt.sources.incremental[date],
+) -> Iterator[TDataItem]:
+    ga_service = client.get_service("GoogleAdsService")
+    fields = report.dimensions + report.metrics + report.segments
+    criteria = date_predicate("segments.date", date.last_value, date.end_value)  # type:ignore
+    query = f"""
+    SELECT
+        {", ".join(fields)}
+    FROM
+        {report.resource}
+    WHERE
+        {criteria}
+    """
+    if report.unfilterable is True:
+        i = query.index("WHERE", 0)
+        query = query[:i]
+
+    allowed_keys = set([field.to_column(k) for k in fields])
+    stream = ga_service.search_stream(customer_id=customer_id, query=query)
+    for batch in stream:
+        for row in batch.results:
+            data = flatten(merge_lists(to_dict(row)))
+            if "segments_date" in data:
+                data["segments_date"] = datetime.strptime(
+                    data["segments_date"], "%Y-%m-%d"
+                ).date()
+            yield {k: v for k, v in data.items() if k in allowed_keys}
+
+
+def to_dict(item: Any) -> TDataItem:
+    """
+    Processes a batch result (page of results per dimension) accordingly
+    :param batch:
+    :return:
+    """
+    return json.loads(
+        proto.Message.to_json(
+            item,
+            preserving_proto_field_name=True,
+            use_integers_for_enums=False,
+            including_default_value_fields=False,
+        )
+    )
+
+
+def merge_lists(item: dict) -> dict:
+    replacements = {}
+    for k, v in item.get("metrics", {}).items():
+        if isinstance(v, list):
+            replacements[k] = ",".join(v)
+    if len(replacements) == 0:
+        return item
+    item["metrics"].update(replacements)
+    return item

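The `flatten(merge_lists(to_dict(row)))` step above turns a nested Google Ads row into the underscore-delimited columns that the incremental cursor (`segments_date`) relies on. A small illustration with `flatten_json` (the dependency added in this release); the sample row is made up.

```python
from flatten_json import flatten

row = {
    "campaign": {"id": "123", "name": "spring_sale"},
    "segments": {"date": "2024-01-01"},
    "metrics": {"clicks": "42", "impressions": "900"},
}
print(flatten(row))
# -> {'campaign_id': '123', 'campaign_name': 'spring_sale',
#     'segments_date': '2024-01-01', 'metrics_clicks': '42',
#     'metrics_impressions': '900'}
```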