ingestr 0.12.8__tar.gz → 0.12.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.12.8 → ingestr-0.12.9}/PKG-INFO +2 -1
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/.vitepress/config.mjs +1 -0
- ingestr-0.12.9/docs/supported-sources/gcs.md +62 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/s3.md +6 -2
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/__init__.py +4 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/factory.py +2 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filesystem/__init__.py +0 -2
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/sources.py +75 -9
- ingestr-0.12.9/ingestr/src/version.py +1 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/requirements.txt +1 -1
- ingestr-0.12.8/ingestr/src/version.py +0 -1
- {ingestr-0.12.8 → ingestr-0.12.9}/.dockerignore +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.github/workflows/tests.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.gitignore +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.gitleaksignore +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.python-version +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/.vale.ini +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/Dockerfile +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/LICENSE.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/Makefile +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/README.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/commands/example-uris.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/commands/ingest.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/index.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/athena.png +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/github.png +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/tiktok.png +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/github.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/main.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/.gitignore +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/destinations.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/errors.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filters.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/time.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/package-lock.json +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/package.json +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/pyproject.toml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/requirements-dev.txt +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/resources/demo.gif +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/resources/demo.tape +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/resources/ingestr.svg +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Colons.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Gender.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Headings.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Latin.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Parens.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Passive.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Periods.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Slang.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Units.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/We.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Will.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/WordList.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/meta.json +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/vocab.txt +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.12.8 → ingestr-0.12.9}/styles/config/vocabularies/bruin/accept.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.8
|
|
3
|
+
Version: 0.12.9
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
|
|
|
22
22
|
Requires-Dist: duckdb-engine==0.13.5
|
|
23
23
|
Requires-Dist: duckdb==1.1.3
|
|
24
24
|
Requires-Dist: facebook-business==20.0.0
|
|
25
|
+
Requires-Dist: gcsfs==2024.10.0
|
|
25
26
|
Requires-Dist: google-analytics-data==0.18.16
|
|
26
27
|
Requires-Dist: google-api-python-client==2.130.0
|
|
27
28
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
@@ -102,6 +102,7 @@ export default defineConfig({
|
|
|
102
102
|
text: "Facebook Ads",
|
|
103
103
|
link: "/supported-sources/facebook-ads.md",
|
|
104
104
|
},
|
|
105
|
+
{ text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
|
|
105
106
|
{ text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
|
|
106
107
|
{ text: "GitHub", link: "/supported-sources/github.md" },
|
|
107
108
|
{ text: "Google Sheets", link: "/supported-sources/gsheets.md" },
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Google Cloud Storage
|
|
2
|
+
|
|
3
|
+
[Google Cloud Storage](https://cloud.google.com/storage?hl=en) is an online file storage web service for storing and accessing data on Google Cloud Platform infrastructure. The service combines the performance and scalability of Google's cloud with advanced security and sharing capabilities. It is an Infrastructure as a Service (IaaS), comparable to Amazon S3.
|
|
4
|
+
|
|
5
|
+
## URI format
|
|
6
|
+
|
|
7
|
+
The URI format for Google Cloud Storage is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
gs://<bucket_name>?credentials_path=/path/to/service-account.json
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
URI parameters:
|
|
14
|
+
|
|
15
|
+
- `bucket_name`: The name of the bucket
|
|
16
|
+
- `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
|
|
17
|
+
|
|
18
|
+
## Setting up a GCS Integration
|
|
19
|
+
|
|
20
|
+
To use Google Cloud Storage source in `ingestr`, you will need:
|
|
21
|
+
* A Google Cloud Project.
|
|
22
|
+
* A Service Account with at least [roles/storage.objectUser](https://cloud.google.com/storage/docs/access-control/iam-roles) IAM permission.
|
|
23
|
+
* A Service Account key file for the corresponding service account.
|
|
24
|
+
|
|
25
|
+
For more information on how to create a Service Account or its keys, see [Create service accounts](https://cloud.google.com/iam/docs/service-accounts-create) and [Create or delete service account keys](https://cloud.google.com/iam/docs/keys-create-delete) on Google Cloud docs.
|
|
26
|
+
|
|
27
|
+
## Example
|
|
28
|
+
|
|
29
|
+
Let's assume that:
|
|
30
|
+
* Service account key is available in the current directory, under the filename `service_account.json`.
|
|
31
|
+
* The bucket you want to load data from is called `my-org-bucket`
|
|
32
|
+
* The source file is available at `/data/latest/dump.csv`
|
|
33
|
+
* The data needs to be saved in a DuckDB database called `local.db`
|
|
34
|
+
* The destination table name will be `public.latest_dump`
|
|
35
|
+
|
|
36
|
+
You can run the following command line to achieve this:
|
|
37
|
+
|
|
38
|
+
```sh
|
|
39
|
+
ingestr ingest \
|
|
40
|
+
--source-uri "gs://my-org-bucket?credentials_path=$PWD/service_account.json" \
|
|
41
|
+
--source-table "/data/latest/dump.csv" \
|
|
42
|
+
--dest-uri "duckdb:///local.db" \
|
|
43
|
+
--dest-table "public.latest_dump"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Supported File Formats
|
|
47
|
+
`gs` source only supports loading files in the following formats:
|
|
48
|
+
* `csv`: Comma Separated Values (supports Tab Separated Values as well)
|
|
49
|
+
* `parquet`: [Apache Parquet](https://parquet.apache.org/) storage format.
|
|
50
|
+
* `jsonl`: Line delimited JSON. See [https://jsonlines.org/](https://jsonlines.org/)
|
|
51
|
+
|
|
52
|
+
## File Pattern
|
|
53
|
+
`ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
|
|
54
|
+
This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
|
|
55
|
+
|
|
56
|
+
Below are some examples of path patterns. Each path pattern is relative to the root of the bucket:
|
|
57
|
+
|
|
58
|
+
- `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
|
|
59
|
+
- `*.csv`: Retrieves all the CSV files from the first level of a folder.
|
|
60
|
+
- `myFolder/**/*.jsonl`: Retrieves all the JSONL files from anywhere under `myFolder`.
|
|
61
|
+
- `myFolder/mySubFolder/users.parquet`: Retrieves the `users.parquet` file from `mySubFolder`.
|
|
62
|
+
- `employees.jsonl`: Retrieves the `employees.jsonl` file from the root level of the bucket.
|
|
@@ -9,7 +9,7 @@ ingestr supports S3 as a source.
|
|
|
9
9
|
The URI format for S3 is as follows:
|
|
10
10
|
|
|
11
11
|
```plaintext
|
|
12
|
-
s3://<bucket_name
|
|
12
|
+
s3://<bucket_name>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
|
|
13
13
|
```
|
|
14
14
|
|
|
15
15
|
URI parameters:
|
|
@@ -25,7 +25,11 @@ S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. P
|
|
|
25
25
|
For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCtkpL5B`, bucket name is `my_bucket`, and `path_to_files` is `students/students_details.csv`, here's a sample command that will copy the data from the S3 bucket into a DuckDB database:
|
|
26
26
|
|
|
27
27
|
```sh
|
|
28
|
-
ingestr ingest
|
|
28
|
+
ingestr ingest \
|
|
29
|
+
--source-uri 's3://my_bucket?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
|
|
30
|
+
--source-table '/students/students_details.csv' \
|
|
31
|
+
--dest-uri duckdb:///s3.duckdb \
|
|
32
|
+
--dest-table 'dest.students_details'
|
|
29
33
|
```
|
|
30
34
|
|
|
31
35
|
The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
|
|
@@ -28,6 +28,10 @@ def app_store(
|
|
|
28
28
|
start_date: Optional[datetime] = None,
|
|
29
29
|
end_date: Optional[datetime] = None,
|
|
30
30
|
) -> Iterable[DltResource]:
|
|
31
|
+
if start_date and start_date.tzinfo is not None:
|
|
32
|
+
start_date = start_date.replace(tzinfo=None)
|
|
33
|
+
if end_date and end_date.tzinfo is not None:
|
|
34
|
+
end_date = end_date.replace(tzinfo=None)
|
|
31
35
|
for resource in RESOURCES:
|
|
32
36
|
yield dlt.resource(
|
|
33
37
|
get_analytics_reports,
|
|
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
|
|
|
25
25
|
ChessSource,
|
|
26
26
|
DynamoDBSource,
|
|
27
27
|
FacebookAdsSource,
|
|
28
|
+
GCSSource,
|
|
28
29
|
GitHubSource,
|
|
29
30
|
GoogleAnalyticsSource,
|
|
30
31
|
GoogleSheetsSource,
|
|
@@ -124,6 +125,7 @@ class SourceDestinationFactory:
|
|
|
124
125
|
"tiktok": TikTokSource,
|
|
125
126
|
"googleanalytics": GoogleAnalyticsSource,
|
|
126
127
|
"appstore": AppleAppStoreSource,
|
|
128
|
+
"gs": GCSSource,
|
|
127
129
|
}
|
|
128
130
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
129
131
|
"bigquery": BigQueryDestination,
|
|
@@ -39,8 +39,6 @@ def readers(
|
|
|
39
39
|
filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
|
|
40
40
|
filesystem_resource.apply_hints(
|
|
41
41
|
incremental=dlt.sources.incremental("modification_date"),
|
|
42
|
-
range_end="closed",
|
|
43
|
-
range_start="closed",
|
|
44
42
|
)
|
|
45
43
|
return (
|
|
46
44
|
filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
|
|
@@ -17,6 +17,8 @@ from typing import (
|
|
|
17
17
|
from urllib.parse import ParseResult, parse_qs, quote, urlparse
|
|
18
18
|
|
|
19
19
|
import dlt
|
|
20
|
+
import gcsfs # type: ignore
|
|
21
|
+
import s3fs # type: ignore
|
|
20
22
|
import pendulum
|
|
21
23
|
from dlt.common.configuration.specs import (
|
|
22
24
|
AwsCredentials,
|
|
@@ -1091,19 +1093,17 @@ class S3Source:
|
|
|
1091
1093
|
bucket_name = parsed_uri.hostname
|
|
1092
1094
|
if not bucket_name:
|
|
1093
1095
|
raise ValueError(
|
|
1094
|
-
"Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name
|
|
1096
|
+
"Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
|
|
1095
1097
|
)
|
|
1096
1098
|
bucket_url = f"s3://{bucket_name}"
|
|
1097
1099
|
|
|
1098
|
-
path_to_file = parsed_uri.path.lstrip("/")
|
|
1100
|
+
path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
|
|
1099
1101
|
if not path_to_file:
|
|
1100
|
-
raise ValueError(
|
|
1101
|
-
"Invalid S3 URI: The file path is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
|
|
1102
|
-
)
|
|
1102
|
+
raise ValueError("--source-table must be specified")
|
|
1103
1103
|
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1104
|
+
fs = s3fs.S3FileSystem(
|
|
1105
|
+
key=access_key_id[0],
|
|
1106
|
+
secret=secret_access_key[0],
|
|
1107
1107
|
)
|
|
1108
1108
|
|
|
1109
1109
|
file_extension = path_to_file.split(".")[-1]
|
|
@@ -1119,7 +1119,7 @@ class S3Source:
|
|
|
1119
1119
|
)
|
|
1120
1120
|
|
|
1121
1121
|
return readers(
|
|
1122
|
-
bucket_url
|
|
1122
|
+
bucket_url, fs, path_to_file
|
|
1123
1123
|
).with_resources(endpoint)
|
|
1124
1124
|
|
|
1125
1125
|
|
|
@@ -1503,3 +1503,69 @@ class AppleAppStoreSource:
|
|
|
1503
1503
|
raise UnsupportedResourceError(table, "AppStore")
|
|
1504
1504
|
|
|
1505
1505
|
return src.with_resources(table)
|
|
1506
|
+
|
|
1507
|
+
|
|
1508
|
+
class GCSSource:
|
|
1509
|
+
def handles_incrementality(self) -> bool:
|
|
1510
|
+
return True
|
|
1511
|
+
|
|
1512
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
1513
|
+
if kwargs.get("incremental_key"):
|
|
1514
|
+
raise ValueError(
|
|
1515
|
+
"GCS takes care of incrementality on its own, you should not provide incremental_key"
|
|
1516
|
+
)
|
|
1517
|
+
|
|
1518
|
+
parsed_uri = urlparse(uri)
|
|
1519
|
+
params = parse_qs(parsed_uri.query)
|
|
1520
|
+
credentials_path = params.get("credentials_path")
|
|
1521
|
+
credentials_base64 = params.get("credentials_base64")
|
|
1522
|
+
credentials_available = any(
|
|
1523
|
+
map(
|
|
1524
|
+
lambda x: x is not None,
|
|
1525
|
+
[credentials_path, credentials_base64],
|
|
1526
|
+
)
|
|
1527
|
+
)
|
|
1528
|
+
if credentials_available is False:
|
|
1529
|
+
raise MissingValueError("credentials_path or credentials_base64", "GCS")
|
|
1530
|
+
|
|
1531
|
+
bucket_name = parsed_uri.hostname
|
|
1532
|
+
if not bucket_name:
|
|
1533
|
+
raise ValueError(
|
|
1534
|
+
"Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
|
|
1535
|
+
)
|
|
1536
|
+
bucket_url = f"gs://{bucket_name}/"
|
|
1537
|
+
|
|
1538
|
+
path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
|
|
1539
|
+
if not path_to_file:
|
|
1540
|
+
raise ValueError("--source-table must be specified")
|
|
1541
|
+
|
|
1542
|
+
credentials = None
|
|
1543
|
+
if credentials_path:
|
|
1544
|
+
credentials = credentials_path[0]
|
|
1545
|
+
else:
|
|
1546
|
+
credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
|
|
1547
|
+
|
|
1548
|
+
# There's a compatibility issue between google-auth, dlt and gcsfs
|
|
1549
|
+
# that makes it difficult to use google.oauth2.service_account.Credentials
|
|
1550
|
+
# (The RECOMMENDED way of passing service account credentials)
|
|
1551
|
+
# directly with gcsfs. As a workaround, we construct the GCSFileSystem
|
|
1552
|
+
# and pass it directly to filesystem.readers.
|
|
1553
|
+
fs = gcsfs.GCSFileSystem(
|
|
1554
|
+
token=credentials,
|
|
1555
|
+
)
|
|
1556
|
+
|
|
1557
|
+
file_extension = path_to_file.split(".")[-1]
|
|
1558
|
+
if file_extension == "csv":
|
|
1559
|
+
endpoint = "read_csv"
|
|
1560
|
+
elif file_extension == "jsonl":
|
|
1561
|
+
endpoint = "read_jsonl"
|
|
1562
|
+
elif file_extension == "parquet":
|
|
1563
|
+
endpoint = "read_parquet"
|
|
1564
|
+
else:
|
|
1565
|
+
raise ValueError(
|
|
1566
|
+
"GCS Source only supports specific formats files: csv, jsonl, parquet"
|
|
1567
|
+
)
|
|
1568
|
+
|
|
1569
|
+
return readers(
|
|
1570
|
+
bucket_url, fs, path_to_file
|
|
1571
|
+
).with_resources(endpoint)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.12.9"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.12.8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|