ingestr 0.13.55__tar.gz → 0.13.57__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.13.55 → ingestr-0.13.57}/PKG-INFO +1 -5
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/kinesis.md +5 -1
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/s3.md +46 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/stripe.md +63 -9
- ingestr-0.13.57/docs/supported-sources/trustpilot.md +35 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/blob.py +24 -0
- ingestr-0.13.57/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/factory.py +2 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/kinesis/__init__.py +3 -3
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/kinesis/helpers.py +19 -2
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/sources.py +119 -44
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/stripe_analytics/__init__.py +86 -1
- ingestr-0.13.57/ingestr/src/stripe_analytics/helpers.py +408 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/stripe_analytics/settings.py +6 -13
- ingestr-0.13.57/ingestr/src/trustpilot/__init__.py +48 -0
- ingestr-0.13.57/ingestr/src/trustpilot/client.py +48 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/requirements.in +0 -1
- {ingestr-0.13.55 → ingestr-0.13.57}/requirements.txt +3 -23
- {ingestr-0.13.55 → ingestr-0.13.57}/requirements_arm64.txt +3 -23
- ingestr-0.13.55/ingestr/src/buildinfo.py +0 -1
- ingestr-0.13.55/ingestr/src/stripe_analytics/helpers.py +0 -68
- {ingestr-0.13.55 → ingestr-0.13.57}/.dockerignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.gitignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.gitleaksignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.python-version +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/.vale.ini +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/Dockerfile +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/LICENSE.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/Makefile +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/README.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/commands/ingest.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/index.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/athena.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/cratedb-source.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/gcp_spanner_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/github.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/ingestion_elasticsearch_img.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/mixpanel_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/personio.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/phantombuster.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/pipedrive.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/quickbook_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/sftp.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/attio.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/cratedb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/elasticsearch.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/frankfurter.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/mixpanel.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/phantombuster.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/quickbooks.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/sftp.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/smartsheets.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/solidgate.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/spanner.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/conftest.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/main.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/attio/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/attio/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/destinations.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/elasticsearch/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/facebook_ads/utils.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/filters.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/frankfurter/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/frankfurter/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/freshdesk/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/http_client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/mixpanel/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/mixpanel/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/partition.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/phantombuster/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/phantombuster/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/quickbooks/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/smartsheets/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/solidgate/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/solidgate/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/time.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/version.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/ingestr/tests/unit/test_smartsheets.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/package-lock.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/package.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/pyproject.toml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/requirements-dev.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/resources/demo.gif +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/resources/demo.tape +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/resources/ingestr.svg +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/We.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/meta.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.57}/test.env.template +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.55
|
|
3
|
+
Version: 0.13.57
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -27,7 +27,6 @@ Requires-Dist: asynch==0.2.4
|
|
|
27
27
|
Requires-Dist: attrs==25.1.0
|
|
28
28
|
Requires-Dist: backoff==2.2.1
|
|
29
29
|
Requires-Dist: bcrypt==4.3.0
|
|
30
|
-
Requires-Dist: beautifulsoup4==4.13.3
|
|
31
30
|
Requires-Dist: boto3==1.37.1
|
|
32
31
|
Requires-Dist: botocore==1.37.1
|
|
33
32
|
Requires-Dist: cachetools==5.5.2
|
|
@@ -156,7 +155,6 @@ Requires-Dist: python-quickbooks==0.9.2
|
|
|
156
155
|
Requires-Dist: pytz==2025.1
|
|
157
156
|
Requires-Dist: pyyaml==6.0.2
|
|
158
157
|
Requires-Dist: rauth==0.7.3
|
|
159
|
-
Requires-Dist: redshift-connector==2.1.5
|
|
160
158
|
Requires-Dist: requests-file==2.1.0
|
|
161
159
|
Requires-Dist: requests-oauthlib==1.3.1
|
|
162
160
|
Requires-Dist: requests-toolbelt==1.0.0
|
|
@@ -168,7 +166,6 @@ Requires-Dist: rsa==4.9
|
|
|
168
166
|
Requires-Dist: rudder-sdk-python==2.1.4
|
|
169
167
|
Requires-Dist: s3fs==2025.3.2
|
|
170
168
|
Requires-Dist: s3transfer==0.11.3
|
|
171
|
-
Requires-Dist: scramp==1.4.5
|
|
172
169
|
Requires-Dist: semver==3.0.4
|
|
173
170
|
Requires-Dist: setuptools==75.8.2
|
|
174
171
|
Requires-Dist: shellingham==1.5.4
|
|
@@ -180,7 +177,6 @@ Requires-Dist: smmap==5.0.2
|
|
|
180
177
|
Requires-Dist: snowflake-connector-python==3.14.0
|
|
181
178
|
Requires-Dist: snowflake-sqlalchemy==1.6.1
|
|
182
179
|
Requires-Dist: sortedcontainers==2.4.0
|
|
183
|
-
Requires-Dist: soupsieve==2.6
|
|
184
180
|
Requires-Dist: sqlalchemy-bigquery==1.12.1
|
|
185
181
|
Requires-Dist: sqlalchemy-cratedb==0.42.0.dev2
|
|
186
182
|
Requires-Dist: sqlalchemy-hana==2.0.0
|
|
@@ -123,6 +123,7 @@ export default defineConfig({
|
|
|
123
123
|
},
|
|
124
124
|
{ text: "Frankfurter", link: "/supported-sources/frankfurter.md" },
|
|
125
125
|
{ text: "Freshdesk", link: "/supported-sources/freshdesk.md" },
|
|
126
|
+
{ text: "Trustpilot", link: "/supported-sources/trustpilot.md" },
|
|
126
127
|
{ text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
|
|
127
128
|
{ text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
|
|
128
129
|
{ text: "Google Ads", link: "/supported-sources/google-ads.md" },
|
|
@@ -31,9 +31,13 @@ ingestr ingest --source-uri 'kinesis://?aws_access_key_id=id_123&aws_secret_acce
|
|
|
31
31
|
--dest-table 'dest.results'
|
|
32
32
|
```
|
|
33
33
|
|
|
34
|
-
When using Kinesis as a source, specify the
|
|
34
|
+
When using Kinesis as a source, specify the [StreamName] you want to read from as the `--source-table` parameter. For example, if you want to read from a Kinesis stream named "customer_events", you would use `--source-table 'customer_events'`.
|
|
35
|
+
You can also use a full Kinesis [StreamARN] to address the stream in [ARN] format, like `arn:aws:kinesis:eu-central-1:842404475894:stream/customer_events`.
|
|
35
36
|
|
|
36
37
|
### Initial Load Configuration
|
|
37
38
|
By default, ingestr reads from the beginning of the Kinesis stream. To start reading from a specific time, use the `interval_start` parameter.
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
[ARN]: https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html
|
|
42
|
+
[StreamARN]: https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StreamDescription.html#Streams-Type-StreamDescription-StreamARN
|
|
43
|
+
[StreamName]: https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StreamDescription.html#Streams-Type-StreamDescription-StreamName
|
|
@@ -134,3 +134,49 @@ The `<file-glob-pattern>` in the `--source-table` argument allows for flexible f
|
|
|
134
134
|
| `bucket/myFolder/**/*.jsonl` | Retrieves all JSONL files recursively from the `myFolder` directory and its subdirectories in `s3://bucket`. |
|
|
135
135
|
| `bucket/myFolder/mySubFolder/users.parquet` | Retrieves the specific `users.parquet` file from the `myFolder/mySubFolder/` path in `s3://bucket`. |
|
|
136
136
|
| `bucket/employees.jsonl` | Retrieves the `employees.jsonl` file located at the root level of the `s3://bucket`. |
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
### Working with compressed files
|
|
140
|
+
|
|
141
|
+
`ingestr` automatically detects and handles gzipped files in your S3 bucket. You can load data from compressed files with the `.gz` extension without any additional configuration.
|
|
142
|
+
|
|
143
|
+
For example, to load data from a gzipped CSV file:
|
|
144
|
+
|
|
145
|
+
```sh
|
|
146
|
+
ingestr ingest \
|
|
147
|
+
--source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
|
|
148
|
+
--source-table 'my_bucket/logs/event-data.csv.gz' \
|
|
149
|
+
--dest-uri duckdb:///compressed_data.duckdb \
|
|
150
|
+
--dest-table 'logs.events'
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
You can also use glob patterns to load multiple compressed files:
|
|
154
|
+
|
|
155
|
+
```sh
|
|
156
|
+
ingestr ingest \
|
|
157
|
+
--source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
|
|
158
|
+
--source-table 'my_bucket/logs/**/*.csv.gz' \
|
|
159
|
+
--dest-uri duckdb:///compressed_data.duckdb \
|
|
160
|
+
--dest-table 'logs.events'
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### File type hinting
|
|
164
|
+
|
|
165
|
+
If your files are properly encoded but lack the correct file extension (CSV, JSONL, or Parquet), you can provide a file type hint to inform `ingestr` about the format of the files. This is done by appending a fragment identifier (`#format`) to the end of the path in your `--source-table` parameter.
|
|
166
|
+
|
|
167
|
+
For example, if you have JSONL-formatted log files stored in S3 with a non-standard extension:
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
--source-table "my_bucket/logs/event-data#jsonl"
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
This tells `ingestr` to process the files as JSONL, regardless of their actual extension.
|
|
174
|
+
|
|
175
|
+
Supported format hints include:
|
|
176
|
+
- `#csv` - For comma-separated values files
|
|
177
|
+
- `#jsonl` - For line-delimited JSON files
|
|
178
|
+
- `#parquet` - For Parquet format files
|
|
179
|
+
|
|
180
|
+
::: tip
|
|
181
|
+
File type hinting works with `gzip` compressed files as well.
|
|
182
|
+
:::
|
|
@@ -30,11 +30,68 @@ ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'ch
|
|
|
30
30
|
|
|
31
31
|
The result of this command will be a table in the `stripe.duckdb` database with JSON columns.
|
|
32
32
|
|
|
33
|
+
## Table Name Structure
|
|
34
|
+
|
|
35
|
+
Stripe source supports different loading modes that can be specified using the table name structure:
|
|
36
|
+
|
|
37
|
+
- `<endpoint>` - Standard async loading (default)
|
|
38
|
+
- `<endpoint>:sync` - Full loading with synchronous processing
|
|
39
|
+
- `<endpoint>:sync:incremental` - Incremental loading mode with synchronous processing
|
|
40
|
+
|
|
41
|
+
### Loading Modes and Trade-offs
|
|
42
|
+
|
|
43
|
+
#### Standard Async Loading (Default)
|
|
44
|
+
**Format**: `<endpoint>` (e.g., `charges`, `subscriptions`)
|
|
45
|
+
|
|
46
|
+
- **Use case**: Full data loading from all time periods
|
|
47
|
+
- **Performance**: Loads data in parallel using async processing
|
|
48
|
+
- **Data completeness**: Captures all historical data and updates
|
|
49
|
+
- **Speed**: Slower due to comprehensive data retrieval
|
|
50
|
+
- **Best for**: Cases where you want all updated data in your database
|
|
51
|
+
|
|
52
|
+
**Example**:
|
|
53
|
+
```sh
|
|
54
|
+
ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'subscriptions' --dest-uri duckdb:///stripe.duckdb --dest-table 'dest.subscriptions'
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
#### Sync Loading
|
|
58
|
+
**Format**: `<endpoint>:sync` (e.g., `charges:sync`, `subscriptions:sync`)
|
|
59
|
+
|
|
60
|
+
- **Use case**: Full data loading from all time periods
|
|
61
|
+
- **Performance**: Loads data in parallel using sync processing
|
|
62
|
+
- **Data completeness**: Captures all historical data and updates
|
|
63
|
+
- **Speed**: Slower due to comprehensive data retrieval, but faster than async loading when you have less data
|
|
64
|
+
|
|
65
|
+
#### Incremental Loading
|
|
66
|
+
**Format**: `<endpoint>:sync:incremental` (e.g., `charges:sync:incremental`, `events:sync:incremental`)
|
|
67
|
+
|
|
68
|
+
- **Use case**: Loading data within specific time windows
|
|
69
|
+
- **Performance**: Fast, processes only data within the specified interval
|
|
70
|
+
- **Data completeness**: Limited to the specified time window, does not track updates from past dates
|
|
71
|
+
- **Speed**: Faster due to filtered data retrieval
|
|
72
|
+
- **Processing**: Runs in synchronous mode only
|
|
73
|
+
- **Best for**: Quick loads where you don't care about updates to past data
|
|
74
|
+
|
|
75
|
+
**Example**:
|
|
76
|
+
```sh
|
|
77
|
+
ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges:sync:incremental' --dest-uri duckdb:///stripe.duckdb --dest-table 'dest.charges' --interval-start '2024-01-01' --interval-end '2024-01-31'
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Choosing the Right Approach
|
|
81
|
+
|
|
82
|
+
| Approach | Speed | Data Completeness | Use Case |
|
|
83
|
+
|----------|--------|------------------|----------|
|
|
84
|
+
| **Standard Async** | Faster for larger data, slower for smaller data | Complete historical data | Initial loads, full historical analysis |
|
|
85
|
+
| **Sync** | Slow for larger data, faster for smaller data | Complete historical data | Initial loads, full historical analysis |
|
|
86
|
+
| **Incremental** | Fastest | Time-window specific | Regular updates, recent data analysis |
|
|
87
|
+
|
|
33
88
|
## Tables
|
|
34
89
|
|
|
35
90
|
Stripe source allows ingesting the following sources into separate tables:
|
|
36
91
|
|
|
37
|
-
###
|
|
92
|
+
### All Endpoints
|
|
93
|
+
|
|
94
|
+
All endpoints support the standard async loading mode. The following endpoints are available:
|
|
38
95
|
|
|
39
96
|
- `account`: Contains information about a Stripe account, including balances, payouts, and account settings.
|
|
40
97
|
- `apple_pay_domain`: Represents Apple Pay domains registered with Stripe for processing Apple Pay payments.
|
|
@@ -67,12 +124,7 @@ Stripe source allows ingesting the following sources into separate tables:
|
|
|
67
124
|
- `top_up`: Records top-ups made to Stripe accounts.
|
|
68
125
|
- `transfer`: Records transfers between Stripe accounts.
|
|
69
126
|
- `webhook_endpoint`: Contains webhook endpoint configurations for receiving event notifications.
|
|
70
|
-
|
|
71
|
-
### Incremental Endpoints
|
|
72
|
-
|
|
73
|
-
The following endpoints support incremental loading, meaning only new or updated records will be fetched:
|
|
74
|
-
|
|
75
|
-
- `application_fee`: Records fees collected by platforms (incremental).
|
|
127
|
+
- `application_fee`: Records fees collected by platforms.
|
|
76
128
|
- `balance_transaction`: Records transactions that affect the Stripe account balance, such as charges, refunds, and payouts.
|
|
77
129
|
- `charge`: Returns a list of charges.
|
|
78
130
|
- `credit_note`: Contains credit note information for refunds and adjustments.
|
|
@@ -80,12 +132,14 @@ The following endpoints support incremental loading, meaning only new or updated
|
|
|
80
132
|
- `invoice`: Represents invoices sent to customers, detailing line items, amounts, and payment status.
|
|
81
133
|
- `invoice_item`: Contains individual line items that can be added to invoices.
|
|
82
134
|
- `invoice_line_item`: Represents line items within invoices.
|
|
83
|
-
- `setup_attempt`: Records attempts to set up payment methods (also available as incremental).
|
|
84
135
|
|
|
85
136
|
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
86
137
|
|
|
138
|
+
> [!TIP]
|
|
139
|
+
> For time-sensitive data analysis or regular updates, use incremental loading (the `:sync:incremental` suffix, e.g. `charges:sync:incremental`) with the `--interval-start` and `--interval-end` parameters for faster processing. For comprehensive historical analysis, use standard async loading without any suffix.
|
|
140
|
+
|
|
87
141
|
> [!WARNING]
|
|
88
|
-
>
|
|
142
|
+
> Incremental loading filters data based on the specified time window and does not track updates to records created outside that window. Use standard async loading if you need to capture all historical updates.
|
|
89
143
|
|
|
90
144
|
> [!NOTE]
|
|
91
145
|
> For backward compatibility, non-underscored versions of table names (e.g., `checkoutsession`, `paymentintent`, `subscriptionitem`) are still supported but will be deprecated in future versions. Please use the underscored versions (e.g., `checkout_session`, `payment_intent`, `subscription_item`) for new integrations.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Trustpilot
|
|
2
|
+
|
|
3
|
+
[Trustpilot](https://www.trustpilot.com/) provides a platform for collecting and
|
|
4
|
+
sharing customer reviews.
|
|
5
|
+
|
|
6
|
+
ingestr supports Trustpilot as a source.
|
|
7
|
+
|
|
8
|
+
## URI format
|
|
9
|
+
|
|
10
|
+
The URI format for Trustpilot is:
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
trustpilot://<business_unit_id>?api_key=<api_key>
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
URI parameters:
|
|
17
|
+
- `api_key`: Your Trustpilot API key.
|
|
18
|
+
- `business_unit_id`: Identifier of the business unit whose reviews you want to fetch.
|
|
19
|
+
|
|
20
|
+
## Example usage
|
|
21
|
+
|
|
22
|
+
Assuming your `business_unit_id` is `123` and your API key is `key_abc`, you can ingest reviews into DuckDB using:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
ingestr ingest --source-uri 'trustpilot://123?api_key=key_abc' --source-table 'reviews' --dest-uri duckdb:///trustpilot.duckdb --dest-table 'dest.reviews'
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Tables
|
|
29
|
+
|
|
30
|
+
Currently the Trustpilot source exposes the following table:
|
|
31
|
+
|
|
32
|
+
| Name | Description |
|
|
33
|
+
| ------- | ------------------------------------------- |
|
|
34
|
+
| reviews | Customer reviews for the specified business |
|
|
35
|
+
|
|
@@ -6,6 +6,10 @@ BucketName: TypeAlias = str
|
|
|
6
6
|
FileGlob: TypeAlias = str
|
|
7
7
|
|
|
8
8
|
|
|
9
|
+
class UnsupportedEndpointError(Exception):
    """Signal that a path's file format has no matching reader endpoint."""
|
|
11
|
+
|
|
12
|
+
|
|
9
13
|
def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
10
14
|
"""
|
|
11
15
|
parse the URI of a blob storage and
|
|
@@ -50,3 +54,23 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
|
50
54
|
return "", parts[0]
|
|
51
55
|
|
|
52
56
|
return parts[0], parts[1]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def parse_endpoint(path: str) -> str:
    """
    Map a file path to the name of the reader endpoint for its format.

    The format is taken from the last dot-separated segment of ``path``;
    when that segment is ``gz`` the segment before it is used instead, so
    ``data.csv.gz`` is treated as ``csv``.

    Returns one of ``read_csv``, ``read_jsonl`` or ``read_parquet``.
    Raises UnsupportedEndpointError for any other format.
    """
    segments = path.split(".")
    kind = segments[-1]
    if kind == "gz":
        # Compressed file: the real format is the segment before ".gz".
        kind = segments[-2]

    readers = {
        "csv": "read_csv",
        "jsonl": "read_jsonl",
        "parquet": "read_parquet",
    }
    if kind not in readers:
        raise UnsupportedEndpointError(f"Unsupported file format: {kind}")
    return readers[kind]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.13.57"
|
|
@@ -64,6 +64,7 @@ from ingestr.src.sources import (
|
|
|
64
64
|
SqlSource,
|
|
65
65
|
StripeAnalyticsSource,
|
|
66
66
|
TikTokSource,
|
|
67
|
+
TrustpilotSource,
|
|
67
68
|
ZendeskSource,
|
|
68
69
|
)
|
|
69
70
|
|
|
@@ -165,6 +166,7 @@ class SourceDestinationFactory:
|
|
|
165
166
|
"pipedrive": PipedriveSource,
|
|
166
167
|
"frankfurter": FrankfurterSource,
|
|
167
168
|
"freshdesk": FreshdeskSource,
|
|
169
|
+
"trustpilot": TrustpilotSource,
|
|
168
170
|
"phantombuster": PhantombusterSource,
|
|
169
171
|
"elasticsearch": ElasticsearchSource,
|
|
170
172
|
"attio": AttioSource,
|
|
@@ -9,7 +9,7 @@ from dlt.common.time import ensure_pendulum_datetime
|
|
|
9
9
|
from dlt.common.typing import StrStr, TAnyDateTime, TDataItem
|
|
10
10
|
from dlt.common.utils import digest128
|
|
11
11
|
|
|
12
|
-
from .helpers import get_shard_iterator, max_sequence_by_shard
|
|
12
|
+
from .helpers import get_shard_iterator, get_stream_address, max_sequence_by_shard
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dlt.resource(
|
|
@@ -42,7 +42,7 @@ def kinesis_stream(
|
|
|
42
42
|
initial_at_timestamp (TAnyDateTime): An initial timestamp used to generate AT_TIMESTAMP or LATEST iterator when timestamp value is 0
|
|
43
43
|
max_number_of_messages (int): Maximum number of messages to read in one run. Actual read may exceed that number by up to chunk_size. Defaults to None (no limit).
|
|
44
44
|
milliseconds_behind_latest (int): The number of milliseconds behind the top of the shard to stop reading messages, defaults to 1000.
|
|
45
|
-
parse_json (bool): If True, assumes that messages are json strings, parses them and returns instead of `data` (otherwise). Defaults to
|
|
45
|
+
parse_json (bool): If True, assumes that messages are json strings, parses them and returns instead of `data` (otherwise). Defaults to True.
|
|
46
46
|
chunk_size (int): The number of records to fetch at once. Defaults to 1000.
|
|
47
47
|
Yields:
|
|
48
48
|
Iterable[TDataItem]: Messages. Contain Kinesis envelope in `kinesis` and bytes data in `data` (if `parse_json` disabled)
|
|
@@ -65,7 +65,7 @@ def kinesis_stream(
|
|
|
65
65
|
# so next time we request shards at AT_TIMESTAMP that is now
|
|
66
66
|
resource_state["initial_at_timestamp"] = pendulum.now("UTC").subtract(seconds=1)
|
|
67
67
|
|
|
68
|
-
shards_list = kinesis_client.list_shards(
|
|
68
|
+
shards_list = kinesis_client.list_shards(**get_stream_address(stream_name))
|
|
69
69
|
shards: List[StrStr] = shards_list["Shards"]
|
|
70
70
|
while next_token := shards_list.get("NextToken"):
|
|
71
71
|
shards_list = kinesis_client.list_shards(NextToken=next_token)
|
|
@@ -2,7 +2,7 @@ from typing import Any, Sequence, Tuple
|
|
|
2
2
|
|
|
3
3
|
import dlt
|
|
4
4
|
from dlt.common import pendulum
|
|
5
|
-
from dlt.common.typing import DictStrAny, StrAny, StrStr
|
|
5
|
+
from dlt.common.typing import DictStrAny, DictStrStr, StrAny, StrStr
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def get_shard_iterator(
|
|
@@ -40,7 +40,7 @@ def get_shard_iterator(
|
|
|
40
40
|
)
|
|
41
41
|
|
|
42
42
|
shard_iterator: StrStr = kinesis_client.get_shard_iterator(
|
|
43
|
-
|
|
43
|
+
**get_stream_address(stream_name), ShardId=shard_id, **iterator_params
|
|
44
44
|
)
|
|
45
45
|
return shard_iterator["ShardIterator"], iterator_params
|
|
46
46
|
|
|
@@ -63,3 +63,20 @@ def max_sequence_by_shard(values: Sequence[StrStr]) -> StrStr:
|
|
|
63
63
|
# we compare message sequence at shard_id
|
|
64
64
|
last_value[shard_id] = max(item["seq_no"], last_value.get(shard_id, ""))
|
|
65
65
|
return last_value
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_stream_address(stream_name: str) -> DictStrStr:
    """
    Build the keyword argument that addresses a Kinesis stream.

    A value starting with ``arn:`` is passed as ``StreamARN``; anything else
    is passed as ``StreamName``. The result is a single-entry dict meant to
    be unpacked into a client call.

    Examples:
    - customer_events
    - arn:aws:kinesis:eu-central-1:842404475894:stream/customer_events

    https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StreamDescription.html#Streams-Type-StreamDescription-StreamName
    https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StreamDescription.html#Streams-Type-StreamDescription-StreamARN
    """
    address_key = "StreamARN" if stream_name.startswith("arn:") else "StreamName"
    return {address_key: stream_name}
|
|
@@ -677,24 +677,33 @@ class StripeAnalyticsSource:
|
|
|
677
677
|
|
|
678
678
|
table = table.lower()
|
|
679
679
|
|
|
680
|
-
from ingestr.src.stripe_analytics.settings import
|
|
681
|
-
ENDPOINTS,
|
|
682
|
-
INCREMENTAL_ENDPOINTS,
|
|
683
|
-
)
|
|
680
|
+
from ingestr.src.stripe_analytics.settings import ENDPOINTS
|
|
684
681
|
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
682
|
+
endpoint = None
|
|
683
|
+
incremental = False
|
|
684
|
+
sync = False
|
|
688
685
|
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
686
|
+
table_fields = table.split(":")
|
|
687
|
+
if len(table_fields) == 1:
|
|
688
|
+
endpoint = table_fields[0]
|
|
689
|
+
elif len(table_fields) == 2:
|
|
690
|
+
endpoint = table_fields[0]
|
|
691
|
+
sync = table_fields[1] == "sync"
|
|
692
|
+
elif len(table_fields) == 3:
|
|
693
|
+
endpoint = table_fields[0]
|
|
694
|
+
sync = table_fields[1] == "sync"
|
|
695
|
+
incremental = table_fields[2] == "incremental"
|
|
696
|
+
else:
|
|
697
|
+
raise ValueError(
|
|
698
|
+
"Invalid Stripe table format. Expected: stripe:<endpoint> or stripe:<endpoint>:<sync> or stripe:<endpoint>:<sync>:<incremental>"
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
if incremental and not sync:
|
|
702
|
+
raise ValueError(
|
|
703
|
+
"incremental loads must be used with sync loading"
|
|
704
|
+
)
|
|
695
705
|
|
|
696
|
-
|
|
697
|
-
endpoint = INCREMENTAL_ENDPOINTS[table]
|
|
706
|
+
if incremental:
|
|
698
707
|
from ingestr.src.stripe_analytics import incremental_stripe_source
|
|
699
708
|
|
|
700
709
|
def nullable_date(date_str: Optional[str]):
|
|
@@ -702,6 +711,7 @@ class StripeAnalyticsSource:
|
|
|
702
711
|
return ensure_pendulum_datetime(date_str)
|
|
703
712
|
return None
|
|
704
713
|
|
|
714
|
+
endpoint = ENDPOINTS[endpoint]
|
|
705
715
|
return incremental_stripe_source(
|
|
706
716
|
endpoints=[
|
|
707
717
|
endpoint,
|
|
@@ -710,6 +720,26 @@ class StripeAnalyticsSource:
|
|
|
710
720
|
initial_start_date=nullable_date(kwargs.get("interval_start", None)),
|
|
711
721
|
end_date=nullable_date(kwargs.get("interval_end", None)),
|
|
712
722
|
).with_resources(endpoint)
|
|
723
|
+
else:
|
|
724
|
+
endpoint = ENDPOINTS[endpoint]
|
|
725
|
+
if sync:
|
|
726
|
+
from ingestr.src.stripe_analytics import stripe_source
|
|
727
|
+
|
|
728
|
+
return stripe_source(
|
|
729
|
+
endpoints=[
|
|
730
|
+
endpoint,
|
|
731
|
+
],
|
|
732
|
+
stripe_secret_key=api_key[0],
|
|
733
|
+
).with_resources(endpoint)
|
|
734
|
+
else:
|
|
735
|
+
from ingestr.src.stripe_analytics import async_stripe_source
|
|
736
|
+
|
|
737
|
+
return async_stripe_source(
|
|
738
|
+
endpoints=[
|
|
739
|
+
endpoint,
|
|
740
|
+
],
|
|
741
|
+
stripe_secret_key=api_key[0],
|
|
742
|
+
).with_resources(endpoint)
|
|
713
743
|
|
|
714
744
|
raise ValueError(
|
|
715
745
|
f"Resource '{table}' is not supported for stripe source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
@@ -1362,17 +1392,25 @@ class S3Source:
|
|
|
1362
1392
|
secret=secret_access_key[0],
|
|
1363
1393
|
)
|
|
1364
1394
|
|
|
1365
|
-
|
|
1366
|
-
if
|
|
1367
|
-
endpoint = "
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1395
|
+
endpoint: Optional[str] = None
|
|
1396
|
+
if "#" in table:
|
|
1397
|
+
_, endpoint = table.split("#")
|
|
1398
|
+
if endpoint not in ["csv", "jsonl", "parquet"]:
|
|
1399
|
+
raise ValueError(
|
|
1400
|
+
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1401
|
+
)
|
|
1402
|
+
endpoint = f"read_{endpoint}"
|
|
1372
1403
|
else:
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1404
|
+
try:
|
|
1405
|
+
endpoint = blob.parse_endpoint(path_to_file)
|
|
1406
|
+
except blob.UnsupportedEndpointError:
|
|
1407
|
+
raise ValueError(
|
|
1408
|
+
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1409
|
+
)
|
|
1410
|
+
except Exception as e:
|
|
1411
|
+
raise ValueError(
|
|
1412
|
+
f"Failed to parse endpoint from path: {path_to_file}"
|
|
1413
|
+
) from e
|
|
1376
1414
|
|
|
1377
1415
|
from ingestr.src.filesystem import readers
|
|
1378
1416
|
|
|
@@ -1844,17 +1882,16 @@ class GCSSource:
|
|
|
1844
1882
|
token=credentials,
|
|
1845
1883
|
)
|
|
1846
1884
|
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
elif file_extension == "jsonl":
|
|
1851
|
-
endpoint = "read_jsonl"
|
|
1852
|
-
elif file_extension == "parquet":
|
|
1853
|
-
endpoint = "read_parquet"
|
|
1854
|
-
else:
|
|
1885
|
+
try:
|
|
1886
|
+
endpoint = blob.parse_endpoint(path_to_file)
|
|
1887
|
+
except blob.UnsupportedEndpointError:
|
|
1855
1888
|
raise ValueError(
|
|
1856
|
-
"
|
|
1889
|
+
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1857
1890
|
)
|
|
1891
|
+
except Exception as e:
|
|
1892
|
+
raise ValueError(
|
|
1893
|
+
f"Failed to parse endpoint from path: {path_to_file}"
|
|
1894
|
+
) from e
|
|
1858
1895
|
|
|
1859
1896
|
from ingestr.src.filesystem import readers
|
|
1860
1897
|
|
|
@@ -2392,6 +2429,47 @@ class FreshdeskSource:
|
|
|
2392
2429
|
).with_resources(table)
|
|
2393
2430
|
|
|
2394
2431
|
|
|
2432
|
+
class TrustpilotSource:
    """
    Source adapter for URIs of the form
    trustpilot://<business_unit_id>?api_key=<api_key>
    """

    def handles_incrementality(self) -> bool:
        """Always return True."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """
        Build the Trustpilot source for ``table`` from a connection URI.

        The business unit id is read from the URI's network location and the
        API key from its ``api_key`` query parameter. ``interval_start`` and
        ``interval_end`` are read from ``kwargs`` and converted to UTC
        ISO-8601 strings; a missing start falls back to 2000-01-01 and a
        missing end is passed through as None.

        Raises MissingValueError when the business unit id or API key is
        absent, and UnsupportedResourceError for any table other than
        ``reviews``.
        """
        parts = urlparse(uri)
        unit_id = parts.netloc
        query = parse_qs(parts.query)

        if not unit_id:
            raise MissingValueError("business_unit_id", "Trustpilot")

        keys = query.get("api_key")
        if keys is None:
            raise MissingValueError("api_key", "Trustpilot")

        def _utc_iso(value):
            # Normalise any accepted date value to a UTC ISO-8601 string.
            return ensure_pendulum_datetime(value).in_tz("UTC").isoformat()

        raw_start = kwargs.get("interval_start")
        start_date = _utc_iso("2000-01-01" if raw_start is None else raw_start)

        raw_end = kwargs.get("interval_end")
        end_date = None if raw_end is None else _utc_iso(raw_end)

        if table not in ("reviews",):
            raise UnsupportedResourceError(table, "Trustpilot")

        # Imported here, as in the original, rather than at module level.
        from ingestr.src.trustpilot import trustpilot_source

        source = trustpilot_source(
            business_unit_id=unit_id,
            api_key=keys[0],
            start_date=start_date,
            end_date=end_date,
        )
        return source.with_resources(table)
|
|
2471
|
+
|
|
2472
|
+
|
|
2395
2473
|
class PhantombusterSource:
|
|
2396
2474
|
def handles_incrementality(self) -> bool:
|
|
2397
2475
|
return True
|
|
@@ -2622,18 +2700,15 @@ class SFTPSource:
|
|
|
2622
2700
|
else:
|
|
2623
2701
|
file_glob = f"/{table}"
|
|
2624
2702
|
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
endpoint = "read_csv"
|
|
2629
|
-
elif file_extension == "jsonl":
|
|
2630
|
-
endpoint = "read_jsonl"
|
|
2631
|
-
elif file_extension == "parquet":
|
|
2632
|
-
endpoint = "read_parquet"
|
|
2633
|
-
else:
|
|
2703
|
+
try:
|
|
2704
|
+
endpoint = blob.parse_endpoint(table)
|
|
2705
|
+
except blob.UnsupportedEndpointError:
|
|
2634
2706
|
raise ValueError(
|
|
2635
|
-
"
|
|
2707
|
+
"SFTP Source only supports specific formats files: csv, jsonl, parquet"
|
|
2636
2708
|
)
|
|
2709
|
+
except Exception as e:
|
|
2710
|
+
raise ValueError(f"Failed to parse endpoint from path: {table}") from e
|
|
2711
|
+
|
|
2637
2712
|
from ingestr.src.filesystem import readers
|
|
2638
2713
|
|
|
2639
2714
|
dlt_source_resource = readers(bucket_url, fs, file_glob)
|