ingestr 0.13.55__tar.gz → 0.13.56__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.13.55 → ingestr-0.13.56}/PKG-INFO +1 -1
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/s3.md +46 -0
- ingestr-0.13.56/docs/supported-sources/trustpilot.md +35 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/blob.py +24 -0
- ingestr-0.13.56/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/factory.py +2 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/sources.py +74 -29
- ingestr-0.13.56/ingestr/src/trustpilot/__init__.py +48 -0
- ingestr-0.13.56/ingestr/src/trustpilot/client.py +48 -0
- ingestr-0.13.55/ingestr/src/buildinfo.py +0 -1
- {ingestr-0.13.55 → ingestr-0.13.56}/.dockerignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.gitignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.gitleaksignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.python-version +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/.vale.ini +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/Dockerfile +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/LICENSE.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/Makefile +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/README.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/commands/ingest.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/index.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/athena.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/cratedb-source.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/gcp_spanner_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/github.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/ingestion_elasticsearch_img.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/mixpanel_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/personio.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/phantombuster.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/pipedrive.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/quickbook_ingestion.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/sftp.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/attio.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/cratedb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/elasticsearch.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/frankfurter.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/mixpanel.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/phantombuster.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/quickbooks.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/sftp.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/smartsheets.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/solidgate.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/spanner.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/conftest.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/main.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/attio/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/attio/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/destinations.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/elasticsearch/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/facebook_ads/utils.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/filters.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/frankfurter/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/frankfurter/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/freshdesk/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/http_client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/kinesis/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/mixpanel/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/mixpanel/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/partition.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/phantombuster/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/phantombuster/client.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/quickbooks/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/smartsheets/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/solidgate/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/solidgate/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/time.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/version.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/ingestr/tests/unit/test_smartsheets.py +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/package-lock.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/package.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/pyproject.toml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/requirements-dev.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/requirements.in +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/requirements.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/requirements_arm64.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/resources/demo.gif +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/resources/demo.tape +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/resources/ingestr.svg +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/We.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/meta.json +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.13.55 → ingestr-0.13.56}/test.env.template +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.55
|
|
3
|
+
Version: 0.13.56
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -123,6 +123,7 @@ export default defineConfig({
|
|
|
123
123
|
},
|
|
124
124
|
{ text: "Frankfurter", link: "/supported-sources/frankfurter.md" },
|
|
125
125
|
{ text: "Freshdesk", link: "/supported-sources/freshdesk.md" },
|
|
126
|
+
{ text: "Trustpilot", link: "/supported-sources/trustpilot.md" },
|
|
126
127
|
{ text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
|
|
127
128
|
{ text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
|
|
128
129
|
{ text: "Google Ads", link: "/supported-sources/google-ads.md" },
|
|
@@ -134,3 +134,49 @@ The `<file-glob-pattern>` in the `--source-table` argument allows for flexible f
|
|
|
134
134
|
| `bucket/myFolder/**/*.jsonl` | Retrieves all JSONL files recursively from the `myFolder` directory and its subdirectories in `s3://bucket`. |
|
|
135
135
|
| `bucket/myFolder/mySubFolder/users.parquet` | Retrieves the specific `users.parquet` file from the `myFolder/mySubFolder/` path in `s3://bucket`. |
|
|
136
136
|
| `bucket/employees.jsonl` | Retrieves the `employees.jsonl` file located at the root level of the `s3://bucket`. |
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
### Working with compressed files
|
|
140
|
+
|
|
141
|
+
`ingestr` automatically detects and handles gzipped files in your S3 bucket. You can load data from compressed files with the `.gz` extension without any additional configuration.
|
|
142
|
+
|
|
143
|
+
For example, to load data from a gzipped CSV file:
|
|
144
|
+
|
|
145
|
+
```sh
|
|
146
|
+
ingestr ingest \
|
|
147
|
+
--source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
|
|
148
|
+
--source-table 'my_bucket/logs/event-data.csv.gz' \
|
|
149
|
+
--dest-uri duckdb:///compressed_data.duckdb \
|
|
150
|
+
--dest-table 'logs.events'
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
You can also use glob patterns to load multiple compressed files:
|
|
154
|
+
|
|
155
|
+
```sh
|
|
156
|
+
ingestr ingest \
|
|
157
|
+
--source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
|
|
158
|
+
--source-table 'my_bucket/logs/**/*.csv.gz' \
|
|
159
|
+
--dest-uri duckdb:///compressed_data.duckdb \
|
|
160
|
+
--dest-table 'logs.events'
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### File type hinting
|
|
164
|
+
|
|
165
|
+
If your files are properly encoded but lack the correct file extension (CSV, JSONL, or Parquet), you can provide a file type hint to inform `ingestr` about the format of the files. This is done by appending a fragment identifier (`#format`) to the end of the path in your `--source-table` parameter.
|
|
166
|
+
|
|
167
|
+
For example, if you have JSONL-formatted log files stored in S3 with a non-standard extension:
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
--source-table "my_bucket/logs/event-data#jsonl"
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
This tells `ingestr` to process the files as JSONL, regardless of their actual extension.
|
|
174
|
+
|
|
175
|
+
Supported format hints include:
|
|
176
|
+
- `#csv` - For comma-separated values files
|
|
177
|
+
- `#jsonl` - For line-delimited JSON files
|
|
178
|
+
- `#parquet` - For Parquet format files
|
|
179
|
+
|
|
180
|
+
::: tip
|
|
181
|
+
File type hinting works with `gzip` compressed files as well.
|
|
182
|
+
:::
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Trustpilot
|
|
2
|
+
|
|
3
|
+
[Trustpilot](https://www.trustpilot.com/) provides a platform for collecting and
|
|
4
|
+
sharing customer reviews.
|
|
5
|
+
|
|
6
|
+
ingestr supports Trustpilot as a source.
|
|
7
|
+
|
|
8
|
+
## URI format
|
|
9
|
+
|
|
10
|
+
The URI format for Trustpilot is:
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
trustpilot://<business_unit_id>?api_key=<api_key>
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
URI parameters:
|
|
17
|
+
- `api_key`: Your Trustpilot API key.
|
|
18
|
+
- `business_unit_id`: Identifier of the business unit whose reviews you want to fetch.
|
|
19
|
+
|
|
20
|
+
## Example usage
|
|
21
|
+
|
|
22
|
+
Assuming your `business_unit_id` is `123` and your API key is `key_abc`, you can ingest reviews into DuckDB using:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
ingestr ingest --source-uri 'trustpilot://123?api_key=key_abc' --source-table 'reviews' --dest-uri duckdb:///trustpilot.duckdb --dest-table 'dest.reviews'
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Tables
|
|
29
|
+
|
|
30
|
+
Currently the Trustpilot source exposes the following table:
|
|
31
|
+
|
|
32
|
+
| Name | Description |
|
|
33
|
+
| ------- | ------------------------------------------- |
|
|
34
|
+
| reviews | Customer reviews for the specified business |
|
|
35
|
+
|
|
@@ -6,6 +6,10 @@ BucketName: TypeAlias = str
|
|
|
6
6
|
FileGlob: TypeAlias = str
|
|
7
7
|
|
|
8
8
|
|
|
9
|
+
class UnsupportedEndpointError(Exception):
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
|
|
9
13
|
def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
10
14
|
"""
|
|
11
15
|
parse the URI of a blob storage and
|
|
@@ -50,3 +54,23 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
|
50
54
|
return "", parts[0]
|
|
51
55
|
|
|
52
56
|
return parts[0], parts[1]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def parse_endpoint(path: str) -> str:
|
|
60
|
+
"""
|
|
61
|
+
Parse the endpoint kind from the URI.
|
|
62
|
+
|
|
63
|
+
kind is a file format. one of [csv, jsonl, parquet]
|
|
64
|
+
"""
|
|
65
|
+
file_extension = path.split(".")[-1]
|
|
66
|
+
if file_extension == "gz":
|
|
67
|
+
file_extension = path.split(".")[-2]
|
|
68
|
+
if file_extension == "csv":
|
|
69
|
+
endpoint = "read_csv"
|
|
70
|
+
elif file_extension == "jsonl":
|
|
71
|
+
endpoint = "read_jsonl"
|
|
72
|
+
elif file_extension == "parquet":
|
|
73
|
+
endpoint = "read_parquet"
|
|
74
|
+
else:
|
|
75
|
+
raise UnsupportedEndpointError(f"Unsupported file format: {file_extension}")
|
|
76
|
+
return endpoint
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.13.56"
|
|
@@ -64,6 +64,7 @@ from ingestr.src.sources import (
|
|
|
64
64
|
SqlSource,
|
|
65
65
|
StripeAnalyticsSource,
|
|
66
66
|
TikTokSource,
|
|
67
|
+
TrustpilotSource,
|
|
67
68
|
ZendeskSource,
|
|
68
69
|
)
|
|
69
70
|
|
|
@@ -165,6 +166,7 @@ class SourceDestinationFactory:
|
|
|
165
166
|
"pipedrive": PipedriveSource,
|
|
166
167
|
"frankfurter": FrankfurterSource,
|
|
167
168
|
"freshdesk": FreshdeskSource,
|
|
169
|
+
"trustpilot": TrustpilotSource,
|
|
168
170
|
"phantombuster": PhantombusterSource,
|
|
169
171
|
"elasticsearch": ElasticsearchSource,
|
|
170
172
|
"attio": AttioSource,
|
|
@@ -1362,17 +1362,25 @@ class S3Source:
|
|
|
1362
1362
|
secret=secret_access_key[0],
|
|
1363
1363
|
)
|
|
1364
1364
|
|
|
1365
|
-
|
|
1366
|
-
if
|
|
1367
|
-
endpoint = "
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1365
|
+
endpoint: Optional[str] = None
|
|
1366
|
+
if "#" in table:
|
|
1367
|
+
_, endpoint = table.split("#")
|
|
1368
|
+
if endpoint not in ["csv", "jsonl", "parquet"]:
|
|
1369
|
+
raise ValueError(
|
|
1370
|
+
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1371
|
+
)
|
|
1372
|
+
endpoint = f"read_{endpoint}"
|
|
1372
1373
|
else:
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1374
|
+
try:
|
|
1375
|
+
endpoint = blob.parse_endpoint(path_to_file)
|
|
1376
|
+
except blob.UnsupportedEndpointError:
|
|
1377
|
+
raise ValueError(
|
|
1378
|
+
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1379
|
+
)
|
|
1380
|
+
except Exception as e:
|
|
1381
|
+
raise ValueError(
|
|
1382
|
+
f"Failed to parse endpoint from path: {path_to_file}"
|
|
1383
|
+
) from e
|
|
1376
1384
|
|
|
1377
1385
|
from ingestr.src.filesystem import readers
|
|
1378
1386
|
|
|
@@ -1844,17 +1852,16 @@ class GCSSource:
|
|
|
1844
1852
|
token=credentials,
|
|
1845
1853
|
)
|
|
1846
1854
|
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
elif file_extension == "jsonl":
|
|
1851
|
-
endpoint = "read_jsonl"
|
|
1852
|
-
elif file_extension == "parquet":
|
|
1853
|
-
endpoint = "read_parquet"
|
|
1854
|
-
else:
|
|
1855
|
+
try:
|
|
1856
|
+
endpoint = blob.parse_endpoint(path_to_file)
|
|
1857
|
+
except blob.UnsupportedEndpointError:
|
|
1855
1858
|
raise ValueError(
|
|
1856
|
-
"
|
|
1859
|
+
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1857
1860
|
)
|
|
1861
|
+
except Exception as e:
|
|
1862
|
+
raise ValueError(
|
|
1863
|
+
f"Failed to parse endpoint from path: {path_to_file}"
|
|
1864
|
+
) from e
|
|
1858
1865
|
|
|
1859
1866
|
from ingestr.src.filesystem import readers
|
|
1860
1867
|
|
|
@@ -2392,6 +2399,47 @@ class FreshdeskSource:
|
|
|
2392
2399
|
).with_resources(table)
|
|
2393
2400
|
|
|
2394
2401
|
|
|
2402
|
+
class TrustpilotSource:
    """Source adapter for Trustpilot.

    URI format: trustpilot://<business_unit_id>?api_key=<api_key>
    """

    def handles_incrementality(self) -> bool:
        """Always returns True."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build the Trustpilot dlt source restricted to ``table``.

        Args:
            uri: Connection string; the netloc part is read as the business
                unit id and the ``api_key`` query parameter as the API key.
            table: Resource to load. Only ``"reviews"`` is accepted.
            **kwargs: ``interval_start`` / ``interval_end`` are read if present
                and converted to UTC ISO-8601 strings. A missing
                ``interval_start`` falls back to 2000-01-01; a missing
                ``interval_end`` is passed on as ``None``.

        Raises:
            MissingValueError: The business unit id or ``api_key`` is absent.
            UnsupportedResourceError: ``table`` is not ``"reviews"``.
        """
        uri_parts = urlparse(uri)
        business_unit_id = uri_parts.netloc
        query = parse_qs(uri_parts.query)

        if not business_unit_id:
            raise MissingValueError("business_unit_id", "Trustpilot")

        api_key_values = query.get("api_key")
        if api_key_values is None:
            raise MissingValueError("api_key", "Trustpilot")

        def _utc_iso(value):
            # Normalise any accepted date input to a UTC ISO-8601 string.
            return ensure_pendulum_datetime(value).in_tz("UTC").isoformat()

        interval_start = kwargs.get("interval_start")
        start_date = _utc_iso(
            "2000-01-01" if interval_start is None else interval_start
        )

        interval_end = kwargs.get("interval_end")
        end_date = None if interval_end is None else _utc_iso(interval_end)

        # Checked after the date handling, same as the validation order above.
        if table not in ("reviews",):
            raise UnsupportedResourceError(table, "Trustpilot")

        from ingestr.src.trustpilot import trustpilot_source

        source = trustpilot_source(
            business_unit_id=business_unit_id,
            api_key=api_key_values[0],
            start_date=start_date,
            end_date=end_date,
        )
        return source.with_resources(table)
|
|
2441
|
+
|
|
2442
|
+
|
|
2395
2443
|
class PhantombusterSource:
|
|
2396
2444
|
def handles_incrementality(self) -> bool:
|
|
2397
2445
|
return True
|
|
@@ -2622,18 +2670,15 @@ class SFTPSource:
|
|
|
2622
2670
|
else:
|
|
2623
2671
|
file_glob = f"/{table}"
|
|
2624
2672
|
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
endpoint = "read_csv"
|
|
2629
|
-
elif file_extension == "jsonl":
|
|
2630
|
-
endpoint = "read_jsonl"
|
|
2631
|
-
elif file_extension == "parquet":
|
|
2632
|
-
endpoint = "read_parquet"
|
|
2633
|
-
else:
|
|
2673
|
+
try:
|
|
2674
|
+
endpoint = blob.parse_endpoint(table)
|
|
2675
|
+
except blob.UnsupportedEndpointError:
|
|
2634
2676
|
raise ValueError(
|
|
2635
|
-
"
|
|
2677
|
+
"SFTP Source only supports specific formats files: csv, jsonl, parquet"
|
|
2636
2678
|
)
|
|
2679
|
+
except Exception as e:
|
|
2680
|
+
raise ValueError(f"Failed to parse endpoint from path: {table}") from e
|
|
2681
|
+
|
|
2637
2682
|
from ingestr.src.filesystem import readers
|
|
2638
2683
|
|
|
2639
2684
|
dlt_source_resource = readers(bucket_url, fs, file_glob)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Trustpilot source for ingesting reviews."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Generator, Iterable, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
import pendulum
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
|
|
9
|
+
from .client import TrustpilotClient
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dlt.source()
def trustpilot_source(
    business_unit_id: str,
    start_date: str,
    end_date: Optional[str],
    api_key: str,
    per_page: int = 1000,
) -> Iterable[DltResource]:
    """Return resources for Trustpilot.

    Args:
        business_unit_id: Business unit whose reviews are requested.
        start_date: Initial value of the ``updated_at`` incremental cursor.
        end_date: End value of the incremental cursor, or ``None`` for an
            open-ended load (the current UTC time is then used as the cutoff
            handed to the client).
        api_key: Key passed to ``TrustpilotClient``.
        per_page: Page size forwarded to ``TrustpilotClient.paginated_reviews``.

    Yields:
        The ``reviews`` resource (merge write disposition, primary key ``id``).
    """
    # ``Optional[str]`` rather than ``str | None``: the annotation is evaluated
    # when the function is defined, and the ``|`` form fails before Python 3.10.

    client = TrustpilotClient(api_key=api_key)

    @dlt.resource(name="reviews", write_disposition="merge", primary_key="id")
    def reviews(
        dateTime=(
            dlt.sources.incremental(
                "updated_at",
                initial_value=start_date,
                end_value=end_date,
                range_start="closed",
                range_end="closed",
            )
        ),
    ) -> Generator[Dict[str, Any], None, None]:
        """Yield review dicts from the client for the incremental window."""
        if end_date is None:
            # No explicit end: cap the window at "now" in UTC.
            end_dt = pendulum.now(tz="UTC").isoformat()
        else:
            end_dt = dateTime.end_value
        # NOTE(review): presumably the stored cursor from a previous run, or
        # ``start_date`` on the first run -- confirm against dlt's incremental docs.
        start_dt = dateTime.last_value
        yield from client.paginated_reviews(
            business_unit_id=business_unit_id,
            per_page=per_page,
            updated_since=start_dt,
            end_date=end_dt,
        )

    yield reviews
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Simple Trustpilot API client."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Iterable
|
|
4
|
+
|
|
5
|
+
import pendulum
|
|
6
|
+
from dlt.sources.helpers import requests
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TrustpilotClient:
    """Client for the Trustpilot public API."""

    def __init__(self, api_key: str) -> None:
        """Remember the API key and the base URL used for every request."""
        self.api_key = api_key
        self.base_url = "https://api.trustpilot.com/v1"

    def _get(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """Issue a GET to ``base_url + endpoint`` and return the decoded JSON.

        The API key is sent as the ``apikey`` query parameter. Non-success
        HTTP statuses are raised by ``raise_for_status``.
        """
        # Work on a copy so the caller's dict is not mutated by adding the key.
        params = dict(params)
        params["apikey"] = self.api_key
        response = requests.get(f"{self.base_url}{endpoint}", params=params)
        response.raise_for_status()
        return response.json()

    def paginated_reviews(
        self,
        business_unit_id: str,
        updated_since: str,
        end_date: str,
        per_page: int = 1000,
    ) -> Iterable[Dict[str, Any]]:
        """Yield the reviews of a business unit, page by page.

        Each yielded review gets an ``updated_at`` key copied from its
        ``updatedAt`` value. Reviews whose ``updated_at`` is later than
        ``end_date`` are skipped.

        Args:
            business_unit_id: Identifier placed in the request path.
            updated_since: Sent as ``updatedSince`` when truthy.
            end_date: Upper bound (inclusive) for ``updated_at``; parsed with
                ``pendulum.parse``.
            per_page: Requested page size, sent as ``perPage``.

        Yields:
            Review dicts as returned by the API, with ``updated_at`` added.
        """
        # The cutoff is identical for every review, so parse it once instead
        # of once per item.
        end_date_dt = pendulum.parse(end_date)

        page = 1
        while True:
            params: Dict[str, Any] = {"perPage": per_page, "page": page}
            if updated_since:
                params["updatedSince"] = updated_since
            data = self._get(f"/business-units/{business_unit_id}/reviews", params)

            # Accept either an object wrapping the list under "reviews" or a
            # bare list. An object without "reviews" counts as "no reviews"
            # rather than being iterated key by key.
            if isinstance(data, dict):
                reviews = data.get("reviews", [])
            else:
                reviews = data
            if not reviews:
                break

            for review in reviews:
                review["updated_at"] = review["updatedAt"]
                review_dt = pendulum.parse(review["updated_at"])
                if review_dt > end_date_dt:  # type: ignore
                    continue
                yield review

            # NOTE(review): a short page is treated as the last page. If the
            # API caps ``perPage`` below the requested value, every page looks
            # short and pagination stops after page 1 -- confirm the allowed
            # maximum against the Trustpilot API reference.
            if len(reviews) < per_page:
                break
            page += 1
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
version = "v0.13.55"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|