ingestr 0.14.93__tar.gz → 0.14.96__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.14.93 → ingestr-0.14.96}/PKG-INFO +2 -1
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/.vitepress/config.mjs +2 -0
- ingestr-0.14.96/docs/supported-sources/couchbase.md +136 -0
- ingestr-0.14.96/docs/supported-sources/socrata.md +127 -0
- ingestr-0.14.96/ingestr/src/buildinfo.py +1 -0
- ingestr-0.14.96/ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr-0.14.96/ingestr/src/couchbase_source/helpers.py +135 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/factory.py +4 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/revenuecat/__init__.py +16 -41
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/revenuecat/helpers.py +19 -73
- ingestr-0.14.96/ingestr/src/socrata_source/__init__.py +83 -0
- ingestr-0.14.96/ingestr/src/socrata_source/helpers.py +85 -0
- ingestr-0.14.96/ingestr/src/socrata_source/settings.py +8 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/sources.py +257 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/requirements.in +1 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/requirements.txt +2 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/requirements_arm64.txt +2 -0
- ingestr-0.14.93/ingestr/src/buildinfo.py +0 -1
- {ingestr-0.14.93 → ingestr-0.14.96}/.dlt/config.toml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.dockerignore +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.github/workflows/release.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.github/workflows/tests.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.gitignore +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.gitleaksignore +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.python-version +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/.vale.ini +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/Dockerfile +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/LICENSE.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/Makefile +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/README.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/commands/example-uris.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/commands/ingest.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/getting-started/data-masking.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/index.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/applovin_max.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/athena.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/clickup_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/cratedb-destination.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/cratedb-source.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/gcp_spanner_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/github.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/ingestion_elasticsearch_img.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/linear.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/mixpanel_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/personio.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/phantombuster.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/pipedrive.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/quickbook_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/sftp.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/tiktok.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/wise_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/media/zoom_ingestion.png +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/public/demo.gif +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/allium.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/anthropic.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/attio.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/clickup.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/cratedb.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/docebo.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/elasticsearch.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/fluxx.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/frankfurter.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/fundraiseup.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/github.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/http.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/influxdb.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/intercom.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/isoc-pulse.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/jira.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/linear.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/mailchimp.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/mixpanel.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/monday.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/motherduck.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/phantombuster.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/pinterest.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/plusvibeai.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/quickbooks.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/revenuecat.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/sftp.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/smartsheets.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/solidgate.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/spanner.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/trino.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/trustpilot.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/wise.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/supported-sources/zoom.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/conftest.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/main.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/.gitignore +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/allium/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/anthropic/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/anthropic/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/attio/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/attio/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/blob.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/clickup/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/clickup/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/destinations.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/docebo/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/docebo/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/docebo/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/elasticsearch/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/elasticsearch/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/errors.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/facebook_ads/utils.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/filters.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/fluxx/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/fluxx/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/frankfurter/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/frankfurter/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/freshdesk/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/fundraiseup/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/fundraiseup/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/http/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/http/readers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/http_client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/influxdb/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/influxdb/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/intercom/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/intercom/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/intercom/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/isoc_pulse/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/jira_source/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/jira_source/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/jira_source/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/kinesis/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/linear/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/linear/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/loader.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mailchimp/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mailchimp/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mailchimp/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/masking.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mixpanel/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mixpanel/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/monday/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/monday/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/monday/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/partition.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/phantombuster/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/phantombuster/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pinterest/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/plusvibeai/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/plusvibeai/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/plusvibeai/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/quickbooks/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/resource.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/smartsheets/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/solidgate/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/solidgate/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/time.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/trustpilot/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/trustpilot/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/version.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/wise/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/wise/client.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zoom/__init__.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/src/zoom/helpers.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/ingestr/tests/unit/test_smartsheets.py +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/package-lock.json +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/package.json +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/pyproject.toml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/requirements-dev.txt +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/resources/demo.gif +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/resources/demo.tape +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/resources/ingestr.svg +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Colons.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Gender.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Headings.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Latin.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Parens.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Passive.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Periods.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Slang.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Units.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/We.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/Will.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/WordList.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/meta.json +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/Google/vocab.txt +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.14.93 → ingestr-0.14.96}/test.env.template +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.14.93
|
|
3
|
+
Version: 0.14.96
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -39,6 +39,7 @@ Requires-Dist: clickhouse-connect==0.8.14
|
|
|
39
39
|
Requires-Dist: clickhouse-driver==0.2.9
|
|
40
40
|
Requires-Dist: clickhouse-sqlalchemy==0.2.7
|
|
41
41
|
Requires-Dist: confluent-kafka==2.8.0
|
|
42
|
+
Requires-Dist: couchbase==4.3.6
|
|
42
43
|
Requires-Dist: crate==2.0.0
|
|
43
44
|
Requires-Dist: cryptography==44.0.2
|
|
44
45
|
Requires-Dist: curlify==2.2.1
|
|
@@ -82,6 +82,7 @@ j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
|
|
|
82
82
|
{ text: "AWS Athena", link: "/supported-sources/athena.md" },
|
|
83
83
|
{ text: "AWS Redshift", link: "/supported-sources/redshift.md" },
|
|
84
84
|
{ text: "ClickHouse", link: "/supported-sources/clickhouse.md" },
|
|
85
|
+
{ text: "Couchbase", link: "/supported-sources/couchbase.md" },
|
|
85
86
|
{ text: "CrateDB", link: "/supported-sources/cratedb.md" },
|
|
86
87
|
{ text: "Databricks", link: "/supported-sources/databricks.md" },
|
|
87
88
|
{ text: "DuckDB", link: "/supported-sources/duckdb.md" },
|
|
@@ -107,6 +108,7 @@ j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
|
|
|
107
108
|
{ text: "Postgres", link: "/supported-sources/postgres.md" },
|
|
108
109
|
{ text: "SAP Hana", link: "/supported-sources/sap-hana.md" },
|
|
109
110
|
{ text: "Snowflake", link: "/supported-sources/snowflake.md" },
|
|
111
|
+
{ text: "Socrata", link: "/supported-sources/socrata.md" },
|
|
110
112
|
{ text: "SQLite", link: "/supported-sources/sqlite.md" },
|
|
111
113
|
{
|
|
112
114
|
text: "Experimental",
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Couchbase
|
|
2
|
+
|
|
3
|
+
[Couchbase](https://www.couchbase.com/) is a distributed NoSQL cloud database that delivers unmatched performance, scalability, and flexibility for building modern applications.
|
|
4
|
+
|
|
5
|
+
ingestr supports Couchbase as a source.
|
|
6
|
+
|
|
7
|
+
## URI format
|
|
8
|
+
|
|
9
|
+
### Standard format (without SSL)
|
|
10
|
+
```plaintext
|
|
11
|
+
couchbase://username:password@host
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
### With SSL/TLS enabled
|
|
15
|
+
```plaintext
|
|
16
|
+
couchbase://username:password@host?ssl=true
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
### Including bucket in URI
|
|
20
|
+
```plaintext
|
|
21
|
+
couchbase://username:password@host/bucket
|
|
22
|
+
couchbase://username:password@host/bucket?ssl=true
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
URI parameters:
|
|
26
|
+
- `username`: the username to connect to the Couchbase cluster
|
|
27
|
+
- `password`: the password for the user
|
|
28
|
+
- `host`: the host address of the Couchbase server
|
|
29
|
+
- `bucket`: optional bucket name in the URI path
|
|
30
|
+
- `ssl`: SSL/TLS connection parameter
|
|
31
|
+
- `ssl=true`: Required for Couchbase Capella (cloud) deployments
|
|
32
|
+
- `ssl=false` or omitted: Use for Couchbase Server (self-hosted/on-premises) deployments
|
|
33
|
+
|
|
34
|
+
> [!NOTE]
|
|
35
|
+
> **SSL Parameter Usage:**
|
|
36
|
+
> - Use `ssl=true` when connecting to **Couchbase Capella (cloud)**
|
|
37
|
+
> - Use `ssl=false` or omit the parameter when connecting to **Couchbase Server (self-hosted/on-premises)**
|
|
38
|
+
|
|
39
|
+
The URI structure can be used for connecting to both local/self-hosted Couchbase instances and Couchbase Capella (cloud).
|
|
40
|
+
|
|
41
|
+
## Source table format
|
|
42
|
+
|
|
43
|
+
The `--source-table` option for Couchbase supports two formats depending on whether the bucket is specified in the URI:
|
|
44
|
+
|
|
45
|
+
### When bucket is NOT in URI
|
|
46
|
+
```plaintext
|
|
47
|
+
bucket.scope.collection
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### When bucket IS in URI path
|
|
51
|
+
```plaintext
|
|
52
|
+
scope.collection
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
For default scope and collection, you can use:
|
|
56
|
+
```plaintext
|
|
57
|
+
bucket._default._default
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Using Couchbase as a source
|
|
61
|
+
|
|
62
|
+
### Local/self-hosted Couchbase
|
|
63
|
+
|
|
64
|
+
#### Basic connection without SSL
|
|
65
|
+
```bash
|
|
66
|
+
ingestr ingest \
|
|
67
|
+
--source-uri "couchbase://admin:password123@localhost" \
|
|
68
|
+
--source-table "mybucket.myscope.mycollection" \
|
|
69
|
+
--dest-uri "duckdb:///output.db" \
|
|
70
|
+
--dest-table "main.couchbase_data"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
#### With SSL/TLS enabled
|
|
74
|
+
```bash
|
|
75
|
+
ingestr ingest \
|
|
76
|
+
--source-uri "couchbase://admin:password123@localhost?ssl=true" \
|
|
77
|
+
--source-table "mybucket._default._default" \
|
|
78
|
+
--dest-uri "duckdb:///output.db" \
|
|
79
|
+
--dest-table "main.couchbase_data"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### With bucket in URI
|
|
83
|
+
```bash
|
|
84
|
+
ingestr ingest \
|
|
85
|
+
--source-uri "couchbase://admin:password123@localhost/mybucket" \
|
|
86
|
+
--source-table "myscope.mycollection" \
|
|
87
|
+
--dest-uri "duckdb:///output.db" \
|
|
88
|
+
--dest-table "main.couchbase_data"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Couchbase Capella (Cloud)
|
|
92
|
+
|
|
93
|
+
> [!IMPORTANT]
|
|
94
|
+
> Couchbase Capella (cloud) **requires SSL connections**. You must use `?ssl=true` in your connection URI and prefix the host with `cb.`
|
|
95
|
+
|
|
96
|
+
> [!TIP]
|
|
97
|
+
> You can obtain the connection string for Capella from the SDK connection details in your Couchbase Capella dashboard.
|
|
98
|
+
|
|
99
|
+
Use the `couchbase://` scheme with `ssl=true` parameter. Note the `cb.` prefix in the hostname:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
ingestr ingest \
|
|
103
|
+
--source-uri "couchbase://username:password@cb.xxx.cloud.couchbase.com?ssl=true" \
|
|
104
|
+
--source-table "travel-sample.inventory.airport" \
|
|
105
|
+
--dest-uri "duckdb:///airports.db" \
|
|
106
|
+
--dest-table "main.airports"
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
With the bucket in the URI for Couchbase Capella:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
ingestr ingest \
|
|
113
|
+
--source-uri "couchbase://username:password@cb.xxx.cloud.couchbase.com/travel-sample?ssl=true" \
|
|
114
|
+
--source-table "inventory.airport" \
|
|
115
|
+
--dest-uri "duckdb:///airports.db" \
|
|
116
|
+
--dest-table "main.airports"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
### With URL-encoded password
|
|
121
|
+
|
|
122
|
+
> [!IMPORTANT]
|
|
123
|
+
> When using ingestr CLI, passwords containing special characters (`@`, `:`, `/`, `#`, `?`, etc.) **must be URL-encoded** in the connection URI.
|
|
124
|
+
|
|
125
|
+
If your password contains special characters, you need to URL-encode them:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
ingestr ingest \
|
|
129
|
+
--source-uri "couchbase://admin:MyPass%40123%21@localhost" \
|
|
130
|
+
--source-table "mybucket.myscope.mycollection" \
|
|
131
|
+
--dest-uri "duckdb:///output.db" \
|
|
132
|
+
--dest-table "main.couchbase_data"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
This example encodes the password `MyPass@123!` as `MyPass%40123%21`.
|
|
136
|
+
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Socrata
|
|
2
|
+
|
|
3
|
+
[Socrata](https://dev.socrata.com/) is an open data platform used by governments and organizations to publish and share public datasets. The platform powers thousands of open data portals worldwide, including data.gov and many city, state, and federal government sites.
|
|
4
|
+
|
|
5
|
+
`ingestr` allows ingesting data from any Socrata-powered open data portal using the [Socrata Open Data API (SODA)](https://dev.socrata.com/docs/endpoints.html).
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Socrata is as follows:
|
|
10
|
+
```
|
|
11
|
+
socrata://?domain=<domain>&dataset_id=<dataset_id>&app_token=<app_token>
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
URI Parameters:
|
|
15
|
+
* `domain`: The Socrata domain (e.g., `data.seattle.gov`, `data.cityofnewyork.us`)
|
|
16
|
+
* `dataset_id`: The dataset identifier (4x4 format like `6udu-fhnu`)
|
|
17
|
+
* `app_token`: Socrata app token for API access (required)
|
|
18
|
+
* `username` (optional): Username for authentication (required for private datasets)
|
|
19
|
+
* `password` (optional): Password for authentication (required for private datasets)
|
|
20
|
+
|
|
21
|
+
## Setting up Socrata Integration
|
|
22
|
+
|
|
23
|
+
### Finding Domain and Dataset ID
|
|
24
|
+
|
|
25
|
+
1. Navigate to any Socrata-powered open data portal
|
|
26
|
+
2. Find the dataset you want to ingest
|
|
27
|
+
3. The domain is the base URL (e.g., `data.seattle.gov`)
|
|
28
|
+
4. The dataset ID is in the URL or API endpoint (e.g., `6udu-fhnu`)
|
|
29
|
+
|
|
30
|
+
Example: For `https://data.seattle.gov/City-Business/City-of-Seattle-Wage-Data/2khk-5ukd`, the domain is `data.seattle.gov` and the dataset ID is `2khk-5ukd`.
|
|
31
|
+
|
|
32
|
+
### Generate an App Token
|
|
33
|
+
|
|
34
|
+
You need to obtain an app token to access the Socrata API.
|
|
35
|
+
|
|
36
|
+
1. Sign up for a free account at the Socrata portal you're using
|
|
37
|
+
2. Navigate to the developer settings or API documentation
|
|
38
|
+
3. Generate an app token
|
|
39
|
+
4. Use this token in the `app_token` parameter
|
|
40
|
+
|
|
41
|
+
### Example: Loading a Public Dataset
|
|
42
|
+
|
|
43
|
+
For this example, we'll load the Seattle City wage data:
|
|
44
|
+
* Domain: `data.seattle.gov`
|
|
45
|
+
* Dataset ID: `2khk-5ukd`
|
|
46
|
+
* App token: `your_app_token_here`
|
|
47
|
+
|
|
48
|
+
We will run `ingestr` to save this data to a [duckdb](https://duckdb.org/) database called `socrata.db` under the table name `public.wage_data`.
|
|
49
|
+
|
|
50
|
+
```sh
|
|
51
|
+
ingestr ingest \
|
|
52
|
+
--source-uri "socrata://?domain=data.seattle.gov&dataset_id=2khk-5ukd&app_token=your_app_token_here" \
|
|
53
|
+
--source-table "dataset" \
|
|
54
|
+
--dest-uri "duckdb:///socrata.db" \
|
|
55
|
+
--dest-table "public.wage_data"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Example: Incremental Loading
|
|
59
|
+
|
|
60
|
+
Socrata supports incremental loading using the `:updated_at` system field or any other date/timestamp field in your dataset that gets updated. You must specify the incremental key using the `--incremental-key` flag.
|
|
61
|
+
|
|
62
|
+
First, run an initial load for a specific time range using `:updated_at` as the incremental key:
|
|
63
|
+
|
|
64
|
+
```sh
|
|
65
|
+
ingestr ingest \
|
|
66
|
+
--source-uri "socrata://?domain=data.seattle.gov&dataset_id=2khk-5ukd&app_token=your_app_token_here" \
|
|
67
|
+
--source-table "dataset" \
|
|
68
|
+
--dest-uri "duckdb:///socrata.db" \
|
|
69
|
+
--dest-table "public.wage_data" \
|
|
70
|
+
--incremental-key ":updated_at" \
|
|
71
|
+
--interval-start "2024-01-01" \
|
|
72
|
+
--interval-end "2024-06-30" \
|
|
73
|
+
--incremental-strategy "merge"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
Now, we will run `ingestr` again without specifying dates to load only new or updated records:
|
|
79
|
+
|
|
80
|
+
```sh
|
|
81
|
+
ingestr ingest \
|
|
82
|
+
--source-uri "socrata://?domain=data.seattle.gov&dataset_id=2khk-5ukd&app_token=your_app_token_here" \
|
|
83
|
+
--source-table "dataset" \
|
|
84
|
+
--dest-uri "duckdb:///socrata.db" \
|
|
85
|
+
--dest-table "public.wage_data" \
|
|
86
|
+
--incremental-key ":updated_at"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
This will automatically fetch only records that were created or updated after the last ingestion.
|
|
90
|
+
|
|
91
|
+
### Example: Loading Private Datasets
|
|
92
|
+
|
|
93
|
+
For private datasets that require authentication:
|
|
94
|
+
|
|
95
|
+
```sh
|
|
96
|
+
ingestr ingest \
|
|
97
|
+
--source-uri "socrata://?domain=your.domain.com&dataset_id=xxxx-xxxx&app_token=your_token&username=your_username&password=your_password" \
|
|
98
|
+
--source-table "dataset" \
|
|
99
|
+
--dest-uri "duckdb:///socrata.db" \
|
|
100
|
+
--dest-table "public.private_data"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Tables
|
|
104
|
+
|
|
105
|
+
Socrata source provides a single table called `dataset` that represents the Socrata dataset.
|
|
106
|
+
|
|
107
|
+
| Name | Merge Key | Inc Key | Inc Strategy | Details |
|
|
108
|
+
| --- | --- | --- | --- | --- |
|
|
109
|
+
| `dataset` | `:id` | user-defined | replace/merge | Loads all records from the specified Socrata dataset. Uses `replace` by default, or `merge` when `--incremental-key` is specified |
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
## Troubleshooting
|
|
116
|
+
|
|
117
|
+
### Rate Limit Errors
|
|
118
|
+
If you hit rate limits, make sure a valid app token is set in the `app_token` URI parameter, or reduce the frequency of requests.
|
|
119
|
+
|
|
120
|
+
### Authentication Errors
|
|
121
|
+
For private datasets, ensure username and password are correct and that you have access to the dataset.
|
|
122
|
+
|
|
123
|
+
### Invalid Dataset ID
|
|
124
|
+
Verify the dataset ID is in the correct 4x4 format (e.g., `xxxx-xxxx`) and exists on the specified domain.
|
|
125
|
+
|
|
126
|
+
### Timeout Errors
|
|
127
|
+
For very large datasets, the initial load may take time. Consider using date ranges to break up large loads.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.14.96"
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Source that loads data from Couchbase buckets, supports incremental loads."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
from dlt.sources import DltResource
|
|
7
|
+
|
|
8
|
+
from .helpers import (
|
|
9
|
+
CouchbaseConfiguration,
|
|
10
|
+
client_from_credentials,
|
|
11
|
+
fetch_documents,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dlt.source(max_table_nesting=0)
def couchbase_source(
    connection_string: str = dlt.secrets.value,
    username: str = dlt.secrets.value,
    password: str = dlt.secrets.value,
    bucket: str = dlt.config.value,
    scope: Optional[str] = dlt.config.value,
    collection: Optional[str] = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    limit: Optional[int] = None,
) -> DltResource:
    """
    A DLT source which loads data from a Couchbase bucket using Couchbase Python SDK.

    The source connects to the cluster and wraps ``fetch_documents`` in a
    single resource named ``{bucket}_{scope}_{collection}`` with ``id`` as
    its primary key.

    Args:
        connection_string (str): Couchbase connection string (e.g., 'couchbase://localhost')
        username (str): Couchbase username
        password (str): Couchbase password
        bucket (str): Bucket name to load data from
        scope (Optional[str]): Scope name (defaults to '_default')
        collection (Optional[str]): Collection name (defaults to '_default')
        incremental (Optional[dlt.sources.incremental]): Option to enable incremental loading.
            E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
        write_disposition (str): Write disposition of the resource; "replace"
            is used when no value is given.
        limit (Optional[int]): The maximum number of documents to load.

    Returns:
        DltResource: A DLT resource for the Couchbase collection.
    """
    # Apply the documented defaults. Without this a missing scope/collection
    # would be formatted as the literal text "None" in both the resource name
    # and the keyspace path that is queried.
    scope = scope or "_default"
    collection = collection or "_default"

    # Set up Couchbase client
    cluster = client_from_credentials(connection_string, username, password)

    resource_name = f"{bucket}_{scope}_{collection}"

    return dlt.resource(  # type: ignore[call-overload, arg-type]
        fetch_documents,
        name=resource_name,
        primary_key="id",
        write_disposition=write_disposition or "replace",
        spec=CouchbaseConfiguration,
        max_table_nesting=0,
    )(
        cluster=cluster,
        bucket_name=bucket,
        scope_name=scope,
        collection_name=collection,
        incremental=incremental,
        limit=limit,
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dlt.resource(
    # Mirror the '_default' fallback applied in the body so the resource name
    # never contains the literal text "None".
    name=lambda args: (
        f"{args['bucket']}"
        f"_{args['scope'] or '_default'}"
        f"_{args['collection'] or '_default'}"
    ),
    standalone=True,
    spec=CouchbaseConfiguration,  # type: ignore[arg-type]
)
def couchbase_collection(
    connection_string: str = dlt.secrets.value,
    username: str = dlt.secrets.value,
    password: str = dlt.secrets.value,
    bucket: str = dlt.config.value,
    scope: Optional[str] = dlt.config.value,
    collection: Optional[str] = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    limit: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
) -> DltResource:
    """
    A DLT resource which loads a collection from Couchbase.

    Args:
        connection_string (str): Couchbase connection string (e.g., 'couchbase://localhost')
        username (str): Couchbase username
        password (str): Couchbase password
        bucket (str): Bucket name to load data from
        scope (Optional[str]): Scope name (defaults to '_default')
        collection (Optional[str]): Collection name (defaults to '_default')
        incremental (Optional[dlt.sources.incremental]): Option to enable incremental loading.
        write_disposition (str): Write disposition of the resource; "replace"
            is used when no value is given.
        limit (Optional[int]): The maximum number of documents to load.
        chunk_size (Optional[int]): Forwarded unchanged to ``fetch_documents``.

    Returns:
        DltResource: A DLT resource for the Couchbase collection.
    """
    # Apply the documented defaults; otherwise a missing scope/collection
    # would be formatted as "None" in the resource name and the keyspace path.
    scope = scope or "_default"
    collection = collection or "_default"

    # Set up Couchbase client
    cluster = client_from_credentials(connection_string, username, password)

    return dlt.resource(  # type: ignore[call-overload]
        fetch_documents,
        name=f"{bucket}_{scope}_{collection}",
        primary_key="id",
        write_disposition=write_disposition or "replace",
    )(
        cluster=cluster,
        bucket_name=bucket,
        scope_name=scope,
        collection_name=collection,
        incremental=incremental,
        limit=limit,
        chunk_size=chunk_size,
    )
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Helper functions for Couchbase source."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from typing import Any, Dict, Iterator, Optional
|
|
5
|
+
|
|
6
|
+
import dlt
|
|
7
|
+
from couchbase.auth import PasswordAuthenticator # type: ignore[import-untyped]
|
|
8
|
+
from couchbase.cluster import Cluster # type: ignore[import-untyped]
|
|
9
|
+
from couchbase.options import ( # type: ignore[import-untyped]
|
|
10
|
+
ClusterOptions,
|
|
11
|
+
QueryOptions,
|
|
12
|
+
)
|
|
13
|
+
from dlt.common.configuration import configspec
|
|
14
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@configspec
class CouchbaseConfiguration:
    """Settings spec for the Couchbase source.

    The three connection fields are declared with ``dlt.secrets.value`` and
    the bucket/scope/collection fields with ``dlt.config.value``.
    """

    # Connection details, declared as secrets.
    connection_string: str = dlt.secrets.value
    username: str = dlt.secrets.value
    password: str = dlt.secrets.value

    # Location of the data to read; scope and collection are optional.
    bucket: str = dlt.config.value
    scope: Optional[str] = dlt.config.value
    collection: Optional[str] = dlt.config.value
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def client_from_credentials(
    connection_string: str, username: str, password: str
) -> Cluster:
    """
    Open a Couchbase cluster connection with password authentication.

    Args:
        connection_string: Couchbase connection string
            - Local/self-hosted: 'couchbase://localhost'
            - Capella (cloud): 'couchbases://your-instance.cloud.couchbase.com'
        username: Couchbase username
        password: Couchbase password

    Returns:
        Cluster: The cluster object, returned after ``wait_until_ready`` has
        completed with a 30 second timeout.
    """
    cluster_options = ClusterOptions(PasswordAuthenticator(username, password))

    # Connections using the couchbases:// scheme get the SDK's
    # "wan_development" profile, which helps avoid latency issues when the
    # cluster is reached from a different network.
    uses_tls_scheme = connection_string.startswith("couchbases://")
    if uses_tls_scheme:
        cluster_options.apply_profile("wan_development")

    client = Cluster(connection_string, cluster_options)
    ready_timeout = timedelta(seconds=30)
    client.wait_until_ready(ready_timeout)
    return client
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def fetch_documents(
    cluster: Cluster,
    bucket_name: str,
    scope_name: str,
    collection_name: str,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    limit: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
) -> Iterator[Dict[str, Any]]:
    """
    Fetch documents from a Couchbase collection using N1QL queries.

    A single statement of the form
    ``SELECT META().id as id, c.* FROM `bucket`.`scope`.`collection` c`` is
    executed, optionally followed by an incremental ``WHERE`` filter and a
    ``LIMIT`` clause, and every result row is yielded as a dict.

    Args:
        cluster: Couchbase cluster instance
        bucket_name: Name of the bucket
        scope_name: Name of the scope
        collection_name: Name of the collection
        incremental: Incremental loading configuration. When it has a
            ``cursor_path``, rows are filtered with
            ``cursor_path >= last_value`` and, if ``end_value`` is set,
            ``cursor_path < end_value``.
        limit: Maximum number of documents to fetch (a falsy value means no limit)
        chunk_size: Accepted for interface compatibility; this function does
            not currently read it.

    Yields:
        Dict[str, Any]: Document data, with the document key under ``id``.

    Raises:
        Exception: If building the query options, running the query or
            iterating the rows fails; the original error is chained as the
            cause.
    """
    # Build N1QL query with full path
    full_collection_path = f"`{bucket_name}`.`{scope_name}`.`{collection_name}`"
    n1ql_query = f"SELECT META().id as id, c.* FROM {full_collection_path} c"

    # Add incremental filter if provided.
    # The cursor path is interpolated into the statement text as-is (only the
    # bound values are parameterized), so it must be a trusted field name.
    if incremental and incremental.cursor_path:
        cursor_path = incremental.cursor_path
        n1ql_query += f" WHERE {cursor_path} >= $start_value"
        if incremental.end_value is not None:
            n1ql_query += f" AND {cursor_path} < $end_value"

    # Add limit if provided
    if limit:
        n1ql_query += f" LIMIT {limit}"

    # Execute query
    try:
        query_options = QueryOptions()

        # Bind the values referenced by the incremental filter.
        # NOTE(review): last_value/end_value are handed to the SDK unchanged;
        # confirm they are serializable when the cursor holds datetimes.
        if incremental and incremental.cursor_path:
            named_parameters = {"start_value": incremental.last_value}
            if incremental.end_value is not None:
                named_parameters["end_value"] = incremental.end_value
            query_options = QueryOptions(named_parameters=named_parameters)

        result = cluster.query(n1ql_query, query_options)

        # Yield documents
        count = 0
        for row in result:
            doc = dict(row)

            # Normalize a string/datetime cursor value to a pendulum datetime
            # before handing the document on.
            if (
                incremental
                and incremental.cursor_path
                and incremental.cursor_path in doc
            ):
                cursor_value = doc[incremental.cursor_path]
                if isinstance(cursor_value, (str, datetime)):
                    doc[incremental.cursor_path] = ensure_pendulum_datetime(
                        cursor_value
                    )

            yield doc

            # Client-side guard in addition to the LIMIT clause in the query.
            count += 1
            if limit and count >= limit:
                break

    except Exception as e:
        # Chain the original error so its type and traceback stay available.
        raise Exception(f"Error executing Couchbase N1QL query: {str(e)}") from e
|
|
@@ -39,6 +39,7 @@ from ingestr.src.sources import (
|
|
|
39
39
|
AttioSource,
|
|
40
40
|
ChessSource,
|
|
41
41
|
ClickupSource,
|
|
42
|
+
CouchbaseSource,
|
|
42
43
|
DoceboSource,
|
|
43
44
|
DynamoDBSource,
|
|
44
45
|
ElasticsearchSource,
|
|
@@ -83,6 +84,7 @@ from ingestr.src.sources import (
|
|
|
83
84
|
ShopifySource,
|
|
84
85
|
SlackSource,
|
|
85
86
|
SmartsheetSource,
|
|
87
|
+
SocrataSource,
|
|
86
88
|
SolidgateSource,
|
|
87
89
|
SqlSource,
|
|
88
90
|
StripeAnalyticsSource,
|
|
@@ -160,6 +162,7 @@ class SourceDestinationFactory:
|
|
|
160
162
|
"allium": AlliumSource,
|
|
161
163
|
"anthropic": AnthropicSource,
|
|
162
164
|
"csv": LocalCsvSource,
|
|
165
|
+
"couchbase": CouchbaseSource,
|
|
163
166
|
"docebo": DoceboSource,
|
|
164
167
|
"http": HttpSource,
|
|
165
168
|
"https": HttpSource,
|
|
@@ -216,6 +219,7 @@ class SourceDestinationFactory:
|
|
|
216
219
|
"sftp": SFTPSource,
|
|
217
220
|
"pinterest": PinterestSource,
|
|
218
221
|
"revenuecat": RevenueCatSource,
|
|
222
|
+
"socrata": SocrataSource,
|
|
219
223
|
"zoom": ZoomSource,
|
|
220
224
|
"clickup": ClickupSource,
|
|
221
225
|
"influxdb": InfluxDBSource,
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import asyncio
|
|
2
1
|
from typing import Any, Dict, Iterable, Iterator
|
|
3
2
|
|
|
4
3
|
import aiohttp
|
|
@@ -40,51 +39,26 @@ def revenuecat_source(
|
|
|
40
39
|
yield project
|
|
41
40
|
|
|
42
41
|
@dlt.resource(
|
|
43
|
-
name="
|
|
42
|
+
name="customer_ids",
|
|
43
|
+
write_disposition="replace",
|
|
44
|
+
selected=False,
|
|
45
|
+
parallelized=True,
|
|
44
46
|
)
|
|
45
|
-
def
|
|
46
|
-
"""Get list of customers with nested purchases and subscriptions."""
|
|
47
|
+
def customer_ids():
|
|
47
48
|
if project_id is None:
|
|
48
49
|
raise ValueError("project_id is required for customers resource")
|
|
49
|
-
endpoint = f"/projects/{project_id}/customers"
|
|
50
50
|
|
|
51
|
-
|
|
52
|
-
"""Process a batch of customers with async operations."""
|
|
53
|
-
async with aiohttp.ClientSession() as session:
|
|
54
|
-
tasks = []
|
|
55
|
-
for customer in customer_batch:
|
|
56
|
-
task = process_customer_with_nested_resources_async(
|
|
57
|
-
session, api_key, project_id, customer
|
|
58
|
-
)
|
|
59
|
-
tasks.append(task)
|
|
51
|
+
yield _paginate(api_key, f"/projects/{project_id}/customers")
|
|
60
52
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
if len(current_batch) >= batch_size:
|
|
72
|
-
# Process the batch asynchronously
|
|
73
|
-
processed_customers = asyncio.run(
|
|
74
|
-
process_customer_batch(current_batch)
|
|
75
|
-
)
|
|
76
|
-
for processed_customer in processed_customers:
|
|
77
|
-
yield processed_customer
|
|
78
|
-
current_batch = []
|
|
79
|
-
|
|
80
|
-
# Process any remaining customers in the final batch
|
|
81
|
-
if current_batch:
|
|
82
|
-
processed_customers = asyncio.run(process_customer_batch(current_batch))
|
|
83
|
-
for processed_customer in processed_customers:
|
|
84
|
-
yield processed_customer
|
|
85
|
-
|
|
86
|
-
# Yield each processed customer
|
|
87
|
-
yield from process_customers_sync()
|
|
53
|
+
@dlt.transformer(
|
|
54
|
+
data_from=customer_ids, write_disposition="replace", parallelized=True
|
|
55
|
+
)
|
|
56
|
+
async def customers(customers) -> Iterator[Dict[str, Any]]:
|
|
57
|
+
async with aiohttp.ClientSession() as session:
|
|
58
|
+
for customer in customers:
|
|
59
|
+
yield await process_customer_with_nested_resources_async(
|
|
60
|
+
session, api_key, project_id, customer
|
|
61
|
+
)
|
|
88
62
|
|
|
89
63
|
# Create project-dependent resources dynamically
|
|
90
64
|
project_resources = []
|
|
@@ -103,6 +77,7 @@ def revenuecat_source(
|
|
|
103
77
|
|
|
104
78
|
return [
|
|
105
79
|
projects,
|
|
80
|
+
customer_ids,
|
|
106
81
|
customers,
|
|
107
82
|
*project_resources,
|
|
108
83
|
]
|