ingestr 0.13.77__tar.gz → 0.13.79__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.13.77 → ingestr-0.13.79}/.gitignore +2 -1
- {ingestr-0.13.77 → ingestr-0.13.79}/PKG-INFO +6 -1
- {ingestr-0.13.77 → ingestr-0.13.79}/README.md +5 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/attio.md +4 -1
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/hubspot.md +2 -17
- ingestr-0.13.79/docs/supported-sources/mongodb.md +150 -0
- ingestr-0.13.79/docs/supported-sources/motherduck.md +46 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/main.py +10 -3
- ingestr-0.13.79/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/destinations.py +18 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/facebook_ads/__init__.py +0 -1
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/factory.py +5 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/freshdesk/__init__.py +23 -8
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/freshdesk/freshdesk_client.py +16 -5
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/github/__init__.py +5 -3
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/github/helpers.py +1 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/influxdb/__init__.py +1 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/mongodb/__init__.py +3 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/mongodb/helpers.py +184 -9
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/sources.py +203 -24
- {ingestr-0.13.77 → ingestr-0.13.79}/pyproject.toml +3 -1
- ingestr-0.13.77/docs/supported-sources/mongodb.md +0 -24
- ingestr-0.13.77/ingestr/src/buildinfo.py +0 -1
- {ingestr-0.13.77 → ingestr-0.13.79}/.dlt/config.toml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.dockerignore +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.gitleaksignore +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.python-version +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/.vale.ini +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/Dockerfile +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/LICENSE.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/Makefile +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/commands/ingest.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/index.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/athena.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/clickup_ingestion.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/cratedb-destination.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/cratedb-source.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/gcp_spanner_ingestion.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/github.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/ingestion_elasticsearch_img.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/linear.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/mixpanel_ingestion.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/personio.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/phantombuster.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/pipedrive.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/quickbook_ingestion.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/sftp.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/media/zoom_ingestion.png +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/clickup.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/cratedb.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/elasticsearch.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/frankfurter.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/influxdb.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/isoc-pulse.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/linear.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/mixpanel.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/phantombuster.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/pinterest.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/quickbooks.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/sftp.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/smartsheets.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/solidgate.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/spanner.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/trustpilot.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/supported-sources/zoom.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/conftest.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/attio/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/attio/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/blob.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/clickup/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/clickup/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/elasticsearch/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/facebook_ads/utils.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/filters.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/frankfurter/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/frankfurter/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/http_client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/influxdb/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/isoc_pulse/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/kinesis/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/linear/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/linear/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/mixpanel/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/mixpanel/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/partition.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/phantombuster/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/phantombuster/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pinterest/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/quickbooks/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/smartsheets/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/solidgate/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/solidgate/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/time.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/trustpilot/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/trustpilot/client.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/version.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zoom/__init__.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/src/zoom/helpers.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/ingestr/tests/unit/test_smartsheets.py +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/package-lock.json +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/package.json +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/requirements-dev.txt +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/requirements.in +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/requirements.txt +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/requirements_arm64.txt +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/resources/demo.gif +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/resources/demo.tape +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/resources/ingestr.svg +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/We.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/meta.json +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.13.77 → ingestr-0.13.79}/test.env.template +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.77
|
|
3
|
+
Version: 0.13.79
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -355,6 +355,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
|
|
|
355
355
|
<td>✅</td>
|
|
356
356
|
<td>❌</td>
|
|
357
357
|
</tr>
|
|
358
|
+
<tr>
|
|
359
|
+
<td>MotherDuck</td>
|
|
360
|
+
<td>✅</td>
|
|
361
|
+
<td>✅</td>
|
|
362
|
+
</tr>
|
|
358
363
|
<tr>
|
|
359
364
|
<td>MySQL</td>
|
|
360
365
|
<td>✅</td>
|
|
@@ -90,6 +90,7 @@ export default defineConfig({
|
|
|
90
90
|
link: "/supported-sources/mssql.md",
|
|
91
91
|
},
|
|
92
92
|
{ text: "MongoDB", link: "/supported-sources/mongodb.md" },
|
|
93
|
+
{ text: "MotherDuck", link: "/supported-sources/motherduck.md" },
|
|
93
94
|
{ text: "MySQL", link: "/supported-sources/mysql.md" },
|
|
94
95
|
{ text: "Oracle", link: "/supported-sources/oracle.md" },
|
|
95
96
|
{ text: "Postgres", link: "/supported-sources/postgres.md" },
|
|
@@ -39,4 +39,7 @@ Attio source supports ingesting the following sources into separate tables:
|
|
|
39
39
|
- `list_entries:{list_id}`: Lists all items in a specific list. For example: `list_entries:8abc-123-456-789d-123`
|
|
40
40
|
- `all_list_entries:{object_api_slug}`: Fetches all lists for an object, and then fetches all entries from those lists. For example: `all_list_entries:companies`
|
|
41
41
|
|
|
42
|
-
Use this as `--source-table` parameter in the `ingestr ingest` command.
|
|
42
|
+
Use this as `--source-table` parameter in the `ingestr ingest` command.
|
|
43
|
+
|
|
44
|
+
> [!WARNING]
|
|
45
|
+
> Attio does not support incremental loading, which means ingestr will do a full-refresh.
|
|
@@ -43,22 +43,7 @@ HubSpot source allows ingesting the following sources into separate tables:
|
|
|
43
43
|
|
|
44
44
|
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
HubSpot custom objects are supported by using the `custom` table.
|
|
49
|
-
|
|
50
|
-
The format for the custom object is as follows:
|
|
51
|
-
|
|
52
|
-
```plaintext
|
|
53
|
-
custom:<custom_object_name>:<optional associations>
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
For example, to ingest the `course` custom object with the `contacts` and `companies` associations, the format would be:
|
|
57
|
-
|
|
58
|
-
```plaintext
|
|
59
|
-
custom:course:contacts,companies
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
This would pull all the data for the `course` custom object and include the `contacts` and `companies` associations in the resulting table.
|
|
46
|
+
> [!WARNING]
|
|
47
|
+
> HubSpot does not support incremental loading, which means ingestr will do a full-refresh.
|
|
63
48
|
|
|
64
49
|
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# MongoDB
|
|
2
|
+
MongoDB is a popular, open source NoSQL database known for its flexibility, scalability, and wide adoption in a variety of applications.
|
|
3
|
+
|
|
4
|
+
ingestr supports MongoDB as a source.
|
|
5
|
+
|
|
6
|
+
## URI format
|
|
7
|
+
The URI format for MongoDB is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
mongodb://user:password@host:port
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
URI parameters:
|
|
14
|
+
- `user`: the user name to connect to the database
|
|
15
|
+
- `password`: the password for the user
|
|
16
|
+
- `host`: the host address of the database server
|
|
17
|
+
- `port`: the port number the database server is listening on, default is 27017 for MongoDB
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
> [!CAUTION]
|
|
21
|
+
> Do not put the database name at the end of the MongoDB URI; instead, make it part of the `--source-table` option, using the `database.collection` format.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
You can read more about MongoDB's connection string format [here](https://docs.mongodb.com/manual/reference/connection-string/).
|
|
25
|
+
|
|
26
|
+
## Source table format
|
|
27
|
+
|
|
28
|
+
The `--source-table` option for MongoDB supports two formats:
|
|
29
|
+
|
|
30
|
+
### Basic format
|
|
31
|
+
```plaintext
|
|
32
|
+
database.collection
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
This performs a simple collection scan, equivalent to `db.collection.find()`.
|
|
36
|
+
|
|
37
|
+
### Custom aggregation format
|
|
38
|
+
```plaintext
|
|
39
|
+
database.collection:[aggregation_pipeline]
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
This allows you to specify a custom MongoDB aggregation pipeline as a JSON array.
|
|
43
|
+
|
|
44
|
+
## Custom aggregations
|
|
45
|
+
|
|
46
|
+
ingestr supports custom MongoDB aggregation pipelines, similar to how SQL sources support custom queries. This allows you to perform complex data transformations, filtering, and projections directly in MongoDB before the data is ingested.
|
|
47
|
+
|
|
48
|
+
### Basic syntax
|
|
49
|
+
|
|
50
|
+
Use the following format for custom aggregations:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
ingestr ingest \
|
|
54
|
+
--source-uri "mongodb://user:password@host:port" \
|
|
55
|
+
--source-table 'database.collection:[{"$match": {...}}, {"$project": {...}}]'
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Examples
|
|
59
|
+
|
|
60
|
+
#### Simple filtering
|
|
61
|
+
```bash
|
|
62
|
+
ingestr ingest \
|
|
63
|
+
--source-uri "mongodb://localhost:27017" \
|
|
64
|
+
--source-table 'mydb.users:[{"$match": {"status": "active"}}]'
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
#### Complex aggregation with grouping
|
|
68
|
+
```bash
|
|
69
|
+
ingestr ingest \
|
|
70
|
+
--source-uri "mongodb://localhost:27017" \
|
|
71
|
+
--source-table 'mydb.orders:[
|
|
72
|
+
{"$match": {"status": "completed"}},
|
|
73
|
+
{"$group": {
|
|
74
|
+
"_id": "$customer_id",
|
|
75
|
+
"total_orders": {"$sum": 1},
|
|
76
|
+
"total_amount": {"$sum": "$amount"}
|
|
77
|
+
}}
|
|
78
|
+
]'
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
#### Projection and transformation
|
|
82
|
+
```bash
|
|
83
|
+
ingestr ingest \
|
|
84
|
+
--source-uri "mongodb://localhost:27017" \
|
|
85
|
+
--source-table 'mydb.products:[
|
|
86
|
+
{"$project": {
|
|
87
|
+
"name": 1,
|
|
88
|
+
"price": 1,
|
|
89
|
+
"category": 1,
|
|
90
|
+
"price_usd": {"$multiply": ["$price", 1.1]}
|
|
91
|
+
}}
|
|
92
|
+
]'
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Incremental loads with custom aggregations
|
|
96
|
+
|
|
97
|
+
Custom aggregations support incremental loading when combined with the `--incremental-key` option. The incremental key must be included in the projected fields of your aggregation pipeline.
|
|
98
|
+
|
|
99
|
+
#### Using interval placeholders
|
|
100
|
+
|
|
101
|
+
You can use `:interval_start` and `:interval_end` placeholders in your aggregation pipeline, which will be automatically replaced with the actual datetime values during incremental loads:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
ingestr ingest \
|
|
105
|
+
--source-uri "mongodb://localhost:27017" \
|
|
106
|
+
--source-table 'mydb.events:[
|
|
107
|
+
{"$match": {
|
|
108
|
+
"created_at": {
|
|
109
|
+
"$gte": ":interval_start",
|
|
110
|
+
"$lt": ":interval_end"
|
|
111
|
+
}
|
|
112
|
+
}},
|
|
113
|
+
{"$project": {
|
|
114
|
+
"_id": 1,
|
|
115
|
+
"event_type": 1,
|
|
116
|
+
"user_id": 1,
|
|
117
|
+
"created_at": 1
|
|
118
|
+
}}
|
|
119
|
+
]' \
|
|
120
|
+
--incremental-key "created_at"
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
#### Requirements for incremental loads
|
|
124
|
+
|
|
125
|
+
When using incremental loads with custom aggregations:
|
|
126
|
+
|
|
127
|
+
1. **Incremental key projection**: The field specified in `--incremental-key` must be included in your projection
|
|
128
|
+
2. **Datetime type**: The incremental key should be a datetime field
|
|
129
|
+
3. **Pipeline validation**: ingestr validates that your aggregation pipeline properly projects the incremental key
|
|
130
|
+
|
|
131
|
+
### Validation and error handling
|
|
132
|
+
|
|
133
|
+
ingestr performs several validations on custom aggregation pipelines:
|
|
134
|
+
|
|
135
|
+
- **JSON validation**: Ensures the aggregation pipeline is valid JSON
|
|
136
|
+
- **Array format**: Aggregation pipelines must be JSON arrays
|
|
137
|
+
- **Incremental key validation**: When using `--incremental-key`, validates that the key is projected in the pipeline
|
|
138
|
+
- **Clear error messages**: Provides specific error messages for common issues
|
|
139
|
+
|
|
140
|
+
### Limitations
|
|
141
|
+
|
|
142
|
+
- **Parallel loading**: Custom aggregations don't support parallel loading due to MongoDB cursor limitations. The loader automatically falls back to sequential processing.
|
|
143
|
+
- **Arrow format**: When using Arrow data format with custom aggregations, data is converted to Arrow format after loading rather than using native MongoDB Arrow integration.
|
|
144
|
+
|
|
145
|
+
### Performance considerations
|
|
146
|
+
|
|
147
|
+
- Use `$match` stages early in your pipeline to filter data as soon as possible
|
|
148
|
+
- Add appropriate indexes to support your aggregation pipeline
|
|
149
|
+
- Consider using `$limit` to restrict the number of documents processed
|
|
150
|
+
- For large datasets, MongoDB's `allowDiskUse: true` option is automatically enabled for aggregation pipelines
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# MotherDuck
|
|
2
|
+
MotherDuck is a managed cloud service built on DuckDB, designed for fast analytics and data processing in the cloud.
|
|
3
|
+
|
|
4
|
+
ingestr supports MotherDuck as both a source and a destination.
|
|
5
|
+
|
|
6
|
+
## URI format
|
|
7
|
+
The URI format for MotherDuck is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
motherduck://<database-name>?token=<your-token>
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Alternatively, you can use the `md://` scheme:
|
|
14
|
+
```plaintext
|
|
15
|
+
md://<database-name>?token=<your-token>
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
URI parameters:
|
|
19
|
+
- `database-name`: the name of your MotherDuck database (optional; omit it to use the default connection)
|
|
20
|
+
- `token`: your MotherDuck authentication token
|
|
21
|
+
|
|
22
|
+
## Authentication
|
|
23
|
+
|
|
24
|
+
### Using Token in URI
|
|
25
|
+
Include the token directly in the URI:
|
|
26
|
+
```plaintext
|
|
27
|
+
md://<database-name>?token=<your-token>
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Connection without Database Name
|
|
31
|
+
If you want to connect without specifying a database:
|
|
32
|
+
```plaintext
|
|
33
|
+
md://?token=<your-token>
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Getting Your Token
|
|
37
|
+
|
|
38
|
+
1. Go to the MotherDuck UI
|
|
39
|
+
2. Click on your organization name in the top left and select "Settings"
|
|
40
|
+
3. Click "+ Create token"
|
|
41
|
+
4. Specify a name for the token
|
|
42
|
+
5. Choose either the Read/Write or the Read Scaling token type
|
|
43
|
+
6. Set expiration if desired and click "Create token"
|
|
44
|
+
7. Copy the generated token
|
|
45
|
+
|
|
46
|
+
The same URI structure can be used for both sources and destinations. You can read more about MotherDuck's connection options in their [official documentation](https://motherduck.com/docs/key-tasks/authenticating-and-connecting-to-motherduck/).
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from datetime import datetime
|
|
2
3
|
from enum import Enum
|
|
3
4
|
from typing import Optional
|
|
@@ -8,6 +9,14 @@ from typing_extensions import Annotated
|
|
|
8
9
|
|
|
9
10
|
from ingestr.src.telemetry.event import track
|
|
10
11
|
|
|
12
|
+
try:
|
|
13
|
+
from duckdb_engine import DuckDBEngineWarning
|
|
14
|
+
|
|
15
|
+
warnings.filterwarnings("ignore", category=DuckDBEngineWarning)
|
|
16
|
+
except ImportError:
|
|
17
|
+
# duckdb-engine not installed
|
|
18
|
+
pass
|
|
19
|
+
|
|
11
20
|
app = typer.Typer(
|
|
12
21
|
name="ingestr",
|
|
13
22
|
help="ingestr is the CLI tool to ingest data from one source to another",
|
|
@@ -506,7 +515,6 @@ def ingest(
|
|
|
506
515
|
|
|
507
516
|
if factory.source_scheme == "sqlite":
|
|
508
517
|
source_table = "main." + source_table.split(".")[-1]
|
|
509
|
-
|
|
510
518
|
|
|
511
519
|
if (
|
|
512
520
|
incremental_key
|
|
@@ -600,10 +608,9 @@ def ingest(
|
|
|
600
608
|
if factory.source_scheme == "influxdb":
|
|
601
609
|
if primary_key:
|
|
602
610
|
write_disposition = "merge"
|
|
603
|
-
|
|
604
611
|
|
|
605
612
|
start_time = datetime.now()
|
|
606
|
-
|
|
613
|
+
|
|
607
614
|
run_info: LoadInfo = pipeline.run(
|
|
608
615
|
dlt_source,
|
|
609
616
|
**destination.dlt_run_params(
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.13.79"
|
|
@@ -147,6 +147,24 @@ class DuckDBDestination(GenericSqlDestination):
|
|
|
147
147
|
return dlt.destinations.duckdb(uri, **kwargs)
|
|
148
148
|
|
|
149
149
|
|
|
150
|
+
class MotherduckDestination(GenericSqlDestination):
|
|
151
|
+
def dlt_dest(self, uri: str, **kwargs):
|
|
152
|
+
from urllib.parse import parse_qs, urlparse
|
|
153
|
+
|
|
154
|
+
parsed = urlparse(uri)
|
|
155
|
+
query = parse_qs(parsed.query)
|
|
156
|
+
token = query.get("token", [None])[0]
|
|
157
|
+
from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials
|
|
158
|
+
|
|
159
|
+
creds = {
|
|
160
|
+
"password": token,
|
|
161
|
+
}
|
|
162
|
+
if parsed.path.lstrip("/"):
|
|
163
|
+
creds["database"] = parsed.path.lstrip("/")
|
|
164
|
+
|
|
165
|
+
return dlt.destinations.motherduck(MotherDuckCredentials(creds), **kwargs)
|
|
166
|
+
|
|
167
|
+
|
|
150
168
|
def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
|
|
151
169
|
# ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
|
|
152
170
|
tup = struct.unpack(
|
|
@@ -12,6 +12,7 @@ from ingestr.src.destinations import (
|
|
|
12
12
|
DatabricksDestination,
|
|
13
13
|
DuckDBDestination,
|
|
14
14
|
GCSDestination,
|
|
15
|
+
MotherduckDestination,
|
|
15
16
|
MsSQLDestination,
|
|
16
17
|
MySqlDestination,
|
|
17
18
|
PostgresDestination,
|
|
@@ -85,6 +86,8 @@ SQL_SOURCE_SCHEMES = [
|
|
|
85
86
|
"mysql",
|
|
86
87
|
"mysql+pymysql",
|
|
87
88
|
"mysql+mysqlconnector",
|
|
89
|
+
"md",
|
|
90
|
+
"motherduck",
|
|
88
91
|
"postgres",
|
|
89
92
|
"postgresql",
|
|
90
93
|
"postgresql+psycopg2",
|
|
@@ -195,6 +198,8 @@ class SourceDestinationFactory:
|
|
|
195
198
|
"cratedb": CrateDBDestination,
|
|
196
199
|
"databricks": DatabricksDestination,
|
|
197
200
|
"duckdb": DuckDBDestination,
|
|
201
|
+
"motherduck": MotherduckDestination,
|
|
202
|
+
"md": MotherduckDestination,
|
|
198
203
|
"mssql": MsSQLDestination,
|
|
199
204
|
"postgres": PostgresDestination,
|
|
200
205
|
"postgresql": PostgresDestination,
|
|
@@ -4,6 +4,8 @@ etc. to the database"""
|
|
|
4
4
|
from typing import Any, Dict, Generator, Iterable, List, Optional
|
|
5
5
|
|
|
6
6
|
import dlt
|
|
7
|
+
import pendulum
|
|
8
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
7
9
|
from dlt.sources import DltResource
|
|
8
10
|
|
|
9
11
|
from .freshdesk_client import FreshdeskClient
|
|
@@ -12,10 +14,12 @@ from .settings import DEFAULT_ENDPOINTS
|
|
|
12
14
|
|
|
13
15
|
@dlt.source()
|
|
14
16
|
def freshdesk_source(
|
|
15
|
-
|
|
17
|
+
domain: str,
|
|
18
|
+
api_secret_key: str,
|
|
19
|
+
start_date: pendulum.DateTime,
|
|
20
|
+
end_date: Optional[pendulum.DateTime] = None,
|
|
16
21
|
per_page: int = 100,
|
|
17
|
-
|
|
18
|
-
api_secret_key: str = dlt.secrets.value,
|
|
22
|
+
endpoints: Optional[List[str]] = None,
|
|
19
23
|
) -> Iterable[DltResource]:
|
|
20
24
|
"""
|
|
21
25
|
Retrieves data from specified Freshdesk API endpoints.
|
|
@@ -39,7 +43,11 @@ def freshdesk_source(
|
|
|
39
43
|
def incremental_resource(
|
|
40
44
|
endpoint: str,
|
|
41
45
|
updated_at: Optional[Any] = dlt.sources.incremental(
|
|
42
|
-
"updated_at",
|
|
46
|
+
"updated_at",
|
|
47
|
+
initial_value=start_date.isoformat(),
|
|
48
|
+
end_value=end_date.isoformat() if end_date else None,
|
|
49
|
+
range_start="closed",
|
|
50
|
+
range_end="closed",
|
|
43
51
|
),
|
|
44
52
|
) -> Generator[Dict[Any, Any], Any, None]:
|
|
45
53
|
"""
|
|
@@ -48,15 +56,22 @@ def freshdesk_source(
|
|
|
48
56
|
to ensure incremental loading.
|
|
49
57
|
"""
|
|
50
58
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
59
|
+
if updated_at.last_value is not None:
|
|
60
|
+
start_date = ensure_pendulum_datetime(updated_at.last_value)
|
|
61
|
+
else:
|
|
62
|
+
start_date = start_date
|
|
63
|
+
|
|
64
|
+
if updated_at.end_value is not None:
|
|
65
|
+
end_date = ensure_pendulum_datetime(updated_at.end_value)
|
|
66
|
+
else:
|
|
67
|
+
end_date = pendulum.now(tz="UTC")
|
|
54
68
|
|
|
55
69
|
# Use the FreshdeskClient instance to fetch paginated responses
|
|
56
70
|
yield from freshdesk.paginated_response(
|
|
57
71
|
endpoint=endpoint,
|
|
58
72
|
per_page=per_page,
|
|
59
|
-
|
|
73
|
+
start_date=start_date,
|
|
74
|
+
end_date=end_date,
|
|
60
75
|
)
|
|
61
76
|
|
|
62
77
|
# Set default endpoints if not provided
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import time
|
|
5
|
-
from typing import Any, Dict, Iterable
|
|
5
|
+
from typing import Any, Dict, Iterable
|
|
6
6
|
|
|
7
|
+
import pendulum
|
|
7
8
|
from dlt.common.typing import TDataItem
|
|
8
9
|
from dlt.sources.helpers import requests
|
|
9
10
|
|
|
@@ -67,7 +68,8 @@ class FreshdeskClient:
|
|
|
67
68
|
self,
|
|
68
69
|
endpoint: str,
|
|
69
70
|
per_page: int,
|
|
70
|
-
|
|
71
|
+
start_date: pendulum.DateTime,
|
|
72
|
+
end_date: pendulum.DateTime,
|
|
71
73
|
) -> Iterable[TDataItem]:
|
|
72
74
|
"""
|
|
73
75
|
Fetches a paginated response from a specified endpoint.
|
|
@@ -88,8 +90,8 @@ class FreshdeskClient:
|
|
|
88
90
|
param_key = (
|
|
89
91
|
"updated_since" if endpoint == "tickets" else "_updated_since"
|
|
90
92
|
)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
+
|
|
94
|
+
params[param_key] = start_date.to_iso8601_string()
|
|
93
95
|
|
|
94
96
|
# Handle requests with rate-limiting
|
|
95
97
|
# A maximum of 300 pages (30000 tickets) will be returned.
|
|
@@ -98,5 +100,14 @@ class FreshdeskClient:
|
|
|
98
100
|
|
|
99
101
|
if not data:
|
|
100
102
|
break # Stop if no data or max page limit reached
|
|
101
|
-
|
|
103
|
+
|
|
104
|
+
filtered_data = [
|
|
105
|
+
item
|
|
106
|
+
for item in data
|
|
107
|
+
if "updated_at" in item
|
|
108
|
+
and pendulum.parse(item["updated_at"]) <= end_date
|
|
109
|
+
]
|
|
110
|
+
if not filtered_data:
|
|
111
|
+
break
|
|
112
|
+
yield filtered_data
|
|
102
113
|
page += 1
|
|
@@ -91,7 +91,9 @@ def github_repo_events(
|
|
|
91
91
|
"""
|
|
92
92
|
|
|
93
93
|
# use naming function in table name to generate separate tables for each event
|
|
94
|
-
@dlt.resource(
|
|
94
|
+
@dlt.resource(
|
|
95
|
+
primary_key="id", table_name=lambda i: i["type"], write_disposition="merge"
|
|
96
|
+
)
|
|
95
97
|
def repo_events(
|
|
96
98
|
last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
|
|
97
99
|
"created_at",
|
|
@@ -105,7 +107,7 @@ def github_repo_events(
|
|
|
105
107
|
repos_path = (
|
|
106
108
|
f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
|
|
107
109
|
)
|
|
108
|
-
|
|
110
|
+
|
|
109
111
|
# Get the date range from the incremental state
|
|
110
112
|
start_filter = pendulum.parse(
|
|
111
113
|
last_created_at.last_value or last_created_at.initial_value
|
|
@@ -115,7 +117,7 @@ def github_repo_events(
|
|
|
115
117
|
if last_created_at.end_value
|
|
116
118
|
else pendulum.now()
|
|
117
119
|
)
|
|
118
|
-
|
|
120
|
+
|
|
119
121
|
for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
|
|
120
122
|
# Filter events by date range
|
|
121
123
|
filtered_events = []
|
|
@@ -106,6 +106,7 @@ def mongodb_collection(
|
|
|
106
106
|
filter_: Optional[Dict[str, Any]] = None,
|
|
107
107
|
projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
|
|
108
108
|
pymongoarrow_schema: Optional[Any] = None,
|
|
109
|
+
custom_query: Optional[List[Dict[str, Any]]] = None,
|
|
109
110
|
) -> Any:
|
|
110
111
|
"""
|
|
111
112
|
A DLT source which loads a collection from a mongo database using PyMongo.
|
|
@@ -132,6 +133,7 @@ def mongodb_collection(
|
|
|
132
133
|
exclude (dict) - {"released": False, "runtime": False}
|
|
133
134
|
Note: Can't mix include and exclude statements '{"title": True, "released": False}`
|
|
134
135
|
pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
|
|
136
|
+
custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB aggregation pipeline to execute instead of find()
|
|
135
137
|
|
|
136
138
|
Returns:
|
|
137
139
|
Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
|
|
@@ -161,4 +163,5 @@ def mongodb_collection(
|
|
|
161
163
|
filter_=filter_ or {},
|
|
162
164
|
projection=projection,
|
|
163
165
|
pymongoarrow_schema=pymongoarrow_schema,
|
|
166
|
+
custom_query=custom_query,
|
|
164
167
|
)
|