ingestr 0.13.41__tar.gz → 0.13.43__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.13.41 → ingestr-0.13.43}/Makefile +1 -1
- {ingestr-0.13.41 → ingestr-0.13.43}/PKG-INFO +1 -1
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/.vitepress/config.mjs +1 -0
- ingestr-0.13.43/docs/supported-sources/attio.md +42 -0
- ingestr-0.13.43/ingestr/src/attio/__init__.py +100 -0
- ingestr-0.13.43/ingestr/src/attio/helpers.py +54 -0
- ingestr-0.13.43/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/factory.py +3 -0
- ingestr-0.13.43/ingestr/src/http_client.py +17 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/mongodb/__init__.py +66 -6
- ingestr-0.13.43/ingestr/src/mongodb/helpers.py +669 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/sources.py +27 -0
- ingestr-0.13.41/ingestr/src/buildinfo.py +0 -1
- ingestr-0.13.41/ingestr/src/mongodb/helpers.py +0 -166
- {ingestr-0.13.41 → ingestr-0.13.43}/.dockerignore +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.gitignore +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.gitleaksignore +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.python-version +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/.vale.ini +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/Dockerfile +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/LICENSE.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/README.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/commands/ingest.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/index.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/athena.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/github.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/ingestion_elasticsearch_img.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/personio.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/phantombuster.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/pipedrive.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/spanner_ingestion.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/elasticsearch.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/frankfurter.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/phantombuster.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/spanner.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/conftest.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/main.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/blob.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/destinations.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/elasticsearch/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/filters.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/frankfurter/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/frankfurter/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/freshdesk/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/kinesis/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/partition.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/phantombuster/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/phantombuster/client.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/time.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/version.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/package-lock.json +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/package.json +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/pyproject.toml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/requirements-dev.txt +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/requirements.in +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/requirements.txt +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/requirements_arm64.txt +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/resources/demo.gif +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/resources/demo.tape +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/resources/ingestr.svg +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/We.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/meta.json +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.13.41 → ingestr-0.13.43}/test.env.template +0 -0
|
@@ -8,7 +8,7 @@ venv: venv/touchfile
|
|
|
8
8
|
|
|
9
9
|
venv/touchfile: requirements-dev.txt requirements.txt
|
|
10
10
|
test -d venv || python3 -m venv venv
|
|
11
|
-
. venv/bin/activate; pip install uv; $(MAKE) deps
|
|
11
|
+
. venv/bin/activate; pip install --disable-pip-version-check uv; $(MAKE) deps
|
|
12
12
|
touch venv/touchfile
|
|
13
13
|
|
|
14
14
|
lock-deps:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.41
|
|
3
|
+
Version: 0.13.43
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -114,6 +114,7 @@ export default defineConfig({
|
|
|
114
114
|
{ text: "Applovin", link: "/supported-sources/applovin.md"},
|
|
115
115
|
{ text: "Applovin Max", link: "/supported-sources/applovin_max.md"},
|
|
116
116
|
{ text: "Asana", link: "/supported-sources/asana.md" },
|
|
117
|
+
{ text: "Attio", link: "/supported-sources/attio.md" },
|
|
117
118
|
{ text: "Chess.com", link: "/supported-sources/chess.md" },
|
|
118
119
|
{
|
|
119
120
|
text: "Facebook Ads",
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Attio
|
|
2
|
+
[Attio](https://attio.com/) is an AI-native CRM platform that helps companies build, scale, and grow their business.
|
|
3
|
+
|
|
4
|
+
ingestr supports Attio as a source.
|
|
5
|
+
|
|
6
|
+
## URI format
|
|
7
|
+
|
|
8
|
+
The URI format for Attio is as follows:
|
|
9
|
+
|
|
10
|
+
```plaintext
|
|
11
|
+
attio://?api_key=<api_key>
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
URI parameters:
|
|
15
|
+
- `api_key`: the API key used for authentication with the Attio API
|
|
16
|
+
|
|
17
|
+
## Setting up an Attio Integration
|
|
18
|
+
|
|
19
|
+
You can find your Attio API key by following the guide [here](https://attio.com/help/apps/other-apps/generating-an-api-key).
|
|
20
|
+
|
|
21
|
+
Let's say your `api_key` is `key_123`. Here's a sample command that copies the data from Attio into a DuckDB database:
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
ingestr ingest \
|
|
26
|
+
--source-uri 'attio://?api_key=key_123' \
|
|
27
|
+
--source-table 'objects' \
|
|
28
|
+
--dest-uri duckdb:///attio.duckdb \
|
|
29
|
+
--dest-table 'dest.objects'
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Tables
|
|
33
|
+
|
|
34
|
+
Attio source supports ingesting the following sources into separate tables:
|
|
35
|
+
|
|
36
|
+
- `objects`: Objects are the data types used to store facts about your customers. Fetches all objects.
|
|
37
|
+
- `records:{object_api_slug}`: Fetches all records of an object. For example: `records:companies`
|
|
38
|
+
- `lists`: Fetches all lists
|
|
39
|
+
- `list_entries:{list_id}`: Lists all items in a specific list. For example: `list_entries:8abc-123-456-789d-123`
|
|
40
|
+
- `all_list_entries:{object_api_slug}`: Fetches all lists for an object, and then fetches all entries from those lists. For example: `all_list_entries:companies`
|
|
41
|
+
|
|
42
|
+
Use one of these as the `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from typing import Iterable, Iterator
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.sources import DltResource
|
|
5
|
+
|
|
6
|
+
from .helpers import AttioClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dlt.source(max_table_nesting=0)
|
|
10
|
+
def attio_source(
|
|
11
|
+
api_key: str,
|
|
12
|
+
params: list[str],
|
|
13
|
+
) -> Iterable[DltResource]:
|
|
14
|
+
|
|
15
|
+
attio_client = AttioClient(api_key)
|
|
16
|
+
|
|
17
|
+
@dlt.resource(
|
|
18
|
+
name="objects",
|
|
19
|
+
write_disposition="replace",
|
|
20
|
+
columns={
|
|
21
|
+
"created_at": {"data_type": "timestamp", "partition": True},
|
|
22
|
+
},
|
|
23
|
+
)
|
|
24
|
+
def fetch_objects() -> Iterator[dict]:
|
|
25
|
+
if len(params) != 0:
|
|
26
|
+
raise ValueError("Objects table must be in the format `objects`")
|
|
27
|
+
|
|
28
|
+
path = "objects"
|
|
29
|
+
yield attio_client.fetch_data(path, "get")
|
|
30
|
+
|
|
31
|
+
@dlt.resource(
|
|
32
|
+
name="records",
|
|
33
|
+
write_disposition="replace",
|
|
34
|
+
columns={
|
|
35
|
+
"created_at": {"data_type": "timestamp", "partition": True},
|
|
36
|
+
},
|
|
37
|
+
)
|
|
38
|
+
def fetch_records() -> Iterator[dict]:
|
|
39
|
+
if len(params) != 1:
|
|
40
|
+
raise ValueError(
|
|
41
|
+
"Records table must be in the format `records:{object_api_slug}`"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
object_id = params[0]
|
|
45
|
+
path = f"objects/{object_id}/records/query"
|
|
46
|
+
|
|
47
|
+
yield attio_client.fetch_data(path, "post")
|
|
48
|
+
|
|
49
|
+
@dlt.resource(
|
|
50
|
+
name="lists",
|
|
51
|
+
write_disposition="replace",
|
|
52
|
+
columns={
|
|
53
|
+
"created_at": {"data_type": "timestamp", "partition": True},
|
|
54
|
+
},
|
|
55
|
+
)
|
|
56
|
+
def fetch_lists() -> Iterator[dict]:
|
|
57
|
+
path = "lists"
|
|
58
|
+
yield attio_client.fetch_data(path, "get")
|
|
59
|
+
|
|
60
|
+
@dlt.resource(
|
|
61
|
+
name="list_entries",
|
|
62
|
+
write_disposition="replace",
|
|
63
|
+
columns={
|
|
64
|
+
"created_at": {"data_type": "timestamp", "partition": True},
|
|
65
|
+
},
|
|
66
|
+
)
|
|
67
|
+
def fetch_list_entries() -> Iterator[dict]:
|
|
68
|
+
if len(params) != 1:
|
|
69
|
+
raise ValueError(
|
|
70
|
+
"List entries table must be in the format `list_entries:{list_id}`"
|
|
71
|
+
)
|
|
72
|
+
path = f"lists/{params[0]}/entries/query"
|
|
73
|
+
|
|
74
|
+
yield attio_client.fetch_data(path, "post")
|
|
75
|
+
|
|
76
|
+
@dlt.resource(
|
|
77
|
+
name="all_list_entries",
|
|
78
|
+
write_disposition="replace",
|
|
79
|
+
columns={
|
|
80
|
+
"created_at": {"data_type": "timestamp", "partition": True},
|
|
81
|
+
},
|
|
82
|
+
)
|
|
83
|
+
def fetch_all_list_entries() -> Iterator[dict]:
|
|
84
|
+
if len(params) != 1:
|
|
85
|
+
raise ValueError(
|
|
86
|
+
"All list entries table must be in the format `all_list_entries:{object_api_slug}`"
|
|
87
|
+
)
|
|
88
|
+
path = "lists"
|
|
89
|
+
for lst in attio_client.fetch_data(path, "get"):
|
|
90
|
+
if params[0] in lst["parent_object"]:
|
|
91
|
+
path = f"lists/{lst['id']['list_id']}/entries/query"
|
|
92
|
+
yield from attio_client.fetch_data(path, "post")
|
|
93
|
+
|
|
94
|
+
return (
|
|
95
|
+
fetch_objects,
|
|
96
|
+
fetch_records,
|
|
97
|
+
fetch_lists,
|
|
98
|
+
fetch_list_entries,
|
|
99
|
+
fetch_all_list_entries,
|
|
100
|
+
)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from ingestr.src.http_client import create_client
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AttioClient:
|
|
5
|
+
def __init__(self, api_key: str):
|
|
6
|
+
self.base_url = "https://api.attio.com/v2"
|
|
7
|
+
self.headers = {
|
|
8
|
+
"Accept": "application/json",
|
|
9
|
+
"Authorization": f"Bearer {api_key}",
|
|
10
|
+
}
|
|
11
|
+
self.client = create_client()
|
|
12
|
+
|
|
13
|
+
def fetch_data(self, path: str, method: str, limit: int = 1000, params=None):
|
|
14
|
+
url = f"{self.base_url}/{path}"
|
|
15
|
+
if params is None:
|
|
16
|
+
params = {}
|
|
17
|
+
offset = 0
|
|
18
|
+
while True:
|
|
19
|
+
query_params = {**params, "limit": limit, "offset": offset}
|
|
20
|
+
if method == "get":
|
|
21
|
+
response = self.client.get(
|
|
22
|
+
url, headers=self.headers, params=query_params
|
|
23
|
+
)
|
|
24
|
+
else:
|
|
25
|
+
response = self.client.post(
|
|
26
|
+
url, headers=self.headers, params=query_params
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
if response.status_code != 200:
|
|
30
|
+
raise Exception(f"HTTP {response.status_code} error: {response.text}")
|
|
31
|
+
|
|
32
|
+
response_data = response.json()
|
|
33
|
+
if "data" not in response_data:
|
|
34
|
+
print(f"API Response: {response_data}")
|
|
35
|
+
raise Exception(
|
|
36
|
+
"Attio API returned a response without the expected data"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
data = response_data["data"]
|
|
40
|
+
|
|
41
|
+
for item in data:
|
|
42
|
+
flat_item = flatten_item(item)
|
|
43
|
+
yield flat_item
|
|
44
|
+
|
|
45
|
+
if len(data) < limit:
|
|
46
|
+
break
|
|
47
|
+
offset += limit
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def flatten_item(item: dict) -> dict:
|
|
51
|
+
if "id" in item:
|
|
52
|
+
for key, value in item["id"].items():
|
|
53
|
+
item[key] = value
|
|
54
|
+
return item
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.13.43"
|
|
@@ -26,6 +26,7 @@ from ingestr.src.sources import (
|
|
|
26
26
|
AppsflyerSource,
|
|
27
27
|
ArrowMemoryMappedSource,
|
|
28
28
|
AsanaSource,
|
|
29
|
+
AttioSource,
|
|
29
30
|
ChessSource,
|
|
30
31
|
DynamoDBSource,
|
|
31
32
|
ElasticsearchSource,
|
|
@@ -120,6 +121,7 @@ class SourceDestinationFactory:
|
|
|
120
121
|
sources: Dict[str, Type[SourceProtocol]] = {
|
|
121
122
|
"csv": LocalCsvSource,
|
|
122
123
|
"mongodb": MongoDbSource,
|
|
124
|
+
"mongodb+srv": MongoDbSource,
|
|
123
125
|
"notion": NotionSource,
|
|
124
126
|
"gsheets": GoogleSheetsSource,
|
|
125
127
|
"shopify": ShopifySource,
|
|
@@ -156,6 +158,7 @@ class SourceDestinationFactory:
|
|
|
156
158
|
"freshdesk": FreshdeskSource,
|
|
157
159
|
"phantombuster": PhantombusterSource,
|
|
158
160
|
"elasticsearch": ElasticsearchSource,
|
|
161
|
+
"attio": AttioSource,
|
|
159
162
|
}
|
|
160
163
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
161
164
|
"bigquery": BigQueryDestination,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from dlt.sources.helpers.requests import Client
|
|
3
|
+
|
|
4
|
+
def create_client() -> requests.Session:
|
|
5
|
+
return Client(
|
|
6
|
+
raise_for_status=False,
|
|
7
|
+
retry_condition=retry_on_limit,
|
|
8
|
+
request_max_attempts=12,
|
|
9
|
+
).session
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def retry_on_limit(
|
|
13
|
+
response: requests.Response | None, exception: BaseException | None
|
|
14
|
+
) -> bool:
|
|
15
|
+
if response is None:
|
|
16
|
+
return False
|
|
17
|
+
return response.status_code == 502
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
"""Source that loads collections from any MongoDB database; supports incremental loads."""
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable, List, Optional
|
|
3
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
|
|
4
4
|
|
|
5
5
|
import dlt
|
|
6
|
+
from dlt.common.data_writers import TDataItemFormat
|
|
6
7
|
from dlt.sources import DltResource
|
|
7
8
|
|
|
8
9
|
from .helpers import (
|
|
@@ -21,6 +22,10 @@ def mongodb(
|
|
|
21
22
|
incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
|
|
22
23
|
write_disposition: Optional[str] = dlt.config.value,
|
|
23
24
|
parallel: Optional[bool] = dlt.config.value,
|
|
25
|
+
limit: Optional[int] = None,
|
|
26
|
+
filter_: Optional[Dict[str, Any]] = None,
|
|
27
|
+
projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
|
|
28
|
+
pymongoarrow_schema: Optional[Any] = None,
|
|
24
29
|
) -> Iterable[DltResource]:
|
|
25
30
|
"""
|
|
26
31
|
A DLT source which loads data from a mongo database using PyMongo.
|
|
@@ -34,6 +39,18 @@ def mongodb(
|
|
|
34
39
|
E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
|
|
35
40
|
write_disposition (str): Write disposition of the resource.
|
|
36
41
|
parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
|
|
42
|
+
limit (Optional[int]):
|
|
43
|
+
The maximum number of documents to load. The limit is
|
|
44
|
+
applied to each requested collection separately.
|
|
45
|
+
filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
|
|
46
|
+
projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields of a collection
|
|
47
|
+
when loading the collection. Supported inputs:
|
|
48
|
+
include (list) - ["year", "title"]
|
|
49
|
+
include (dict) - {"year": True, "title": True}
|
|
50
|
+
exclude (dict) - {"released": False, "runtime": False}
|
|
51
|
+
Note: Can't mix include and exclude statements, e.g. `{"title": True, "released": False}`
|
|
52
|
+
pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow
|
|
53
|
+
|
|
37
54
|
Returns:
|
|
38
55
|
Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
|
|
39
56
|
"""
|
|
@@ -58,19 +75,36 @@ def mongodb(
|
|
|
58
75
|
primary_key="_id",
|
|
59
76
|
write_disposition=write_disposition,
|
|
60
77
|
spec=MongoDbCollectionConfiguration,
|
|
61
|
-
)(
|
|
78
|
+
)(
|
|
79
|
+
client,
|
|
80
|
+
collection,
|
|
81
|
+
incremental=incremental,
|
|
82
|
+
parallel=parallel,
|
|
83
|
+
limit=limit,
|
|
84
|
+
filter_=filter_ or {},
|
|
85
|
+
projection=projection,
|
|
86
|
+
pymongoarrow_schema=pymongoarrow_schema,
|
|
87
|
+
)
|
|
62
88
|
|
|
63
89
|
|
|
64
|
-
@dlt.
|
|
65
|
-
|
|
90
|
+
@dlt.resource(
|
|
91
|
+
name=lambda args: args["collection"],
|
|
92
|
+
standalone=True,
|
|
93
|
+
spec=MongoDbCollectionResourceConfiguration,
|
|
66
94
|
)
|
|
67
95
|
def mongodb_collection(
|
|
68
|
-
connection_url: str = dlt.
|
|
96
|
+
connection_url: str = dlt.secrets.value,
|
|
69
97
|
database: Optional[str] = dlt.config.value,
|
|
70
98
|
collection: str = dlt.config.value,
|
|
71
99
|
incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
|
|
72
100
|
write_disposition: Optional[str] = dlt.config.value,
|
|
73
101
|
parallel: Optional[bool] = False,
|
|
102
|
+
limit: Optional[int] = None,
|
|
103
|
+
chunk_size: Optional[int] = 10000,
|
|
104
|
+
data_item_format: Optional[TDataItemFormat] = "object",
|
|
105
|
+
filter_: Optional[Dict[str, Any]] = None,
|
|
106
|
+
projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
|
|
107
|
+
pymongoarrow_schema: Optional[Any] = None,
|
|
74
108
|
) -> Any:
|
|
75
109
|
"""
|
|
76
110
|
A DLT source which loads a collection from a mongo database using PyMongo.
|
|
@@ -83,6 +117,21 @@ def mongodb_collection(
|
|
|
83
117
|
E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
|
|
84
118
|
write_disposition (str): Write disposition of the resource.
|
|
85
119
|
parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
|
|
120
|
+
limit (Optional[int]): The maximum number of documents to load.
|
|
121
|
+
chunk_size (Optional[int]): The number of documents to load in each batch.
|
|
122
|
+
data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
|
|
123
|
+
Supported formats:
|
|
124
|
+
object - Python objects (dicts, lists).
|
|
125
|
+
arrow - Apache Arrow tables.
|
|
126
|
+
filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
|
|
127
|
+
projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields
|
|
128
|
+
when loading the collection. Supported inputs:
|
|
129
|
+
include (list) - ["year", "title"]
|
|
130
|
+
include (dict) - {"year": True, "title": True}
|
|
131
|
+
exclude (dict) - {"released": False, "runtime": False}
|
|
132
|
+
Note: Can't mix include and exclude statements, e.g. `{"title": True, "released": False}`
|
|
133
|
+
pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
|
|
134
|
+
|
|
86
135
|
Returns:
|
|
87
136
|
Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
|
|
88
137
|
"""
|
|
@@ -100,4 +149,15 @@ def mongodb_collection(
|
|
|
100
149
|
name=collection_obj.name,
|
|
101
150
|
primary_key="_id",
|
|
102
151
|
write_disposition=write_disposition,
|
|
103
|
-
)(
|
|
152
|
+
)(
|
|
153
|
+
client,
|
|
154
|
+
collection_obj,
|
|
155
|
+
incremental=incremental,
|
|
156
|
+
parallel=parallel,
|
|
157
|
+
limit=limit,
|
|
158
|
+
chunk_size=chunk_size,
|
|
159
|
+
data_item_format=data_item_format,
|
|
160
|
+
filter_=filter_ or {},
|
|
161
|
+
projection=projection,
|
|
162
|
+
pymongoarrow_schema=pymongoarrow_schema,
|
|
163
|
+
)
|