ingestr 0.13.21__tar.gz → 0.13.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ingestr-0.13.21 → ingestr-0.13.22}/PKG-INFO +5 -5
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/.vitepress/config.mjs +1 -0
- ingestr-0.13.22/docs/media/pipedrive.png +0 -0
- ingestr-0.13.22/docs/supported-sources/pipedrive.md +43 -0
- ingestr-0.13.22/docs/tutorials/load-kinesis-bigquery.md +130 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/adjust/adjust_helpers.py +6 -2
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/applovin_max/__init__.py +5 -3
- ingestr-0.13.22/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/factory.py +2 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/hubspot/__init__.py +0 -1
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/kinesis/__init__.py +3 -4
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/partition.py +2 -2
- ingestr-0.13.22/ingestr/src/pipedrive/__init__.py +198 -0
- ingestr-0.13.22/ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr-0.13.22/ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr-0.13.22/ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr-0.13.22/ingestr/src/pipedrive/settings.py +27 -0
- ingestr-0.13.22/ingestr/src/pipedrive/typing.py +3 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/sources.py +46 -14
- {ingestr-0.13.21 → ingestr-0.13.22}/pyproject.toml +2 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/requirements.in +4 -4
- {ingestr-0.13.21 → ingestr-0.13.22}/requirements.txt +5 -4
- ingestr-0.13.21/docs/tutorials/load-kinesis-bigquery.md +0 -67
- ingestr-0.13.21/ingestr/src/buildinfo.py +0 -1
- {ingestr-0.13.21 → ingestr-0.13.22}/.dockerignore +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.gitignore +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.gitleaksignore +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.python-version +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/.vale.ini +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/Dockerfile +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/LICENSE.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/Makefile +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/README.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/commands/ingest.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/index.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/athena.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/github.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/personio.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/main.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/blob.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/destinations.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/filters.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/time.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/version.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/package-lock.json +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/package.json +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/requirements-dev.txt +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/resources/demo.gif +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/resources/demo.tape +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/resources/ingestr.svg +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/We.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/meta.json +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.21 → ingestr-0.13.22}/styles/config/vocabularies/bruin/accept.txt +0 -0
{ingestr-0.13.21 → ingestr-0.13.22}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.21
+Version: 0.13.22
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -16,7 +16,7 @@ Classifier: Topic :: Database
 Requires-Python: >=3.9
 Requires-Dist: aiobotocore==2.21.1
 Requires-Dist: aiohappyeyeballs==2.4.8
-Requires-Dist: aiohttp==3.11.
+Requires-Dist: aiohttp==3.11.15
 Requires-Dist: aioitertools==0.12.0
 Requires-Dist: aiosignal==1.3.2
 Requires-Dist: alembic==1.15.1
@@ -55,8 +55,8 @@ Requires-Dist: facebook-business==20.0.0
 Requires-Dist: filelock==3.17.0
 Requires-Dist: flatten-json==0.1.14
 Requires-Dist: frozenlist==1.5.0
-Requires-Dist: fsspec==
-Requires-Dist: gcsfs==
+Requires-Dist: fsspec==2025.3.2
+Requires-Dist: gcsfs==2025.3.2
 Requires-Dist: gitdb==4.0.12
 Requires-Dist: gitpython==3.1.44
 Requires-Dist: giturlparse==0.12.0
@@ -149,7 +149,7 @@ Requires-Dist: rich-argparse==1.7.0
 Requires-Dist: rich==13.9.4
 Requires-Dist: rsa==4.9
 Requires-Dist: rudder-sdk-python==2.1.4
-Requires-Dist: s3fs==
+Requires-Dist: s3fs==2025.3.2
 Requires-Dist: s3transfer==0.11.3
 Requires-Dist: scramp==1.4.5
 Requires-Dist: semver==3.0.4
{ingestr-0.13.21 → ingestr-0.13.22}/docs/.vitepress/config.mjs

@@ -126,6 +126,7 @@ export default defineConfig({
           { text: "LinkedIn Ads", link: "/supported-sources/linkedin_ads.md" },
           { text: "Notion", link: "/supported-sources/notion.md" },
           { text: "Personio", link: "/supported-sources/personio.md" },
+          { text: "Pipedrive", link: "/supported-sources/pipedrive.md" },
           { text: "S3", link: "/supported-sources/s3.md" },
           { text: "Salesforce", link: "/supported-sources/salesforce.md" },
           { text: "Shopify", link: "/supported-sources/shopify.md" },
ingestr-0.13.22/docs/media/pipedrive.png

Binary file (new)
ingestr-0.13.22/docs/supported-sources/pipedrive.md

@@ -0,0 +1,43 @@
+# Pipedrive
+[Pipedrive](https://www.pipedrive.com/) is a cloud-based sales Customer Relationship Management (CRM) tool designed to help businesses manage leads and deals, track communication, and automate sales processes.
+
+ingestr supports Pipedrive as a source.
+
+## URI format
+
+The URI format for Pipedrive is as follows:
+
+```plaintext
+pipedrive://?api_token=<api_token>
+```
+
+URI parameters:
+- `api_token`: the token used for authentication with the Pipedrive API
+
+## Setting up a Pipedrive Integration
+
+To grab Pipedrive credentials, please follow the guide [here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/pipedrive#grab-api-token).
+
+Once you complete the guide, you should have an `api_token`. Let's say your `api_token` is `token_123`; here's a sample command that will copy the data from Pipedrive into a DuckDB database:
+
+```bash
+ingestr ingest \
+    --source-uri 'pipedrive://?api_token=token_123' \
+    --source-table 'users' \
+    --dest-uri duckdb:///pipedrive.duckdb \
+    --dest-table 'dest.users'
+```
+
+<img alt="pipedrive_img" src="../media/pipedrive.png"/>
+
+The Pipedrive source allows ingesting the following resources into separate tables:
+
+- `activities`: Scheduled events or tasks associated with deals, contacts, or organizations
+- `organizations`: Companies or entities with which you have potential or existing business dealings
+- `products`: Items or services offered for sale that can be associated with deals
+- `deals`: Potential sales or transactions that you can track through various stages
+- `users`: Individuals with unique login credentials who can access and use the platform
+- `persons`: Individual contacts or leads that can be linked to sales deals
+
+
+Use these as the `--source-table` parameter in the `ingestr ingest` command.
ingestr-0.13.22/docs/tutorials/load-kinesis-bigquery.md

@@ -0,0 +1,130 @@
+# Load Data from Amazon Kinesis to Google BigQuery
+
+Welcome! 👋
+This beginner-friendly guide will help you load data from `Amazon Kinesis` into `Google BigQuery` using `ingestr`, a simple yet powerful command-line tool. No prior experience is needed, and best of all, no coding required!
+
+By the end of this guide, you'll have your Kinesis data securely stored in BigQuery. But before we dive in, let's take a quick look at `ingestr`.
+
+## Overview of ingestr
+
+`ingestr` is a command-line tool that simplifies data ingestion by allowing users to load data from a source to a destination using simple command-line flags.
+
+### ingestr Command
+
+```bash
+ingestr ingest \
+    --source-uri '<your-source-uri-here>' \
+    --source-table '<your-schema>.<your-table>' \
+    --dest-uri '<your-destination-uri-here>' \
+    --dest-table '<your-schema>.<your-table>'
+```
+
+- `ingestr ingest`: Executes the data ingestion process.
+- `--source-uri TEXT`: Specifies the URI of the data source.
+- `--dest-uri TEXT`: Specifies the URI of the destination.
+- `--source-table TEXT`: Defines the table to fetch data from.
+- `--dest-table TEXT`: Specifies the destination table. If not provided, it defaults to `--source-table`.
+
+With this command, we connect to the source, retrieve the specified data, and load it into the destination database.
+
+## Let's Load Data from Kinesis to BigQuery Together!
+
+Amazon Kinesis is a cloud-based service for real-time data streaming and analytics that processes large data streams. To analyze this data, you may need to load it into a data warehouse like Google BigQuery. `ingestr` makes this process simple.
+
+### Step 1: Install ingestr
+
+Ensure `ingestr` is installed. If not, follow the installation guide [here](../getting-started/quickstart.md#Installation).
+
+### Step 2: Get AWS Credentials
+Kinesis will be our data source. To access it, you need AWS credentials.
+
+1. Log in to your AWS account.
+2. Navigate to `IAM` (Identity and Access Management).
+3. Create a new IAM user or select an existing one.
+4. Assign the necessary permissions (e.g., `AmazonKinesisReadOnlyAccess`).
+5. Generate and copy the `Access Key ID` and `Secret Access Key`.
+
+For more details, read [here](https://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html).
+
+### Step 3: Configure Kinesis as the Source
+
+#### `--source-uri`
+This flag connects to your Kinesis stream. The URI format is:
+
+```bash
+kinesis://?aws_access_key_id=<YOUR_KEY_ID>&aws_secret_access_key=<YOUR_SECRET_KEY>&region_name=<YOUR_REGION>
+```
+
+Required parameters:
+- `aws_access_key_id`: Your AWS access key
+- `aws_secret_access_key`: Your AWS secret key
+- `region_name`: AWS region of your Kinesis stream
+
+#### `--source-table`
+This flag specifies which Kinesis stream to read from:
+
+```bash
+--source-table 'kinesis_stream_name'
+```
+
+### Step 4: Configure BigQuery as the Destination
+
+#### `--dest-uri`
+This flag connects to BigQuery. The URI format is:
+
+```bash
+bigquery://<project-name>?credentials_path=/path/to/service/account.json&location=<location>
+```
+
+Required parameters:
+- `project-name`: Your BigQuery project name
+- `credentials_path`: Path to the service account JSON file
+- `location`: (Optional) Dataset location
+
+#### `--dest-table`
+This flag specifies where to save the data:
+
+```bash
+--dest-table 'dataset.table_name'
+```
+
+### Step 5: Run the ingestr Command
+
+Execute the following command to load data from Kinesis to BigQuery:
+
+```bash
+ingestr ingest \
+    --source-uri 'kinesis://?aws_access_key_id=<YOUR_KEY_ID>&aws_secret_access_key=<YOUR_SECRET_KEY>&region_name=eu-central-1' \
+    --source-table 'kinesis_stream_name' \
+    --dest-uri 'bigquery://project-name?credentials_path=/Users/abc.json' \
+    --dest-table 'dataset.results'
+```
+
+### Step 6: Verify Data in BigQuery
+Once the command runs successfully, your Kinesis data will be available in BigQuery. Follow these steps to verify the data:
+
+1. Open the [BigQuery Console](https://console.cloud.google.com/bigquery) and select your project.
+
+2. In the left-hand side panel:
+   - Expand your project.
+   - Navigate to the appropriate dataset and click on the table name.
+
+3. Select the "Preview" tab to view a sample of the ingested data.
+   - Confirm that rows are present and fields appear as expected.
+
+4. Go to the "Query" tab and run a basic query to inspect your data more closely. For example:
+```sql
+SELECT * FROM `project-name.dataset.results` LIMIT 100;
+```
+
+Ensure that the retrieved data matches what was expected from the Kinesis stream.
+
+### Example Output
+
+After running the ingestion process, your Kinesis data will be available in BigQuery. Here's an example of what the data might look like:
+
+<img alt="kinesis_bigquery" src="../media/kinesis.bigquery.png" />
+
+## 🎉 Congratulations!
+You have successfully loaded data from Amazon Kinesis to BigQuery using `ingestr`.
{ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/adjust/adjust_helpers.py

@@ -82,7 +82,9 @@ class AdjustAPI:
             items = result.get("rows", [])
             yield items
         else:
-            raise HTTPError(
+            raise HTTPError(
+                f"Request failed with status code: {response.status_code}, {response.text}."
+            )
 
     def fetch_events(self):
         headers = {"Authorization": f"Bearer {self.api_key}"}
@@ -93,7 +95,9 @@ class AdjustAPI:
             result = response.json()
             yield result
         else:
-            raise HTTPError(
+            raise HTTPError(
+                f"Request failed with status code: {response.status_code}, {response.text}."
+            )
 
 
 def parse_filters(filters_raw: str) -> dict:
{ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/applovin_max/__init__.py

@@ -105,11 +105,13 @@ def get_data(
     if response.status_code == 404:
         if "No Mediation App Id found for platform" in response.text:
             return None
-        error_message =
+        error_message = (
+            f"AppLovin MAX API error (status {response.status_code}): {response.text}"
+        )
         raise requests.HTTPError(error_message)
-
+
     response_url = response.json().get("ad_revenue_report_url")
     df = pd.read_csv(response_url)
     df["Date"] = pd.to_datetime(df["Date"])
     df["partition_date"] = df["Date"].dt.date
-    return df
+    return df
ingestr-0.13.22/ingestr/src/buildinfo.py

@@ -0,0 +1 @@
+version = "v0.13.22"
{ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/factory.py

@@ -43,6 +43,7 @@ from ingestr.src.sources import (
     MongoDbSource,
     NotionSource,
     PersonioSource,
+    PipedriveSource,
     S3Source,
     SalesforceSource,
     ShopifySource,
@@ -144,6 +145,7 @@ class SourceDestinationFactory:
         "salesforce": SalesforceSource,
         "personio": PersonioSource,
         "kinesis": KinesisSource,
+        "pipedrive": PipedriveSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
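Note: the two added lines are all that is needed to expose the new source through the `pipedrive://` URI scheme, because the factory resolves the scheme of `--source-uri` against its `sources` dictionary. A minimal sketch of that dispatch, with simplified names that are assumptions rather than code copied from `factory.py`:

```python
# Hypothetical sketch of scheme-based dispatch; the real mapping lives in
# ingestr/src/factory.py and covers many more schemes.
from urllib.parse import urlparse


class PipedriveSource:  # stand-in for ingestr.src.sources.PipedriveSource
    pass


sources = {"pipedrive": PipedriveSource}


def resolve_source(uri: str):
    scheme = urlparse(uri).scheme  # "pipedrive://?api_token=..." -> "pipedrive"
    try:
        return sources[scheme]()
    except KeyError:
        raise ValueError(f"unsupported source scheme: {scheme}")


print(resolve_source("pipedrive://?api_token=token_123"))
```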
{ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/kinesis/__init__.py

@@ -16,7 +16,7 @@ from .helpers import get_shard_iterator, max_sequence_by_shard
     name=lambda args: args["stream_name"],
     primary_key="kinesis_msg_id",
     standalone=True,
-    max_table_nesting=0
+    max_table_nesting=0,
 )
 def kinesis_stream(
     stream_name: str,
@@ -75,7 +75,6 @@ def kinesis_stream(
 
     # get next shard to fetch messages from
     while shard_id := shard_ids.pop(0) if shard_ids else None:
-
         shard_iterator, _ = get_shard_iterator(
             kinesis_client,
             stream_name,
@@ -83,14 +82,14 @@ def kinesis_stream(
             last_msg,  # type: ignore
             initial_at_datetime,  # type: ignore
         )
-
+
         while shard_iterator:
             records = []
             records_response = kinesis_client.get_records(
                 ShardIterator=shard_iterator,
                 Limit=chunk_size,  # The size of data can be up to 1 MB, it must be controlled by the user
             )
-
+
             for record in records_response["Records"]:
                 sequence_number = record["SequenceNumber"]
                 content = record["Data"]
{ingestr-0.13.21 → ingestr-0.13.22}/ingestr/src/partition.py

@@ -13,7 +13,6 @@ def apply_athena_hints(
     additional_hints: Dict[str, TColumnSchema] = {},
 ) -> None:
     def _apply_partition_hint(resource: DltResource) -> None:
-
         columns = resource.columns if resource.columns else {}
 
         partition_hint = (
@@ -24,7 +23,8 @@ def apply_athena_hints(
         athena_adapter(
             resource,
             athena_partition.day(partition_column)
-            if partition_hint
+            if partition_hint
+            and partition_hint.get("data_type") in ("timestamp", "date")
             else partition_column,
         )
 
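Note: the added condition narrows when the Athena day-partitioning hint applies; `athena_partition.day(...)` only makes sense for temporal columns, so columns without a `timestamp` or `date` data type now fall back to plain column-name partitioning. A small illustrative sketch of the resulting decision, assuming a simplified hint dictionary:

```python
# Illustrative only: mirrors the condition added in partition.py above.
from typing import Any, Dict, Optional


def partition_spec(partition_column: str, hint: Optional[Dict[str, Any]]) -> str:
    # day() bucketing requires a temporal column type
    if hint and hint.get("data_type") in ("timestamp", "date"):
        return f"day({partition_column})"  # stands in for athena_partition.day(...)
    return partition_column


assert partition_spec("created_at", {"data_type": "timestamp"}) == "day(created_at)"
assert partition_spec("country", {"data_type": "text"}) == "country"
```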
ingestr-0.13.22/ingestr/src/pipedrive/__init__.py

@@ -0,0 +1,198 @@
+"""Highly customizable source for Pipedrive, supports endpoint addition, selection and column rename
+
+Pipedrive api docs: https://developers.pipedrive.com/docs/api/v1
+
+Pipedrive changes or deprecates fields and endpoints without versioning the api.
+If something breaks, it's a good idea to check the changelog.
+Api changelog: https://developers.pipedrive.com/changelog
+
+To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+"""
+
+from typing import Any, Dict, Iterator, List, Optional, Union  # noqa: F401
+
+import dlt
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.sources import DltResource, TDataItems
+
+from .helpers import group_deal_flows
+from .helpers.custom_fields_munger import rename_fields, update_fields_mapping
+from .helpers.pages import get_pages, get_recent_items_incremental
+from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
+from .typing import TDataPage
+
+
+@dlt.source(name="pipedrive", max_table_nesting=0)
+def pipedrive_source(
+    pipedrive_api_key: str = dlt.secrets.value,
+    since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
+) -> Iterator[DltResource]:
+    """
+    Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.
+
+    Args:
+        pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+        since_timestamp: Starting timestamp for incremental loading. By default complete history is loaded on first run.
+        incremental: Enable or disable incremental loading.
+
+    Returns resources:
+        custom_fields_mapping
+        activities
+        activityTypes
+        deals
+        deals_flow
+        deals_participants
+        files
+        filters
+        notes
+        persons
+        organizations
+        pipelines
+        products
+        stages
+        users
+        leads
+
+    For custom fields rename the `custom_fields_mapping` resource must be selected or loaded before other resources.
+
+    Resources that depend on another resource are implemented as transformers
+    so they can re-use the original resource data without re-downloading.
+    Examples: deals_participants, deals_flow
+    """
+
+    # yield nice rename mapping
+    yield create_state(pipedrive_api_key) | parsed_mapping
+
+    # parse timestamp and build kwargs
+    since_timestamp = ensure_pendulum_datetime(since_timestamp).strftime(
+        "%Y-%m-%d %H:%M:%S"
+    )
+    resource_kwargs: Any = (
+        {"since_timestamp": since_timestamp} if since_timestamp else {}
+    )
+
+    # create resources for all endpoints
+    endpoints_resources = {}
+    for entity, resource_name in RECENTS_ENTITIES.items():
+        endpoints_resources[resource_name] = dlt.resource(
+            get_recent_items_incremental,
+            name=resource_name,
+            primary_key="id",
+            write_disposition="merge",
+        )(entity, pipedrive_api_key, **resource_kwargs)
+
+    yield from endpoints_resources.values()
+
+    # create transformers for deals to participants and flows
+    yield endpoints_resources["deals"] | dlt.transformer(
+        name="deals_participants", write_disposition="merge", primary_key="id"
+    )(_get_deals_participants)(pipedrive_api_key)
+
+    yield endpoints_resources["deals"] | dlt.transformer(
+        name="deals_flow", write_disposition="merge", primary_key="id"
+    )(_get_deals_flow)(pipedrive_api_key)
+
+    yield leads(pipedrive_api_key, update_time=since_timestamp)
+
+
+def _get_deals_flow(
+    deals_page: TDataPage, pipedrive_api_key: str
+) -> Iterator[TDataItems]:
+    custom_fields_mapping = dlt.current.source_state().get("custom_fields_mapping", {})
+    for row in deals_page:
+        url = f"deals/{row['id']}/flow"
+        pages = get_pages(url, pipedrive_api_key)
+        for entity, page in group_deal_flows(pages):
+            yield dlt.mark.with_table_name(
+                rename_fields(page, custom_fields_mapping.get(entity, {})),
+                "deals_flow_" + entity,
+            )
+
+
+def _get_deals_participants(
+    deals_page: TDataPage, pipedrive_api_key: str
+) -> Iterator[TDataPage]:
+    for row in deals_page:
+        url = f"deals/{row['id']}/participants"
+        yield from get_pages(url, pipedrive_api_key)
+
+
+@dlt.resource(selected=False)
+def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]:
+    def _get_pages_for_rename(
+        entity: str, fields_entity: str, pipedrive_api_key: str
+    ) -> Dict[str, Any]:
+        existing_fields_mapping: Dict[str, Dict[str, str]] = (
+            custom_fields_mapping.setdefault(entity, {})
+        )
+        # we need to process all pages before yielding
+        for page in get_pages(fields_entity, pipedrive_api_key):
+            existing_fields_mapping = update_fields_mapping(
+                page, existing_fields_mapping
+            )
+        return existing_fields_mapping
+
+    # gets all *Fields data and stores in state
+    custom_fields_mapping = dlt.current.source_state().setdefault(
+        "custom_fields_mapping", {}
+    )
+    for entity, fields_entity, _ in ENTITY_MAPPINGS:
+        if fields_entity is None:
+            continue
+        custom_fields_mapping[entity] = _get_pages_for_rename(
+            entity, fields_entity, pipedrive_api_key
+        )
+
+    yield custom_fields_mapping
+
+
+@dlt.transformer(
+    name="custom_fields_mapping",
+    write_disposition="replace",
+    columns={"options": {"data_type": "json"}},
+)
+def parsed_mapping(
+    custom_fields_mapping: Dict[str, Any],
+) -> Optional[Iterator[List[Dict[str, str]]]]:
+    """
+    Parses and yields custom fields' mapping in order to be stored in the destination by dlt
+    """
+    for endpoint, data_item_mapping in custom_fields_mapping.items():
+        yield [
+            {
+                "endpoint": endpoint,
+                "hash_string": hash_string,
+                "name": names["name"],
+                "normalized_name": names["normalized_name"],
+                "options": names["options"],
+                "field_type": names["field_type"],
+            }
+            for hash_string, names in data_item_mapping.items()
+        ]
+
+
+@dlt.resource(primary_key="id", write_disposition="merge")
+def leads(
+    pipedrive_api_key: str = dlt.secrets.value,
+    update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "update_time", "1970-01-01 00:00:00"
+    ),
+) -> Iterator[TDataPage]:
+    """Resource to incrementally load pipedrive leads by update_time"""
+    # Leads inherit custom fields from deals
+    fields_mapping = (
+        dlt.current.source_state().get("custom_fields_mapping", {}).get("deals", {})
+    )
+    # Load leads pages sorted from newest to oldest and stop loading when
+    # last incremental value is reached
+    pages = get_pages(
+        "leads",
+        pipedrive_api_key,
+        extra_params={"sort": "update_time DESC"},
+    )
+    for page in pages:
+        yield rename_fields(page, fields_mapping)
+
+        if update_time.start_out_of_range:
+            return
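Note: since `pipedrive_source` is a regular `dlt` source, it can also be run outside ingestr. A minimal sketch, assuming ingestr is installed, DuckDB is used as the destination, and `token_123` is a placeholder API token; ingestr performs the equivalent wiring when given a `pipedrive://` source URI:

```python
import dlt

from ingestr.src.pipedrive import pipedrive_source

# Placeholder token; see the pipedrive.md docs above for how to obtain one.
source = pipedrive_source(pipedrive_api_key="token_123")

pipeline = dlt.pipeline(
    pipeline_name="pipedrive_demo",
    destination="duckdb",
    dataset_name="pipedrive_data",
)
# The source yields its custom_fields_mapping resource first, so custom-field
# renames are applied to deals, persons, etc. within the same run.
print(pipeline.run(source))
```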
ingestr-0.13.22/ingestr/src/pipedrive/helpers/__init__.py

@@ -0,0 +1,23 @@
+"""Pipedrive source helpers"""
+
+from itertools import groupby
+from typing import Any, Dict, Iterable, List, Tuple, cast  # noqa: F401
+
+from dlt.common import pendulum  # noqa: F401
+
+
+def _deals_flow_group_key(item: Dict[str, Any]) -> str:
+    return item["object"]  # type: ignore[no-any-return]
+
+
+def group_deal_flows(
+    pages: Iterable[Iterable[Dict[str, Any]]],
+) -> Iterable[Tuple[str, List[Dict[str, Any]]]]:
+    for page in pages:
+        for entity, items in groupby(
+            sorted(page, key=_deals_flow_group_key), key=_deals_flow_group_key
+        ):
+            yield (
+                entity,
+                [dict(item["data"], timestamp=item["timestamp"]) for item in items],
+            )
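Note: `group_deal_flows` regroups each page of `deals/{id}/flow` items by their `object` type and flattens the nested `data` payload, which is what lets the `deals_flow` transformer in `__init__.py` above route each group into its own `deals_flow_<entity>` table. A quick illustration on fabricated input:

```python
from ingestr.src.pipedrive.helpers import group_deal_flows

# Fabricated flow page; real items come from the Pipedrive API.
pages = [[
    {"object": "dealChange", "timestamp": "2024-01-01 10:00:00", "data": {"id": 1}},
    {"object": "note", "timestamp": "2024-01-01 11:00:00", "data": {"id": 2}},
    {"object": "dealChange", "timestamp": "2024-01-02 09:00:00", "data": {"id": 3}},
]]
for entity, items in group_deal_flows(pages):
    print(entity, items)
# dealChange [{'id': 1, 'timestamp': '2024-01-01 10:00:00'},
#             {'id': 3, 'timestamp': '2024-01-02 09:00:00'}]
# note       [{'id': 2, 'timestamp': '2024-01-01 11:00:00'}]
```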
ingestr-0.13.22/ingestr/src/pipedrive/helpers/custom_fields_munger.py

@@ -0,0 +1,102 @@
+from typing import Any, Dict, Optional, TypedDict
+
+import dlt
+
+from ..typing import TDataPage
+
+
+class TFieldMapping(TypedDict):
+    name: str
+    normalized_name: str
+    options: Optional[Dict[str, str]]
+    field_type: str
+
+
+def update_fields_mapping(
+    new_fields_mapping: TDataPage, existing_fields_mapping: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Specific function to perform data munging and push changes to custom fields' mapping stored in dlt's state
+    The endpoint must be an entity fields' endpoint
+    """
+    for data_item in new_fields_mapping:
+        # 'edit_flag' field contains a boolean value, which is set to 'True' for custom fields and 'False' otherwise.
+        if data_item.get("edit_flag"):
+            # Regarding custom fields, 'key' field contains pipedrive's hash string representation of its name
+            # We assume that pipedrive's hash strings are meant to be an unambiguous representation of custom fields' names, so dlt's state shouldn't be updated while those values
+            # remain unchanged
+            existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
+        # Built in enum and set fields are mapped if their options have int ids
+        # Enum fields with bool and string key options are left intact
+        elif data_item.get("field_type") in {"set", "enum"}:
+            options = data_item.get("options", [])
+            first_option = options[0]["id"] if len(options) >= 1 else None
+            if isinstance(first_option, int) and not isinstance(first_option, bool):
+                existing_fields_mapping = _update_field(
+                    data_item, existing_fields_mapping
+                )
+    return existing_fields_mapping
+
+
+def _update_field(
+    data_item: Dict[str, Any],
+    existing_fields_mapping: Optional[Dict[str, TFieldMapping]],
+) -> Dict[str, TFieldMapping]:
+    """Create or update the given field's info in the custom fields state
+    If the field hash already exists in the state from previous runs the name is not updated.
+    New enum options (if any) are appended to the state.
+    """
+    existing_fields_mapping = existing_fields_mapping or {}
+    key = data_item["key"]
+    options = data_item.get("options", [])
+    new_options_map = {str(o["id"]): o["label"] for o in options}
+    existing_field = existing_fields_mapping.get(key)
+    if not existing_field:
+        existing_fields_mapping[key] = dict(
+            name=data_item["name"],
+            normalized_name=_normalized_name(data_item["name"]),
+            options=new_options_map,
+            field_type=data_item["field_type"],
+        )
+        return existing_fields_mapping
+    existing_options = existing_field.get("options", {})
+    if not existing_options or existing_options == new_options_map:
+        existing_field["options"] = new_options_map
+        existing_field["field_type"] = data_item[
+            "field_type"
+        ]  # Add for backwards compat
+        return existing_fields_mapping
+    # Add new enum options to the existing options array
+    # so that when option is renamed the original label remains valid
+    new_option_keys = set(new_options_map) - set(existing_options)
+    for key in new_option_keys:
+        existing_options[key] = new_options_map[key]
+    existing_field["options"] = existing_options
+    return existing_fields_mapping
+
+
+def _normalized_name(name: str) -> str:
+    source_schema = dlt.current.source_schema()
+    normalized_name = name.strip()  # remove leading and trailing spaces
+    return source_schema.naming.normalize_identifier(normalized_name)
+
+
+def rename_fields(data: TDataPage, fields_mapping: Dict[str, Any]) -> TDataPage:
+    if not fields_mapping:
+        return data
+    for data_item in data:
+        for hash_string, field in fields_mapping.items():
+            if hash_string not in data_item:
+                continue
+            field_value = data_item.pop(hash_string)
+            field_name = field["name"]
+            options_map = field["options"]
+            # Get label instead of ID for 'enum' and 'set' fields
+            if field_value and field["field_type"] == "set":  # Multiple choice
+                field_value = [
+                    options_map.get(str(enum_id), enum_id) for enum_id in field_value
+                ]
+            elif field_value and field["field_type"] == "enum":
+                field_value = options_map.get(str(field_value), field_value)
+            data_item[field_name] = field_value
+    return data
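Note: to see `rename_fields` end to end, consider a stored mapping for a single custom field: the hash key on each row is replaced by the field's display name, and enum IDs are swapped for their labels. The data below is fabricated purely to illustrate the behavior:

```python
from ingestr.src.pipedrive.helpers.custom_fields_munger import rename_fields

fields_mapping = {
    # "abc123" stands in for Pipedrive's real hash-string key
    "abc123": {
        "name": "Deal Priority",
        "normalized_name": "deal_priority",
        "options": {"1": "High", "2": "Low"},
        "field_type": "enum",
    }
}
page = [{"id": 42, "abc123": 1}, {"id": 43, "abc123": 2}]
print(rename_fields(page, fields_mapping))
# [{'id': 42, 'Deal Priority': 'High'}, {'id': 43, 'Deal Priority': 'Low'}]
```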