ingestr 0.13.37__tar.gz → 0.13.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.13.37 → ingestr-0.13.39}/PKG-INFO +2 -2
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/.vitepress/config.mjs +1 -0
- ingestr-0.13.39/docs/media/phantombuster.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/frankfurter.md +21 -17
- ingestr-0.13.39/docs/supported-sources/phantombuster.md +38 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/main.py +2 -0
- ingestr-0.13.39/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/destinations.py +70 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/factory.py +4 -2
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/frankfurter/__init__.py +25 -12
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/frankfurter/helpers.py +16 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_analytics/helpers.py +12 -9
- ingestr-0.13.39/ingestr/src/phantombuster/__init__.py +65 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/phantombuster/client.py +33 -11
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/sources.py +58 -32
- {ingestr-0.13.37 → ingestr-0.13.39}/requirements.txt +1 -1
- ingestr-0.13.37/ingestr/src/buildinfo.py +0 -1
- ingestr-0.13.37/ingestr/src/phantombuster/__init__.py +0 -38
- {ingestr-0.13.37 → ingestr-0.13.39}/.dockerignore +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.gitignore +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.gitleaksignore +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.python-version +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/.vale.ini +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/Dockerfile +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/LICENSE.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/Makefile +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/README.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/commands/ingest.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/index.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/athena.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/github.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/personio.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/pipedrive.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/conftest.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/blob.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/filters.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/freshdesk/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/kinesis/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/partition.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/time.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/version.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/package-lock.json +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/package.json +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/pyproject.toml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/requirements-dev.txt +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/requirements.in +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/requirements_arm64.txt +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/resources/demo.gif +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/resources/demo.tape +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/resources/ingestr.svg +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/We.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/meta.json +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.13.37 → ingestr-0.13.39}/test.env.template +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.37
|
|
3
|
+
Version: 0.13.39
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -74,7 +74,7 @@ Requires-Dist: google-cloud-storage==3.1.0
|
|
|
74
74
|
Requires-Dist: google-crc32c==1.6.0
|
|
75
75
|
Requires-Dist: google-resumable-media==2.7.2
|
|
76
76
|
Requires-Dist: googleapis-common-protos==1.69.0
|
|
77
|
-
Requires-Dist: greenlet==3.2.
|
|
77
|
+
Requires-Dist: greenlet==3.2.2
|
|
78
78
|
Requires-Dist: grpcio-status==1.62.3
|
|
79
79
|
Requires-Dist: grpcio==1.70.0
|
|
80
80
|
Requires-Dist: hdbcli==2.23.27
|
|
@@ -131,6 +131,7 @@ export default defineConfig({
|
|
|
131
131
|
{ text: "LinkedIn Ads", link: "/supported-sources/linkedin_ads.md" },
|
|
132
132
|
{ text: "Notion", link: "/supported-sources/notion.md" },
|
|
133
133
|
{ text: "Personio", link: "/supported-sources/personio.md" },
|
|
134
|
+
{ text: "PhantomBuster", link: "/supported-sources/phantombuster.md" },
|
|
134
135
|
{ text: "Pipedrive", link: "/supported-sources/pipedrive.md" },
|
|
135
136
|
{ text: "S3", link: "/supported-sources/s3.md" },
|
|
136
137
|
{ text: "Salesforce", link: "/supported-sources/salesforce.md" },
|
|
Binary file
|
|
@@ -10,7 +10,7 @@ The `ingestr` command to use the `frankfurter` source is as follows:
|
|
|
10
10
|
|
|
11
11
|
```bash
|
|
12
12
|
ingestr ingest \
|
|
13
|
-
--source-uri 'frankfurter
|
|
13
|
+
--source-uri 'frankfurter://?base=IDR' \
|
|
14
14
|
--interval-start '2025-03-27' \ # Optional. See 'exchange_rates'.
|
|
15
15
|
--interval-end '2025-03-28' \ # Optional.
|
|
16
16
|
--source-table '<table_name>' \ # E.g 'currencies', 'latest', 'exchange_rates'. See below.
|
|
@@ -25,7 +25,9 @@ ingestr ingest \
|
|
|
25
25
|
### **`--source-uri`**
|
|
26
26
|
- **Description**: Specifies the source URI for the Frankfurter API.
|
|
27
27
|
- **Value**: `'frankfurter://'`
|
|
28
|
-
- **Purpose**: Indicates that the data will be fetched from the Frankfurter API.
|
|
28
|
+
- **Purpose**: Indicates that the data will be fetched from the Frankfurter API.
|
|
29
|
+
- An optional base currency can be added with `?base={base_currency}`.
|
|
30
|
+
- If no base currency is included, base currency defaults to USD.
|
|
29
31
|
|
|
30
32
|
---
|
|
31
33
|
|
|
@@ -89,9 +91,10 @@ ingestr ingest \
|
|
|
89
91
|
- **Description**: Fetches the latest exchange rates.
|
|
90
92
|
- **Columns**:
|
|
91
93
|
- `date`: The date of the exchange rates.
|
|
92
|
-
- `
|
|
94
|
+
- `currency_code`: The ISO 4217 currency code (e.g., `USD`, `EUR`).
|
|
93
95
|
- `rate`: The exchange rate relative to the base currency.
|
|
94
|
-
-
|
|
96
|
+
- `base_currency`: The base currency used to calculate the exchange rate.
|
|
97
|
+
- **Primary Key**: Composite key of `date`, `currency_code` and `base_currency`.
|
|
95
98
|
- **Notes**:
|
|
96
99
|
- The base currency (e.g., `EUR`) is included with a rate of `1.0`.
|
|
97
100
|
|
|
@@ -101,9 +104,10 @@ ingestr ingest \
|
|
|
101
104
|
- **Description**: Fetches historical exchange rates for a specified date range.
|
|
102
105
|
- **Columns**:
|
|
103
106
|
- `date`: The date of the exchange rates.
|
|
104
|
-
- `
|
|
107
|
+
- `currency_code`: The ISO 4217 currency code (e.g., `USD`, `EUR`).
|
|
105
108
|
- `rate`: The exchange rate relative to the base currency.
|
|
106
|
-
-
|
|
109
|
+
- `base_currency`: The base currency used to calculate the exchange rate.
|
|
110
|
+
- **Primary Key**: Composite key of `date`, `currency_code` and `base_currency`.
|
|
107
111
|
- **Notes**:
|
|
108
112
|
- An optional start and end date can be added via the arguments `--interval-start` and optionally `--interval-end` to define the date range (see examples below). If no start date is specified, the date will default to today's date (and thus return the latest exchange rates).
|
|
109
113
|
- If a start date but no end date is specified, then the end date will default to today's date and ingestr will retrieve data up until the latest published data.
|
|
@@ -114,24 +118,24 @@ Here `--interval-start` is set to a weekend date (e.g., `2025-03-29` -- a Saturd
|
|
|
114
118
|
|
|
115
119
|
`--interval-start` defaults to the previous Friday (`2025-03-28`) and the next data is from the following Monday (for simplicity, only a subset of currencies is shown below):
|
|
116
120
|
|
|
117
|
-
| **date** | **
|
|
118
|
-
|
|
119
|
-
| 2025-03-28 | EUR | 1.0 |
|
|
120
|
-
| 2025-03-28 | USD | 1.0783 |
|
|
121
|
-
| 2025-03-28 | GBP | 0.8571 |
|
|
122
|
-
| 2025-03-31 | EUR | 1.0 |
|
|
123
|
-
| 2025-03-31 | USD | 1.0783 |
|
|
124
|
-
| 2025-03-31 | GBP | 0.8571 |
|
|
121
|
+
| **date** | **currency_code** | **rate** | **base_currency** |
|
|
122
|
+
|--------------|-------------------|----------|-------------------|
|
|
123
|
+
| 2025-03-28 | EUR | 1.0 | EUR |
|
|
124
|
+
| 2025-03-28 | USD | 1.0783 | EUR |
|
|
125
|
+
| 2025-03-28 | GBP | 0.8571 | EUR |
|
|
126
|
+
| 2025-03-31 | EUR | 1.0 | EUR |
|
|
127
|
+
| 2025-03-31 | USD | 1.0783 | EUR |
|
|
128
|
+
| 2025-03-31 | GBP | 0.8571 | EUR |
|
|
125
129
|
|
|
126
130
|
|
|
127
131
|
---
|
|
128
132
|
|
|
129
133
|
## **Examples**
|
|
130
134
|
|
|
131
|
-
### **1. Fetch the Latest Exchange Rates**
|
|
135
|
+
### **1. Fetch the Latest Exchange Rates with GBP as Base Currency**
|
|
132
136
|
```bash
|
|
133
137
|
ingestr ingest \
|
|
134
|
-
--source-uri 'frankfurter
|
|
138
|
+
--source-uri 'frankfurter://?base=GBP' \
|
|
135
139
|
--source-table 'latest' \
|
|
136
140
|
--dest-uri 'duckdb.db' \
|
|
137
141
|
--dest-table 'schema.latest_new_scheme'
|
|
@@ -139,7 +143,7 @@ ingestr ingest \
|
|
|
139
143
|
|
|
140
144
|
---
|
|
141
145
|
|
|
142
|
-
### **2. Fetch Historical Exchange Rates**
|
|
146
|
+
### **2. Fetch Historical Exchange Rates with USD as Default Base Currency**
|
|
143
147
|
```bash
|
|
144
148
|
ingestr ingest \
|
|
145
149
|
--source-uri 'frankfurter://' \
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# PhantomBuster
|
|
2
|
+
[PhantomBuster](https://phantombuster.com/) is a cloud-based data automation and web scraping platform that allows users to extract data from websites and automate actions.
|
|
3
|
+
|
|
4
|
+
ingestr supports PhantomBuster as a source.
|
|
5
|
+
|
|
6
|
+
## URI format
|
|
7
|
+
|
|
8
|
+
The URI format for PhantomBuster is as follows:
|
|
9
|
+
|
|
10
|
+
```plaintext
|
|
11
|
+
PhantomBuster://?api_key=<api_key>
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
URI parameters:
|
|
15
|
+
- `api_key`: the API key used for authentication with the PhantomBuster API
|
|
16
|
+
|
|
17
|
+
## Setting up a PhantomBuster Integration
|
|
18
|
+
|
|
19
|
+
You can find your PhantomBuster API key by following the guide [here](https://hub.phantombuster.com/docs/api#how-to-find-my-api-key).
|
|
20
|
+
|
|
21
|
+
Let's say your `api_key` is `key_123`. Here's a sample command that will copy the data from PhantomBuster into a DuckDB database:
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
ingestr ingest \
|
|
26
|
+
--source-uri 'PhantomBuster://?api_key=key_123' \
|
|
27
|
+
--source-table 'completed_phantoms:<agent_id>' \
|
|
28
|
+
--dest-uri duckdb:///PhantomBuster.duckdb \
|
|
29
|
+
--dest-table 'dest.result'
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
<img alt="PhantomBuster_img" src="../media/phantombuster.png"/>
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
For now, we only support the `completed_phantoms` table followed by an `agent_id`, for example `completed_phantoms:<agent_id>`, where the agent ID is a unique identifier for a specific Phantom that can be found in the URI of that Phantom.
|
|
37
|
+
|
|
38
|
+
Use this as `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -34,6 +34,7 @@ PARQUET_SUPPORTED_DESTINATIONS = [
|
|
|
34
34
|
"snowflake",
|
|
35
35
|
"databricks",
|
|
36
36
|
"synapse",
|
|
37
|
+
"s3",
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
# these sources would return a JSON for sure, which means they cannot be used with Parquet loader for BigQuery
|
|
@@ -485,6 +486,7 @@ def ingest(
|
|
|
485
486
|
print(
|
|
486
487
|
f"[bold yellow] Primary Key:[/bold yellow] {primary_key if primary_key else 'None'}"
|
|
487
488
|
)
|
|
489
|
+
print(f"[bold yellow] Pipeline ID:[/bold yellow] {m.hexdigest()}")
|
|
488
490
|
print()
|
|
489
491
|
|
|
490
492
|
if not yes:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.13.39"
|
|
@@ -7,11 +7,13 @@ import tempfile
|
|
|
7
7
|
from urllib.parse import parse_qs, quote, urlparse
|
|
8
8
|
|
|
9
9
|
import dlt
|
|
10
|
+
import dlt.destinations.impl.filesystem.filesystem
|
|
10
11
|
from dlt.common.configuration.specs import AwsCredentials
|
|
11
12
|
from dlt.destinations.impl.clickhouse.configuration import (
|
|
12
13
|
ClickHouseCredentials,
|
|
13
14
|
)
|
|
14
15
|
|
|
16
|
+
from ingestr.src.errors import MissingValueError
|
|
15
17
|
from ingestr.src.loader import load_dlt_file
|
|
16
18
|
|
|
17
19
|
|
|
@@ -382,3 +384,71 @@ class ClickhouseDestination:
|
|
|
382
384
|
|
|
383
385
|
def post_load(self):
|
|
384
386
|
pass
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class S3FSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
|
|
390
|
+
@property
|
|
391
|
+
def dataset_path(self):
|
|
392
|
+
# override to remove dataset path
|
|
393
|
+
return self.bucket_path
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
class S3FS(dlt.destinations.filesystem):
|
|
397
|
+
@property
|
|
398
|
+
def client_class(self):
|
|
399
|
+
return S3FSClient
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
class S3Destination:
|
|
403
|
+
def dlt_dest(self, uri: str, **kwargs):
|
|
404
|
+
parsed_uri = urlparse(uri)
|
|
405
|
+
params = parse_qs(parsed_uri.query)
|
|
406
|
+
|
|
407
|
+
access_key_id = params.get("access_key_id", [None])[0]
|
|
408
|
+
if access_key_id is None:
|
|
409
|
+
raise MissingValueError("access_key_id", "S3")
|
|
410
|
+
|
|
411
|
+
secret_access_key = params.get("secret_access_key", [None])[0]
|
|
412
|
+
if secret_access_key is None:
|
|
413
|
+
raise MissingValueError("secret_access_key", "S3")
|
|
414
|
+
|
|
415
|
+
endpoint_url = params.get("endpoint_url", [None])[0]
|
|
416
|
+
|
|
417
|
+
creds = AwsCredentials(
|
|
418
|
+
aws_access_key_id=access_key_id,
|
|
419
|
+
aws_secret_access_key=secret_access_key,
|
|
420
|
+
endpoint_url=endpoint_url,
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
dest_table = self.validate_table(kwargs["dest_table"])
|
|
424
|
+
table_parts = dest_table.split("/")
|
|
425
|
+
base_path = "/".join(table_parts[:-1])
|
|
426
|
+
|
|
427
|
+
opts = {
|
|
428
|
+
"bucket_url": f"s3://{base_path}",
|
|
429
|
+
"credentials": creds,
|
|
430
|
+
# suppresses dlt warnings about dataset name normalization.
|
|
431
|
+
# we don't use dataset names in S3 so it's fine to disable this.
|
|
432
|
+
"enable_dataset_name_normalization": False,
|
|
433
|
+
}
|
|
434
|
+
layout = params.get("layout", [None])[0]
|
|
435
|
+
if layout is not None:
|
|
436
|
+
opts["layout"] = layout
|
|
437
|
+
|
|
438
|
+
return S3FS(**opts) # type: ignore
|
|
439
|
+
|
|
440
|
+
def validate_table(self, table: str):
|
|
441
|
+
table = table.strip("/ ")
|
|
442
|
+
if len(table.split("/")) < 2:
|
|
443
|
+
raise ValueError("Table name must be in the format {bucket-name}/{path}")
|
|
444
|
+
return table
|
|
445
|
+
|
|
446
|
+
def dlt_run_params(self, uri: str, table: str, **kwargs):
|
|
447
|
+
table = self.validate_table(table)
|
|
448
|
+
table_parts = table.split("/")
|
|
449
|
+
return {
|
|
450
|
+
"table_name": table_parts[-1],
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
def post_load(self) -> None:
|
|
454
|
+
pass
|
|
@@ -13,6 +13,7 @@ from ingestr.src.destinations import (
|
|
|
13
13
|
MsSQLDestination,
|
|
14
14
|
PostgresDestination,
|
|
15
15
|
RedshiftDestination,
|
|
16
|
+
S3Destination,
|
|
16
17
|
SnowflakeDestination,
|
|
17
18
|
SynapseDestination,
|
|
18
19
|
)
|
|
@@ -29,6 +30,7 @@ from ingestr.src.sources import (
|
|
|
29
30
|
DynamoDBSource,
|
|
30
31
|
FacebookAdsSource,
|
|
31
32
|
FrankfurterSource,
|
|
33
|
+
FreshdeskSource,
|
|
32
34
|
GCSSource,
|
|
33
35
|
GitHubSource,
|
|
34
36
|
GoogleAdsSource,
|
|
@@ -44,6 +46,7 @@ from ingestr.src.sources import (
|
|
|
44
46
|
MongoDbSource,
|
|
45
47
|
NotionSource,
|
|
46
48
|
PersonioSource,
|
|
49
|
+
PhantombusterSource,
|
|
47
50
|
PipedriveSource,
|
|
48
51
|
S3Source,
|
|
49
52
|
SalesforceSource,
|
|
@@ -53,8 +56,6 @@ from ingestr.src.sources import (
|
|
|
53
56
|
StripeAnalyticsSource,
|
|
54
57
|
TikTokSource,
|
|
55
58
|
ZendeskSource,
|
|
56
|
-
FreshdeskSource,
|
|
57
|
-
PhantombusterSource,
|
|
58
59
|
)
|
|
59
60
|
|
|
60
61
|
SQL_SOURCE_SCHEMES = [
|
|
@@ -170,6 +171,7 @@ class SourceDestinationFactory:
|
|
|
170
171
|
"athena": AthenaDestination,
|
|
171
172
|
"clickhouse+native": ClickhouseDestination,
|
|
172
173
|
"clickhouse": ClickhouseDestination,
|
|
174
|
+
"s3": S3Destination,
|
|
173
175
|
}
|
|
174
176
|
|
|
175
177
|
def __init__(self, source_uri: str, destination_uri: str):
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Iterator
|
|
1
|
+
from typing import Any, Iterator, Optional
|
|
2
2
|
|
|
3
3
|
import dlt
|
|
4
4
|
from dlt.common.pendulum import pendulum
|
|
@@ -15,13 +15,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
|
|
|
15
15
|
def frankfurter_source(
|
|
16
16
|
start_date: TAnyDateTime,
|
|
17
17
|
end_date: TAnyDateTime,
|
|
18
|
+
base_currency: str,
|
|
18
19
|
) -> Any:
|
|
19
20
|
"""
|
|
20
21
|
A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
|
|
21
22
|
various types of data: currencies, latest rates, historical rates.
|
|
22
23
|
"""
|
|
23
24
|
date_time = dlt.sources.incremental(
|
|
24
|
-
|
|
25
25
|
"date",
|
|
26
26
|
initial_value=start_date,
|
|
27
27
|
end_value=end_date,
|
|
@@ -31,9 +31,10 @@ def frankfurter_source(
|
|
|
31
31
|
|
|
32
32
|
return (
|
|
33
33
|
currencies(),
|
|
34
|
-
latest(),
|
|
35
|
-
exchange_rates(
|
|
36
|
-
|
|
34
|
+
latest(base_currency=base_currency),
|
|
35
|
+
exchange_rates(
|
|
36
|
+
start_date=date_time, end_date=end_date, base_currency=base_currency
|
|
37
|
+
),
|
|
37
38
|
)
|
|
38
39
|
|
|
39
40
|
|
|
@@ -61,29 +62,33 @@ def currencies() -> Iterator[dict]:
|
|
|
61
62
|
"date": {"data_type": "text"},
|
|
62
63
|
"currency_code": {"data_type": "text"},
|
|
63
64
|
"rate": {"data_type": "double"},
|
|
65
|
+
"base_currency": {"data_type": "text"},
|
|
64
66
|
},
|
|
65
|
-
primary_key=["date", "currency_code"],
|
|
67
|
+
primary_key=["date", "currency_code", "base_currency"],
|
|
66
68
|
)
|
|
67
|
-
def latest() -> Iterator[dict]:
|
|
69
|
+
def latest(base_currency: Optional[str] = "") -> Iterator[dict]:
|
|
68
70
|
"""
|
|
69
71
|
Fetches the latest exchange rates and yields them as rows.
|
|
70
72
|
"""
|
|
71
73
|
# Base URL
|
|
72
74
|
url = "latest?"
|
|
73
75
|
|
|
76
|
+
if base_currency:
|
|
77
|
+
url += f"base={base_currency}"
|
|
78
|
+
|
|
74
79
|
# Fetch data
|
|
75
80
|
data = get_path_with_retry(url)
|
|
76
81
|
|
|
77
82
|
# Extract rates and base currency
|
|
78
83
|
rates = data["rates"]
|
|
79
|
-
|
|
80
84
|
date = pendulum.parse(data["date"])
|
|
81
85
|
|
|
82
|
-
# Add the base currency
|
|
86
|
+
# Add the base currency with a rate of 1.0
|
|
83
87
|
yield {
|
|
84
88
|
"date": date,
|
|
85
|
-
"currency_code":
|
|
89
|
+
"currency_code": base_currency,
|
|
86
90
|
"rate": 1.0,
|
|
91
|
+
"base_currency": base_currency,
|
|
87
92
|
}
|
|
88
93
|
|
|
89
94
|
# Add all currencies and their rates
|
|
@@ -92,6 +97,7 @@ def latest() -> Iterator[dict]:
|
|
|
92
97
|
"date": date,
|
|
93
98
|
"currency_code": currency_code,
|
|
94
99
|
"rate": rate,
|
|
100
|
+
"base_currency": base_currency,
|
|
95
101
|
}
|
|
96
102
|
|
|
97
103
|
|
|
@@ -101,12 +107,14 @@ def latest() -> Iterator[dict]:
|
|
|
101
107
|
"date": {"data_type": "text"},
|
|
102
108
|
"currency_code": {"data_type": "text"},
|
|
103
109
|
"rate": {"data_type": "double"},
|
|
110
|
+
"base_currency": {"data_type": "text"},
|
|
104
111
|
},
|
|
105
|
-
primary_key=("date", "currency_code"),
|
|
112
|
+
primary_key=("date", "currency_code", "base_currency"),
|
|
106
113
|
)
|
|
107
114
|
def exchange_rates(
|
|
108
115
|
end_date: TAnyDateTime,
|
|
109
116
|
start_date: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental("date"),
|
|
117
|
+
base_currency: Optional[str] = "",
|
|
110
118
|
) -> Iterator[dict]:
|
|
111
119
|
"""
|
|
112
120
|
Fetches exchange rates for a specified date range.
|
|
@@ -124,6 +132,9 @@ def exchange_rates(
|
|
|
124
132
|
# Compose the URL
|
|
125
133
|
url = f"{start_date_str}..{end_date_str}?"
|
|
126
134
|
|
|
135
|
+
if base_currency:
|
|
136
|
+
url += f"base={base_currency}"
|
|
137
|
+
|
|
127
138
|
# Fetch data from the API
|
|
128
139
|
data = get_path_with_retry(url)
|
|
129
140
|
|
|
@@ -137,8 +148,9 @@ def exchange_rates(
|
|
|
137
148
|
# Add the base currency with a rate of 1.0
|
|
138
149
|
yield {
|
|
139
150
|
"date": formatted_date,
|
|
140
|
-
"currency_code":
|
|
151
|
+
"currency_code": base_currency,
|
|
141
152
|
"rate": 1.0,
|
|
153
|
+
"base_currency": base_currency,
|
|
142
154
|
}
|
|
143
155
|
|
|
144
156
|
# Add all other currencies and their rates
|
|
@@ -147,4 +159,5 @@ def exchange_rates(
|
|
|
147
159
|
"date": formatted_date,
|
|
148
160
|
"currency_code": currency_code,
|
|
149
161
|
"rate": rate,
|
|
162
|
+
"base_currency": base_currency,
|
|
150
163
|
}
|
|
@@ -30,3 +30,19 @@ def validate_dates(start_date: datetime, end_date: datetime) -> None:
|
|
|
30
30
|
# Check if start_date is before end_date
|
|
31
31
|
if start_date > end_date:
|
|
32
32
|
raise ValueError("Interval-end cannot be before interval-start.")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def validate_currency(currency_code: str) -> bool:
    """Check a currency code against the Frankfurter currency list.

    Args:
        currency_code: Currency code to check. It is upper-cased before the
            lookup, so the comparison is case-insensitive.

    Returns:
        True if the upper-cased code is a key of the JSON object returned by
        the ``/v1/currencies`` endpoint; otherwise False, after printing the
        list of supported codes.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
    """
    url = "https://api.frankfurter.dev/v1/currencies"

    response = requests.get(url, timeout=5)
    # Fail loudly on 4xx/5xx instead of treating an error payload as the
    # currency map and reporting its keys as "supported currencies".
    response.raise_for_status()
    currencies = response.json()

    if currency_code.upper() in currencies:
        return True

    supported_currencies = list(currencies.keys())
    print(
        f"Invalid base currency '{currency_code}'. Supported currencies are: {supported_currencies}"
    )
    return False
|
|
@@ -149,7 +149,7 @@ def get_report(
|
|
|
149
149
|
|
|
150
150
|
# process request
|
|
151
151
|
processed_response_generator = process_report(response=response)
|
|
152
|
-
|
|
152
|
+
|
|
153
153
|
# import pdb; pdb.set_trace()
|
|
154
154
|
yield from processed_response_generator
|
|
155
155
|
offset += per_page
|
|
@@ -225,7 +225,9 @@ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
|
|
|
225
225
|
return dimension_value
|
|
226
226
|
|
|
227
227
|
|
|
228
|
-
def convert_minutes_ranges_to_minute_range_objects(
|
|
228
|
+
def convert_minutes_ranges_to_minute_range_objects(
|
|
229
|
+
minutes_ranges: str,
|
|
230
|
+
) -> List[MinuteRange]:
|
|
229
231
|
minutes_ranges = minutes_ranges.strip()
|
|
230
232
|
minutes = minutes_ranges.replace(" ", "").split(",")
|
|
231
233
|
if minutes == "":
|
|
@@ -233,7 +235,6 @@ def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[
|
|
|
233
235
|
"Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
|
|
234
236
|
)
|
|
235
237
|
|
|
236
|
-
|
|
237
238
|
minute_range_objects = []
|
|
238
239
|
for min_range in minutes:
|
|
239
240
|
if "-" not in min_range:
|
|
@@ -246,14 +247,16 @@ def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[
|
|
|
246
247
|
raise ValueError(
|
|
247
248
|
f"Invalid input '{min_range}'. Both start and end minutes must be digits. For example: 1-2,5-6"
|
|
248
249
|
)
|
|
249
|
-
|
|
250
|
+
|
|
250
251
|
end_minutes_ago = int(parts[0])
|
|
251
252
|
start_minutes_ago = int(parts[1])
|
|
252
|
-
minute_range_objects.append(
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
253
|
+
minute_range_objects.append(
|
|
254
|
+
MinuteRange(
|
|
255
|
+
name=f"{end_minutes_ago}-{start_minutes_ago} minutes ago",
|
|
256
|
+
start_minutes_ago=start_minutes_ago,
|
|
257
|
+
end_minutes_ago=end_minutes_ago,
|
|
258
|
+
)
|
|
259
|
+
)
|
|
257
260
|
|
|
258
261
|
return minute_range_objects
|
|
259
262
|
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Iterable, Optional
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
import requests
|
|
6
|
+
from dlt.common.typing import TAnyDateTime, TDataItem
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
from dlt.sources.helpers.requests import Client
|
|
9
|
+
|
|
10
|
+
from ingestr.src.phantombuster.client import PhantombusterClient
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def retry_on_limit(
    response: Optional[requests.Response], exception: Optional[BaseException]
) -> bool:
    """Retry predicate: True only when a response exists with HTTP status 429.

    Args:
        response: The response received, or None when there is none.
        exception: Accepted but not consulted by this predicate.

    Returns:
        True if ``response`` is not None and its status code is 429,
        otherwise False.
    """
    return response is not None and response.status_code == 429
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def create_client() -> requests.Session:
    """Build a dlt ``Client`` and return its ``session`` attribute.

    The client is created with ``raise_for_status=False``,
    ``retry_on_limit`` as the retry condition, at most 12 request attempts
    and a backoff factor of 2.
    """
    client_options = {
        "raise_for_status": False,
        "retry_condition": retry_on_limit,
        "request_max_attempts": 12,
        "request_backoff_factor": 2,
    }
    return Client(**client_options).session
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dlt.source(max_table_nesting=0)
def phantombuster_source(
    api_key: str, agent_id: str, start_date: TAnyDateTime, end_date: TAnyDateTime | None
) -> Iterable[DltResource]:
    """dlt source exposing the ``completed_phantoms`` resource.

    Args:
        api_key: Key used to construct the ``PhantombusterClient``.
        agent_id: Agent whose container results are fetched.
        start_date: Initial value of the ``ended_at`` incremental cursor.
        end_date: End value of the cursor; when the cursor has no end value,
            the current UTC time is used as the end of the window.

    Returns:
        The ``completed_phantoms`` resource.
    """
    client = PhantombusterClient(api_key)

    # Incremental cursor over "ended_at", closed at both ends of the range.
    ended_at_cursor = dlt.sources.incremental(
        "ended_at",
        initial_value=start_date,
        end_value=end_date,
        range_start="closed",
        range_end="closed",
    )

    @dlt.resource(
        write_disposition="merge",
        primary_key="container_id",
        columns={
            "partition_dt": {"data_type": "date", "partition": True},
        },
    )
    def completed_phantoms(dateTime=ended_at_cursor) -> Iterable[TDataItem]:
        """Yield the container results between the cursor's last and end values."""
        window_end = (
            pendulum.now(tz="UTC") if dateTime.end_value is None else dateTime.end_value
        )
        window_start = dateTime.last_value

        yield client.fetch_containers_result(
            create_client(),
            agent_id,
            start_date=window_start,
            end_date=window_end,
        )

    return completed_phantoms
|
|
@@ -14,12 +14,22 @@ class PhantombusterClient:
|
|
|
14
14
|
"accept": "application/json",
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
def fetch_containers_result(
|
|
17
|
+
def fetch_containers_result(
|
|
18
|
+
self,
|
|
19
|
+
session: requests.Session,
|
|
20
|
+
agent_id: str,
|
|
21
|
+
start_date: pendulum.DateTime,
|
|
22
|
+
end_date: pendulum.DateTime,
|
|
23
|
+
):
|
|
18
24
|
url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
|
|
19
25
|
before_ended_at = None
|
|
20
26
|
limit = 100
|
|
21
|
-
|
|
27
|
+
|
|
28
|
+
started_at = start_date.int_timestamp * 1000 + int(
|
|
29
|
+
start_date.microsecond / 1000
|
|
30
|
+
)
|
|
22
31
|
ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)
|
|
32
|
+
|
|
23
33
|
while True:
|
|
24
34
|
params: dict[str, Union[str, int, float, bytes, None]] = {
|
|
25
35
|
"agentId": agent_id,
|
|
@@ -36,23 +46,35 @@ class PhantombusterClient:
|
|
|
36
46
|
|
|
37
47
|
for container in containers:
|
|
38
48
|
container_ended_at = container.get("endedAt")
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
49
|
+
|
|
50
|
+
if before_ended_at is None or before_ended_at > container_ended_at:
|
|
51
|
+
before_ended_at = container_ended_at
|
|
52
|
+
|
|
53
|
+
if container_ended_at < started_at or container_ended_at > ended_at:
|
|
43
54
|
continue
|
|
55
|
+
|
|
44
56
|
try:
|
|
45
57
|
result = self.fetch_result_object(session, container["id"])
|
|
46
|
-
partition_dt = pendulum.from_timestamp(
|
|
47
|
-
|
|
58
|
+
partition_dt = pendulum.from_timestamp(
|
|
59
|
+
container_ended_at / 1000, tz="UTC"
|
|
60
|
+
).date()
|
|
61
|
+
container_ended_at_datetime = pendulum.from_timestamp(
|
|
62
|
+
container_ended_at / 1000, tz="UTC"
|
|
63
|
+
)
|
|
64
|
+
row = {
|
|
65
|
+
"container_id": container["id"],
|
|
66
|
+
"container": container,
|
|
67
|
+
"result": result,
|
|
68
|
+
"partition_dt": partition_dt,
|
|
69
|
+
"ended_at": container_ended_at_datetime,
|
|
70
|
+
}
|
|
48
71
|
yield row
|
|
49
|
-
|
|
72
|
+
|
|
50
73
|
except requests.RequestException as e:
|
|
51
74
|
print(f"Error fetching result for container {container['id']}: {e}")
|
|
52
|
-
|
|
75
|
+
|
|
53
76
|
if data["maxLimitReached"] is False:
|
|
54
77
|
break
|
|
55
|
-
|
|
56
78
|
|
|
57
79
|
def fetch_result_object(self, session: requests.Session, container_id: str):
|
|
58
80
|
result_url = (
|