ingestr 0.13.87__tar.gz → 0.13.88__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.13.87 → ingestr-0.13.88}/PKG-INFO +1 -1
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/commands/ingest.md +21 -0
- ingestr-0.13.88/docs/getting-started/data-masking.md +377 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/main.py +12 -0
- ingestr-0.13.88/ingestr/src/buildinfo.py +1 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/filters.py +9 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/frankfurter/__init__.py +10 -14
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/frankfurter/helpers.py +2 -2
- ingestr-0.13.88/ingestr/src/masking.py +344 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/mongodb/helpers.py +11 -7
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/revenuecat/__init__.py +4 -4
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/revenuecat/helpers.py +4 -4
- ingestr-0.13.87/ingestr/src/buildinfo.py +0 -1
- {ingestr-0.13.87 → ingestr-0.13.88}/.dlt/config.toml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.dockerignore +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.github/workflows/release.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.github/workflows/tests.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.gitignore +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.gitleaksignore +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.python-version +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/.vale.ini +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/Dockerfile +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/LICENSE.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/Makefile +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/README.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/commands/example-uris.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/index.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/applovin_max.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/athena.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/clickhouse_img.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/clickup_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/cratedb-destination.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/cratedb-source.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/freshdesk_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/gcp_spanner_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/github.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/google_analytics_realtime_report.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/googleanalytics.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/ingestion_elasticsearch_img.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/kinesis.bigquery.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/linear.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/linkedin_ads.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/mixpanel_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/personio.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/personio_duckdb.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/phantombuster.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/pipedrive.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/quickbook_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/sftp.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/stripe_postgres.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/tiktok.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/wise_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/media/zoom_ingestion.png +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/applovin.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/applovin_max.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/appstore.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/attio.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/clickhouse.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/clickup.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/cratedb.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/custom_queries.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/db2.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/elasticsearch.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/fluxx.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/frankfurter.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/freshdesk.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/gcs.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/github.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/google-ads.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/google_analytics.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/influxdb.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/isoc-pulse.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/kinesis.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/linear.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/linkedin_ads.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/mixpanel.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/motherduck.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/personio.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/phantombuster.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/pinterest.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/pipedrive.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/quickbooks.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/revenuecat.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/salesforce.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/sftp.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/smartsheets.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/solidgate.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/spanner.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/tiktok-ads.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/trustpilot.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/wise.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/supported-sources/zoom.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/tutorials/load-kinesis-bigquery.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/tutorials/load-personio-duckdb.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/docs/tutorials/load-stripe-postgres.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/conftest.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/.gitignore +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/applovin/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/applovin_max/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appsflyer/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appstore/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appstore/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appstore/errors.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appstore/models.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/appstore/resources.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/attio/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/attio/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/blob.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/clickup/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/clickup/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/collector/spinner.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/destinations.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/elasticsearch/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/errors.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/facebook_ads/utils.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/factory.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/fluxx/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/fluxx/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/freshdesk/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/freshdesk/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/github/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/github/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/github/queries.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/github/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_ads/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_ads/field.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_ads/metrics.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_ads/predicates.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_ads/reports.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_analytics/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_analytics/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/http_client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/influxdb/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/influxdb/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/isoc_pulse/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/kinesis/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/kinesis/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/klaviyo/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/linear/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/linear/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/linkedin_ads/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/linkedin_ads/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/loader.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/mixpanel/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/mixpanel/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/partition.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/personio/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/personio/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/phantombuster/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/phantombuster/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pinterest/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pipedrive/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pipedrive/helpers/pages.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pipedrive/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/pipedrive/typing.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/quickbooks/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/resource.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/salesforce/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/salesforce/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/smartsheets/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/solidgate/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/solidgate/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/sources.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/sql_database/callbacks.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/tiktok_ads/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/time.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/trustpilot/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/trustpilot/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/version.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/wise/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/wise/client.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zoom/__init__.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/src/zoom/helpers.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/ingestr/tests/unit/test_smartsheets.py +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/package-lock.json +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/package.json +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/pyproject.toml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/requirements-dev.txt +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/requirements.in +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/requirements.txt +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/requirements_arm64.txt +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/resources/demo.gif +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/resources/demo.tape +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/resources/ingestr.svg +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Colons.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Gender.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Headings.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Latin.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Parens.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Passive.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Periods.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Slang.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Units.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/We.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/Will.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/WordList.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/meta.json +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/Google/vocab.txt +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/styles/config/vocabularies/bruin/accept.txt +0 -0
- {ingestr-0.13.87 → ingestr-0.13.88}/test.env.template +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.87
|
|
3
|
+
Version: 0.13.88
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -43,6 +43,7 @@ export default defineConfig({
|
|
|
43
43
|
text: "Incremental Loading",
|
|
44
44
|
link: "/getting-started/incremental-loading.md",
|
|
45
45
|
},
|
|
46
|
+
{ text: "Data Masking", link: "/getting-started/data-masking.md" },
|
|
46
47
|
{ text: "Telemetry", link: "/getting-started/telemetry.md" },
|
|
47
48
|
],
|
|
48
49
|
},
|
|
@@ -28,6 +28,7 @@ ingestr ingest \
|
|
|
28
28
|
- `--interval-end`: Sets the end of the interval for the incremental key. Defaults to `None`.
|
|
29
29
|
- `--primary-key TEXT`: Specifies the primary key for the merge operation. Defaults to `None`.
|
|
30
30
|
- `--columns <column_name>:<column_type>`: Specifies the columns to be ingested. Defaults to `None`.
|
|
31
|
+
- `--mask <column_name>:<algorithm>[:<parameter>]`: Applies data masking to the specified column. Can be used multiple times for different columns. See the [Data Masking](../getting-started/data-masking.md) documentation for available algorithms and usage examples. Defaults to `None`.
|
|
31
32
|
|
|
32
33
|
The `interval-start` and `interval-end` options support various datetime formats, here are some examples:
|
|
33
34
|
- `%Y-%m-%d`: `2023-01-31`
|
|
@@ -106,5 +107,25 @@ ingestr ingest
|
|
|
106
107
|
--columns 'dt:date'
|
|
107
108
|
```
|
|
108
109
|
|
|
110
|
+
### Ingesting with Data Masking
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
ingestr ingest \
|
|
114
|
+
--source-uri 'postgresql://user:pass@localhost/customers' \
|
|
115
|
+
--source-table 'customer_data' \
|
|
116
|
+
--dest-uri 'duckdb:///masked_customers.db' \
|
|
117
|
+
--dest-table 'masked_customers' \
|
|
118
|
+
--mask 'email:hash' \
|
|
119
|
+
--mask 'phone:partial:3' \
|
|
120
|
+
--mask 'ssn:redact' \
|
|
121
|
+
--mask 'salary:round:5000'
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
This example demonstrates masking sensitive customer data:
|
|
125
|
+
- Email addresses are hashed for consistent anonymization
|
|
126
|
+
- Phone numbers show only the first 3 and last 3 characters
|
|
127
|
+
- SSNs are completely redacted
|
|
128
|
+
- Salaries are rounded to the nearest $5000
|
|
129
|
+
|
|
109
130
|
> [!INFO]
|
|
110
131
|
> For more examples, please refer to the specific platforms' documentation on the sidebar.
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
# Data Masking
|
|
2
|
+
|
|
3
|
+
Data masking is a critical security feature that allows you to protect sensitive information while maintaining data utility for development, testing, and analytics purposes. ingestr provides comprehensive masking capabilities that can be applied to any column during the ingestion process.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
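Data masking transforms sensitive data into a protected form. Depending on the algorithm, the original format can be preserved (format-preserving and partial masking) or the value can be replaced entirely (hashing, redaction). This is essential for: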
|
|
8
|
+
|
|
9
|
+
- **Compliance** with regulations like GDPR, CCPA, HIPAA
|
|
10
|
+
- **Security** in development and testing environments
|
|
11
|
+
- **Privacy** protection in analytics and reporting
|
|
12
|
+
- **Data sharing** with third parties or external systems
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
Apply masking to specific columns using the `--mask` parameter:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
ingestr ingest \
|
|
20
|
+
--source-uri "postgres://user:pass@localhost/db" \
|
|
21
|
+
--source-table "users" \
|
|
22
|
+
--dest-uri "duckdb:///masked_data.db" \
|
|
23
|
+
--dest-table "masked_users" \
|
|
24
|
+
--mask "email:hash" \
|
|
25
|
+
--mask "ssn:partial:4" \
|
|
26
|
+
--mask "salary:round:1000"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Format
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
--mask <column_name>:<algorithm>[:<parameter>]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
- `column_name`: The name of the column to mask
|
|
36
|
+
- `algorithm`: The masking algorithm to apply
|
|
37
|
+
- `parameter`: Optional parameter for algorithms that require configuration
|
|
38
|
+
|
|
39
|
+
## Masking Algorithms
|
|
40
|
+
|
|
41
|
+
### Irreversible Masking
|
|
42
|
+
|
|
43
|
+
These algorithms permanently transform data in a way that cannot be reversed.
|
|
44
|
+
|
|
45
|
+
#### `hash` / `sha256`
|
|
46
|
+
Creates a SHA-256 hash of the value. Consistent across runs - the same input always produces the same output.
|
|
47
|
+
|
|
48
|
+
**Use cases:** Creating anonymous identifiers, consistent tokenization
|
|
49
|
+
```bash
|
|
50
|
+
--mask "user_id:hash"
|
|
51
|
+
# "test" → 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
#### `md5`
|
|
55
|
+
Creates an MD5 hash. Faster than SHA-256 but less secure (adequate for non-security purposes).
|
|
56
|
+
|
|
57
|
+
**Use cases:** Quick checksums, non-security tokenization
|
|
58
|
+
```bash
|
|
59
|
+
--mask "session_id:md5"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
#### `hmac`
|
|
63
|
+
Hash-based message authentication code with a secret key. Provides consistent hashing across systems when using the same key.
|
|
64
|
+
|
|
65
|
+
**Use cases:** Cross-system consistency with shared secret
|
|
66
|
+
```bash
|
|
67
|
+
--mask "customer_id:hmac:my-secret-key"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
#### `redact`
|
|
71
|
+
Replaces the entire value with "REDACTED".
|
|
72
|
+
|
|
73
|
+
**Use cases:** Complete removal of sensitive data
|
|
74
|
+
```bash
|
|
75
|
+
--mask "comments:redact"
|
|
76
|
+
# "Customer complaint about..." → "REDACTED"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Format-Preserving Masking
|
|
80
|
+
|
|
81
|
+
These algorithms maintain the format and structure of the original data.
|
|
82
|
+
|
|
83
|
+
#### `email`
|
|
84
|
+
Masks email addresses while preserving the domain.
|
|
85
|
+
|
|
86
|
+
**Use cases:** Protecting email addresses while maintaining domain analysis
|
|
87
|
+
```bash
|
|
88
|
+
--mask "email:email"
|
|
89
|
+
# john.doe@example.com → j******e@example.com
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
#### `phone`
|
|
93
|
+
Masks phone numbers while preserving country and area codes.
|
|
94
|
+
|
|
95
|
+
**Use cases:** Geographic analysis without exposing full numbers
|
|
96
|
+
```bash
|
|
97
|
+
--mask "phone:phone"
|
|
98
|
+
# +1-555-123-4567 → +1-555-***-****
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
#### `credit_card`
|
|
102
|
+
Shows only the last 4 digits of credit card numbers.
|
|
103
|
+
|
|
104
|
+
**Use cases:** Payment processing logs, transaction records
|
|
105
|
+
```bash
|
|
106
|
+
--mask "card_number:credit_card"
|
|
107
|
+
# 4111-1111-1111-1111 → ****-****-****-1111
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
#### `ssn`
|
|
111
|
+
Masks Social Security Numbers, showing only the last 4 digits.
|
|
112
|
+
|
|
113
|
+
**Use cases:** Identity verification systems
|
|
114
|
+
```bash
|
|
115
|
+
--mask "ssn:ssn"
|
|
116
|
+
# 123-45-6789 → ***-**-6789
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Partial Masking
|
|
120
|
+
|
|
121
|
+
These algorithms show only portions of the original data.
|
|
122
|
+
|
|
123
|
+
#### `partial`
|
|
124
|
+
Shows first and last N characters, masking the middle.
|
|
125
|
+
|
|
126
|
+
**Use cases:** Names, addresses, partial visibility
|
|
127
|
+
```bash
|
|
128
|
+
--mask "name:partial:2"
|
|
129
|
+
# "Jonathan" → "Jo****an"
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
#### `first_letter`
|
|
133
|
+
Shows only the first character.
|
|
134
|
+
|
|
135
|
+
**Use cases:** Initials, abbreviated names
|
|
136
|
+
```bash
|
|
137
|
+
--mask "first_name:first_letter"
|
|
138
|
+
# "Alice" → "A****"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
#### `stars`
|
|
142
|
+
Replaces the entire value with asterisks of the same length.
|
|
143
|
+
|
|
144
|
+
**Use cases:** Password fields, complete obfuscation
|
|
145
|
+
```bash
|
|
146
|
+
--mask "password:stars"
|
|
147
|
+
# "secret123" → "*********"
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
#### `fixed`
|
|
151
|
+
Replaces with a fixed value.
|
|
152
|
+
|
|
153
|
+
**Use cases:** Standardized replacement values
|
|
154
|
+
```bash
|
|
155
|
+
--mask "api_key:fixed:MASKED_KEY"
|
|
156
|
+
# "sk_live_abc123" → "MASKED_KEY"
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Tokenization
|
|
160
|
+
|
|
161
|
+
These algorithms replace values with tokens or identifiers.
|
|
162
|
+
|
|
163
|
+
#### `uuid`
|
|
164
|
+
Replaces with a UUID token. Same values get the same UUID (consistent).
|
|
165
|
+
|
|
166
|
+
**Use cases:** Creating surrogate keys, maintaining referential integrity
|
|
167
|
+
```bash
|
|
168
|
+
--mask "customer_id:uuid"
|
|
169
|
+
# "CUST001" → "550e8400-e29b-41d4-a716-446655440000"
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
#### `sequential`
|
|
173
|
+
Replaces with sequential integers starting from 1.
|
|
174
|
+
|
|
175
|
+
**Use cases:** Simple anonymization, reducing data size
|
|
176
|
+
```bash
|
|
177
|
+
--mask "account_number:sequential"
|
|
178
|
+
# "ACC-2024-001" → 1
|
|
179
|
+
# "ACC-2024-002" → 2
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
#### `random`
|
|
183
|
+
Replaces with random data of the same type.
|
|
184
|
+
|
|
185
|
+
**Use cases:** Test data generation, complete randomization
|
|
186
|
+
```bash
|
|
187
|
+
--mask "age:random"
|
|
188
|
+
# 35 → 67 (random number)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Numeric Masking
|
|
192
|
+
|
|
193
|
+
These algorithms transform numeric values while preserving their general magnitude.
|
|
194
|
+
|
|
195
|
+
#### `round`
|
|
196
|
+
Rounds numbers to the nearest multiple of the specified value.
|
|
197
|
+
|
|
198
|
+
**Use cases:** Salary bands, age groups, reducing precision
|
|
199
|
+
```bash
|
|
200
|
+
--mask "salary:round:5000"
|
|
201
|
+
# 52300 → 50000
|
|
202
|
+
|
|
203
|
+
--mask "age:round:10"
|
|
204
|
+
# 34 → 30
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
#### `range`
|
|
208
|
+
Replaces with a range bracket.
|
|
209
|
+
|
|
210
|
+
**Use cases:** Bucketing, categorical analysis
|
|
211
|
+
```bash
|
|
212
|
+
--mask "income:range:10000"
|
|
213
|
+
# 45000 → "40000-50000"
|
|
214
|
+
|
|
215
|
+
--mask "score:range:100"
|
|
216
|
+
# 234 → "200-300"
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
#### `noise`
|
|
220
|
+
Adds random noise to numeric values.
|
|
221
|
+
|
|
222
|
+
**Use cases:** Statistical privacy, differential privacy
|
|
223
|
+
```bash
|
|
224
|
+
--mask "revenue:noise:0.1"
|
|
225
|
+
# 100000 → 91234 (±10% random noise)
|
|
226
|
+
|
|
227
|
+
--mask "temperature:noise:0.05"
|
|
228
|
+
# 98.6 → 97.2 (±5% random noise)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Date Masking
|
|
232
|
+
|
|
233
|
+
These algorithms transform date and datetime values.
|
|
234
|
+
|
|
235
|
+
#### `date_shift`
|
|
236
|
+
Adds or subtracts random days within a specified range.
|
|
237
|
+
|
|
238
|
+
**Use cases:** Preserving date relationships while obscuring exact dates
|
|
239
|
+
```bash
|
|
240
|
+
--mask "birth_date:date_shift:30"
|
|
241
|
+
# 1990-05-15 → 1990-06-02 (shifted ±30 days randomly)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
#### `year_only`
|
|
245
|
+
Keeps only the year portion of dates.
|
|
246
|
+
|
|
247
|
+
**Use cases:** Age analysis, cohort studies
|
|
248
|
+
```bash
|
|
249
|
+
--mask "registration_date:year_only"
|
|
250
|
+
# 2024-03-15 → 2024
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
#### `month_year`
|
|
254
|
+
Keeps only month and year.
|
|
255
|
+
|
|
256
|
+
**Use cases:** Seasonal analysis, monthly aggregations
|
|
257
|
+
```bash
|
|
258
|
+
--mask "purchase_date:month_year"
|
|
259
|
+
# 2024-03-15 → "2024-03"
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## Use Case Examples
|
|
263
|
+
|
|
264
|
+
### GDPR Compliance for Development Environment
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
ingestr ingest \
|
|
268
|
+
--source-uri "postgres://prod_user:pass@prod.db/customers" \
|
|
269
|
+
--source-table "customer_data" \
|
|
270
|
+
--dest-uri "postgres://dev_user:pass@dev.db/customers" \
|
|
271
|
+
--dest-table "customer_data" \
|
|
272
|
+
--mask "email:hash" \
|
|
273
|
+
--mask "phone:phone" \
|
|
274
|
+
--mask "name:partial:1" \
|
|
275
|
+
--mask "address:redact" \
|
|
276
|
+
--mask "ip_address:hash" \
|
|
277
|
+
--mask "birth_date:year_only"
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Healthcare Data for Analytics
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
ingestr ingest \
|
|
284
|
+
--source-uri "mysql://user:pass@hospital.db/patients" \
|
|
285
|
+
--source-table "patient_records" \
|
|
286
|
+
--dest-uri "bigquery://project/dataset" \
|
|
287
|
+
--dest-table "patient_analytics" \
|
|
288
|
+
--mask "patient_id:uuid" \
|
|
289
|
+
--mask "ssn:redact" \
|
|
290
|
+
--mask "diagnosis_notes:redact" \
|
|
291
|
+
--mask "admission_date:date_shift:7" \
|
|
292
|
+
--mask "age:round:5"
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### Financial Data for Testing
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
ingestr ingest \
|
|
299
|
+
--source-uri "snowflake://account/database/schema" \
|
|
300
|
+
--source-table "transactions" \
|
|
301
|
+
--dest-uri "duckdb:///test_data.db" \
|
|
302
|
+
--dest-table "test_transactions" \
|
|
303
|
+
--mask "account_number:sequential" \
|
|
304
|
+
--mask "card_number:credit_card" \
|
|
305
|
+
--mask "amount:noise:0.2" \
|
|
306
|
+
--mask "merchant_name:fixed:TEST_MERCHANT"
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### E-commerce Data Sharing
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
ingestr ingest \
|
|
313
|
+
--source-uri "postgres://internal.db/ecommerce" \
|
|
314
|
+
--source-table "orders" \
|
|
315
|
+
--dest-uri "s3://partner-bucket/data.parquet" \
|
|
316
|
+
--dest-table "shared_orders" \
|
|
317
|
+
--mask "customer_email:email" \
|
|
318
|
+
--mask "shipping_address:first_letter" \
|
|
319
|
+
--mask "order_value:round:10" \
|
|
320
|
+
--mask "customer_name:partial:2"
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## Best Practices
|
|
324
|
+
|
|
325
|
+
### Choosing the Right Algorithm
|
|
326
|
+
|
|
327
|
+
1. **For PII (Personally Identifiable Information)**
|
|
328
|
+
- Use `hash` for consistent anonymization
|
|
329
|
+
- Use `redact` for complete removal
|
|
330
|
+
- Use format-preserving masks (`email`, `phone`, `ssn`) for maintaining data structure
|
|
331
|
+
|
|
332
|
+
2. **For Development/Testing**
|
|
333
|
+
- Use `uuid` or `sequential` for maintaining relationships
|
|
334
|
+
- Use `random` for generating test data
|
|
335
|
+
- Use `partial` for semi-realistic data
|
|
336
|
+
|
|
337
|
+
3. **For Analytics**
|
|
338
|
+
- Use `round` or `range` for numerical aggregations
|
|
339
|
+
- Use `date_shift` for time-series analysis
|
|
340
|
+
- Use `year_only` or `month_year` for temporal grouping
|
|
341
|
+
|
|
342
|
+
4. **For Compliance**
|
|
343
|
+
- GDPR: Consider `hash`, `redact`, or `uuid` for personal data
|
|
344
|
+
- HIPAA: Use `redact` for medical records, `date_shift` for dates
|
|
345
|
+
- PCI DSS: Use `credit_card` for card numbers
|
|
346
|
+
|
|
347
|
+
### Performance Considerations
|
|
348
|
+
|
|
349
|
+
- **Hash-based algorithms** are fast and consistent
|
|
350
|
+
- **Random algorithms** have minimal overhead but don't preserve consistency
|
|
351
|
+
- **Format-preserving masks** have moderate performance impact
|
|
352
|
+
- **Multiple masks** can be applied efficiently in a single pass
|
|
353
|
+
|
|
354
|
+
### Security Notes
|
|
355
|
+
|
|
356
|
+
1. **Hashed values** are one-way transformations but may be vulnerable to rainbow table attacks for common values; prefer `hmac` with a secret key when inputs are guessable
|
|
357
|
+
2. **Partial masking** may not provide sufficient protection for highly sensitive data
|
|
358
|
+
3. **Date shifting** preserves intervals between dates, which may leak information
|
|
359
|
+
4. **Consistent tokenization** (uuid, hash) maintains relationships which could be exploited
|
|
360
|
+
5. Always validate that your masking strategy meets your compliance requirements
|
|
361
|
+
|
|
362
|
+
## Environment Variables
|
|
363
|
+
|
|
364
|
+
You can also set masking configurations via environment variables:
|
|
365
|
+
|
|
366
|
+
```bash
|
|
367
|
+
export INGESTR_MASK="email:hash,phone:partial:3,ssn:redact"
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
Multiple masks should be comma-separated when using environment variables.
|
|
371
|
+
|
|
372
|
+
## Limitations
|
|
373
|
+
|
|
374
|
+
- Masking is applied in-memory during the ingestion process
|
|
375
|
+
- The original source data remains unchanged
|
|
376
|
+
- Some algorithms require additional dependencies (e.g., `date_shift` requires `python-dateutil`)
|
|
377
|
+
- Masking adds processing overhead proportional to the data volume and number of masks applied
|
|
@@ -282,6 +282,13 @@ def ingest(
|
|
|
282
282
|
envvar=["STAGING_BUCKET", "INGESTR_STAGING_BUCKET"],
|
|
283
283
|
),
|
|
284
284
|
] = None, # type: ignore
|
|
285
|
+
mask: Annotated[
|
|
286
|
+
Optional[list[str]],
|
|
287
|
+
typer.Option(
|
|
288
|
+
help="Column masking configuration in format 'column:algorithm[:param]'. Can be specified multiple times.",
|
|
289
|
+
envvar=["MASK", "INGESTR_MASK"],
|
|
290
|
+
),
|
|
291
|
+
] = [], # type: ignore
|
|
285
292
|
):
|
|
286
293
|
import hashlib
|
|
287
294
|
import tempfile
|
|
@@ -302,6 +309,7 @@ def ingest(
|
|
|
302
309
|
from ingestr.src.filters import (
|
|
303
310
|
cast_set_to_list,
|
|
304
311
|
cast_spanner_types,
|
|
312
|
+
create_masking_filter,
|
|
305
313
|
handle_mysql_empty_dates,
|
|
306
314
|
)
|
|
307
315
|
from ingestr.src.sources import MongoDbSource
|
|
@@ -562,6 +570,10 @@ def ingest(
|
|
|
562
570
|
if factory.source_scheme.startswith("spanner"):
|
|
563
571
|
resource.for_each(dlt_source, lambda x: x.add_map(cast_spanner_types))
|
|
564
572
|
|
|
573
|
+
if mask:
|
|
574
|
+
masking_filter = create_masking_filter(mask)
|
|
575
|
+
resource.for_each(dlt_source, lambda x: x.add_map(masking_filter))
|
|
576
|
+
|
|
565
577
|
if yield_limit:
|
|
566
578
|
resource.for_each(dlt_source, lambda x: x.add_limit(yield_limit))
|
|
567
579
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "v0.13.88"
|
|
@@ -51,3 +51,12 @@ def table_adapter_exclude_columns(cols: list[str]):
|
|
|
51
51
|
table._columns.remove(col) # type: ignore
|
|
52
52
|
|
|
53
53
|
return excluder
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def create_masking_filter(mask_configs: list[str]):
|
|
57
|
+
from ingestr.src.masking import create_masking_mapper
|
|
58
|
+
|
|
59
|
+
if not mask_configs:
|
|
60
|
+
return lambda x: x
|
|
61
|
+
|
|
62
|
+
return create_masking_mapper(mask_configs)
|
|
@@ -14,14 +14,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
|
|
|
14
14
|
)
|
|
15
15
|
def frankfurter_source(
|
|
16
16
|
start_date: TAnyDateTime,
|
|
17
|
-
end_date: TAnyDateTime|None,
|
|
17
|
+
end_date: TAnyDateTime | None,
|
|
18
18
|
base_currency: str,
|
|
19
19
|
) -> Any:
|
|
20
20
|
"""
|
|
21
21
|
A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
|
|
22
22
|
various types of data: currencies, latest rates, historical rates.
|
|
23
23
|
"""
|
|
24
|
-
|
|
25
24
|
|
|
26
25
|
@dlt.resource(
|
|
27
26
|
write_disposition="replace",
|
|
@@ -36,7 +35,6 @@ def frankfurter_source(
|
|
|
36
35
|
for currency_code, currency_name in currencies_data.items():
|
|
37
36
|
yield {"currency_code": currency_code, "currency_name": currency_name}
|
|
38
37
|
|
|
39
|
-
|
|
40
38
|
@dlt.resource(
|
|
41
39
|
write_disposition="merge",
|
|
42
40
|
columns={
|
|
@@ -81,7 +79,6 @@ def frankfurter_source(
|
|
|
81
79
|
"base_currency": base_currency,
|
|
82
80
|
}
|
|
83
81
|
|
|
84
|
-
|
|
85
82
|
@dlt.resource(
|
|
86
83
|
write_disposition="merge",
|
|
87
84
|
columns={
|
|
@@ -93,13 +90,13 @@ def frankfurter_source(
|
|
|
93
90
|
primary_key=("date", "currency_code", "base_currency"),
|
|
94
91
|
)
|
|
95
92
|
def exchange_rates(
|
|
96
|
-
date_time
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
93
|
+
date_time=dlt.sources.incremental(
|
|
94
|
+
"date",
|
|
95
|
+
initial_value=start_date,
|
|
96
|
+
end_value=end_date,
|
|
97
|
+
range_start="closed",
|
|
98
|
+
range_end="closed",
|
|
99
|
+
),
|
|
103
100
|
) -> Iterator[dict]:
|
|
104
101
|
"""
|
|
105
102
|
Fetches exchange rates for a specified date range.
|
|
@@ -115,9 +112,9 @@ def frankfurter_source(
|
|
|
115
112
|
end_date = date_time.end_value
|
|
116
113
|
else:
|
|
117
114
|
end_date = pendulum.now()
|
|
118
|
-
|
|
115
|
+
|
|
119
116
|
# Ensure start_date.last_value is a pendulum.DateTime object
|
|
120
|
-
start_date_obj = ensure_pendulum_datetime(start_date)
|
|
117
|
+
start_date_obj = ensure_pendulum_datetime(start_date) # type: ignore
|
|
121
118
|
start_date_str = start_date_obj.format("YYYY-MM-DD")
|
|
122
119
|
|
|
123
120
|
# Ensure end_date is a pendulum.DateTime object
|
|
@@ -158,4 +155,3 @@ def frankfurter_source(
|
|
|
158
155
|
}
|
|
159
156
|
|
|
160
157
|
return currencies, latest, exchange_rates
|
|
161
|
-
|
|
@@ -16,9 +16,9 @@ def get_path_with_retry(path: str) -> StrAny:
|
|
|
16
16
|
return get_url_with_retry(f"{FRANKFURTER_API_URL}{path}")
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def validate_dates(start_date: datetime, end_date: datetime|None) -> None:
|
|
19
|
+
def validate_dates(start_date: datetime, end_date: datetime | None) -> None:
|
|
20
20
|
current_date = pendulum.now()
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
# Check if start_date is in the future
|
|
23
23
|
if start_date > current_date:
|
|
24
24
|
raise ValueError("Interval-start cannot be in the future.")
|