ingestr 0.10.2__tar.gz → 0.10.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr-0.10.4/.githooks/pre-commit-hook.sh +23 -0
- ingestr-0.10.4/.github/workflows/secrets-scan.yml +12 -0
- ingestr-0.10.4/.gitleaksignore +3 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/Makefile +6 -2
- {ingestr-0.10.2 → ingestr-0.10.4}/PKG-INFO +18 -2
- {ingestr-0.10.2 → ingestr-0.10.4}/README.md +17 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/.vitepress/config.mjs +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/quickstart.md +11 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/adjust.md +2 -2
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/athena.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/gorgias.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/gsheets.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/kafka.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/klaviyo.md +2 -2
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/notion.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/shopify.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/stripe.md +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/zendesk.md +2 -2
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/main.py +10 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/adjust/__init__.py +4 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/chess/__init__.py +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/destinations.py +8 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/hubspot/__init__.py +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/sources.py +1 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/stripe_analytics/__init__.py +1 -1
- ingestr-0.10.4/ingestr/src/version.py +1 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/__init__.py +0 -1
- ingestr-0.10.2/ingestr/src/version.py +0 -1
- {ingestr-0.10.2 → ingestr-0.10.4}/.dockerignore +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/.github/workflows/tests.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/.gitignore +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/.python-version +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/.vale.ini +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/Dockerfile +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/LICENSE.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/commands/example-uris.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/commands/ingest.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/index.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/media/athena.png +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/.gitignore +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/factory.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filters.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/package-lock.json +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/package.json +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/pyproject.toml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/requirements-dev.txt +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/requirements.txt +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/resources/demo.gif +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/resources/demo.tape +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/resources/ingestr.svg +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Colons.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Gender.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Headings.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Latin.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Parens.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Passive.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Periods.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Slang.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Units.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/We.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Will.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/WordList.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/meta.json +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/vocab.txt +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.10.2 → ingestr-0.10.4}/styles/config/vocabularies/bruin/accept.txt +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
echo "scanning for secrets ..."
|
|
6
|
+
|
|
7
|
+
WORK_DIR="/root/code"
|
|
8
|
+
|
|
9
|
+
secret_detected() {
|
|
10
|
+
echo "secrets detected in source code. commit aborted."
|
|
11
|
+
exit 1
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
# use gitleaks binary if available
|
|
15
|
+
# else fallback to using docker for running gitleaks
|
|
16
|
+
CMD="gitleaks dir -v"
|
|
17
|
+
|
|
18
|
+
if [[ ! `which gitleaks` ]]; then
|
|
19
|
+
which docker > /dev/null || (echo "gitleaks or docker is required for running secrets scan." && exit 1)
|
|
20
|
+
CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR ghcr.io/gitleaks/gitleaks:latest dir -v"
|
|
21
|
+
fi
|
|
22
|
+
|
|
23
|
+
$CMD || secret_detected
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
name: secrets_scan
|
|
2
|
+
on: [pull_request, push, workflow_dispatch]
|
|
3
|
+
jobs:
|
|
4
|
+
scan:
|
|
5
|
+
name: gitleaks
|
|
6
|
+
runs-on: ubuntu-latest
|
|
7
|
+
steps:
|
|
8
|
+
- uses: actions/checkout@v3
|
|
9
|
+
with:
|
|
10
|
+
fetch-depth: 0
|
|
11
|
+
- name: scan for secrets (gitleaks)
|
|
12
|
+
run: docker run -v $PWD:/code -w /code ghcr.io/gitleaks/gitleaks:latest dir -v
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
.ONESHELL:
|
|
2
|
-
.PHONY: test lint format
|
|
2
|
+
.PHONY: test lint format test-ci lint-ci build upload-release setup
|
|
3
3
|
|
|
4
4
|
venv: venv/touchfile
|
|
5
5
|
|
|
6
6
|
venv/touchfile: requirements-dev.txt requirements.txt
|
|
7
7
|
test -d venv || python3 -m venv venv
|
|
8
|
-
. venv/bin/activate; $(MAKE) deps
|
|
8
|
+
. venv/bin/activate; pip install uv; $(MAKE) deps
|
|
9
9
|
touch venv/touchfile
|
|
10
10
|
|
|
11
11
|
deps:
|
|
@@ -40,3 +40,7 @@ build:
|
|
|
40
40
|
|
|
41
41
|
upload-release:
|
|
42
42
|
twine upload --verbose dist/*
|
|
43
|
+
|
|
44
|
+
setup:
|
|
45
|
+
@echo "installing git hooks ..."
|
|
46
|
+
@install -m 755 .githooks/pre-commit-hook.sh .git/hooks/pre-commit
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.10.2
|
|
3
|
+
Version: 0.10.4
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -74,11 +74,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
|
|
|
74
74
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
75
75
|
|
|
76
76
|
## Installation
|
|
77
|
+
We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
|
|
77
78
|
|
|
78
79
|
```
|
|
79
|
-
pip install
|
|
80
|
+
pip install uv
|
|
81
|
+
uvx ingestr
|
|
80
82
|
```
|
|
81
83
|
|
|
84
|
+
Alternatively, if you'd like to install it globally:
|
|
85
|
+
```
|
|
86
|
+
uv pip install --system ingestr
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
While installation with vanilla `pip` is possible, it's an order of magnitude slower.
|
|
90
|
+
|
|
82
91
|
## Quickstart
|
|
83
92
|
|
|
84
93
|
```bash
|
|
@@ -104,6 +113,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
|
|
|
104
113
|
|
|
105
114
|
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
106
115
|
|
|
116
|
+
## Contributing
|
|
117
|
+
|
|
118
|
+
Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
|
|
119
|
+
|
|
120
|
+
> [!NOTE]
|
|
121
|
+
> After cloning `ingestr` make sure to run `make setup` to install githooks.
|
|
122
|
+
|
|
107
123
|
## Supported sources & destinations
|
|
108
124
|
|
|
109
125
|
<table>
|
|
@@ -21,11 +21,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
|
|
|
21
21
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
22
22
|
|
|
23
23
|
## Installation
|
|
24
|
+
We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
|
|
24
25
|
|
|
25
26
|
```
|
|
26
|
-
pip install
|
|
27
|
+
pip install uv
|
|
28
|
+
uvx ingestr
|
|
27
29
|
```
|
|
28
30
|
|
|
31
|
+
Alternatively, if you'd like to install it globally:
|
|
32
|
+
```
|
|
33
|
+
uv pip install --system ingestr
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
While installation with vanilla `pip` is possible, it's an order of magnitude slower.
|
|
37
|
+
|
|
29
38
|
## Quickstart
|
|
30
39
|
|
|
31
40
|
```bash
|
|
@@ -51,6 +60,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
|
|
|
51
60
|
|
|
52
61
|
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
53
62
|
|
|
63
|
+
## Contributing
|
|
64
|
+
|
|
65
|
+
Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
|
|
66
|
+
|
|
67
|
+
> [!NOTE]
|
|
68
|
+
> After cloning `ingestr` make sure to run `make setup` to install githooks.
|
|
69
|
+
|
|
54
70
|
## Supported sources & destinations
|
|
55
71
|
|
|
56
72
|
<table>
|
|
@@ -57,7 +57,7 @@ export default defineConfig({
|
|
|
57
57
|
text: "Databases",
|
|
58
58
|
collapsed: false,
|
|
59
59
|
items: [
|
|
60
|
-
{ text: "Athena", link: "/supported-sources/athena.md" },
|
|
60
|
+
{ text: "AWS Athena", link: "/supported-sources/athena.md" },
|
|
61
61
|
{ text: "AWS Redshift", link: "/supported-sources/redshift.md" },
|
|
62
62
|
{ text: "Databricks", link: "/supported-sources/databricks.md" },
|
|
63
63
|
{ text: "DuckDB", link: "/supported-sources/duckdb.md" },
|
|
@@ -13,9 +13,19 @@ ingestr takes away the complexity of managing any backend or writing any code fo
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
## Installation
|
|
16
|
+
We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
pip install uv
|
|
20
|
+
uvx ingestr
|
|
16
21
|
```
|
|
17
|
-
|
|
22
|
+
|
|
23
|
+
Alternatively, if you'd like to install it globally:
|
|
18
24
|
```
|
|
25
|
+
uv pip install --system ingestr
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
While installation with vanilla `pip` is possible, it's an order of magnitude slower.
|
|
19
29
|
|
|
20
30
|
## Quickstart
|
|
21
31
|
|
|
@@ -62,7 +62,7 @@ ingestr ingest \
|
|
|
62
62
|
--source-uri 'adjust://?api_key=nr_123' \
|
|
63
63
|
--source-table 'campaigns' \
|
|
64
64
|
--dest-uri duckdb:///adjust.duckdb \
|
|
65
|
-
--dest-table '
|
|
65
|
+
--dest-table 'dest.output'
|
|
66
66
|
```
|
|
67
67
|
|
|
68
68
|
Copy creatives data from Adjust into a DuckDB database:
|
|
@@ -71,7 +71,7 @@ ingestr ingest \
|
|
|
71
71
|
--source-uri 'adjust://?api_key=nr_123' \
|
|
72
72
|
--source-table 'creatives' \
|
|
73
73
|
--dest-uri duckdb:///adjust.duckdb \
|
|
74
|
-
--dest-table '
|
|
74
|
+
--dest-table 'dest.output'
|
|
75
75
|
```
|
|
76
76
|
|
|
77
77
|
Copy custom data from Adjust into a DuckDB database:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Athena
|
|
1
|
+
# AWS Athena
|
|
2
2
|
[Athena](https://aws.amazon.com/athena/) is an interactive query service that allows users to analyze data directly in Amazon S3 using standard SQL.
|
|
3
3
|
|
|
4
4
|
The Athena destination stores data as Parquet files in S3 buckets and creates external tables in AWS Glue Catalog.
|
|
@@ -23,7 +23,7 @@ The URI is used to connect to the Gorgias API for extracting data.
|
|
|
23
23
|
ingestr ingest --source-table 'tickets' --source-uri $GORGIAS_URI --dest-uri $BIGQUERY_URI --interval-start 2024-06-19 --dest-table 'gorgias.ticket_messages' --loader-file-format jsonl
|
|
24
24
|
|
|
25
25
|
# get all the customers and write them to `gorgias.customers` table on DuckDB
|
|
26
|
-
ingestr ingest --source-table 'customers' --source-uri $GORGIAS_URI --dest-uri duckdb:///gorgias.duckdb --interval-start 2024-01-01 --dest-table '
|
|
26
|
+
ingestr ingest --source-table 'customers' --source-uri $GORGIAS_URI --dest-uri duckdb:///gorgias.duckdb --interval-start 2024-01-01 --dest-table 'dest.customers'
|
|
27
27
|
```
|
|
28
28
|
|
|
29
29
|
## Supported entities
|
|
@@ -37,7 +37,7 @@ Once you complete the guide, you should have a service account JSON file and the
|
|
|
37
37
|
Based on this assumption, here's a sample command that will copy the data from the Google Sheets spreadsheet into a DuckDB database:
|
|
38
38
|
|
|
39
39
|
```sh
|
|
40
|
-
ingestr ingest --source-uri 'gsheets://?credentials_path=/path/to/file.json' --source-table 'fkdUQ2bjdNfUq2CA.Sheet1' --dest-uri duckdb:///gsheets.duckdb --dest-table '
|
|
40
|
+
ingestr ingest --source-uri 'gsheets://?credentials_path=/path/to/file.json' --source-table 'fkdUQ2bjdNfUq2CA.Sheet1' --dest-uri duckdb:///gsheets.duckdb --dest-table 'dest.output'
|
|
41
41
|
```
|
|
42
42
|
|
|
43
43
|
The result of this command will be a table in the `gsheets.duckdb` database.
|
|
@@ -32,7 +32,7 @@ ingestr ingest \
|
|
|
32
32
|
--source-uri 'kafka://?bootstrap_servers=localhost:9092&group_id=test_group' \
|
|
33
33
|
--source-table 'my-topic' \
|
|
34
34
|
--dest-uri duckdb:///kafka.duckdb \
|
|
35
|
-
--dest-table '
|
|
35
|
+
--dest-table 'dest.my_topic'
|
|
36
36
|
```
|
|
37
37
|
|
|
38
38
|
The result of this command will be a table in the `kafka.duckdb` database with JSON columns.
|
|
@@ -19,10 +19,10 @@ URI parameters:
|
|
|
19
19
|
The URI is used to connect to the Klaviyo API for extracting data.
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table '
|
|
22
|
+
ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table 'dest.events' --extract-parallelism 20
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
This command fetches all the events that are created/updated since 2022-01-01 and writes them to `
|
|
25
|
+
This command fetches all the events that are created/updated since 2022-01-01 and writes them to `dest.events` table on DuckDB, using 20 parallel threads to improve performance and efficiently handle large data.
|
|
26
26
|
|
|
27
27
|
## Tables
|
|
28
28
|
|
|
@@ -22,7 +22,7 @@ Notion requires a few steps to set up an integration, please follow the guide dl
|
|
|
22
22
|
Once you complete the guide, you should have an API key, and the table ID to connect to. Let's say your API token is `secret_12345` and the database you'd like to connect to is `bfeaafc0c25f40a9asdasd672a9456f3`, here's a sample command that will copy the data from the Notion table into a DuckDB database:
|
|
23
23
|
|
|
24
24
|
```sh
|
|
25
|
-
ingestr ingest --source-uri 'notion://?api_key=secret_12345' --source-table 'bfeaafc0c25f40a9asdasd672a9456f3' --dest-uri duckdb:///notion.duckdb --dest-table '
|
|
25
|
+
ingestr ingest --source-uri 'notion://?api_key=secret_12345' --source-table 'bfeaafc0c25f40a9asdasd672a9456f3' --dest-uri duckdb:///notion.duckdb --dest-table 'dest.output'
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
The result of this command will be a table in the `notion.duckdb` database with JSON columns.
|
|
@@ -23,7 +23,7 @@ Shopify requires a few steps to set up an integration, please follow the guide d
|
|
|
23
23
|
Once you complete the guide, you should have an API key and the store name to connect to. Let's say your API key is `shpkey_12345` and the store you'd like to connect to is `my-store`, here's a sample command that will copy the data from the Shopify store into a DuckDB database:
|
|
24
24
|
|
|
25
25
|
```sh
|
|
26
|
-
ingestr ingest --source-uri 'shopify://my-store.myshopify.com?api_key=shpkey_12345' --source-table 'orders' --dest-uri duckdb:///shopify.duckdb --dest-table '
|
|
26
|
+
ingestr ingest --source-uri 'shopify://my-store.myshopify.com?api_key=shpkey_12345' --source-table 'orders' --dest-uri duckdb:///shopify.duckdb --dest-table 'dest.orders'
|
|
27
27
|
```
|
|
28
28
|
|
|
29
29
|
The result of this command will be a table in the `shopify.duckdb` database with JSON columns.
|
|
@@ -25,7 +25,7 @@ Stripe requires a few steps to set up an integration, please follow the guide dl
|
|
|
25
25
|
Once you complete the guide, you should have an API key. Let's say your API key is `sk_test_12345`, here's a sample command that will copy the data from Stripe into a DuckDB database:
|
|
26
26
|
|
|
27
27
|
```sh
|
|
28
|
-
ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table '
|
|
28
|
+
ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table 'dest.charges'
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
The result of this command will be a table in the `stripe.duckdb` database with JSON columns.
|
|
@@ -39,7 +39,7 @@ Once you complete the guide, if you decide to use an OAuth token, you should hav
|
|
|
39
39
|
ingestr ingest --source-uri "zendesk://:qVsbdiasVt@mycompany" \
|
|
40
40
|
--source-table 'tickets' \
|
|
41
41
|
--dest-uri 'duckdb:///zendesk.duckdb' \
|
|
42
|
-
--dest-table '
|
|
42
|
+
--dest-table 'dest.tickets' \
|
|
43
43
|
--interval-start '2024-01-01'
|
|
44
44
|
```
|
|
45
45
|
|
|
@@ -49,7 +49,7 @@ If you decide to use an API Token, you should have a subdomain, email, and API t
|
|
|
49
49
|
ingestr ingest --source-uri "zendesk://john@get.com:nbs123@mycompany" \
|
|
50
50
|
--source-table 'tickets' \
|
|
51
51
|
--dest-uri 'duckdb:///zendesk.duckdb' \
|
|
52
|
-
--dest-table '
|
|
52
|
+
--dest-table 'dest.tickets' \
|
|
53
53
|
--interval-start '2024-01-01'
|
|
54
54
|
```
|
|
55
55
|
|
|
@@ -288,6 +288,7 @@ def ingest(
|
|
|
288
288
|
),
|
|
289
289
|
] = [], # type: ignore
|
|
290
290
|
):
|
|
291
|
+
# TODO(turtledev): can't we move this to the top of this file?
|
|
291
292
|
import hashlib
|
|
292
293
|
import tempfile
|
|
293
294
|
from datetime import datetime
|
|
@@ -383,6 +384,15 @@ def ingest(
|
|
|
383
384
|
)
|
|
384
385
|
|
|
385
386
|
factory = SourceDestinationFactory(source_uri, dest_uri)
|
|
387
|
+
track(
|
|
388
|
+
"command_running",
|
|
389
|
+
{
|
|
390
|
+
"command": "ingest",
|
|
391
|
+
"source_type": factory.source_scheme,
|
|
392
|
+
"destination_type": factory.destination_scheme,
|
|
393
|
+
},
|
|
394
|
+
)
|
|
395
|
+
|
|
386
396
|
source = factory.get_source()
|
|
387
397
|
destination = factory.get_destination()
|
|
388
398
|
|
|
@@ -67,13 +67,16 @@ def adjust_source(
|
|
|
67
67
|
filters=filters,
|
|
68
68
|
)
|
|
69
69
|
|
|
70
|
+
if not dimensions:
|
|
71
|
+
return campaigns, creatives
|
|
72
|
+
|
|
70
73
|
merge_key = merge_key
|
|
74
|
+
type_hints = {}
|
|
71
75
|
for dimension in REQUIRED_CUSTOM_DIMENSIONS:
|
|
72
76
|
if dimension in dimensions:
|
|
73
77
|
merge_key = dimension
|
|
74
78
|
break
|
|
75
79
|
|
|
76
|
-
type_hints = {}
|
|
77
80
|
for dimension in dimensions:
|
|
78
81
|
if dimension in KNOWN_TYPE_HINTS:
|
|
79
82
|
type_hints[dimension] = KNOWN_TYPE_HINTS[dimension]
|
|
@@ -12,7 +12,7 @@ from .helpers import get_path_with_retry, get_url_with_retry, validate_month_str
|
|
|
12
12
|
from .settings import UNOFFICIAL_CHESS_API_URL
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
@dlt.source(name="chess")
|
|
15
|
+
@dlt.source(name="chess", max_table_nesting=0)
|
|
16
16
|
def source(
|
|
17
17
|
players: List[str], start_month: str = None, end_month: str = None
|
|
18
18
|
) -> Sequence[DltResource]:
|
|
@@ -250,7 +250,14 @@ class AthenaDestination:
|
|
|
250
250
|
)
|
|
251
251
|
|
|
252
252
|
def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
|
|
253
|
-
|
|
253
|
+
table_fields = table.split(".")
|
|
254
|
+
if len(table_fields) != 2:
|
|
255
|
+
raise ValueError("Table name must be in the format <schema>.<table>")
|
|
256
|
+
return {
|
|
257
|
+
"table_format": "iceberg",
|
|
258
|
+
"dataset_name": table_fields[-2],
|
|
259
|
+
"table_name": table_fields[-1],
|
|
260
|
+
}
|
|
254
261
|
|
|
255
262
|
def post_load(self):
|
|
256
263
|
pass
|
|
@@ -50,7 +50,7 @@ from .settings import (
|
|
|
50
50
|
THubspotObjectType = Literal["company", "contact", "deal", "ticket", "product", "quote"]
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
@dlt.source(name="hubspot")
|
|
53
|
+
@dlt.source(name="hubspot", max_table_nesting=0)
|
|
54
54
|
def hubspot(
|
|
55
55
|
api_key: str = dlt.secrets.value,
|
|
56
56
|
include_history: bool = False,
|
|
@@ -11,7 +11,7 @@ from .helpers import pagination, transform_date
|
|
|
11
11
|
from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
@dlt.source
|
|
14
|
+
@dlt.source(max_table_nesting=0)
|
|
15
15
|
def stripe_source(
|
|
16
16
|
endpoints: Tuple[str, ...] = ENDPOINTS,
|
|
17
17
|
stripe_secret_key: str = dlt.secrets.value,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.10.4"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.10.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|