ingestr 0.10.1__tar.gz → 0.10.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr-0.10.3/.githooks/pre-commit-hook.sh +23 -0
- ingestr-0.10.3/.github/workflows/secrets-scan.yml +12 -0
- ingestr-0.10.3/.gitleaksignore +3 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/Makefile +6 -2
- {ingestr-0.10.1 → ingestr-0.10.3}/PKG-INFO +21 -3
- {ingestr-0.10.1 → ingestr-0.10.3}/README.md +17 -1
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/.vitepress/config.mjs +1 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/getting-started/quickstart.md +11 -1
- ingestr-0.10.3/docs/media/athena.png +0 -0
- ingestr-0.10.3/docs/supported-sources/athena.md +36 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/main.py +11 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/destinations.py +61 -1
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/factory.py +2 -0
- ingestr-0.10.3/ingestr/src/version.py +1 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/package-lock.json +142 -89
- {ingestr-0.10.1 → ingestr-0.10.3}/package.json +1 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/requirements.txt +3 -2
- ingestr-0.10.1/ingestr/src/version.py +0 -1
- {ingestr-0.10.1 → ingestr-0.10.3}/.dockerignore +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/.github/workflows/tests.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/.gitignore +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/.python-version +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/.vale.ini +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/Dockerfile +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/LICENSE.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/commands/example-uris.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/commands/ingest.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/index.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/.gitignore +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/filters.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/sources.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/pyproject.toml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/requirements-dev.txt +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/resources/demo.gif +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/resources/demo.tape +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/resources/ingestr.svg +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Colons.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Gender.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Headings.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Latin.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Parens.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Passive.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Periods.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Slang.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Units.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/We.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/Will.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/WordList.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/meta.json +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/Google/vocab.txt +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.10.1 → ingestr-0.10.3}/styles/config/vocabularies/bruin/accept.txt +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
echo "scanning for secrets ..."
|
|
6
|
+
|
|
7
|
+
WORK_DIR="/root/code"
|
|
8
|
+
|
|
9
|
+
secret_detected() {
|
|
10
|
+
echo "secrets detected in source code. commit aborted."
|
|
11
|
+
exit 1
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
# use gitleaks binary if available
|
|
15
|
+
# else fallback to using docker for running gitleaks
|
|
16
|
+
CMD="gitleaks dir -v"
|
|
17
|
+
|
|
18
|
+
if [[ ! `which gitleaks` ]]; then
|
|
19
|
+
which docker > /dev/null || (echo "gitleaks or docker is required for running secrets scan." && exit 1)
|
|
20
|
+
CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR ghcr.io/gitleaks/gitleaks:latest dir -v"
|
|
21
|
+
fi
|
|
22
|
+
|
|
23
|
+
$CMD || secret_detected
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
name: secrets_scan
|
|
2
|
+
on: [pull_request, push, workflow_dispatch]
|
|
3
|
+
jobs:
|
|
4
|
+
scan:
|
|
5
|
+
name: gitleaks
|
|
6
|
+
runs-on: ubuntu-latest
|
|
7
|
+
steps:
|
|
8
|
+
- uses: actions/checkout@v3
|
|
9
|
+
with:
|
|
10
|
+
fetch-depth: 0
|
|
11
|
+
- name: scan for secrets (gitleaks)
|
|
12
|
+
run: docker run -v $PWD:/code -w /code ghcr.io/gitleaks/gitleaks:latest dir -v
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
.ONESHELL:
|
|
2
|
-
.PHONY: test lint format
|
|
2
|
+
.PHONY: test lint format test-ci lint-ci build upload-release setup
|
|
3
3
|
|
|
4
4
|
venv: venv/touchfile
|
|
5
5
|
|
|
6
6
|
venv/touchfile: requirements-dev.txt requirements.txt
|
|
7
7
|
test -d venv || python3 -m venv venv
|
|
8
|
-
. venv/bin/activate; $(MAKE) deps
|
|
8
|
+
. venv/bin/activate; pip install uv; $(MAKE) deps
|
|
9
9
|
touch venv/touchfile
|
|
10
10
|
|
|
11
11
|
deps:
|
|
@@ -40,3 +40,7 @@ build:
|
|
|
40
40
|
|
|
41
41
|
upload-release:
|
|
42
42
|
twine upload --verbose dist/*
|
|
43
|
+
|
|
44
|
+
setup:
|
|
45
|
+
@echo "installing git hooks ..."
|
|
46
|
+
@install -m 755 .githooks/pre-commit-hook.sh .git/hooks/pre-commit
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.10.1
|
|
3
|
+
Version: 0.10.3
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -26,13 +26,15 @@ Requires-Dist: pendulum==3.0.0
|
|
|
26
26
|
Requires-Dist: psycopg2-binary==2.9.10
|
|
27
27
|
Requires-Dist: py-machineid==0.6.0
|
|
28
28
|
Requires-Dist: pyairtable==2.3.3
|
|
29
|
+
Requires-Dist: pyarrow==18.1.0
|
|
30
|
+
Requires-Dist: pyathena==3.9.0
|
|
29
31
|
Requires-Dist: pymongo==4.10.1
|
|
30
32
|
Requires-Dist: pymysql==1.1.1
|
|
31
33
|
Requires-Dist: pyrate-limiter==3.7.0
|
|
32
34
|
Requires-Dist: redshift-connector==2.1.3
|
|
33
35
|
Requires-Dist: rich==13.9.4
|
|
34
36
|
Requires-Dist: rudder-sdk-python==2.1.4
|
|
35
|
-
Requires-Dist: s3fs==2024.
|
|
37
|
+
Requires-Dist: s3fs==2024.10.0
|
|
36
38
|
Requires-Dist: snowflake-sqlalchemy==1.6.1
|
|
37
39
|
Requires-Dist: sqlalchemy-bigquery==1.12.0
|
|
38
40
|
Requires-Dist: sqlalchemy-hana==2.0.0
|
|
@@ -72,11 +74,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
|
|
|
72
74
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
73
75
|
|
|
74
76
|
## Installation
|
|
77
|
+
We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
|
|
75
78
|
|
|
76
79
|
```
|
|
77
|
-
pip install
|
|
80
|
+
pip install uv
|
|
81
|
+
uvx ingestr
|
|
78
82
|
```
|
|
79
83
|
|
|
84
|
+
Alternatively, if you'd like to install it globally:
|
|
85
|
+
```
|
|
86
|
+
uv pip install --system ingestr
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
While installation with vanilla `pip` is possible, it's an order of magnitude slower.
|
|
90
|
+
|
|
80
91
|
## Quickstart
|
|
81
92
|
|
|
82
93
|
```bash
|
|
@@ -102,6 +113,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
|
|
|
102
113
|
|
|
103
114
|
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
104
115
|
|
|
116
|
+
## Contributing
|
|
117
|
+
|
|
118
|
+
Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
|
|
119
|
+
|
|
120
|
+
> [!NOTE]
|
|
121
|
+
> After cloning `ingestr` make sure to run `make setup` to install githooks.
|
|
122
|
+
|
|
105
123
|
## Supported sources & destinations
|
|
106
124
|
|
|
107
125
|
<table>
|
|
@@ -21,11 +21,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
|
|
|
21
21
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
22
22
|
|
|
23
23
|
## Installation
|
|
24
|
+
We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
|
|
24
25
|
|
|
25
26
|
```
|
|
26
|
-
pip install
|
|
27
|
+
pip install uv
|
|
28
|
+
uvx ingestr
|
|
27
29
|
```
|
|
28
30
|
|
|
31
|
+
Alternatively, if you'd like to install it globally:
|
|
32
|
+
```
|
|
33
|
+
uv pip install --system ingestr
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
While installation with vanilla `pip` is possible, it's an order of magnitude slower.
|
|
37
|
+
|
|
29
38
|
## Quickstart
|
|
30
39
|
|
|
31
40
|
```bash
|
|
@@ -51,6 +60,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
|
|
|
51
60
|
|
|
52
61
|
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
53
62
|
|
|
63
|
+
## Contributing
|
|
64
|
+
|
|
65
|
+
Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
|
|
66
|
+
|
|
67
|
+
> [!NOTE]
|
|
68
|
+
> After cloning `ingestr` make sure to run `make setup` to install githooks.
|
|
69
|
+
|
|
54
70
|
## Supported sources & destinations
|
|
55
71
|
|
|
56
72
|
<table>
|
|
@@ -57,6 +57,7 @@ export default defineConfig({
|
|
|
57
57
|
text: "Databases",
|
|
58
58
|
collapsed: false,
|
|
59
59
|
items: [
|
|
60
|
+
{ text: "Athena", link: "/supported-sources/athena.md" },
|
|
60
61
|
{ text: "AWS Redshift", link: "/supported-sources/redshift.md" },
|
|
61
62
|
{ text: "Databricks", link: "/supported-sources/databricks.md" },
|
|
62
63
|
{ text: "DuckDB", link: "/supported-sources/duckdb.md" },
|
|
@@ -13,9 +13,19 @@ ingestr takes away the complexity of managing any backend or writing any code fo
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
## Installation
|
|
16
|
+
We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
pip install uv
|
|
20
|
+
uvx ingestr
|
|
16
21
|
```
|
|
17
|
-
|
|
22
|
+
|
|
23
|
+
Alternatively, if you'd like to install it globally:
|
|
18
24
|
```
|
|
25
|
+
uv pip install --system ingestr
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
While installation with vanilla `pip` is possible, it's an order of magnitude slower.
|
|
19
29
|
|
|
20
30
|
## Quickstart
|
|
21
31
|
|
|
Binary file
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Athena
|
|
2
|
+
[Athena](https://aws.amazon.com/athena/) is an interactive query service that allows users to analyze data directly in Amazon S3 using standard SQL.
|
|
3
|
+
|
|
4
|
+
The Athena destination stores data as Parquet files in S3 buckets and creates external tables in AWS Glue Catalog.
|
|
5
|
+
|
|
6
|
+
ingestr supports Athena as a destination.
|
|
7
|
+
|
|
8
|
+
## URI format
|
|
9
|
+
The URI format for Athena is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
athena://?bucket=<your-destination-bucket> \
|
|
13
|
+
query_results_path=<your-query-results-location> \
|
|
14
|
+
access_key_id=<your-aws-access-key-id> \
|
|
15
|
+
secret_access_key=<your-aws-secret-access-key> \
|
|
16
|
+
region_name=<your-aws-region>
|
|
17
|
+
```
|
|
18
|
+
URI parameters:
|
|
19
|
+
- `bucket` (required): The name of the bucket where the data will be stored, containing the Parquet files that Athena will work with, e.g. `your_bucket_name` or `s3://your_bucket_name`.
|
|
20
|
+
- `access_key_id` and `secret_access_key` (required): These are AWS credentials that will be used to authenticate with AWS services like S3 and Athena.
|
|
21
|
+
- `region_name` (required): The AWS region of the Athena service and S3 buckets, e.g. `eu-central-1`.
|
|
22
|
+
- `query_results_path` (optional): The query location path where the results of Athena queries will be saved, e.g. `dest_path` or `s3://dest_path`. If not provided, it will default to the bucket specified in the `bucket` parameter.
|
|
23
|
+
- `workgroup` (optional): The name of the Athena workgroup, e.g. `my_group`.
|
|
24
|
+
|
|
25
|
+
## Setting up an Athena Integration
|
|
26
|
+
Athena requires a `bucket`, `access_key_id`, `secret_access_key` and `region_name` to access the S3 bucket. Please follow the guide on dltHub to obtain [credentials](https://dlthub.com/docs/dlt-ecosystem/destinations/athena#2-setup-bucket-storage-and-athena-credentials). Once you've completed the guide, you should have all the above-mentioned credentials.
|
|
27
|
+
```
|
|
28
|
+
ingestr ingest \
|
|
29
|
+
--source-uri "stripe://?api_key=key123" \
|
|
30
|
+
--source-table 'event' \
|
|
31
|
+
--dest-uri "athena://?bucket=bucket_123&access_key_id=access_123&secret_access_key=secret_123&region_name=eu-central-1" \
|
|
32
|
+
--dest-table 'stripe.event'
|
|
33
|
+
```
|
|
34
|
+
This is a sample command that will copy the data from the Stripe source into Athena.
|
|
35
|
+
|
|
36
|
+
<img alt="athena_img" src="../media/athena.png" />
|
|
@@ -288,6 +288,8 @@ def ingest(
|
|
|
288
288
|
),
|
|
289
289
|
] = [], # type: ignore
|
|
290
290
|
):
|
|
291
|
+
|
|
292
|
+
# TODO(turtledev): can't we move this to the top of this file?
|
|
291
293
|
import hashlib
|
|
292
294
|
import tempfile
|
|
293
295
|
from datetime import datetime
|
|
@@ -383,6 +385,15 @@ def ingest(
|
|
|
383
385
|
)
|
|
384
386
|
|
|
385
387
|
factory = SourceDestinationFactory(source_uri, dest_uri)
|
|
388
|
+
track(
|
|
389
|
+
"command_running",
|
|
390
|
+
{
|
|
391
|
+
"command": "ingest",
|
|
392
|
+
"source_type": factory.source_scheme,
|
|
393
|
+
"destination_type": factory.destination_scheme,
|
|
394
|
+
},
|
|
395
|
+
)
|
|
396
|
+
|
|
386
397
|
source = factory.get_source()
|
|
387
398
|
destination = factory.get_destination()
|
|
388
399
|
|
|
@@ -5,9 +5,10 @@ import json
|
|
|
5
5
|
import os
|
|
6
6
|
import shutil
|
|
7
7
|
import tempfile
|
|
8
|
-
from urllib.parse import parse_qs, urlparse
|
|
8
|
+
from urllib.parse import parse_qs, quote, urlparse
|
|
9
9
|
|
|
10
10
|
import dlt
|
|
11
|
+
from dlt.common.configuration.specs import AwsCredentials
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class GenericSqlDestination:
|
|
@@ -194,3 +195,62 @@ class CsvDestination(GenericSqlDestination):
|
|
|
194
195
|
csv_writer.writerow(json_obj)
|
|
195
196
|
|
|
196
197
|
shutil.rmtree(self.temp_path)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class AthenaDestination:
|
|
201
|
+
def dlt_dest(self, uri: str, **kwargs):
|
|
202
|
+
encoded_uri = quote(uri, safe=":/?&=")
|
|
203
|
+
source_fields = urlparse(encoded_uri)
|
|
204
|
+
source_params = parse_qs(source_fields.query)
|
|
205
|
+
|
|
206
|
+
bucket = source_params.get("bucket", [None])[0]
|
|
207
|
+
if not bucket:
|
|
208
|
+
raise ValueError("A bucket is required to connect to Athena.")
|
|
209
|
+
|
|
210
|
+
if not bucket.startswith("s3://"):
|
|
211
|
+
bucket = f"s3://{bucket}"
|
|
212
|
+
|
|
213
|
+
query_result_path = source_params.get("query_results_path", [None])[0]
|
|
214
|
+
if query_result_path:
|
|
215
|
+
if not query_result_path.startswith("s3://"):
|
|
216
|
+
query_result_path = f"s3://{query_result_path}"
|
|
217
|
+
else:
|
|
218
|
+
query_result_path = bucket
|
|
219
|
+
|
|
220
|
+
access_key_id = source_params.get("access_key_id", [None])[0]
|
|
221
|
+
if not access_key_id:
|
|
222
|
+
raise ValueError("The AWS access_key_id is required to connect to Athena.")
|
|
223
|
+
|
|
224
|
+
secret_access_key = source_params.get("secret_access_key", [None])[0]
|
|
225
|
+
if not secret_access_key:
|
|
226
|
+
raise ValueError("The AWS secret_access_key is required to connect Athena")
|
|
227
|
+
|
|
228
|
+
work_group = source_params.get("workgroup", [None])[0]
|
|
229
|
+
|
|
230
|
+
region_name = source_params.get("region_name", [None])[0]
|
|
231
|
+
if not region_name:
|
|
232
|
+
raise ValueError("The region_name is required to connect to Athena.")
|
|
233
|
+
|
|
234
|
+
os.environ["DESTINATION__BUCKET_URL"] = bucket
|
|
235
|
+
os.environ["DESTINATION__CREDENTIALS__AWS_ACCESS_KEY_ID"] = access_key_id
|
|
236
|
+
os.environ["DESTINATION__CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = (
|
|
237
|
+
secret_access_key
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
credentials = AwsCredentials(
|
|
241
|
+
aws_access_key_id=access_key_id,
|
|
242
|
+
aws_secret_access_key=secret_access_key,
|
|
243
|
+
region_name=region_name,
|
|
244
|
+
)
|
|
245
|
+
return dlt.destinations.athena(
|
|
246
|
+
query_result_bucket=query_result_path,
|
|
247
|
+
athena_work_group=work_group,
|
|
248
|
+
credentials=credentials,
|
|
249
|
+
destination_name=bucket,
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
|
|
253
|
+
return {}
|
|
254
|
+
|
|
255
|
+
def post_load(self):
|
|
256
|
+
pass
|
|
@@ -4,6 +4,7 @@ from urllib.parse import urlparse
|
|
|
4
4
|
from dlt.common.destination import Destination
|
|
5
5
|
|
|
6
6
|
from ingestr.src.destinations import (
|
|
7
|
+
AthenaDestination,
|
|
7
8
|
BigQueryDestination,
|
|
8
9
|
CsvDestination,
|
|
9
10
|
DatabricksDestination,
|
|
@@ -159,6 +160,7 @@ class SourceDestinationFactory:
|
|
|
159
160
|
"snowflake": SnowflakeDestination(),
|
|
160
161
|
"synapse": SynapseDestination(),
|
|
161
162
|
"csv": CsvDestination(),
|
|
163
|
+
"athena": AthenaDestination(),
|
|
162
164
|
}
|
|
163
165
|
|
|
164
166
|
if self.destination_scheme in match:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.10.3"
|