ingestr 0.9.5__tar.gz → 0.10.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/.github/workflows/tests.yml +39 -11
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/.gitignore +3 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/Makefile +2 -2
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/PKG-INFO +18 -18
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/main.py +130 -37
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/gorgias/__init__.py +17 -17
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/__init__.py +42 -42
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/slack/__init__.py +2 -2
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/sources.py +13 -3
- ingestr-0.10.0rc1/ingestr/src/version.py +1 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/__init__.py +2 -2
- ingestr-0.10.0rc1/requirements-dev.txt +12 -0
- ingestr-0.10.0rc1/requirements.txt +31 -0
- ingestr-0.9.5/ingestr/src/sql_database/__init__.py +0 -206
- ingestr-0.9.5/ingestr/src/sql_database/arrow_helpers.py +0 -139
- ingestr-0.9.5/ingestr/src/sql_database/helpers.py +0 -282
- ingestr-0.9.5/ingestr/src/sql_database/override.py +0 -10
- ingestr-0.9.5/ingestr/src/sql_database/schema_types.py +0 -139
- ingestr-0.9.5/ingestr/src/version.py +0 -1
- ingestr-0.9.5/requirements-dev.txt +0 -10
- ingestr-0.9.5/requirements.txt +0 -31
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/.dockerignore +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/.python-version +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/.vale.ini +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/Dockerfile +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/LICENSE.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/README.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/.vitepress/config.mjs +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/commands/example-uris.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/commands/ingest.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/index.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/.gitignore +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/destinations.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/factory.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/package-lock.json +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/package.json +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/pyproject.toml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/resources/demo.gif +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/resources/demo.tape +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/resources/ingestr.svg +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Colons.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Gender.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Headings.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Latin.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Parens.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Passive.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Periods.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Slang.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Units.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/We.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Will.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/WordList.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/meta.json +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/vocab.txt +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/config/vocabularies/bruin/accept.txt +0 -0
|
@@ -20,25 +20,53 @@ env:
|
|
|
20
20
|
|
|
21
21
|
jobs:
|
|
22
22
|
tests:
|
|
23
|
-
|
|
23
|
+
strategy:
|
|
24
|
+
matrix:
|
|
25
|
+
# I tried running stuff on macOS but it was too slow and unreliable.
|
|
26
|
+
# I also tried windows runners but couldn't get Docker to work there, so I gave up.
|
|
27
|
+
os: [ubuntu-latest]
|
|
28
|
+
python-version: ['3.10', '3.11', '3.12']
|
|
29
|
+
runs-on: ${{ matrix.os }}
|
|
24
30
|
steps:
|
|
25
31
|
- uses: actions/checkout@v4
|
|
26
32
|
- name: install Microsoft ODBC
|
|
33
|
+
if: matrix.os == 'ubuntu-latest'
|
|
27
34
|
run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
|
|
35
|
+
- name: install Microsoft ODBC
|
|
36
|
+
if: matrix.os == 'macos-13'
|
|
37
|
+
run: |
|
|
38
|
+
brew tap microsoft/mssql-release https://github.com/Microsoft/homebrew-mssql-release
|
|
39
|
+
brew update
|
|
40
|
+
HOMEBREW_ACCEPT_EULA=Y brew install msodbcsql18
|
|
41
|
+
- name: Install Docker on macOS
|
|
42
|
+
if: matrix.os == 'macos-13'
|
|
43
|
+
run: |
|
|
44
|
+
brew install docker
|
|
45
|
+
brew install docker-compose
|
|
46
|
+
brew install colima
|
|
47
|
+
colima start
|
|
48
|
+
# Wait for Docker daemon to be ready
|
|
49
|
+
while ! docker system info > /dev/null 2>&1; do sleep 1; done
|
|
50
|
+
- name: install Microsoft ODBC
|
|
51
|
+
if: matrix.os == 'windows-latest'
|
|
52
|
+
run: |
|
|
53
|
+
Invoke-WebRequest -Uri https://go.microsoft.com/fwlink/?linkid=2249006 -OutFile msodbcsql.msi
|
|
54
|
+
Start-Process -FilePath "msiexec.exe" -ArgumentList "/i msodbcsql.msi /qn /norestart IACCEPTMSODBCSQLLICENSETERMS=YES" -Wait
|
|
28
55
|
- uses: actions/setup-python@v4
|
|
29
56
|
with:
|
|
30
|
-
python-version:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
uses: actions/cache@v3
|
|
34
|
-
id: cache
|
|
35
|
-
with:
|
|
36
|
-
path: ${{ env.pythonLocation }}
|
|
37
|
-
key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
|
|
57
|
+
python-version: ${{ matrix.python-version }}
|
|
58
|
+
- name: install uv
|
|
59
|
+
uses: astral-sh/setup-uv@v3
|
|
38
60
|
- name: Install pip dependencies
|
|
39
|
-
if: steps.cache.outputs.cache-hit != 'true'
|
|
40
61
|
run: make deps-ci
|
|
41
|
-
- name: run tests
|
|
62
|
+
- name: run tests (macOS)
|
|
63
|
+
if: matrix.os == 'macos-13'
|
|
64
|
+
run: make test-ci
|
|
65
|
+
env:
|
|
66
|
+
TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: /var/run/docker.sock
|
|
67
|
+
DOCKER_HOST: unix:///Users/runner/.colima/docker.sock
|
|
68
|
+
- name: run tests (other OS)
|
|
69
|
+
if: matrix.os != 'macos-13'
|
|
42
70
|
run: make test-ci
|
|
43
71
|
- name: check the formatting
|
|
44
72
|
run: make lint-ci
|
|
@@ -12,10 +12,10 @@ deps:
|
|
|
12
12
|
uv pip install -r requirements-dev.txt
|
|
13
13
|
|
|
14
14
|
deps-ci:
|
|
15
|
-
pip install -r requirements-dev.txt
|
|
15
|
+
uv pip install --system -r requirements-dev.txt
|
|
16
16
|
|
|
17
17
|
test-ci:
|
|
18
|
-
pytest -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
|
|
18
|
+
pytest -n auto -x -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
|
|
19
19
|
|
|
20
20
|
test: venv
|
|
21
21
|
. venv/bin/activate; $(MAKE) test-ci
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.9.5
|
|
3
|
+
Version: 0.10.0rc1
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -13,35 +13,35 @@ Classifier: Operating System :: OS Independent
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
14
|
Classifier: Topic :: Database
|
|
15
15
|
Requires-Python: >=3.9
|
|
16
|
-
Requires-Dist: confluent-kafka>=2.
|
|
16
|
+
Requires-Dist: confluent-kafka>=2.6.1
|
|
17
17
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
18
|
-
Requires-Dist: dlt==
|
|
19
|
-
Requires-Dist: duckdb-engine==0.
|
|
20
|
-
Requires-Dist: duckdb==
|
|
18
|
+
Requires-Dist: dlt==1.4.0
|
|
19
|
+
Requires-Dist: duckdb-engine==0.13.5
|
|
20
|
+
Requires-Dist: duckdb==1.1.3
|
|
21
21
|
Requires-Dist: facebook-business==20.0.0
|
|
22
22
|
Requires-Dist: google-api-python-client==2.130.0
|
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
24
|
-
Requires-Dist: mysql-connector-python==9.
|
|
24
|
+
Requires-Dist: mysql-connector-python==9.1.0
|
|
25
25
|
Requires-Dist: pendulum==3.0.0
|
|
26
|
-
Requires-Dist: psycopg2-binary==2.9.
|
|
27
|
-
Requires-Dist: py-machineid==0.
|
|
26
|
+
Requires-Dist: psycopg2-binary==2.9.10
|
|
27
|
+
Requires-Dist: py-machineid==0.6.0
|
|
28
28
|
Requires-Dist: pyairtable==2.3.3
|
|
29
|
-
Requires-Dist: pymongo==4.
|
|
30
|
-
Requires-Dist: pymysql==1.1.
|
|
31
|
-
Requires-Dist: pyrate-limiter==3.
|
|
32
|
-
Requires-Dist: redshift-connector==2.1.
|
|
33
|
-
Requires-Dist: rich==13.
|
|
34
|
-
Requires-Dist: rudder-sdk-python==2.1.
|
|
29
|
+
Requires-Dist: pymongo==4.10.1
|
|
30
|
+
Requires-Dist: pymysql==1.1.1
|
|
31
|
+
Requires-Dist: pyrate-limiter==3.7.0
|
|
32
|
+
Requires-Dist: redshift-connector==2.1.3
|
|
33
|
+
Requires-Dist: rich==13.9.4
|
|
34
|
+
Requires-Dist: rudder-sdk-python==2.1.4
|
|
35
35
|
Requires-Dist: s3fs==2024.9.0
|
|
36
|
-
Requires-Dist: snowflake-sqlalchemy==1.
|
|
37
|
-
Requires-Dist: sqlalchemy-bigquery==1.
|
|
36
|
+
Requires-Dist: snowflake-sqlalchemy==1.6.1
|
|
37
|
+
Requires-Dist: sqlalchemy-bigquery==1.12.0
|
|
38
38
|
Requires-Dist: sqlalchemy-hana==2.0.0
|
|
39
39
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
40
40
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
41
41
|
Requires-Dist: sqlalchemy==1.4.52
|
|
42
42
|
Requires-Dist: stripe==10.7.0
|
|
43
|
-
Requires-Dist: tqdm==4.
|
|
44
|
-
Requires-Dist: typer==0.
|
|
43
|
+
Requires-Dist: tqdm==4.67.0
|
|
44
|
+
Requires-Dist: typer==0.13.1
|
|
45
45
|
Requires-Dist: types-requests==2.32.0.20240907
|
|
46
46
|
Provides-Extra: odbc
|
|
47
47
|
Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
|
|
@@ -1,19 +1,13 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
import tempfile
|
|
3
1
|
from datetime import datetime
|
|
4
2
|
from enum import Enum
|
|
5
3
|
from typing import Optional
|
|
6
4
|
|
|
7
|
-
import dlt
|
|
8
|
-
import humanize
|
|
9
5
|
import typer
|
|
10
|
-
from dlt.common.
|
|
11
|
-
from dlt.common.runtime.collector import Collector, LogCollector
|
|
6
|
+
from dlt.common.runtime.collector import Collector
|
|
12
7
|
from rich.console import Console
|
|
13
8
|
from rich.status import Status
|
|
14
9
|
from typing_extensions import Annotated
|
|
15
10
|
|
|
16
|
-
from ingestr.src.factory import SourceDestinationFactory
|
|
17
11
|
from ingestr.src.telemetry.event import track
|
|
18
12
|
|
|
19
13
|
app = typer.Typer(
|
|
@@ -118,6 +112,12 @@ class SchemaNaming(str, Enum):
|
|
|
118
112
|
direct = "direct"
|
|
119
113
|
|
|
120
114
|
|
|
115
|
+
class SqlReflectionLevel(str, Enum):
|
|
116
|
+
minimal = "minimal"
|
|
117
|
+
full = "full"
|
|
118
|
+
full_with_precision = "full_with_precision"
|
|
119
|
+
|
|
120
|
+
|
|
121
121
|
@app.command()
|
|
122
122
|
def ingest(
|
|
123
123
|
source_uri: Annotated[
|
|
@@ -181,6 +181,20 @@ def ingest(
|
|
|
181
181
|
envvar="PRIMARY_KEY",
|
|
182
182
|
),
|
|
183
183
|
] = None, # type: ignore
|
|
184
|
+
partition_by: Annotated[
|
|
185
|
+
Optional[str],
|
|
186
|
+
typer.Option(
|
|
187
|
+
help="The partition key to be used for partitioning the destination table",
|
|
188
|
+
envvar="PARTITION_BY",
|
|
189
|
+
),
|
|
190
|
+
] = None, # type: ignore
|
|
191
|
+
cluster_by: Annotated[
|
|
192
|
+
Optional[str],
|
|
193
|
+
typer.Option(
|
|
194
|
+
help="The clustering key to be used for clustering the destination table, not every destination supports clustering.",
|
|
195
|
+
envvar="CLUSTER_BY",
|
|
196
|
+
),
|
|
197
|
+
] = None, # type: ignore
|
|
184
198
|
yes: Annotated[
|
|
185
199
|
Optional[bool],
|
|
186
200
|
typer.Option(
|
|
@@ -251,7 +265,81 @@ def ingest(
|
|
|
251
265
|
envvar="EXTRACT_PARALLELISM",
|
|
252
266
|
),
|
|
253
267
|
] = 5, # type: ignore
|
|
268
|
+
sql_reflection_level: Annotated[
|
|
269
|
+
SqlReflectionLevel,
|
|
270
|
+
typer.Option(
|
|
271
|
+
help="The reflection level to use when reflecting the table schema from the source",
|
|
272
|
+
envvar="SQL_REFLECTION_LEVEL",
|
|
273
|
+
),
|
|
274
|
+
] = SqlReflectionLevel.full, # type: ignore
|
|
275
|
+
sql_limit: Annotated[
|
|
276
|
+
Optional[int],
|
|
277
|
+
typer.Option(
|
|
278
|
+
help="The limit to use when fetching data from the source",
|
|
279
|
+
envvar="SQL_LIMIT",
|
|
280
|
+
),
|
|
281
|
+
] = None, # type: ignore
|
|
254
282
|
):
|
|
283
|
+
import hashlib
|
|
284
|
+
import tempfile
|
|
285
|
+
from datetime import datetime
|
|
286
|
+
|
|
287
|
+
import dlt
|
|
288
|
+
import humanize
|
|
289
|
+
import typer
|
|
290
|
+
from dlt.common.destination import Destination
|
|
291
|
+
from dlt.common.pipeline import LoadInfo
|
|
292
|
+
from dlt.common.runtime.collector import Collector, LogCollector
|
|
293
|
+
from dlt.common.schema.typing import TColumnSchema
|
|
294
|
+
|
|
295
|
+
from ingestr.src.factory import SourceDestinationFactory
|
|
296
|
+
from ingestr.src.telemetry.event import track
|
|
297
|
+
|
|
298
|
+
def report_errors(run_info: LoadInfo):
|
|
299
|
+
for load_package in run_info.load_packages:
|
|
300
|
+
failed_jobs = load_package.jobs["failed_jobs"]
|
|
301
|
+
if len(failed_jobs) == 0:
|
|
302
|
+
continue
|
|
303
|
+
|
|
304
|
+
print()
|
|
305
|
+
print("[bold red]Failed jobs:[/bold red]")
|
|
306
|
+
print()
|
|
307
|
+
for job in failed_jobs:
|
|
308
|
+
print(f"[bold red] {job.job_file_info.job_id()}[/bold red]")
|
|
309
|
+
print(f" [bold yellow]Error:[/bold yellow] {job.failed_message}")
|
|
310
|
+
|
|
311
|
+
raise typer.Exit(1)
|
|
312
|
+
|
|
313
|
+
def validate_source_dest_tables(
|
|
314
|
+
source_table: str, dest_table: str
|
|
315
|
+
) -> tuple[str, str]:
|
|
316
|
+
if not dest_table:
|
|
317
|
+
if len(source_table.split(".")) != 2:
|
|
318
|
+
print(
|
|
319
|
+
"[red]Table name must be in the format schema.table for source table when dest-table is not given.[/red]"
|
|
320
|
+
)
|
|
321
|
+
raise typer.Abort()
|
|
322
|
+
|
|
323
|
+
print()
|
|
324
|
+
print(
|
|
325
|
+
"[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
|
|
326
|
+
)
|
|
327
|
+
dest_table = source_table
|
|
328
|
+
return (source_table, dest_table)
|
|
329
|
+
|
|
330
|
+
def validate_loader_file_format(
|
|
331
|
+
dlt_dest: Destination, loader_file_format: Optional[LoaderFileFormat]
|
|
332
|
+
):
|
|
333
|
+
if (
|
|
334
|
+
loader_file_format
|
|
335
|
+
and loader_file_format.value
|
|
336
|
+
not in dlt_dest.capabilities().supported_loader_file_formats
|
|
337
|
+
):
|
|
338
|
+
print(
|
|
339
|
+
f"[red]Loader file format {loader_file_format.value} is not supported by the destination.[/red]"
|
|
340
|
+
)
|
|
341
|
+
raise typer.Abort()
|
|
342
|
+
|
|
255
343
|
track(
|
|
256
344
|
"command_triggered",
|
|
257
345
|
{
|
|
@@ -267,29 +355,23 @@ def ingest(
|
|
|
267
355
|
dlt.config["schema.naming"] = schema_naming.value
|
|
268
356
|
|
|
269
357
|
try:
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
"[red]Table name must be in the format schema.table for source table when dest-table is not given.[/red]"
|
|
274
|
-
)
|
|
275
|
-
raise typer.Abort()
|
|
276
|
-
|
|
277
|
-
print()
|
|
278
|
-
print(
|
|
279
|
-
"[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
|
|
280
|
-
)
|
|
281
|
-
dest_table = source_table
|
|
358
|
+
(source_table, dest_table) = validate_source_dest_tables(
|
|
359
|
+
source_table, dest_table
|
|
360
|
+
)
|
|
282
361
|
|
|
283
362
|
factory = SourceDestinationFactory(source_uri, dest_uri)
|
|
284
363
|
source = factory.get_source()
|
|
285
364
|
destination = factory.get_destination()
|
|
286
365
|
|
|
366
|
+
column_hints: dict[str, TColumnSchema] = {}
|
|
287
367
|
original_incremental_strategy = incremental_strategy
|
|
288
368
|
|
|
289
369
|
merge_key = None
|
|
290
370
|
if incremental_strategy == IncrementalStrategy.delete_insert:
|
|
291
371
|
merge_key = incremental_key
|
|
292
372
|
incremental_strategy = IncrementalStrategy.merge
|
|
373
|
+
if incremental_key:
|
|
374
|
+
column_hints[incremental_key] = {"merge_key": True}
|
|
293
375
|
|
|
294
376
|
m = hashlib.sha256()
|
|
295
377
|
m.update(dest_table.encode("utf-8"))
|
|
@@ -303,11 +385,31 @@ def ingest(
|
|
|
303
385
|
pipelines_dir = tempfile.mkdtemp()
|
|
304
386
|
is_pipelines_dir_temp = True
|
|
305
387
|
|
|
388
|
+
dlt_dest = destination.dlt_dest(uri=dest_uri)
|
|
389
|
+
validate_loader_file_format(dlt_dest, loader_file_format)
|
|
390
|
+
|
|
391
|
+
if partition_by:
|
|
392
|
+
if partition_by not in column_hints:
|
|
393
|
+
column_hints[partition_by] = {}
|
|
394
|
+
|
|
395
|
+
column_hints[partition_by]["partition"] = True
|
|
396
|
+
|
|
397
|
+
if cluster_by:
|
|
398
|
+
if cluster_by not in column_hints:
|
|
399
|
+
column_hints[cluster_by] = {}
|
|
400
|
+
|
|
401
|
+
column_hints[cluster_by]["cluster"] = True
|
|
402
|
+
|
|
403
|
+
if primary_key:
|
|
404
|
+
for key in primary_key:
|
|
405
|
+
if key not in column_hints:
|
|
406
|
+
column_hints[key] = {}
|
|
407
|
+
|
|
408
|
+
column_hints[key]["primary_key"] = True
|
|
409
|
+
|
|
306
410
|
pipeline = dlt.pipeline(
|
|
307
411
|
pipeline_name=m.hexdigest(),
|
|
308
|
-
destination=
|
|
309
|
-
uri=dest_uri,
|
|
310
|
-
),
|
|
412
|
+
destination=dlt_dest,
|
|
311
413
|
progress=progressInstance,
|
|
312
414
|
pipelines_dir=pipelines_dir,
|
|
313
415
|
refresh="drop_resources" if full_refresh else None,
|
|
@@ -365,6 +467,8 @@ def ingest(
|
|
|
365
467
|
interval_end=interval_end,
|
|
366
468
|
sql_backend=sql_backend.value,
|
|
367
469
|
page_size=page_size,
|
|
470
|
+
sql_reflection_level=sql_reflection_level.value,
|
|
471
|
+
sql_limit=sql_limit,
|
|
368
472
|
)
|
|
369
473
|
|
|
370
474
|
if original_incremental_strategy == IncrementalStrategy.delete_insert:
|
|
@@ -400,29 +504,18 @@ def ingest(
|
|
|
400
504
|
loader_file_format=loader_file_format.value
|
|
401
505
|
if loader_file_format is not None
|
|
402
506
|
else None, # type: ignore
|
|
507
|
+
columns=column_hints,
|
|
403
508
|
)
|
|
404
509
|
|
|
405
|
-
|
|
406
|
-
failed_jobs = load_package.jobs["failed_jobs"]
|
|
407
|
-
if len(failed_jobs) > 0:
|
|
408
|
-
print()
|
|
409
|
-
print("[bold red]Failed jobs:[/bold red]")
|
|
410
|
-
print()
|
|
411
|
-
for job in failed_jobs:
|
|
412
|
-
print(f"[bold red] {job.job_file_info.job_id()}[/bold red]")
|
|
413
|
-
print(f" [bold yellow]Error:[/bold yellow] {job.failed_message}")
|
|
414
|
-
|
|
415
|
-
raise typer.Exit(1)
|
|
510
|
+
report_errors(run_info)
|
|
416
511
|
|
|
417
512
|
destination.post_load()
|
|
418
513
|
|
|
419
514
|
end_time = datetime.now()
|
|
420
515
|
elapsedHuman = ""
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
|
|
516
|
+
elapsed = end_time - start_time
|
|
517
|
+
elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
|
|
424
518
|
|
|
425
|
-
# remove the pipelines_dir folder if it was created by ingestr
|
|
426
519
|
if is_pipelines_dir_temp:
|
|
427
520
|
import shutil
|
|
428
521
|
|
|
@@ -99,12 +99,12 @@ def gorgias_source(
|
|
|
99
99
|
"description": "When the user was last updated.",
|
|
100
100
|
},
|
|
101
101
|
"meta": {
|
|
102
|
-
"data_type": "
|
|
102
|
+
"data_type": "json",
|
|
103
103
|
"nullable": True,
|
|
104
104
|
"description": "Meta information associated with the user.",
|
|
105
105
|
},
|
|
106
106
|
"data": {
|
|
107
|
-
"data_type": "
|
|
107
|
+
"data_type": "json",
|
|
108
108
|
"nullable": True,
|
|
109
109
|
"description": "Additional data associated with the user.",
|
|
110
110
|
},
|
|
@@ -185,17 +185,17 @@ def gorgias_source(
|
|
|
185
185
|
"description": "Indicates if the ticket was created by an agent",
|
|
186
186
|
},
|
|
187
187
|
"customer": {
|
|
188
|
-
"data_type": "
|
|
188
|
+
"data_type": "json",
|
|
189
189
|
"nullable": False,
|
|
190
190
|
"description": "The customer linked to the ticket.",
|
|
191
191
|
},
|
|
192
192
|
"assignee_user": {
|
|
193
|
-
"data_type": "
|
|
193
|
+
"data_type": "json",
|
|
194
194
|
"nullable": True,
|
|
195
195
|
"description": "User assigned to the ticket",
|
|
196
196
|
},
|
|
197
197
|
"assignee_team": {
|
|
198
|
-
"data_type": "
|
|
198
|
+
"data_type": "json",
|
|
199
199
|
"nullable": True,
|
|
200
200
|
"description": "Team assigned to the ticket",
|
|
201
201
|
},
|
|
@@ -210,17 +210,17 @@ def gorgias_source(
|
|
|
210
210
|
"description": "Excerpt of the ticket",
|
|
211
211
|
},
|
|
212
212
|
"integrations": {
|
|
213
|
-
"data_type": "
|
|
213
|
+
"data_type": "json",
|
|
214
214
|
"nullable": False,
|
|
215
215
|
"description": "Integration information related to the ticket",
|
|
216
216
|
},
|
|
217
217
|
"meta": {
|
|
218
|
-
"data_type": "
|
|
218
|
+
"data_type": "json",
|
|
219
219
|
"nullable": True,
|
|
220
220
|
"description": "Meta information related to the ticket",
|
|
221
221
|
},
|
|
222
222
|
"tags": {
|
|
223
|
-
"data_type": "
|
|
223
|
+
"data_type": "json",
|
|
224
224
|
"nullable": False,
|
|
225
225
|
"description": "Tags associated with the ticket",
|
|
226
226
|
},
|
|
@@ -354,7 +354,7 @@ def gorgias_source(
|
|
|
354
354
|
"description": "How the message has been received, or sent from Gorgias.",
|
|
355
355
|
},
|
|
356
356
|
"sender": {
|
|
357
|
-
"data_type": "
|
|
357
|
+
"data_type": "json",
|
|
358
358
|
"nullable": False,
|
|
359
359
|
"description": "The person who sent the message. It can be a user or a customer.",
|
|
360
360
|
},
|
|
@@ -364,7 +364,7 @@ def gorgias_source(
|
|
|
364
364
|
"description": "ID of the integration that either received or sent the message.",
|
|
365
365
|
},
|
|
366
366
|
"intents": {
|
|
367
|
-
"data_type": "
|
|
367
|
+
"data_type": "json",
|
|
368
368
|
"nullable": True,
|
|
369
369
|
"description": "",
|
|
370
370
|
},
|
|
@@ -379,7 +379,7 @@ def gorgias_source(
|
|
|
379
379
|
"description": "Whether the message was sent by your company to a customer, or the opposite.",
|
|
380
380
|
},
|
|
381
381
|
"receiver": {
|
|
382
|
-
"data_type": "
|
|
382
|
+
"data_type": "json",
|
|
383
383
|
"nullable": True,
|
|
384
384
|
"description": "The primary receiver of the message. It can be a user or a customer. Optional when the source type is 'internal-note'.",
|
|
385
385
|
},
|
|
@@ -414,27 +414,27 @@ def gorgias_source(
|
|
|
414
414
|
"description": "",
|
|
415
415
|
},
|
|
416
416
|
"headers": {
|
|
417
|
-
"data_type": "
|
|
417
|
+
"data_type": "json",
|
|
418
418
|
"nullable": True,
|
|
419
419
|
"description": "Headers of the message",
|
|
420
420
|
},
|
|
421
421
|
"attachments": {
|
|
422
|
-
"data_type": "
|
|
422
|
+
"data_type": "json",
|
|
423
423
|
"nullable": True,
|
|
424
424
|
"description": "A list of files attached to the message.",
|
|
425
425
|
},
|
|
426
426
|
"actions": {
|
|
427
|
-
"data_type": "
|
|
427
|
+
"data_type": "json",
|
|
428
428
|
"nullable": True,
|
|
429
429
|
"description": "A list of actions performed on the message.",
|
|
430
430
|
},
|
|
431
431
|
"macros": {
|
|
432
|
-
"data_type": "
|
|
432
|
+
"data_type": "json",
|
|
433
433
|
"nullable": True,
|
|
434
434
|
"description": "A list of macros",
|
|
435
435
|
},
|
|
436
436
|
"meta": {
|
|
437
|
-
"data_type": "
|
|
437
|
+
"data_type": "json",
|
|
438
438
|
"nullable": True,
|
|
439
439
|
"description": "Message metadata",
|
|
440
440
|
},
|
|
@@ -526,7 +526,7 @@ def gorgias_source(
|
|
|
526
526
|
"description": "ID of the customer linked to the survey.",
|
|
527
527
|
},
|
|
528
528
|
"meta": {
|
|
529
|
-
"data_type": "
|
|
529
|
+
"data_type": "json",
|
|
530
530
|
"nullable": True,
|
|
531
531
|
"description": "Meta information associated with the survey.",
|
|
532
532
|
},
|