ingestr 0.10.2__tar.gz → 0.10.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (171) hide show
  1. ingestr-0.10.4/.githooks/pre-commit-hook.sh +23 -0
  2. ingestr-0.10.4/.github/workflows/secrets-scan.yml +12 -0
  3. ingestr-0.10.4/.gitleaksignore +3 -0
  4. {ingestr-0.10.2 → ingestr-0.10.4}/Makefile +6 -2
  5. {ingestr-0.10.2 → ingestr-0.10.4}/PKG-INFO +18 -2
  6. {ingestr-0.10.2 → ingestr-0.10.4}/README.md +17 -1
  7. {ingestr-0.10.2 → ingestr-0.10.4}/docs/.vitepress/config.mjs +1 -1
  8. {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/quickstart.md +11 -1
  9. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/adjust.md +2 -2
  10. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/athena.md +1 -1
  11. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/gorgias.md +1 -1
  12. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/gsheets.md +1 -1
  13. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/kafka.md +1 -1
  14. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/klaviyo.md +2 -2
  15. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/notion.md +1 -1
  16. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/shopify.md +1 -1
  17. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/stripe.md +1 -1
  18. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/zendesk.md +2 -2
  19. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/main.py +10 -0
  20. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/adjust/__init__.py +4 -1
  21. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/chess/__init__.py +1 -1
  22. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/destinations.py +8 -1
  23. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/hubspot/__init__.py +1 -1
  24. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/sources.py +1 -1
  25. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/stripe_analytics/__init__.py +1 -1
  26. ingestr-0.10.4/ingestr/src/version.py +1 -0
  27. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/__init__.py +0 -1
  28. ingestr-0.10.2/ingestr/src/version.py +0 -1
  29. {ingestr-0.10.2 → ingestr-0.10.4}/.dockerignore +0 -0
  30. {ingestr-0.10.2 → ingestr-0.10.4}/.github/workflows/deploy-docs.yml +0 -0
  31. {ingestr-0.10.2 → ingestr-0.10.4}/.github/workflows/tests.yml +0 -0
  32. {ingestr-0.10.2 → ingestr-0.10.4}/.gitignore +0 -0
  33. {ingestr-0.10.2 → ingestr-0.10.4}/.python-version +0 -0
  34. {ingestr-0.10.2 → ingestr-0.10.4}/.vale.ini +0 -0
  35. {ingestr-0.10.2 → ingestr-0.10.4}/Dockerfile +0 -0
  36. {ingestr-0.10.2 → ingestr-0.10.4}/LICENSE.md +0 -0
  37. {ingestr-0.10.2 → ingestr-0.10.4}/docs/.vitepress/theme/custom.css +0 -0
  38. {ingestr-0.10.2 → ingestr-0.10.4}/docs/.vitepress/theme/index.js +0 -0
  39. {ingestr-0.10.2 → ingestr-0.10.4}/docs/commands/example-uris.md +0 -0
  40. {ingestr-0.10.2 → ingestr-0.10.4}/docs/commands/ingest.md +0 -0
  41. {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/core-concepts.md +0 -0
  42. {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/incremental-loading.md +0 -0
  43. {ingestr-0.10.2 → ingestr-0.10.4}/docs/getting-started/telemetry.md +0 -0
  44. {ingestr-0.10.2 → ingestr-0.10.4}/docs/index.md +0 -0
  45. {ingestr-0.10.2 → ingestr-0.10.4}/docs/media/athena.png +0 -0
  46. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/airtable.md +0 -0
  47. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/appsflyer.md +0 -0
  48. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/bigquery.md +0 -0
  49. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/chess.md +0 -0
  50. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/csv.md +0 -0
  51. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/databricks.md +0 -0
  52. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/duckdb.md +0 -0
  53. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/facebook-ads.md +0 -0
  54. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/hubspot.md +0 -0
  55. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/mongodb.md +0 -0
  56. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/mssql.md +0 -0
  57. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/mysql.md +0 -0
  58. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/oracle.md +0 -0
  59. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/postgres.md +0 -0
  60. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/redshift.md +0 -0
  61. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/s3.md +0 -0
  62. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/sap-hana.md +0 -0
  63. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/slack.md +0 -0
  64. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/snowflake.md +0 -0
  65. {ingestr-0.10.2 → ingestr-0.10.4}/docs/supported-sources/sqlite.md +0 -0
  66. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/.gitignore +0 -0
  67. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/adjust/adjust_helpers.py +0 -0
  68. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/airtable/__init__.py +0 -0
  69. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/appsflyer/_init_.py +0 -0
  70. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/appsflyer/client.py +0 -0
  71. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/arrow/__init__.py +0 -0
  72. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/chess/helpers.py +0 -0
  73. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/chess/settings.py +0 -0
  74. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/__init__.py +0 -0
  75. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/exceptions.py +0 -0
  76. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/helpers.py +0 -0
  77. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/facebook_ads/settings.py +0 -0
  78. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/factory.py +0 -0
  79. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filesystem/__init__.py +0 -0
  80. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filesystem/helpers.py +0 -0
  81. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filesystem/readers.py +0 -0
  82. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/filters.py +0 -0
  83. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/README.md +0 -0
  84. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/__init__.py +0 -0
  85. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  86. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  87. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  88. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/gorgias/__init__.py +0 -0
  89. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/gorgias/helpers.py +0 -0
  90. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/hubspot/helpers.py +0 -0
  91. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/hubspot/settings.py +0 -0
  92. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/kafka/__init__.py +0 -0
  93. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/kafka/helpers.py +0 -0
  94. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/klaviyo/_init_.py +0 -0
  95. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/klaviyo/client.py +0 -0
  96. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/klaviyo/helpers.py +0 -0
  97. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/mongodb/__init__.py +0 -0
  98. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/mongodb/helpers.py +0 -0
  99. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/__init__.py +0 -0
  100. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/helpers/__init__.py +0 -0
  101. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/helpers/client.py +0 -0
  102. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/helpers/database.py +0 -0
  103. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/notion/settings.py +0 -0
  104. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/__init__.py +0 -0
  105. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/exceptions.py +0 -0
  106. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/helpers.py +0 -0
  107. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/shopify/settings.py +0 -0
  108. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/slack/__init__.py +0 -0
  109. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/slack/helpers.py +0 -0
  110. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/slack/settings.py +0 -0
  111. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/stripe_analytics/helpers.py +0 -0
  112. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/stripe_analytics/settings.py +0 -0
  113. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/table_definition.py +0 -0
  114. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/telemetry/event.py +0 -0
  115. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  116. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  117. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  118. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  119. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  120. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/src/zendesk/settings.py +0 -0
  121. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/.gitignore +0 -0
  122. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/create_replace.csv +0 -0
  123. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/delete_insert_expected.csv +0 -0
  124. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/delete_insert_part1.csv +0 -0
  125. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/delete_insert_part2.csv +0 -0
  126. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/merge_expected.csv +0 -0
  127. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/merge_part1.csv +0 -0
  128. {ingestr-0.10.2 → ingestr-0.10.4}/ingestr/testdata/merge_part2.csv +0 -0
  129. {ingestr-0.10.2 → ingestr-0.10.4}/package-lock.json +0 -0
  130. {ingestr-0.10.2 → ingestr-0.10.4}/package.json +0 -0
  131. {ingestr-0.10.2 → ingestr-0.10.4}/pyproject.toml +0 -0
  132. {ingestr-0.10.2 → ingestr-0.10.4}/requirements-dev.txt +0 -0
  133. {ingestr-0.10.2 → ingestr-0.10.4}/requirements.txt +0 -0
  134. {ingestr-0.10.2 → ingestr-0.10.4}/resources/demo.gif +0 -0
  135. {ingestr-0.10.2 → ingestr-0.10.4}/resources/demo.tape +0 -0
  136. {ingestr-0.10.2 → ingestr-0.10.4}/resources/ingestr.svg +0 -0
  137. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/AMPM.yml +0 -0
  138. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Acronyms.yml +0 -0
  139. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Colons.yml +0 -0
  140. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Contractions.yml +0 -0
  141. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/DateFormat.yml +0 -0
  142. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Ellipses.yml +0 -0
  143. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/EmDash.yml +0 -0
  144. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Exclamation.yml +0 -0
  145. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/FirstPerson.yml +0 -0
  146. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Gender.yml +0 -0
  147. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/GenderBias.yml +0 -0
  148. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/HeadingPunctuation.yml +0 -0
  149. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Headings.yml +0 -0
  150. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Latin.yml +0 -0
  151. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/LyHyphens.yml +0 -0
  152. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/OptionalPlurals.yml +0 -0
  153. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Ordinal.yml +0 -0
  154. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/OxfordComma.yml +0 -0
  155. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Parens.yml +0 -0
  156. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Passive.yml +0 -0
  157. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Periods.yml +0 -0
  158. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Quotes.yml +0 -0
  159. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Ranges.yml +0 -0
  160. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Semicolons.yml +0 -0
  161. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Slang.yml +0 -0
  162. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Spacing.yml +0 -0
  163. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Spelling.yml +0 -0
  164. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Units.yml +0 -0
  165. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/We.yml +0 -0
  166. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/Will.yml +0 -0
  167. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/WordList.yml +0 -0
  168. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/meta.json +0 -0
  169. {ingestr-0.10.2 → ingestr-0.10.4}/styles/Google/vocab.txt +0 -0
  170. {ingestr-0.10.2 → ingestr-0.10.4}/styles/bruin/Ingestr.yml +0 -0
  171. {ingestr-0.10.2 → ingestr-0.10.4}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env bash
2
+
3
+ set -euo pipefail
4
+
5
+ echo "scanning for secrets ..."
6
+
7
+ WORK_DIR="/root/code"
8
+
9
+ secret_detected() {
10
+ echo "secrets detected in source code. commit aborted."
11
+ exit 1
12
+ }
13
+
14
+ # use gitleaks binary if available
15
+ # else fallback to using docker for running gitleaks
16
+ CMD="gitleaks dir -v"
17
+
18
+ if [[ ! `which gitleaks` ]]; then
19
+ which docker > /dev/null || (echo "gitleaks or docker is required for running secrets scan." && exit 1)
20
+ CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR ghcr.io/gitleaks/gitleaks:latest dir -v"
21
+ fi
22
+
23
+ $CMD || secret_detected
@@ -0,0 +1,12 @@
1
+ name: secrets_scan
2
+ on: [pull_request, push, workflow_dispatch]
3
+ jobs:
4
+ scan:
5
+ name: gitleaks
6
+ runs-on: ubuntu-latest
7
+ steps:
8
+ - uses: actions/checkout@v3
9
+ with:
10
+ fetch-depth: 0
11
+ - name: scan for secrets (gitleaks)
12
+ run: docker run -v $PWD:/code -w /code ghcr.io/gitleaks/gitleaks:latest dir -v
@@ -0,0 +1,3 @@
1
+ ingestr/src/telemetry/event.py:generic-api-key:9
2
+ ingestr/src/testdata/fakebqcredentials.json:private-key:5
3
+ docs/supported-sources/shopify.md:generic-api-key:26
@@ -1,11 +1,11 @@
1
1
  .ONESHELL:
2
- .PHONY: test lint format ftl test-ci lint-ci build upload-release
2
+ .PHONY: test lint format test-ci lint-ci build upload-release setup
3
3
 
4
4
  venv: venv/touchfile
5
5
 
6
6
  venv/touchfile: requirements-dev.txt requirements.txt
7
7
  test -d venv || python3 -m venv venv
8
- . venv/bin/activate; $(MAKE) deps
8
+ . venv/bin/activate; pip install uv; $(MAKE) deps
9
9
  touch venv/touchfile
10
10
 
11
11
  deps:
@@ -40,3 +40,7 @@ build:
40
40
 
41
41
  upload-release:
42
42
  twine upload --verbose dist/*
43
+
44
+ setup:
45
+ @echo "installing git hooks ..."
46
+ @install -m 755 .githooks/pre-commit-hook.sh .git/hooks/pre-commit
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.10.2
3
+ Version: 0.10.4
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -74,11 +74,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
74
74
  ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
75
75
 
76
76
  ## Installation
77
+ We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
77
78
 
78
79
  ```
79
- pip install ingestr
80
+ pip install uv
81
+ uvx ingestr
80
82
  ```
81
83
 
84
+ Alternatively, if you'd like to install it globally:
85
+ ```
86
+ uv pip install --system ingestr
87
+ ```
88
+
89
+ While installation with vanilla `pip` is possible, it's an order of magnitude slower.
90
+
82
91
  ## Quickstart
83
92
 
84
93
  ```bash
@@ -104,6 +113,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
104
113
 
105
114
  Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
106
115
 
116
+ ## Contributing
117
+
118
+ Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
119
+
120
+ > [!NOTE]
121
+ > After cloning `ingestr`, make sure to run `make setup` to install the Git hooks.
122
+
107
123
  ## Supported sources & destinations
108
124
 
109
125
  <table>
@@ -21,11 +21,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
21
21
  ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
22
22
 
23
23
  ## Installation
24
+ We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
24
25
 
25
26
  ```
26
- pip install ingestr
27
+ pip install uv
28
+ uvx ingestr
27
29
  ```
28
30
 
31
+ Alternatively, if you'd like to install it globally:
32
+ ```
33
+ uv pip install --system ingestr
34
+ ```
35
+
36
+ While installation with vanilla `pip` is possible, it's an order of magnitude slower.
37
+
29
38
  ## Quickstart
30
39
 
31
40
  ```bash
@@ -51,6 +60,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
51
60
 
52
61
  Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
53
62
 
63
+ ## Contributing
64
+
65
+ Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
66
+
67
+ > [!NOTE]
68
+ > After cloning `ingestr`, make sure to run `make setup` to install the Git hooks.
69
+
54
70
  ## Supported sources & destinations
55
71
 
56
72
  <table>
@@ -57,7 +57,7 @@ export default defineConfig({
57
57
  text: "Databases",
58
58
  collapsed: false,
59
59
  items: [
60
- { text: "Athena", link: "/supported-sources/athena.md" },
60
+ { text: "AWS Athena", link: "/supported-sources/athena.md" },
61
61
  { text: "AWS Redshift", link: "/supported-sources/redshift.md" },
62
62
  { text: "Databricks", link: "/supported-sources/databricks.md" },
63
63
  { text: "DuckDB", link: "/supported-sources/duckdb.md" },
@@ -13,9 +13,19 @@ ingestr takes away the complexity of managing any backend or writing any code fo
13
13
 
14
14
 
15
15
  ## Installation
16
+ We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
17
+
18
+ ```
19
+ pip install uv
20
+ uvx ingestr
16
21
  ```
17
- pip install ingestr
22
+
23
+ Alternatively, if you'd like to install it globally:
18
24
  ```
25
+ uv pip install --system ingestr
26
+ ```
27
+
28
+ While installation with vanilla `pip` is possible, it's an order of magnitude slower.
19
29
 
20
30
  ## Quickstart
21
31
 
@@ -62,7 +62,7 @@ ingestr ingest \
62
62
  --source-uri 'adjust://?api_key=nr_123' \
63
63
  --source-table 'campaigns' \
64
64
  --dest-uri duckdb:///adjust.duckdb \
65
- --dest-table 'adjust.output'
65
+ --dest-table 'dest.output'
66
66
  ```
67
67
 
68
68
  Copy creatives data from Adjust into a DuckDB database:
@@ -71,7 +71,7 @@ ingestr ingest \
71
71
  --source-uri 'adjust://?api_key=nr_123' \
72
72
  --source-table 'creatives' \
73
73
  --dest-uri duckdb:///adjust.duckdb \
74
- --dest-table 'adjust.output'
74
+ --dest-table 'dest.output'
75
75
  ```
76
76
 
77
77
  Copy custom data from Adjust into a DuckDB database:
@@ -1,4 +1,4 @@
1
- # Athena
1
+ # AWS Athena
2
2
  [Athena](https://aws.amazon.com/athena/) is an interactive query service that allows users to analyze data directly in Amazon S3 using standard SQL.
3
3
 
4
4
  The Athena destination stores data as Parquet files in S3 buckets and creates external tables in AWS Glue Catalog.
@@ -23,7 +23,7 @@ The URI is used to connect to the Gorgias API for extracting data.
23
23
  ingestr ingest --source-table 'tickets' --source-uri $GORGIAS_URI --dest-uri $BIGQUERY_URI --interval-start 2024-06-19 --dest-table 'gorgias.ticket_messages' --loader-file-format jsonl
24
24
 
25
25
  # get all the customers and write them to `gorgias.customers` table on DuckDB
26
- ingestr ingest --source-table 'customers' --source-uri $GORGIAS_URI --dest-uri duckdb:///gorgias.duckdb --interval-start 2024-01-01 --dest-table 'gorgias.customers'
26
+ ingestr ingest --source-table 'customers' --source-uri $GORGIAS_URI --dest-uri duckdb:///gorgias.duckdb --interval-start 2024-01-01 --dest-table 'dest.customers'
27
27
  ```
28
28
 
29
29
  ## Supported entities
@@ -37,7 +37,7 @@ Once you complete the guide, you should have a service account JSON file and the
37
37
  Based on this assumption, here's a sample command that will copy the data from the Google Sheets spreadsheet into a DuckDB database:
38
38
 
39
39
  ```sh
40
- ingestr ingest --source-uri 'gsheets://?credentials_path=/path/to/file.json' --source-table 'fkdUQ2bjdNfUq2CA.Sheet1' --dest-uri duckdb:///gsheets.duckdb --dest-table 'gsheets.output'
40
+ ingestr ingest --source-uri 'gsheets://?credentials_path=/path/to/file.json' --source-table 'fkdUQ2bjdNfUq2CA.Sheet1' --dest-uri duckdb:///gsheets.duckdb --dest-table 'dest.output'
41
41
  ```
42
42
 
43
43
  The result of this command will be a table in the `gsheets.duckdb` database.
@@ -32,7 +32,7 @@ ingestr ingest \
32
32
  --source-uri 'kafka://?bootstrap_servers=localhost:9092&group_id=test_group' \
33
33
  --source-table 'my-topic' \
34
34
  --dest-uri duckdb:///kafka.duckdb \
35
- --dest-table 'kafka.my_topic'
35
+ --dest-table 'dest.my_topic'
36
36
  ```
37
37
 
38
38
  The result of this command will be a table in the `kafka.duckdb` database with JSON columns.
@@ -19,10 +19,10 @@ URI parameters:
19
19
  The URI is used to connect to the Klaviyo API for extracting data.
20
20
 
21
21
  ```bash
22
- ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table 'klaviyo.events' --extract-parallelism 20
22
+ ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table 'dest.events' --extract-parallelism 20
23
23
  ```
24
24
 
25
- This command fetches all the events that are created/updated since 2022-01-01 and writes them to `klaviyo.events` table on DuckDB, using 20 parallel threads to improve performance and efficiently handle large data .
25
+ This command fetches all the events that are created/updated since 2022-01-01 and writes them to the `dest.events` table on DuckDB, using 20 parallel threads to improve performance and efficiently handle large volumes of data.
26
26
 
27
27
  ## Tables
28
28
 
@@ -22,7 +22,7 @@ Notion requires a few steps to set up an integration, please follow the guide dl
22
22
  Once you complete the guide, you should have an API key, and the table ID to connect to. Let's say your API token is `secret_12345` and the database you'd like to connect to is `bfeaafc0c25f40a9asdasd672a9456f3`, here's a sample command that will copy the data from the Notion table into a DuckDB database:
23
23
 
24
24
  ```sh
25
- ingestr ingest --source-uri 'notion://?api_key=secret_12345' --source-table 'bfeaafc0c25f40a9asdasd672a9456f3' --dest-uri duckdb:///notion.duckdb --dest-table 'notion.output'
25
+ ingestr ingest --source-uri 'notion://?api_key=secret_12345' --source-table 'bfeaafc0c25f40a9asdasd672a9456f3' --dest-uri duckdb:///notion.duckdb --dest-table 'dest.output'
26
26
  ```
27
27
 
28
28
  The result of this command will be a table in the `notion.duckdb` database with JSON columns.
@@ -23,7 +23,7 @@ Shopify requires a few steps to set up an integration, please follow the guide d
23
23
  Once you complete the guide, you should have an API key and the store name to connect to. Let's say your API key is `shpkey_12345` and the store you'd like to connect to is `my-store`, here's a sample command that will copy the data from the Shopify store into a DuckDB database:
24
24
 
25
25
  ```sh
26
- ingestr ingest --source-uri 'shopify://my-store.myshopify.com?api_key=shpkey_12345' --source-table 'orders' --dest-uri duckdb:///shopify.duckdb --dest-table 'shopify.orders'
26
+ ingestr ingest --source-uri 'shopify://my-store.myshopify.com?api_key=shpkey_12345' --source-table 'orders' --dest-uri duckdb:///shopify.duckdb --dest-table 'dest.orders'
27
27
  ```
28
28
 
29
29
  The result of this command will be a table in the `shopify.duckdb` database with JSON columns.
@@ -25,7 +25,7 @@ Stripe requires a few steps to set up an integration, please follow the guide dl
25
25
  Once you complete the guide, you should have an API key. Let's say your API key is `sk_test_12345`, here's a sample command that will copy the data from Stripe into a DuckDB database:
26
26
 
27
27
  ```sh
28
- ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table 'stripe.charges'
28
+ ingestr ingest --source-uri 'stripe://?api_key=sk_test_12345' --source-table 'charges' --dest-uri duckdb:///stripe.duckdb --dest-table 'dest.charges'
29
29
  ```
30
30
 
31
31
  The result of this command will be a table in the `stripe.duckdb` database with JSON columns.
@@ -39,7 +39,7 @@ Once you complete the guide, if you decide to use an OAuth token, you should hav
39
39
  ingestr ingest --source-uri "zendesk://:qVsbdiasVt@mycompany" \
40
40
  --source-table 'tickets' \
41
41
  --dest-uri 'duckdb:///zendesk.duckdb' \
42
- --dest-table 'zendesk.tickets' \
42
+ --dest-table 'dest.tickets' \
43
43
  --interval-start '2024-01-01'
44
44
  ```
45
45
 
@@ -49,7 +49,7 @@ If you decide to use an API Token, you should have a subdomain, email, and API t
49
49
  ingestr ingest --source-uri "zendesk://john@get.com:nbs123@mycompany" \
50
50
  --source-table 'tickets' \
51
51
  --dest-uri 'duckdb:///zendesk.duckdb' \
52
- --dest-table 'zendesk.tickets' \
52
+ --dest-table 'dest.tickets' \
53
53
  --interval-start '2024-01-01'
54
54
  ```
55
55
 
@@ -288,6 +288,7 @@ def ingest(
288
288
  ),
289
289
  ] = [], # type: ignore
290
290
  ):
291
+ # TODO(turtledev): can't we move this to the top of this file?
291
292
  import hashlib
292
293
  import tempfile
293
294
  from datetime import datetime
@@ -383,6 +384,15 @@ def ingest(
383
384
  )
384
385
 
385
386
  factory = SourceDestinationFactory(source_uri, dest_uri)
387
+ track(
388
+ "command_running",
389
+ {
390
+ "command": "ingest",
391
+ "source_type": factory.source_scheme,
392
+ "destination_type": factory.destination_scheme,
393
+ },
394
+ )
395
+
386
396
  source = factory.get_source()
387
397
  destination = factory.get_destination()
388
398
 
@@ -67,13 +67,16 @@ def adjust_source(
67
67
  filters=filters,
68
68
  )
69
69
 
70
+ if not dimensions:
71
+ return campaigns, creatives
72
+
70
73
  merge_key = merge_key
74
+ type_hints = {}
71
75
  for dimension in REQUIRED_CUSTOM_DIMENSIONS:
72
76
  if dimension in dimensions:
73
77
  merge_key = dimension
74
78
  break
75
79
 
76
- type_hints = {}
77
80
  for dimension in dimensions:
78
81
  if dimension in KNOWN_TYPE_HINTS:
79
82
  type_hints[dimension] = KNOWN_TYPE_HINTS[dimension]
@@ -12,7 +12,7 @@ from .helpers import get_path_with_retry, get_url_with_retry, validate_month_str
12
12
  from .settings import UNOFFICIAL_CHESS_API_URL
13
13
 
14
14
 
15
- @dlt.source(name="chess")
15
+ @dlt.source(name="chess", max_table_nesting=0)
16
16
  def source(
17
17
  players: List[str], start_month: str = None, end_month: str = None
18
18
  ) -> Sequence[DltResource]:
@@ -250,7 +250,14 @@ class AthenaDestination:
250
250
  )
251
251
 
252
252
  def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
253
- return {}
253
+ table_fields = table.split(".")
254
+ if len(table_fields) != 2:
255
+ raise ValueError("Table name must be in the format <schema>.<table>")
256
+ return {
257
+ "table_format": "iceberg",
258
+ "dataset_name": table_fields[-2],
259
+ "table_name": table_fields[-1],
260
+ }
254
261
 
255
262
  def post_load(self):
256
263
  pass
@@ -50,7 +50,7 @@ from .settings import (
50
50
  THubspotObjectType = Literal["company", "contact", "deal", "ticket", "product", "quote"]
51
51
 
52
52
 
53
- @dlt.source(name="hubspot")
53
+ @dlt.source(name="hubspot", max_table_nesting=0)
54
54
  def hubspot(
55
55
  api_key: str = dlt.secrets.value,
56
56
  include_history: bool = False,
@@ -622,7 +622,7 @@ class HubspotSource:
622
622
 
623
623
  class AirtableSource:
624
624
  def handles_incrementality(self) -> bool:
625
- return True
625
+ return False
626
626
 
627
627
  # airtable://?access_token=<access_token>&base_id=<base_id>
628
628
 
@@ -11,7 +11,7 @@ from .helpers import pagination, transform_date
11
11
  from .settings import ENDPOINTS, INCREMENTAL_ENDPOINTS
12
12
 
13
13
 
14
- @dlt.source
14
+ @dlt.source(max_table_nesting=0)
15
15
  def stripe_source(
16
16
  endpoints: Tuple[str, ...] = ENDPOINTS,
17
17
  stripe_secret_key: str = dlt.secrets.value,
@@ -0,0 +1 @@
1
+ __version__ = "0.10.4"
@@ -23,7 +23,6 @@ from .settings import (
23
23
  TALK_ENDPOINTS,
24
24
  )
25
25
 
26
-
27
26
  @dlt.source(max_table_nesting=0)
28
27
  def zendesk_talk(
29
28
  credentials: TZendeskCredentials = dlt.secrets.value,
@@ -1 +0,0 @@
1
- __version__ = "0.10.2"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes