ingestr 0.9.5__tar.gz → 0.10.0rc1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (172)
  1. {ingestr-0.9.5 → ingestr-0.10.0rc1}/.github/workflows/tests.yml +39 -11
  2. {ingestr-0.9.5 → ingestr-0.10.0rc1}/.gitignore +3 -0
  3. {ingestr-0.9.5 → ingestr-0.10.0rc1}/Makefile +2 -2
  4. {ingestr-0.9.5 → ingestr-0.10.0rc1}/PKG-INFO +18 -18
  5. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/main.py +130 -37
  6. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/gorgias/__init__.py +17 -17
  7. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/__init__.py +42 -42
  8. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/slack/__init__.py +2 -2
  9. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/sources.py +13 -3
  10. ingestr-0.10.0rc1/ingestr/src/version.py +1 -0
  11. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/__init__.py +2 -2
  12. ingestr-0.10.0rc1/requirements-dev.txt +12 -0
  13. ingestr-0.10.0rc1/requirements.txt +31 -0
  14. ingestr-0.9.5/ingestr/src/sql_database/__init__.py +0 -206
  15. ingestr-0.9.5/ingestr/src/sql_database/arrow_helpers.py +0 -139
  16. ingestr-0.9.5/ingestr/src/sql_database/helpers.py +0 -282
  17. ingestr-0.9.5/ingestr/src/sql_database/override.py +0 -10
  18. ingestr-0.9.5/ingestr/src/sql_database/schema_types.py +0 -139
  19. ingestr-0.9.5/ingestr/src/version.py +0 -1
  20. ingestr-0.9.5/requirements-dev.txt +0 -10
  21. ingestr-0.9.5/requirements.txt +0 -31
  22. {ingestr-0.9.5 → ingestr-0.10.0rc1}/.dockerignore +0 -0
  23. {ingestr-0.9.5 → ingestr-0.10.0rc1}/.github/workflows/deploy-docs.yml +0 -0
  24. {ingestr-0.9.5 → ingestr-0.10.0rc1}/.python-version +0 -0
  25. {ingestr-0.9.5 → ingestr-0.10.0rc1}/.vale.ini +0 -0
  26. {ingestr-0.9.5 → ingestr-0.10.0rc1}/Dockerfile +0 -0
  27. {ingestr-0.9.5 → ingestr-0.10.0rc1}/LICENSE.md +0 -0
  28. {ingestr-0.9.5 → ingestr-0.10.0rc1}/README.md +0 -0
  29. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/.vitepress/config.mjs +0 -0
  30. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/.vitepress/theme/custom.css +0 -0
  31. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/.vitepress/theme/index.js +0 -0
  32. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/commands/example-uris.md +0 -0
  33. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/commands/ingest.md +0 -0
  34. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/core-concepts.md +0 -0
  35. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/incremental-loading.md +0 -0
  36. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/quickstart.md +0 -0
  37. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/getting-started/telemetry.md +0 -0
  38. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/index.md +0 -0
  39. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/adjust.md +0 -0
  40. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/airtable.md +0 -0
  41. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/appsflyer.md +0 -0
  42. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/bigquery.md +0 -0
  43. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/chess.md +0 -0
  44. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/csv.md +0 -0
  45. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/databricks.md +0 -0
  46. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/duckdb.md +0 -0
  47. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/facebook-ads.md +0 -0
  48. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/gorgias.md +0 -0
  49. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/gsheets.md +0 -0
  50. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/hubspot.md +0 -0
  51. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/kafka.md +0 -0
  52. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/klaviyo.md +0 -0
  53. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/mongodb.md +0 -0
  54. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/mssql.md +0 -0
  55. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/mysql.md +0 -0
  56. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/notion.md +0 -0
  57. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/oracle.md +0 -0
  58. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/postgres.md +0 -0
  59. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/redshift.md +0 -0
  60. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/s3.md +0 -0
  61. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/sap-hana.md +0 -0
  62. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/shopify.md +0 -0
  63. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/slack.md +0 -0
  64. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/snowflake.md +0 -0
  65. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/sqlite.md +0 -0
  66. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/stripe.md +0 -0
  67. {ingestr-0.9.5 → ingestr-0.10.0rc1}/docs/supported-sources/zendesk.md +0 -0
  68. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/.gitignore +0 -0
  69. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/adjust/__init__.py +0 -0
  70. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/adjust/adjust_helpers.py +0 -0
  71. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/airtable/__init__.py +0 -0
  72. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/appsflyer/_init_.py +0 -0
  73. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/appsflyer/client.py +0 -0
  74. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/arrow/__init__.py +0 -0
  75. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/chess/__init__.py +0 -0
  76. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/chess/helpers.py +0 -0
  77. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/chess/settings.py +0 -0
  78. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/destinations.py +0 -0
  79. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/__init__.py +0 -0
  80. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/exceptions.py +0 -0
  81. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/helpers.py +0 -0
  82. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/facebook_ads/settings.py +0 -0
  83. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/factory.py +0 -0
  84. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/filesystem/__init__.py +0 -0
  85. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/filesystem/helpers.py +0 -0
  86. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/filesystem/readers.py +0 -0
  87. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/README.md +0 -0
  88. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/__init__.py +0 -0
  89. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  90. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  91. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  92. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/gorgias/helpers.py +0 -0
  93. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/hubspot/__init__.py +0 -0
  94. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/hubspot/helpers.py +0 -0
  95. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/hubspot/settings.py +0 -0
  96. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/kafka/__init__.py +0 -0
  97. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/kafka/helpers.py +0 -0
  98. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/klaviyo/_init_.py +0 -0
  99. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/klaviyo/client.py +0 -0
  100. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/klaviyo/helpers.py +0 -0
  101. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/mongodb/__init__.py +0 -0
  102. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/mongodb/helpers.py +0 -0
  103. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/__init__.py +0 -0
  104. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/helpers/__init__.py +0 -0
  105. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/helpers/client.py +0 -0
  106. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/helpers/database.py +0 -0
  107. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/notion/settings.py +0 -0
  108. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/exceptions.py +0 -0
  109. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/helpers.py +0 -0
  110. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/shopify/settings.py +0 -0
  111. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/slack/helpers.py +0 -0
  112. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/slack/settings.py +0 -0
  113. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/stripe_analytics/__init__.py +0 -0
  114. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/stripe_analytics/helpers.py +0 -0
  115. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/stripe_analytics/settings.py +0 -0
  116. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/table_definition.py +0 -0
  117. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/telemetry/event.py +0 -0
  118. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  119. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  120. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  121. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  122. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  123. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/src/zendesk/settings.py +0 -0
  124. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/.gitignore +0 -0
  125. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/create_replace.csv +0 -0
  126. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/delete_insert_expected.csv +0 -0
  127. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/delete_insert_part1.csv +0 -0
  128. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/delete_insert_part2.csv +0 -0
  129. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/merge_expected.csv +0 -0
  130. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/merge_part1.csv +0 -0
  131. {ingestr-0.9.5 → ingestr-0.10.0rc1}/ingestr/testdata/merge_part2.csv +0 -0
  132. {ingestr-0.9.5 → ingestr-0.10.0rc1}/package-lock.json +0 -0
  133. {ingestr-0.9.5 → ingestr-0.10.0rc1}/package.json +0 -0
  134. {ingestr-0.9.5 → ingestr-0.10.0rc1}/pyproject.toml +0 -0
  135. {ingestr-0.9.5 → ingestr-0.10.0rc1}/resources/demo.gif +0 -0
  136. {ingestr-0.9.5 → ingestr-0.10.0rc1}/resources/demo.tape +0 -0
  137. {ingestr-0.9.5 → ingestr-0.10.0rc1}/resources/ingestr.svg +0 -0
  138. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/AMPM.yml +0 -0
  139. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Acronyms.yml +0 -0
  140. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Colons.yml +0 -0
  141. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Contractions.yml +0 -0
  142. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/DateFormat.yml +0 -0
  143. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Ellipses.yml +0 -0
  144. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/EmDash.yml +0 -0
  145. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Exclamation.yml +0 -0
  146. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/FirstPerson.yml +0 -0
  147. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Gender.yml +0 -0
  148. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/GenderBias.yml +0 -0
  149. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/HeadingPunctuation.yml +0 -0
  150. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Headings.yml +0 -0
  151. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Latin.yml +0 -0
  152. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/LyHyphens.yml +0 -0
  153. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/OptionalPlurals.yml +0 -0
  154. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Ordinal.yml +0 -0
  155. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/OxfordComma.yml +0 -0
  156. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Parens.yml +0 -0
  157. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Passive.yml +0 -0
  158. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Periods.yml +0 -0
  159. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Quotes.yml +0 -0
  160. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Ranges.yml +0 -0
  161. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Semicolons.yml +0 -0
  162. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Slang.yml +0 -0
  163. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Spacing.yml +0 -0
  164. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Spelling.yml +0 -0
  165. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Units.yml +0 -0
  166. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/We.yml +0 -0
  167. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/Will.yml +0 -0
  168. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/WordList.yml +0 -0
  169. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/meta.json +0 -0
  170. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/Google/vocab.txt +0 -0
  171. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/bruin/Ingestr.yml +0 -0
  172. {ingestr-0.9.5 → ingestr-0.10.0rc1}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -20,25 +20,53 @@ env:
20
20
 
21
21
  jobs:
22
22
  tests:
23
- runs-on: ubuntu-latest
23
+ strategy:
24
+ matrix:
25
+ # I tried running stuff on macOS but it was too slow and unreliable.
26
+ # I also tried windows runners but couldn't get Docker to work there, so I gave up.
27
+ os: [ubuntu-latest]
28
+ python-version: ['3.10', '3.11', '3.12']
29
+ runs-on: ${{ matrix.os }}
24
30
  steps:
25
31
  - uses: actions/checkout@v4
26
32
  - name: install Microsoft ODBC
33
+ if: matrix.os == 'ubuntu-latest'
27
34
  run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
35
+ - name: install Microsoft ODBC
36
+ if: matrix.os == 'macos-13'
37
+ run: |
38
+ brew tap microsoft/mssql-release https://github.com/Microsoft/homebrew-mssql-release
39
+ brew update
40
+ HOMEBREW_ACCEPT_EULA=Y brew install msodbcsql18
41
+ - name: Install Docker on macOS
42
+ if: matrix.os == 'macos-13'
43
+ run: |
44
+ brew install docker
45
+ brew install docker-compose
46
+ brew install colima
47
+ colima start
48
+ # Wait for Docker daemon to be ready
49
+ while ! docker system info > /dev/null 2>&1; do sleep 1; done
50
+ - name: install Microsoft ODBC
51
+ if: matrix.os == 'windows-latest'
52
+ run: |
53
+ Invoke-WebRequest -Uri https://go.microsoft.com/fwlink/?linkid=2249006 -OutFile msodbcsql.msi
54
+ Start-Process -FilePath "msiexec.exe" -ArgumentList "/i msodbcsql.msi /qn /norestart IACCEPTMSODBCSQLLICENSETERMS=YES" -Wait
28
55
  - uses: actions/setup-python@v4
29
56
  with:
30
- python-version: '3.11'
31
- cache: 'pip'
32
- - name: Cache dependencies
33
- uses: actions/cache@v3
34
- id: cache
35
- with:
36
- path: ${{ env.pythonLocation }}
37
- key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
57
+ python-version: ${{ matrix.python-version }}
58
+ - name: install uv
59
+ uses: astral-sh/setup-uv@v3
38
60
  - name: Install pip dependencies
39
- if: steps.cache.outputs.cache-hit != 'true'
40
61
  run: make deps-ci
41
- - name: run tests
62
+ - name: run tests (macOS)
63
+ if: matrix.os == 'macos-13'
64
+ run: make test-ci
65
+ env:
66
+ TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: /var/run/docker.sock
67
+ DOCKER_HOST: unix:///Users/runner/.colima/docker.sock
68
+ - name: run tests (other OS)
69
+ if: matrix.os != 'macos-13'
42
70
  run: make test-ci
43
71
  - name: check the formatting
44
72
  run: make lint-ci
@@ -5,6 +5,7 @@ venv
5
5
  !.gitkeep
6
6
  .vscode/*
7
7
  .metals/*
8
+ .coverage.*
8
9
  .coverage
9
10
  .ruff_cache
10
11
  .pytest_cache
@@ -15,4 +16,6 @@ docs/.vitepress/dist
15
16
  docs/.vitepress/cache
16
17
  node_modules
17
18
  *.duckdb
19
+ *.duckdb.wal
18
20
  *.db
21
+ *.db.wal
@@ -12,10 +12,10 @@ deps:
12
12
  uv pip install -r requirements-dev.txt
13
13
 
14
14
  deps-ci:
15
- pip install -r requirements-dev.txt
15
+ uv pip install --system -r requirements-dev.txt
16
16
 
17
17
  test-ci:
18
- pytest -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
18
+ pytest -n auto -x -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
19
19
 
20
20
  test: venv
21
21
  . venv/bin/activate; $(MAKE) test-ci
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.9.5
3
+ Version: 0.10.0rc1
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -13,35 +13,35 @@ Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Database
15
15
  Requires-Python: >=3.9
16
- Requires-Dist: confluent-kafka>=2.3.0
16
+ Requires-Dist: confluent-kafka>=2.6.1
17
17
  Requires-Dist: databricks-sql-connector==2.9.3
18
- Requires-Dist: dlt==0.5.1
19
- Requires-Dist: duckdb-engine==0.11.5
20
- Requires-Dist: duckdb==0.10.2
18
+ Requires-Dist: dlt==1.4.0
19
+ Requires-Dist: duckdb-engine==0.13.5
20
+ Requires-Dist: duckdb==1.1.3
21
21
  Requires-Dist: facebook-business==20.0.0
22
22
  Requires-Dist: google-api-python-client==2.130.0
23
23
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
24
- Requires-Dist: mysql-connector-python==9.0.0
24
+ Requires-Dist: mysql-connector-python==9.1.0
25
25
  Requires-Dist: pendulum==3.0.0
26
- Requires-Dist: psycopg2-binary==2.9.9
27
- Requires-Dist: py-machineid==0.5.1
26
+ Requires-Dist: psycopg2-binary==2.9.10
27
+ Requires-Dist: py-machineid==0.6.0
28
28
  Requires-Dist: pyairtable==2.3.3
29
- Requires-Dist: pymongo==4.6.3
30
- Requires-Dist: pymysql==1.1.0
31
- Requires-Dist: pyrate-limiter==3.6.1
32
- Requires-Dist: redshift-connector==2.1.0
33
- Requires-Dist: rich==13.7.1
34
- Requires-Dist: rudder-sdk-python==2.1.0
29
+ Requires-Dist: pymongo==4.10.1
30
+ Requires-Dist: pymysql==1.1.1
31
+ Requires-Dist: pyrate-limiter==3.7.0
32
+ Requires-Dist: redshift-connector==2.1.3
33
+ Requires-Dist: rich==13.9.4
34
+ Requires-Dist: rudder-sdk-python==2.1.4
35
35
  Requires-Dist: s3fs==2024.9.0
36
- Requires-Dist: snowflake-sqlalchemy==1.5.3
37
- Requires-Dist: sqlalchemy-bigquery==1.11.0
36
+ Requires-Dist: snowflake-sqlalchemy==1.6.1
37
+ Requires-Dist: sqlalchemy-bigquery==1.12.0
38
38
  Requires-Dist: sqlalchemy-hana==2.0.0
39
39
  Requires-Dist: sqlalchemy-redshift==0.8.14
40
40
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
41
41
  Requires-Dist: sqlalchemy==1.4.52
42
42
  Requires-Dist: stripe==10.7.0
43
- Requires-Dist: tqdm==4.66.2
44
- Requires-Dist: typer==0.12.3
43
+ Requires-Dist: tqdm==4.67.0
44
+ Requires-Dist: typer==0.13.1
45
45
  Requires-Dist: types-requests==2.32.0.20240907
46
46
  Provides-Extra: odbc
47
47
  Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
@@ -1,19 +1,13 @@
1
- import hashlib
2
- import tempfile
3
1
  from datetime import datetime
4
2
  from enum import Enum
5
3
  from typing import Optional
6
4
 
7
- import dlt
8
- import humanize
9
5
  import typer
10
- from dlt.common.pipeline import LoadInfo
11
- from dlt.common.runtime.collector import Collector, LogCollector
6
+ from dlt.common.runtime.collector import Collector
12
7
  from rich.console import Console
13
8
  from rich.status import Status
14
9
  from typing_extensions import Annotated
15
10
 
16
- from ingestr.src.factory import SourceDestinationFactory
17
11
  from ingestr.src.telemetry.event import track
18
12
 
19
13
  app = typer.Typer(
@@ -118,6 +112,12 @@ class SchemaNaming(str, Enum):
118
112
  direct = "direct"
119
113
 
120
114
 
115
+ class SqlReflectionLevel(str, Enum):
116
+ minimal = "minimal"
117
+ full = "full"
118
+ full_with_precision = "full_with_precision"
119
+
120
+
121
121
  @app.command()
122
122
  def ingest(
123
123
  source_uri: Annotated[
@@ -181,6 +181,20 @@ def ingest(
181
181
  envvar="PRIMARY_KEY",
182
182
  ),
183
183
  ] = None, # type: ignore
184
+ partition_by: Annotated[
185
+ Optional[str],
186
+ typer.Option(
187
+ help="The partition key to be used for partitioning the destination table",
188
+ envvar="PARTITION_BY",
189
+ ),
190
+ ] = None, # type: ignore
191
+ cluster_by: Annotated[
192
+ Optional[str],
193
+ typer.Option(
194
+ help="The clustering key to be used for clustering the destination table, not every destination supports clustering.",
195
+ envvar="CLUSTER_BY",
196
+ ),
197
+ ] = None, # type: ignore
184
198
  yes: Annotated[
185
199
  Optional[bool],
186
200
  typer.Option(
@@ -251,7 +265,81 @@ def ingest(
251
265
  envvar="EXTRACT_PARALLELISM",
252
266
  ),
253
267
  ] = 5, # type: ignore
268
+ sql_reflection_level: Annotated[
269
+ SqlReflectionLevel,
270
+ typer.Option(
271
+ help="The reflection level to use when reflecting the table schema from the source",
272
+ envvar="SQL_REFLECTION_LEVEL",
273
+ ),
274
+ ] = SqlReflectionLevel.full, # type: ignore
275
+ sql_limit: Annotated[
276
+ Optional[int],
277
+ typer.Option(
278
+ help="The limit to use when fetching data from the source",
279
+ envvar="SQL_LIMIT",
280
+ ),
281
+ ] = None, # type: ignore
254
282
  ):
283
+ import hashlib
284
+ import tempfile
285
+ from datetime import datetime
286
+
287
+ import dlt
288
+ import humanize
289
+ import typer
290
+ from dlt.common.destination import Destination
291
+ from dlt.common.pipeline import LoadInfo
292
+ from dlt.common.runtime.collector import Collector, LogCollector
293
+ from dlt.common.schema.typing import TColumnSchema
294
+
295
+ from ingestr.src.factory import SourceDestinationFactory
296
+ from ingestr.src.telemetry.event import track
297
+
298
+ def report_errors(run_info: LoadInfo):
299
+ for load_package in run_info.load_packages:
300
+ failed_jobs = load_package.jobs["failed_jobs"]
301
+ if len(failed_jobs) == 0:
302
+ continue
303
+
304
+ print()
305
+ print("[bold red]Failed jobs:[/bold red]")
306
+ print()
307
+ for job in failed_jobs:
308
+ print(f"[bold red] {job.job_file_info.job_id()}[/bold red]")
309
+ print(f" [bold yellow]Error:[/bold yellow] {job.failed_message}")
310
+
311
+ raise typer.Exit(1)
312
+
313
+ def validate_source_dest_tables(
314
+ source_table: str, dest_table: str
315
+ ) -> tuple[str, str]:
316
+ if not dest_table:
317
+ if len(source_table.split(".")) != 2:
318
+ print(
319
+ "[red]Table name must be in the format schema.table for source table when dest-table is not given.[/red]"
320
+ )
321
+ raise typer.Abort()
322
+
323
+ print()
324
+ print(
325
+ "[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
326
+ )
327
+ dest_table = source_table
328
+ return (source_table, dest_table)
329
+
330
+ def validate_loader_file_format(
331
+ dlt_dest: Destination, loader_file_format: Optional[LoaderFileFormat]
332
+ ):
333
+ if (
334
+ loader_file_format
335
+ and loader_file_format.value
336
+ not in dlt_dest.capabilities().supported_loader_file_formats
337
+ ):
338
+ print(
339
+ f"[red]Loader file format {loader_file_format.value} is not supported by the destination.[/red]"
340
+ )
341
+ raise typer.Abort()
342
+
255
343
  track(
256
344
  "command_triggered",
257
345
  {
@@ -267,29 +355,23 @@ def ingest(
267
355
  dlt.config["schema.naming"] = schema_naming.value
268
356
 
269
357
  try:
270
- if not dest_table:
271
- if len(source_table.split(".")) != 2:
272
- print(
273
- "[red]Table name must be in the format schema.table for source table when dest-table is not given.[/red]"
274
- )
275
- raise typer.Abort()
276
-
277
- print()
278
- print(
279
- "[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
280
- )
281
- dest_table = source_table
358
+ (source_table, dest_table) = validate_source_dest_tables(
359
+ source_table, dest_table
360
+ )
282
361
 
283
362
  factory = SourceDestinationFactory(source_uri, dest_uri)
284
363
  source = factory.get_source()
285
364
  destination = factory.get_destination()
286
365
 
366
+ column_hints: dict[str, TColumnSchema] = {}
287
367
  original_incremental_strategy = incremental_strategy
288
368
 
289
369
  merge_key = None
290
370
  if incremental_strategy == IncrementalStrategy.delete_insert:
291
371
  merge_key = incremental_key
292
372
  incremental_strategy = IncrementalStrategy.merge
373
+ if incremental_key:
374
+ column_hints[incremental_key] = {"merge_key": True}
293
375
 
294
376
  m = hashlib.sha256()
295
377
  m.update(dest_table.encode("utf-8"))
@@ -303,11 +385,31 @@ def ingest(
303
385
  pipelines_dir = tempfile.mkdtemp()
304
386
  is_pipelines_dir_temp = True
305
387
 
388
+ dlt_dest = destination.dlt_dest(uri=dest_uri)
389
+ validate_loader_file_format(dlt_dest, loader_file_format)
390
+
391
+ if partition_by:
392
+ if partition_by not in column_hints:
393
+ column_hints[partition_by] = {}
394
+
395
+ column_hints[partition_by]["partition"] = True
396
+
397
+ if cluster_by:
398
+ if cluster_by not in column_hints:
399
+ column_hints[cluster_by] = {}
400
+
401
+ column_hints[cluster_by]["cluster"] = True
402
+
403
+ if primary_key:
404
+ for key in primary_key:
405
+ if key not in column_hints:
406
+ column_hints[key] = {}
407
+
408
+ column_hints[key]["primary_key"] = True
409
+
306
410
  pipeline = dlt.pipeline(
307
411
  pipeline_name=m.hexdigest(),
308
- destination=destination.dlt_dest(
309
- uri=dest_uri,
310
- ),
412
+ destination=dlt_dest,
311
413
  progress=progressInstance,
312
414
  pipelines_dir=pipelines_dir,
313
415
  refresh="drop_resources" if full_refresh else None,
@@ -365,6 +467,8 @@ def ingest(
365
467
  interval_end=interval_end,
366
468
  sql_backend=sql_backend.value,
367
469
  page_size=page_size,
470
+ sql_reflection_level=sql_reflection_level.value,
471
+ sql_limit=sql_limit,
368
472
  )
369
473
 
370
474
  if original_incremental_strategy == IncrementalStrategy.delete_insert:
@@ -400,29 +504,18 @@ def ingest(
400
504
  loader_file_format=loader_file_format.value
401
505
  if loader_file_format is not None
402
506
  else None, # type: ignore
507
+ columns=column_hints,
403
508
  )
404
509
 
405
- for load_package in run_info.load_packages:
406
- failed_jobs = load_package.jobs["failed_jobs"]
407
- if len(failed_jobs) > 0:
408
- print()
409
- print("[bold red]Failed jobs:[/bold red]")
410
- print()
411
- for job in failed_jobs:
412
- print(f"[bold red] {job.job_file_info.job_id()}[/bold red]")
413
- print(f" [bold yellow]Error:[/bold yellow] {job.failed_message}")
414
-
415
- raise typer.Exit(1)
510
+ report_errors(run_info)
416
511
 
417
512
  destination.post_load()
418
513
 
419
514
  end_time = datetime.now()
420
515
  elapsedHuman = ""
421
- if run_info.started_at:
422
- elapsed = end_time - start_time
423
- elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
516
+ elapsed = end_time - start_time
517
+ elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
424
518
 
425
- # remove the pipelines_dir folder if it was created by ingestr
426
519
  if is_pipelines_dir_temp:
427
520
  import shutil
428
521
 
@@ -99,12 +99,12 @@ def gorgias_source(
99
99
  "description": "When the user was last updated.",
100
100
  },
101
101
  "meta": {
102
- "data_type": "complex",
102
+ "data_type": "json",
103
103
  "nullable": True,
104
104
  "description": "Meta information associated with the user.",
105
105
  },
106
106
  "data": {
107
- "data_type": "complex",
107
+ "data_type": "json",
108
108
  "nullable": True,
109
109
  "description": "Additional data associated with the user.",
110
110
  },
@@ -185,17 +185,17 @@ def gorgias_source(
185
185
  "description": "Indicates if the ticket was created by an agent",
186
186
  },
187
187
  "customer": {
188
- "data_type": "complex",
188
+ "data_type": "json",
189
189
  "nullable": False,
190
190
  "description": "The customer linked to the ticket.",
191
191
  },
192
192
  "assignee_user": {
193
- "data_type": "complex",
193
+ "data_type": "json",
194
194
  "nullable": True,
195
195
  "description": "User assigned to the ticket",
196
196
  },
197
197
  "assignee_team": {
198
- "data_type": "complex",
198
+ "data_type": "json",
199
199
  "nullable": True,
200
200
  "description": "Team assigned to the ticket",
201
201
  },
@@ -210,17 +210,17 @@ def gorgias_source(
210
210
  "description": "Excerpt of the ticket",
211
211
  },
212
212
  "integrations": {
213
- "data_type": "complex",
213
+ "data_type": "json",
214
214
  "nullable": False,
215
215
  "description": "Integration information related to the ticket",
216
216
  },
217
217
  "meta": {
218
- "data_type": "complex",
218
+ "data_type": "json",
219
219
  "nullable": True,
220
220
  "description": "Meta information related to the ticket",
221
221
  },
222
222
  "tags": {
223
- "data_type": "complex",
223
+ "data_type": "json",
224
224
  "nullable": False,
225
225
  "description": "Tags associated with the ticket",
226
226
  },
@@ -354,7 +354,7 @@ def gorgias_source(
354
354
  "description": "How the message has been received, or sent from Gorgias.",
355
355
  },
356
356
  "sender": {
357
- "data_type": "complex",
357
+ "data_type": "json",
358
358
  "nullable": False,
359
359
  "description": "The person who sent the message. It can be a user or a customer.",
360
360
  },
@@ -364,7 +364,7 @@ def gorgias_source(
364
364
  "description": "ID of the integration that either received or sent the message.",
365
365
  },
366
366
  "intents": {
367
- "data_type": "complex",
367
+ "data_type": "json",
368
368
  "nullable": True,
369
369
  "description": "",
370
370
  },
@@ -379,7 +379,7 @@ def gorgias_source(
379
379
  "description": "Whether the message was sent by your company to a customer, or the opposite.",
380
380
  },
381
381
  "receiver": {
382
- "data_type": "complex",
382
+ "data_type": "json",
383
383
  "nullable": True,
384
384
  "description": "The primary receiver of the message. It can be a user or a customer. Optional when the source type is 'internal-note'.",
385
385
  },
@@ -414,27 +414,27 @@ def gorgias_source(
414
414
  "description": "",
415
415
  },
416
416
  "headers": {
417
- "data_type": "complex",
417
+ "data_type": "json",
418
418
  "nullable": True,
419
419
  "description": "Headers of the message",
420
420
  },
421
421
  "attachments": {
422
- "data_type": "complex",
422
+ "data_type": "json",
423
423
  "nullable": True,
424
424
  "description": "A list of files attached to the message.",
425
425
  },
426
426
  "actions": {
427
- "data_type": "complex",
427
+ "data_type": "json",
428
428
  "nullable": True,
429
429
  "description": "A list of actions performed on the message.",
430
430
  },
431
431
  "macros": {
432
- "data_type": "complex",
432
+ "data_type": "json",
433
433
  "nullable": True,
434
434
  "description": "A list of macros",
435
435
  },
436
436
  "meta": {
437
- "data_type": "complex",
437
+ "data_type": "json",
438
438
  "nullable": True,
439
439
  "description": "Message metadata",
440
440
  },
@@ -526,7 +526,7 @@ def gorgias_source(
526
526
  "description": "ID of the customer linked to the survey.",
527
527
  },
528
528
  "meta": {
529
- "data_type": "complex",
529
+ "data_type": "json",
530
530
  "nullable": True,
531
531
  "description": "Meta information associated with the survey.",
532
532
  },