ingestr 0.9.5__tar.gz → 0.10.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (172)
  1. {ingestr-0.9.5 → ingestr-0.10.0rc0}/.github/workflows/tests.yml +39 -11
  2. {ingestr-0.9.5 → ingestr-0.10.0rc0}/.gitignore +3 -0
  3. {ingestr-0.9.5 → ingestr-0.10.0rc0}/Makefile +2 -2
  4. {ingestr-0.9.5 → ingestr-0.10.0rc0}/PKG-INFO +18 -18
  5. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/main.py +108 -36
  6. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/gorgias/__init__.py +17 -17
  7. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/shopify/__init__.py +42 -42
  8. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/slack/__init__.py +2 -2
  9. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/sources.py +3 -3
  10. ingestr-0.10.0rc0/ingestr/src/version.py +1 -0
  11. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/zendesk/__init__.py +2 -2
  12. ingestr-0.10.0rc0/requirements-dev.txt +12 -0
  13. ingestr-0.10.0rc0/requirements.txt +31 -0
  14. ingestr-0.9.5/ingestr/src/sql_database/__init__.py +0 -206
  15. ingestr-0.9.5/ingestr/src/sql_database/arrow_helpers.py +0 -139
  16. ingestr-0.9.5/ingestr/src/sql_database/helpers.py +0 -282
  17. ingestr-0.9.5/ingestr/src/sql_database/override.py +0 -10
  18. ingestr-0.9.5/ingestr/src/sql_database/schema_types.py +0 -139
  19. ingestr-0.9.5/ingestr/src/version.py +0 -1
  20. ingestr-0.9.5/requirements-dev.txt +0 -10
  21. ingestr-0.9.5/requirements.txt +0 -31
  22. {ingestr-0.9.5 → ingestr-0.10.0rc0}/.dockerignore +0 -0
  23. {ingestr-0.9.5 → ingestr-0.10.0rc0}/.github/workflows/deploy-docs.yml +0 -0
  24. {ingestr-0.9.5 → ingestr-0.10.0rc0}/.python-version +0 -0
  25. {ingestr-0.9.5 → ingestr-0.10.0rc0}/.vale.ini +0 -0
  26. {ingestr-0.9.5 → ingestr-0.10.0rc0}/Dockerfile +0 -0
  27. {ingestr-0.9.5 → ingestr-0.10.0rc0}/LICENSE.md +0 -0
  28. {ingestr-0.9.5 → ingestr-0.10.0rc0}/README.md +0 -0
  29. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/.vitepress/config.mjs +0 -0
  30. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/.vitepress/theme/custom.css +0 -0
  31. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/.vitepress/theme/index.js +0 -0
  32. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/commands/example-uris.md +0 -0
  33. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/commands/ingest.md +0 -0
  34. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/getting-started/core-concepts.md +0 -0
  35. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/getting-started/incremental-loading.md +0 -0
  36. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/getting-started/quickstart.md +0 -0
  37. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/getting-started/telemetry.md +0 -0
  38. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/index.md +0 -0
  39. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/adjust.md +0 -0
  40. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/airtable.md +0 -0
  41. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/appsflyer.md +0 -0
  42. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/bigquery.md +0 -0
  43. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/chess.md +0 -0
  44. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/csv.md +0 -0
  45. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/databricks.md +0 -0
  46. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/duckdb.md +0 -0
  47. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/facebook-ads.md +0 -0
  48. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/gorgias.md +0 -0
  49. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/gsheets.md +0 -0
  50. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/hubspot.md +0 -0
  51. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/kafka.md +0 -0
  52. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/klaviyo.md +0 -0
  53. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/mongodb.md +0 -0
  54. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/mssql.md +0 -0
  55. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/mysql.md +0 -0
  56. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/notion.md +0 -0
  57. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/oracle.md +0 -0
  58. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/postgres.md +0 -0
  59. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/redshift.md +0 -0
  60. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/s3.md +0 -0
  61. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/sap-hana.md +0 -0
  62. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/shopify.md +0 -0
  63. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/slack.md +0 -0
  64. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/snowflake.md +0 -0
  65. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/sqlite.md +0 -0
  66. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/stripe.md +0 -0
  67. {ingestr-0.9.5 → ingestr-0.10.0rc0}/docs/supported-sources/zendesk.md +0 -0
  68. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/.gitignore +0 -0
  69. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/adjust/__init__.py +0 -0
  70. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/adjust/adjust_helpers.py +0 -0
  71. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/airtable/__init__.py +0 -0
  72. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/appsflyer/_init_.py +0 -0
  73. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/appsflyer/client.py +0 -0
  74. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/arrow/__init__.py +0 -0
  75. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/chess/__init__.py +0 -0
  76. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/chess/helpers.py +0 -0
  77. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/chess/settings.py +0 -0
  78. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/destinations.py +0 -0
  79. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/facebook_ads/__init__.py +0 -0
  80. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/facebook_ads/exceptions.py +0 -0
  81. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/facebook_ads/helpers.py +0 -0
  82. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/facebook_ads/settings.py +0 -0
  83. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/factory.py +0 -0
  84. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/filesystem/__init__.py +0 -0
  85. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/filesystem/helpers.py +0 -0
  86. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/filesystem/readers.py +0 -0
  87. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/google_sheets/README.md +0 -0
  88. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/google_sheets/__init__.py +0 -0
  89. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  90. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  91. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  92. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/gorgias/helpers.py +0 -0
  93. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/hubspot/__init__.py +0 -0
  94. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/hubspot/helpers.py +0 -0
  95. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/hubspot/settings.py +0 -0
  96. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/kafka/__init__.py +0 -0
  97. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/kafka/helpers.py +0 -0
  98. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/klaviyo/_init_.py +0 -0
  99. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/klaviyo/client.py +0 -0
  100. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/klaviyo/helpers.py +0 -0
  101. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/mongodb/__init__.py +0 -0
  102. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/mongodb/helpers.py +0 -0
  103. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/notion/__init__.py +0 -0
  104. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/notion/helpers/__init__.py +0 -0
  105. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/notion/helpers/client.py +0 -0
  106. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/notion/helpers/database.py +0 -0
  107. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/notion/settings.py +0 -0
  108. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/shopify/exceptions.py +0 -0
  109. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/shopify/helpers.py +0 -0
  110. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/shopify/settings.py +0 -0
  111. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/slack/helpers.py +0 -0
  112. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/slack/settings.py +0 -0
  113. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/stripe_analytics/__init__.py +0 -0
  114. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/stripe_analytics/helpers.py +0 -0
  115. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/stripe_analytics/settings.py +0 -0
  116. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/table_definition.py +0 -0
  117. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/telemetry/event.py +0 -0
  118. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  119. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  120. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  121. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  122. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  123. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/src/zendesk/settings.py +0 -0
  124. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/.gitignore +0 -0
  125. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/create_replace.csv +0 -0
  126. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/delete_insert_expected.csv +0 -0
  127. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/delete_insert_part1.csv +0 -0
  128. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/delete_insert_part2.csv +0 -0
  129. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/merge_expected.csv +0 -0
  130. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/merge_part1.csv +0 -0
  131. {ingestr-0.9.5 → ingestr-0.10.0rc0}/ingestr/testdata/merge_part2.csv +0 -0
  132. {ingestr-0.9.5 → ingestr-0.10.0rc0}/package-lock.json +0 -0
  133. {ingestr-0.9.5 → ingestr-0.10.0rc0}/package.json +0 -0
  134. {ingestr-0.9.5 → ingestr-0.10.0rc0}/pyproject.toml +0 -0
  135. {ingestr-0.9.5 → ingestr-0.10.0rc0}/resources/demo.gif +0 -0
  136. {ingestr-0.9.5 → ingestr-0.10.0rc0}/resources/demo.tape +0 -0
  137. {ingestr-0.9.5 → ingestr-0.10.0rc0}/resources/ingestr.svg +0 -0
  138. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/AMPM.yml +0 -0
  139. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Acronyms.yml +0 -0
  140. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Colons.yml +0 -0
  141. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Contractions.yml +0 -0
  142. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/DateFormat.yml +0 -0
  143. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Ellipses.yml +0 -0
  144. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/EmDash.yml +0 -0
  145. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Exclamation.yml +0 -0
  146. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/FirstPerson.yml +0 -0
  147. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Gender.yml +0 -0
  148. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/GenderBias.yml +0 -0
  149. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/HeadingPunctuation.yml +0 -0
  150. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Headings.yml +0 -0
  151. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Latin.yml +0 -0
  152. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/LyHyphens.yml +0 -0
  153. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/OptionalPlurals.yml +0 -0
  154. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Ordinal.yml +0 -0
  155. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/OxfordComma.yml +0 -0
  156. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Parens.yml +0 -0
  157. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Passive.yml +0 -0
  158. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Periods.yml +0 -0
  159. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Quotes.yml +0 -0
  160. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Ranges.yml +0 -0
  161. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Semicolons.yml +0 -0
  162. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Slang.yml +0 -0
  163. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Spacing.yml +0 -0
  164. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Spelling.yml +0 -0
  165. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Units.yml +0 -0
  166. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/We.yml +0 -0
  167. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/Will.yml +0 -0
  168. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/WordList.yml +0 -0
  169. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/meta.json +0 -0
  170. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/Google/vocab.txt +0 -0
  171. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/bruin/Ingestr.yml +0 -0
  172. {ingestr-0.9.5 → ingestr-0.10.0rc0}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -20,25 +20,53 @@ env:
20
20
 
21
21
  jobs:
22
22
  tests:
23
- runs-on: ubuntu-latest
23
+ strategy:
24
+ matrix:
25
+ # I tried running stuff on macOS but it was too slow and unreliable.
26
+ # I also tried windows runners but couldn't get Docker to work there, so I gave up.
27
+ os: [ubuntu-latest]
28
+ python-version: ['3.10', '3.11', '3.12']
29
+ runs-on: ${{ matrix.os }}
24
30
  steps:
25
31
  - uses: actions/checkout@v4
26
32
  - name: install Microsoft ODBC
33
+ if: matrix.os == 'ubuntu-latest'
27
34
  run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
35
+ - name: install Microsoft ODBC
36
+ if: matrix.os == 'macos-13'
37
+ run: |
38
+ brew tap microsoft/mssql-release https://github.com/Microsoft/homebrew-mssql-release
39
+ brew update
40
+ HOMEBREW_ACCEPT_EULA=Y brew install msodbcsql18
41
+ - name: Install Docker on macOS
42
+ if: matrix.os == 'macos-13'
43
+ run: |
44
+ brew install docker
45
+ brew install docker-compose
46
+ brew install colima
47
+ colima start
48
+ # Wait for Docker daemon to be ready
49
+ while ! docker system info > /dev/null 2>&1; do sleep 1; done
50
+ - name: install Microsoft ODBC
51
+ if: matrix.os == 'windows-latest'
52
+ run: |
53
+ Invoke-WebRequest -Uri https://go.microsoft.com/fwlink/?linkid=2249006 -OutFile msodbcsql.msi
54
+ Start-Process -FilePath "msiexec.exe" -ArgumentList "/i msodbcsql.msi /qn /norestart IACCEPTMSODBCSQLLICENSETERMS=YES" -Wait
28
55
  - uses: actions/setup-python@v4
29
56
  with:
30
- python-version: '3.11'
31
- cache: 'pip'
32
- - name: Cache dependencies
33
- uses: actions/cache@v3
34
- id: cache
35
- with:
36
- path: ${{ env.pythonLocation }}
37
- key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
57
+ python-version: ${{ matrix.python-version }}
58
+ - name: install uv
59
+ uses: astral-sh/setup-uv@v3
38
60
  - name: Install pip dependencies
39
- if: steps.cache.outputs.cache-hit != 'true'
40
61
  run: make deps-ci
41
- - name: run tests
62
+ - name: run tests (macOS)
63
+ if: matrix.os == 'macos-13'
64
+ run: make test-ci
65
+ env:
66
+ TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: /var/run/docker.sock
67
+ DOCKER_HOST: unix:///Users/runner/.colima/docker.sock
68
+ - name: run tests (other OS)
69
+ if: matrix.os != 'macos-13'
42
70
  run: make test-ci
43
71
  - name: check the formatting
44
72
  run: make lint-ci
@@ -5,6 +5,7 @@ venv
5
5
  !.gitkeep
6
6
  .vscode/*
7
7
  .metals/*
8
+ .coverage.*
8
9
  .coverage
9
10
  .ruff_cache
10
11
  .pytest_cache
@@ -15,4 +16,6 @@ docs/.vitepress/dist
15
16
  docs/.vitepress/cache
16
17
  node_modules
17
18
  *.duckdb
19
+ *.duckdb.wal
18
20
  *.db
21
+ *.db.wal
@@ -12,10 +12,10 @@ deps:
12
12
  uv pip install -r requirements-dev.txt
13
13
 
14
14
  deps-ci:
15
- pip install -r requirements-dev.txt
15
+ uv pip install --system -r requirements-dev.txt
16
16
 
17
17
  test-ci:
18
- pytest -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
18
+ pytest -n logical -x -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
19
19
 
20
20
  test: venv
21
21
  . venv/bin/activate; $(MAKE) test-ci
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.9.5
3
+ Version: 0.10.0rc0
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -13,35 +13,35 @@ Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Database
15
15
  Requires-Python: >=3.9
16
- Requires-Dist: confluent-kafka>=2.3.0
16
+ Requires-Dist: confluent-kafka>=2.6.1
17
17
  Requires-Dist: databricks-sql-connector==2.9.3
18
- Requires-Dist: dlt==0.5.1
19
- Requires-Dist: duckdb-engine==0.11.5
20
- Requires-Dist: duckdb==0.10.2
18
+ Requires-Dist: dlt==1.4.0
19
+ Requires-Dist: duckdb-engine==0.13.5
20
+ Requires-Dist: duckdb==1.1.3
21
21
  Requires-Dist: facebook-business==20.0.0
22
22
  Requires-Dist: google-api-python-client==2.130.0
23
23
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
24
- Requires-Dist: mysql-connector-python==9.0.0
24
+ Requires-Dist: mysql-connector-python==9.1.0
25
25
  Requires-Dist: pendulum==3.0.0
26
- Requires-Dist: psycopg2-binary==2.9.9
27
- Requires-Dist: py-machineid==0.5.1
26
+ Requires-Dist: psycopg2-binary==2.9.10
27
+ Requires-Dist: py-machineid==0.6.0
28
28
  Requires-Dist: pyairtable==2.3.3
29
- Requires-Dist: pymongo==4.6.3
30
- Requires-Dist: pymysql==1.1.0
31
- Requires-Dist: pyrate-limiter==3.6.1
32
- Requires-Dist: redshift-connector==2.1.0
33
- Requires-Dist: rich==13.7.1
34
- Requires-Dist: rudder-sdk-python==2.1.0
29
+ Requires-Dist: pymongo==4.10.1
30
+ Requires-Dist: pymysql==1.1.1
31
+ Requires-Dist: pyrate-limiter==3.7.0
32
+ Requires-Dist: redshift-connector==2.1.3
33
+ Requires-Dist: rich==13.9.4
34
+ Requires-Dist: rudder-sdk-python==2.1.4
35
35
  Requires-Dist: s3fs==2024.9.0
36
- Requires-Dist: snowflake-sqlalchemy==1.5.3
37
- Requires-Dist: sqlalchemy-bigquery==1.11.0
36
+ Requires-Dist: snowflake-sqlalchemy==1.6.1
37
+ Requires-Dist: sqlalchemy-bigquery==1.12.0
38
38
  Requires-Dist: sqlalchemy-hana==2.0.0
39
39
  Requires-Dist: sqlalchemy-redshift==0.8.14
40
40
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
41
41
  Requires-Dist: sqlalchemy==1.4.52
42
42
  Requires-Dist: stripe==10.7.0
43
- Requires-Dist: tqdm==4.66.2
44
- Requires-Dist: typer==0.12.3
43
+ Requires-Dist: tqdm==4.67.0
44
+ Requires-Dist: typer==0.13.1
45
45
  Requires-Dist: types-requests==2.32.0.20240907
46
46
  Provides-Extra: odbc
47
47
  Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
@@ -1,19 +1,13 @@
1
- import hashlib
2
- import tempfile
3
1
  from datetime import datetime
4
2
  from enum import Enum
5
3
  from typing import Optional
6
4
 
7
- import dlt
8
- import humanize
9
5
  import typer
10
- from dlt.common.pipeline import LoadInfo
11
- from dlt.common.runtime.collector import Collector, LogCollector
6
+ from dlt.common.runtime.collector import Collector
12
7
  from rich.console import Console
13
8
  from rich.status import Status
14
9
  from typing_extensions import Annotated
15
10
 
16
- from ingestr.src.factory import SourceDestinationFactory
17
11
  from ingestr.src.telemetry.event import track
18
12
 
19
13
  app = typer.Typer(
@@ -181,6 +175,20 @@ def ingest(
181
175
  envvar="PRIMARY_KEY",
182
176
  ),
183
177
  ] = None, # type: ignore
178
+ partition_by: Annotated[
179
+ Optional[str],
180
+ typer.Option(
181
+ help="The partition key to be used for partitioning the destination table",
182
+ envvar="PARTITION_BY",
183
+ ),
184
+ ] = None, # type: ignore
185
+ cluster_by: Annotated[
186
+ Optional[str],
187
+ typer.Option(
188
+ help="The clustering key to be used for clustering the destination table, not every destination supports clustering.",
189
+ envvar="CLUSTER_BY",
190
+ ),
191
+ ] = None, # type: ignore
184
192
  yes: Annotated[
185
193
  Optional[bool],
186
194
  typer.Option(
@@ -252,6 +260,66 @@ def ingest(
252
260
  ),
253
261
  ] = 5, # type: ignore
254
262
  ):
263
+ import hashlib
264
+ import tempfile
265
+ from datetime import datetime
266
+
267
+ import dlt
268
+ import humanize
269
+ import typer
270
+ from dlt.common.destination import Destination
271
+ from dlt.common.pipeline import LoadInfo
272
+ from dlt.common.runtime.collector import Collector, LogCollector
273
+ from dlt.common.schema.typing import TColumnSchema
274
+
275
+ from ingestr.src.factory import SourceDestinationFactory
276
+ from ingestr.src.telemetry.event import track
277
+
278
+ def report_errors(run_info: LoadInfo):
279
+ for load_package in run_info.load_packages:
280
+ failed_jobs = load_package.jobs["failed_jobs"]
281
+ if len(failed_jobs) == 0:
282
+ continue
283
+
284
+ print()
285
+ print("[bold red]Failed jobs:[/bold red]")
286
+ print()
287
+ for job in failed_jobs:
288
+ print(f"[bold red] {job.job_file_info.job_id()}[/bold red]")
289
+ print(f" [bold yellow]Error:[/bold yellow] {job.failed_message}")
290
+
291
+ raise typer.Exit(1)
292
+
293
+ def validate_source_dest_tables(
294
+ source_table: str, dest_table: str
295
+ ) -> tuple[str, str]:
296
+ if not dest_table:
297
+ if len(source_table.split(".")) != 2:
298
+ print(
299
+ "[red]Table name must be in the format schema.table for source table when dest-table is not given.[/red]"
300
+ )
301
+ raise typer.Abort()
302
+
303
+ print()
304
+ print(
305
+ "[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
306
+ )
307
+ dest_table = source_table
308
+ return (source_table, dest_table)
309
+
310
+ def validate_loader_file_format(
311
+ dlt_dest: Destination, loader_file_format: Optional[LoaderFileFormat]
312
+ ):
313
+ if (
314
+ loader_file_format
315
+ and loader_file_format.value
316
+ not in dlt_dest.capabilities().supported_loader_file_formats
317
+ ):
318
+ print(
319
+ f"[red]Loader file format {loader_file_format.value} is not supported by the destination.[/red]"
320
+ )
321
+ raise typer.Abort()
322
+
255
323
  track(
256
324
  "command_triggered",
257
325
  {
@@ -267,29 +335,23 @@ def ingest(
267
335
  dlt.config["schema.naming"] = schema_naming.value
268
336
 
269
337
  try:
270
- if not dest_table:
271
- if len(source_table.split(".")) != 2:
272
- print(
273
- "[red]Table name must be in the format schema.table for source table when dest-table is not given.[/red]"
274
- )
275
- raise typer.Abort()
276
-
277
- print()
278
- print(
279
- "[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
280
- )
281
- dest_table = source_table
338
+ (source_table, dest_table) = validate_source_dest_tables(
339
+ source_table, dest_table
340
+ )
282
341
 
283
342
  factory = SourceDestinationFactory(source_uri, dest_uri)
284
343
  source = factory.get_source()
285
344
  destination = factory.get_destination()
286
345
 
346
+ column_hints: dict[str, TColumnSchema] = {}
287
347
  original_incremental_strategy = incremental_strategy
288
348
 
289
349
  merge_key = None
290
350
  if incremental_strategy == IncrementalStrategy.delete_insert:
291
351
  merge_key = incremental_key
292
352
  incremental_strategy = IncrementalStrategy.merge
353
+ if incremental_key:
354
+ column_hints[incremental_key] = {"merge_key": True}
293
355
 
294
356
  m = hashlib.sha256()
295
357
  m.update(dest_table.encode("utf-8"))
@@ -303,11 +365,31 @@ def ingest(
303
365
  pipelines_dir = tempfile.mkdtemp()
304
366
  is_pipelines_dir_temp = True
305
367
 
368
+ dlt_dest = destination.dlt_dest(uri=dest_uri)
369
+ validate_loader_file_format(dlt_dest, loader_file_format)
370
+
371
+ if partition_by:
372
+ if partition_by not in column_hints:
373
+ column_hints[partition_by] = {}
374
+
375
+ column_hints[partition_by]["partition"] = True
376
+
377
+ if cluster_by:
378
+ if cluster_by not in column_hints:
379
+ column_hints[cluster_by] = {}
380
+
381
+ column_hints[cluster_by]["cluster"] = True
382
+
383
+ if primary_key:
384
+ for key in primary_key:
385
+ if key not in column_hints:
386
+ column_hints[key] = {}
387
+
388
+ column_hints[key]["primary_key"] = True
389
+
306
390
  pipeline = dlt.pipeline(
307
391
  pipeline_name=m.hexdigest(),
308
- destination=destination.dlt_dest(
309
- uri=dest_uri,
310
- ),
392
+ destination=dlt_dest,
311
393
  progress=progressInstance,
312
394
  pipelines_dir=pipelines_dir,
313
395
  refresh="drop_resources" if full_refresh else None,
@@ -400,27 +482,17 @@ def ingest(
400
482
  loader_file_format=loader_file_format.value
401
483
  if loader_file_format is not None
402
484
  else None, # type: ignore
485
+ columns=column_hints,
403
486
  )
404
487
 
405
- for load_package in run_info.load_packages:
406
- failed_jobs = load_package.jobs["failed_jobs"]
407
- if len(failed_jobs) > 0:
408
- print()
409
- print("[bold red]Failed jobs:[/bold red]")
410
- print()
411
- for job in failed_jobs:
412
- print(f"[bold red] {job.job_file_info.job_id()}[/bold red]")
413
- print(f" [bold yellow]Error:[/bold yellow] {job.failed_message}")
414
-
415
- raise typer.Exit(1)
488
+ report_errors(run_info)
416
489
 
417
490
  destination.post_load()
418
491
 
419
492
  end_time = datetime.now()
420
493
  elapsedHuman = ""
421
- if run_info.started_at:
422
- elapsed = end_time - start_time
423
- elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
494
+ elapsed = end_time - start_time
495
+ elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
424
496
 
425
497
  # remove the pipelines_dir folder if it was created by ingestr
426
498
  if is_pipelines_dir_temp:
@@ -99,12 +99,12 @@ def gorgias_source(
99
99
  "description": "When the user was last updated.",
100
100
  },
101
101
  "meta": {
102
- "data_type": "complex",
102
+ "data_type": "json",
103
103
  "nullable": True,
104
104
  "description": "Meta information associated with the user.",
105
105
  },
106
106
  "data": {
107
- "data_type": "complex",
107
+ "data_type": "json",
108
108
  "nullable": True,
109
109
  "description": "Additional data associated with the user.",
110
110
  },
@@ -185,17 +185,17 @@ def gorgias_source(
185
185
  "description": "Indicates if the ticket was created by an agent",
186
186
  },
187
187
  "customer": {
188
- "data_type": "complex",
188
+ "data_type": "json",
189
189
  "nullable": False,
190
190
  "description": "The customer linked to the ticket.",
191
191
  },
192
192
  "assignee_user": {
193
- "data_type": "complex",
193
+ "data_type": "json",
194
194
  "nullable": True,
195
195
  "description": "User assigned to the ticket",
196
196
  },
197
197
  "assignee_team": {
198
- "data_type": "complex",
198
+ "data_type": "json",
199
199
  "nullable": True,
200
200
  "description": "Team assigned to the ticket",
201
201
  },
@@ -210,17 +210,17 @@ def gorgias_source(
210
210
  "description": "Excerpt of the ticket",
211
211
  },
212
212
  "integrations": {
213
- "data_type": "complex",
213
+ "data_type": "json",
214
214
  "nullable": False,
215
215
  "description": "Integration information related to the ticket",
216
216
  },
217
217
  "meta": {
218
- "data_type": "complex",
218
+ "data_type": "json",
219
219
  "nullable": True,
220
220
  "description": "Meta information related to the ticket",
221
221
  },
222
222
  "tags": {
223
- "data_type": "complex",
223
+ "data_type": "json",
224
224
  "nullable": False,
225
225
  "description": "Tags associated with the ticket",
226
226
  },
@@ -354,7 +354,7 @@ def gorgias_source(
354
354
  "description": "How the message has been received, or sent from Gorgias.",
355
355
  },
356
356
  "sender": {
357
- "data_type": "complex",
357
+ "data_type": "json",
358
358
  "nullable": False,
359
359
  "description": "The person who sent the message. It can be a user or a customer.",
360
360
  },
@@ -364,7 +364,7 @@ def gorgias_source(
364
364
  "description": "ID of the integration that either received or sent the message.",
365
365
  },
366
366
  "intents": {
367
- "data_type": "complex",
367
+ "data_type": "json",
368
368
  "nullable": True,
369
369
  "description": "",
370
370
  },
@@ -379,7 +379,7 @@ def gorgias_source(
379
379
  "description": "Whether the message was sent by your company to a customer, or the opposite.",
380
380
  },
381
381
  "receiver": {
382
- "data_type": "complex",
382
+ "data_type": "json",
383
383
  "nullable": True,
384
384
  "description": "The primary receiver of the message. It can be a user or a customer. Optional when the source type is 'internal-note'.",
385
385
  },
@@ -414,27 +414,27 @@ def gorgias_source(
414
414
  "description": "",
415
415
  },
416
416
  "headers": {
417
- "data_type": "complex",
417
+ "data_type": "json",
418
418
  "nullable": True,
419
419
  "description": "Headers of the message",
420
420
  },
421
421
  "attachments": {
422
- "data_type": "complex",
422
+ "data_type": "json",
423
423
  "nullable": True,
424
424
  "description": "A list of files attached to the message.",
425
425
  },
426
426
  "actions": {
427
- "data_type": "complex",
427
+ "data_type": "json",
428
428
  "nullable": True,
429
429
  "description": "A list of actions performed on the message.",
430
430
  },
431
431
  "macros": {
432
- "data_type": "complex",
432
+ "data_type": "json",
433
433
  "nullable": True,
434
434
  "description": "A list of macros",
435
435
  },
436
436
  "meta": {
437
- "data_type": "complex",
437
+ "data_type": "json",
438
438
  "nullable": True,
439
439
  "description": "Message metadata",
440
440
  },
@@ -526,7 +526,7 @@ def gorgias_source(
526
526
  "description": "ID of the customer linked to the survey.",
527
527
  },
528
528
  "meta": {
529
- "data_type": "complex",
529
+ "data_type": "json",
530
530
  "nullable": True,
531
531
  "description": "Meta information associated with the survey.",
532
532
  },