ingestr 0.12.8__tar.gz → 0.12.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (203) hide show
  1. {ingestr-0.12.8 → ingestr-0.12.9}/PKG-INFO +2 -1
  2. {ingestr-0.12.8 → ingestr-0.12.9}/docs/.vitepress/config.mjs +1 -0
  3. ingestr-0.12.9/docs/supported-sources/gcs.md +62 -0
  4. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/s3.md +6 -2
  5. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/__init__.py +4 -0
  6. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/factory.py +2 -0
  7. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filesystem/__init__.py +0 -2
  8. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/sources.py +75 -9
  9. ingestr-0.12.9/ingestr/src/version.py +1 -0
  10. {ingestr-0.12.8 → ingestr-0.12.9}/requirements.txt +1 -1
  11. ingestr-0.12.8/ingestr/src/version.py +0 -1
  12. {ingestr-0.12.8 → ingestr-0.12.9}/.dockerignore +0 -0
  13. {ingestr-0.12.8 → ingestr-0.12.9}/.githooks/pre-commit-hook.sh +0 -0
  14. {ingestr-0.12.8 → ingestr-0.12.9}/.github/workflows/deploy-docs.yml +0 -0
  15. {ingestr-0.12.8 → ingestr-0.12.9}/.github/workflows/secrets-scan.yml +0 -0
  16. {ingestr-0.12.8 → ingestr-0.12.9}/.github/workflows/tests.yml +0 -0
  17. {ingestr-0.12.8 → ingestr-0.12.9}/.gitignore +0 -0
  18. {ingestr-0.12.8 → ingestr-0.12.9}/.gitleaksignore +0 -0
  19. {ingestr-0.12.8 → ingestr-0.12.9}/.python-version +0 -0
  20. {ingestr-0.12.8 → ingestr-0.12.9}/.vale.ini +0 -0
  21. {ingestr-0.12.8 → ingestr-0.12.9}/Dockerfile +0 -0
  22. {ingestr-0.12.8 → ingestr-0.12.9}/LICENSE.md +0 -0
  23. {ingestr-0.12.8 → ingestr-0.12.9}/Makefile +0 -0
  24. {ingestr-0.12.8 → ingestr-0.12.9}/README.md +0 -0
  25. {ingestr-0.12.8 → ingestr-0.12.9}/docs/.vitepress/theme/custom.css +0 -0
  26. {ingestr-0.12.8 → ingestr-0.12.9}/docs/.vitepress/theme/index.js +0 -0
  27. {ingestr-0.12.8 → ingestr-0.12.9}/docs/commands/example-uris.md +0 -0
  28. {ingestr-0.12.8 → ingestr-0.12.9}/docs/commands/ingest.md +0 -0
  29. {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/core-concepts.md +0 -0
  30. {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/incremental-loading.md +0 -0
  31. {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/quickstart.md +0 -0
  32. {ingestr-0.12.8 → ingestr-0.12.9}/docs/getting-started/telemetry.md +0 -0
  33. {ingestr-0.12.8 → ingestr-0.12.9}/docs/index.md +0 -0
  34. {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/athena.png +0 -0
  35. {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/github.png +0 -0
  36. {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/googleanalytics.png +0 -0
  37. {ingestr-0.12.8 → ingestr-0.12.9}/docs/media/tiktok.png +0 -0
  38. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/adjust.md +0 -0
  39. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/airtable.md +0 -0
  40. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/appsflyer.md +0 -0
  41. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/appstore.md +0 -0
  42. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/asana.md +0 -0
  43. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/athena.md +0 -0
  44. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/bigquery.md +0 -0
  45. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/chess.md +0 -0
  46. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/csv.md +0 -0
  47. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/custom_queries.md +0 -0
  48. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/databricks.md +0 -0
  49. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/duckdb.md +0 -0
  50. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/dynamodb.md +0 -0
  51. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/facebook-ads.md +0 -0
  52. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/github.md +0 -0
  53. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/google_analytics.md +0 -0
  54. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/gorgias.md +0 -0
  55. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/gsheets.md +0 -0
  56. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/hubspot.md +0 -0
  57. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/kafka.md +0 -0
  58. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/klaviyo.md +0 -0
  59. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/mongodb.md +0 -0
  60. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/mssql.md +0 -0
  61. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/mysql.md +0 -0
  62. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/notion.md +0 -0
  63. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/oracle.md +0 -0
  64. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/postgres.md +0 -0
  65. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/redshift.md +0 -0
  66. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/sap-hana.md +0 -0
  67. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/shopify.md +0 -0
  68. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/slack.md +0 -0
  69. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/snowflake.md +0 -0
  70. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/sqlite.md +0 -0
  71. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/stripe.md +0 -0
  72. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/tiktok-ads.md +0 -0
  73. {ingestr-0.12.8 → ingestr-0.12.9}/docs/supported-sources/zendesk.md +0 -0
  74. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/main.py +0 -0
  75. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/.gitignore +0 -0
  76. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/adjust/__init__.py +0 -0
  77. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/adjust/adjust_helpers.py +0 -0
  78. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/airtable/__init__.py +0 -0
  79. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appsflyer/_init_.py +0 -0
  80. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appsflyer/client.py +0 -0
  81. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/client.py +0 -0
  82. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/errors.py +0 -0
  83. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/models.py +0 -0
  84. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/appstore/resources.py +0 -0
  85. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/arrow/__init__.py +0 -0
  86. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/asana_source/__init__.py +0 -0
  87. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/asana_source/helpers.py +0 -0
  88. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/asana_source/settings.py +0 -0
  89. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/chess/__init__.py +0 -0
  90. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/chess/helpers.py +0 -0
  91. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/chess/settings.py +0 -0
  92. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/destinations.py +0 -0
  93. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/dynamodb/__init__.py +0 -0
  94. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/errors.py +0 -0
  95. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/__init__.py +0 -0
  96. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/exceptions.py +0 -0
  97. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/helpers.py +0 -0
  98. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/facebook_ads/settings.py +0 -0
  99. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filesystem/helpers.py +0 -0
  100. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filesystem/readers.py +0 -0
  101. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/filters.py +0 -0
  102. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/__init__.py +0 -0
  103. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/helpers.py +0 -0
  104. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/queries.py +0 -0
  105. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/github/settings.py +0 -0
  106. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_analytics/__init__.py +0 -0
  107. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_analytics/helpers.py +0 -0
  108. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/README.md +0 -0
  109. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/__init__.py +0 -0
  110. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  111. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  112. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  113. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/gorgias/__init__.py +0 -0
  114. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/gorgias/helpers.py +0 -0
  115. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/hubspot/__init__.py +0 -0
  116. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/hubspot/helpers.py +0 -0
  117. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/hubspot/settings.py +0 -0
  118. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/kafka/__init__.py +0 -0
  119. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/kafka/helpers.py +0 -0
  120. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/klaviyo/_init_.py +0 -0
  121. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/klaviyo/client.py +0 -0
  122. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/klaviyo/helpers.py +0 -0
  123. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/mongodb/__init__.py +0 -0
  124. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/mongodb/helpers.py +0 -0
  125. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/__init__.py +0 -0
  126. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/helpers/__init__.py +0 -0
  127. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/helpers/client.py +0 -0
  128. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/helpers/database.py +0 -0
  129. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/notion/settings.py +0 -0
  130. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/__init__.py +0 -0
  131. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/exceptions.py +0 -0
  132. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/helpers.py +0 -0
  133. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/shopify/settings.py +0 -0
  134. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/slack/__init__.py +0 -0
  135. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/slack/helpers.py +0 -0
  136. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/slack/settings.py +0 -0
  137. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/sql_database/__init__.py +0 -0
  138. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/sql_database/callbacks.py +0 -0
  139. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/stripe_analytics/__init__.py +0 -0
  140. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/stripe_analytics/helpers.py +0 -0
  141. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/stripe_analytics/settings.py +0 -0
  142. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/table_definition.py +0 -0
  143. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/telemetry/event.py +0 -0
  144. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  145. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/tiktok_ads/__init__.py +0 -0
  146. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  147. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/time.py +0 -0
  148. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/__init__.py +0 -0
  149. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  150. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  151. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  152. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  153. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/src/zendesk/settings.py +0 -0
  154. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/.gitignore +0 -0
  155. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/create_replace.csv +0 -0
  156. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/delete_insert_expected.csv +0 -0
  157. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/delete_insert_part1.csv +0 -0
  158. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/delete_insert_part2.csv +0 -0
  159. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/merge_expected.csv +0 -0
  160. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/merge_part1.csv +0 -0
  161. {ingestr-0.12.8 → ingestr-0.12.9}/ingestr/testdata/merge_part2.csv +0 -0
  162. {ingestr-0.12.8 → ingestr-0.12.9}/package-lock.json +0 -0
  163. {ingestr-0.12.8 → ingestr-0.12.9}/package.json +0 -0
  164. {ingestr-0.12.8 → ingestr-0.12.9}/pyproject.toml +0 -0
  165. {ingestr-0.12.8 → ingestr-0.12.9}/requirements-dev.txt +0 -0
  166. {ingestr-0.12.8 → ingestr-0.12.9}/resources/demo.gif +0 -0
  167. {ingestr-0.12.8 → ingestr-0.12.9}/resources/demo.tape +0 -0
  168. {ingestr-0.12.8 → ingestr-0.12.9}/resources/ingestr.svg +0 -0
  169. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/AMPM.yml +0 -0
  170. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Acronyms.yml +0 -0
  171. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Colons.yml +0 -0
  172. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Contractions.yml +0 -0
  173. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/DateFormat.yml +0 -0
  174. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Ellipses.yml +0 -0
  175. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/EmDash.yml +0 -0
  176. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Exclamation.yml +0 -0
  177. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/FirstPerson.yml +0 -0
  178. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Gender.yml +0 -0
  179. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/GenderBias.yml +0 -0
  180. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/HeadingPunctuation.yml +0 -0
  181. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Headings.yml +0 -0
  182. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Latin.yml +0 -0
  183. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/LyHyphens.yml +0 -0
  184. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/OptionalPlurals.yml +0 -0
  185. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Ordinal.yml +0 -0
  186. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/OxfordComma.yml +0 -0
  187. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Parens.yml +0 -0
  188. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Passive.yml +0 -0
  189. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Periods.yml +0 -0
  190. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Quotes.yml +0 -0
  191. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Ranges.yml +0 -0
  192. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Semicolons.yml +0 -0
  193. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Slang.yml +0 -0
  194. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Spacing.yml +0 -0
  195. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Spelling.yml +0 -0
  196. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Units.yml +0 -0
  197. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/We.yml +0 -0
  198. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/Will.yml +0 -0
  199. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/WordList.yml +0 -0
  200. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/meta.json +0 -0
  201. {ingestr-0.12.8 → ingestr-0.12.9}/styles/Google/vocab.txt +0 -0
  202. {ingestr-0.12.8 → ingestr-0.12.9}/styles/bruin/Ingestr.yml +0 -0
  203. {ingestr-0.12.8 → ingestr-0.12.9}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.8
3
+ Version: 0.12.9
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
22
22
  Requires-Dist: duckdb-engine==0.13.5
23
23
  Requires-Dist: duckdb==1.1.3
24
24
  Requires-Dist: facebook-business==20.0.0
25
+ Requires-Dist: gcsfs==2024.10.0
25
26
  Requires-Dist: google-analytics-data==0.18.16
26
27
  Requires-Dist: google-api-python-client==2.130.0
27
28
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
@@ -102,6 +102,7 @@ export default defineConfig({
102
102
  text: "Facebook Ads",
103
103
  link: "/supported-sources/facebook-ads.md",
104
104
  },
105
+ { text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
105
106
  { text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
106
107
  { text: "GitHub", link: "/supported-sources/github.md" },
107
108
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
@@ -0,0 +1,62 @@
1
+ # Google Cloud Storage
2
+
3
+ [Google Cloud Storage](https://cloud.google.com/storage?hl=en) is an online file storage web service for storing and accessing data on Google Cloud Platform infrastructure. The service combines the performance and scalability of Google's cloud with advanced security and sharing capabilities. It is an Infrastructure as a Service (IaaS), comparable to Amazon S3.
4
+
5
+ ## URI format
6
+
7
+ The URI format for Google Cloud Storage is as follows:
8
+
9
+ ```plaintext
10
+ gs://<bucket_name>?credentials_path=/path/to/service-account.json
11
+ ```
12
+
13
+ URI parameters:
14
+
15
+ - `bucket_name`: The name of the bucket
16
+ - `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
17
+
18
+ ## Setting up a GCS Integration
19
+
20
+ To use Google Cloud Storage source in `ingestr`, you will need:
21
+ * A Google Cloud Project.
22
+ * A Service Account with at least [roles/storage.objectUser](https://cloud.google.com/storage/docs/access-control/iam-roles) IAM permission.
23
+ * A Service Account key file for the corresponding service account.
24
+
25
+ For more information on how to create a Service Account or its keys, see [Create service accounts](https://cloud.google.com/iam/docs/service-accounts-create) and [Create or delete service account keys](https://cloud.google.com/iam/docs/keys-create-delete) on Google Cloud docs.
26
+
27
+ ## Example
28
+
29
+ Let's assume that:
30
+ * The service account key is available in the current directory, under the filename `service_account.json`.
31
+ * The bucket you want to load data from is called `my-org-bucket`
32
+ * The source file is available at `/data/latest/dump.csv`
33
+ * The data needs to be saved in a DuckDB database called `local.db`
34
+ * The destination table name will be `public.latest_dump`
35
+
36
+ You can run the following command line to achieve this:
37
+
38
+ ```sh
39
+ ingestr ingest \
40
+ --source-uri "gs://my-org-bucket?credentials_path=$PWD/service_account.json" \
41
+ --source-table "/data/latest/dump.csv" \
42
+ --dest-uri "duckdb:///local.db" \
43
+ --dest-table "public.latest_dump"
44
+ ```
45
+
46
+ ## Supported File Formats
47
+ `gs` source only supports loading files in the following formats:
48
+ * `csv`: Comma Separated Values (supports Tab Separated Values as well)
49
+ * `parquet`: [Apache Parquet](https://parquet.apache.org/) storage format.
50
+ * `jsonl`: Line-delimited JSON. See [https://jsonlines.org/](https://jsonlines.org/)
51
+
52
+ ## File Pattern
53
+ `ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
54
+ This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
55
+
56
+ Below are some examples of path patterns. Each path pattern is relative to the root of the bucket:
57
+
58
+ - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
59
+ - `*.csv`: Retrieves all the CSV files from the first level of a folder.
60
+ - `myFolder/**/*.jsonl`: Retrieves all the JSONL files from anywhere under `myFolder`.
61
+ - `myFolder/mySubFolder/users.parquet`: Retrieves the `users.parquet` file from `mySubFolder`.
62
+ - `employees.jsonl`: Retrieves the `employees.jsonl` file from the root level of the bucket.
@@ -9,7 +9,7 @@ ingestr supports S3 as a source.
9
9
  The URI format for S3 is as follows:
10
10
 
11
11
  ```plaintext
12
- s3://<bucket_name>/<path_to_file>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
12
+ s3://<bucket_name>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
13
13
  ```
14
14
 
15
15
  URI parameters:
@@ -25,7 +25,11 @@ S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. P
25
25
  For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCtkpL5B`, bucket name is `my_bucket`, and `path_to_files` is `students/students_details.csv`, here's a sample command that will copy the data from the S3 bucket into a DuckDB database:
26
26
 
27
27
  ```sh
28
- ingestr ingest --source-uri 's3://my_bucket/students/students_details.csv?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' --source-table 'students_details' --dest-uri duckdb:///s3.duckdb --dest-table 'dest.students_details'
28
+ ingestr ingest \
29
+ --source-uri 's3://my_bucket?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
30
+ --source-table '/students/students_details.csv' \
31
+ --dest-uri duckdb:///s3.duckdb \
32
+ --dest-table 'dest.students_details'
29
33
  ```
30
34
 
31
35
  The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
@@ -28,6 +28,10 @@ def app_store(
28
28
  start_date: Optional[datetime] = None,
29
29
  end_date: Optional[datetime] = None,
30
30
  ) -> Iterable[DltResource]:
31
+ if start_date and start_date.tzinfo is not None:
32
+ start_date = start_date.replace(tzinfo=None)
33
+ if end_date and end_date.tzinfo is not None:
34
+ end_date = end_date.replace(tzinfo=None)
31
35
  for resource in RESOURCES:
32
36
  yield dlt.resource(
33
37
  get_analytics_reports,
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
25
25
  ChessSource,
26
26
  DynamoDBSource,
27
27
  FacebookAdsSource,
28
+ GCSSource,
28
29
  GitHubSource,
29
30
  GoogleAnalyticsSource,
30
31
  GoogleSheetsSource,
@@ -124,6 +125,7 @@ class SourceDestinationFactory:
124
125
  "tiktok": TikTokSource,
125
126
  "googleanalytics": GoogleAnalyticsSource,
126
127
  "appstore": AppleAppStoreSource,
128
+ "gs": GCSSource,
127
129
  }
128
130
  destinations: Dict[str, Type[DestinationProtocol]] = {
129
131
  "bigquery": BigQueryDestination,
@@ -39,8 +39,6 @@ def readers(
39
39
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
40
40
  filesystem_resource.apply_hints(
41
41
  incremental=dlt.sources.incremental("modification_date"),
42
- range_end="closed",
43
- range_start="closed",
44
42
  )
45
43
  return (
46
44
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
@@ -17,6 +17,8 @@ from typing import (
17
17
  from urllib.parse import ParseResult, parse_qs, quote, urlparse
18
18
 
19
19
  import dlt
20
+ import gcsfs # type: ignore
21
+ import s3fs # type: ignore
20
22
  import pendulum
21
23
  from dlt.common.configuration.specs import (
22
24
  AwsCredentials,
@@ -1091,19 +1093,17 @@ class S3Source:
1091
1093
  bucket_name = parsed_uri.hostname
1092
1094
  if not bucket_name:
1093
1095
  raise ValueError(
1094
- "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
1096
+ "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
1095
1097
  )
1096
1098
  bucket_url = f"s3://{bucket_name}"
1097
1099
 
1098
- path_to_file = parsed_uri.path.lstrip("/")
1100
+ path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1099
1101
  if not path_to_file:
1100
- raise ValueError(
1101
- "Invalid S3 URI: The file path is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
1102
- )
1102
+ raise ValueError("--source-table must be specified")
1103
1103
 
1104
- aws_credentials = AwsCredentials(
1105
- aws_access_key_id=access_key_id[0],
1106
- aws_secret_access_key=TSecretStrValue(secret_access_key[0]),
1104
+ fs = s3fs.S3FileSystem(
1105
+ key=access_key_id[0],
1106
+ secret=secret_access_key[0],
1107
1107
  )
1108
1108
 
1109
1109
  file_extension = path_to_file.split(".")[-1]
@@ -1119,7 +1119,7 @@ class S3Source:
1119
1119
  )
1120
1120
 
1121
1121
  return readers(
1122
- bucket_url=bucket_url, credentials=aws_credentials, file_glob=path_to_file
1122
+ bucket_url, fs, path_to_file
1123
1123
  ).with_resources(endpoint)
1124
1124
 
1125
1125
 
@@ -1503,3 +1503,69 @@ class AppleAppStoreSource:
1503
1503
  raise UnsupportedResourceError(table, "AppStore")
1504
1504
 
1505
1505
  return src.with_resources(table)
1506
+
1507
+
1508
+ class GCSSource:
1509
+ def handles_incrementality(self) -> bool:
1510
+ return True
1511
+
1512
+ def dlt_source(self, uri: str, table: str, **kwargs):
1513
+ if kwargs.get("incremental_key"):
1514
+ raise ValueError(
1515
+ "GCS takes care of incrementality on its own, you should not provide incremental_key"
1516
+ )
1517
+
1518
+ parsed_uri = urlparse(uri)
1519
+ params = parse_qs(parsed_uri.query)
1520
+ credentials_path = params.get("credentials_path")
1521
+ credentials_base64 = params.get("credentials_base64")
1522
+ credentials_available = any(
1523
+ map(
1524
+ lambda x: x is not None,
1525
+ [credentials_path, credentials_base64],
1526
+ )
1527
+ )
1528
+ if credentials_available is False:
1529
+ raise MissingValueError("credentials_path or credentials_base64", "GCS")
1530
+
1531
+ bucket_name = parsed_uri.hostname
1532
+ if not bucket_name:
1533
+ raise ValueError(
1534
+ "Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
1535
+ )
1536
+ bucket_url = f"gs://{bucket_name}/"
1537
+
1538
+ path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1539
+ if not path_to_file:
1540
+ raise ValueError("--source-table must be specified")
1541
+
1542
+ credentials = None
1543
+ if credentials_path:
1544
+ credentials = credentials_path[0]
1545
+ else:
1546
+ credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
1547
+
1548
+ # There's a compatibility issue between google-auth, dlt and gcsfs
1549
+ # that makes it difficult to use google.oauth2.service_account.Credentials
1550
+ # (The RECOMMENDED way of passing service account credentials)
1551
+ # directly with gcsfs. As a workaround, we construct the GCSFileSystem
1552
+ # and pass it directly to filesystem.readers.
1553
+ fs = gcsfs.GCSFileSystem(
1554
+ token=credentials,
1555
+ )
1556
+
1557
+ file_extension = path_to_file.split(".")[-1]
1558
+ if file_extension == "csv":
1559
+ endpoint = "read_csv"
1560
+ elif file_extension == "jsonl":
1561
+ endpoint = "read_jsonl"
1562
+ elif file_extension == "parquet":
1563
+ endpoint = "read_parquet"
1564
+ else:
1565
+ raise ValueError(
1566
+ "GCS Source only supports specific formats files: csv, jsonl, parquet"
1567
+ )
1568
+
1569
+ return readers(
1570
+ bucket_url, fs, path_to_file
1571
+ ).with_resources(endpoint)
@@ -0,0 +1 @@
1
+ __version__ = "0.12.9"
@@ -40,4 +40,4 @@ pyathena==3.9.0
40
40
  google-analytics-data==0.18.16
41
41
  asana==3.2.3
42
42
  dataclasses-json==0.6.7
43
-
43
+ gcsfs==2024.10.0
@@ -1 +0,0 @@
1
- __version__ = "0.12.8"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes