ingestr 0.12.7__tar.gz → 0.12.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (203) hide show
  1. {ingestr-0.12.7 → ingestr-0.12.9}/.githooks/pre-commit-hook.sh +2 -2
  2. {ingestr-0.12.7 → ingestr-0.12.9}/PKG-INFO +2 -1
  3. {ingestr-0.12.7 → ingestr-0.12.9}/docs/.vitepress/config.mjs +1 -0
  4. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/adjust.md +1 -0
  5. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/appstore.md +2 -1
  6. ingestr-0.12.9/docs/supported-sources/gcs.md +62 -0
  7. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/s3.md +6 -2
  8. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/adjust/__init__.py +7 -2
  9. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/adjust/adjust_helpers.py +27 -15
  10. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appstore/__init__.py +4 -0
  11. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/factory.py +2 -0
  12. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/filesystem/__init__.py +0 -2
  13. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/sources.py +75 -9
  14. ingestr-0.12.9/ingestr/src/version.py +1 -0
  15. {ingestr-0.12.7 → ingestr-0.12.9}/requirements.txt +1 -1
  16. ingestr-0.12.7/ingestr/src/version.py +0 -1
  17. {ingestr-0.12.7 → ingestr-0.12.9}/.dockerignore +0 -0
  18. {ingestr-0.12.7 → ingestr-0.12.9}/.github/workflows/deploy-docs.yml +0 -0
  19. {ingestr-0.12.7 → ingestr-0.12.9}/.github/workflows/secrets-scan.yml +0 -0
  20. {ingestr-0.12.7 → ingestr-0.12.9}/.github/workflows/tests.yml +0 -0
  21. {ingestr-0.12.7 → ingestr-0.12.9}/.gitignore +0 -0
  22. {ingestr-0.12.7 → ingestr-0.12.9}/.gitleaksignore +0 -0
  23. {ingestr-0.12.7 → ingestr-0.12.9}/.python-version +0 -0
  24. {ingestr-0.12.7 → ingestr-0.12.9}/.vale.ini +0 -0
  25. {ingestr-0.12.7 → ingestr-0.12.9}/Dockerfile +0 -0
  26. {ingestr-0.12.7 → ingestr-0.12.9}/LICENSE.md +0 -0
  27. {ingestr-0.12.7 → ingestr-0.12.9}/Makefile +0 -0
  28. {ingestr-0.12.7 → ingestr-0.12.9}/README.md +0 -0
  29. {ingestr-0.12.7 → ingestr-0.12.9}/docs/.vitepress/theme/custom.css +0 -0
  30. {ingestr-0.12.7 → ingestr-0.12.9}/docs/.vitepress/theme/index.js +0 -0
  31. {ingestr-0.12.7 → ingestr-0.12.9}/docs/commands/example-uris.md +0 -0
  32. {ingestr-0.12.7 → ingestr-0.12.9}/docs/commands/ingest.md +0 -0
  33. {ingestr-0.12.7 → ingestr-0.12.9}/docs/getting-started/core-concepts.md +0 -0
  34. {ingestr-0.12.7 → ingestr-0.12.9}/docs/getting-started/incremental-loading.md +0 -0
  35. {ingestr-0.12.7 → ingestr-0.12.9}/docs/getting-started/quickstart.md +0 -0
  36. {ingestr-0.12.7 → ingestr-0.12.9}/docs/getting-started/telemetry.md +0 -0
  37. {ingestr-0.12.7 → ingestr-0.12.9}/docs/index.md +0 -0
  38. {ingestr-0.12.7 → ingestr-0.12.9}/docs/media/athena.png +0 -0
  39. {ingestr-0.12.7 → ingestr-0.12.9}/docs/media/github.png +0 -0
  40. {ingestr-0.12.7 → ingestr-0.12.9}/docs/media/googleanalytics.png +0 -0
  41. {ingestr-0.12.7 → ingestr-0.12.9}/docs/media/tiktok.png +0 -0
  42. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/airtable.md +0 -0
  43. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/appsflyer.md +0 -0
  44. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/asana.md +0 -0
  45. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/athena.md +0 -0
  46. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/bigquery.md +0 -0
  47. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/chess.md +0 -0
  48. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/csv.md +0 -0
  49. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/custom_queries.md +0 -0
  50. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/databricks.md +0 -0
  51. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/duckdb.md +0 -0
  52. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/dynamodb.md +0 -0
  53. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/facebook-ads.md +0 -0
  54. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/github.md +0 -0
  55. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/google_analytics.md +0 -0
  56. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/gorgias.md +0 -0
  57. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/gsheets.md +0 -0
  58. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/hubspot.md +0 -0
  59. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/kafka.md +0 -0
  60. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/klaviyo.md +0 -0
  61. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/mongodb.md +0 -0
  62. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/mssql.md +0 -0
  63. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/mysql.md +0 -0
  64. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/notion.md +0 -0
  65. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/oracle.md +0 -0
  66. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/postgres.md +0 -0
  67. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/redshift.md +0 -0
  68. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/sap-hana.md +0 -0
  69. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/shopify.md +0 -0
  70. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/slack.md +0 -0
  71. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/snowflake.md +0 -0
  72. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/sqlite.md +0 -0
  73. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/stripe.md +0 -0
  74. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/tiktok-ads.md +0 -0
  75. {ingestr-0.12.7 → ingestr-0.12.9}/docs/supported-sources/zendesk.md +0 -0
  76. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/main.py +0 -0
  77. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/.gitignore +0 -0
  78. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/airtable/__init__.py +0 -0
  79. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appsflyer/_init_.py +0 -0
  80. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appsflyer/client.py +0 -0
  81. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appstore/client.py +0 -0
  82. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appstore/errors.py +0 -0
  83. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appstore/models.py +0 -0
  84. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/appstore/resources.py +0 -0
  85. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/arrow/__init__.py +0 -0
  86. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/asana_source/__init__.py +0 -0
  87. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/asana_source/helpers.py +0 -0
  88. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/asana_source/settings.py +0 -0
  89. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/chess/__init__.py +0 -0
  90. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/chess/helpers.py +0 -0
  91. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/chess/settings.py +0 -0
  92. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/destinations.py +0 -0
  93. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/dynamodb/__init__.py +0 -0
  94. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/errors.py +0 -0
  95. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/facebook_ads/__init__.py +0 -0
  96. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/facebook_ads/exceptions.py +0 -0
  97. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/facebook_ads/helpers.py +0 -0
  98. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/facebook_ads/settings.py +0 -0
  99. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/filesystem/helpers.py +0 -0
  100. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/filesystem/readers.py +0 -0
  101. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/filters.py +0 -0
  102. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/github/__init__.py +0 -0
  103. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/github/helpers.py +0 -0
  104. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/github/queries.py +0 -0
  105. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/github/settings.py +0 -0
  106. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_analytics/__init__.py +0 -0
  107. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_analytics/helpers.py +0 -0
  108. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_sheets/README.md +0 -0
  109. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_sheets/__init__.py +0 -0
  110. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  111. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  112. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  113. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/gorgias/__init__.py +0 -0
  114. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/gorgias/helpers.py +0 -0
  115. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/hubspot/__init__.py +0 -0
  116. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/hubspot/helpers.py +0 -0
  117. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/hubspot/settings.py +0 -0
  118. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/kafka/__init__.py +0 -0
  119. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/kafka/helpers.py +0 -0
  120. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/klaviyo/_init_.py +0 -0
  121. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/klaviyo/client.py +0 -0
  122. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/klaviyo/helpers.py +0 -0
  123. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/mongodb/__init__.py +0 -0
  124. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/mongodb/helpers.py +0 -0
  125. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/notion/__init__.py +0 -0
  126. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/notion/helpers/__init__.py +0 -0
  127. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/notion/helpers/client.py +0 -0
  128. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/notion/helpers/database.py +0 -0
  129. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/notion/settings.py +0 -0
  130. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/shopify/__init__.py +0 -0
  131. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/shopify/exceptions.py +0 -0
  132. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/shopify/helpers.py +0 -0
  133. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/shopify/settings.py +0 -0
  134. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/slack/__init__.py +0 -0
  135. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/slack/helpers.py +0 -0
  136. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/slack/settings.py +0 -0
  137. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/sql_database/__init__.py +0 -0
  138. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/sql_database/callbacks.py +0 -0
  139. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/stripe_analytics/__init__.py +0 -0
  140. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/stripe_analytics/helpers.py +0 -0
  141. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/stripe_analytics/settings.py +0 -0
  142. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/table_definition.py +0 -0
  143. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/telemetry/event.py +0 -0
  144. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  145. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/tiktok_ads/__init__.py +0 -0
  146. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  147. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/time.py +0 -0
  148. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/zendesk/__init__.py +0 -0
  149. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  150. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  151. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  152. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  153. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/src/zendesk/settings.py +0 -0
  154. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/.gitignore +0 -0
  155. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/create_replace.csv +0 -0
  156. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/delete_insert_expected.csv +0 -0
  157. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/delete_insert_part1.csv +0 -0
  158. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/delete_insert_part2.csv +0 -0
  159. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/merge_expected.csv +0 -0
  160. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/merge_part1.csv +0 -0
  161. {ingestr-0.12.7 → ingestr-0.12.9}/ingestr/testdata/merge_part2.csv +0 -0
  162. {ingestr-0.12.7 → ingestr-0.12.9}/package-lock.json +0 -0
  163. {ingestr-0.12.7 → ingestr-0.12.9}/package.json +0 -0
  164. {ingestr-0.12.7 → ingestr-0.12.9}/pyproject.toml +0 -0
  165. {ingestr-0.12.7 → ingestr-0.12.9}/requirements-dev.txt +0 -0
  166. {ingestr-0.12.7 → ingestr-0.12.9}/resources/demo.gif +0 -0
  167. {ingestr-0.12.7 → ingestr-0.12.9}/resources/demo.tape +0 -0
  168. {ingestr-0.12.7 → ingestr-0.12.9}/resources/ingestr.svg +0 -0
  169. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/AMPM.yml +0 -0
  170. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Acronyms.yml +0 -0
  171. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Colons.yml +0 -0
  172. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Contractions.yml +0 -0
  173. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/DateFormat.yml +0 -0
  174. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Ellipses.yml +0 -0
  175. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/EmDash.yml +0 -0
  176. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Exclamation.yml +0 -0
  177. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/FirstPerson.yml +0 -0
  178. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Gender.yml +0 -0
  179. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/GenderBias.yml +0 -0
  180. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/HeadingPunctuation.yml +0 -0
  181. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Headings.yml +0 -0
  182. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Latin.yml +0 -0
  183. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/LyHyphens.yml +0 -0
  184. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/OptionalPlurals.yml +0 -0
  185. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Ordinal.yml +0 -0
  186. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/OxfordComma.yml +0 -0
  187. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Parens.yml +0 -0
  188. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Passive.yml +0 -0
  189. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Periods.yml +0 -0
  190. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Quotes.yml +0 -0
  191. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Ranges.yml +0 -0
  192. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Semicolons.yml +0 -0
  193. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Slang.yml +0 -0
  194. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Spacing.yml +0 -0
  195. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Spelling.yml +0 -0
  196. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Units.yml +0 -0
  197. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/We.yml +0 -0
  198. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/Will.yml +0 -0
  199. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/WordList.yml +0 -0
  200. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/meta.json +0 -0
  201. {ingestr-0.12.7 → ingestr-0.12.9}/styles/Google/vocab.txt +0 -0
  202. {ingestr-0.12.7 → ingestr-0.12.9}/styles/bruin/Ingestr.yml +0 -0
  203. {ingestr-0.12.7 → ingestr-0.12.9}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -13,11 +13,11 @@ secret_detected() {
13
13
 
14
14
  # use gitleaks binary if available
15
15
  # else fallback to using docker for running gitleaks
16
- CMD="gitleaks dir -v"
16
+ CMD="gitleaks protect --staged -v"
17
17
 
18
18
  if [[ ! `which gitleaks` ]]; then
19
19
  which docker > /dev/null || (echo "gitleaks or docker is required for running secrets scan." && exit 1)
20
- CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR --rm ghcr.io/gitleaks/gitleaks:latest dir -v"
20
+ CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR --rm ghcr.io/gitleaks/gitleaks:latest protect --staged -v"
21
21
  fi
22
22
 
23
23
  $CMD || secret_detected
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.7
3
+ Version: 0.12.9
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
22
22
  Requires-Dist: duckdb-engine==0.13.5
23
23
  Requires-Dist: duckdb==1.1.3
24
24
  Requires-Dist: facebook-business==20.0.0
25
+ Requires-Dist: gcsfs==2024.10.0
25
26
  Requires-Dist: google-analytics-data==0.18.16
26
27
  Requires-Dist: google-api-python-client==2.130.0
27
28
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
@@ -102,6 +102,7 @@ export default defineConfig({
102
102
  text: "Facebook Ads",
103
103
  link: "/supported-sources/facebook-ads.md",
104
104
  },
105
+ { text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
105
106
  { text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
106
107
  { text: "GitHub", link: "/supported-sources/github.md" },
107
108
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
@@ -34,6 +34,7 @@ Adjust source allows ingesting data from various sources:
34
34
 
35
35
  - `campaigns`: Retrieves data for a campaign, showing the app's revenue and network costs over multiple days.
36
36
  - `creatives`: Retrieves data for a creative assets, detailing the app's revenue and network costs across multiple days.
37
+ - `events`: Retrieves data for [events](https://dev.adjust.com/en/api/rs-api/events/) and event slugs.
37
38
  - `custom`: Retrieves custom data based on the dimensions and metrics specified.
38
39
 
39
40
  ### Custom reports: `custom:<dimensions>:<metrics>[:<filters>]`
@@ -17,7 +17,8 @@ URI Parameters:
17
17
  * `key_path`: path to API private key
18
18
  * `key_id`: ID of the generated key
19
19
  * `issuer_id`: Issuer ID of the generated key
20
- * `app_id`: application ID of your app. You can specify `app_id` multiple times with different ids to ingest data for multiple apps.
20
+ * `app_id`: optional, application ID of your app. You can specify `app_id` multiple times with different ids to ingest data for multiple apps.
21
+ * You can also define the app_id in the table name. For example, `app-downloads-detailed:12345,67890` will ingest data for app with id `12345` and `67890`.
21
22
 
22
23
  ## Setting up Appstore Integration
23
24
 
@@ -0,0 +1,62 @@
1
+ # Google Cloud Storage
2
+
3
+ [Google Cloud Storage](https://cloud.google.com/storage?hl=en) is an online file storage web service for storing and accessing data on Google Cloud Platform infrastructure. The service combines the performance and scalability of Google's cloud with advanced security and sharing capabilities. It is an Infrastructure as a Service (IaaS), comparable to Amazon S3.
4
+
5
+ ## URI format
6
+
7
+ The URI format for Google Cloud Storage is as follows:
8
+
9
+ ```plaintext
10
+ gs://<bucket_name>?credentials_path=/path/to/service-account.json
11
+ ```
12
+
13
+ URI parameters:
14
+
15
+ - `bucket_name`: The name of the bucket
16
+ - `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
17
+
18
+ ## Setting up a GCS Integration
19
+
20
+ To use Google Cloud Storage source in `ingestr`, you will need:
21
+ * A Google Cloud Project.
22
+ * A Service Account with at least [roles/storage.objectUser](https://cloud.google.com/storage/docs/access-control/iam-roles) IAM permission.
23
+ * A Service Account key file for the corresponding service account.
24
+
25
+ For more information on how to create a Service Account or its keys, see [Create service accounts](https://cloud.google.com/iam/docs/service-accounts-create) and [Create or delete service account keys](https://cloud.google.com/iam/docs/keys-create-delete) on Google Cloud docs.
26
+
27
+ ## Example
28
+
29
+ Let's assume that:
30
+ * Service account key is available in the current directory, under the filename `service_account.json`.
31
+ * The bucket you want to load data from is called `my-org-bucket`
32
+ * The source file is available at `/data/latest/dump.csv`
33
+ * The data needs to be saved in a DuckDB database called `local.db`
34
+ * The destination table name will be `public.latest_dump`
35
+
36
+ You can run the following command line to achieve this:
37
+
38
+ ```sh
39
+ ingestr ingest \
40
+ --source-uri "gs://my-org-bucket?credentials_path=$PWD/service_account.json" \
41
+ --source-table "/data/latest/dump.csv" \
42
+ --dest-uri "duckdb:///local.db" \
43
+ --dest-table "public.latest_dump"
44
+ ```
45
+
46
+ ## Supported File Formats
47
+ `gs` source only supports loading files in the following formats:
48
+ * `csv`: Comma Separated Values (supports Tab Separated Values as well)
49
+ * `parquet`: [Apache Parquet](https://parquet.apache.org/) storage format.
50
+ * `jsonl`: Line delimited JSON. See [https://jsonlines.org/](https://jsonlines.org/)
51
+
52
+ ## File Pattern
53
+ `ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
54
+ This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
55
+
56
+ Below are some examples of path patterns, each path pattern is a reference from the root of the bucket:
57
+
58
+ - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
59
+ - `*.csv`: Retrieves all the CSV files from the first level of a folder.
60
+ - `myFolder/**/*.jsonl`: Retrieves all the JSONL files from anywhere under `myFolder`.
61
+ - `myFolder/mySubFolder/users.parquet`: Retrieves the `users.parquet` file from `mySubFolder`.
62
+ - `employees.jsonl`: Retrieves the `employees.jsonl` file from the root level of the bucket.
@@ -9,7 +9,7 @@ ingestr supports S3 as a source.
9
9
  The URI format for S3 is as follows:
10
10
 
11
11
  ```plaintext
12
- s3://<bucket_name>/<path_to_file>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
12
+ s3://<bucket_name>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
13
13
  ```
14
14
 
15
15
  URI parameters:
@@ -25,7 +25,11 @@ S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. P
25
25
  For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCtkpL5B`, bucket name is `my_bucket`, and `path_to_files` is `students/students_details.csv`, here's a sample command that will copy the data from the S3 bucket into a DuckDB database:
26
26
 
27
27
  ```sh
28
- ingestr ingest --source-uri 's3://my_bucket/students/students_details.csv?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' --source-table 'students_details' --dest-uri duckdb:///s3.duckdb --dest-table 'dest.students_details'
28
+ ingestr ingest \
29
+ --source-uri 's3://my_bucket?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
30
+ --source-table '/students/students_details.csv' \
31
+ --dest-uri duckdb:///s3.duckdb \
32
+ --dest-table 'dest.students_details'
29
33
  ```
30
34
 
31
35
  The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
@@ -56,6 +56,11 @@ def adjust_source(
56
56
  filters=filters,
57
57
  )
58
58
 
59
+ @dlt.resource(write_disposition="replace", primary_key="id")
60
+ def events():
61
+ adjust_api = AdjustAPI(api_key=api_key)
62
+ yield adjust_api.fetch_events()
63
+
59
64
  @dlt.resource(write_disposition="merge", merge_key="day")
60
65
  def creatives():
61
66
  adjust_api = AdjustAPI(api_key=api_key)
@@ -68,7 +73,7 @@ def adjust_source(
68
73
  )
69
74
 
70
75
  if not dimensions:
71
- return campaigns, creatives
76
+ return campaigns, creatives, events
72
77
 
73
78
  merge_key = merge_key
74
79
  type_hints = {}
@@ -100,4 +105,4 @@ def adjust_source(
100
105
  filters=filters,
101
106
  )
102
107
 
103
- return campaigns, creatives, custom
108
+ return campaigns, creatives, custom, events
@@ -28,10 +28,20 @@ DEFAULT_METRICS = [
28
28
  ]
29
29
 
30
30
 
31
+ def retry_on_limit(response: requests.Response, exception: BaseException) -> bool:
32
+ return response.status_code == 429
33
+
34
+
31
35
  class AdjustAPI:
32
36
  def __init__(self, api_key):
33
37
  self.api_key = api_key
34
- self.uri = "https://automate.adjust.com/reports-service/report"
38
+ self.request_client = Client(
39
+ request_timeout=8.0,
40
+ raise_for_status=False,
41
+ retry_condition=retry_on_limit,
42
+ request_max_attempts=12,
43
+ request_backoff_factor=2,
44
+ ).session
35
45
 
36
46
  def fetch_report_data(
37
47
  self,
@@ -62,20 +72,11 @@ class AdjustAPI:
62
72
  f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
63
73
  )
64
74
 
65
- def retry_on_limit(
66
- response: requests.Response, exception: BaseException
67
- ) -> bool:
68
- return response.status_code == 429
69
-
70
- request_client = Client(
71
- request_timeout=8.0,
72
- raise_for_status=False,
73
- retry_condition=retry_on_limit,
74
- request_max_attempts=12,
75
- request_backoff_factor=2,
76
- ).session
77
-
78
- response = request_client.get(self.uri, headers=headers, params=params)
75
+ response = self.request_client.get(
76
+ "https://automate.adjust.com/reports-service/report",
77
+ headers=headers,
78
+ params=params,
79
+ )
79
80
  if response.status_code == 200:
80
81
  result = response.json()
81
82
  items = result.get("rows", [])
@@ -83,6 +84,17 @@ class AdjustAPI:
83
84
  else:
84
85
  raise HTTPError(f"Request failed with status code: {response.status_code}")
85
86
 
87
+ def fetch_events(self):
88
+ headers = {"Authorization": f"Bearer {self.api_key}"}
89
+ response = self.request_client.get(
90
+ "https://automate.adjust.com/reports-service/events", headers=headers
91
+ )
92
+ if response.status_code == 200:
93
+ result = response.json()
94
+ yield result
95
+ else:
96
+ raise HTTPError(f"Request failed with status code: {response.status_code}")
97
+
86
98
 
87
99
  def parse_filters(filters_raw: str) -> dict:
88
100
  # Parse filter string like "key1=value1,key2=value2,value3,value4"
@@ -28,6 +28,10 @@ def app_store(
28
28
  start_date: Optional[datetime] = None,
29
29
  end_date: Optional[datetime] = None,
30
30
  ) -> Iterable[DltResource]:
31
+ if start_date and start_date.tzinfo is not None:
32
+ start_date = start_date.replace(tzinfo=None)
33
+ if end_date and end_date.tzinfo is not None:
34
+ end_date = end_date.replace(tzinfo=None)
31
35
  for resource in RESOURCES:
32
36
  yield dlt.resource(
33
37
  get_analytics_reports,
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
25
25
  ChessSource,
26
26
  DynamoDBSource,
27
27
  FacebookAdsSource,
28
+ GCSSource,
28
29
  GitHubSource,
29
30
  GoogleAnalyticsSource,
30
31
  GoogleSheetsSource,
@@ -124,6 +125,7 @@ class SourceDestinationFactory:
124
125
  "tiktok": TikTokSource,
125
126
  "googleanalytics": GoogleAnalyticsSource,
126
127
  "appstore": AppleAppStoreSource,
128
+ "gs": GCSSource,
127
129
  }
128
130
  destinations: Dict[str, Type[DestinationProtocol]] = {
129
131
  "bigquery": BigQueryDestination,
@@ -39,8 +39,6 @@ def readers(
39
39
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
40
40
  filesystem_resource.apply_hints(
41
41
  incremental=dlt.sources.incremental("modification_date"),
42
- range_end="closed",
43
- range_start="closed",
44
42
  )
45
43
  return (
46
44
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
@@ -17,6 +17,8 @@ from typing import (
17
17
  from urllib.parse import ParseResult, parse_qs, quote, urlparse
18
18
 
19
19
  import dlt
20
+ import gcsfs # type: ignore
21
+ import s3fs # type: ignore
20
22
  import pendulum
21
23
  from dlt.common.configuration.specs import (
22
24
  AwsCredentials,
@@ -1091,19 +1093,17 @@ class S3Source:
1091
1093
  bucket_name = parsed_uri.hostname
1092
1094
  if not bucket_name:
1093
1095
  raise ValueError(
1094
- "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
1096
+ "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
1095
1097
  )
1096
1098
  bucket_url = f"s3://{bucket_name}"
1097
1099
 
1098
- path_to_file = parsed_uri.path.lstrip("/")
1100
+ path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1099
1101
  if not path_to_file:
1100
- raise ValueError(
1101
- "Invalid S3 URI: The file path is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
1102
- )
1102
+ raise ValueError("--source-table must be specified")
1103
1103
 
1104
- aws_credentials = AwsCredentials(
1105
- aws_access_key_id=access_key_id[0],
1106
- aws_secret_access_key=TSecretStrValue(secret_access_key[0]),
1104
+ fs = s3fs.S3FileSystem(
1105
+ key=access_key_id[0],
1106
+ secret=secret_access_key[0],
1107
1107
  )
1108
1108
 
1109
1109
  file_extension = path_to_file.split(".")[-1]
@@ -1119,7 +1119,7 @@ class S3Source:
1119
1119
  )
1120
1120
 
1121
1121
  return readers(
1122
- bucket_url=bucket_url, credentials=aws_credentials, file_glob=path_to_file
1122
+ bucket_url, fs, path_to_file
1123
1123
  ).with_resources(endpoint)
1124
1124
 
1125
1125
 
@@ -1503,3 +1503,69 @@ class AppleAppStoreSource:
1503
1503
  raise UnsupportedResourceError(table, "AppStore")
1504
1504
 
1505
1505
  return src.with_resources(table)
1506
+
1507
+
1508
+ class GCSSource:
1509
+ def handles_incrementality(self) -> bool:
1510
+ return True
1511
+
1512
+ def dlt_source(self, uri: str, table: str, **kwargs):
1513
+ if kwargs.get("incremental_key"):
1514
+ raise ValueError(
1515
+ "GCS takes care of incrementality on its own, you should not provide incremental_key"
1516
+ )
1517
+
1518
+ parsed_uri = urlparse(uri)
1519
+ params = parse_qs(parsed_uri.query)
1520
+ credentials_path = params.get("credentials_path")
1521
+ credentials_base64 = params.get("credentials_base64")
1522
+ credentials_available = any(
1523
+ map(
1524
+ lambda x: x is not None,
1525
+ [credentials_path, credentials_base64],
1526
+ )
1527
+ )
1528
+ if credentials_available is False:
1529
+ raise MissingValueError("credentials_path or credentials_base64", "GCS")
1530
+
1531
+ bucket_name = parsed_uri.hostname
1532
+ if not bucket_name:
1533
+ raise ValueError(
1534
+ "Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
1535
+ )
1536
+ bucket_url = f"gs://{bucket_name}/"
1537
+
1538
+ path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1539
+ if not path_to_file:
1540
+ raise ValueError("--source-table must be specified")
1541
+
1542
+ credentials = None
1543
+ if credentials_path:
1544
+ credentials = credentials_path[0]
1545
+ else:
1546
+ credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
1547
+
1548
+ # There's a compatibility issue between google-auth, dlt and gcsfs
1549
+ # that makes it difficult to use google.oauth2.service_account.Credentials
1550
+ # (The RECOMMENDED way of passing service account credentials)
1551
+ # directly with gcsfs. As a workaround, we construct the GCSFileSystem
1552
+ # and pass it directly to filesystem.readers.
1553
+ fs = gcsfs.GCSFileSystem(
1554
+ token=credentials,
1555
+ )
1556
+
1557
+ file_extension = path_to_file.split(".")[-1]
1558
+ if file_extension == "csv":
1559
+ endpoint = "read_csv"
1560
+ elif file_extension == "jsonl":
1561
+ endpoint = "read_jsonl"
1562
+ elif file_extension == "parquet":
1563
+ endpoint = "read_parquet"
1564
+ else:
1565
+ raise ValueError(
1566
+ "GCS Source only supports specific formats files: csv, jsonl, parquet"
1567
+ )
1568
+
1569
+ return readers(
1570
+ bucket_url, fs, path_to_file
1571
+ ).with_resources(endpoint)
@@ -0,0 +1 @@
1
+ __version__ = "0.12.9"
@@ -40,4 +40,4 @@ pyathena==3.9.0
40
40
  google-analytics-data==0.18.16
41
41
  asana==3.2.3
42
42
  dataclasses-json==0.6.7
43
-
43
+ gcsfs==2024.10.0
@@ -1 +0,0 @@
1
- __version__ = "0.12.7"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes