ingestr 0.12.10__tar.gz → 0.12.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (215) hide show
  1. {ingestr-0.12.10 → ingestr-0.12.11}/PKG-INFO +3 -1
  2. {ingestr-0.12.10 → ingestr-0.12.11}/docs/.vitepress/config.mjs +1 -0
  3. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/gcs.md +10 -6
  4. ingestr-0.12.11/docs/supported-sources/google-ads.md +134 -0
  5. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/s3.md +9 -6
  6. ingestr-0.12.11/ingestr/src/blob.py +49 -0
  7. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/errors.py +8 -0
  8. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/factory.py +2 -0
  9. ingestr-0.12.11/ingestr/src/google_ads/__init__.py +116 -0
  10. ingestr-0.12.11/ingestr/src/google_ads/field.py +2 -0
  11. ingestr-0.12.11/ingestr/src/google_ads/metrics.py +240 -0
  12. ingestr-0.12.11/ingestr/src/google_ads/predicates.py +23 -0
  13. ingestr-0.12.11/ingestr/src/google_ads/reports.py +380 -0
  14. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/sources.py +113 -28
  15. ingestr-0.12.11/ingestr/src/version.py +1 -0
  16. {ingestr-0.12.10 → ingestr-0.12.11}/requirements.txt +3 -0
  17. ingestr-0.12.10/ingestr/src/version.py +0 -1
  18. {ingestr-0.12.10 → ingestr-0.12.11}/.dockerignore +0 -0
  19. {ingestr-0.12.10 → ingestr-0.12.11}/.githooks/pre-commit-hook.sh +0 -0
  20. {ingestr-0.12.10 → ingestr-0.12.11}/.github/workflows/deploy-docs.yml +0 -0
  21. {ingestr-0.12.10 → ingestr-0.12.11}/.github/workflows/secrets-scan.yml +0 -0
  22. {ingestr-0.12.10 → ingestr-0.12.11}/.github/workflows/tests.yml +0 -0
  23. {ingestr-0.12.10 → ingestr-0.12.11}/.gitignore +0 -0
  24. {ingestr-0.12.10 → ingestr-0.12.11}/.gitleaksignore +0 -0
  25. {ingestr-0.12.10 → ingestr-0.12.11}/.python-version +0 -0
  26. {ingestr-0.12.10 → ingestr-0.12.11}/.vale.ini +0 -0
  27. {ingestr-0.12.10 → ingestr-0.12.11}/Dockerfile +0 -0
  28. {ingestr-0.12.10 → ingestr-0.12.11}/LICENSE.md +0 -0
  29. {ingestr-0.12.10 → ingestr-0.12.11}/Makefile +0 -0
  30. {ingestr-0.12.10 → ingestr-0.12.11}/README.md +0 -0
  31. {ingestr-0.12.10 → ingestr-0.12.11}/docs/.vitepress/theme/custom.css +0 -0
  32. {ingestr-0.12.10 → ingestr-0.12.11}/docs/.vitepress/theme/index.js +0 -0
  33. {ingestr-0.12.10 → ingestr-0.12.11}/docs/commands/example-uris.md +0 -0
  34. {ingestr-0.12.10 → ingestr-0.12.11}/docs/commands/ingest.md +0 -0
  35. {ingestr-0.12.10 → ingestr-0.12.11}/docs/getting-started/core-concepts.md +0 -0
  36. {ingestr-0.12.10 → ingestr-0.12.11}/docs/getting-started/incremental-loading.md +0 -0
  37. {ingestr-0.12.10 → ingestr-0.12.11}/docs/getting-started/quickstart.md +0 -0
  38. {ingestr-0.12.10 → ingestr-0.12.11}/docs/getting-started/telemetry.md +0 -0
  39. {ingestr-0.12.10 → ingestr-0.12.11}/docs/index.md +0 -0
  40. {ingestr-0.12.10 → ingestr-0.12.11}/docs/media/athena.png +0 -0
  41. {ingestr-0.12.10 → ingestr-0.12.11}/docs/media/github.png +0 -0
  42. {ingestr-0.12.10 → ingestr-0.12.11}/docs/media/googleanalytics.png +0 -0
  43. {ingestr-0.12.10 → ingestr-0.12.11}/docs/media/linkedin_ads.png +0 -0
  44. {ingestr-0.12.10 → ingestr-0.12.11}/docs/media/tiktok.png +0 -0
  45. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/adjust.md +0 -0
  46. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/airtable.md +0 -0
  47. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/appsflyer.md +0 -0
  48. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/appstore.md +0 -0
  49. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/asana.md +0 -0
  50. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/athena.md +0 -0
  51. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/bigquery.md +0 -0
  52. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/chess.md +0 -0
  53. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/csv.md +0 -0
  54. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/custom_queries.md +0 -0
  55. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/databricks.md +0 -0
  56. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/duckdb.md +0 -0
  57. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/dynamodb.md +0 -0
  58. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/facebook-ads.md +0 -0
  59. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/github.md +0 -0
  60. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/google_analytics.md +0 -0
  61. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/gorgias.md +0 -0
  62. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/gsheets.md +0 -0
  63. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/hubspot.md +0 -0
  64. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/kafka.md +0 -0
  65. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/klaviyo.md +0 -0
  66. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/linkedin_ads.md +0 -0
  67. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/mongodb.md +0 -0
  68. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/mssql.md +0 -0
  69. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/mysql.md +0 -0
  70. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/notion.md +0 -0
  71. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/oracle.md +0 -0
  72. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/postgres.md +0 -0
  73. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/redshift.md +0 -0
  74. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/sap-hana.md +0 -0
  75. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/shopify.md +0 -0
  76. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/slack.md +0 -0
  77. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/snowflake.md +0 -0
  78. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/sqlite.md +0 -0
  79. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/stripe.md +0 -0
  80. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/tiktok-ads.md +0 -0
  81. {ingestr-0.12.10 → ingestr-0.12.11}/docs/supported-sources/zendesk.md +0 -0
  82. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/main.py +0 -0
  83. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/.gitignore +0 -0
  84. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/adjust/__init__.py +0 -0
  85. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/adjust/adjust_helpers.py +0 -0
  86. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/airtable/__init__.py +0 -0
  87. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appsflyer/_init_.py +0 -0
  88. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appsflyer/client.py +0 -0
  89. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appstore/__init__.py +0 -0
  90. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appstore/client.py +0 -0
  91. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appstore/errors.py +0 -0
  92. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appstore/models.py +0 -0
  93. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/appstore/resources.py +0 -0
  94. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/arrow/__init__.py +0 -0
  95. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/asana_source/__init__.py +0 -0
  96. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/asana_source/helpers.py +0 -0
  97. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/asana_source/settings.py +0 -0
  98. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/chess/__init__.py +0 -0
  99. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/chess/helpers.py +0 -0
  100. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/chess/settings.py +0 -0
  101. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/destinations.py +0 -0
  102. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/dynamodb/__init__.py +0 -0
  103. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/facebook_ads/__init__.py +0 -0
  104. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/facebook_ads/exceptions.py +0 -0
  105. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/facebook_ads/helpers.py +0 -0
  106. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/facebook_ads/settings.py +0 -0
  107. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/filesystem/__init__.py +0 -0
  108. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/filesystem/helpers.py +0 -0
  109. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/filesystem/readers.py +0 -0
  110. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/filters.py +0 -0
  111. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/github/__init__.py +0 -0
  112. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/github/helpers.py +0 -0
  113. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/github/queries.py +0 -0
  114. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/github/settings.py +0 -0
  115. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_analytics/__init__.py +0 -0
  116. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_analytics/helpers.py +0 -0
  117. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_sheets/README.md +0 -0
  118. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_sheets/__init__.py +0 -0
  119. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  120. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  121. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  122. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/gorgias/__init__.py +0 -0
  123. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/gorgias/helpers.py +0 -0
  124. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/hubspot/__init__.py +0 -0
  125. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/hubspot/helpers.py +0 -0
  126. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/hubspot/settings.py +0 -0
  127. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/kafka/__init__.py +0 -0
  128. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/kafka/helpers.py +0 -0
  129. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/klaviyo/_init_.py +0 -0
  130. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/klaviyo/client.py +0 -0
  131. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/klaviyo/helpers.py +0 -0
  132. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/linkedin_ads/__init__.py +0 -0
  133. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
  134. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/linkedin_ads/helpers.py +0 -0
  135. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/mongodb/__init__.py +0 -0
  136. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/mongodb/helpers.py +0 -0
  137. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/notion/__init__.py +0 -0
  138. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/notion/helpers/__init__.py +0 -0
  139. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/notion/helpers/client.py +0 -0
  140. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/notion/helpers/database.py +0 -0
  141. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/notion/settings.py +0 -0
  142. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/shopify/__init__.py +0 -0
  143. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/shopify/exceptions.py +0 -0
  144. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/shopify/helpers.py +0 -0
  145. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/shopify/settings.py +0 -0
  146. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/slack/__init__.py +0 -0
  147. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/slack/helpers.py +0 -0
  148. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/slack/settings.py +0 -0
  149. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/sql_database/__init__.py +0 -0
  150. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/sql_database/callbacks.py +0 -0
  151. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/stripe_analytics/__init__.py +0 -0
  152. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/stripe_analytics/helpers.py +0 -0
  153. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/stripe_analytics/settings.py +0 -0
  154. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/table_definition.py +0 -0
  155. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/telemetry/event.py +0 -0
  156. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  157. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/tiktok_ads/__init__.py +0 -0
  158. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  159. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/time.py +0 -0
  160. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/zendesk/__init__.py +0 -0
  161. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  162. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  163. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  164. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  165. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/src/zendesk/settings.py +0 -0
  166. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/.gitignore +0 -0
  167. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/create_replace.csv +0 -0
  168. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/delete_insert_expected.csv +0 -0
  169. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/delete_insert_part1.csv +0 -0
  170. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/delete_insert_part2.csv +0 -0
  171. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/merge_expected.csv +0 -0
  172. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/merge_part1.csv +0 -0
  173. {ingestr-0.12.10 → ingestr-0.12.11}/ingestr/testdata/merge_part2.csv +0 -0
  174. {ingestr-0.12.10 → ingestr-0.12.11}/package-lock.json +0 -0
  175. {ingestr-0.12.10 → ingestr-0.12.11}/package.json +0 -0
  176. {ingestr-0.12.10 → ingestr-0.12.11}/pyproject.toml +0 -0
  177. {ingestr-0.12.10 → ingestr-0.12.11}/requirements-dev.txt +0 -0
  178. {ingestr-0.12.10 → ingestr-0.12.11}/resources/demo.gif +0 -0
  179. {ingestr-0.12.10 → ingestr-0.12.11}/resources/demo.tape +0 -0
  180. {ingestr-0.12.10 → ingestr-0.12.11}/resources/ingestr.svg +0 -0
  181. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/AMPM.yml +0 -0
  182. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Acronyms.yml +0 -0
  183. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Colons.yml +0 -0
  184. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Contractions.yml +0 -0
  185. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/DateFormat.yml +0 -0
  186. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Ellipses.yml +0 -0
  187. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/EmDash.yml +0 -0
  188. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Exclamation.yml +0 -0
  189. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/FirstPerson.yml +0 -0
  190. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Gender.yml +0 -0
  191. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/GenderBias.yml +0 -0
  192. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/HeadingPunctuation.yml +0 -0
  193. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Headings.yml +0 -0
  194. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Latin.yml +0 -0
  195. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/LyHyphens.yml +0 -0
  196. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/OptionalPlurals.yml +0 -0
  197. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Ordinal.yml +0 -0
  198. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/OxfordComma.yml +0 -0
  199. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Parens.yml +0 -0
  200. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Passive.yml +0 -0
  201. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Periods.yml +0 -0
  202. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Quotes.yml +0 -0
  203. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Ranges.yml +0 -0
  204. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Semicolons.yml +0 -0
  205. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Slang.yml +0 -0
  206. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Spacing.yml +0 -0
  207. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Spelling.yml +0 -0
  208. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Units.yml +0 -0
  209. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/We.yml +0 -0
  210. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/Will.yml +0 -0
  211. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/WordList.yml +0 -0
  212. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/meta.json +0 -0
  213. {ingestr-0.12.10 → ingestr-0.12.11}/styles/Google/vocab.txt +0 -0
  214. {ingestr-0.12.10 → ingestr-0.12.11}/styles/bruin/Ingestr.yml +0 -0
  215. {ingestr-0.12.10 → ingestr-0.12.11}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.10
3
+ Version: 0.12.11
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,7 +22,9 @@ Requires-Dist: dlt==1.5.0
22
22
  Requires-Dist: duckdb-engine==0.13.5
23
23
  Requires-Dist: duckdb==1.1.3
24
24
  Requires-Dist: facebook-business==20.0.0
25
+ Requires-Dist: flatten-json==0.1.14
25
26
  Requires-Dist: gcsfs==2024.10.0
27
+ Requires-Dist: google-ads==25.1.0
26
28
  Requires-Dist: google-analytics-data==0.18.16
27
29
  Requires-Dist: google-api-python-client==2.130.0
28
30
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
@@ -104,6 +104,7 @@ export default defineConfig({
104
104
  },
105
105
  { text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
106
106
  { text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
107
+ { text: "Google Ads", link: "/supported-sources/google-ads.md" },
107
108
  { text: "GitHub", link: "/supported-sources/github.md" },
108
109
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
109
110
  { text: "Gorgias", link: "/supported-sources/gorgias.md" },
@@ -7,14 +7,18 @@
7
7
  The URI format for Google Cloud Storage is as follows:
8
8
 
9
9
  ```plaintext
10
- gs://<bucket_name>?credentials_path=/path/to/service-account.json>
10
+ gs://?credentials_path=/path/to/service-account.json
11
11
  ```
12
12
 
13
13
  URI parameters:
14
14
 
15
- - `bucket_name`: The name of the bucket
16
15
  - `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
17
16
 
17
+ The `--source-table` must be in the format:
18
+ ```
19
+ {bucket name}/{file glob}
20
+ ```
21
+
18
22
  ## Setting up a GCS Integration
19
23
 
20
24
  To use Google Cloud Storage source in `ingestr`, you will need:
@@ -29,7 +33,7 @@ For more information on how to create a Service Account or it's keys, see [Creat
29
33
  Let's assume that:
30
34
  * Service account key is available in the current directory, under the filename `service_account.json`.
31
35
  * The bucket you want to load data from is called `my-org-bucket`
32
- * The source file is available at `/data/latest/dump.csv`
36
+ * The source file is available at `data/latest/dump.csv`
33
37
  * The data needs to be saved in a DuckDB database called `local.db`
34
38
  * The destination table name will be `public.latest_dump`
35
39
 
@@ -37,8 +41,8 @@ You can run the following command line to achieve this:
37
41
 
38
42
  ```sh
39
43
  ingestr ingest \
40
- --source-uri "gs://my-org-bucket?credentials_path=$PWD/service_account.json" \
41
- --source-table "/data/latest/dump.csv" \
44
+ --source-uri "gs://?credentials_path=$PWD/service_account.json" \
45
+ --source-table "my-org-bucket/data/latest/dump.csv" \
42
46
  --dest-uri "duckdb:///local.db" \
43
47
  --dest-table "public.latest_dump"
44
48
  ```
@@ -53,7 +57,7 @@ ingestr ingest \
53
57
  `ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
54
58
  This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
55
59
 
56
- Below are some examples of path patterns, each path pattern is a reference from the root of the bucket:
60
+ Below are some examples of path patterns, each path pattern is a glob you can specify after the bucket name:
57
61
 
58
62
  - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
59
63
  - `*.csv`: Retrieves all the CSV files from the first level of a folder.
@@ -0,0 +1,134 @@
1
+ # Google Ads
2
+ [Google Ads](https://ads.google.com/), formerly known as Google Adwords, is an online advertising platform developed by Google, where advertisers bid to display brief advertisements, service offerings, product listings, and videos to web users. It can place ads in the results of search engines like Google Search (the Google Search Network), mobile apps, videos, and on non-search websites.
3
+
4
+ ## URI format
5
+
6
+ The URI format for Google Ads is as follows:
7
+ ```plaintext
8
+ googleads://<customer_id>?credentials_path=/path/to/service-account.json&dev_token=<dev_token>
9
+ ```
10
+
11
+ URI parameters:
12
+
13
+ - `customer_id`: Customer ID of the Google Ads account to use.
14
+ - `credentials_path`: path to the service account JSON file.
15
+ - `dev_token`: [developer token](https://developers.google.com/google-ads/api/docs/get-started/dev-token) to use for accessing the account.
16
+
17
+ > [!NOTE]
18
+ > You may specify credentials using `credentials_base64` instead of `credentials_path`.
19
+ > The value of this parameter is the base64 encoded contents of the
20
+ > service account json file. However, we don't recommend using this
21
+ > parameter, unless you're integrating ingestr into another system.
22
+ ## Setting up a Google Ads integration
23
+
24
+ ### Prerequisites
25
+ * A Google cloud [service account](https://cloud.google.com/iam/docs/service-account-overview)
26
+ * A Google Ads [developer token](https://developers.google.com/google-ads/api/docs/get-started/dev-token)
27
+ * A Google Ads account
28
+
29
+
30
+ ### Obtaining necessary credentials
31
+
32
+ You can use the [Google Cloud IAM Console](https://cloud.google.com/security/products/iam) to create a service account for ingesting data from Google Ads. Make sure to enable Google Ads API in your console.
33
+
34
+ Next, you need to add your service account user to your Google Ads account. See [Google Developers Docs](https://developers.google.com/google-ads/api/docs/oauth/service-accounts) for exact steps.
35
+
36
+ Finally, you need to obtain a Google Ads Developer Token. A developer token lets your app connect to the Google Ads API. Each developer token is assigned an API access level which controls the number of API calls you can make per day as well as the environment to which you can make calls. See [Google Ads docs](https://developers.google.com/google-ads/api/docs/get-started/dev-token) for more information on how to obtain this token.
37
+
38
+ You also need the 10-digit customer id of the account you're making API calls to. This is displayed in the Google Ads web interface in the form 123-456-7890. In this case, your customer id would be `1234567890`
39
+
40
+ ### Example
41
+
42
+ Let's say we want to ingest information about campaigns (on a daily interval) and save them to a table `public.campaigns` in a DuckDB database called `adverts.db`.
43
+
44
+ For this example, we'll assume that:
45
+ * The service account JSON file is located in the current directory and is named `svc_account.json`
46
+ * customer id is `1234567890`
47
+ * the developer token is `dev-token-spec-1`
48
+
49
+ You can run the following to achieve this:
50
+ ```sh
51
+ ingestr ingest \
52
+ --source-uri "googleads://1234567890?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
53
+ --source-table "campaign_report_daily" \
54
+ --dest-uri "duckdb://./adverts.db" \
55
+ --dest-table "public.campaigns"
56
+ ```
57
+ ## Tables
58
+
59
+ | Name | Description |
60
+ |------------------|-------------------------------------------------------------------------|
61
+ | `account_report_daily` | Provides daily metrics aggregated at the account level. |
62
+ | `campaign_report_daily` | Provides daily metrics aggregated at the campaign level. |
63
+ | `ad_group_report_daily` | Provides daily metrics aggregated at the ad group level. |
64
+ | `ad_report_daily` | Provides daily metrics aggregated at the ad level. |
65
+ | `audience_report_daily` | Provides daily metrics aggregated at the audience level. |
66
+ | `keyword_report_daily` | Provides daily metrics aggregated at the keyword level. |
67
+ | `click_report_daily` | Provides daily metrics on clicks. |
68
+ | `landing_page_report_daily` | Provides daily metrics on landing page performance. |
69
+ | `search_keyword_report_daily` | Provides daily metrics on search keywords. |
70
+ | `search_term_report_daily` | Provides daily metrics on search terms. |
71
+ | `lead_form_submission_data_report_daily` | Provides daily metrics on lead form submissions. |
72
+ | `local_services_lead_report_daily` | Provides daily metrics on local services leads. |
73
+ | `local_services_lead_conversations_report_daily` | Provides daily metrics on local services lead conversations. |
74
+
75
+ ## Custom Reports
76
+ `googleads` source supports custom reports. You can pass a custom report definition to `--source-table` and it will dynamically create a report for you. These reports are aggregated at a daily interval.
77
+
78
+ The format of a custom report looks like the following:
79
+ ```
80
+ daily:{resource_name}:{dimensions}:{metrics}
81
+ ```
82
+ Where:
83
+ * `{resource_name}` is a [Google Ads Resource](https://developers.google.com/google-ads/api/fields/v18/overview_query_builder#list-of-all-resources).
84
+ * `{dimensions}` is a comma separated list of the Resource's attribute fields, or fields of [attributed resources](https://developers.google.com/google-ads/api/docs/query/overview).
85
+ * `{metrics}` is a comma separated list of the Resource's [metrics](https://developers.google.com/google-ads/api/fields/v18/metrics). Note that the `metrics.` prefix is optional.
86
+
87
+ Notes:
88
+ * `{dimensions}` and `{metrics}` are optional. If you don't need them, you can leave their respective segment blank.
89
+ * `segments` are currently not supported as dimensions.
90
+ * `segments.date` is automatically added to all custom reports.
91
+
92
+ ### Custom Report Example
93
+ For this example, we will ingest data from `ad_group_ad_asset_view`.
94
+ We want to obtain the following info:
95
+ **dimensions**
96
+ * ad_group.id
97
+ * campaign.id
98
+ * customer.id
99
+ **metrics**
100
+ * metrics.clicks
101
+ * metrics.conversions
102
+ * metrics.impressions
103
+
104
+ To achieve this, we pass a `daily` report specification to `ingestr` source table as follows:
105
+ ```sh
106
+ ingestr ingest \
107
+ --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
108
+ --source-table "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:clicks,conversions,impressions" \
109
+ --dest-uri "duckdb:///custom.db" \
110
+ --dest-table "public.report"
111
+ ```
112
+
113
+ Notice the lack of `metrics.` prefix in the metrics segment. Please note that `--dest-table` is mandatory when creating
114
+ a custom report.
115
+
116
+ **Without Metrics**
117
+
118
+ Here's an example of the above report, without any associated metrics:
119
+ ```sh
120
+ ingestr ingest \
121
+ --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
122
+ --source-table "daily:ad_group_ad_asset_view:ad_group.id,campaign.id,customer.id:" \
123
+ --dest-uri "duckdb:///custom.db" \
124
+ ```
125
+
126
+ **Without Dimensions**
127
+
128
+ Here's an example of the above report, without any associated dimensions:
129
+ ```sh
130
+ ingestr ingest \
131
+ --source-uri "googleads://12345678?credentials_path=./svc_account.json&dev_token=dev-token-spec-1" \
132
+ --source-table "daily:ad_group_ad_asset_view::clicks,conversions,impressions" \
133
+ --dest-uri "duckdb:///custom.db" \
134
+ ```
@@ -9,15 +9,18 @@ ingestr supports S3 as a source.
9
9
  The URI format for S3 is as follows:
10
10
 
11
11
  ```plaintext
12
- s3://<bucket_name>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
12
+ s3://?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
13
13
  ```
14
14
 
15
15
  URI parameters:
16
16
 
17
- - `bucket_name`: The name of the bucket
18
- - `path_to_files`: The relative path from the root of the bucket. You can find this from the S3 URI. For example, if your S3 URI is `s3://mybucket/students/students_details.csv`, then your bucket name is `mybucket` and `path_to_files` is `students/students_details.csv`.
19
17
  - `access_key_id` and `secret_access_key` : Used for accessing S3 bucket.
20
18
 
19
+ The `--source-table` must be in the format:
20
+ ```
21
+ {bucket name}/{file glob}
22
+ ```
23
+
21
24
  ## Setting up a S3 Integration
22
25
 
23
26
  S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. Please follow the guide on dltHub to [obtain credentials](https://dlthub.com/docs/dlt-ecosystem/verified-sources/filesystem/basic#get-credentials). Once you've completed the guide, you should have an `access_key_id` and `secret_access_key`. From the S3 URI, you can extract the `bucket_name` and `path_to_files`
@@ -26,15 +29,15 @@ For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCt
26
29
 
27
30
  ```sh
28
31
  ingestr ingest \
29
- --source-uri 's3://my_bucket?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
30
- --source-table '/students/students_details.csv' \
32
+ --source-uri 's3://?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
33
+ --source-table 'my_bucket/students/students_details.csv' \
31
34
  --dest-uri duckdb:///s3.duckdb \
32
35
  --dest-table 'dest.students_details'
33
36
  ```
34
37
 
35
38
  The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
36
39
 
37
- Below are some examples of path patterns, each path pattern is a reference from the root of the bucket:
40
+ Below are some examples of path patterns, each path pattern is a glob you can specify after the bucket name:
38
41
 
39
42
  - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
40
43
  - `*.csv`: Retrieves all the CSV files from the first level of a folder.
@@ -0,0 +1,49 @@
1
+ import warnings
2
+ from typing import Tuple, TypeAlias
3
+ from urllib.parse import ParseResult
4
+
5
+ BucketName: TypeAlias = str
6
+ FileGlob: TypeAlias = str
7
+
8
+
9
+ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
10
+ """
11
+ parse the URI of a blob storage and
12
+ return the bucket name and the file glob.
13
+
14
+ Supports the following Forms:
15
+ - uri: "gs://"
16
+ table: "bucket-name/file-glob"
17
+ - uri: gs://bucket-name/file-glob
18
+ table: None
19
+ - uri: "gs://bucket-name"
20
+ table: "file-glob"
21
+
22
+ The first form is the prefered method. Other forms are supported
23
+ for backward compatibility, but discouraged.
24
+ """
25
+
26
+ table = table.strip()
27
+ host = uri.netloc.strip()
28
+
29
+ if table == "":
30
+ warnings.warn(
31
+ f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
32
+ DeprecationWarning,
33
+ stacklevel=2,
34
+ )
35
+ return host, uri.path.lstrip("/")
36
+
37
+ if host != "":
38
+ warnings.warn(
39
+ f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
40
+ DeprecationWarning,
41
+ stacklevel=2,
42
+ )
43
+ return host, table.lstrip("/")
44
+
45
+ parts = table.lstrip("/").split("/", maxsplit=1)
46
+ if len(parts) != 2:
47
+ return "", parts[0]
48
+
49
+ return parts[0], parts[1]
@@ -8,3 +8,11 @@ class UnsupportedResourceError(Exception):
8
8
  super().__init__(
9
9
  f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
10
10
  )
11
+
12
+
13
class InvalidBlobTableError(Exception):
    """Error describing an invalid source table for a blob storage source."""

    def __init__(self, source):
        """
        :param source: name of the source, interpolated into the message.
        """
        # The two literals are concatenated into a single message, so the
        # first one has to close its sentence before the hint starts.
        # Only the first literal is an f-string: the braces in the second
        # are meant to appear literally in the message.
        super().__init__(
            f"Invalid source table for {source}. "
            "Ensure that the table is in the format {bucket-name}/{file glob}"
        )
@@ -27,6 +27,7 @@ from ingestr.src.sources import (
27
27
  FacebookAdsSource,
28
28
  GCSSource,
29
29
  GitHubSource,
30
+ GoogleAdsSource,
30
31
  GoogleAnalyticsSource,
31
32
  GoogleSheetsSource,
32
33
  GorgiasSource,
@@ -125,6 +126,7 @@ class SourceDestinationFactory:
125
126
  "asana": AsanaSource,
126
127
  "tiktok": TikTokSource,
127
128
  "googleanalytics": GoogleAnalyticsSource,
129
+ "googleads": GoogleAdsSource,
128
130
  "appstore": AppleAppStoreSource,
129
131
  "gs": GCSSource,
130
132
  "linkedinads": LinkedInAdsSource,
@@ -0,0 +1,116 @@
1
+ import json
2
+ from datetime import date, datetime
3
+ from typing import Any, Iterator, Optional
4
+
5
+ import dlt
6
+ import proto # type: ignore
7
+ from dlt.common.exceptions import MissingDependencyException
8
+ from dlt.common.typing import TDataItem
9
+ from dlt.sources import DltResource
10
+ from flatten_json import flatten # type: ignore
11
+ from googleapiclient.discovery import Resource # type: ignore
12
+
13
+ from . import field
14
+ from .metrics import dlt_metrics_schema
15
+ from .predicates import date_predicate
16
+ from .reports import BUILTIN_REPORTS, Report
17
+
18
try:
    from google.ads.googleads.client import GoogleAdsClient  # type: ignore
except ImportError:
    # The first argument names the component that needs the missing package;
    # it should identify the Google Ads client, not an unrelated library.
    raise MissingDependencyException("Google Ads API Client", ["google-ads"])
22
+
23
+
24
+ @dlt.source
25
+ def google_ads(
26
+ client: GoogleAdsClient,
27
+ customer_id: str,
28
+ report_spec: Optional[str] = None,
29
+ start_date: Optional[datetime] = None,
30
+ end_date: Optional[datetime] = None,
31
+ ) -> Iterator[DltResource]:
32
+ date_range = dlt.sources.incremental(
33
+ "segments_date",
34
+ initial_value=start_date.date(), # type: ignore
35
+ end_value=end_date.date() if end_date is not None else None, # type: ignore
36
+ range_start="closed",
37
+ range_end="closed",
38
+ )
39
+ if report_spec is not None:
40
+ custom_report = Report().from_spec(report_spec)
41
+ yield dlt.resource(
42
+ daily_report,
43
+ name="daily_report",
44
+ write_disposition="merge",
45
+ primary_key=custom_report.primary_keys(),
46
+ columns=dlt_metrics_schema(custom_report.metrics),
47
+ )(client, customer_id, custom_report, date_range)
48
+
49
+ for report_name, report in BUILTIN_REPORTS.items():
50
+ yield dlt.resource(
51
+ daily_report,
52
+ name=report_name,
53
+ write_disposition="merge",
54
+ primary_key=report.primary_keys(),
55
+ columns=dlt_metrics_schema(report.metrics),
56
+ )(client, customer_id, report, date_range)
57
+
58
+
59
def daily_report(
    client: Resource,
    customer_id: str,
    report: Report,
    date: dlt.sources.incremental[date],
) -> Iterator[TDataItem]:
    """
    Stream the rows of one report from the ``GoogleAdsService``.

    A query selecting the report's dimensions, metrics and segments from
    ``report.resource`` is built with a date predicate on ``segments.date``
    taken from the incremental's ``last_value`` / ``end_value``. Each result
    row is converted to a dict, flattened, and reduced to the columns that
    correspond to the selected fields.

    Note that the ``date`` parameter shadows the ``date`` class imported from
    ``datetime`` inside this function body.

    :param client: object providing ``get_service``.
    :param customer_id: customer id passed to ``search_stream``.
    :param report: report definition (dimensions, metrics, segments,
        resource, unfilterable).
    :param date: incremental cursor supplying the date range.
    :return: iterator of row dicts; ``segments_date``, when present, is
        parsed from ``YYYY-MM-DD`` into a ``date`` object.
    """
    service = client.get_service("GoogleAdsService")
    selected_fields = report.dimensions + report.metrics + report.segments
    date_filter = date_predicate("segments.date", date.last_value, date.end_value)  # type:ignore
    gaql = f"""
        SELECT
            {", ".join(selected_fields)}
        FROM
            {report.resource}
        WHERE
            {date_filter}
    """
    if report.unfilterable is True:
        # Unfilterable reports get no WHERE clause: cut the query right
        # before it.
        gaql = gaql[: gaql.index("WHERE")]

    allowed_keys = {field.to_column(name) for name in selected_fields}
    for batch in service.search_stream(customer_id=customer_id, query=gaql):
        for row in batch.results:
            record = flatten(merge_lists(to_dict(row)))
            if "segments_date" in record:
                parsed = datetime.strptime(record["segments_date"], "%Y-%m-%d")
                record["segments_date"] = parsed.date()
            yield {
                key: value for key, value in record.items() if key in allowed_keys
            }
90
+
91
+
92
def to_dict(item: Any) -> TDataItem:
    """
    Convert a proto message into plain Python data via a JSON round trip.

    The message is serialised with ``proto.Message.to_json`` (original proto
    field names kept, enums not rendered as integers, default-valued fields
    not included) and the resulting JSON text is decoded again.

    :param item: the message to convert
    :return: the decoded JSON document
    """
    serialised = proto.Message.to_json(
        item,
        preserving_proto_field_name=True,
        use_integers_for_enums=False,
        including_default_value_fields=False,
    )
    return json.loads(serialised)
106
+
107
+
108
def merge_lists(item: dict) -> dict:
    """
    Collapse list-valued entries of ``item["metrics"]`` into strings.

    Every value under ``metrics`` that is a list is replaced by its elements
    joined with ",". Elements are converted with ``str`` first, so lists of
    non-string values (e.g. numbers) do not raise ``TypeError``. Other
    entries, and items without a ``metrics`` key, are left untouched.

    :param item: row dict; its ``metrics`` mapping is modified in place.
    :return: the same ``item`` object that was passed in.
    """
    metrics = item.get("metrics", {})
    joined = {
        name: ",".join(map(str, value))
        for name, value in metrics.items()
        if isinstance(value, list)
    }
    if joined:
        metrics.update(joined)
    return item
@@ -0,0 +1,2 @@
1
def to_column(field: str) -> str:
    """Return ``field`` with every "." replaced by "_"."""
    return "_".join(field.split("."))