ingestr 0.12.2__tar.gz → 0.12.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ingestr might be problematic.

Files changed (196)
  1. {ingestr-0.12.2 → ingestr-0.12.4}/.githooks/pre-commit-hook.sh +1 -1
  2. {ingestr-0.12.2 → ingestr-0.12.4}/PKG-INFO +2 -1
  3. {ingestr-0.12.2 → ingestr-0.12.4}/docs/.vitepress/config.mjs +9 -0
  4. ingestr-0.12.4/docs/commands/ingest.md +110 -0
  5. ingestr-0.12.4/docs/media/github.png +0 -0
  6. ingestr-0.12.4/docs/media/googleanalytics.png +0 -0
  7. ingestr-0.12.4/docs/media/tiktok.png +0 -0
  8. ingestr-0.12.4/docs/supported-sources/custom_queries.md +50 -0
  9. ingestr-0.12.4/docs/supported-sources/github.md +49 -0
  10. ingestr-0.12.4/docs/supported-sources/google_analytics.md +44 -0
  11. ingestr-0.12.4/docs/supported-sources/tiktok-ads.md +58 -0
  12. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/main.py +51 -4
  13. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/factory.py +4 -0
  14. ingestr-0.12.4/ingestr/src/github/__init__.py +149 -0
  15. ingestr-0.12.4/ingestr/src/github/helpers.py +193 -0
  16. ingestr-0.12.4/ingestr/src/github/queries.py +115 -0
  17. ingestr-0.12.4/ingestr/src/github/settings.py +10 -0
  18. ingestr-0.12.4/ingestr/src/google_analytics/__init__.py +70 -0
  19. ingestr-0.12.4/ingestr/src/google_analytics/helpers/__init__.py +70 -0
  20. ingestr-0.12.4/ingestr/src/google_analytics/helpers/data_processing.py +176 -0
  21. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/sources.py +301 -31
  22. ingestr-0.12.4/ingestr/src/tiktok_ads/__init__.py +139 -0
  23. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/tiktok_ads/tiktok_helpers.py +32 -13
  24. ingestr-0.12.4/ingestr/src/version.py +1 -0
  25. {ingestr-0.12.2 → ingestr-0.12.4}/pyproject.toml +4 -0
  26. {ingestr-0.12.2 → ingestr-0.12.4}/requirements.txt +2 -1
  27. ingestr-0.12.2/docs/commands/ingest.md +0 -44
  28. ingestr-0.12.2/ingestr/src/tiktok_ads/__init__.py +0 -106
  29. ingestr-0.12.2/ingestr/src/version.py +0 -1
  30. {ingestr-0.12.2 → ingestr-0.12.4}/.dockerignore +0 -0
  31. {ingestr-0.12.2 → ingestr-0.12.4}/.github/workflows/deploy-docs.yml +0 -0
  32. {ingestr-0.12.2 → ingestr-0.12.4}/.github/workflows/secrets-scan.yml +0 -0
  33. {ingestr-0.12.2 → ingestr-0.12.4}/.github/workflows/tests.yml +0 -0
  34. {ingestr-0.12.2 → ingestr-0.12.4}/.gitignore +0 -0
  35. {ingestr-0.12.2 → ingestr-0.12.4}/.gitleaksignore +0 -0
  36. {ingestr-0.12.2 → ingestr-0.12.4}/.python-version +0 -0
  37. {ingestr-0.12.2 → ingestr-0.12.4}/.vale.ini +0 -0
  38. {ingestr-0.12.2 → ingestr-0.12.4}/Dockerfile +0 -0
  39. {ingestr-0.12.2 → ingestr-0.12.4}/LICENSE.md +0 -0
  40. {ingestr-0.12.2 → ingestr-0.12.4}/Makefile +0 -0
  41. {ingestr-0.12.2 → ingestr-0.12.4}/README.md +0 -0
  42. {ingestr-0.12.2 → ingestr-0.12.4}/docs/.vitepress/theme/custom.css +0 -0
  43. {ingestr-0.12.2 → ingestr-0.12.4}/docs/.vitepress/theme/index.js +0 -0
  44. {ingestr-0.12.2 → ingestr-0.12.4}/docs/commands/example-uris.md +0 -0
  45. {ingestr-0.12.2 → ingestr-0.12.4}/docs/getting-started/core-concepts.md +0 -0
  46. {ingestr-0.12.2 → ingestr-0.12.4}/docs/getting-started/incremental-loading.md +0 -0
  47. {ingestr-0.12.2 → ingestr-0.12.4}/docs/getting-started/quickstart.md +0 -0
  48. {ingestr-0.12.2 → ingestr-0.12.4}/docs/getting-started/telemetry.md +0 -0
  49. {ingestr-0.12.2 → ingestr-0.12.4}/docs/index.md +0 -0
  50. {ingestr-0.12.2 → ingestr-0.12.4}/docs/media/athena.png +0 -0
  51. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/adjust.md +0 -0
  52. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/airtable.md +0 -0
  53. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/appsflyer.md +0 -0
  54. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/asana.md +0 -0
  55. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/athena.md +0 -0
  56. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/bigquery.md +0 -0
  57. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/chess.md +0 -0
  58. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/csv.md +0 -0
  59. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/databricks.md +0 -0
  60. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/duckdb.md +0 -0
  61. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/dynamodb.md +0 -0
  62. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/facebook-ads.md +0 -0
  63. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/gorgias.md +0 -0
  64. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/gsheets.md +0 -0
  65. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/hubspot.md +0 -0
  66. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/kafka.md +0 -0
  67. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/klaviyo.md +0 -0
  68. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/mongodb.md +0 -0
  69. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/mssql.md +0 -0
  70. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/mysql.md +0 -0
  71. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/notion.md +0 -0
  72. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/oracle.md +0 -0
  73. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/postgres.md +0 -0
  74. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/redshift.md +0 -0
  75. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/s3.md +0 -0
  76. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/sap-hana.md +0 -0
  77. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/shopify.md +0 -0
  78. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/slack.md +0 -0
  79. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/snowflake.md +0 -0
  80. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/sqlite.md +0 -0
  81. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/stripe.md +0 -0
  82. {ingestr-0.12.2 → ingestr-0.12.4}/docs/supported-sources/zendesk.md +0 -0
  83. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/.gitignore +0 -0
  84. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/adjust/__init__.py +0 -0
  85. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/adjust/adjust_helpers.py +0 -0
  86. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/airtable/__init__.py +0 -0
  87. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/appsflyer/_init_.py +0 -0
  88. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/appsflyer/client.py +0 -0
  89. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/arrow/__init__.py +0 -0
  90. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/asana_source/__init__.py +0 -0
  91. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/asana_source/helpers.py +0 -0
  92. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/asana_source/settings.py +0 -0
  93. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/chess/__init__.py +0 -0
  94. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/chess/helpers.py +0 -0
  95. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/chess/settings.py +0 -0
  96. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/destinations.py +0 -0
  97. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/dynamodb/__init__.py +0 -0
  98. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/facebook_ads/__init__.py +0 -0
  99. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/facebook_ads/exceptions.py +0 -0
  100. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/facebook_ads/helpers.py +0 -0
  101. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/facebook_ads/settings.py +0 -0
  102. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/filesystem/__init__.py +0 -0
  103. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/filesystem/helpers.py +0 -0
  104. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/filesystem/readers.py +0 -0
  105. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/filters.py +0 -0
  106. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/google_sheets/README.md +0 -0
  107. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/google_sheets/__init__.py +0 -0
  108. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  109. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  110. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  111. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/gorgias/__init__.py +0 -0
  112. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/gorgias/helpers.py +0 -0
  113. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/hubspot/__init__.py +0 -0
  114. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/hubspot/helpers.py +0 -0
  115. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/hubspot/settings.py +0 -0
  116. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/kafka/__init__.py +0 -0
  117. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/kafka/helpers.py +0 -0
  118. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/klaviyo/_init_.py +0 -0
  119. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/klaviyo/client.py +0 -0
  120. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/klaviyo/helpers.py +0 -0
  121. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/mongodb/__init__.py +0 -0
  122. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/mongodb/helpers.py +0 -0
  123. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/notion/__init__.py +0 -0
  124. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/notion/helpers/__init__.py +0 -0
  125. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/notion/helpers/client.py +0 -0
  126. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/notion/helpers/database.py +0 -0
  127. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/notion/settings.py +0 -0
  128. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/shopify/__init__.py +0 -0
  129. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/shopify/exceptions.py +0 -0
  130. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/shopify/helpers.py +0 -0
  131. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/shopify/settings.py +0 -0
  132. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/slack/__init__.py +0 -0
  133. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/slack/helpers.py +0 -0
  134. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/slack/settings.py +0 -0
  135. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/stripe_analytics/__init__.py +0 -0
  136. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/stripe_analytics/helpers.py +0 -0
  137. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/stripe_analytics/settings.py +0 -0
  138. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/table_definition.py +0 -0
  139. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/telemetry/event.py +0 -0
  140. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  141. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/time.py +0 -0
  142. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/zendesk/__init__.py +0 -0
  143. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  144. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  145. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  146. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  147. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/zendesk/settings.py +0 -0
  148. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/.gitignore +0 -0
  149. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/create_replace.csv +0 -0
  150. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/delete_insert_expected.csv +0 -0
  151. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/delete_insert_part1.csv +0 -0
  152. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/delete_insert_part2.csv +0 -0
  153. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/merge_expected.csv +0 -0
  154. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/merge_part1.csv +0 -0
  155. {ingestr-0.12.2 → ingestr-0.12.4}/ingestr/testdata/merge_part2.csv +0 -0
  156. {ingestr-0.12.2 → ingestr-0.12.4}/package-lock.json +0 -0
  157. {ingestr-0.12.2 → ingestr-0.12.4}/package.json +0 -0
  158. {ingestr-0.12.2 → ingestr-0.12.4}/requirements-dev.txt +0 -0
  159. {ingestr-0.12.2 → ingestr-0.12.4}/resources/demo.gif +0 -0
  160. {ingestr-0.12.2 → ingestr-0.12.4}/resources/demo.tape +0 -0
  161. {ingestr-0.12.2 → ingestr-0.12.4}/resources/ingestr.svg +0 -0
  162. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/AMPM.yml +0 -0
  163. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Acronyms.yml +0 -0
  164. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Colons.yml +0 -0
  165. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Contractions.yml +0 -0
  166. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/DateFormat.yml +0 -0
  167. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Ellipses.yml +0 -0
  168. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/EmDash.yml +0 -0
  169. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Exclamation.yml +0 -0
  170. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/FirstPerson.yml +0 -0
  171. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Gender.yml +0 -0
  172. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/GenderBias.yml +0 -0
  173. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/HeadingPunctuation.yml +0 -0
  174. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Headings.yml +0 -0
  175. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Latin.yml +0 -0
  176. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/LyHyphens.yml +0 -0
  177. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/OptionalPlurals.yml +0 -0
  178. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Ordinal.yml +0 -0
  179. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/OxfordComma.yml +0 -0
  180. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Parens.yml +0 -0
  181. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Passive.yml +0 -0
  182. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Periods.yml +0 -0
  183. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Quotes.yml +0 -0
  184. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Ranges.yml +0 -0
  185. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Semicolons.yml +0 -0
  186. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Slang.yml +0 -0
  187. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Spacing.yml +0 -0
  188. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Spelling.yml +0 -0
  189. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Units.yml +0 -0
  190. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/We.yml +0 -0
  191. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/Will.yml +0 -0
  192. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/WordList.yml +0 -0
  193. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/meta.json +0 -0
  194. {ingestr-0.12.2 → ingestr-0.12.4}/styles/Google/vocab.txt +0 -0
  195. {ingestr-0.12.2 → ingestr-0.12.4}/styles/bruin/Ingestr.yml +0 -0
  196. {ingestr-0.12.2 → ingestr-0.12.4}/styles/config/vocabularies/bruin/accept.txt +0 -0
{ingestr-0.12.2 → ingestr-0.12.4}/.githooks/pre-commit-hook.sh
@@ -17,7 +17,7 @@ CMD="gitleaks dir -v"

  if [[ ! `which gitleaks` ]]; then
  which docker > /dev/null || (echo "gitleaks or docker is required for running secrets scan." && exit 1)
- CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR ghcr.io/gitleaks/gitleaks:latest dir -v"
+ CMD="docker run -v $PWD:$WORK_DIR -w $WORK_DIR --rm ghcr.io/gitleaks/gitleaks:latest dir -v"
  fi

  $CMD || secret_detected
{ingestr-0.12.2 → ingestr-0.12.4}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ingestr
- Version: 0.12.2
+ Version: 0.12.4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -21,6 +21,7 @@ Requires-Dist: dlt==1.4.0
  Requires-Dist: duckdb-engine==0.13.5
  Requires-Dist: duckdb==1.1.3
  Requires-Dist: facebook-business==20.0.0
+ Requires-Dist: google-analytics-data==0.18.15
  Requires-Dist: google-api-python-client==2.130.0
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
  Requires-Dist: mysql-connector-python==9.1.0
{ingestr-0.12.2 → ingestr-0.12.4}/docs/.vitepress/config.mjs
@@ -78,6 +78,12 @@ export default defineConfig({
  { text: "SAP Hana", link: "/supported-sources/sap-hana.md" },
  { text: "Snowflake", link: "/supported-sources/snowflake.md" },
  { text: "SQLite", link: "/supported-sources/sqlite.md" },
+ {
+ text: "Experimental",
+ items: [
+ { text: "Custom Queries", link: "/supported-sources/custom_queries.md" },
+ ],
+ },
  ],
  },

@@ -95,6 +101,8 @@ export default defineConfig({
  text: "Facebook Ads",
  link: "/supported-sources/facebook-ads.md",
  },
+ { text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
+ { text: "GitHub", link: "/supported-sources/github.md" },
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
  { text: "Gorgias", link: "/supported-sources/gorgias.md" },
  { text: "HubSpot", link: "/supported-sources/hubspot.md" },
@@ -104,6 +112,7 @@ export default defineConfig({
  { text: "Shopify", link: "/supported-sources/shopify.md" },
  { text: "Slack", link: "/supported-sources/slack.md" },
  { text: "Stripe", link: "/supported-sources/stripe.md" },
+ { text: "TikTok Ads", link: "/supported-sources/tiktok-ads.md" },
  { text: "Zendesk", link: "/supported-sources/zendesk.md" },
  ],
  },
ingestr-0.12.4/docs/commands/ingest.md
@@ -0,0 +1,110 @@
+ # `ingestr ingest`
+
+ The `ingest` command is a core feature of the `ingestr` tool, allowing users to transfer data from a source to a destination with optional support for incremental updates.
+
+ ## Example
+
+ The following example demonstrates how to use the `ingest` command to transfer data from a source to a destination.
+
+ ```bash
+ ingestr ingest \
+ --source-uri '<your-source-uri-here>' \
+ --source-table '<your-schema>.<your-table>' \
+ --dest-uri '<your-destination-uri-here>'
+ ```
+
+ ## Required flags
+
+ - `--source-uri TEXT`: Required. Specifies the URI of the data source.
+ - `--dest-uri TEXT`: Required. Specifies the URI of the destination where data will be ingested.
+ - `--source-table TEXT`: Required. Defines the source table to fetch data from.
+
+ ## Optional flags
+
+ - `--dest-table TEXT`: Designates the destination table to save the data. If not specified, defaults to the value of `--source-table`.
+ - `--incremental-key TEXT`: Identifies the key used for incremental data strategies. Defaults to `None`.
+ - `--incremental-strategy TEXT`: Defines the strategy for incremental updates. Options include `replace`, `append`, `delete+insert`, or `merge`. The default strategy is `replace`.
+ - `--interval-start`: Sets the start of the interval for the incremental key. Defaults to `None`.
+ - `--interval-end`: Sets the end of the interval for the incremental key. Defaults to `None`.
+ - `--primary-key TEXT`: Specifies the primary key for the merge operation. Defaults to `None`.
+ - `--columns <column_name>:<column_type>`: Specifies the columns to be ingested. Defaults to `None`.
+
+ The `interval-start` and `interval-end` options support various datetime formats, here are some examples:
+ - `%Y-%m-%d`: `2023-01-31`
+ - `%Y-%m-%dT%H:%M:%S`: `2023-01-31T15:00:00`
+ - `%Y-%m-%dT%H:%M:%S%z`: `2023-01-31T15:00:00+00:00`
+ - `%Y-%m-%dT%H:%M:%S.%f`: `2023-01-31T15:00:00.000123`
+ - `%Y-%m-%dT%H:%M:%S.%f%z`: `2023-01-31T15:00:00.000123+00:00`
+
+ > [!INFO]
+ > For the details around the incremental key and the various strategies, please refer to the [Incremental Loading](../getting-started/incremental-loading.md) section.
+
+ ## General flags
+
+ - `--help`: Displays the help message and exits the command.
+
+ ## Examples
+
+ ### Ingesting a CSV file to DuckDB
+
+ ```bash
+ ingestr ingest \
+ --source-uri 'csv://input.csv' \
+ --source-table 'sample' \
+ --dest-uri 'duckdb://output.duckdb'
+ ```
+
+ ### Copy a table from Postgres to DuckDB
+
+ ```bash
+ ingestr ingest \
+ --source-uri 'postgresql://myuser:mypassword@localhost:5432/mydatabase?sslmode=disable' \
+ --source-table 'public.input_table' \
+ --dest-uri 'duckdb://output.duckdb' \
+ --dest-table 'public.output_table'
+ ```
+
+ ### Incrementally ingest a table from Postgres to BigQuery
+
+ ```bash
+ ingestr ingest
+ --source-uri 'postgresql://myuser:mypassword@localhost:5432/mydatabase?sslmode=disable' \
+ --source-table 'public.users' \
+ --dest-uri 'bigquery://my_project?credentials_path=/path/to/service/account.json&location=EU' \
+ --dest-table 'raw.users' \
+ --incremental-key 'updated_at' \
+ --incremental-strategy 'delete+insert'
+ ```
+
+ ### Load an interval of data from Postgres to BigQuery using a date column
+
+ ```bash
+ ingestr ingest
+ --source-uri 'postgresql://myuser:mypassword@localhost:5432/mydatabase?sslmode=disable' \
+ --source-table 'public.users' \
+ --dest-uri 'bigquery://my_project?credentials_path=/path/to/service/account.json&location=EU' \
+ --dest-table 'raw.users' \
+ --incremental-key 'dt' \
+ --incremental-strategy 'delete+insert' \
+ --interval-start '2023-01-01' \
+ --interval-end '2023-01-31' \
+ --columns 'dt:date'
+ ```
+
+ ### Load a specific query from Postgres to Snowflake
+
+ ```bash
+ ingestr ingest
+ --source-uri 'postgresql://myuser:mypassword@localhost:5432/mydatabase?sslmode=disable' \
+ --dest-uri 'snowflake://user:password@account/dbname?warehouse=COMPUTE_WH&role=my_role' \
+ --source-table 'query:SELECT * FROM public.users as pu JOIN public.orders as o ON pu.id = o.user_id WHERE pu.dt BETWEEN :interval_start AND :interval_end' \
+ --dest-table 'raw.users' \
+ --incremental-key 'dt' \
+ --incremental-strategy 'delete+insert' \
+ --interval-start '2023-01-01' \
+ --interval-end '2023-01-31' \
+ --columns 'dt:date'
+ ```
+
+ > [!INFO]
+ > For more examples, please refer to the specific platforms' documentation on the sidebar.
Binary file
Binary file
ingestr-0.12.4/docs/supported-sources/custom_queries.md
@@ -0,0 +1,50 @@
+ # Custom Queries for SQL Sources
+
+ ingestr has primarily supported table replication for SQL sources due to that being a common use case. However, there are certain scenarios where loading a table only is not possible:
+ - you might want to load a subset of rows from a table
+ - you might want to load a table that has a complex query that cannot be expressed as a simple table
+ - you could technically create a view in the database, but sometimes you don't have access/permissions to do so.
+ - you might want to do incremental loads but the table you want to load does not have an incremental key, so it needs to be joined with another table that does.
+
+ In order to support these scenarios, ingestr has added experimental support for custom queries.
+
+ > [!DANGER]
+ > This is an experimental feature, so do not expect it to work for all use cases. Please create an issue if you find a use case that doesn't work.
+
+ ## How to use custom queries
+
+ To use a custom query, you can pass a `query:` prefix to the source name:
+
+ ```bash
+ ingestr ingest \
+ --source-uri $POSTGRES_URI \
+ --dest-uri "duckdb:///mydb.db" \
+ --dest-table "public.output" \
+ --source-table "query:select oi.*, o.updated_at from order_items oi join orders o on oi.order_id = o.id"
+ ```
+
+ Ingestr uses SQLAlchemy to run the queries, therefore you can use any valid SQLAlchemy query.
+
+ ### Incremental loads
+
+ Custom queries support incremental loads, but there are some caveats:
+ - the incremental key must be a column that is returned by the query
+ - the incremental key must be a datetime/timestamp column
+ - you must do your own filtering in the query for the incremental load
+ - you can use the `interval_start` and `interval_end` variables to filter the data
+
+ Here's an example of how to do an incremental load:
+
+ ```bash
+ ingestr ingest \
+ --source-uri $POSTGRES_URI \
+ --dest-uri "duckdb:///mydb.db" \
+ --dest-table "public.output" \
+ --source-table "query:select oi.*, o.updated_at from order_items oi join orders o on oi.order_id = o.id where o.updated_at > :interval_start" \
+ --incremental-key updated_at \
+ --incremental-strategy merge \
+ --primary-key id
+ ```
+
+ In this example, the query is filtering the data to only include rows where the `updated_at` column is greater than the `interval_start` variable.
+
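For context, the `:interval_start` and `:interval_end` placeholders in the file above behave like SQLAlchemy named bound parameters. A minimal, self-contained sketch (illustrative only, not ingestr's internals, using SQLite and made-up tables as stand-ins):

```python
# Sketch of how a "query:" source's ":interval_start" placeholder is filled in
# as a SQLAlchemy bound parameter; the tables and values are illustrative.
import sqlalchemy as sa

engine = sa.create_engine("sqlite:///:memory:")

with engine.begin() as conn:
    conn.execute(sa.text("create table orders (id integer, updated_at text)"))
    conn.execute(sa.text("create table order_items (id integer, order_id integer)"))
    conn.execute(sa.text("insert into orders values (1, '2023-01-15 00:00:00')"))
    conn.execute(sa.text("insert into order_items values (10, 1)"))

query = sa.text(
    "select oi.*, o.updated_at from order_items oi "
    "join orders o on oi.order_id = o.id "
    "where o.updated_at > :interval_start"
)

with engine.connect() as conn:
    # "2023-01-01" stands in for the value passed via --interval-start
    rows = conn.execute(query, {"interval_start": "2023-01-01"}).fetchall()
    print(rows)  # only rows updated after the interval start
```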
ingestr-0.12.4/docs/supported-sources/github.md
@@ -0,0 +1,49 @@
+ # GitHub
+
+ [GitHub](https://github.com/) is a developer platform that allows developers to create, store, manage and share their code.
+
+ ingestr supports GitHub as a source.
+
+ ## URI format
+
+ The URI format for GitHub is as follows:
+
+ ```plaintext
+ github://?access_token=<access_token>&owner=<owner>&repo=<repo>
+ ```
+
+ URI parameters:
+
+ - `access_token`: Access Token used for authentication with the GitHub API
+ - `owner`: Refers to the owner of the repository
+ - `repo`: Refers to the name of the repository
+
+
+ ## Setting up a GitHub Integration
+
+ GitHub requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/github#setup-guide).
+
+ Once you complete the guide, you should have an access token. Let's say your access token is `ghp_test_1234`, the owner is `max`, and the name of the repository is `test_example`. Here is a sample command that will copy the data from GitHub into a DuckDB database:
+
+ ```sh
+ ingestr ingest --source-uri 'github://?access_token=ghp_test_1234&owner=max&repo=test_example' --source-table 'issues' --dest-uri duckdb:///github.duckdb --dest-table 'dest.issues'
+ ```
+
+ This is a sample command that will copy the data from the GitHub source to DuckDB.
+
+ <img alt="github_img" src="../media/github.png" />
+
+ ## Tables
+
+ GitHub source allows ingesting the following sources into separate tables:
+
+ - `issues`: Retrieves data about issues, their associated comments, and subsequent reactions
+ - `pull_requests`: Retrieves all pull requests
+ - `repo_events`: Retrieves all the repo events associated with the repository
+ - `stargazers`: Retrieves all stargazers
+
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
+
+ > [!WARNING]
+ > GitHub does not support incremental loading for many endpoints in its APIs, which means ingestr will load endpoints incrementally if they support it, and do a full-refresh if not.
+
ingestr-0.12.4/docs/supported-sources/google_analytics.md
@@ -0,0 +1,44 @@
+ # Google Analytics
+ [Google Analytics](https://marketingplatform.google.com/about/analytics/) is a service for web analytics that tracks and provides data regarding user engagement with your website or application.
+
+ ingestr supports Google Analytics as a source.
+
+ ## URI format
+ The URI format for Google Analytics is as follows:
+
+ ```plaintext
+ googleanalytics://?credentials_path=/path/to/service/account.json&property_id=<property_id>
+ ```
+
+ URI parameters:
+ - `credentials_path`: The path to the service account JSON file.
+ - `property_id`: It is a unique number that identifies a particular property on Google Analytics. [Follow this guide](https://developers.google.com/analytics/devguides/reporting/data/v1/property-id#what_is_my_property_id) if you don't know your property ID.
+
+ ## Setting up an Google Analytics Integration
+ Google Analytics requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/google_analytics#grab-google-service-account-credentials). Once you complete the guide, you should have an `.json` file and `project_id`.
+
+ ## Table: Custom Reports
+ Custom reports allow you to retrieve data based on specific `dimensions` and `metrics`.
+
+ Custom Table Format:
+ ```
+ custom:<dimensions>:<metrics>
+ ```
+
+ ### Parameters:
+ - `dimensions`(required): A comma-separated list of [dimensions](https://developers.google.com/analytics/devguides/reporting/data/v1/exploration-api-schema#dimensions) to retrieve.
+ - `metrics`(required): A comma-separated list of [metrics](https://developers.google.com/analytics/devguides/reporting/data/v1/exploration-api-schema#metrics) to retrieve.
+
+ ### Example
+
+ ```sh
+ ingestr ingest \
+ --source-uri "googleanalytics://?credentials_path="ingestr/src/g_analytics.json&property_id=id123" \
+ --source-table "custom:city,date:clicks,activeUsers,newUsers" \
+ --dest-uri "duckdb:///analytics.duckdb" \
+ --dest-table "dest.custom"
+ ```
+
+ This command will retrieve report and save it to the `dest.custom` table in the DuckDB database.
+
+ <img alt="google_analytics_img" src="../media/googleanalytics.png" />
ingestr-0.12.4/docs/supported-sources/tiktok-ads.md
@@ -0,0 +1,58 @@
+ # TikTok Ads
+ TikTok Ads is an advertising platform that enables businesses and marketers to create, manage, and analyze ad campaigns targeting TikTok's user base.
+
+ Ingestr supports TikTok Ads as a Source.
+
+ ## URI format
+ The URI format for TikTok Ads as a Source is as follows:
+
+ ```plaintext
+ tiktok://?access_token=<ACCESS_TOKEN>&advertiser_ids=<advertiser_ids>&timezone=<timezone>
+ ```
+ ## URI parameters:
+ - `access_token` (required): Used for authentication and is necessary to access reports through the TikTok Marketing API.
+ - `advertiser_ids` (required): The comma-separated list of advertiser IDs to retrieve data for.
+ - `timezone` (optional): The timezone to use for the data retrieval, you should set this value to the timezone of the advertiser account. Defaults to `UTC`.
+
+ TikTok requires an `access_token` and `advertiser_ids` to retrieve reports from the TikTok marketing API. Please follow the guide to obtain the [credentials](https://business-api.tiktok.com/portal/docs?id=1738373141733378).
+
+ ## Table: Custom Reports
+ Custom reports allow you to retrieve data based on specific `dimensions`, `metrics`, and `filters`.
+
+ Custom Table Format:
+ ```
+ custom:<dimensions>:<metrics>[:<filter_name,filter_values>]
+ ```
+ ### Parameters:
+ - `dimensions`(required): A comma-separated list of [dimensions](https://business-api.tiktok.com/portal/docs?id=1751443956638721) to retrieve.
+ - `metrics`(required): A comma-separated list of [metrics](https://business-api.tiktok.com/portal/docs?id=1751443967255553) to retrieve.
+ - `filters` (optional): Filters are specified in the format `<filter_name=filter_values>`.
+ - `filter_name`: The name of the filter (e.g. `campaign_ids`).
+ - `filter_values`: A comma-separated list of one or more values associated with the filter name (e.g., `camp_id123,camp_id456`). Only the `IN` filter type is supported. Learn more about [filters](https://business-api.tiktok.com/portal/docs?id=1751443975608321.).
+
+ > [!NOTE]
+ > Ingestr will fetch data for the last 30 days and use the default page size of `1000`. You can override this by specifying the `interval_start` and `interval_end` parameters.
+
+ ### Example
+
+ Retrieve data for campaigns with `campaign_ids` camp_id123 and camp_id456:
+ ```sh
+ ingestr ingest \
+ --source-uri "tiktok://?access_token=token_123&advertiser_ids=0594720014,0594720015" \
+ --source-table "custom:campaign_id,stat_time_day:clicks,cpc" \
+ --dest-uri "duckdb:///campaigns.duckdb" \
+ --dest-table "dest.clicks"
+ ```
+
+ The applied parameters for the report are:
+ - dimensions: `campaign_id` and `country_code`
+ - metrics: `clicks` and `cpc`
+ - filters: `campaign_ids` for `camp_id123` and `camp_id456`
+
+
+ This command will retrieve data for the specified date range and save it to the `dest.clicks` table in the DuckDB database.
+
+ <img alt="titok_ads_img" src="../media/tiktok.png" />
+
+
+
{ingestr-0.12.2 → ingestr-0.12.4}/ingestr/main.py
@@ -32,7 +32,7 @@ DATE_FORMATS = [

  # https://dlthub.com/docs/dlt-ecosystem/file-formats/parquet#supported-destinations
  PARQUET_SUPPORTED_DESTINATIONS = [
- "bigquery",
+ "athena" "bigquery",
  "duckdb",
  "snowflake",
  "databricks",
@@ -287,8 +287,14 @@ def ingest(
  envvar="SQL_EXCLUDE_COLUMNS",
  ),
  ] = [], # type: ignore
+ columns: Annotated[
+ Optional[list[str]],
+ typer.Option(
+ help="The column types to be used for the destination table in the format of 'column_name:column_type'",
+ envvar="COLUMNS",
+ ),
+ ] = None, # type: ignore
  ):
- # TODO(turtledev): can't we move this to the top of this file?
  import hashlib
  import tempfile
  from datetime import datetime
@@ -296,6 +302,7 @@ def ingest(
  import dlt
  import humanize
  import typer
+ from dlt.common.data_types import TDataType
  from dlt.common.destination import Destination
  from dlt.common.pipeline import LoadInfo
  from dlt.common.runtime.collector import Collector, LogCollector
@@ -345,7 +352,7 @@ def ingest(
  not in dlt_dest.capabilities().supported_loader_file_formats
  ):
  print(
- f"[red]Loader file format {loader_file_format.value} is not supported by the destination.[/red]"
+ f"[red]Loader file format {loader_file_format.value} is not supported by the destination, available formats: {dlt_dest.capabilities().supported_loader_file_formats}.[/red]"
  )
  raise typer.Abort()

@@ -357,6 +364,23 @@ def ingest(
  else:
  executable(source)

+ def parse_columns(columns: list[str]) -> dict[str, TDataType]:
+ from typing import cast, get_args
+
+ possible_types = get_args(TDataType)
+
+ types: dict[str, TDataType] = {}
+ for column in columns:
+ for candidate in column.split(","):
+ column_name, column_type = candidate.split(":")
+ if column_type not in possible_types:
+ print(
+ f"[red]Column type '{column_type}' is not supported, supported types: {possible_types}.[/red]"
+ )
+ raise typer.Abort()
+ types[column_name] = cast(TDataType, column_type)
+ return types
+
  track(
  "command_triggered",
  {
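For reference, a standalone sketch of the parsing rule the new `parse_columns` helper above implements: each `--columns` value is a comma-separated list of `column_name:column_type` pairs, validated against dlt's `TDataType` and turned into column hints. The flag values used here are illustrative.

```python
# Mirrors the parse_columns logic shown in the hunk above, outside of the CLI.
from typing import cast, get_args

from dlt.common.data_types import TDataType


def parse_columns_sketch(columns: list[str]) -> dict[str, TDataType]:
    possible_types = get_args(TDataType)
    types: dict[str, TDataType] = {}
    for column in columns:
        for candidate in column.split(","):
            column_name, column_type = candidate.split(":")
            if column_type not in possible_types:
                raise ValueError(f"unsupported column type: {column_type}")
            types[column_name] = cast(TDataType, column_type)
    return types


# e.g. --columns 'dt:date,id:bigint' on the CLI becomes column hints:
hints = {
    name: {"data_type": dtype}
    for name, dtype in parse_columns_sketch(["dt:date,id:bigint"]).items()
}
print(hints)  # {'dt': {'data_type': 'date'}, 'id': {'data_type': 'bigint'}}
```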
@@ -399,12 +423,20 @@ def ingest(
  column_hints: dict[str, TColumnSchema] = {}
  original_incremental_strategy = incremental_strategy

+ if columns:
+ column_types = parse_columns(columns)
+ for column_name, column_type in column_types.items():
+ column_hints[column_name] = {"data_type": column_type}
+
  merge_key = None
  if incremental_strategy == IncrementalStrategy.delete_insert:
  merge_key = incremental_key
  incremental_strategy = IncrementalStrategy.merge
  if incremental_key:
- column_hints[incremental_key] = {"merge_key": True}
+ if incremental_key not in column_hints:
+ column_hints[incremental_key] = {}
+
+ column_hints[incremental_key]["merge_key"] = True

  m = hashlib.sha256()
  m.update(dest_table.encode("utf-8"))
@@ -491,6 +523,21 @@ def ingest(
  if factory.source_scheme == "sqlite":
  source_table = "main." + source_table.split(".")[-1]

+ if (
+ incremental_key
+ and incremental_key in column_hints
+ and "data_type" in column_hints[incremental_key]
+ and column_hints[incremental_key]["data_type"] == "date"
+ ):
+ # By default, ingestr treats the start and end dates as datetime objects. While this worked fine for many cases, if the
+ # incremental field is a date, the start and end dates cannot be compared to the incremental field, and the ingestion would fail.
+ # In order to eliminate this, we have introduced a new option to ingestr, --columns, which allows the user to specify the column types for the destination table.
+ # This way, ingestr will know the data type of the incremental field, and will be able to convert the start and end dates to the correct data type before running the ingestion.
+ if interval_start:
+ interval_start = interval_start.date() # type: ignore
+ if interval_end:
+ interval_end = interval_end.date() # type: ignore
+
  dlt_source = source.dlt_source(
  uri=source_uri,
  table=source_table,
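A small illustration of the comparison problem the comments in the hunk above describe: in Python, ordering comparisons between `date` and `datetime` values raise `TypeError`, which is why the interval boundaries are narrowed to dates when the incremental column is typed as `date`. The values below are illustrative.

```python
# Why the .date() conversion matters: a datetime boundary cannot be ordered
# against date values, so both sides must be dates before comparing.
from datetime import date, datetime

interval_start = datetime(2023, 1, 1, 0, 0, 0)
row_value = date(2023, 1, 15)

try:
    row_value > interval_start
except TypeError as exc:
    print(exc)  # can't compare datetime.datetime to datetime.date

print(row_value > interval_start.date())  # True once both sides are dates
```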
{ingestr-0.12.2 → ingestr-0.12.4}/ingestr/src/factory.py
@@ -24,6 +24,8 @@ from ingestr.src.sources import (
  ChessSource,
  DynamoDBSource,
  FacebookAdsSource,
+ GitHubSource,
+ GoogleAnalyticsSource,
  GoogleSheetsSource,
  GorgiasSource,
  HubspotSource,
@@ -102,6 +104,7 @@ class SourceDestinationFactory:
  "gsheets": GoogleSheetsSource,
  "shopify": ShopifySource,
  "gorgias": GorgiasSource,
+ "github": GitHubSource,
  "chess": ChessSource,
  "stripe": StripeAnalyticsSource,
  "facebookads": FacebookAdsSource,
@@ -118,6 +121,7 @@ class SourceDestinationFactory:
  "dynamodb": DynamoDBSource,
  "asana": AsanaSource,
  "tiktok": TikTokSource,
+ "googleanalytics": GoogleAnalyticsSource,
  }
  destinations: Dict[str, Type[DestinationProtocol]] = {
  "bigquery": BigQueryDestination,
ingestr-0.12.4/ingestr/src/github/__init__.py
@@ -0,0 +1,149 @@
+ """Source that load github issues, pull requests and reactions for a specific repository via customizable graphql query. Loads events incrementally."""
+
+ import urllib.parse
+ from typing import Iterator, Optional, Sequence
+
+ import dlt
+ from dlt.common.typing import TDataItems
+ from dlt.sources import DltResource
+
+ from .helpers import get_reactions_data, get_rest_pages, get_stargazers
+
+
+ @dlt.source
+ def github_reactions(
+ owner: str,
+ name: str,
+ access_token: str = dlt.secrets.value,
+ items_per_page: int = 100,
+ max_items: Optional[int] = None,
+ ) -> Sequence[DltResource]:
+ """Get reactions associated with issues, pull requests and comments in the repo `name` with owner `owner`.
+
+ This source uses graphql to retrieve all issues (`issues` resource) and pull requests (`pull requests` resource) with the associated reactions (up to 100),
+ comments (up to 100) and reactions to comments (also up to 100). Internally graphql is used to retrieve data. It is cost optimized and you are able to retrieve the
+ data for fairly large repos quickly and cheaply.
+ You can and should change the queries in `queries.py` to include for example additional fields or connections. The source can be hacked to add more resources for other
+ repository nodes easily.
+
+ Args:
+ owner (str): The repository owner
+ name (str): The repository name
+ access_token (str): The classic access token. Will be injected from secrets if not provided.
+ items_per_page (int, optional): How many issues/pull requests to get in single page. Defaults to 100.
+ max_items (int, optional): How many issues/pull requests to get in total. None means All.
+ max_item_age_seconds (float, optional): Do not get items older than this. Defaults to None. NOT IMPLEMENTED
+
+ Returns:
+ Sequence[DltResource]: Two DltResources: `issues` with issues and `pull_requests` with pull requests
+ """
+ return (
+ dlt.resource(
+ get_reactions_data(
+ "issues",
+ owner,
+ name,
+ access_token,
+ items_per_page,
+ max_items,
+ ),
+ name="issues",
+ write_disposition="replace",
+ ),
+ dlt.resource(
+ get_reactions_data(
+ "pullRequests",
+ owner,
+ name,
+ access_token,
+ items_per_page,
+ max_items,
+ ),
+ name="pull_requests",
+ write_disposition="replace",
+ ),
+ )
+
+
+ @dlt.source(max_table_nesting=0)
+ def github_repo_events(
+ owner: str, name: str, access_token: Optional[str] = None
+ ) -> DltResource:
+ """Gets events for repository `name` with owner `owner` incrementally.
+
+ This source contains a single resource `repo_events` that gets given repository's events and dispatches them to separate tables with names based on event type.
+ The data is loaded incrementally. Subsequent runs will get only new events and append them to tables.
+ Please note that Github allows only for 300 events to be retrieved for public repositories. You should get the events frequently for the active repos.
+
+ Args:
+ owner (str): The repository owner
+ name (str): The repository name
+ access_token (str): The classic or fine-grained access token. If not provided, calls are made anonymously
+
+ Returns:
+ DltSource: source with the `repo_events` resource
+
+ """
+
+ # use naming function in table name to generate separate tables for each event
+ @dlt.resource(primary_key="id", table_name=lambda i: i["type"])
+ def repo_events(
+ last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+ "created_at", initial_value="1970-01-01T00:00:00Z", last_value_func=max
+ ),
+ ) -> Iterator[TDataItems]:
+ repos_path = (
+ f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
+ )
+
+ for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
+ yield page
+
+ # stop requesting pages if the last element was already older than initial value
+ # note: incremental will skip those items anyway, we just do not want to use the api limits
+ if last_created_at.start_out_of_range:
+ print(
+ f"Overlap with previous run created at {last_created_at.initial_value}"
+ )
+ break
+
+ return repo_events
+
+
+ @dlt.source
+ def github_stargazers(
+ owner: str,
+ name: str,
+ access_token: str = dlt.secrets.value,
+ items_per_page: int = 100,
+ max_items: Optional[int] = None,
+ ) -> Sequence[DltResource]:
+ """Get stargazers in the repo `name` with owner `owner`.
+
+ This source uses graphql to retrieve all stargazers with the associated starred date,
+ Internally graphql is used to retrieve data. It is cost optimized and you are able to retrieve the
+ data for fairly large repos quickly and cheaply.
+
+ Args:
+ owner (str): The repository owner
+ name (str): The repository name
+ access_token (str): The classic access token. Will be injected from secrets if not provided.
+ items_per_page (int, optional): How many issues/pull requests to get in single page. Defaults to 100.
+ max_items (int, optional): How many issues/pull requests to get in total. None means All.
+
+ Returns:
+ Sequence[DltResource]: One DltResource: `stargazers`
+ """
+ return (
+ dlt.resource(
+ get_stargazers(
+ owner,
+ name,
+ access_token,
+ items_per_page,
+ max_items,
+ ),
+ name="stargazers",
+ write_disposition="replace",
+ ),
+ )
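For orientation, a hypothetical, minimal way to run one of the dlt sources above directly (ingestr normally wires this up for you through the `github://` URI; the repository, pipeline, and dataset names here are illustrative):

```python
# Illustrative sketch: running the repo_events source above with a plain dlt pipeline.
import dlt

from ingestr.src.github import github_repo_events

pipeline = dlt.pipeline(
    pipeline_name="github_events",
    destination="duckdb",
    dataset_name="dest",
)

# Events are dispatched to one table per event type and loaded incrementally
# on the "created_at" field, as defined by the repo_events resource.
load_info = pipeline.run(github_repo_events("bruin-data", "ingestr"))
print(load_info)
```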