ingestr 0.12.2__tar.gz → 0.12.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (184) hide show
  1. {ingestr-0.12.2 → ingestr-0.12.3}/PKG-INFO +1 -1
  2. {ingestr-0.12.2 → ingestr-0.12.3}/docs/.vitepress/config.mjs +7 -0
  3. ingestr-0.12.3/docs/media/tiktok.png +0 -0
  4. ingestr-0.12.3/docs/supported-sources/custom_queries.md +50 -0
  5. ingestr-0.12.3/docs/supported-sources/tiktok-ads.md +58 -0
  6. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/sources.py +195 -28
  7. ingestr-0.12.3/ingestr/src/tiktok_ads/__init__.py +139 -0
  8. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/tiktok_ads/tiktok_helpers.py +32 -13
  9. ingestr-0.12.3/ingestr/src/version.py +1 -0
  10. ingestr-0.12.2/ingestr/src/tiktok_ads/__init__.py +0 -106
  11. ingestr-0.12.2/ingestr/src/version.py +0 -1
  12. {ingestr-0.12.2 → ingestr-0.12.3}/.dockerignore +0 -0
  13. {ingestr-0.12.2 → ingestr-0.12.3}/.githooks/pre-commit-hook.sh +0 -0
  14. {ingestr-0.12.2 → ingestr-0.12.3}/.github/workflows/deploy-docs.yml +0 -0
  15. {ingestr-0.12.2 → ingestr-0.12.3}/.github/workflows/secrets-scan.yml +0 -0
  16. {ingestr-0.12.2 → ingestr-0.12.3}/.github/workflows/tests.yml +0 -0
  17. {ingestr-0.12.2 → ingestr-0.12.3}/.gitignore +0 -0
  18. {ingestr-0.12.2 → ingestr-0.12.3}/.gitleaksignore +0 -0
  19. {ingestr-0.12.2 → ingestr-0.12.3}/.python-version +0 -0
  20. {ingestr-0.12.2 → ingestr-0.12.3}/.vale.ini +0 -0
  21. {ingestr-0.12.2 → ingestr-0.12.3}/Dockerfile +0 -0
  22. {ingestr-0.12.2 → ingestr-0.12.3}/LICENSE.md +0 -0
  23. {ingestr-0.12.2 → ingestr-0.12.3}/Makefile +0 -0
  24. {ingestr-0.12.2 → ingestr-0.12.3}/README.md +0 -0
  25. {ingestr-0.12.2 → ingestr-0.12.3}/docs/.vitepress/theme/custom.css +0 -0
  26. {ingestr-0.12.2 → ingestr-0.12.3}/docs/.vitepress/theme/index.js +0 -0
  27. {ingestr-0.12.2 → ingestr-0.12.3}/docs/commands/example-uris.md +0 -0
  28. {ingestr-0.12.2 → ingestr-0.12.3}/docs/commands/ingest.md +0 -0
  29. {ingestr-0.12.2 → ingestr-0.12.3}/docs/getting-started/core-concepts.md +0 -0
  30. {ingestr-0.12.2 → ingestr-0.12.3}/docs/getting-started/incremental-loading.md +0 -0
  31. {ingestr-0.12.2 → ingestr-0.12.3}/docs/getting-started/quickstart.md +0 -0
  32. {ingestr-0.12.2 → ingestr-0.12.3}/docs/getting-started/telemetry.md +0 -0
  33. {ingestr-0.12.2 → ingestr-0.12.3}/docs/index.md +0 -0
  34. {ingestr-0.12.2 → ingestr-0.12.3}/docs/media/athena.png +0 -0
  35. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/adjust.md +0 -0
  36. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/airtable.md +0 -0
  37. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/appsflyer.md +0 -0
  38. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/asana.md +0 -0
  39. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/athena.md +0 -0
  40. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/bigquery.md +0 -0
  41. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/chess.md +0 -0
  42. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/csv.md +0 -0
  43. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/databricks.md +0 -0
  44. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/duckdb.md +0 -0
  45. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/dynamodb.md +0 -0
  46. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/facebook-ads.md +0 -0
  47. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/gorgias.md +0 -0
  48. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/gsheets.md +0 -0
  49. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/hubspot.md +0 -0
  50. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/kafka.md +0 -0
  51. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/klaviyo.md +0 -0
  52. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/mongodb.md +0 -0
  53. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/mssql.md +0 -0
  54. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/mysql.md +0 -0
  55. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/notion.md +0 -0
  56. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/oracle.md +0 -0
  57. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/postgres.md +0 -0
  58. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/redshift.md +0 -0
  59. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/s3.md +0 -0
  60. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/sap-hana.md +0 -0
  61. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/shopify.md +0 -0
  62. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/slack.md +0 -0
  63. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/snowflake.md +0 -0
  64. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/sqlite.md +0 -0
  65. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/stripe.md +0 -0
  66. {ingestr-0.12.2 → ingestr-0.12.3}/docs/supported-sources/zendesk.md +0 -0
  67. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/main.py +0 -0
  68. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/.gitignore +0 -0
  69. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/adjust/__init__.py +0 -0
  70. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/adjust/adjust_helpers.py +0 -0
  71. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/airtable/__init__.py +0 -0
  72. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/appsflyer/_init_.py +0 -0
  73. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/appsflyer/client.py +0 -0
  74. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/arrow/__init__.py +0 -0
  75. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/asana_source/__init__.py +0 -0
  76. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/asana_source/helpers.py +0 -0
  77. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/asana_source/settings.py +0 -0
  78. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/chess/__init__.py +0 -0
  79. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/chess/helpers.py +0 -0
  80. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/chess/settings.py +0 -0
  81. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/destinations.py +0 -0
  82. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/dynamodb/__init__.py +0 -0
  83. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/facebook_ads/__init__.py +0 -0
  84. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/facebook_ads/exceptions.py +0 -0
  85. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/facebook_ads/helpers.py +0 -0
  86. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/facebook_ads/settings.py +0 -0
  87. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/factory.py +0 -0
  88. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/filesystem/__init__.py +0 -0
  89. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/filesystem/helpers.py +0 -0
  90. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/filesystem/readers.py +0 -0
  91. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/filters.py +0 -0
  92. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/google_sheets/README.md +0 -0
  93. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/google_sheets/__init__.py +0 -0
  94. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  95. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  96. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  97. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/gorgias/__init__.py +0 -0
  98. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/gorgias/helpers.py +0 -0
  99. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/hubspot/__init__.py +0 -0
  100. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/hubspot/helpers.py +0 -0
  101. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/hubspot/settings.py +0 -0
  102. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/kafka/__init__.py +0 -0
  103. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/kafka/helpers.py +0 -0
  104. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/klaviyo/_init_.py +0 -0
  105. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/klaviyo/client.py +0 -0
  106. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/klaviyo/helpers.py +0 -0
  107. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/mongodb/__init__.py +0 -0
  108. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/mongodb/helpers.py +0 -0
  109. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/notion/__init__.py +0 -0
  110. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/notion/helpers/__init__.py +0 -0
  111. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/notion/helpers/client.py +0 -0
  112. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/notion/helpers/database.py +0 -0
  113. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/notion/settings.py +0 -0
  114. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/shopify/__init__.py +0 -0
  115. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/shopify/exceptions.py +0 -0
  116. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/shopify/helpers.py +0 -0
  117. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/shopify/settings.py +0 -0
  118. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/slack/__init__.py +0 -0
  119. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/slack/helpers.py +0 -0
  120. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/slack/settings.py +0 -0
  121. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/stripe_analytics/__init__.py +0 -0
  122. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/stripe_analytics/helpers.py +0 -0
  123. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/stripe_analytics/settings.py +0 -0
  124. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/table_definition.py +0 -0
  125. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/telemetry/event.py +0 -0
  126. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  127. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/time.py +0 -0
  128. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/zendesk/__init__.py +0 -0
  129. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  130. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  131. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  132. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  133. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/src/zendesk/settings.py +0 -0
  134. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/.gitignore +0 -0
  135. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/create_replace.csv +0 -0
  136. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/delete_insert_expected.csv +0 -0
  137. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/delete_insert_part1.csv +0 -0
  138. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/delete_insert_part2.csv +0 -0
  139. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/merge_expected.csv +0 -0
  140. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/merge_part1.csv +0 -0
  141. {ingestr-0.12.2 → ingestr-0.12.3}/ingestr/testdata/merge_part2.csv +0 -0
  142. {ingestr-0.12.2 → ingestr-0.12.3}/package-lock.json +0 -0
  143. {ingestr-0.12.2 → ingestr-0.12.3}/package.json +0 -0
  144. {ingestr-0.12.2 → ingestr-0.12.3}/pyproject.toml +0 -0
  145. {ingestr-0.12.2 → ingestr-0.12.3}/requirements-dev.txt +0 -0
  146. {ingestr-0.12.2 → ingestr-0.12.3}/requirements.txt +0 -0
  147. {ingestr-0.12.2 → ingestr-0.12.3}/resources/demo.gif +0 -0
  148. {ingestr-0.12.2 → ingestr-0.12.3}/resources/demo.tape +0 -0
  149. {ingestr-0.12.2 → ingestr-0.12.3}/resources/ingestr.svg +0 -0
  150. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/AMPM.yml +0 -0
  151. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Acronyms.yml +0 -0
  152. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Colons.yml +0 -0
  153. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Contractions.yml +0 -0
  154. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/DateFormat.yml +0 -0
  155. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Ellipses.yml +0 -0
  156. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/EmDash.yml +0 -0
  157. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Exclamation.yml +0 -0
  158. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/FirstPerson.yml +0 -0
  159. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Gender.yml +0 -0
  160. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/GenderBias.yml +0 -0
  161. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/HeadingPunctuation.yml +0 -0
  162. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Headings.yml +0 -0
  163. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Latin.yml +0 -0
  164. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/LyHyphens.yml +0 -0
  165. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/OptionalPlurals.yml +0 -0
  166. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Ordinal.yml +0 -0
  167. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/OxfordComma.yml +0 -0
  168. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Parens.yml +0 -0
  169. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Passive.yml +0 -0
  170. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Periods.yml +0 -0
  171. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Quotes.yml +0 -0
  172. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Ranges.yml +0 -0
  173. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Semicolons.yml +0 -0
  174. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Slang.yml +0 -0
  175. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Spacing.yml +0 -0
  176. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Spelling.yml +0 -0
  177. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Units.yml +0 -0
  178. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/We.yml +0 -0
  179. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/Will.yml +0 -0
  180. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/WordList.yml +0 -0
  181. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/meta.json +0 -0
  182. {ingestr-0.12.2 → ingestr-0.12.3}/styles/Google/vocab.txt +0 -0
  183. {ingestr-0.12.2 → ingestr-0.12.3}/styles/bruin/Ingestr.yml +0 -0
  184. {ingestr-0.12.2 → ingestr-0.12.3}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.2
3
+ Version: 0.12.3
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -78,6 +78,12 @@ export default defineConfig({
78
78
  { text: "SAP Hana", link: "/supported-sources/sap-hana.md" },
79
79
  { text: "Snowflake", link: "/supported-sources/snowflake.md" },
80
80
  { text: "SQLite", link: "/supported-sources/sqlite.md" },
81
+ {
82
+ text: "Experimental",
83
+ items: [
84
+ { text: "Custom Queries", link: "/supported-sources/custom_queries.md" },
85
+ ],
86
+ },
81
87
  ],
82
88
  },
83
89
 
@@ -104,6 +110,7 @@ export default defineConfig({
104
110
  { text: "Shopify", link: "/supported-sources/shopify.md" },
105
111
  { text: "Slack", link: "/supported-sources/slack.md" },
106
112
  { text: "Stripe", link: "/supported-sources/stripe.md" },
113
+ { text: "TikTok Ads", link: "/supported-sources/tiktok-ads.md" },
107
114
  { text: "Zendesk", link: "/supported-sources/zendesk.md" },
108
115
  ],
109
116
  },
Binary file
@@ -0,0 +1,50 @@
1
+ # Custom Queries for SQL Sources
2
+
3
+ ingestr has primarily supported table replication for SQL sources, since that is the most common use case. However, there are scenarios where replicating a whole table as-is is not possible or not sufficient:
4
+ - you might want to load a subset of rows from a table
5
+ - you might want to load the result of a complex query that cannot be expressed as a simple table
6
+ - you could technically create a view in the database, but sometimes you don't have access/permissions to do so.
7
+ - you might want to do incremental loads but the table you want to load does not have an incremental key, so it needs to be joined with another table that does.
8
+
9
+ In order to support these scenarios, ingestr has added experimental support for custom queries.
10
+
11
+ > [!DANGER]
12
+ > This is an experimental feature, so do not expect it to work for all use cases. Please create an issue if you find a use case that doesn't work.
13
+
14
+ ## How to use custom queries
15
+
16
+ To use a custom query, prefix the value of `--source-table` with `query:` and follow it with the SQL to run:
17
+
18
+ ```bash
19
+ ingestr ingest \
20
+ --source-uri $POSTGRES_URI \
21
+ --dest-uri "duckdb:///mydb.db" \
22
+ --dest-table "public.output" \
23
+ --source-table "query:select oi.*, o.updated_at from order_items oi join orders o on oi.order_id = o.id"
24
+ ```
25
+
26
+ Ingestr runs the query through SQLAlchemy as a textual SQL statement, so you can use any SQL that is valid for your source database.
27
+
28
+ ### Incremental loads
29
+
30
+ Custom queries support incremental loads, but there are some caveats:
31
+ - the incremental key must be a column that is returned by the query
32
+ - the incremental key must be a datetime/timestamp column
33
+ - you must do your own filtering in the query for the incremental load
34
+ - you can use the `:interval_start` and `:interval_end` bind parameters in the query to filter the data
35
+
36
+ Here's an example of how to do an incremental load:
37
+
38
+ ```bash
39
+ ingestr ingest \
40
+ --source-uri $POSTGRES_URI \
41
+ --dest-uri "duckdb:///mydb.db" \
42
+ --dest-table "public.output" \
43
+ --source-table "query:select oi.*, o.updated_at from order_items oi join orders o on oi.order_id = o.id where o.updated_at > :interval_start" \
44
+ --incremental-key updated_at \
45
+ --incremental-strategy merge \
46
+ --primary-key id
47
+ ```
48
+
49
+ In this example, the query only returns rows whose `updated_at` value is greater than the `:interval_start` bind parameter, which ingestr fills in when it runs the query.
50
+
@@ -0,0 +1,58 @@
1
+ # TikTok Ads
2
+ TikTok Ads is an advertising platform that enables businesses and marketers to create, manage, and analyze ad campaigns targeting TikTok's user base.
3
+
4
+ Ingestr supports TikTok Ads as a Source.
5
+
6
+ ## URI format
7
+ The URI format for TikTok Ads as a Source is as follows:
8
+
9
+ ```plaintext
10
+ tiktok://?access_token=<ACCESS_TOKEN>&advertiser_ids=<advertiser_ids>&timezone=<timezone>
11
+ ```
12
+ ## URI parameters:
13
+ - `access_token` (required): Used for authentication and is necessary to access reports through the TikTok Marketing API.
14
+ - `advertiser_ids` (required): The comma-separated list of advertiser IDs to retrieve data for.
15
+ - `timezone` (optional): The timezone to use for data retrieval. Set this to the timezone of the advertiser account. Defaults to `UTC`.
16
+
17
+ TikTok requires an `access_token` and `advertiser_ids` to retrieve reports from the TikTok marketing API. Please follow the guide to obtain the [credentials](https://business-api.tiktok.com/portal/docs?id=1738373141733378).
18
+
19
+ ## Table: Custom Reports
20
+ Custom reports allow you to retrieve data based on specific `dimensions`, `metrics`, and `filters`.
21
+
22
+ Custom Table Format:
23
+ ```
24
+ custom:<dimensions>:<metrics>[:<filter_name>=<filter_values>]
25
+ ```
26
+ ### Parameters:
27
+ - `dimensions` (required): A comma-separated list of [dimensions](https://business-api.tiktok.com/portal/docs?id=1751443956638721) to retrieve.
28
+ - `metrics` (required): A comma-separated list of [metrics](https://business-api.tiktok.com/portal/docs?id=1751443967255553) to retrieve.
29
+ - `filters` (optional): A single filter, specified in the format `<filter_name>=<filter_values>`. Only one filter is allowed per custom report.
30
+ - `filter_name`: The name of the filter (e.g. `campaign_ids`).
31
+ - `filter_values`: A comma-separated list of one or more values associated with the filter name (e.g., `camp_id123,camp_id456`). Only the `IN` filter type is supported. Learn more about [filters](https://business-api.tiktok.com/portal/docs?id=1751443975608321).
32
+
33
+ > [!NOTE]
34
+ > By default, ingestr fetches data for the last 30 days and uses a page size of `1000`, which is also the maximum. You can override the date range by specifying the `interval_start` and `interval_end` parameters.
35
+
36
+ ### Example
37
+
38
+ Retrieve data for campaigns with `campaign_ids` camp_id123 and camp_id456:
39
+ ```sh
40
+ ingestr ingest \
41
+ --source-uri "tiktok://?access_token=token_123&advertiser_ids=0594720014,0594720015" \
42
+ --source-table "custom:campaign_id,stat_time_day:clicks,cpc:campaign_ids=camp_id123,camp_id456" \
43
+ --dest-uri "duckdb:///campaigns.duckdb" \
44
+ --dest-table "dest.clicks"
45
+ ```
46
+
47
+ The applied parameters for the report are:
48
+ - dimensions: `campaign_id` and `stat_time_day`
49
+ - metrics: `clicks` and `cpc`
50
+ - filters: `campaign_ids` for `camp_id123` and `camp_id456`
51
+
52
+
53
+ Because no `interval_start` or `interval_end` is given, this command will retrieve data for the default date range (the last 30 days) and save it to the `dest.clicks` table in the DuckDB database.
54
+
55
+ <img alt="tiktok_ads_img" src="../media/tiktok.png" />
56
+
57
+
58
+
@@ -3,17 +3,42 @@ import csv
3
3
  import json
4
4
  import os
5
5
  import re
6
- from datetime import date
7
- from typing import Any, Callable, Optional
6
+ from datetime import date, datetime
7
+ from typing import (
8
+ Any,
9
+ Callable,
10
+ Dict,
11
+ Iterator,
12
+ List,
13
+ Literal,
14
+ Optional,
15
+ Union,
16
+ )
8
17
  from urllib.parse import ParseResult, parse_qs, quote, urlparse
9
18
 
10
19
  import dlt
11
20
  import pendulum
12
- from dlt.common.configuration.specs import AwsCredentials
21
+ import sqlalchemy
22
+ from dlt.common.configuration.specs import (
23
+ AwsCredentials,
24
+ )
25
+ from dlt.common.libs.sql_alchemy import (
26
+ Engine,
27
+ MetaData,
28
+ )
13
29
  from dlt.common.time import ensure_pendulum_datetime
14
- from dlt.common.typing import TSecretStrValue
30
+ from dlt.common.typing import TDataItem, TSecretStrValue
31
+ from dlt.extract import Incremental
15
32
  from dlt.sources.credentials import ConnectionStringCredentials
16
33
  from dlt.sources.sql_database import sql_table
34
+ from dlt.sources.sql_database.helpers import TableLoader
35
+ from dlt.sources.sql_database.schema_types import (
36
+ ReflectionLevel,
37
+ SelectAny,
38
+ Table,
39
+ TTypeAdapter,
40
+ )
41
+ from sqlalchemy import Column
17
42
  from sqlalchemy import types as sa
18
43
  from sqlalchemy.dialects import mysql
19
44
 
@@ -39,7 +64,7 @@ from ingestr.src.notion import notion_databases
39
64
  from ingestr.src.shopify import shopify_source
40
65
  from ingestr.src.slack import slack_source
41
66
  from ingestr.src.stripe_analytics import stripe_source
42
- from ingestr.src.table_definition import table_string_to_dataclass
67
+ from ingestr.src.table_definition import TableDefinition, table_string_to_dataclass
43
68
  from ingestr.src.tiktok_ads import tiktok_source
44
69
  from ingestr.src.time import isotime
45
70
  from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
@@ -48,6 +73,9 @@ from ingestr.src.zendesk.helpers.credentials import (
48
73
  ZendeskCredentialsToken,
49
74
  )
50
75
 
76
+ TableBackend = Literal["sqlalchemy", "pyarrow", "pandas", "connectorx"]
77
+ TQueryAdapter = Callable[[SelectAny, Table], SelectAny]
78
+
51
79
 
52
80
  class SqlSource:
53
81
  table_builder: Callable
@@ -59,7 +87,9 @@ class SqlSource:
59
87
  return False
60
88
 
61
89
  def dlt_source(self, uri: str, table: str, **kwargs):
62
- table_fields = table_string_to_dataclass(table)
90
+ table_fields = TableDefinition(dataset="custom", table="custom")
91
+ if not table.startswith("query:"):
92
+ table_fields = table_string_to_dataclass(table)
63
93
 
64
94
  incremental = None
65
95
  if kwargs.get("incremental_key"):
@@ -87,6 +117,110 @@ class SqlSource:
87
117
  query = query.order_by(kwargs.get("incremental_key"))
88
118
  return query
89
119
 
120
+ defer_table_reflect = False
121
+ sql_backend = kwargs.get("sql_backend", "sqlalchemy")
122
+ if table.startswith("query:"):
123
+ if kwargs.get("sql_limit"):
124
+ raise ValueError(
125
+ "sql_limit is not supported for custom queries, please apply the limit in the query instead"
126
+ )
127
+
128
+ sql_backend = "sqlalchemy"
129
+ defer_table_reflect = True
130
+ query_value = table.split(":", 1)[1]
131
+
132
+ # this is a very hacky version of the table_rows function. it is built this way to go around the dlt's table loader.
133
+ # I didn't want to write a full fledged sqlalchemy source for now, and wanted to benefit from the existing stuff to begin with.
134
+ # this is by no means a production ready solution, but it works for now.
135
+ # the core idea behind this implementation is to create a mock table instance with the columns that are absolutely necessary for the incremental load to work.
136
+ # the table loader will then use the query adapter callback to apply the actual query and load the rows.
137
+ def table_rows(
138
+ engine: Engine,
139
+ table: Union[Table, str],
140
+ metadata: MetaData,
141
+ chunk_size: int,
142
+ backend: TableBackend,
143
+ incremental: Optional[Incremental[Any]] = None,
144
+ table_adapter_callback: Callable[[Table], None] = None, # type: ignore
145
+ reflection_level: ReflectionLevel = "minimal",
146
+ backend_kwargs: Dict[str, Any] = None, # type: ignore
147
+ type_adapter_callback: Optional[TTypeAdapter] = None,
148
+ included_columns: Optional[List[str]] = None,
149
+ query_adapter_callback: Optional[TQueryAdapter] = None,
150
+ resolve_foreign_keys: bool = False,
151
+ ) -> Iterator[TDataItem]:
152
+ hints = { # type: ignore
153
+ "columns": [],
154
+ }
155
+ cols = [] # type: ignore
156
+
157
+ if incremental:
158
+ switchDict = {
159
+ int: sa.INTEGER,
160
+ datetime: sa.TIMESTAMP,
161
+ pendulum.Date: sa.DATE,
162
+ pendulum.DateTime: sa.TIMESTAMP,
163
+ }
164
+
165
+ if incremental.last_value is not None:
166
+ cols.append(
167
+ Column(
168
+ incremental.cursor_path,
169
+ switchDict[type(incremental.last_value)], # type: ignore
170
+ )
171
+ )
172
+ else:
173
+ cols.append(Column(incremental.cursor_path, sa.TIMESTAMP)) # type: ignore
174
+
175
+ table = Table(
176
+ "query_result",
177
+ metadata,
178
+ *cols,
179
+ )
180
+
181
+ loader = TableLoader(
182
+ engine,
183
+ backend,
184
+ table,
185
+ hints["columns"], # type: ignore
186
+ incremental=incremental,
187
+ chunk_size=chunk_size,
188
+ query_adapter_callback=query_adapter_callback,
189
+ )
190
+ try:
191
+ yield from loader.load_rows(backend_kwargs)
192
+ finally:
193
+ if getattr(engine, "may_dispose_after_use", False):
194
+ engine.dispose()
195
+
196
+ dlt.sources.sql_database.table_rows = table_rows
197
+
198
+ def query_adapter_callback(query, table, incremental=None, engine=None):
199
+ params = {}
200
+ if incremental:
201
+ params["interval_start"] = (
202
+ incremental.last_value
203
+ if incremental.last_value is not None
204
+ else datetime(year=1, month=1, day=1)
205
+ )
206
+ if incremental.end_value is not None:
207
+ params["interval_end"] = incremental.end_value
208
+ else:
209
+ if ":interval_start" in query_value:
210
+ params["interval_start"] = (
211
+ datetime.min
212
+ if kwargs.get("interval_start") is None
213
+ else kwargs.get("interval_start")
214
+ )
215
+ if ":interval_end" in query_value:
216
+ params["interval_end"] = (
217
+ datetime.max
218
+ if kwargs.get("interval_end") is None
219
+ else kwargs.get("interval_end")
220
+ )
221
+
222
+ return sqlalchemy.text(query_value).bindparams(**params)
223
+
90
224
  def type_adapter_callback(sql_type):
91
225
  if isinstance(sql_type, mysql.SET):
92
226
  return sa.JSON
@@ -97,7 +231,7 @@ class SqlSource:
97
231
  schema=table_fields.dataset,
98
232
  table=table_fields.table,
99
233
  incremental=incremental,
100
- backend=kwargs.get("sql_backend", "sqlalchemy"),
234
+ backend=sql_backend,
101
235
  chunk_size=kwargs.get("page_size", None),
102
236
  reflection_level=reflection_level,
103
237
  query_adapter_callback=query_adapter_callback,
@@ -105,6 +239,7 @@ class SqlSource:
105
239
  table_adapter_callback=table_adapter_exclude_columns(
106
240
  kwargs.get("sql_exclude_columns", [])
107
241
  ),
242
+ defer_table_reflect=defer_table_reflect,
108
243
  )
109
244
 
110
245
  return builder_res
@@ -1015,29 +1150,28 @@ class TikTokSource:
1015
1150
  if not access_token:
1016
1151
  raise ValueError("access_token is required to connect to TikTok")
1017
1152
 
1018
- time_zone = source_fields.get("time_zone", "UTC")
1153
+ timezone = "UTC"
1154
+ if source_fields.get("timezone") is not None:
1155
+ timezone = source_fields.get("timezone")[0] # type: ignore
1156
+
1157
+ advertiser_ids = source_fields.get("advertiser_ids")
1158
+ if not advertiser_ids:
1159
+ raise ValueError("advertiser_ids is required to connect to TikTok")
1019
1160
 
1020
- advertiser_id = source_fields.get("advertiser_id")
1021
- if not advertiser_id:
1022
- raise ValueError("advertiser_id is required to connect to TikTok")
1161
+ advertiser_ids = advertiser_ids[0].replace(" ", "").split(",")
1023
1162
 
1024
- start_date = pendulum.now().subtract(days=90).in_tz(time_zone[0])
1025
- end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(time_zone[0])
1163
+ start_date = pendulum.now().subtract(days=30).in_tz(timezone)
1164
+ end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(timezone)
1026
1165
 
1027
1166
  interval_start = kwargs.get("interval_start")
1028
1167
  if interval_start is not None:
1029
- start_date = ensure_pendulum_datetime(interval_start).in_tz(time_zone[0])
1168
+ start_date = ensure_pendulum_datetime(interval_start).in_tz(timezone)
1030
1169
 
1031
1170
  interval_end = kwargs.get("interval_end")
1032
1171
  if interval_end is not None:
1033
- end_date = ensure_pendulum_datetime(interval_end).in_tz(time_zone[0])
1172
+ end_date = ensure_pendulum_datetime(interval_end).in_tz(timezone)
1034
1173
 
1035
- page_size = kwargs.get("page_size")
1036
- if page_size is not None and not isinstance(page_size, int):
1037
- page_size = int(page_size)
1038
-
1039
- if page_size > 1000:
1040
- page_size = 1000
1174
+ page_size = min(1000, kwargs.get("page_size", 1000))
1041
1175
 
1042
1176
  if table.startswith("custom:"):
1043
1177
  fields = table.split(":", 3)
@@ -1049,28 +1183,61 @@ class TikTokSource:
1049
1183
  dimensions = fields[1].replace(" ", "").split(",")
1050
1184
  if (
1051
1185
  "campaign_id" not in dimensions
1052
- and "advertiser_id" not in dimensions
1053
1186
  and "adgroup_id" not in dimensions
1054
1187
  and "ad_id" not in dimensions
1055
1188
  ):
1056
1189
  raise ValueError(
1057
- "You must provide one ID dimension. Please use one ID dimension from the following options: [campaign_id, advertiser_id, adgroup_id, ad_id]"
1190
+ "TikTok API requires at least one ID dimension, please use one of the following dimensions: [campaign_id, adgroup_id, ad_id]"
1058
1191
  )
1059
1192
 
1193
+ if "advertiser_id" in dimensions:
1194
+ dimensions.remove("advertiser_id")
1195
+
1060
1196
  metrics = fields[2].replace(" ", "").split(",")
1061
- filters = []
1197
+ filtering_param = False
1198
+ filter_name = ""
1199
+ filter_value = []
1062
1200
  if len(fields) == 4:
1063
- filters = fields[3].replace(" ", "").split(",")
1201
+
1202
+ def parse_filters(filters_raw: str) -> dict:
1203
+ # Parse filter string like "key1=value1,key2=value2,value3,value4"
1204
+ filters = {}
1205
+ current_key = None
1206
+
1207
+ for item in filters_raw.split(","):
1208
+ if "=" in item:
1209
+ # Start of a new key-value pair
1210
+ key, value = item.split("=")
1211
+ filters[key] = [value] # Always start with a list
1212
+ current_key = key
1213
+ elif current_key is not None:
1214
+ # Additional value for the current key
1215
+ filters[current_key].append(item)
1216
+
1217
+ # Convert single-item lists to simple values
1218
+ return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
1219
+
1220
+ filtering_param = True
1221
+ filters = parse_filters(fields[3])
1222
+ if len(filters) > 1:
1223
+ raise ValueError(
1224
+ "Only one filter is allowed for TikTok custom reports"
1225
+ )
1226
+ filter_name = list(filters.keys())[0]
1227
+ filter_value = list(map(int, filters[list(filters.keys())[0]]))
1228
+
1064
1229
  return tiktok_source(
1065
1230
  start_date=start_date,
1066
1231
  end_date=end_date,
1067
1232
  access_token=access_token[0],
1068
- advertiser_id=advertiser_id[0],
1069
- time_zone=time_zone[0],
1233
+ advertiser_ids=advertiser_ids,
1234
+ timezone=timezone,
1070
1235
  dimensions=dimensions,
1071
1236
  metrics=metrics,
1072
- filters=filters,
1073
1237
  page_size=page_size,
1238
+ filter_name=filter_name,
1239
+ filter_value=filter_value,
1240
+ filtering_param=filtering_param,
1074
1241
  ).with_resources(endpoint)
1075
1242
 
1076
1243
 
@@ -0,0 +1,139 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.time import ensure_pendulum_datetime
6
+ from dlt.common.typing import TDataItem
7
+ from dlt.sources import DltResource
8
+
9
+ from .tiktok_helpers import TikTokAPI
10
+
11
# Column data types for the TikTok report fields this module knows about.
# Monetary amounts, rates and ratios are "decimal"; plain counters are "bigint".
_DECIMAL = "decimal"
_BIGINT = "bigint"

# Built from an ordered (field, data type) table; every field gets its own
# hint dict so that no two entries share a mutable value.
KNOWN_TYPE_HINTS = {
    field_name: {"data_type": data_type}
    for field_name, data_type in (
        ("spend", _DECIMAL),
        ("billed_cost", _DECIMAL),
        ("cash_spend", _DECIMAL),
        ("voucher_spend", _DECIMAL),
        ("cpc", _DECIMAL),
        ("cpm", _DECIMAL),
        ("impressions", _BIGINT),
        ("gross_impressions", _BIGINT),
        ("clicks", _BIGINT),
        ("ctr", _DECIMAL),
        ("reach", _BIGINT),
        ("cost_per_1000_reached", _DECIMAL),
        ("frequency", _DECIMAL),
        ("conversion", _BIGINT),
        ("cost_per_conversion", _DECIMAL),
        ("conversion_rate", _DECIMAL),
        ("conversion_rate_v2", _DECIMAL),
        ("real_time_conversion", _BIGINT),
        ("real_time_cost_per_conversion", _DECIMAL),
        ("real_time_conversion_rate", _DECIMAL),
        ("real_time_conversion_rate_v2", _DECIMAL),
        ("result", _BIGINT),
        ("cost_per_result", _DECIMAL),
        ("result_rate", _DECIMAL),
        ("real_time_result", _BIGINT),
        ("real_time_cost_per_result", _DECIMAL),
        ("real_time_result_rate", _DECIMAL),
        ("secondary_goal_result", _BIGINT),
        ("cost_per_secondary_goal_result", _DECIMAL),
        ("secondary_goal_result_rate", _DECIMAL),
    )
}
43
+
44
+
45
def find_intervals(
    current_date: "pendulum.DateTime",
    end_date: "pendulum.DateTime",
    interval_days: int,
) -> "list[tuple[pendulum.DateTime, pendulum.DateTime]]":
    """Split the range ``current_date``..``end_date`` into consecutive windows.

    Each window is a ``(start, end)`` pair where ``end`` is at most
    ``interval_days`` days after ``start`` and never later than ``end_date``.
    The next window starts one day after the previous window's end, so
    ``interval_days=0`` produces one single-day window per day.

    Args:
        current_date: First instant to cover.
        end_date: Last instant to cover.
        interval_days: Maximum distance in days between a window's start and
            its end. Must not be negative.

    Returns:
        The list of ``(start, end)`` windows in chronological order; empty
        when ``current_date`` is already past ``end_date``.

    Raises:
        ValueError: If ``interval_days`` is negative. A negative value would
            put each window's end before its start, so the cursor would never
            pass ``end_date`` and the loop would never terminate.
    """
    if interval_days < 0:
        raise ValueError(
            f"interval_days must be non-negative, got {interval_days}"
        )

    intervals = []
    while current_date <= end_date:
        # Clamp the last window so it never runs past the requested end.
        interval_end = min(current_date.add(days=interval_days), end_date)
        intervals.append((current_date, interval_end))
        current_date = interval_end.add(days=1)

    return intervals
57
+
58
+
59
@dlt.source(max_table_nesting=0)
def tiktok_source(
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
    access_token: str,
    advertiser_ids: list[str],
    timezone: str,
    page_size: int,
    filtering_param: bool,
    filter_name: str,
    filter_value: list[int],
    dimensions: list[str],
    metrics: list[str],
) -> DltResource:
    """Create the dlt source that exposes the ``custom_reports`` resource.

    The resource is merged on ``dimensions`` plus ``advertiser_id``. When a
    time dimension is requested it is loaded incrementally on that column,
    and the requested date range is fetched in windows of 0 days
    (``stat_time_hour``), 30 days (``stat_time_day``) or 365 days (no time
    dimension).

    Args:
        start_date: Beginning of the reporting range and initial cursor value.
        end_date: End of the reporting range.
        access_token: Token handed to ``TikTokAPI``.
        advertiser_ids: Advertisers to fetch reports for.
        timezone: Timezone name the start/cursor dates are converted to; also
            handed to ``TikTokAPI``.
        page_size: Page size handed to ``TikTokAPI``.
        filtering_param: Filter switch handed to ``TikTokAPI``.
        filter_name: Filter field name handed to ``TikTokAPI``.
        filter_value: Filter values handed to ``TikTokAPI``.
        dimensions: Report dimensions; also form the primary key.
        metrics: Report metrics.

    Returns:
        The ``custom_reports`` resource.
    """
    client = TikTokAPI(
        access_token=access_token,
        timezone=timezone,
        page_size=page_size,
        filtering_param=filtering_param,
        filter_name=filter_name,
        filter_value=filter_value,
    )

    # Hourly granularity wins over daily when both dimensions are requested.
    if "stat_time_hour" in dimensions:
        cursor_column, use_incremental, window_days = "stat_time_hour", True, 0
    elif "stat_time_day" in dimensions:
        cursor_column, use_incremental, window_days = "stat_time_day", True, 30
    else:
        cursor_column, use_incremental, window_days = "", False, 365

    # advertiser_id is always text; every other requested column gets a hint
    # only when its type is known.
    column_hints = {
        "advertiser_id": {"data_type": "text"},
        **{
            column: KNOWN_TYPE_HINTS[column]
            for column in (*dimensions, *metrics)
            if column in KNOWN_TYPE_HINTS
        },
    }

    cursor_default = (
        dlt.sources.incremental(cursor_column, start_date)
        if use_incremental
        else None
    )

    @dlt.resource(
        write_disposition="merge",
        primary_key=dimensions + ["advertiser_id"],
        columns=column_hints,
        parallelized=True,
    )
    def custom_reports(datetime=cursor_default) -> Iterable[TDataItem]:
        """Yield one page fetcher per date window of the reporting range."""
        if datetime is None:
            # Non-incremental run: always start from the requested start date.
            window_origin = start_date.in_tz(timezone)
        else:
            # Incremental run: resume from the stored cursor value.
            window_origin = ensure_pendulum_datetime(datetime.last_value).in_tz(
                timezone
            )

        windows = find_intervals(
            current_date=window_origin,
            end_date=end_date,
            interval_days=window_days,
        )
        for window_start, window_end in windows:
            yield client.fetch_pages(
                advertiser_ids=advertiser_ids,
                start_time=window_start,
                end_time=window_end,
                dimensions=dimensions,
                metrics=metrics,
            )

    return custom_reports