ingestr 0.12.8__tar.gz → 0.12.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (208) hide show
  1. {ingestr-0.12.8 → ingestr-0.12.10}/PKG-INFO +2 -1
  2. {ingestr-0.12.8 → ingestr-0.12.10}/docs/.vitepress/config.mjs +2 -0
  3. ingestr-0.12.10/docs/media/linkedin_ads.png +0 -0
  4. ingestr-0.12.10/docs/supported-sources/gcs.md +62 -0
  5. ingestr-0.12.10/docs/supported-sources/linkedin_ads.md +109 -0
  6. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/s3.md +6 -2
  7. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appstore/__init__.py +4 -0
  8. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/factory.py +4 -0
  9. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/filesystem/__init__.py +0 -2
  10. ingestr-0.12.10/ingestr/src/linkedin_ads/__init__.py +63 -0
  11. ingestr-0.12.10/ingestr/src/linkedin_ads/dimension_time_enum.py +12 -0
  12. ingestr-0.12.10/ingestr/src/linkedin_ads/helpers.py +148 -0
  13. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/sources.py +157 -9
  14. ingestr-0.12.10/ingestr/src/version.py +1 -0
  15. {ingestr-0.12.8 → ingestr-0.12.10}/requirements.txt +1 -1
  16. ingestr-0.12.8/ingestr/src/version.py +0 -1
  17. {ingestr-0.12.8 → ingestr-0.12.10}/.dockerignore +0 -0
  18. {ingestr-0.12.8 → ingestr-0.12.10}/.githooks/pre-commit-hook.sh +0 -0
  19. {ingestr-0.12.8 → ingestr-0.12.10}/.github/workflows/deploy-docs.yml +0 -0
  20. {ingestr-0.12.8 → ingestr-0.12.10}/.github/workflows/secrets-scan.yml +0 -0
  21. {ingestr-0.12.8 → ingestr-0.12.10}/.github/workflows/tests.yml +0 -0
  22. {ingestr-0.12.8 → ingestr-0.12.10}/.gitignore +0 -0
  23. {ingestr-0.12.8 → ingestr-0.12.10}/.gitleaksignore +0 -0
  24. {ingestr-0.12.8 → ingestr-0.12.10}/.python-version +0 -0
  25. {ingestr-0.12.8 → ingestr-0.12.10}/.vale.ini +0 -0
  26. {ingestr-0.12.8 → ingestr-0.12.10}/Dockerfile +0 -0
  27. {ingestr-0.12.8 → ingestr-0.12.10}/LICENSE.md +0 -0
  28. {ingestr-0.12.8 → ingestr-0.12.10}/Makefile +0 -0
  29. {ingestr-0.12.8 → ingestr-0.12.10}/README.md +0 -0
  30. {ingestr-0.12.8 → ingestr-0.12.10}/docs/.vitepress/theme/custom.css +0 -0
  31. {ingestr-0.12.8 → ingestr-0.12.10}/docs/.vitepress/theme/index.js +0 -0
  32. {ingestr-0.12.8 → ingestr-0.12.10}/docs/commands/example-uris.md +0 -0
  33. {ingestr-0.12.8 → ingestr-0.12.10}/docs/commands/ingest.md +0 -0
  34. {ingestr-0.12.8 → ingestr-0.12.10}/docs/getting-started/core-concepts.md +0 -0
  35. {ingestr-0.12.8 → ingestr-0.12.10}/docs/getting-started/incremental-loading.md +0 -0
  36. {ingestr-0.12.8 → ingestr-0.12.10}/docs/getting-started/quickstart.md +0 -0
  37. {ingestr-0.12.8 → ingestr-0.12.10}/docs/getting-started/telemetry.md +0 -0
  38. {ingestr-0.12.8 → ingestr-0.12.10}/docs/index.md +0 -0
  39. {ingestr-0.12.8 → ingestr-0.12.10}/docs/media/athena.png +0 -0
  40. {ingestr-0.12.8 → ingestr-0.12.10}/docs/media/github.png +0 -0
  41. {ingestr-0.12.8 → ingestr-0.12.10}/docs/media/googleanalytics.png +0 -0
  42. {ingestr-0.12.8 → ingestr-0.12.10}/docs/media/tiktok.png +0 -0
  43. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/adjust.md +0 -0
  44. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/airtable.md +0 -0
  45. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/appsflyer.md +0 -0
  46. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/appstore.md +0 -0
  47. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/asana.md +0 -0
  48. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/athena.md +0 -0
  49. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/bigquery.md +0 -0
  50. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/chess.md +0 -0
  51. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/csv.md +0 -0
  52. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/custom_queries.md +0 -0
  53. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/databricks.md +0 -0
  54. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/duckdb.md +0 -0
  55. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/dynamodb.md +0 -0
  56. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/facebook-ads.md +0 -0
  57. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/github.md +0 -0
  58. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/google_analytics.md +0 -0
  59. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/gorgias.md +0 -0
  60. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/gsheets.md +0 -0
  61. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/hubspot.md +0 -0
  62. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/kafka.md +0 -0
  63. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/klaviyo.md +0 -0
  64. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/mongodb.md +0 -0
  65. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/mssql.md +0 -0
  66. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/mysql.md +0 -0
  67. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/notion.md +0 -0
  68. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/oracle.md +0 -0
  69. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/postgres.md +0 -0
  70. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/redshift.md +0 -0
  71. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/sap-hana.md +0 -0
  72. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/shopify.md +0 -0
  73. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/slack.md +0 -0
  74. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/snowflake.md +0 -0
  75. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/sqlite.md +0 -0
  76. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/stripe.md +0 -0
  77. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/tiktok-ads.md +0 -0
  78. {ingestr-0.12.8 → ingestr-0.12.10}/docs/supported-sources/zendesk.md +0 -0
  79. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/main.py +0 -0
  80. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/.gitignore +0 -0
  81. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/adjust/__init__.py +0 -0
  82. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/adjust/adjust_helpers.py +0 -0
  83. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/airtable/__init__.py +0 -0
  84. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appsflyer/_init_.py +0 -0
  85. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appsflyer/client.py +0 -0
  86. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appstore/client.py +0 -0
  87. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appstore/errors.py +0 -0
  88. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appstore/models.py +0 -0
  89. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/appstore/resources.py +0 -0
  90. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/arrow/__init__.py +0 -0
  91. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/asana_source/__init__.py +0 -0
  92. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/asana_source/helpers.py +0 -0
  93. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/asana_source/settings.py +0 -0
  94. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/chess/__init__.py +0 -0
  95. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/chess/helpers.py +0 -0
  96. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/chess/settings.py +0 -0
  97. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/destinations.py +0 -0
  98. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/dynamodb/__init__.py +0 -0
  99. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/errors.py +0 -0
  100. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/facebook_ads/__init__.py +0 -0
  101. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/facebook_ads/exceptions.py +0 -0
  102. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/facebook_ads/helpers.py +0 -0
  103. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/facebook_ads/settings.py +0 -0
  104. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/filesystem/helpers.py +0 -0
  105. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/filesystem/readers.py +0 -0
  106. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/filters.py +0 -0
  107. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/github/__init__.py +0 -0
  108. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/github/helpers.py +0 -0
  109. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/github/queries.py +0 -0
  110. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/github/settings.py +0 -0
  111. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_analytics/__init__.py +0 -0
  112. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_analytics/helpers.py +0 -0
  113. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_sheets/README.md +0 -0
  114. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_sheets/__init__.py +0 -0
  115. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  116. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  117. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  118. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/gorgias/__init__.py +0 -0
  119. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/gorgias/helpers.py +0 -0
  120. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/hubspot/__init__.py +0 -0
  121. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/hubspot/helpers.py +0 -0
  122. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/hubspot/settings.py +0 -0
  123. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/kafka/__init__.py +0 -0
  124. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/kafka/helpers.py +0 -0
  125. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/klaviyo/_init_.py +0 -0
  126. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/klaviyo/client.py +0 -0
  127. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/klaviyo/helpers.py +0 -0
  128. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/mongodb/__init__.py +0 -0
  129. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/mongodb/helpers.py +0 -0
  130. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/notion/__init__.py +0 -0
  131. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/notion/helpers/__init__.py +0 -0
  132. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/notion/helpers/client.py +0 -0
  133. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/notion/helpers/database.py +0 -0
  134. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/notion/settings.py +0 -0
  135. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/shopify/__init__.py +0 -0
  136. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/shopify/exceptions.py +0 -0
  137. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/shopify/helpers.py +0 -0
  138. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/shopify/settings.py +0 -0
  139. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/slack/__init__.py +0 -0
  140. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/slack/helpers.py +0 -0
  141. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/slack/settings.py +0 -0
  142. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/sql_database/__init__.py +0 -0
  143. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/sql_database/callbacks.py +0 -0
  144. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/stripe_analytics/__init__.py +0 -0
  145. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/stripe_analytics/helpers.py +0 -0
  146. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/stripe_analytics/settings.py +0 -0
  147. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/table_definition.py +0 -0
  148. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/telemetry/event.py +0 -0
  149. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  150. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/tiktok_ads/__init__.py +0 -0
  151. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  152. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/time.py +0 -0
  153. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/zendesk/__init__.py +0 -0
  154. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  155. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  156. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  157. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  158. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/src/zendesk/settings.py +0 -0
  159. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/.gitignore +0 -0
  160. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/create_replace.csv +0 -0
  161. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/delete_insert_expected.csv +0 -0
  162. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/delete_insert_part1.csv +0 -0
  163. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/delete_insert_part2.csv +0 -0
  164. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/merge_expected.csv +0 -0
  165. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/merge_part1.csv +0 -0
  166. {ingestr-0.12.8 → ingestr-0.12.10}/ingestr/testdata/merge_part2.csv +0 -0
  167. {ingestr-0.12.8 → ingestr-0.12.10}/package-lock.json +0 -0
  168. {ingestr-0.12.8 → ingestr-0.12.10}/package.json +0 -0
  169. {ingestr-0.12.8 → ingestr-0.12.10}/pyproject.toml +0 -0
  170. {ingestr-0.12.8 → ingestr-0.12.10}/requirements-dev.txt +0 -0
  171. {ingestr-0.12.8 → ingestr-0.12.10}/resources/demo.gif +0 -0
  172. {ingestr-0.12.8 → ingestr-0.12.10}/resources/demo.tape +0 -0
  173. {ingestr-0.12.8 → ingestr-0.12.10}/resources/ingestr.svg +0 -0
  174. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/AMPM.yml +0 -0
  175. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Acronyms.yml +0 -0
  176. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Colons.yml +0 -0
  177. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Contractions.yml +0 -0
  178. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/DateFormat.yml +0 -0
  179. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Ellipses.yml +0 -0
  180. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/EmDash.yml +0 -0
  181. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Exclamation.yml +0 -0
  182. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/FirstPerson.yml +0 -0
  183. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Gender.yml +0 -0
  184. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/GenderBias.yml +0 -0
  185. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/HeadingPunctuation.yml +0 -0
  186. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Headings.yml +0 -0
  187. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Latin.yml +0 -0
  188. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/LyHyphens.yml +0 -0
  189. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/OptionalPlurals.yml +0 -0
  190. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Ordinal.yml +0 -0
  191. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/OxfordComma.yml +0 -0
  192. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Parens.yml +0 -0
  193. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Passive.yml +0 -0
  194. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Periods.yml +0 -0
  195. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Quotes.yml +0 -0
  196. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Ranges.yml +0 -0
  197. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Semicolons.yml +0 -0
  198. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Slang.yml +0 -0
  199. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Spacing.yml +0 -0
  200. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Spelling.yml +0 -0
  201. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Units.yml +0 -0
  202. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/We.yml +0 -0
  203. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/Will.yml +0 -0
  204. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/WordList.yml +0 -0
  205. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/meta.json +0 -0
  206. {ingestr-0.12.8 → ingestr-0.12.10}/styles/Google/vocab.txt +0 -0
  207. {ingestr-0.12.8 → ingestr-0.12.10}/styles/bruin/Ingestr.yml +0 -0
  208. {ingestr-0.12.8 → ingestr-0.12.10}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.8
3
+ Version: 0.12.10
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
22
22
  Requires-Dist: duckdb-engine==0.13.5
23
23
  Requires-Dist: duckdb==1.1.3
24
24
  Requires-Dist: facebook-business==20.0.0
25
+ Requires-Dist: gcsfs==2024.10.0
25
26
  Requires-Dist: google-analytics-data==0.18.16
26
27
  Requires-Dist: google-api-python-client==2.130.0
27
28
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
@@ -102,12 +102,14 @@ export default defineConfig({
102
102
  text: "Facebook Ads",
103
103
  link: "/supported-sources/facebook-ads.md",
104
104
  },
105
+ { text: "Google Cloud Storage (GCS)", link: "/supported-sources/gcs.md" },
105
106
  { text: "Google Analytics", link: "/supported-sources/google_analytics.md" },
106
107
  { text: "GitHub", link: "/supported-sources/github.md" },
107
108
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
108
109
  { text: "Gorgias", link: "/supported-sources/gorgias.md" },
109
110
  { text: "HubSpot", link: "/supported-sources/hubspot.md" },
110
111
  { text: "Klaviyo", link: "/supported-sources/klaviyo.md" },
112
+ { text: "LinkedIn Ads", link: "/supported-sources/linkedin_ads.md" },
111
113
  { text: "Notion", link: "/supported-sources/notion.md" },
112
114
  { text: "S3", link: "/supported-sources/s3.md" },
113
115
  { text: "Shopify", link: "/supported-sources/shopify.md" },
@@ -0,0 +1,62 @@
1
+ # Google Cloud Storage
2
+
3
+ [Google Cloud Storage](https://cloud.google.com/storage?hl=en) is an online file storage web service for storing and accessing data on Google Cloud Platform infrastructure. The service combines the performance and scalability of Google's cloud with advanced security and sharing capabilities. It is an Infrastructure as a Service (IaaS), comparable to Amazon S3.
4
+
5
+ ## URI format
6
+
7
+ The URI format for Google Cloud Storage is as follows:
8
+
9
+ ```plaintext
10
+ gs://<bucket_name>?credentials_path=</path/to/service-account.json>
11
+ ```
12
+
13
+ URI parameters:
14
+
15
+ - `bucket_name`: The name of the bucket
16
+ - `credentials_path`: path to file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview)
17
+
18
+ ## Setting up a GCS Integration
19
+
20
+ To use Google Cloud Storage source in `ingestr`, you will need:
21
+ * A Google Cloud Project.
22
+ * A Service Account with at least [roles/storage.objectUser](https://cloud.google.com/storage/docs/access-control/iam-roles) IAM permission.
23
+ * A Service Account key file for the corresponding service account.
24
+
25
+ For more information on how to create a Service Account or its keys, see [Create service accounts](https://cloud.google.com/iam/docs/service-accounts-create) and [Create or delete service account keys](https://cloud.google.com/iam/docs/keys-create-delete) on Google Cloud docs.
26
+
27
+ ## Example
28
+
29
+ Let's assume that:
30
+ * Service account key is available in the current directory, under the filename `service_account.json`.
31
+ * The bucket you want to load data from is called `my-org-bucket`
32
+ * The source file is available at `/data/latest/dump.csv`
33
+ * The data needs to be saved in a DuckDB database called `local.db`
34
+ * The destination table name will be `public.latest_dump`
35
+
36
+ You can run the following command line to achieve this:
37
+
38
+ ```sh
39
+ ingestr ingest \
40
+ --source-uri "gs://my-org-bucket?credentials_path=$PWD/service_account.json" \
41
+ --source-table "/data/latest/dump.csv" \
42
+ --dest-uri "duckdb:///local.db" \
43
+ --dest-table "public.latest_dump"
44
+ ```
45
+
46
+ ## Supported File Formats
47
+ `gs` source only supports loading files in the following formats:
48
+ * `csv`: Comma Separated Values (supports Tab Separated Values as well)
49
+ * `parquet`: [Apache Parquet](https://parquet.apache.org/) storage format.
50
+ * `jsonl`: Line-delimited JSON. See [https://jsonlines.org/](https://jsonlines.org/)
51
+
52
+ ## File Pattern
53
+ `ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming)) like pattern matching for `gs` source.
54
+ This allows for a powerful pattern matching mechanism that allows you to specify multiple files in a single `--source-table`.
55
+
56
+ Below are some examples of path patterns, each path pattern is a reference from the root of the bucket:
57
+
58
+ - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
59
+ - `*.csv`: Retrieves all the CSV files from the first level of a folder.
60
+ - `myFolder/**/*.jsonl`: Retrieves all the JSONL files from anywhere under `myFolder`.
61
+ - `myFolder/mySubFolder/users.parquet`: Retrieves the `users.parquet` file from `mySubFolder`.
62
+ - `employees.jsonl`: Retrieves the `employees.jsonl` file from the root level of the bucket.
@@ -0,0 +1,109 @@
1
+ # LinkedIn Ads
2
+ LinkedIn Ads is a platform that allows businesses and marketers to create, manage, and analyze advertising campaigns.
3
+
4
+ Ingestr supports LinkedIn Ads as a source.
5
+
6
+ ## URI format
7
+ The URI format for LinkedIn Ads as a source is as follows:
8
+
9
+ ```plaintext
10
+ linkedinads://?access_token=<access_token>&account_ids=<account_ids>
11
+ ```
12
+ ## URI parameters:
13
+ - `access_token`(required): It is used for authentication and is necessary to access data and reports through the LinkedIn Ads API. The access token lets your app access data using the permissions you set in the Developer App for your LinkedIn account.
14
+ - `account_ids`(required): The comma-separated list of Ad Account IDs specifies the LinkedIn Ad Accounts for which you want to retrieve data. These IDs uniquely identify the LinkedIn Ad Accounts associated with a company, business, or individual, depending on the ownership of the Ad Accounts. They are required to fetch data for campaigns, creatives, and other related resources.
15
+
16
+ LinkedIn Ads requires an `access_token` and `account_ids` to retrieve reports from the [LinkedIn Ads API](https://learn.microsoft.com/en-us/linkedin/marketing/integrations/ads-reporting/ads-reporting?view=li-lms-2024-11&tabs=http#analytics-finder). Please follow these steps to obtain the `access_token` and `account_ids`
17
+
18
+ ### Create a LinkedIn developer application to obtain an access token
19
+ 1. Log in to LinkedIn with a [developer account](https://www.linkedin.com/developers)
20
+ 2. Click Create App. Please fill out:
21
+ - App Name
22
+ - Your company's LinkedIn page
23
+ - Your company's privacy policy URL
24
+ - Your company logo
25
+ - Accept the terms and click "Create App"
26
+ 3. To verify your app:
27
+ - Go to "Settings" tab
28
+ - Find "App Settings"
29
+ - Click "Verify" under Company
30
+ - Click "Generate URL"
31
+ - Send this URL to your Page Admin
32
+ - Click "I'm done" and open the URL in a new tab to verify, if you are the admin of your page.
33
+ - Go to the "Products" tab, and click "Request access" for the Advertising API. It will take about a few minutes to get approved and then you need to fill out the form where you have to provide your company name, website, and other details.
34
+
35
+ #### Authorize your app and obtain access token
36
+ 1. Go to the "Auth" tab
37
+ 4. Click "OAuth 2.0 tools" which is on top right corner of the page [link](https://www.linkedin.com/developers/tools/oauth)
38
+ 5. Click "Create token"
39
+ 6. Choose these permissions:
40
+ - `r_ads`
41
+ - `r_ads_reporting`
42
+ 7. Click "Request access token"
43
+ 8. You will be redirected to an authorization page. Use your LinkedIn credentials to log in and authorize your app and obtain your Access Token and Refresh Token. Copy the Access Token.
44
+
45
+ > [!NOTE]
46
+ > Access tokens last for 2 months. After they expire, you'll need to make new ones using
47
+ > [LinkedIn's Token Generator](https://www.linkedin.com/developers/tools/oauth/token-generator).
48
+
49
+ To find the Ad Account IDs, the ad account owner can refer to the detailed instructions provided in this [guide](https://www.linkedin.com/help/linkedin/answer/a424270/find-linkedin-ads-account-details?lang=en).
50
+
51
+ ## Table: Custom Reports
52
+ Custom reports allow you to retrieve data based on specific dimensions and metrics.
53
+
54
+ Custom Table Format:
55
+ ```
56
+ custom:<dimensions>:<metrics>
57
+ ```
58
+ ### Parameters:
59
+ - `dimensions`(required): A comma-separated list of dimensions is required. It must include at least one of the following: `campaign`, `account`, or `creative`, along with one time-based dimension, either `date` or `month`.
60
+ - `date`: group the data in your report by day
61
+ - `month`: group the data in your report by month
62
+ - `metrics`(required): A comma-separated list of [metrics](https://learn.microsoft.com/en-us/linkedin/marketing/integrations/ads-reporting/ads-reporting?view=li-lms-2024-11&tabs=http#metrics-available) to retrieve.
63
+
64
+ > [!NOTE]
65
+ > By default, ingestr fetches data from January 1, 2018 to today's date. You can specify a custom date range using the `--interval-start` and `--interval-end` parameters.
66
+
67
+ ### Example
68
+
69
+ Retrieve data for campaign with `account_ids` id_123 and id_456:
70
+ ```sh
71
+ ingestr ingest \
72
+ --source-uri "linkedinads://?access_token=token_123&account_ids=id_123,id_456" \
73
+ --source-table 'custom:campaign,date:impressions,clicks' \
74
+ --dest-uri 'duckdb:///linkedin.duckdb' \
75
+ --dest-table 'dest.campaign'
76
+ ```
77
+
78
+ The applied parameters for the report are:
79
+ - dimensions: `campaign`, `date`
80
+ - metrics: `impressions`, `clicks`
81
+
82
+ Retrieve data for creative with `account_ids` id_123 and id_456 for the date range from 2024-10-15 to 2024-12-31:
83
+ ```sh
84
+ ingestr ingest \
85
+ --source-uri "linkedinads://?access_token=token_123&account_ids=id_123,id_456" \
86
+ --source-table 'custom:creative,month:impressions,shares,videoCompletions' \
87
+ --dest-uri 'duckdb:///linkedin.duckdb' \
88
+ --dest-table 'dest.creative' \
89
+ --interval-start '2024-10-15' \
90
+ --interval-end '2024-12-31'
91
+ ```
92
+ The applied parameters for the report are:
93
+ - dimensions: `creative`, `month`
94
+ - metrics: `shares`, `impressions`, `videoCompletions`
95
+
96
+ ```sh
97
+ ingestr ingest \
98
+ --source-uri "linkedinads://?access_token=token_123&account_ids=id_123,id_456" \
99
+ --source-table 'custom:account,month:totalEngagements,impressions' \
100
+ --dest-uri 'duckdb:///linkedin.duckdb' \
101
+ --dest-table 'dest.account'
102
+ ```
103
+ The applied parameters for the report are:
104
+ - dimensions: `account`, `month`
105
+ - metrics: `totalEngagements`, `impressions`
106
+
107
+ This command will retrieve data and save it to the destination table in the DuckDB database.
108
+
109
+ <img alt="linkedin_ads_img" src="../media/linkedin_ads.png"/>
@@ -9,7 +9,7 @@ ingestr supports S3 as a source.
9
9
  The URI format for S3 is as follows:
10
10
 
11
11
  ```plaintext
12
- s3://<bucket_name>/<path_to_file>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
12
+ s3://<bucket_name>?access_key_id=<access_key_id>&secret_access_key=<secret_access_key>
13
13
  ```
14
14
 
15
15
  URI parameters:
@@ -25,7 +25,11 @@ S3 requires an `access_key_id` and a `secret_access_key` to access the bucket. P
25
25
  For example, if your `access_key_id` is `AKC3YOW7E`, `secret_access_key` is `XCtkpL5B`, bucket name is `my_bucket`, and `path_to_files` is `students/students_details.csv`, here's a sample command that will copy the data from the S3 bucket into a DuckDB database:
26
26
 
27
27
  ```sh
28
- ingestr ingest --source-uri 's3://my_bucket/students/students_details.csv?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' --source-table 'students_details' --dest-uri duckdb:///s3.duckdb --dest-table 'dest.students_details'
28
+ ingestr ingest \
29
+ --source-uri 's3://my_bucket?access_key_id=AKC3YOW7E&secret_access_key=XCtkpL5B' \
30
+ --source-table '/students/students_details.csv' \
31
+ --dest-uri duckdb:///s3.duckdb \
32
+ --dest-table 'dest.students_details'
29
33
  ```
30
34
 
31
35
  The result of this command will be a table in the DuckDB database in the path `s3.duckdb`.
@@ -28,6 +28,10 @@ def app_store(
28
28
  start_date: Optional[datetime] = None,
29
29
  end_date: Optional[datetime] = None,
30
30
  ) -> Iterable[DltResource]:
31
+ if start_date and start_date.tzinfo is not None:
32
+ start_date = start_date.replace(tzinfo=None)
33
+ if end_date and end_date.tzinfo is not None:
34
+ end_date = end_date.replace(tzinfo=None)
31
35
  for resource in RESOURCES:
32
36
  yield dlt.resource(
33
37
  get_analytics_reports,
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
25
25
  ChessSource,
26
26
  DynamoDBSource,
27
27
  FacebookAdsSource,
28
+ GCSSource,
28
29
  GitHubSource,
29
30
  GoogleAnalyticsSource,
30
31
  GoogleSheetsSource,
@@ -32,6 +33,7 @@ from ingestr.src.sources import (
32
33
  HubspotSource,
33
34
  KafkaSource,
34
35
  KlaviyoSource,
36
+ LinkedInAdsSource,
35
37
  LocalCsvSource,
36
38
  MongoDbSource,
37
39
  NotionSource,
@@ -124,6 +126,8 @@ class SourceDestinationFactory:
124
126
  "tiktok": TikTokSource,
125
127
  "googleanalytics": GoogleAnalyticsSource,
126
128
  "appstore": AppleAppStoreSource,
129
+ "gs": GCSSource,
130
+ "linkedinads": LinkedInAdsSource,
127
131
  }
128
132
  destinations: Dict[str, Type[DestinationProtocol]] = {
129
133
  "bigquery": BigQueryDestination,
@@ -39,8 +39,6 @@ def readers(
39
39
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
40
40
  filesystem_resource.apply_hints(
41
41
  incremental=dlt.sources.incremental("modification_date"),
42
- range_end="closed",
43
- range_start="closed",
44
42
  )
45
43
  return (
46
44
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
@@ -0,0 +1,63 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+ from pendulum import Date
8
+
9
+ from .dimension_time_enum import Dimension, TimeGranularity
10
+ from .helpers import LinkedInAdsAPI, find_intervals
11
+
12
+
13
+ @dlt.source(max_table_nesting=0)
14
+ def linked_in_ads_source(
15
+ start_date: Date,
16
+ end_date: Date | None,
17
+ access_token: str,
18
+ account_ids: list[str],
19
+ dimension: Dimension,
20
+ metrics: list[str],
21
+ time_granularity: TimeGranularity,
22
+ ) -> DltResource:
23
+ linkedin_api = LinkedInAdsAPI(
24
+ access_token=access_token,
25
+ account_ids=account_ids,
26
+ dimension=dimension,
27
+ metrics=metrics,
28
+ time_granularity=time_granularity,
29
+ )
30
+
31
+ if time_granularity == TimeGranularity.daily:
32
+ primary_key = [dimension.value, "date"]
33
+ incremental_loading_param = "date"
34
+ else:
35
+ primary_key = [dimension.value, "start_date", "end_date"]
36
+ incremental_loading_param = "start_date"
37
+
38
+ @dlt.resource(write_disposition="merge", primary_key=primary_key)
39
+ def custom_reports(
40
+ dateTime=(
41
+ dlt.sources.incremental(
42
+ incremental_loading_param,
43
+ initial_value=start_date,
44
+ end_value=end_date,
45
+ range_start="closed",
46
+ range_end="closed",
47
+ )
48
+ ),
49
+ ) -> Iterable[TDataItem]:
50
+ if dateTime.end_value is None:
51
+ end_date = pendulum.now().date()
52
+ else:
53
+ end_date = dateTime.end_value
54
+
55
+ list_of_interval = find_intervals(
56
+ start_date=dateTime.last_value,
57
+ end_date=end_date,
58
+ time_granularity=time_granularity,
59
+ )
60
+ for start, end in list_of_interval:
61
+ yield linkedin_api.fetch_pages(start, end)
62
+
63
+ return custom_reports
@@ -0,0 +1,12 @@
1
+ from enum import Enum
2
+
3
+
4
class Dimension(Enum):
    """Pivot dimensions that a LinkedIn Ads report can be grouped by.

    The member value is upper-cased for the ``pivot`` query parameter in
    ``construct_url`` and used (lower-cased) as the output column name in
    ``flat_structure``.
    """

    campaign = "campaign"
    creative = "creative"
    account = "account"
8
+
9
+
10
class TimeGranularity(Enum):
    """Time granularity of a report.

    The member value is sent verbatim as the ``timeGranularity`` query
    parameter in ``construct_url``.
    """

    daily = "DAILY"
    monthly = "MONTHLY"
@@ -0,0 +1,148 @@
1
+ from urllib.parse import quote
2
+
3
+ import pendulum
4
+ import requests
5
+ from dlt.sources.helpers.requests import Client
6
+ from pendulum import Date
7
+
8
+ from .dimension_time_enum import Dimension, TimeGranularity
9
+
10
+
11
def retry_on_limit(
    response: requests.Response | None, exception: BaseException | None
) -> bool:
    """Retry predicate: retry only when a response arrived with HTTP 429.

    Args:
        response: The received response, or ``None`` when there is none.
        exception: Ignored; present to match the retry-condition signature.

    Returns:
        ``True`` when ``response`` exists and its status code is 429.
    """
    return response is not None and response.status_code == 429
17
+
18
+
19
def create_client() -> requests.Session:
    """Return the session of a dlt ``Client`` configured for LinkedIn calls.

    The client uses a 10 second request timeout, does not raise on HTTP error
    statuses, and retries up to 12 attempts when ``retry_on_limit`` says so.
    """
    retrying_client = Client(
        request_timeout=10.0,
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
    )
    return retrying_client.session
26
+
27
+
28
def flat_structure(items, pivot: Dimension, time_granularity: TimeGranularity):
    """Flatten LinkedIn analytics elements in place.

    For every element the nested ``pivotValues`` and ``dateRange`` entries are
    removed and replaced by flat columns:

    * ``pivotValues`` becomes a column named after the pivot dimension
      (lower-cased). A single value is unwrapped; any other number of values
      is stored as the list itself.
    * ``dateRange`` becomes ``date`` for daily granularity, otherwise
      ``start_date`` and ``end_date``.

    Elements that lack either key are left untouched for that key instead of
    raising ``KeyError``.

    Args:
        items: Mutable list of element dicts; modified in place.
        pivot: Dimension whose value names the pivot column.
        time_granularity: Decides which date columns are produced.

    Returns:
        The same ``items`` list that was passed in.
    """
    pivot_column = pivot.value.lower()

    for item in items:
        # pop() with a default both reads and removes the nested key, so an
        # element without the key no longer triggers a KeyError on removal.
        pivot_values = item.pop("pivotValues", None)
        if pivot_values is not None:
            # Unwrap only an exactly-one-element list; an empty list is kept
            # as-is rather than indexed (which would raise IndexError).
            if len(pivot_values) == 1:
                item[pivot_column] = pivot_values[0]
            else:
                item[pivot_column] = pivot_values

        date_range = item.pop("dateRange", None)
        if date_range is not None:
            start = date_range["start"]
            start_dt = pendulum.date(
                year=start["year"],
                month=start["month"],
                day=start["day"],
            )
            if time_granularity == TimeGranularity.daily:
                item["date"] = start_dt
            else:
                end = date_range["end"]
                end_dt = pendulum.date(
                    year=end["year"],
                    month=end["month"],
                    day=end["day"],
                )
                item["start_date"] = start_dt
                item["end_date"] = end_dt

    return items
58
+
59
+
60
def find_intervals(start_date: Date, end_date: Date, time_granularity: TimeGranularity):
    """Split ``[start_date, end_date]`` into consecutive request windows.

    Each window starts the day after the previous one ends. A window spans at
    most six months for daily granularity and two years otherwise, and the
    last window is clipped to ``end_date``.

    Args:
        start_date: First day to cover.
        end_date: Last day to cover (inclusive).
        time_granularity: Selects the maximum window length.

    Returns:
        List of ``(window_start, window_end)`` tuples.

    Raises:
        ValueError: If ``start_date`` is after ``end_date``.
    """
    if start_date > end_date:
        raise ValueError("Start date must be less than end date")

    # Maximum span of one window, expressed as keyword arguments for ``add``.
    if time_granularity == TimeGranularity.daily:
        step = {"months": 6}
    else:
        step = {"years": 2}

    windows = []
    cursor = start_date
    while cursor <= end_date:
        window_end = min(cursor.add(**step), end_date)
        windows.append((cursor, window_end))
        # The next window begins the day after this one ends.
        cursor = window_end.add(days=1)

    return windows
77
+
78
+
79
def construct_url(
    start: Date,
    end: Date,
    account_ids: list[str],
    metrics: list[str],
    dimension: Dimension,
    time_granularity: TimeGranularity,
):
    """Build the ``adAnalytics`` request URL for one date window.

    Args:
        start: First day of the window (``year``/``month``/``day`` are read).
        end: Last day of the window.
        account_ids: Ids wrapped as percent-encoded sponsored-account URNs.
        metrics: Field names, joined with commas into ``fields``.
        dimension: Pivot; its value is upper-cased for the ``pivot`` parameter.
        time_granularity: Its value is sent as ``timeGranularity``.

    Returns:
        The full request URL as a string.
    """

    def as_date_tuple(day):
        # Date literal in the parenthesised form used inside ``dateRange``.
        return f"(year:{day.year},month:{day.month},day:{day.day})"

    account_urns = ",".join(
        quote(f"urn:li:sponsoredAccount:{account_id}") for account_id in account_ids
    )

    query = "&".join(
        (
            "q=analytics",
            f"timeGranularity={time_granularity.value}",
            f"dateRange=(start:{as_date_tuple(start)},end:{as_date_tuple(end)})",
            f"accounts=List({account_urns})",
            f"pivot={dimension.value.upper()}",
            f"fields={','.join(metrics)}",
        )
    )
    return f"https://api.linkedin.com/rest/adAnalytics?{query}"
105
+
106
+
107
class LinkedInAdsAPI:
    """Small client for the LinkedIn ``adAnalytics`` REST endpoint.

    Holds the report configuration (accounts, pivot dimension, metrics and
    time granularity) together with the request headers, and fetches the
    flattened report rows for a given date window.
    """

    def __init__(
        self,
        access_token,
        time_granularity,
        account_ids,
        dimension,
        metrics,
    ):
        """Store the report configuration and build the request headers.

        Args:
            access_token: Bearer token placed in the ``Authorization`` header.
            time_granularity: ``TimeGranularity`` of the report.
            account_ids: Account ids the report is restricted to.
            dimension: ``Dimension`` the report is pivoted by.
            metrics: Names of the fields to request.
        """
        self.time_granularity: TimeGranularity = time_granularity
        self.account_ids: list[str] = account_ids
        self.dimension: Dimension = dimension
        self.metrics: list[str] = metrics
        self.headers = {
            "Authorization": f"Bearer {access_token}",
            "Linkedin-Version": "202411",
            "X-Restli-Protocol-Version": "2.0.0",
        }

    def fetch_pages(self, start: Date, end: Date):
        """Fetch and flatten the report for the window ``start``..``end``.

        This is a generator that issues one GET request and yields a single
        list of flattened elements.

        Args:
            start: First day of the window.
            end: Last day of the window.

        Yields:
            The list returned by ``flat_structure`` for the response elements.

        Raises:
            ValueError: If the response status code is not 200. The message
                carries the API's ``message`` field when the body is a JSON
                object that has one, otherwise the HTTP status and raw body.
        """
        client = create_client()
        url = construct_url(
            start=start,
            end=end,
            account_ids=self.account_ids,
            metrics=self.metrics,
            dimension=self.dimension,
            time_granularity=self.time_granularity,
        )
        response = client.get(url=url, headers=self.headers)

        if response.status_code != 200:
            # Error bodies are not guaranteed to be JSON objects (proxies and
            # gateways may answer with HTML or nothing), so decode defensively
            # rather than letting a decode error hide the real failure.
            try:
                error_data = response.json()
            except ValueError:
                error_data = None
            message = (
                error_data.get("message") if isinstance(error_data, dict) else None
            )
            if message is None:
                message = f"HTTP {response.status_code}: {response.text}"
            raise ValueError(f"LinkedIn API Error: {message}")

        result = response.json()
        items = result.get("elements", [])
        yield flat_structure(
            items=items,
            pivot=self.dimension,
            time_granularity=self.time_granularity,
        )