ingestr 0.13.16__tar.gz → 0.13.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (236) hide show
  1. {ingestr-0.13.16 → ingestr-0.13.18}/Makefile +8 -7
  2. {ingestr-0.13.16 → ingestr-0.13.18}/PKG-INFO +137 -6
  3. {ingestr-0.13.16 → ingestr-0.13.18}/README.md +5 -0
  4. {ingestr-0.13.16 → ingestr-0.13.18}/docs/.vitepress/config.mjs +1 -0
  5. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/clickhouse.md +0 -5
  6. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/hubspot.md +0 -1
  7. ingestr-0.13.18/docs/supported-sources/kinesis.md +39 -0
  8. ingestr-0.13.18/ingestr/src/buildinfo.py +1 -0
  9. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/factory.py +2 -0
  10. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/hubspot/__init__.py +0 -8
  11. ingestr-0.13.18/ingestr/src/kinesis/__init__.py +139 -0
  12. ingestr-0.13.18/ingestr/src/kinesis/helpers.py +65 -0
  13. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/sources.py +37 -0
  14. {ingestr-0.13.16 → ingestr-0.13.18}/pyproject.toml +0 -6
  15. ingestr-0.13.16/requirements.txt → ingestr-0.13.18/requirements.in +4 -0
  16. ingestr-0.13.18/requirements.txt +560 -0
  17. ingestr-0.13.16/ingestr/src/buildinfo.py +0 -1
  18. {ingestr-0.13.16 → ingestr-0.13.18}/.dockerignore +0 -0
  19. {ingestr-0.13.16 → ingestr-0.13.18}/.githooks/pre-commit-hook.sh +0 -0
  20. {ingestr-0.13.16 → ingestr-0.13.18}/.github/workflows/deploy-docs.yml +0 -0
  21. {ingestr-0.13.16 → ingestr-0.13.18}/.github/workflows/release.yml +0 -0
  22. {ingestr-0.13.16 → ingestr-0.13.18}/.github/workflows/secrets-scan.yml +0 -0
  23. {ingestr-0.13.16 → ingestr-0.13.18}/.github/workflows/tests.yml +0 -0
  24. {ingestr-0.13.16 → ingestr-0.13.18}/.gitignore +0 -0
  25. {ingestr-0.13.16 → ingestr-0.13.18}/.gitleaksignore +0 -0
  26. {ingestr-0.13.16 → ingestr-0.13.18}/.python-version +0 -0
  27. {ingestr-0.13.16 → ingestr-0.13.18}/.vale.ini +0 -0
  28. {ingestr-0.13.16 → ingestr-0.13.18}/Dockerfile +0 -0
  29. {ingestr-0.13.16 → ingestr-0.13.18}/LICENSE.md +0 -0
  30. {ingestr-0.13.16 → ingestr-0.13.18}/docs/.vitepress/theme/custom.css +0 -0
  31. {ingestr-0.13.16 → ingestr-0.13.18}/docs/.vitepress/theme/index.js +0 -0
  32. {ingestr-0.13.16 → ingestr-0.13.18}/docs/commands/example-uris.md +0 -0
  33. {ingestr-0.13.16 → ingestr-0.13.18}/docs/commands/ingest.md +0 -0
  34. {ingestr-0.13.16 → ingestr-0.13.18}/docs/getting-started/core-concepts.md +0 -0
  35. {ingestr-0.13.16 → ingestr-0.13.18}/docs/getting-started/incremental-loading.md +0 -0
  36. {ingestr-0.13.16 → ingestr-0.13.18}/docs/getting-started/quickstart.md +0 -0
  37. {ingestr-0.13.16 → ingestr-0.13.18}/docs/getting-started/telemetry.md +0 -0
  38. {ingestr-0.13.16 → ingestr-0.13.18}/docs/index.md +0 -0
  39. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/applovin_max.png +0 -0
  40. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/athena.png +0 -0
  41. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/clickhouse_img.png +0 -0
  42. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/github.png +0 -0
  43. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/googleanalytics.png +0 -0
  44. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/linkedin_ads.png +0 -0
  45. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/personio.png +0 -0
  46. {ingestr-0.13.16 → ingestr-0.13.18}/docs/media/tiktok.png +0 -0
  47. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/adjust.md +0 -0
  48. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/airtable.md +0 -0
  49. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/applovin.md +0 -0
  50. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/applovin_max.md +0 -0
  51. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/appsflyer.md +0 -0
  52. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/appstore.md +0 -0
  53. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/asana.md +0 -0
  54. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/athena.md +0 -0
  55. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/bigquery.md +0 -0
  56. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/chess.md +0 -0
  57. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/csv.md +0 -0
  58. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/custom_queries.md +0 -0
  59. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/databricks.md +0 -0
  60. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/duckdb.md +0 -0
  61. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/dynamodb.md +0 -0
  62. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/facebook-ads.md +0 -0
  63. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/gcs.md +0 -0
  64. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/github.md +0 -0
  65. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/google-ads.md +0 -0
  66. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/google_analytics.md +0 -0
  67. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/gorgias.md +0 -0
  68. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/gsheets.md +0 -0
  69. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/kafka.md +0 -0
  70. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/klaviyo.md +0 -0
  71. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/linkedin_ads.md +0 -0
  72. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/mongodb.md +0 -0
  73. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/mssql.md +0 -0
  74. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/mysql.md +0 -0
  75. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/notion.md +0 -0
  76. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/oracle.md +0 -0
  77. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/personio.md +0 -0
  78. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/postgres.md +0 -0
  79. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/redshift.md +0 -0
  80. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/s3.md +0 -0
  81. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/salesforce.md +0 -0
  82. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/sap-hana.md +0 -0
  83. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/shopify.md +0 -0
  84. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/slack.md +0 -0
  85. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/snowflake.md +0 -0
  86. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/sqlite.md +0 -0
  87. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/stripe.md +0 -0
  88. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/tiktok-ads.md +0 -0
  89. {ingestr-0.13.16 → ingestr-0.13.18}/docs/supported-sources/zendesk.md +0 -0
  90. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/main.py +0 -0
  91. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/.gitignore +0 -0
  92. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/adjust/__init__.py +0 -0
  93. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/adjust/adjust_helpers.py +0 -0
  94. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/airtable/__init__.py +0 -0
  95. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/applovin/__init__.py +0 -0
  96. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/applovin_max/__init__.py +0 -0
  97. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appsflyer/_init_.py +0 -0
  98. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appsflyer/client.py +0 -0
  99. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appstore/__init__.py +0 -0
  100. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appstore/client.py +0 -0
  101. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appstore/errors.py +0 -0
  102. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appstore/models.py +0 -0
  103. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/appstore/resources.py +0 -0
  104. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/arrow/__init__.py +0 -0
  105. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/asana_source/__init__.py +0 -0
  106. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/asana_source/helpers.py +0 -0
  107. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/asana_source/settings.py +0 -0
  108. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/blob.py +0 -0
  109. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/chess/__init__.py +0 -0
  110. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/chess/helpers.py +0 -0
  111. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/chess/settings.py +0 -0
  112. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/destinations.py +0 -0
  113. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/dynamodb/__init__.py +0 -0
  114. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/errors.py +0 -0
  115. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/facebook_ads/__init__.py +0 -0
  116. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/facebook_ads/exceptions.py +0 -0
  117. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/facebook_ads/helpers.py +0 -0
  118. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/facebook_ads/settings.py +0 -0
  119. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/filesystem/__init__.py +0 -0
  120. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/filesystem/helpers.py +0 -0
  121. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/filesystem/readers.py +0 -0
  122. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/filters.py +0 -0
  123. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/github/__init__.py +0 -0
  124. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/github/helpers.py +0 -0
  125. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/github/queries.py +0 -0
  126. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/github/settings.py +0 -0
  127. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_ads/__init__.py +0 -0
  128. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_ads/field.py +0 -0
  129. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_ads/metrics.py +0 -0
  130. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_ads/predicates.py +0 -0
  131. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_ads/reports.py +0 -0
  132. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_analytics/__init__.py +0 -0
  133. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_analytics/helpers.py +0 -0
  134. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_sheets/README.md +0 -0
  135. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_sheets/__init__.py +0 -0
  136. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  137. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  138. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  139. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/gorgias/__init__.py +0 -0
  140. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/gorgias/helpers.py +0 -0
  141. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/hubspot/helpers.py +0 -0
  142. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/hubspot/settings.py +0 -0
  143. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/kafka/__init__.py +0 -0
  144. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/kafka/helpers.py +0 -0
  145. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/klaviyo/_init_.py +0 -0
  146. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/klaviyo/client.py +0 -0
  147. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/klaviyo/helpers.py +0 -0
  148. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/linkedin_ads/__init__.py +0 -0
  149. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
  150. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/linkedin_ads/helpers.py +0 -0
  151. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/loader.py +0 -0
  152. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/mongodb/__init__.py +0 -0
  153. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/mongodb/helpers.py +0 -0
  154. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/notion/__init__.py +0 -0
  155. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/notion/helpers/__init__.py +0 -0
  156. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/notion/helpers/client.py +0 -0
  157. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/notion/helpers/database.py +0 -0
  158. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/notion/settings.py +0 -0
  159. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/personio/__init__.py +0 -0
  160. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/personio/helpers.py +0 -0
  161. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/salesforce/__init__.py +0 -0
  162. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/salesforce/helpers.py +0 -0
  163. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/shopify/__init__.py +0 -0
  164. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/shopify/exceptions.py +0 -0
  165. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/shopify/helpers.py +0 -0
  166. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/shopify/settings.py +0 -0
  167. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/slack/__init__.py +0 -0
  168. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/slack/helpers.py +0 -0
  169. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/slack/settings.py +0 -0
  170. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/sql_database/__init__.py +0 -0
  171. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/sql_database/callbacks.py +0 -0
  172. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/stripe_analytics/__init__.py +0 -0
  173. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/stripe_analytics/helpers.py +0 -0
  174. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/stripe_analytics/settings.py +0 -0
  175. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/table_definition.py +0 -0
  176. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/telemetry/event.py +0 -0
  177. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  178. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/tiktok_ads/__init__.py +0 -0
  179. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  180. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/time.py +0 -0
  181. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/version.py +0 -0
  182. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/zendesk/__init__.py +0 -0
  183. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  184. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  185. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  186. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  187. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/src/zendesk/settings.py +0 -0
  188. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/.gitignore +0 -0
  189. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/create_replace.csv +0 -0
  190. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/delete_insert_expected.csv +0 -0
  191. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/delete_insert_part1.csv +0 -0
  192. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/delete_insert_part2.csv +0 -0
  193. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/merge_expected.csv +0 -0
  194. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/merge_part1.csv +0 -0
  195. {ingestr-0.13.16 → ingestr-0.13.18}/ingestr/testdata/merge_part2.csv +0 -0
  196. {ingestr-0.13.16 → ingestr-0.13.18}/package-lock.json +0 -0
  197. {ingestr-0.13.16 → ingestr-0.13.18}/package.json +0 -0
  198. {ingestr-0.13.16 → ingestr-0.13.18}/requirements-dev.txt +0 -0
  199. {ingestr-0.13.16 → ingestr-0.13.18}/resources/demo.gif +0 -0
  200. {ingestr-0.13.16 → ingestr-0.13.18}/resources/demo.tape +0 -0
  201. {ingestr-0.13.16 → ingestr-0.13.18}/resources/ingestr.svg +0 -0
  202. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/AMPM.yml +0 -0
  203. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Acronyms.yml +0 -0
  204. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Colons.yml +0 -0
  205. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Contractions.yml +0 -0
  206. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/DateFormat.yml +0 -0
  207. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Ellipses.yml +0 -0
  208. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/EmDash.yml +0 -0
  209. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Exclamation.yml +0 -0
  210. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/FirstPerson.yml +0 -0
  211. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Gender.yml +0 -0
  212. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/GenderBias.yml +0 -0
  213. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/HeadingPunctuation.yml +0 -0
  214. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Headings.yml +0 -0
  215. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Latin.yml +0 -0
  216. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/LyHyphens.yml +0 -0
  217. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/OptionalPlurals.yml +0 -0
  218. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Ordinal.yml +0 -0
  219. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/OxfordComma.yml +0 -0
  220. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Parens.yml +0 -0
  221. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Passive.yml +0 -0
  222. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Periods.yml +0 -0
  223. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Quotes.yml +0 -0
  224. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Ranges.yml +0 -0
  225. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Semicolons.yml +0 -0
  226. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Slang.yml +0 -0
  227. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Spacing.yml +0 -0
  228. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Spelling.yml +0 -0
  229. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Units.yml +0 -0
  230. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/We.yml +0 -0
  231. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/Will.yml +0 -0
  232. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/WordList.yml +0 -0
  233. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/meta.json +0 -0
  234. {ingestr-0.13.16 → ingestr-0.13.18}/styles/Google/vocab.txt +0 -0
  235. {ingestr-0.13.16 → ingestr-0.13.18}/styles/bruin/Ingestr.yml +0 -0
  236. {ingestr-0.13.16 → ingestr-0.13.18}/styles/config/vocabularies/bruin/accept.txt +0 -0
@@ -11,28 +11,29 @@ venv/touchfile: requirements-dev.txt requirements.txt
11
11
  . venv/bin/activate; pip install uv; $(MAKE) deps
12
12
  touch venv/touchfile
13
13
 
14
- deps:
14
+ lock-deps:
15
+ @uv pip compile requirements.in --quiet -o requirements.txt
16
+
17
+ deps: lock-deps
15
18
  uv pip install -r requirements-dev.txt
16
19
 
17
20
  deps-ci:
18
21
  uv pip install --system -r requirements-dev.txt
19
- uv pip install --system '.[clickhouse]'
20
-
21
22
 
22
23
  test-ci:
23
24
  TESTCONTAINERS_RYUK_DISABLED=true pytest -n auto -x -rP -vv --tb=short --durations=10 --cov=ingestr --no-cov-on-fail
24
25
 
25
- test: venv
26
+ test : venv lock-deps
26
27
  . venv/bin/activate; $(MAKE) test-ci
27
28
 
28
- test-specific: venv
29
+ test-specific: venv lock-deps
29
30
  . venv/bin/activate; pytest -rP -vv --tb=short --capture=no -k $(test)
30
31
 
31
32
  lint-ci:
32
33
  ruff format ingestr && ruff check ingestr --fix
33
34
  mypy --config-file pyproject.toml --explicit-package-bases ingestr
34
35
 
35
- lint: venv
36
+ lint: venv lock-deps
36
37
  . venv/bin/activate; $(MAKE) lint-ci
37
38
 
38
39
  lint-docs:
@@ -40,7 +41,7 @@ lint-docs:
40
41
 
41
42
  tl: test lint
42
43
 
43
- build:
44
+ build: lock-deps
44
45
  cat > ${BUILDINFO} <<< "version = \"$$(git describe --tags --abbrev=0)\""
45
46
  rm -rf dist && python3 -m build
46
47
  rm -f ${BUILDINFO}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.16
3
+ Version: 0.13.18
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -14,52 +14,178 @@ Classifier: Operating System :: OS Independent
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
+ Requires-Dist: aiobotocore==2.21.1
18
+ Requires-Dist: aiohappyeyeballs==2.4.8
19
+ Requires-Dist: aiohttp==3.11.13
20
+ Requires-Dist: aioitertools==0.12.0
21
+ Requires-Dist: aiosignal==1.3.2
22
+ Requires-Dist: alembic==1.15.1
23
+ Requires-Dist: annotated-types==0.7.0
17
24
  Requires-Dist: asana==3.2.3
25
+ Requires-Dist: asn1crypto==1.5.1
26
+ Requires-Dist: asynch==0.2.4
27
+ Requires-Dist: attrs==25.1.0
28
+ Requires-Dist: backoff==2.2.1
29
+ Requires-Dist: beautifulsoup4==4.13.3
30
+ Requires-Dist: boto3==1.37.1
31
+ Requires-Dist: botocore==1.37.1
32
+ Requires-Dist: cachetools==5.5.2
33
+ Requires-Dist: certifi==2025.1.31
34
+ Requires-Dist: cffi==1.17.1
35
+ Requires-Dist: charset-normalizer==3.4.1
36
+ Requires-Dist: ciso8601==2.3.2
37
+ Requires-Dist: click==8.1.8
38
+ Requires-Dist: clickhouse-connect==0.8.14
39
+ Requires-Dist: clickhouse-driver==0.2.9
40
+ Requires-Dist: clickhouse-sqlalchemy==0.2.7
18
41
  Requires-Dist: confluent-kafka==2.8.0
42
+ Requires-Dist: cryptography==44.0.2
43
+ Requires-Dist: curlify==2.2.1
19
44
  Requires-Dist: databricks-sql-connector==2.9.3
20
45
  Requires-Dist: databricks-sqlalchemy==1.0.2
21
46
  Requires-Dist: dataclasses-json==0.6.7
47
+ Requires-Dist: decorator==5.2.1
48
+ Requires-Dist: deprecation==2.1.0
22
49
  Requires-Dist: dlt==1.6.1
50
+ Requires-Dist: dnspython==2.7.0
23
51
  Requires-Dist: duckdb-engine==0.15.0
24
52
  Requires-Dist: duckdb==1.2.0
53
+ Requires-Dist: et-xmlfile==2.0.0
25
54
  Requires-Dist: facebook-business==20.0.0
55
+ Requires-Dist: filelock==3.17.0
26
56
  Requires-Dist: flatten-json==0.1.14
57
+ Requires-Dist: frozenlist==1.5.0
58
+ Requires-Dist: fsspec==2024.10.0
27
59
  Requires-Dist: gcsfs==2024.10.0
60
+ Requires-Dist: gitdb==4.0.12
61
+ Requires-Dist: gitpython==3.1.44
62
+ Requires-Dist: giturlparse==0.12.0
28
63
  Requires-Dist: google-ads==25.1.0
29
64
  Requires-Dist: google-analytics-data==0.18.17
65
+ Requires-Dist: google-api-core==2.24.1
30
66
  Requires-Dist: google-api-python-client==2.130.0
67
+ Requires-Dist: google-auth-httplib2==0.2.0
68
+ Requires-Dist: google-auth-oauthlib==1.2.1
69
+ Requires-Dist: google-auth==2.38.0
31
70
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
71
+ Requires-Dist: google-cloud-bigquery==3.30.0
72
+ Requires-Dist: google-cloud-core==2.4.2
73
+ Requires-Dist: google-cloud-storage==3.1.0
74
+ Requires-Dist: google-crc32c==1.6.0
75
+ Requires-Dist: google-resumable-media==2.7.2
76
+ Requires-Dist: googleapis-common-protos==1.69.0
77
+ Requires-Dist: greenlet==3.1.1
78
+ Requires-Dist: grpcio-status==1.62.3
79
+ Requires-Dist: grpcio==1.70.0
80
+ Requires-Dist: hdbcli==2.23.27
81
+ Requires-Dist: hexbytes==1.3.0
82
+ Requires-Dist: httplib2==0.22.0
83
+ Requires-Dist: humanize==4.12.1
84
+ Requires-Dist: idna==3.10
85
+ Requires-Dist: inflection==0.5.1
86
+ Requires-Dist: isodate==0.7.2
87
+ Requires-Dist: jmespath==1.0.1
88
+ Requires-Dist: jsonpath-ng==1.7.0
89
+ Requires-Dist: leb128==1.0.8
90
+ Requires-Dist: lxml==5.3.1
91
+ Requires-Dist: lz4==4.4.3
92
+ Requires-Dist: makefun==1.15.6
93
+ Requires-Dist: mako==1.3.9
94
+ Requires-Dist: markdown-it-py==3.0.0
95
+ Requires-Dist: markupsafe==3.0.2
96
+ Requires-Dist: marshmallow==3.26.1
97
+ Requires-Dist: mdurl==0.1.2
98
+ Requires-Dist: monotonic==1.6
99
+ Requires-Dist: more-itertools==10.6.0
100
+ Requires-Dist: multidict==6.1.0
101
+ Requires-Dist: mypy-extensions==1.0.0
32
102
  Requires-Dist: mysql-connector-python==9.2.0
103
+ Requires-Dist: numpy==2.2.3
104
+ Requires-Dist: oauthlib==3.2.2
105
+ Requires-Dist: openpyxl==3.1.5
106
+ Requires-Dist: orjson==3.10.15
107
+ Requires-Dist: packaging==24.2
108
+ Requires-Dist: pandas==2.2.3
109
+ Requires-Dist: pathvalidate==3.2.3
33
110
  Requires-Dist: pendulum==3.0.0
111
+ Requires-Dist: platformdirs==4.3.6
112
+ Requires-Dist: pluggy==1.5.0
113
+ Requires-Dist: ply==3.11
114
+ Requires-Dist: propcache==0.3.0
115
+ Requires-Dist: proto-plus==1.26.0
116
+ Requires-Dist: protobuf==4.25.6
34
117
  Requires-Dist: psutil==6.1.1
35
118
  Requires-Dist: psycopg2-binary==2.9.10
36
119
  Requires-Dist: py-machineid==0.6.0
37
120
  Requires-Dist: pyairtable==2.3.3
38
121
  Requires-Dist: pyarrow==18.1.0
122
+ Requires-Dist: pyasn1-modules==0.4.1
123
+ Requires-Dist: pyasn1==0.6.1
39
124
  Requires-Dist: pyathena==3.12.2
125
+ Requires-Dist: pycountry==24.6.1
126
+ Requires-Dist: pycparser==2.22
127
+ Requires-Dist: pydantic-core==2.27.2
128
+ Requires-Dist: pydantic==2.10.6
129
+ Requires-Dist: pygments==2.19.1
130
+ Requires-Dist: pyjwt==2.10.1
40
131
  Requires-Dist: pymongo==4.11.1
41
132
  Requires-Dist: pymysql==1.1.1
133
+ Requires-Dist: pyopenssl==25.0.0
134
+ Requires-Dist: pyparsing==3.2.1
42
135
  Requires-Dist: pyrate-limiter==3.7.0
136
+ Requires-Dist: python-dateutil==2.9.0.post0
137
+ Requires-Dist: python-dotenv==1.0.1
138
+ Requires-Dist: pytz==2025.1
139
+ Requires-Dist: pyyaml==6.0.2
43
140
  Requires-Dist: redshift-connector==2.1.5
141
+ Requires-Dist: requests-file==2.1.0
142
+ Requires-Dist: requests-oauthlib==1.3.1
143
+ Requires-Dist: requests-toolbelt==1.0.0
144
+ Requires-Dist: requests==2.32.3
145
+ Requires-Dist: requirements-parser==0.11.0
146
+ Requires-Dist: rich-argparse==1.7.0
44
147
  Requires-Dist: rich==13.9.4
148
+ Requires-Dist: rsa==4.9
45
149
  Requires-Dist: rudder-sdk-python==2.1.4
46
150
  Requires-Dist: s3fs==2024.10.0
151
+ Requires-Dist: s3transfer==0.11.3
152
+ Requires-Dist: scramp==1.4.5
153
+ Requires-Dist: semver==3.0.4
154
+ Requires-Dist: setuptools==75.8.2
155
+ Requires-Dist: shellingham==1.5.4
47
156
  Requires-Dist: simple-salesforce==1.12.6
157
+ Requires-Dist: simplejson==3.20.1
158
+ Requires-Dist: six==1.17.0
159
+ Requires-Dist: smmap==5.0.2
160
+ Requires-Dist: snowflake-connector-python==3.14.0
48
161
  Requires-Dist: snowflake-sqlalchemy==1.6.1
162
+ Requires-Dist: sortedcontainers==2.4.0
163
+ Requires-Dist: soupsieve==2.6
49
164
  Requires-Dist: sqlalchemy-bigquery==1.12.1
50
165
  Requires-Dist: sqlalchemy-hana==2.0.0
51
166
  Requires-Dist: sqlalchemy-redshift==0.8.14
52
167
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
53
168
  Requires-Dist: sqlalchemy==1.4.52
54
169
  Requires-Dist: stripe==10.7.0
170
+ Requires-Dist: tenacity==9.0.0
171
+ Requires-Dist: thrift==0.16.0
172
+ Requires-Dist: time-machine==2.16.0
173
+ Requires-Dist: tomlkit==0.13.2
55
174
  Requires-Dist: tqdm==4.67.1
56
175
  Requires-Dist: typer==0.13.1
57
176
  Requires-Dist: types-requests==2.32.0.20240907
58
- Provides-Extra: clickhouse
59
- Requires-Dist: clickhouse-connect==0.8.14; extra == 'clickhouse'
60
- Requires-Dist: clickhouse-driver==0.2.9; extra == 'clickhouse'
61
- Requires-Dist: clickhouse-sqlalchemy==0.2.7; extra == 'clickhouse'
62
- Requires-Dist: zstd==1.5.6.1; extra == 'clickhouse'
177
+ Requires-Dist: types-setuptools==75.8.2.20250305
178
+ Requires-Dist: typing-extensions==4.12.2
179
+ Requires-Dist: typing-inspect==0.9.0
180
+ Requires-Dist: tzdata==2025.1
181
+ Requires-Dist: tzlocal==5.3
182
+ Requires-Dist: uritemplate==4.1.1
183
+ Requires-Dist: urllib3==2.3.0
184
+ Requires-Dist: wrapt==1.17.2
185
+ Requires-Dist: yarl==1.18.3
186
+ Requires-Dist: zeep==4.3.1
187
+ Requires-Dist: zstandard==0.23.0
188
+ Requires-Dist: zstd==1.5.6.5
63
189
  Provides-Extra: odbc
64
190
  Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
65
191
  Provides-Extra: oracle
@@ -231,6 +357,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
231
357
  <td>✅</td>
232
358
  <td>-</td>
233
359
  </tr>
360
+ <tr>
361
+ <td>Amazon Kinesis</td>
362
+ <td>✅</td>
363
+ <td>-</td>
364
+ </tr>
234
365
  <tr>
235
366
  <td>Apache Kafka</td>
236
367
  <td>✅</td>
@@ -163,6 +163,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
163
163
  <td>✅</td>
164
164
  <td>-</td>
165
165
  </tr>
166
+ <tr>
167
+ <td>Amazon Kinesis</td>
168
+ <td>✅</td>
169
+ <td>-</td>
170
+ </tr>
166
171
  <tr>
167
172
  <td>Apache Kafka</td>
168
173
  <td>✅</td>
@@ -95,6 +95,7 @@ export default defineConfig({
95
95
  items: [
96
96
  { text: "Adjust", link: "/supported-sources/adjust.md" },
97
97
  { text: "Airtable", link: "/supported-sources/airtable.md" },
98
+ { text: "Amazon Kinesis", link: "/supported-sources/kinesis.md" },
98
99
  { text: "AppsFlyer", link: "/supported-sources/appsflyer.md" },
99
100
  { text: "Apple App Store", link: "/supported-sources/appstore.md"},
100
101
  { text: "Applovin", link: "/supported-sources/applovin.md"},
@@ -2,11 +2,6 @@
2
2
  ClickHouse is a fast, open-source, column-oriented database management system that allows for high performance data ingestion and querying.
3
3
 
4
4
  ingestr supports ClickHouse as a source and destination.
5
- > [!INFO]
6
- > To use ClickHouse in `ingestr`, you need to install its dependencies using:
7
- > ```
8
- > $ pip install ingestr[clickhouse]
9
- > ```
10
5
 
11
6
  ## URI format
12
7
  The URI format for ClickHouse as a source is as follows:
@@ -39,7 +39,6 @@ HubSpot source allows ingesting the following sources into separate tables:
39
39
  - `products`: Retrieves pricing information of products.
40
40
  - `tickets`: Handles requests for help from customers or users.
41
41
  - `quotes`: Retrieves price proposals that salespeople can create and send to their contacts.
42
- - `hubspot_events_for_objects`: Retrieves web analytics events for a given object type and object IDs.
43
42
  - `contacts`: Retrieves information about visitors, potential customers, and leads.
44
43
 
45
44
  Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -0,0 +1,39 @@
1
+ # Amazon Kinesis
2
+
3
+ [Amazon Kinesis](https://docs.aws.amazon.com/streams/latest/dev/key-concepts.html) is a
4
+ cloud-based service for real-time data streaming and analytics, enabling the processing and analysis of large streams of data in real time.
5
+
6
+ ingestr supports Kinesis as a source.
7
+
8
+ ## URI format
9
+ The URI format for Kinesis is as follows:
10
+
11
+ ```plaintext
12
+ kinesis://?aws_access_key_id=<aws-access-key-id>&aws_secret_access_key=<aws-secret-access-key>&region_name=<region-name>
13
+ ```
14
+
15
+ URI parameters:
16
+ - `aws_access_key_id`: the AWS access key ID used to authenticate the request
17
+ - `aws_secret_access_key`: the AWS secret access key used to authenticate the request
18
+ - `region_name`: the AWS region name where the stream is located
19
+
20
+
21
+
22
+ ## Setting up a Kinesis Integration
23
+ To get Kinesis credentials, please refer to [this guide](https://dlthub.com/docs/dlt-ecosystem/verified-sources/amazon_kinesis#grab-credentials).
24
+
25
+ Once you complete the guide, you should have an `aws_access_key_id`, an `aws_secret_access_key`, and a `region_name`. Let's say your `aws_access_key_id` is `id_123`, your `aws_secret_access_key` is `secret_123`, and your `region_name` is `eu-central-1`. Here's a sample command that will copy the data from Kinesis into a DuckDB database:
26
+
27
+ ```bash
28
+ ingestr ingest --source-uri 'kinesis://?aws_access_key_id=id_123&aws_secret_access_key=secret_123&region_name=eu-central-1' \
29
+ --source-table 'stream_name_1' \
30
+ --dest-uri duckdb:///kinesis.duckdb \
31
+ --dest-table 'dest.results'
32
+ ```
33
+
34
+ When using Kinesis as a source, specify the `stream name` you want to read from as the `--source-table` parameter. For example, if you want to read from a Kinesis stream named "customer_events", you would use `--source-table 'customer_events'`.
35
+
36
+ ### Initial Load Configuration
37
+ By default, ingestr reads from the beginning of the Kinesis stream. To start reading from a specific time, use the `interval_start` parameter.
38
+
39
+
@@ -0,0 +1 @@
1
+ version = "v0.13.18"
@@ -36,6 +36,7 @@ from ingestr.src.sources import (
36
36
  GorgiasSource,
37
37
  HubspotSource,
38
38
  KafkaSource,
39
+ KinesisSource,
39
40
  KlaviyoSource,
40
41
  LinkedInAdsSource,
41
42
  LocalCsvSource,
@@ -141,6 +142,7 @@ class SourceDestinationFactory:
141
142
  "applovinmax": ApplovinMaxSource,
142
143
  "salesforce": SalesforceSource,
143
144
  "personio": PersonioSource,
145
+ "kinesis": KinesisSource,
144
146
  }
145
147
  destinations: Dict[str, Type[DestinationProtocol]] = {
146
148
  "bigquery": BigQueryDestination,
@@ -199,14 +199,6 @@ def crm_objects(
199
199
 
200
200
  props = ",".join(sorted(list(set(props))))
201
201
 
202
- if len(props) > 2000:
203
- raise ValueError(
204
- "Your request to Hubspot is too long to process. "
205
- "Maximum allowed query length is 2000 symbols, while "
206
- f"your list of properties `{props[:200]}`... is {len(props)} "
207
- "symbols long. Use the `props` argument of the resource to "
208
- "set the list of properties to extract from the endpoint."
209
- )
210
202
 
211
203
  params = {"properties": props, "limit": 100}
212
204
 
@@ -0,0 +1,139 @@
1
+ """Reads messages from Kinesis queue."""
2
+
3
+ from typing import Iterable, List, Optional
4
+
5
+ import dlt
6
+ from dlt.common import json, pendulum
7
+ from dlt.common.configuration.specs import AwsCredentials
8
+ from dlt.common.time import ensure_pendulum_datetime
9
+ from dlt.common.typing import StrStr, TAnyDateTime, TDataItem
10
+ from dlt.common.utils import digest128
11
+
12
+ from .helpers import get_shard_iterator, max_sequence_by_shard
13
+
14
+
15
@dlt.resource(
    name=lambda args: args["stream_name"],
    primary_key="kinesis_msg_id",
    standalone=True,
)
def kinesis_stream(
    stream_name: str,
    initial_at_timestamp: TAnyDateTime,
    credentials: AwsCredentials,
    last_msg: Optional[dlt.sources.incremental[StrStr]] = dlt.sources.incremental(
        "kinesis", last_value_func=max_sequence_by_shard
    ),
    max_number_of_messages: int = None,  # type: ignore
    milliseconds_behind_latest: int = 1000,
    parse_json: bool = True,
    chunk_size: int = 1000,
) -> Iterable[TDataItem]:
    """Reads a Kinesis stream shard by shard and yields messages in chunks.

    Supports incremental loading. Messages are parsed as JSON by default.

    Args:
        stream_name (str): The name of the stream to read from.
        initial_at_timestamp (TAnyDateTime): Where to start reading a shard that has
            no stored sequence number. `None` reads from the beginning of the shard
            (TRIM_HORIZON), a timestamp equal to 0 reads from the tip (LATEST), any
            other value creates an AT_TIMESTAMP iterator. A value stored in the
            resource state by a previous run takes precedence over this argument.
        credentials (AwsCredentials): The credentials used to create the Kinesis client.
        last_msg (Optional[dlt.sources.incremental]): An incremental over a mapping from
            shard_id to message sequence, used to create AFTER_SEQUENCE_NUMBER shard
            iterators when loading incrementally.
        max_number_of_messages (int): Maximum number of messages to read in one run.
            The actual number may exceed it by up to `chunk_size`. Defaults to None
            (no limit).
        milliseconds_behind_latest (int): Reading of a shard stops once the response
            reports being fewer than this many milliseconds behind the tip of the
            shard. Defaults to 1000.
        parse_json (bool): If True, message payloads are parsed as JSON and merged into
            the yielded item; otherwise the raw payload is returned under `data`.
            Defaults to True.
        chunk_size (int): The number of records to fetch per request. Defaults to 1000.

    Yields:
        Iterable[TDataItem]: Lists of messages, one list per `get_records` call. Each
            message contains the Kinesis envelope in `kinesis`, a `kinesis_msg_id`
            digest, and either the parsed payload fields or the raw bytes in `data`.
    """
    session = credentials._to_botocore_session()
    # the default timeouts are (60, 60) which is fine
    kinesis_client = session.create_client("kinesis")

    # normalize at_timestamp to pendulum
    initial_at_datetime = (
        None
        if initial_at_timestamp is None
        else ensure_pendulum_datetime(initial_at_timestamp)
    )
    # a timestamp stored by a previous run wins over the argument
    resource_state = dlt.current.resource_state()
    initial_at_datetime = resource_state.get(
        "initial_at_timestamp", initial_at_datetime
    )
    # so next time we request shards at AT_TIMESTAMP that is now
    resource_state["initial_at_timestamp"] = pendulum.now("UTC").subtract(seconds=1)

    # collect all shards, following pagination tokens
    shards_list = kinesis_client.list_shards(StreamName=stream_name)
    shards: List[StrStr] = list(shards_list["Shards"])
    while next_token := shards_list.get("NextToken"):
        shards_list = kinesis_client.list_shards(NextToken=next_token)
        # extend with the shard descriptions, not with the response dict itself
        shards.extend(shards_list["Shards"])

    shard_ids = [shard["ShardId"] for shard in shards]
    # every shard id that was ever queued, so child shards are not read twice
    known_shard_ids = set(shard_ids)

    # get next shard to fetch messages from
    while shard_ids:
        shard_id = shard_ids.pop(0)

        shard_iterator, _ = get_shard_iterator(
            kinesis_client,
            stream_name,
            shard_id,
            last_msg,  # type: ignore
            initial_at_datetime,  # type: ignore
        )

        while shard_iterator:
            records = []
            records_response = kinesis_client.get_records(
                ShardIterator=shard_iterator,
                Limit=chunk_size,  # The size of data can be up to 1 MB, it must be controlled by the user
            )

            for record in records_response["Records"]:
                sequence_number = record["SequenceNumber"]
                content = record["Data"]

                arrival_time = record["ApproximateArrivalTimestamp"]
                arrival_timestamp = arrival_time.astimezone(pendulum.UTC)

                message = {
                    "kinesis": {
                        "shard_id": shard_id,
                        "seq_no": sequence_number,
                        "ts": ensure_pendulum_datetime(arrival_timestamp),
                        "partition": record["PartitionKey"],
                        "stream_name": stream_name,
                    },
                    "kinesis_msg_id": digest128(shard_id + sequence_number),
                }

                if parse_json:
                    message.update(json.loadb(content))
                else:
                    message["data"] = content
                records.append(message)
            yield records

            # do not load more than max_number_of_messages
            if max_number_of_messages is not None:
                max_number_of_messages -= len(records)
                if max_number_of_messages <= 0:
                    return

            # add child shards so we can request messages from them
            for child_shard in records_response.get("ChildShards") or []:
                child_shard_id = child_shard["ShardId"]
                if child_shard_id not in known_shard_ids:
                    known_shard_ids.add(child_shard_id)
                    shard_ids.append(child_shard_id)

            # gets 0 when no messages so we cutoff empty shards
            records_ms_behind_latest = records_response.get("MillisBehindLatest", 0)
            if records_ms_behind_latest < milliseconds_behind_latest:
                # stop taking messages from shard
                shard_iterator = None  # type: ignore
            else:
                # continue taking messages; a missing iterator ends the loop
                # NOTE(review): presumably absent/None once a closed shard is exhausted - confirm
                shard_iterator = records_response.get("NextShardIterator")
@@ -0,0 +1,65 @@
1
+ from typing import Any, Sequence, Tuple
2
+
3
+ import dlt
4
+ from dlt.common import pendulum
5
+ from dlt.common.typing import DictStrAny, StrAny, StrStr
6
+
7
+
8
def get_shard_iterator(
    kinesis_client: Any,
    stream_name: str,
    shard_id: str,
    last_msg: dlt.sources.incremental[StrStr],
    initial_at_timestamp: pendulum.DateTime | None,
) -> Tuple[str, StrAny]:
    """Creates a shard iterator for `shard_id` of `stream_name`.

    The iterator type is picked in this order:
      1. AFTER_SEQUENCE_NUMBER, when `last_msg` holds a sequence number for the shard.
      2. TRIM_HORIZON, when `initial_at_timestamp` is None.
      3. LATEST, when `initial_at_timestamp` is the zero timestamp.
      4. AT_TIMESTAMP at `initial_at_timestamp` otherwise.

    Returns:
        A tuple of the shard iterator string and the parameters used to request it.
    """
    # mapping of shard_id -> last seen sequence number, empty when nothing is stored
    if last_msg is None:
        known_sequences = {}
    else:
        known_sequences = last_msg.last_value or last_msg.initial_value or {}

    resume_after = known_sequences.get(shard_id, None)

    request_args: DictStrAny
    if resume_after:
        request_args = {
            "ShardIteratorType": "AFTER_SEQUENCE_NUMBER",
            "StartingSequenceNumber": resume_after,
        }
    elif initial_at_timestamp is None:
        # read every record from the beginning of the shard
        request_args = {"ShardIteratorType": "TRIM_HORIZON"}
    elif initial_at_timestamp.timestamp() == 0.0:
        # only the messages at the tip of the stream are read
        request_args = {"ShardIteratorType": "LATEST"}
    else:
        request_args = {
            "ShardIteratorType": "AT_TIMESTAMP",
            "Timestamp": initial_at_timestamp.timestamp(),
        }

    response: StrStr = kinesis_client.get_shard_iterator(
        StreamName=stream_name, ShardId=shard_id, **request_args
    )
    return response["ShardIterator"], request_args
46
+
47
+
48
def max_sequence_by_shard(values: Sequence[StrStr]) -> StrStr:
    """A last_value_function over a mapping of shard_id to message sequence.

    Args:
        values: Either a one-element sequence holding the Kinesis metadata of a
            message (`shard_id`, `seq_no`), or a pair of that metadata and the
            previous per-shard state (which may be None).

    Returns:
        A new mapping equal to the previous state with the entry for the message's
        shard set to the greater of the stored and the incoming sequence number.
    """
    last_value = None
    # if tuple/list contains only one element then there is no previous state
    if len(values) == 1:
        item = values[0]
    else:
        # item is kinesis metadata, last_value is previous state of the shards
        item, last_value = values

    # always work on a copy so the caller's state is never mutated
    merged = {} if last_value is None else dict(last_value)

    shard_id = item["shard_id"]
    # Sequence numbers are decimal strings, so plain string comparison would rank
    # "9" above "10". Ordering by (length, text) equals numeric ordering for
    # non-negative integers without leading zeros and keeps "" as the minimum.
    merged[shard_id] = max(
        item["seq_no"],
        merged.get(shard_id, ""),
        key=lambda seq_no: (len(seq_no), seq_no),
    )
    return merged
@@ -75,6 +75,7 @@ from ingestr.src.gorgias import gorgias_source
75
75
  from ingestr.src.hubspot import hubspot
76
76
  from ingestr.src.kafka import kafka_consumer
77
77
  from ingestr.src.kafka.helpers import KafkaCredentials
78
+ from ingestr.src.kinesis import kinesis_stream
78
79
  from ingestr.src.klaviyo._init_ import klaviyo_source
79
80
  from ingestr.src.linkedin_ads import linked_in_ads_source
80
81
  from ingestr.src.linkedin_ads.dimension_time_enum import (
@@ -1969,3 +1970,39 @@ class PersonioSource:
1969
1970
  start_date=interval_start_date,
1970
1971
  end_date=interval_end_date,
1971
1972
  ).with_resources(table)
1973
+
1974
+
1975
class KinesisSource:
    """ingestr source that reads an Amazon Kinesis stream.

    URI format:
        kinesis://?aws_access_key_id=<AccessKeyId>&aws_secret_access_key=<SecretAccessKey>&region_name=<Region>
    The source table is the name of the stream.
    """

    def handles_incrementality(self) -> bool:
        """Reports that this source handles incrementality itself."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Builds the `kinesis_stream` resource for the stream named `table`.

        Raises:
            MissingValueError: when `aws_access_key_id`, `aws_secret_access_key`
                or `region_name` is absent from the URI query string.
        """
        query = parse_qs(urlparse(uri).query)

        # all three settings are mandatory; they are checked in this order
        connection = {}
        for key in ("aws_access_key_id", "aws_secret_access_key", "region_name"):
            found = query.get(key)
            if found is None:
                raise MissingValueError(key, "Kinesis")
            # parse_qs returns a list per key; the first value is used
            connection[key] = found[0]

        # when given, the resource reads all messages after this timestamp
        interval_start = kwargs.get("interval_start")
        start_date = (
            None
            if interval_start is None
            else ensure_pendulum_datetime(interval_start)
        )

        credentials = AwsCredentials(**connection)
        return kinesis_stream(
            stream_name=table, credentials=credentials, initial_at_timestamp=start_date
        )
@@ -148,12 +148,6 @@ oracle = [
148
148
  odbc = [
149
149
  "pyodbc==5.1.0",
150
150
  ]
151
- clickhouse = [
152
- "clickhouse-connect==0.8.14",
153
- "clickhouse-driver==0.2.9",
154
- "clickhouse-sqlalchemy==0.2.7",
155
- "zstd==1.5.6.1",
156
- ]
157
151
 
158
152
  [project.urls]
159
153
  Homepage = "https://github.com/bruin-data/ingestr"
@@ -43,3 +43,7 @@ dataclasses-json==0.6.7
43
43
  gcsfs==2024.10.0
44
44
  simple-salesforce==1.12.6
45
45
  databricks-sqlalchemy==1.0.2
46
+ clickhouse-connect==0.8.14
47
+ clickhouse-driver==0.2.9
48
+ clickhouse-sqlalchemy==0.2.7
49
+ zstd==1.5.6.5