ingestr 0.13.78__tar.gz → 0.13.79__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (323) hide show
  1. {ingestr-0.13.78 → ingestr-0.13.79}/.gitignore +2 -1
  2. {ingestr-0.13.78 → ingestr-0.13.79}/PKG-INFO +6 -1
  3. {ingestr-0.13.78 → ingestr-0.13.79}/README.md +5 -0
  4. {ingestr-0.13.78 → ingestr-0.13.79}/docs/.vitepress/config.mjs +1 -0
  5. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/attio.md +4 -1
  6. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/hubspot.md +2 -17
  7. ingestr-0.13.79/docs/supported-sources/mongodb.md +150 -0
  8. ingestr-0.13.79/docs/supported-sources/motherduck.md +46 -0
  9. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/main.py +10 -3
  10. ingestr-0.13.79/ingestr/src/buildinfo.py +1 -0
  11. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/destinations.py +18 -0
  12. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/facebook_ads/__init__.py +0 -1
  13. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/factory.py +5 -0
  14. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/freshdesk/__init__.py +23 -8
  15. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/freshdesk/freshdesk_client.py +16 -5
  16. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/github/__init__.py +5 -3
  17. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/github/helpers.py +1 -0
  18. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/influxdb/__init__.py +1 -0
  19. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/mongodb/__init__.py +3 -0
  20. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/mongodb/helpers.py +178 -11
  21. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/sources.py +203 -24
  22. {ingestr-0.13.78 → ingestr-0.13.79}/pyproject.toml +3 -1
  23. ingestr-0.13.78/docs/supported-sources/mongodb.md +0 -24
  24. ingestr-0.13.78/ingestr/src/buildinfo.py +0 -1
  25. {ingestr-0.13.78 → ingestr-0.13.79}/.dlt/config.toml +0 -0
  26. {ingestr-0.13.78 → ingestr-0.13.79}/.dockerignore +0 -0
  27. {ingestr-0.13.78 → ingestr-0.13.79}/.githooks/pre-commit-hook.sh +0 -0
  28. {ingestr-0.13.78 → ingestr-0.13.79}/.github/workflows/deploy-docs.yml +0 -0
  29. {ingestr-0.13.78 → ingestr-0.13.79}/.github/workflows/release.yml +0 -0
  30. {ingestr-0.13.78 → ingestr-0.13.79}/.github/workflows/secrets-scan.yml +0 -0
  31. {ingestr-0.13.78 → ingestr-0.13.79}/.github/workflows/tests.yml +0 -0
  32. {ingestr-0.13.78 → ingestr-0.13.79}/.gitleaksignore +0 -0
  33. {ingestr-0.13.78 → ingestr-0.13.79}/.python-version +0 -0
  34. {ingestr-0.13.78 → ingestr-0.13.79}/.vale.ini +0 -0
  35. {ingestr-0.13.78 → ingestr-0.13.79}/Dockerfile +0 -0
  36. {ingestr-0.13.78 → ingestr-0.13.79}/LICENSE.md +0 -0
  37. {ingestr-0.13.78 → ingestr-0.13.79}/Makefile +0 -0
  38. {ingestr-0.13.78 → ingestr-0.13.79}/docs/.vitepress/theme/custom.css +0 -0
  39. {ingestr-0.13.78 → ingestr-0.13.79}/docs/.vitepress/theme/index.js +0 -0
  40. {ingestr-0.13.78 → ingestr-0.13.79}/docs/commands/example-uris.md +0 -0
  41. {ingestr-0.13.78 → ingestr-0.13.79}/docs/commands/ingest.md +0 -0
  42. {ingestr-0.13.78 → ingestr-0.13.79}/docs/getting-started/core-concepts.md +0 -0
  43. {ingestr-0.13.78 → ingestr-0.13.79}/docs/getting-started/incremental-loading.md +0 -0
  44. {ingestr-0.13.78 → ingestr-0.13.79}/docs/getting-started/quickstart.md +0 -0
  45. {ingestr-0.13.78 → ingestr-0.13.79}/docs/getting-started/telemetry.md +0 -0
  46. {ingestr-0.13.78 → ingestr-0.13.79}/docs/index.md +0 -0
  47. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/applovin_max.png +0 -0
  48. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/athena.png +0 -0
  49. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/clickhouse_img.png +0 -0
  50. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/clickup_ingestion.png +0 -0
  51. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/cratedb-destination.png +0 -0
  52. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/cratedb-source.png +0 -0
  53. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/freshdesk_ingestion.png +0 -0
  54. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/gcp_spanner_ingestion.png +0 -0
  55. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/github.png +0 -0
  56. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/google_analytics_realtime_report.png +0 -0
  57. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/googleanalytics.png +0 -0
  58. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/ingestion_elasticsearch_img.png +0 -0
  59. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/kinesis.bigquery.png +0 -0
  60. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/linear.png +0 -0
  61. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/linkedin_ads.png +0 -0
  62. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/mixpanel_ingestion.png +0 -0
  63. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/personio.png +0 -0
  64. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/personio_duckdb.png +0 -0
  65. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/phantombuster.png +0 -0
  66. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/pipedrive.png +0 -0
  67. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/quickbook_ingestion.png +0 -0
  68. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/sftp.png +0 -0
  69. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/stripe_postgres.png +0 -0
  70. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/tiktok.png +0 -0
  71. {ingestr-0.13.78 → ingestr-0.13.79}/docs/media/zoom_ingestion.png +0 -0
  72. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/adjust.md +0 -0
  73. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/airtable.md +0 -0
  74. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/applovin.md +0 -0
  75. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/applovin_max.md +0 -0
  76. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/appsflyer.md +0 -0
  77. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/appstore.md +0 -0
  78. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/asana.md +0 -0
  79. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/athena.md +0 -0
  80. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/bigquery.md +0 -0
  81. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/chess.md +0 -0
  82. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/clickhouse.md +0 -0
  83. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/clickup.md +0 -0
  84. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/cratedb.md +0 -0
  85. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/csv.md +0 -0
  86. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/custom_queries.md +0 -0
  87. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/databricks.md +0 -0
  88. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/db2.md +0 -0
  89. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/duckdb.md +0 -0
  90. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/dynamodb.md +0 -0
  91. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/elasticsearch.md +0 -0
  92. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/facebook-ads.md +0 -0
  93. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/frankfurter.md +0 -0
  94. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/freshdesk.md +0 -0
  95. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/gcs.md +0 -0
  96. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/github.md +0 -0
  97. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/google-ads.md +0 -0
  98. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/google_analytics.md +0 -0
  99. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/gorgias.md +0 -0
  100. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/gsheets.md +0 -0
  101. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/influxdb.md +0 -0
  102. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/isoc-pulse.md +0 -0
  103. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/kafka.md +0 -0
  104. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/kinesis.md +0 -0
  105. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/klaviyo.md +0 -0
  106. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/linear.md +0 -0
  107. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/linkedin_ads.md +0 -0
  108. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/mixpanel.md +0 -0
  109. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/mssql.md +0 -0
  110. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/mysql.md +0 -0
  111. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/notion.md +0 -0
  112. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/oracle.md +0 -0
  113. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/personio.md +0 -0
  114. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/phantombuster.md +0 -0
  115. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/pinterest.md +0 -0
  116. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/pipedrive.md +0 -0
  117. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/postgres.md +0 -0
  118. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/quickbooks.md +0 -0
  119. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/redshift.md +0 -0
  120. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/s3.md +0 -0
  121. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/salesforce.md +0 -0
  122. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/sap-hana.md +0 -0
  123. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/sftp.md +0 -0
  124. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/shopify.md +0 -0
  125. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/slack.md +0 -0
  126. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/smartsheets.md +0 -0
  127. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/snowflake.md +0 -0
  128. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/solidgate.md +0 -0
  129. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/spanner.md +0 -0
  130. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/sqlite.md +0 -0
  131. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/stripe.md +0 -0
  132. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/tiktok-ads.md +0 -0
  133. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/trustpilot.md +0 -0
  134. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/zendesk.md +0 -0
  135. {ingestr-0.13.78 → ingestr-0.13.79}/docs/supported-sources/zoom.md +0 -0
  136. {ingestr-0.13.78 → ingestr-0.13.79}/docs/tutorials/load-kinesis-bigquery.md +0 -0
  137. {ingestr-0.13.78 → ingestr-0.13.79}/docs/tutorials/load-personio-duckdb.md +0 -0
  138. {ingestr-0.13.78 → ingestr-0.13.79}/docs/tutorials/load-stripe-postgres.md +0 -0
  139. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/conftest.py +0 -0
  140. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/.gitignore +0 -0
  141. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/adjust/__init__.py +0 -0
  142. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/adjust/adjust_helpers.py +0 -0
  143. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/airtable/__init__.py +0 -0
  144. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/applovin/__init__.py +0 -0
  145. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/applovin_max/__init__.py +0 -0
  146. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appsflyer/__init__.py +0 -0
  147. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appsflyer/client.py +0 -0
  148. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appstore/__init__.py +0 -0
  149. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appstore/client.py +0 -0
  150. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appstore/errors.py +0 -0
  151. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appstore/models.py +0 -0
  152. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/appstore/resources.py +0 -0
  153. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/arrow/__init__.py +0 -0
  154. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/asana_source/__init__.py +0 -0
  155. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/asana_source/helpers.py +0 -0
  156. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/asana_source/settings.py +0 -0
  157. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/attio/__init__.py +0 -0
  158. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/attio/helpers.py +0 -0
  159. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/blob.py +0 -0
  160. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/chess/__init__.py +0 -0
  161. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/chess/helpers.py +0 -0
  162. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/chess/settings.py +0 -0
  163. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/clickup/__init__.py +0 -0
  164. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/clickup/helpers.py +0 -0
  165. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/collector/spinner.py +0 -0
  166. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/dynamodb/__init__.py +0 -0
  167. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/elasticsearch/__init__.py +0 -0
  168. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/errors.py +0 -0
  169. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/facebook_ads/exceptions.py +0 -0
  170. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/facebook_ads/helpers.py +0 -0
  171. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/facebook_ads/settings.py +0 -0
  172. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/facebook_ads/utils.py +0 -0
  173. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/filesystem/__init__.py +0 -0
  174. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/filesystem/helpers.py +0 -0
  175. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/filesystem/readers.py +0 -0
  176. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/filters.py +0 -0
  177. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/frankfurter/__init__.py +0 -0
  178. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/frankfurter/helpers.py +0 -0
  179. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/freshdesk/settings.py +0 -0
  180. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/github/queries.py +0 -0
  181. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/github/settings.py +0 -0
  182. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_ads/__init__.py +0 -0
  183. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_ads/field.py +0 -0
  184. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_ads/metrics.py +0 -0
  185. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_ads/predicates.py +0 -0
  186. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_ads/reports.py +0 -0
  187. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_analytics/__init__.py +0 -0
  188. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_analytics/helpers.py +0 -0
  189. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_sheets/README.md +0 -0
  190. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_sheets/__init__.py +0 -0
  191. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  192. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  193. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  194. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/gorgias/__init__.py +0 -0
  195. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/gorgias/helpers.py +0 -0
  196. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/http_client.py +0 -0
  197. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/hubspot/__init__.py +0 -0
  198. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/hubspot/helpers.py +0 -0
  199. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/hubspot/settings.py +0 -0
  200. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/influxdb/client.py +0 -0
  201. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/isoc_pulse/__init__.py +0 -0
  202. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/kafka/__init__.py +0 -0
  203. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/kafka/helpers.py +0 -0
  204. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/kinesis/__init__.py +0 -0
  205. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/kinesis/helpers.py +0 -0
  206. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/klaviyo/__init__.py +0 -0
  207. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/klaviyo/client.py +0 -0
  208. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/klaviyo/helpers.py +0 -0
  209. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/linear/__init__.py +0 -0
  210. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/linear/helpers.py +0 -0
  211. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/linkedin_ads/__init__.py +0 -0
  212. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
  213. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/linkedin_ads/helpers.py +0 -0
  214. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/loader.py +0 -0
  215. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/mixpanel/__init__.py +0 -0
  216. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/mixpanel/client.py +0 -0
  217. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/notion/__init__.py +0 -0
  218. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/notion/helpers/__init__.py +0 -0
  219. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/notion/helpers/client.py +0 -0
  220. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/notion/helpers/database.py +0 -0
  221. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/notion/settings.py +0 -0
  222. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/partition.py +0 -0
  223. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/personio/__init__.py +0 -0
  224. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/personio/helpers.py +0 -0
  225. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/phantombuster/__init__.py +0 -0
  226. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/phantombuster/client.py +0 -0
  227. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pinterest/__init__.py +0 -0
  228. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pipedrive/__init__.py +0 -0
  229. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
  230. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
  231. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pipedrive/helpers/pages.py +0 -0
  232. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pipedrive/settings.py +0 -0
  233. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/pipedrive/typing.py +0 -0
  234. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/quickbooks/__init__.py +0 -0
  235. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/resource.py +0 -0
  236. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/salesforce/__init__.py +0 -0
  237. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/salesforce/helpers.py +0 -0
  238. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/shopify/__init__.py +0 -0
  239. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/shopify/exceptions.py +0 -0
  240. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/shopify/helpers.py +0 -0
  241. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/shopify/settings.py +0 -0
  242. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/slack/__init__.py +0 -0
  243. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/slack/helpers.py +0 -0
  244. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/slack/settings.py +0 -0
  245. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/smartsheets/__init__.py +0 -0
  246. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/solidgate/__init__.py +0 -0
  247. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/solidgate/helpers.py +0 -0
  248. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/sql_database/__init__.py +0 -0
  249. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/sql_database/callbacks.py +0 -0
  250. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/stripe_analytics/__init__.py +0 -0
  251. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/stripe_analytics/helpers.py +0 -0
  252. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/stripe_analytics/settings.py +0 -0
  253. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/table_definition.py +0 -0
  254. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/telemetry/event.py +0 -0
  255. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  256. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/tiktok_ads/__init__.py +0 -0
  257. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  258. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/time.py +0 -0
  259. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/trustpilot/__init__.py +0 -0
  260. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/trustpilot/client.py +0 -0
  261. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/version.py +0 -0
  262. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zendesk/__init__.py +0 -0
  263. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  264. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  265. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  266. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  267. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zendesk/settings.py +0 -0
  268. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zoom/__init__.py +0 -0
  269. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/src/zoom/helpers.py +0 -0
  270. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/.gitignore +0 -0
  271. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/create_replace.csv +0 -0
  272. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/delete_insert_expected.csv +0 -0
  273. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/delete_insert_part1.csv +0 -0
  274. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/delete_insert_part2.csv +0 -0
  275. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/merge_expected.csv +0 -0
  276. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/merge_part1.csv +0 -0
  277. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/testdata/merge_part2.csv +0 -0
  278. {ingestr-0.13.78 → ingestr-0.13.79}/ingestr/tests/unit/test_smartsheets.py +0 -0
  279. {ingestr-0.13.78 → ingestr-0.13.79}/package-lock.json +0 -0
  280. {ingestr-0.13.78 → ingestr-0.13.79}/package.json +0 -0
  281. {ingestr-0.13.78 → ingestr-0.13.79}/requirements-dev.txt +0 -0
  282. {ingestr-0.13.78 → ingestr-0.13.79}/requirements.in +0 -0
  283. {ingestr-0.13.78 → ingestr-0.13.79}/requirements.txt +0 -0
  284. {ingestr-0.13.78 → ingestr-0.13.79}/requirements_arm64.txt +0 -0
  285. {ingestr-0.13.78 → ingestr-0.13.79}/resources/demo.gif +0 -0
  286. {ingestr-0.13.78 → ingestr-0.13.79}/resources/demo.tape +0 -0
  287. {ingestr-0.13.78 → ingestr-0.13.79}/resources/ingestr.svg +0 -0
  288. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/AMPM.yml +0 -0
  289. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Acronyms.yml +0 -0
  290. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Colons.yml +0 -0
  291. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Contractions.yml +0 -0
  292. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/DateFormat.yml +0 -0
  293. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Ellipses.yml +0 -0
  294. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/EmDash.yml +0 -0
  295. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Exclamation.yml +0 -0
  296. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/FirstPerson.yml +0 -0
  297. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Gender.yml +0 -0
  298. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/GenderBias.yml +0 -0
  299. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/HeadingPunctuation.yml +0 -0
  300. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Headings.yml +0 -0
  301. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Latin.yml +0 -0
  302. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/LyHyphens.yml +0 -0
  303. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/OptionalPlurals.yml +0 -0
  304. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Ordinal.yml +0 -0
  305. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/OxfordComma.yml +0 -0
  306. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Parens.yml +0 -0
  307. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Passive.yml +0 -0
  308. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Periods.yml +0 -0
  309. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Quotes.yml +0 -0
  310. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Ranges.yml +0 -0
  311. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Semicolons.yml +0 -0
  312. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Slang.yml +0 -0
  313. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Spacing.yml +0 -0
  314. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Spelling.yml +0 -0
  315. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Units.yml +0 -0
  316. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/We.yml +0 -0
  317. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/Will.yml +0 -0
  318. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/WordList.yml +0 -0
  319. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/meta.json +0 -0
  320. {ingestr-0.13.78 → ingestr-0.13.79}/styles/Google/vocab.txt +0 -0
  321. {ingestr-0.13.78 → ingestr-0.13.79}/styles/bruin/Ingestr.yml +0 -0
  322. {ingestr-0.13.78 → ingestr-0.13.79}/styles/config/vocabularies/bruin/accept.txt +0 -0
  323. {ingestr-0.13.78 → ingestr-0.13.79}/test.env.template +0 -0
@@ -24,4 +24,5 @@ node_modules
24
24
  ingest.sh
25
25
  test.env
26
26
  *.log
27
- .claude
27
+ .claude
28
+ helper_scripts
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.78
3
+ Version: 0.13.79
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -355,6 +355,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
355
355
  <td>✅</td>
356
356
  <td>❌</td>
357
357
  </tr>
358
+ <tr>
359
+ <td>MotherDuck</td>
360
+ <td>✅</td>
361
+ <td>✅</td>
362
+ </tr>
358
363
  <tr>
359
364
  <td>MySQL</td>
360
365
  <td>✅</td>
@@ -137,6 +137,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
137
137
  <td>✅</td>
138
138
  <td>❌</td>
139
139
  </tr>
140
+ <tr>
141
+ <td>MotherDuck</td>
142
+ <td>✅</td>
143
+ <td>✅</td>
144
+ </tr>
140
145
  <tr>
141
146
  <td>MySQL</td>
142
147
  <td>✅</td>
@@ -90,6 +90,7 @@ export default defineConfig({
90
90
  link: "/supported-sources/mssql.md",
91
91
  },
92
92
  { text: "MongoDB", link: "/supported-sources/mongodb.md" },
93
+ { text: "MotherDuck", link: "/supported-sources/motherduck.md" },
93
94
  { text: "MySQL", link: "/supported-sources/mysql.md" },
94
95
  { text: "Oracle", link: "/supported-sources/oracle.md" },
95
96
  { text: "Postgres", link: "/supported-sources/postgres.md" },
@@ -39,4 +39,7 @@ Attio source supports ingesting the following sources into separate tables:
39
39
  - `list_entries:{list_id}`: Lists all items in a specific list. For example: `list_entries:8abc-123-456-789d-123`
40
40
  - `all_list_entries:{object_api_slug}`: Fetches all lists for an object, and then fetches all entries from those lists. For example: `all_list_entries:companies`
41
41
 
42
- Use this as `--source-table` parameter in the `ingestr ingest` command.
42
+ Use this as `--source-table` parameter in the `ingestr ingest` command.
43
+
44
+ > [!WARNING]
45
+ > Attio does not support incremental loading, which means ingestr will do a full refresh on every run.
@@ -43,22 +43,7 @@ HubSpot source allows ingesting the following sources into separate tables:
43
43
 
44
44
  Use these as `--source-table` parameter in the `ingestr ingest` command.
45
45
 
46
- ## Custom Objects
47
-
48
- HubSpot custom objects are supported by using the `custom` table.
49
-
50
- The format for the custom object is as follows:
51
-
52
- ```plaintext
53
- custom:<custom_object_name>:<optional associations>
54
- ```
55
-
56
- For example, to ingest the `course` custom object with the `contacts` and `companies` associations, the format would be:
57
-
58
- ```plaintext
59
- custom:course:contacts,companies
60
- ```
61
-
62
- This would pull all the data for the `course` custom object and include the `contacts` and `companies` associations in the resulting table.
46
+ > [!WARNING]
47
+ > HubSpot does not support incremental loading, which means ingestr will do a full refresh on every run.
63
48
 
64
49
 
@@ -0,0 +1,150 @@
1
+ # MongoDB
2
+ MongoDB is a popular, open source NoSQL database known for its flexibility, scalability, and wide adoption in a variety of applications.
3
+
4
+ ingestr supports MongoDB as a source.
5
+
6
+ ## URI format
7
+ The URI format for MongoDB is as follows:
8
+
9
+ ```plaintext
10
+ mongodb://user:password@host:port
11
+ ```
12
+
13
+ URI parameters:
14
+ - `user`: the user name to connect to the database
15
+ - `password`: the password for the user
16
+ - `host`: the host address of the database server
17
+ - `port`: the port number the database server is listening on, default is 27017 for MongoDB
18
+
19
+
20
+ > [!CAUTION]
21
+ > Do not put the database name at the end of the URI for MongoDB; instead, make it part of the `--source-table` option using the `database.collection` format.
22
+
23
+
24
+ You can read more about MongoDB's connection string format [here](https://docs.mongodb.com/manual/reference/connection-string/).
25
+
26
+ ## Source table format
27
+
28
+ The `--source-table` option for MongoDB supports two formats:
29
+
30
+ ### Basic format
31
+ ```plaintext
32
+ database.collection
33
+ ```
34
+
35
+ This performs a simple collection scan, equivalent to `db.collection.find()`.
36
+
37
+ ### Custom aggregation format
38
+ ```plaintext
39
+ database.collection:[aggregation_pipeline]
40
+ ```
41
+
42
+ This allows you to specify a custom MongoDB aggregation pipeline as a JSON array.
43
+
44
+ ## Custom aggregations
45
+
46
+ ingestr supports custom MongoDB aggregation pipelines, similar to how SQL sources support custom queries. This allows you to perform complex data transformations, filtering, and projections directly in MongoDB before the data is ingested.
47
+
48
+ ### Basic syntax
49
+
50
+ Use the following format for custom aggregations:
51
+
52
+ ```bash
53
+ ingestr ingest \
54
+ --source-uri "mongodb://user:password@host:port" \
55
+ --source-table 'database.collection:[{"$match": {...}}, {"$project": {...}}]'
56
+ ```
57
+
58
+ ### Examples
59
+
60
+ #### Simple filtering
61
+ ```bash
62
+ ingestr ingest \
63
+ --source-uri "mongodb://localhost:27017" \
64
+ --source-table 'mydb.users:[{"$match": {"status": "active"}}]'
65
+ ```
66
+
67
+ #### Complex aggregation with grouping
68
+ ```bash
69
+ ingestr ingest \
70
+ --source-uri "mongodb://localhost:27017" \
71
+ --source-table 'mydb.orders:[
72
+ {"$match": {"status": "completed"}},
73
+ {"$group": {
74
+ "_id": "$customer_id",
75
+ "total_orders": {"$sum": 1},
76
+ "total_amount": {"$sum": "$amount"}
77
+ }}
78
+ ]'
79
+ ```
80
+
81
+ #### Projection and transformation
82
+ ```bash
83
+ ingestr ingest \
84
+ --source-uri "mongodb://localhost:27017" \
85
+ --source-table 'mydb.products:[
86
+ {"$project": {
87
+ "name": 1,
88
+ "price": 1,
89
+ "category": 1,
90
+ "price_usd": {"$multiply": ["$price", 1.1]}
91
+ }}
92
+ ]'
93
+ ```
94
+
95
+ ### Incremental loads with custom aggregations
96
+
97
+ Custom aggregations support incremental loading when combined with the `--incremental-key` option. The incremental key must be included in the projected fields of your aggregation pipeline.
98
+
99
+ #### Using interval placeholders
100
+
101
+ You can use `:interval_start` and `:interval_end` placeholders in your aggregation pipeline, which will be automatically replaced with the actual datetime values during incremental loads:
102
+
103
+ ```bash
104
+ ingestr ingest \
105
+ --source-uri "mongodb://localhost:27017" \
106
+ --source-table 'mydb.events:[
107
+ {"$match": {
108
+ "created_at": {
109
+ "$gte": ":interval_start",
110
+ "$lt": ":interval_end"
111
+ }
112
+ }},
113
+ {"$project": {
114
+ "_id": 1,
115
+ "event_type": 1,
116
+ "user_id": 1,
117
+ "created_at": 1
118
+ }}
119
+ ]' \
120
+ --incremental-key "created_at"
121
+ ```
122
+
123
+ #### Requirements for incremental loads
124
+
125
+ When using incremental loads with custom aggregations:
126
+
127
+ 1. **Incremental key projection**: The field specified in `--incremental-key` must be included in your projection
128
+ 2. **Datetime type**: The incremental key should be a datetime field
129
+ 3. **Pipeline validation**: ingestr validates that your aggregation pipeline properly projects the incremental key
130
+
131
+ ### Validation and error handling
132
+
133
+ ingestr performs several validations on custom aggregation pipelines:
134
+
135
+ - **JSON validation**: Ensures the aggregation pipeline is valid JSON
136
+ - **Array format**: Aggregation pipelines must be JSON arrays
137
+ - **Incremental key validation**: When using `--incremental-key`, validates that the key is projected in the pipeline
138
+ - **Clear error messages**: Provides specific error messages for common issues
139
+
140
+ ### Limitations
141
+
142
+ - **Parallel loading**: Custom aggregations don't support parallel loading due to MongoDB cursor limitations. The loader automatically falls back to sequential processing.
143
+ - **Arrow format**: When using Arrow data format with custom aggregations, data is converted to Arrow format after loading rather than using native MongoDB Arrow integration.
144
+
145
+ ### Performance considerations
146
+
147
+ - Use `$match` stages early in your pipeline to filter data as soon as possible
148
+ - Add appropriate indexes to support your aggregation pipeline
149
+ - Consider using `$limit` to restrict the number of documents processed
150
+ - For large datasets, MongoDB's `allowDiskUse: true` option is automatically enabled for aggregation pipelines
@@ -0,0 +1,46 @@
1
+ # MotherDuck
2
+ MotherDuck is a managed cloud service built on DuckDB, designed for fast analytics and data processing in the cloud.
3
+
4
+ ingestr supports MotherDuck as both a source and destination.
5
+
6
+ ## URI format
7
+ The URI format for MotherDuck is as follows:
8
+
9
+ ```plaintext
10
+ motherduck://<database-name>?token=<your-token>
11
+ ```
12
+
13
+ Alternatively, you can use the `md://` scheme:
14
+ ```plaintext
15
+ md://<database-name>?token=<your-token>
16
+ ```
17
+
18
+ URI parameters:
19
+ - `database-name`: the name of your MotherDuck database (optional, can be omitted for default connection)
20
+ - `token`: your MotherDuck authentication token
21
+
22
+ ## Authentication
23
+
24
+ ### Using Token in URI
25
+ Include the token directly in the URI:
26
+ ```plaintext
27
+ md://<database-name>?token=<your-token>
28
+ ```
29
+
30
+ ### Connection without Database Name
31
+ If you want to connect without specifying a database:
32
+ ```plaintext
33
+ md://?token=<your-token>
34
+ ```
35
+
36
+ ## Getting Your Token
37
+
38
+ 1. Go to the MotherDuck UI
39
+ 2. Click on your organization name in the top left and select "Settings"
40
+ 3. Click "+ Create token"
41
+ 4. Specify a name for the token
42
+ 5. Choose between Read/Write or Read Scaling token type
43
+ 6. Set expiration if desired and click "Create token"
44
+ 7. Copy the generated token
45
+
46
+ The same URI structure can be used both for sources and destinations. You can read more about MotherDuck's connection options in their [official documentation](https://motherduck.com/docs/key-tasks/authenticating-and-connecting-to-motherduck/).
@@ -1,3 +1,4 @@
1
+ import warnings
1
2
  from datetime import datetime
2
3
  from enum import Enum
3
4
  from typing import Optional
@@ -8,6 +9,14 @@ from typing_extensions import Annotated
8
9
 
9
10
  from ingestr.src.telemetry.event import track
10
11
 
12
+ try:
13
+ from duckdb_engine import DuckDBEngineWarning
14
+
15
+ warnings.filterwarnings("ignore", category=DuckDBEngineWarning)
16
+ except ImportError:
17
+ # duckdb-engine not installed
18
+ pass
19
+
11
20
  app = typer.Typer(
12
21
  name="ingestr",
13
22
  help="ingestr is the CLI tool to ingest data from one source to another",
@@ -506,7 +515,6 @@ def ingest(
506
515
 
507
516
  if factory.source_scheme == "sqlite":
508
517
  source_table = "main." + source_table.split(".")[-1]
509
-
510
518
 
511
519
  if (
512
520
  incremental_key
@@ -600,10 +608,9 @@ def ingest(
600
608
  if factory.source_scheme == "influxdb":
601
609
  if primary_key:
602
610
  write_disposition = "merge"
603
-
604
611
 
605
612
  start_time = datetime.now()
606
-
613
+
607
614
  run_info: LoadInfo = pipeline.run(
608
615
  dlt_source,
609
616
  **destination.dlt_run_params(
@@ -0,0 +1 @@
1
+ version = "v0.13.79"
@@ -147,6 +147,24 @@ class DuckDBDestination(GenericSqlDestination):
147
147
  return dlt.destinations.duckdb(uri, **kwargs)
148
148
 
149
149
 
150
+ class MotherduckDestination(GenericSqlDestination):
151
+ def dlt_dest(self, uri: str, **kwargs):
152
+ from urllib.parse import parse_qs, urlparse
153
+
154
+ parsed = urlparse(uri)
155
+ query = parse_qs(parsed.query)
156
+ token = query.get("token", [None])[0]
157
+ from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials
158
+
159
+ creds = {
160
+ "password": token,
161
+ }
162
+ if parsed.path.lstrip("/"):
163
+ creds["database"] = parsed.path.lstrip("/")
164
+
165
+ return dlt.destinations.motherduck(MotherDuckCredentials(creds), **kwargs)
166
+
167
+
150
168
  def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
151
169
  # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
152
170
  tup = struct.unpack(
@@ -26,7 +26,6 @@ from .settings import (
26
26
  DEFAULT_LEAD_FIELDS,
27
27
  INSIGHT_FIELDS_TYPES,
28
28
  INSIGHTS_BREAKDOWNS_OPTIONS,
29
- INSIGHTS_PRIMARY_KEY,
30
29
  INVALID_INSIGHTS_FIELDS,
31
30
  TInsightsBreakdownOptions,
32
31
  TInsightsLevels,
@@ -12,6 +12,7 @@ from ingestr.src.destinations import (
12
12
  DatabricksDestination,
13
13
  DuckDBDestination,
14
14
  GCSDestination,
15
+ MotherduckDestination,
15
16
  MsSQLDestination,
16
17
  MySqlDestination,
17
18
  PostgresDestination,
@@ -85,6 +86,8 @@ SQL_SOURCE_SCHEMES = [
85
86
  "mysql",
86
87
  "mysql+pymysql",
87
88
  "mysql+mysqlconnector",
89
+ "md",
90
+ "motherduck",
88
91
  "postgres",
89
92
  "postgresql",
90
93
  "postgresql+psycopg2",
@@ -195,6 +198,8 @@ class SourceDestinationFactory:
195
198
  "cratedb": CrateDBDestination,
196
199
  "databricks": DatabricksDestination,
197
200
  "duckdb": DuckDBDestination,
201
+ "motherduck": MotherduckDestination,
202
+ "md": MotherduckDestination,
198
203
  "mssql": MsSQLDestination,
199
204
  "postgres": PostgresDestination,
200
205
  "postgresql": PostgresDestination,
@@ -4,6 +4,8 @@ etc. to the database"""
4
4
  from typing import Any, Dict, Generator, Iterable, List, Optional
5
5
 
6
6
  import dlt
7
+ import pendulum
8
+ from dlt.common.time import ensure_pendulum_datetime
7
9
  from dlt.sources import DltResource
8
10
 
9
11
  from .freshdesk_client import FreshdeskClient
@@ -12,10 +14,12 @@ from .settings import DEFAULT_ENDPOINTS
12
14
 
13
15
  @dlt.source()
14
16
  def freshdesk_source(
15
- endpoints: Optional[List[str]] = None,
17
+ domain: str,
18
+ api_secret_key: str,
19
+ start_date: pendulum.DateTime,
20
+ end_date: Optional[pendulum.DateTime] = None,
16
21
  per_page: int = 100,
17
- domain: str = dlt.secrets.value,
18
- api_secret_key: str = dlt.secrets.value,
22
+ endpoints: Optional[List[str]] = None,
19
23
  ) -> Iterable[DltResource]:
20
24
  """
21
25
  Retrieves data from specified Freshdesk API endpoints.
@@ -39,7 +43,11 @@ def freshdesk_source(
39
43
  def incremental_resource(
40
44
  endpoint: str,
41
45
  updated_at: Optional[Any] = dlt.sources.incremental(
42
- "updated_at", initial_value="2022-01-01T00:00:00Z"
46
+ "updated_at",
47
+ initial_value=start_date.isoformat(),
48
+ end_value=end_date.isoformat() if end_date else None,
49
+ range_start="closed",
50
+ range_end="closed",
43
51
  ),
44
52
  ) -> Generator[Dict[Any, Any], Any, None]:
45
53
  """
@@ -48,15 +56,22 @@ def freshdesk_source(
48
56
  to ensure incremental loading.
49
57
  """
50
58
 
51
- # Retrieve the last updated timestamp to fetch only new or updated records.
52
- if updated_at is not None:
53
- updated_at = updated_at.last_value
59
+ if updated_at.last_value is not None:
60
+ start_date = ensure_pendulum_datetime(updated_at.last_value)
61
+ else:
62
+ start_date = start_date
63
+
64
+ if updated_at.end_value is not None:
65
+ end_date = ensure_pendulum_datetime(updated_at.end_value)
66
+ else:
67
+ end_date = pendulum.now(tz="UTC")
54
68
 
55
69
  # Use the FreshdeskClient instance to fetch paginated responses
56
70
  yield from freshdesk.paginated_response(
57
71
  endpoint=endpoint,
58
72
  per_page=per_page,
59
- updated_at=updated_at,
73
+ start_date=start_date,
74
+ end_date=end_date,
60
75
  )
61
76
 
62
77
  # Set default endpoints if not provided
@@ -2,8 +2,9 @@
2
2
 
3
3
  import logging
4
4
  import time
5
- from typing import Any, Dict, Iterable, Optional
5
+ from typing import Any, Dict, Iterable
6
6
 
7
+ import pendulum
7
8
  from dlt.common.typing import TDataItem
8
9
  from dlt.sources.helpers import requests
9
10
 
@@ -67,7 +68,8 @@ class FreshdeskClient:
67
68
  self,
68
69
  endpoint: str,
69
70
  per_page: int,
70
- updated_at: Optional[str] = None,
71
+ start_date: pendulum.DateTime,
72
+ end_date: pendulum.DateTime,
71
73
  ) -> Iterable[TDataItem]:
72
74
  """
73
75
  Fetches a paginated response from a specified endpoint.
@@ -88,8 +90,8 @@ class FreshdeskClient:
88
90
  param_key = (
89
91
  "updated_since" if endpoint == "tickets" else "_updated_since"
90
92
  )
91
- if updated_at:
92
- params[param_key] = updated_at
93
+
94
+ params[param_key] = start_date.to_iso8601_string()
93
95
 
94
96
  # Handle requests with rate-limiting
95
97
  # A maximum of 300 pages (30000 tickets) will be returned.
@@ -98,5 +100,14 @@ class FreshdeskClient:
98
100
 
99
101
  if not data:
100
102
  break # Stop if no data or max page limit reached
101
- yield data
103
+
104
+ filtered_data = [
105
+ item
106
+ for item in data
107
+ if "updated_at" in item
108
+ and pendulum.parse(item["updated_at"]) <= end_date
109
+ ]
110
+ if not filtered_data:
111
+ break
112
+ yield filtered_data
102
113
  page += 1
@@ -91,7 +91,9 @@ def github_repo_events(
91
91
  """
92
92
 
93
93
  # use naming function in table name to generate separate tables for each event
94
- @dlt.resource(primary_key= "id", table_name=lambda i: i["type"], write_disposition="merge")
94
+ @dlt.resource(
95
+ primary_key="id", table_name=lambda i: i["type"], write_disposition="merge"
96
+ )
95
97
  def repo_events(
96
98
  last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
97
99
  "created_at",
@@ -105,7 +107,7 @@ def github_repo_events(
105
107
  repos_path = (
106
108
  f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
107
109
  )
108
-
110
+
109
111
  # Get the date range from the incremental state
110
112
  start_filter = pendulum.parse(
111
113
  last_created_at.last_value or last_created_at.initial_value
@@ -115,7 +117,7 @@ def github_repo_events(
115
117
  if last_created_at.end_value
116
118
  else pendulum.now()
117
119
  )
118
-
120
+
119
121
  for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
120
122
  # Filter events by date range
121
123
  filtered_events = []
@@ -61,6 +61,7 @@ def get_stargazers(
61
61
  page_items,
62
62
  )
63
63
 
64
+
64
65
  def get_reactions_data(
65
66
  node_type: str,
66
67
  owner: str,
@@ -7,6 +7,7 @@ from dlt.sources import DltResource
7
7
 
8
8
  from .client import InfluxClient
9
9
 
10
+
10
11
  @dlt.source(max_table_nesting=0)
11
12
  def influxdb_source(
12
13
  measurement: str,
@@ -106,6 +106,7 @@ def mongodb_collection(
106
106
  filter_: Optional[Dict[str, Any]] = None,
107
107
  projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
108
108
  pymongoarrow_schema: Optional[Any] = None,
109
+ custom_query: Optional[List[Dict[str, Any]]] = None,
109
110
  ) -> Any:
110
111
  """
111
112
  A DLT source which loads a collection from a mongo database using PyMongo.
@@ -132,6 +133,7 @@ def mongodb_collection(
132
133
  exclude (dict) - {"released": False, "runtime": False}
133
134
  Note: Can't mix include and exclude statements '{"title": True, "released": False}`
134
135
  pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
136
+ custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB aggregation pipeline to execute instead of find()
135
137
 
136
138
  Returns:
137
139
  Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
@@ -161,4 +163,5 @@ def mongodb_collection(
161
163
  filter_=filter_ or {},
162
164
  projection=projection,
163
165
  pymongoarrow_schema=pymongoarrow_schema,
166
+ custom_query=custom_query,
164
167
  )