ingestr 0.13.59__tar.gz → 0.13.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (306) hide show
  1. {ingestr-0.13.59 → ingestr-0.13.61}/PKG-INFO +3 -2
  2. {ingestr-0.13.59 → ingestr-0.13.61}/README.md +1 -1
  3. ingestr-0.13.61/docs/media/cratedb-destination.png +0 -0
  4. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/cratedb.md +60 -1
  5. ingestr-0.13.61/docs/supported-sources/gcs.md +169 -0
  6. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/main.py +1 -0
  7. ingestr-0.13.61/ingestr/src/buildinfo.py +1 -0
  8. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/destinations.py +102 -45
  9. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/factory.py +4 -0
  10. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/isoc_pulse/__init__.py +1 -1
  11. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/sources.py +2 -1
  12. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/stripe_analytics/__init__.py +1 -42
  13. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/stripe_analytics/helpers.py +8 -62
  14. {ingestr-0.13.59 → ingestr-0.13.61}/requirements.in +1 -0
  15. {ingestr-0.13.59 → ingestr-0.13.61}/requirements.txt +7 -1
  16. {ingestr-0.13.59 → ingestr-0.13.61}/requirements_arm64.txt +7 -1
  17. ingestr-0.13.59/docs/supported-sources/gcs.md +0 -66
  18. ingestr-0.13.59/ingestr/src/buildinfo.py +0 -1
  19. {ingestr-0.13.59 → ingestr-0.13.61}/.dockerignore +0 -0
  20. {ingestr-0.13.59 → ingestr-0.13.61}/.githooks/pre-commit-hook.sh +0 -0
  21. {ingestr-0.13.59 → ingestr-0.13.61}/.github/workflows/deploy-docs.yml +0 -0
  22. {ingestr-0.13.59 → ingestr-0.13.61}/.github/workflows/release.yml +0 -0
  23. {ingestr-0.13.59 → ingestr-0.13.61}/.github/workflows/secrets-scan.yml +0 -0
  24. {ingestr-0.13.59 → ingestr-0.13.61}/.github/workflows/tests.yml +0 -0
  25. {ingestr-0.13.59 → ingestr-0.13.61}/.gitignore +0 -0
  26. {ingestr-0.13.59 → ingestr-0.13.61}/.gitleaksignore +0 -0
  27. {ingestr-0.13.59 → ingestr-0.13.61}/.python-version +0 -0
  28. {ingestr-0.13.59 → ingestr-0.13.61}/.vale.ini +0 -0
  29. {ingestr-0.13.59 → ingestr-0.13.61}/Dockerfile +0 -0
  30. {ingestr-0.13.59 → ingestr-0.13.61}/LICENSE.md +0 -0
  31. {ingestr-0.13.59 → ingestr-0.13.61}/Makefile +0 -0
  32. {ingestr-0.13.59 → ingestr-0.13.61}/docs/.vitepress/config.mjs +0 -0
  33. {ingestr-0.13.59 → ingestr-0.13.61}/docs/.vitepress/theme/custom.css +0 -0
  34. {ingestr-0.13.59 → ingestr-0.13.61}/docs/.vitepress/theme/index.js +0 -0
  35. {ingestr-0.13.59 → ingestr-0.13.61}/docs/commands/example-uris.md +0 -0
  36. {ingestr-0.13.59 → ingestr-0.13.61}/docs/commands/ingest.md +0 -0
  37. {ingestr-0.13.59 → ingestr-0.13.61}/docs/getting-started/core-concepts.md +0 -0
  38. {ingestr-0.13.59 → ingestr-0.13.61}/docs/getting-started/incremental-loading.md +0 -0
  39. {ingestr-0.13.59 → ingestr-0.13.61}/docs/getting-started/quickstart.md +0 -0
  40. {ingestr-0.13.59 → ingestr-0.13.61}/docs/getting-started/telemetry.md +0 -0
  41. {ingestr-0.13.59 → ingestr-0.13.61}/docs/index.md +0 -0
  42. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/applovin_max.png +0 -0
  43. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/athena.png +0 -0
  44. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/clickhouse_img.png +0 -0
  45. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/cratedb-source.png +0 -0
  46. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/freshdesk_ingestion.png +0 -0
  47. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/gcp_spanner_ingestion.png +0 -0
  48. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/github.png +0 -0
  49. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/google_analytics_realtime_report.png +0 -0
  50. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/googleanalytics.png +0 -0
  51. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/ingestion_elasticsearch_img.png +0 -0
  52. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/kinesis.bigquery.png +0 -0
  53. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/linkedin_ads.png +0 -0
  54. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/mixpanel_ingestion.png +0 -0
  55. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/personio.png +0 -0
  56. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/personio_duckdb.png +0 -0
  57. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/phantombuster.png +0 -0
  58. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/pipedrive.png +0 -0
  59. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/quickbook_ingestion.png +0 -0
  60. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/sftp.png +0 -0
  61. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/stripe_postgres.png +0 -0
  62. {ingestr-0.13.59 → ingestr-0.13.61}/docs/media/tiktok.png +0 -0
  63. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/adjust.md +0 -0
  64. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/airtable.md +0 -0
  65. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/applovin.md +0 -0
  66. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/applovin_max.md +0 -0
  67. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/appsflyer.md +0 -0
  68. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/appstore.md +0 -0
  69. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/asana.md +0 -0
  70. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/athena.md +0 -0
  71. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/attio.md +0 -0
  72. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/bigquery.md +0 -0
  73. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/chess.md +0 -0
  74. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/clickhouse.md +0 -0
  75. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/csv.md +0 -0
  76. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/custom_queries.md +0 -0
  77. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/databricks.md +0 -0
  78. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/db2.md +0 -0
  79. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/duckdb.md +0 -0
  80. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/dynamodb.md +0 -0
  81. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/elasticsearch.md +0 -0
  82. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/facebook-ads.md +0 -0
  83. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/frankfurter.md +0 -0
  84. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/freshdesk.md +0 -0
  85. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/github.md +0 -0
  86. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/google-ads.md +0 -0
  87. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/google_analytics.md +0 -0
  88. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/gorgias.md +0 -0
  89. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/gsheets.md +0 -0
  90. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/hubspot.md +0 -0
  91. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/isoc-pulse.md +0 -0
  92. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/kafka.md +0 -0
  93. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/kinesis.md +0 -0
  94. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/klaviyo.md +0 -0
  95. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/linkedin_ads.md +0 -0
  96. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/mixpanel.md +0 -0
  97. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/mongodb.md +0 -0
  98. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/mssql.md +0 -0
  99. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/mysql.md +0 -0
  100. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/notion.md +0 -0
  101. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/oracle.md +0 -0
  102. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/personio.md +0 -0
  103. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/phantombuster.md +0 -0
  104. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/pinterest.md +0 -0
  105. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/pipedrive.md +0 -0
  106. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/postgres.md +0 -0
  107. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/quickbooks.md +0 -0
  108. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/redshift.md +0 -0
  109. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/s3.md +0 -0
  110. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/salesforce.md +0 -0
  111. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/sap-hana.md +0 -0
  112. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/sftp.md +0 -0
  113. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/shopify.md +0 -0
  114. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/slack.md +0 -0
  115. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/smartsheets.md +0 -0
  116. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/snowflake.md +0 -0
  117. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/solidgate.md +0 -0
  118. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/spanner.md +0 -0
  119. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/sqlite.md +0 -0
  120. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/stripe.md +0 -0
  121. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/tiktok-ads.md +0 -0
  122. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/trustpilot.md +0 -0
  123. {ingestr-0.13.59 → ingestr-0.13.61}/docs/supported-sources/zendesk.md +0 -0
  124. {ingestr-0.13.59 → ingestr-0.13.61}/docs/tutorials/load-kinesis-bigquery.md +0 -0
  125. {ingestr-0.13.59 → ingestr-0.13.61}/docs/tutorials/load-personio-duckdb.md +0 -0
  126. {ingestr-0.13.59 → ingestr-0.13.61}/docs/tutorials/load-stripe-postgres.md +0 -0
  127. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/conftest.py +0 -0
  128. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/.gitignore +0 -0
  129. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/adjust/__init__.py +0 -0
  130. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/adjust/adjust_helpers.py +0 -0
  131. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/airtable/__init__.py +0 -0
  132. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/applovin/__init__.py +0 -0
  133. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/applovin_max/__init__.py +0 -0
  134. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appsflyer/__init__.py +0 -0
  135. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appsflyer/client.py +0 -0
  136. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appstore/__init__.py +0 -0
  137. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appstore/client.py +0 -0
  138. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appstore/errors.py +0 -0
  139. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appstore/models.py +0 -0
  140. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/appstore/resources.py +0 -0
  141. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/arrow/__init__.py +0 -0
  142. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/asana_source/__init__.py +0 -0
  143. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/asana_source/helpers.py +0 -0
  144. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/asana_source/settings.py +0 -0
  145. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/attio/__init__.py +0 -0
  146. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/attio/helpers.py +0 -0
  147. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/blob.py +0 -0
  148. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/chess/__init__.py +0 -0
  149. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/chess/helpers.py +0 -0
  150. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/chess/settings.py +0 -0
  151. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/collector/spinner.py +0 -0
  152. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/dynamodb/__init__.py +0 -0
  153. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/elasticsearch/__init__.py +0 -0
  154. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/errors.py +0 -0
  155. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/facebook_ads/__init__.py +0 -0
  156. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/facebook_ads/exceptions.py +0 -0
  157. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/facebook_ads/helpers.py +0 -0
  158. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/facebook_ads/settings.py +0 -0
  159. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/facebook_ads/utils.py +0 -0
  160. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/filesystem/__init__.py +0 -0
  161. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/filesystem/helpers.py +0 -0
  162. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/filesystem/readers.py +0 -0
  163. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/filters.py +0 -0
  164. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/frankfurter/__init__.py +0 -0
  165. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/frankfurter/helpers.py +0 -0
  166. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/freshdesk/__init__.py +0 -0
  167. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/freshdesk/freshdesk_client.py +0 -0
  168. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/freshdesk/settings.py +0 -0
  169. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/github/__init__.py +0 -0
  170. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/github/helpers.py +0 -0
  171. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/github/queries.py +0 -0
  172. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/github/settings.py +0 -0
  173. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_ads/__init__.py +0 -0
  174. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_ads/field.py +0 -0
  175. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_ads/metrics.py +0 -0
  176. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_ads/predicates.py +0 -0
  177. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_ads/reports.py +0 -0
  178. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_analytics/__init__.py +0 -0
  179. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_analytics/helpers.py +0 -0
  180. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_sheets/README.md +0 -0
  181. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_sheets/__init__.py +0 -0
  182. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  183. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  184. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  185. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/gorgias/__init__.py +0 -0
  186. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/gorgias/helpers.py +0 -0
  187. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/http_client.py +0 -0
  188. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/hubspot/__init__.py +0 -0
  189. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/hubspot/helpers.py +0 -0
  190. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/hubspot/settings.py +0 -0
  191. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/kafka/__init__.py +0 -0
  192. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/kafka/helpers.py +0 -0
  193. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/kinesis/__init__.py +0 -0
  194. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/kinesis/helpers.py +0 -0
  195. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/klaviyo/__init__.py +0 -0
  196. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/klaviyo/client.py +0 -0
  197. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/klaviyo/helpers.py +0 -0
  198. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/linkedin_ads/__init__.py +0 -0
  199. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/linkedin_ads/dimension_time_enum.py +0 -0
  200. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/linkedin_ads/helpers.py +0 -0
  201. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/loader.py +0 -0
  202. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/mixpanel/__init__.py +0 -0
  203. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/mixpanel/client.py +0 -0
  204. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/mongodb/__init__.py +0 -0
  205. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/mongodb/helpers.py +0 -0
  206. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/notion/__init__.py +0 -0
  207. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/notion/helpers/__init__.py +0 -0
  208. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/notion/helpers/client.py +0 -0
  209. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/notion/helpers/database.py +0 -0
  210. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/notion/settings.py +0 -0
  211. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/partition.py +0 -0
  212. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/personio/__init__.py +0 -0
  213. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/personio/helpers.py +0 -0
  214. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/phantombuster/__init__.py +0 -0
  215. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/phantombuster/client.py +0 -0
  216. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pinterest/__init__.py +0 -0
  217. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pipedrive/__init__.py +0 -0
  218. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pipedrive/helpers/__init__.py +0 -0
  219. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pipedrive/helpers/custom_fields_munger.py +0 -0
  220. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pipedrive/helpers/pages.py +0 -0
  221. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pipedrive/settings.py +0 -0
  222. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/pipedrive/typing.py +0 -0
  223. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/quickbooks/__init__.py +0 -0
  224. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/resource.py +0 -0
  225. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/salesforce/__init__.py +0 -0
  226. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/salesforce/helpers.py +0 -0
  227. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/shopify/__init__.py +0 -0
  228. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/shopify/exceptions.py +0 -0
  229. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/shopify/helpers.py +0 -0
  230. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/shopify/settings.py +0 -0
  231. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/slack/__init__.py +0 -0
  232. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/slack/helpers.py +0 -0
  233. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/slack/settings.py +0 -0
  234. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/smartsheets/__init__.py +0 -0
  235. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/solidgate/__init__.py +0 -0
  236. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/solidgate/helpers.py +0 -0
  237. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/sql_database/__init__.py +0 -0
  238. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/sql_database/callbacks.py +0 -0
  239. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/stripe_analytics/settings.py +0 -0
  240. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/table_definition.py +0 -0
  241. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/telemetry/event.py +0 -0
  242. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  243. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/tiktok_ads/__init__.py +0 -0
  244. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/tiktok_ads/tiktok_helpers.py +0 -0
  245. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/time.py +0 -0
  246. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/trustpilot/__init__.py +0 -0
  247. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/trustpilot/client.py +0 -0
  248. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/version.py +0 -0
  249. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/zendesk/__init__.py +0 -0
  250. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/zendesk/helpers/__init__.py +0 -0
  251. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
  252. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/zendesk/helpers/credentials.py +0 -0
  253. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
  254. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/src/zendesk/settings.py +0 -0
  255. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/.gitignore +0 -0
  256. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/create_replace.csv +0 -0
  257. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/delete_insert_expected.csv +0 -0
  258. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/delete_insert_part1.csv +0 -0
  259. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/delete_insert_part2.csv +0 -0
  260. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/merge_expected.csv +0 -0
  261. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/merge_part1.csv +0 -0
  262. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/testdata/merge_part2.csv +0 -0
  263. {ingestr-0.13.59 → ingestr-0.13.61}/ingestr/tests/unit/test_smartsheets.py +0 -0
  264. {ingestr-0.13.59 → ingestr-0.13.61}/package-lock.json +0 -0
  265. {ingestr-0.13.59 → ingestr-0.13.61}/package.json +0 -0
  266. {ingestr-0.13.59 → ingestr-0.13.61}/pyproject.toml +0 -0
  267. {ingestr-0.13.59 → ingestr-0.13.61}/requirements-dev.txt +0 -0
  268. {ingestr-0.13.59 → ingestr-0.13.61}/resources/demo.gif +0 -0
  269. {ingestr-0.13.59 → ingestr-0.13.61}/resources/demo.tape +0 -0
  270. {ingestr-0.13.59 → ingestr-0.13.61}/resources/ingestr.svg +0 -0
  271. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/AMPM.yml +0 -0
  272. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Acronyms.yml +0 -0
  273. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Colons.yml +0 -0
  274. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Contractions.yml +0 -0
  275. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/DateFormat.yml +0 -0
  276. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Ellipses.yml +0 -0
  277. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/EmDash.yml +0 -0
  278. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Exclamation.yml +0 -0
  279. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/FirstPerson.yml +0 -0
  280. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Gender.yml +0 -0
  281. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/GenderBias.yml +0 -0
  282. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/HeadingPunctuation.yml +0 -0
  283. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Headings.yml +0 -0
  284. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Latin.yml +0 -0
  285. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/LyHyphens.yml +0 -0
  286. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/OptionalPlurals.yml +0 -0
  287. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Ordinal.yml +0 -0
  288. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/OxfordComma.yml +0 -0
  289. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Parens.yml +0 -0
  290. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Passive.yml +0 -0
  291. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Periods.yml +0 -0
  292. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Quotes.yml +0 -0
  293. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Ranges.yml +0 -0
  294. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Semicolons.yml +0 -0
  295. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Slang.yml +0 -0
  296. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Spacing.yml +0 -0
  297. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Spelling.yml +0 -0
  298. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Units.yml +0 -0
  299. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/We.yml +0 -0
  300. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/Will.yml +0 -0
  301. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/WordList.yml +0 -0
  302. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/meta.json +0 -0
  303. {ingestr-0.13.59 → ingestr-0.13.61}/styles/Google/vocab.txt +0 -0
  304. {ingestr-0.13.59 → ingestr-0.13.61}/styles/bruin/Ingestr.yml +0 -0
  305. {ingestr-0.13.59 → ingestr-0.13.61}/styles/config/vocabularies/bruin/accept.txt +0 -0
  306. {ingestr-0.13.59 → ingestr-0.13.61}/test.env.template +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.59
3
+ Version: 0.13.61
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -47,6 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
47
47
  Requires-Dist: dataclasses-json==0.6.7
48
48
  Requires-Dist: decorator==5.2.1
49
49
  Requires-Dist: deprecation==2.1.0
50
+ Requires-Dist: dlt-cratedb==0.0.1
50
51
  Requires-Dist: dlt==1.10.0
51
52
  Requires-Dist: dnspython==2.7.0
52
53
  Requires-Dist: duckdb-engine==0.17.0
@@ -305,7 +306,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
305
306
  <tr>
306
307
  <td>CrateDB</td>
307
308
  <td>✅</td>
308
- <td>❌</td>
309
+ <td>✅</td>
309
310
  </tr>
310
311
  <tr>
311
312
  <td>Databricks</td>
@@ -90,7 +90,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
90
90
  <tr>
91
91
  <td>CrateDB</td>
92
92
  <td>✅</td>
93
- <td>❌</td>
93
+ <td>✅</td>
94
94
  </tr>
95
95
  <tr>
96
96
  <td>Databricks</td>
@@ -4,7 +4,7 @@
4
4
  massive amounts of data in near real-time, even with complex queries. It is
5
5
  PostgreSQL-compatible, and based on Lucene.
6
6
 
7
- ingestr supports CrateDB as a source database.
7
+ ingestr supports CrateDB as a source and destination database.
8
8
 
9
9
  ## Source
10
10
 
@@ -56,6 +56,61 @@ duckdb cratedb.duckdb 'SELECT * FROM dest.summits LIMIT 5'
56
56
 
57
57
  <img alt="CrateDB_img" src="../media/cratedb-source.png" />
58
58
 
59
+ ## Destination
60
+
61
+ For connecting to CrateDB as a database destination, ingestr uses the
62
+ [dlt cratedb adapter], which is based on the [dlt postgres adapter],
63
+ in turn using the [psycopg2] package.
64
+
65
+ ### URI format
66
+
67
+ The URI format for CrateDB as a destination is as follows:
68
+ ```plaintext
69
+ cratedb://<username>:<password>@<host>:<port>?sslmode=<sslmode>
70
+ ```
71
+ > [!INFO]
72
+ > When connecting to CrateDB on localhost, use:
73
+ > ```plaintext
74
+ > cratedb://crate:@localhost:5432?sslmode=disable
75
+ > ```
76
+ >
77
+ > When connecting to [CrateDB Cloud], the URI looks like this:
78
+ > ```plaintext
79
+ > cratedb://admin:<PASSWORD>@<CLUSTERNAME>.eks1.eu-west-1.aws.cratedb.net:5432?sslmode=require
80
+ > ```
81
+
82
+ ### URI parameters
83
+ - `username` (required): The username is required to authenticate with the CrateDB server.
84
+ - `password` (required): The password is required to authenticate the provided username.
85
+ - `host` (required): The hostname or IP address of the CrateDB server where the database is hosted.
86
+ - `port` (required): The TCP port number used by the CrateDB server. Usually `5432`.
87
+ - `sslmode` (optional): Set to one of `disable`, `allow`, `prefer`, `require`, `verify-ca`,
88
+ or `verify-full`, see [PostgreSQL SSL Mode Descriptions].
89
+
90
+ ### Example
91
+
92
+ This is an example command that will import a CSV file into CrateDB,
93
+ then display the content from CrateDB.
94
+
95
+ ```shell
96
+ wget -O input.csv https://github.com/bruin-data/ingestr/raw/refs/heads/main/ingestr/testdata/create_replace.csv
97
+ ```
98
+ ```shell
99
+ ingestr ingest \
100
+ --source-uri 'csv://input.csv' \
101
+ --source-table 'sample' \
102
+ --dest-uri 'cratedb://crate:@localhost:5432/?sslmode=disable' \
103
+ --dest-table 'doc.sample'
104
+ ```
105
+ ```shell
106
+ uvx crash -c 'SELECT * FROM doc.sample'
107
+ ```
108
+
109
+ <img alt="CrateDB_img" src="../media/cratedb-destination.png" />
110
+
111
+ > [!WARNING]
112
+ > CrateDB supports the `replace` incremental materialization strategy, but
113
+ > currently does not support the `delete+insert`, `merge`, or `scd2` strategies.
59
114
 
60
115
  ## Appendix
61
116
 
@@ -75,6 +130,10 @@ or share relevant issue reports that help us improve interoperability. Thanks!
75
130
 
76
131
  [CrateDB]: https://github.com/crate/crate
77
132
  [CrateDB Cloud]: https://console.cratedb.cloud/
133
+ [dlt cratedb adapter]: https://github.com/dlt-hub/dlt/pull/2733
134
+ [dlt postgres adapter]: https://github.com/dlt-hub/dlt/tree/devel/dlt/destinations/impl/postgres
135
+ [PostgreSQL SSL Mode Descriptions]: https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-SSLMODE-STATEMENTS
136
+ [psycopg2]: https://pypi.org/project/psycopg2-binary/
78
137
  [sqlalchemy-cratedb]: https://pypi.org/project/sqlalchemy-cratedb/
79
138
  [Support for ingestr/CrateDB]: https://github.com/crate/crate-clients-tools/issues/86
80
139
  ["tool: dlt/ingestr"]: https://github.com/crate/crate/issues?q=state%3Aopen%20label%3A%22tool%3A%20dlt%2Fingestr%22
@@ -0,0 +1,169 @@
1
+ # Google Cloud Storage
2
+
3
+ [Google Cloud Storage](https://cloud.google.com/storage?hl=en) is an online file storage web service for storing and accessing data on Google Cloud Platform infrastructure. The service combines the performance and scalability of Google's cloud with advanced security and sharing capabilities. It is an Infrastructure as a Service (IaaS), comparable to Amazon S3.
4
+
5
+ `ingestr` supports Google Cloud Storage as both a data source and destination.
6
+
7
+ ## URI format
8
+
9
+ The URI format for Google Cloud Storage is as follows:
10
+
11
+ ```plaintext
12
+ gs://?credentials_path=/path/to/service-account.json
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `credentials_path`: path to the file containing your Google Cloud [Service Account](https://cloud.google.com/iam/docs/service-account-overview) key
18
+ - `credentials_base64`: base64-encoded service account JSON (alternative to credentials_path)
19
+ - `layout`: Layout template (optional, destination only)
20
+
21
+ The `--source-table` must be in the format:
22
+ ```
23
+ {bucket name}/{file glob}
24
+ ```
25
+
26
+ ## Setting up a GCS Integration
27
+
28
+ To use the Google Cloud Storage source or destination in `ingestr`, you will need:
29
+ * A Google Cloud Project.
30
+ * A Service Account with at least [roles/storage.objectUser](https://cloud.google.com/storage/docs/access-control/iam-roles) IAM permission for reading, or [roles/storage.objectAdmin](https://cloud.google.com/storage/docs/access-control/iam-roles) for writing to GCS.
31
+ * A Service Account key file for the corresponding service account.
32
+
33
+ For more information on how to create a Service Account or its keys, see [Create service accounts](https://cloud.google.com/iam/docs/service-accounts-create) and [Create or delete service account keys](https://cloud.google.com/iam/docs/keys-create-delete) on Google Cloud docs.
34
+
35
+ ## Example: Loading data from GCS
36
+
37
+ Let's assume that:
38
+ * Service account key is available in the current directory, under the filename `service_account.json`.
39
+ * The bucket you want to load data from is called `my-org-bucket`
40
+ * The source file is available at `data/latest/dump.csv`
41
+ * The data needs to be saved in a DuckDB database called `local.db`
42
+ * The destination table name will be `public.latest_dump`
43
+
44
+ You can run the following command line to achieve this:
45
+
46
+ ```sh
47
+ ingestr ingest \
48
+ --source-uri "gs://?credentials_path=$PWD/service_account.json" \
49
+ --source-table "my-org-bucket/data/latest/dump.csv" \
50
+ --dest-uri "duckdb:///local.db" \
51
+ --dest-table "public.latest_dump"
52
+ ```
53
+
54
+ ## Example: Uploading data to GCS
55
+
56
+ For this example, we'll assume that:
57
+ * `records.db` is a DuckDB database.
58
+ * It has a table called `public.users`.
59
+ * The service account key is available in the current directory.
60
+
61
+ The following command demonstrates how to copy data from a local DuckDB database to GCS:
62
+ ```sh
63
+ ingestr ingest \
64
+ --source-uri 'duckdb:///records.db' \
65
+ --source-table 'public.users' \
66
+ --dest-uri "gs://?credentials_path=$PWD/service_account.json" \
67
+ --dest-table 'my-org-bucket/records'
68
+ ```
69
+
70
+ This will result in a file structure like the following:
71
+ ```
72
+ my-org-bucket/
73
+ └── records
74
+ ├── _dlt_loads
75
+ ├── _dlt_pipeline_state
76
+ ├── _dlt_version
77
+ └── users
78
+ └── <load_id>.<file_id>.parquet
79
+ ```
80
+
81
+ The values of `load_id` and `file_id` are determined at runtime. The default layout creates a folder with the same table name as the source and places the data inside a parquet file. This layout is configurable using the `layout` parameter.
82
+
83
+ For example, if you would like to create a parquet file with the same name as the source table (as opposed to a folder) you can set `layout` to `{table_name}.{ext}` in the command line above:
84
+
85
+ ```sh
86
+ ingestr ingest \
87
+ --source-uri 'duckdb:///records.db' \
88
+ --source-table 'public.users' \
89
+ --dest-uri "gs://?layout={table_name}.{ext}&credentials_path=$PWD/service_account.json" \
90
+ --dest-table 'my-org-bucket/records'
91
+ ```
92
+
93
+ Result:
94
+ ```
95
+ my-org-bucket/
96
+ └── records
97
+ ├── _dlt_loads
98
+ ├── _dlt_pipeline_state
99
+ ├── _dlt_version
100
+ └── users.parquet
101
+ ```
102
+
103
+ The list of available layout variables can be found [here](https://dlthub.com/docs/dlt-ecosystem/destinations/filesystem#available-layout-placeholders).
104
+
105
+ ## Supported File Formats
106
+ `gs` source only supports loading files in the following formats:
107
+ * `csv`: Comma Separated Values
108
+ * `parquet`: [Apache Parquet](https://parquet.apache.org/) storage format.
109
+ * `jsonl`: Line-delimited JSON. See [https://jsonlines.org/](https://jsonlines.org/)
110
+
111
+ ::: info NOTE
112
+ When writing to GCS, only `parquet` is supported.
113
+ :::
114
+ ## File Pattern
115
+ `ingestr` supports [glob](https://en.wikipedia.org/wiki/Glob_(programming))-like pattern matching for the `gs` source.
116
+ This provides a powerful pattern matching mechanism that lets you specify multiple files in a single `--source-table`.
117
+
118
+ Below are some examples of path patterns. Each path pattern is a glob that you can specify after the bucket name:
119
+
120
+ - `**/*.csv`: Retrieves all the CSV files, regardless of how deep they are within the folder structure.
121
+ - `*.csv`: Retrieves all the CSV files from the first level of a folder.
122
+ - `myFolder/**/*.jsonl`: Retrieves all the JSONL files from anywhere under `myFolder`.
123
+ - `myFolder/mySubFolder/users.parquet`: Retrieves the `users.parquet` file from `mySubFolder`.
124
+ - `employees.jsonl`: Retrieves the `employees.jsonl` file from the root level of the bucket.
125
+
126
+ ### Working with compressed files
127
+
128
+ `ingestr` automatically detects and handles gzipped files in your GCS bucket. You can load data from compressed files with the `.gz` extension without any additional configuration.
129
+
130
+ For example, to load data from a gzipped CSV file:
131
+
132
+ ```sh
133
+ ingestr ingest \
134
+ --source-uri "gs://?credentials_path=$PWD/service_account.json" \
135
+ --source-table "my-org-bucket/logs/event-data.csv.gz" \
136
+ --dest-uri "duckdb:///compressed_data.duckdb" \
137
+ --dest-table "logs.events"
138
+ ```
139
+
140
+ You can also use glob patterns to load multiple compressed files:
141
+
142
+ ```sh
143
+ ingestr ingest \
144
+ --source-uri "gs://?credentials_path=$PWD/service_account.json" \
145
+ --source-table "my-org-bucket/logs/**/*.csv.gz" \
146
+ --dest-uri "duckdb:///compressed_data.duckdb" \
147
+ --dest-table "logs.events"
148
+ ```
149
+
150
+ ### File type hinting
151
+
152
+ If your files are properly encoded but lack the correct file extension (CSV, JSONL, or Parquet), you can provide a file type hint to inform `ingestr` about the format of the files. This is done by appending a fragment identifier (`#format`) to the end of the path in your `--source-table` parameter.
153
+
154
+ For example, if you have JSONL-formatted log files stored in GCS with a non-standard extension:
155
+
156
+ ```
157
+ --source-table "my-org-bucket/logs/event-data#jsonl"
158
+ ```
159
+
160
+ This tells `ingestr` to process the files as JSONL, regardless of their actual extension.
161
+
162
+ Supported format hints include:
163
+ - `#csv` - For comma-separated values files
164
+ - `#jsonl` - For line-delimited JSON files
165
+ - `#parquet` - For Parquet format files
166
+
167
+ ::: tip
168
+ File type hinting works with `gzip` compressed files as well.
169
+ :::
@@ -543,6 +543,7 @@ def ingest(
543
543
  sql_reflection_level=sql_reflection_level.value,
544
544
  sql_limit=sql_limit,
545
545
  sql_exclude_columns=sql_exclude_columns,
546
+ extract_parallelism=extract_parallelism,
546
547
  )
547
548
 
548
549
  resource.for_each(dlt_source, lambda x: x.add_map(cast_set_to_list))
@@ -0,0 +1 @@
1
+ version = "v0.13.61"
@@ -1,3 +1,4 @@
1
+ import abc
1
2
  import base64
2
3
  import csv
3
4
  import json
@@ -9,6 +10,7 @@ from urllib.parse import parse_qs, quote, urlparse
9
10
  import dlt
10
11
  import dlt.destinations.impl.filesystem.filesystem
11
12
  from dlt.common.configuration.specs import AwsCredentials
13
+ from dlt.common.storages.configuration import FileSystemCredentials
12
14
  from dlt.destinations.impl.clickhouse.configuration import (
13
15
  ClickHouseCredentials,
14
16
  )
@@ -111,6 +113,14 @@ class BigQueryDestination:
111
113
  pass
112
114
 
113
115
 
116
+ class CrateDBDestination(GenericSqlDestination):
117
+ def dlt_dest(self, uri: str, **kwargs):
118
+ uri = uri.replace("cratedb://", "postgres://")
119
+ import dlt_cratedb.impl.cratedb.factory
120
+
121
+ return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
122
+
123
+
114
124
  class PostgresDestination(GenericSqlDestination):
115
125
  def dlt_dest(self, uri: str, **kwargs):
116
126
  return dlt.destinations.postgres(credentials=uri, **kwargs)
@@ -386,43 +396,62 @@ class ClickhouseDestination:
386
396
  pass
387
397
 
388
398
 
389
- class S3FSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
399
+ class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
390
400
  @property
391
401
  def dataset_path(self):
392
402
  # override to remove dataset path
393
403
  return self.bucket_path
394
404
 
395
405
 
396
- class S3FS(dlt.destinations.filesystem):
406
+ class BlobFS(dlt.destinations.filesystem):
397
407
  @property
398
408
  def client_class(self):
399
- return S3FSClient
409
+ return BlobFSClient
400
410
 
401
411
 
402
- class S3Destination:
412
+ class SqliteDestination(GenericSqlDestination):
403
413
  def dlt_dest(self, uri: str, **kwargs):
404
- parsed_uri = urlparse(uri)
405
- params = parse_qs(parsed_uri.query)
414
+ return dlt.destinations.sqlalchemy(credentials=uri)
406
415
 
407
- access_key_id = params.get("access_key_id", [None])[0]
408
- if access_key_id is None:
409
- raise MissingValueError("access_key_id", "S3")
416
+ def dlt_run_params(self, uri: str, table: str, **kwargs):
417
+ return {
418
+ # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
419
+ "dataset_name": "main",
420
+ "table_name": table,
421
+ }
410
422
 
411
- secret_access_key = params.get("secret_access_key", [None])[0]
412
- if secret_access_key is None:
413
- raise MissingValueError("secret_access_key", "S3")
414
423
 
415
- endpoint_url = params.get("endpoint_url", [None])[0]
416
- if endpoint_url is not None:
417
- parsed_endpoint = urlparse(endpoint_url)
418
- if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
419
- raise ValueError("Invalid endpoint_url. Must be a valid URL.")
424
+ class MySqlDestination(GenericSqlDestination):
425
+ def dlt_dest(self, uri: str, **kwargs):
426
+ return dlt.destinations.sqlalchemy(credentials=uri)
420
427
 
421
- creds = AwsCredentials(
422
- aws_access_key_id=access_key_id,
423
- aws_secret_access_key=secret_access_key,
424
- endpoint_url=endpoint_url,
425
- )
428
+ def dlt_run_params(self, uri: str, table: str, **kwargs):
429
+ parsed = urlparse(uri)
430
+ database = parsed.path.lstrip("/")
431
+ if not database:
432
+ raise ValueError("You need to specify a database")
433
+ return {
434
+ "dataset_name": database,
435
+ "table_name": table,
436
+ }
437
+
438
+
439
+ class BlobStorageDestination(abc.ABC):
440
+ @abc.abstractmethod
441
+ def credentials(self, params: dict) -> FileSystemCredentials:
442
+ """Build credentials for the blob storage destination."""
443
+ pass
444
+
445
+ @property
446
+ @abc.abstractmethod
447
+ def protocol(self) -> str:
448
+ """The protocol used for the blob storage destination."""
449
+ pass
450
+
451
+ def dlt_dest(self, uri: str, **kwargs):
452
+ parsed_uri = urlparse(uri)
453
+ params = parse_qs(parsed_uri.query)
454
+ creds = self.credentials(params)
426
455
 
427
456
  dest_table = kwargs["dest_table"]
428
457
 
@@ -442,7 +471,7 @@ class S3Destination:
442
471
  base_path = "/".join(table_parts[:-1])
443
472
 
444
473
  opts = {
445
- "bucket_url": f"s3://{base_path}",
474
+ "bucket_url": f"{self.protocol}://{base_path}",
446
475
  "credentials": creds,
447
476
  # supresses dlt warnings about dataset name normalization.
448
477
  # we don't use dataset names in S3 so it's fine to disable this.
@@ -452,7 +481,7 @@ class S3Destination:
452
481
  if layout is not None:
453
482
  opts["layout"] = layout
454
483
 
455
- return S3FS(**opts) # type: ignore
484
+ return BlobFS(**opts) # type: ignore
456
485
 
457
486
  def validate_table(self, table: str):
458
487
  table = table.strip("/ ")
@@ -470,28 +499,56 @@ class S3Destination:
470
499
  pass
471
500
 
472
501
 
473
- class SqliteDestination(GenericSqlDestination):
474
- def dlt_dest(self, uri: str, **kwargs):
475
- return dlt.destinations.sqlalchemy(credentials=uri)
502
+ class S3Destination(BlobStorageDestination):
503
+ @property
504
+ def protocol(self) -> str:
505
+ return "s3"
476
506
 
477
- def dlt_run_params(self, uri: str, table: str, **kwargs):
478
- return {
479
- # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
480
- "dataset_name": "main",
481
- "table_name": table,
482
- }
507
+ def credentials(self, params: dict) -> FileSystemCredentials:
508
+ access_key_id = params.get("access_key_id", [None])[0]
509
+ if access_key_id is None:
510
+ raise MissingValueError("access_key_id", "S3")
483
511
 
512
+ secret_access_key = params.get("secret_access_key", [None])[0]
513
+ if secret_access_key is None:
514
+ raise MissingValueError("secret_access_key", "S3")
484
515
 
485
- class MySqlDestination(GenericSqlDestination):
486
- def dlt_dest(self, uri: str, **kwargs):
487
- return dlt.destinations.sqlalchemy(credentials=uri)
516
+ endpoint_url = params.get("endpoint_url", [None])[0]
517
+ if endpoint_url is not None:
518
+ parsed_endpoint = urlparse(endpoint_url)
519
+ if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
520
+ raise ValueError("Invalid endpoint_url. Must be a valid URL.")
488
521
 
489
- def dlt_run_params(self, uri: str, table: str, **kwargs):
490
- parsed = urlparse(uri)
491
- database = parsed.path.lstrip("/")
492
- if not database:
493
- raise ValueError("You need to specify a database")
494
- return {
495
- "dataset_name": database,
496
- "table_name": table,
497
- }
522
+ return AwsCredentials(
523
+ aws_access_key_id=access_key_id,
524
+ aws_secret_access_key=secret_access_key,
525
+ endpoint_url=endpoint_url,
526
+ )
527
+
528
+
529
+ class GCSDestination(BlobStorageDestination):
530
+ @property
531
+ def protocol(self) -> str:
532
+ return "gs"
533
+
534
+ def credentials(self, params: dict) -> FileSystemCredentials:
535
+ """Builds GCS credentials from the provided parameters."""
536
+ credentials_path = params.get("credentials_path")
537
+ credentials_base64 = params.get("credentials_base64")
538
+ credentials_available = any(
539
+ map(
540
+ lambda x: x is not None,
541
+ [credentials_path, credentials_base64],
542
+ )
543
+ )
544
+ if credentials_available is False:
545
+ raise MissingValueError("credentials_path or credentials_base64", "GCS")
546
+
547
+ credentials = None
548
+ if credentials_path:
549
+ with open(credentials_path[0], "r") as f:
550
+ credentials = json.load(f)
551
+ else:
552
+ credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
553
+
554
+ return credentials
@@ -7,9 +7,11 @@ from ingestr.src.destinations import (
7
7
  AthenaDestination,
8
8
  BigQueryDestination,
9
9
  ClickhouseDestination,
10
+ CrateDBDestination,
10
11
  CsvDestination,
11
12
  DatabricksDestination,
12
13
  DuckDBDestination,
14
+ GCSDestination,
13
15
  MsSQLDestination,
14
16
  MySqlDestination,
15
17
  PostgresDestination,
@@ -181,6 +183,7 @@ class SourceDestinationFactory:
181
183
  }
182
184
  destinations: Dict[str, Type[DestinationProtocol]] = {
183
185
  "bigquery": BigQueryDestination,
186
+ "cratedb": CrateDBDestination,
184
187
  "databricks": DatabricksDestination,
185
188
  "duckdb": DuckDBDestination,
186
189
  "mssql": MsSQLDestination,
@@ -197,6 +200,7 @@ class SourceDestinationFactory:
197
200
  "clickhouse+native": ClickhouseDestination,
198
201
  "clickhouse": ClickhouseDestination,
199
202
  "s3": S3Destination,
203
+ "gs": GCSDestination,
200
204
  "sqlite": SqliteDestination,
201
205
  "mysql": MySqlDestination,
202
206
  "mysql+pymysql": MySqlDestination,
@@ -73,7 +73,7 @@ def pulse_source(
73
73
  "write_disposition": "merge",
74
74
  "primary_key": "date",
75
75
  },
76
- "resources": resources, # type:ignore
76
+ "resources": resources, # type:ignore
77
77
  }
78
78
  res = rest_api_resources(config)
79
79
  if metric == "net_loss":
@@ -737,6 +737,7 @@ class StripeAnalyticsSource:
737
737
  endpoint,
738
738
  ],
739
739
  stripe_secret_key=api_key[0],
740
+ max_workers=kwargs.get("extract_parallelism", 4),
740
741
  ).with_resources(endpoint)
741
742
 
742
743
  raise ValueError(
@@ -1884,7 +1885,7 @@ class GCSSource:
1884
1885
  endpoint = blob.parse_endpoint(path_to_file)
1885
1886
  except blob.UnsupportedEndpointError:
1886
1887
  raise ValueError(
1887
- "S3 Source only supports specific formats files: csv, jsonl, parquet"
1888
+ "GCS Source only supports specific formats files: csv, jsonl, parquet"
1888
1889
  )
1889
1890
  except Exception as e:
1890
1891
  raise ValueError(
@@ -10,7 +10,6 @@ from pendulum import DateTime
10
10
  from .helpers import (
11
11
  async_parallel_pagination,
12
12
  pagination,
13
- parallel_pagination,
14
13
  transform_date,
15
14
  )
16
15
 
@@ -55,53 +54,13 @@ def stripe_source(
55
54
  )(endpoint)
56
55
 
57
56
 
58
- @dlt.source(max_table_nesting=0)
59
- def parallel_stripe_source(
60
- endpoints: Tuple[str, ...],
61
- stripe_secret_key: str = dlt.secrets.value,
62
- start_date: Optional[DateTime] = None,
63
- end_date: Optional[DateTime] = None,
64
- max_workers: int = 12,
65
- ) -> Iterable[DltResource]:
66
- """
67
- Retrieves data from the Stripe API for the specified endpoints using parallel pagination.
68
-
69
- This source divides the date range across multiple workers to fetch data in parallel,
70
- which can significantly speed up data retrieval for large date ranges.
71
-
72
- Args:
73
- endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from.
74
- stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
75
- start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Required for parallel processing.
76
- end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Required for parallel processing.
77
- max_workers (int): Maximum number of worker threads for parallel fetching. Defaults to 4.
78
-
79
- Returns:
80
- Iterable[DltResource]: Resources with data that was created during the period greater than or equal to 'start_date' and less than 'end_date'.
81
- """
82
- stripe.api_key = stripe_secret_key
83
- stripe.api_version = "2022-11-15"
84
-
85
- def parallel_stripe_resource(
86
- endpoint: str,
87
- ) -> Generator[Dict[Any, Any], Any, None]:
88
- yield from parallel_pagination(endpoint, start_date, end_date, max_workers)
89
-
90
- for endpoint in endpoints:
91
- yield dlt.resource(
92
- parallel_stripe_resource,
93
- name=endpoint,
94
- write_disposition="replace",
95
- )(endpoint)
96
-
97
-
98
57
  @dlt.source(max_table_nesting=0)
99
58
  def async_stripe_source(
100
59
  endpoints: Tuple[str, ...],
101
60
  stripe_secret_key: str = dlt.secrets.value,
102
61
  start_date: Optional[DateTime] = None,
103
62
  end_date: Optional[DateTime] = None,
104
- max_workers: int = 40,
63
+ max_workers: int = 4,
105
64
  rate_limit_delay: float = 0.03,
106
65
  ) -> Iterable[DltResource]:
107
66
  """