MindsDB 25.5.3.0__py3-none-any.whl → 25.5.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (310) hide show
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +127 -79
  3. mindsdb/api/a2a/__init__.py +0 -0
  4. mindsdb/api/a2a/__main__.py +114 -0
  5. mindsdb/api/a2a/a2a_client.py +439 -0
  6. mindsdb/api/a2a/agent.py +308 -0
  7. mindsdb/api/a2a/common/__init__.py +0 -0
  8. mindsdb/api/a2a/common/client/__init__.py +4 -0
  9. mindsdb/api/a2a/common/client/card_resolver.py +21 -0
  10. mindsdb/api/a2a/common/client/client.py +86 -0
  11. mindsdb/api/a2a/common/server/__init__.py +4 -0
  12. mindsdb/api/a2a/common/server/server.py +164 -0
  13. mindsdb/api/a2a/common/server/task_manager.py +287 -0
  14. mindsdb/api/a2a/common/server/utils.py +28 -0
  15. mindsdb/api/a2a/common/types.py +365 -0
  16. mindsdb/api/a2a/constants.py +9 -0
  17. mindsdb/api/a2a/run_a2a.py +129 -0
  18. mindsdb/api/a2a/task_manager.py +594 -0
  19. mindsdb/api/executor/command_executor.py +47 -27
  20. mindsdb/api/executor/datahub/classes/response.py +5 -2
  21. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
  22. mindsdb/api/executor/planner/query_planner.py +10 -1
  23. mindsdb/api/executor/sql_query/result_set.py +185 -52
  24. mindsdb/api/executor/sql_query/sql_query.py +1 -1
  25. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +9 -12
  26. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
  27. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
  28. mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
  29. mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
  30. mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
  31. mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
  32. mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
  33. mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
  34. mindsdb/api/http/initialize.py +99 -83
  35. mindsdb/api/http/namespaces/analysis.py +3 -3
  36. mindsdb/api/http/namespaces/file.py +8 -2
  37. mindsdb/api/http/namespaces/sql.py +13 -27
  38. mindsdb/api/mcp/start.py +42 -5
  39. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
  40. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
  41. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
  42. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
  43. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +82 -115
  44. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
  45. mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
  46. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
  47. mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
  48. mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
  49. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
  50. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
  51. mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
  52. mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
  53. mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
  54. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  55. mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
  56. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
  57. mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
  58. mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
  59. mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
  60. mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
  61. mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
  62. mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
  63. mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
  64. mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
  65. mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
  66. mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
  67. mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
  68. mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
  69. mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
  70. mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
  71. mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
  72. mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
  73. mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
  74. mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
  75. mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
  76. mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
  77. mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
  78. mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
  79. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
  80. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
  81. mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
  82. mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
  83. mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
  84. mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
  85. mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
  86. mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
  87. mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
  88. mindsdb/integrations/handlers/email_handler/settings.py +0 -1
  89. mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
  90. mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
  91. mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
  92. mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
  93. mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
  94. mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
  95. mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
  96. mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
  97. mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
  98. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
  99. mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
  100. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
  101. mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
  102. mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
  103. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
  104. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
  105. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
  106. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
  107. mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
  108. mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
  109. mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
  110. mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
  111. mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
  112. mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
  113. mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
  114. mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
  115. mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
  116. mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
  117. mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
  118. mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
  119. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
  120. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
  121. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
  122. mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
  123. mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
  124. mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
  125. mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
  126. mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
  127. mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
  128. mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
  129. mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
  130. mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
  131. mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
  132. mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
  133. mindsdb/integrations/handlers/jira_handler/jira_handler.py +4 -4
  134. mindsdb/integrations/handlers/jira_handler/jira_tables.py +9 -9
  135. mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
  136. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
  137. mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
  138. mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
  139. mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
  140. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -1
  141. mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
  142. mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
  143. mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
  144. mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
  145. mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
  146. mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
  147. mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
  148. mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
  149. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
  150. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
  151. mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
  152. mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
  153. mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
  154. mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
  155. mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
  156. mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
  157. mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
  158. mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
  159. mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
  160. mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
  161. mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
  162. mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
  163. mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
  164. mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
  165. mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
  166. mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
  167. mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
  168. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
  169. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  170. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
  171. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
  172. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
  173. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
  174. mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
  175. mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
  176. mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
  177. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
  178. mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
  179. mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
  180. mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
  181. mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
  182. mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
  183. mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
  184. mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
  185. mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
  186. mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
  187. mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
  188. mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
  189. mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
  190. mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
  191. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
  192. mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
  193. mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
  194. mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
  195. mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
  196. mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
  197. mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
  198. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
  199. mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
  200. mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
  201. mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
  202. mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
  203. mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
  204. mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
  205. mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
  206. mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
  207. mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
  208. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
  209. mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
  210. mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
  211. mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
  212. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
  213. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
  214. mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
  215. mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
  216. mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
  217. mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
  218. mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
  219. mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
  220. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
  221. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
  222. mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
  223. mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
  224. mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
  225. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
  226. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
  227. mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
  228. mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
  229. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
  230. mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
  231. mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
  232. mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
  233. mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
  234. mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
  235. mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
  236. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
  237. mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
  238. mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
  239. mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
  240. mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
  241. mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
  242. mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
  243. mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
  244. mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
  245. mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
  246. mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
  247. mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
  248. mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
  249. mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
  250. mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
  251. mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
  252. mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
  253. mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
  254. mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
  255. mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
  256. mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
  257. mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
  258. mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
  259. mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
  260. mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
  261. mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
  262. mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
  263. mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
  264. mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
  265. mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
  266. mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
  267. mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
  268. mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
  269. mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
  270. mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
  271. mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
  272. mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
  273. mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
  274. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
  275. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
  276. mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
  277. mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
  278. mindsdb/integrations/libs/response.py +67 -52
  279. mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
  280. mindsdb/integrations/utilities/handler_utils.py +15 -3
  281. mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
  282. mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
  283. mindsdb/integrations/utilities/utils.py +3 -3
  284. mindsdb/interfaces/agents/agents_controller.py +164 -1
  285. mindsdb/interfaces/agents/constants.py +15 -0
  286. mindsdb/interfaces/agents/langchain_agent.py +16 -4
  287. mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
  288. mindsdb/interfaces/knowledge_base/controller.py +25 -0
  289. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +13 -10
  290. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
  291. mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
  292. mindsdb/interfaces/query_context/context_controller.py +66 -10
  293. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
  294. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
  295. mindsdb/interfaces/skills/skill_tool.py +202 -57
  296. mindsdb/interfaces/skills/sql_agent.py +205 -17
  297. mindsdb/interfaces/storage/fs.py +1 -0
  298. mindsdb/interfaces/variables/__init__.py +0 -0
  299. mindsdb/interfaces/variables/variables_controller.py +97 -0
  300. mindsdb/migrations/env.py +5 -7
  301. mindsdb/migrations/migrate.py +47 -7
  302. mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
  303. mindsdb/utilities/config.py +331 -219
  304. mindsdb/utilities/starters.py +13 -0
  305. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.0.dist-info}/METADATA +641 -695
  306. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.0.dist-info}/RECORD +309 -288
  307. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.0.dist-info}/WHEEL +1 -1
  308. mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
  309. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.0.dist-info}/licenses/LICENSE +0 -0
  310. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,434 @@
1
+ from typing import List, Dict, Any, Optional
2
+ import json
3
+ import pandas as pd
4
+ import ast
5
+
6
+ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
7
+ Document,
8
+ ProcessedChunk,
9
+ JSONChunkingConfig
10
+ )
11
+ from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import DocumentPreprocessor
12
+ from mindsdb.utilities import log
13
+
14
+ # Set up logger
15
+ logger = log.getLogger(__name__)
16
+
17
+
18
+ class JSONChunkingPreprocessor(DocumentPreprocessor):
19
+ """JSON chunking preprocessor for handling JSON data structures"""
20
+
21
def __init__(self, config: Optional[JSONChunkingConfig] = None):
    """Create the preprocessor and store its chunking configuration.

    Args:
        config: Chunking options to use. When omitted (or falsy), a
            default-constructed ``JSONChunkingConfig`` is used instead.
    """
    super().__init__()
    # Chunk boundaries are derived from the JSON structure itself, so no
    # text splitter is set up here.
    if config:
        self.config = config
    else:
        self.config = JSONChunkingConfig()
26
+
27
def process_documents(self, documents: List[Document]) -> List[ProcessedChunk]:
    """Turn a batch of JSON documents into processed chunks.

    Each document is parsed and chunked independently. A document that
    cannot be parsed, or whose processing raises, contributes a single
    error chunk instead of aborting the whole batch.

    Note:
        A parse result of ``None`` is treated as a parse failure, so a
        document whose content is the valid JSON value ``null`` is also
        reported as an error chunk.

    Args:
        documents: List of documents containing JSON content

    Returns:
        List of processed chunks, in document order
    """
    collected = []

    for document in documents:
        try:
            parsed = self._parse_document_content(document)
            if parsed is not None:
                # Dispatch on the parsed structure (list / dict / primitive)
                collected.extend(self._process_json_data(parsed, document))
            else:
                # Parsing failed: record an error chunk and move on
                collected.append(
                    self._create_error_chunk(
                        document,
                        "Content is neither valid JSON nor a valid Python literal."
                    )
                )
        except Exception as exc:
            # Best-effort batch processing: one bad document must not stop the rest
            logger.error(f"Error processing document {document.id}: {exc}")
            collected.append(self._create_error_chunk(document, str(exc)))

    return collected
58
+
59
def _parse_document_content(self, doc: Document) -> Optional[Any]:
    """Parse document content into a Python object.

    String content is decoded as JSON first. If that fails, it is
    evaluated as a Python literal with ``ast.literal_eval`` (which does
    not execute arbitrary code). Non-string content is assumed to be an
    already-parsed Python object and is returned unchanged.

    Args:
        doc: Document with content to parse

    Returns:
        Parsed content as a Python object, or None if the string is neither
        valid JSON nor a valid Python literal. Content that legitimately
        parses to ``None`` (JSON ``null`` / literal ``None``) is therefore
        indistinguishable from a failure for the caller.
    """
    raw_content = doc.content

    # If content is not a string, assume it's already a Python object
    if not isinstance(raw_content, str):
        return raw_content

    # Try to parse as JSON first
    try:
        return json.loads(raw_content)
    except json.JSONDecodeError:
        pass

    # Fall back to a Python literal (e.g. single-quoted dict reprs).
    # literal_eval may raise TypeError (e.g. an unhashable dict key such as
    # "{[1]: 2}"), MemoryError or RecursionError on malformed or deeply
    # nested input, in addition to SyntaxError / ValueError; all of these
    # mean "not parseable" and must map to the None result.
    try:
        return ast.literal_eval(raw_content)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as e:
        logger.error(f"Error parsing content for document {doc.id}: {e}")
        # The error chunk is created by process_documents
        return None
83
+
84
def _process_json_data(self, json_data: Any, doc: Document) -> List[ProcessedChunk]:
    """Dispatch parsed JSON data to the chunker matching its structure.

    Args:
        json_data: Parsed JSON data as a Python object
        doc: Original document

    Returns:
        List of processed chunks
    """
    # A list is chunked element by element
    if isinstance(json_data, list):
        return self._process_json_list(json_data, doc)

    # Anything that is neither list nor dict is a primitive: one chunk
    if not isinstance(json_data, dict):
        return [self._create_chunk_from_primitive(json_data, doc)]

    # A single object: per-field chunks unless configured to keep it whole
    if not self.config.chunk_by_object:
        return self._process_json_dict(json_data, doc)
    return [self._create_chunk_from_dict(json_data, doc, 0, 1)]
106
+
107
def _create_error_chunk(self, doc: Document, error_message: str) -> ProcessedChunk:
    """Build a placeholder chunk describing a processing failure.

    The chunk id is the document id suffixed with ``_error`` and the
    metadata is prepared for chunk index 0 of the document.

    Args:
        doc: Original document
        error_message: Error message to include in the chunk

    Returns:
        ProcessedChunk with error information
    """
    error_id = f"{doc.id}_error"
    error_text = f"Error processing document: {error_message}"
    error_metadata = self._prepare_chunk_metadata(doc.id, 0, doc.metadata)
    return ProcessedChunk(
        id=error_id,
        content=error_text,
        metadata=error_metadata
    )
122
+
123
def _process_json_list(self, json_list: List, doc: Document) -> List[ProcessedChunk]:
    """Create one chunk per element of a JSON list.

    Dict elements become object chunks. Nested lists are serialized to a
    JSON string and, like any other value, stored as a primitive chunk.

    Args:
        json_list: Parsed JSON list
        doc: Original document

    Returns:
        List of processed chunks, one per list element and in list order
    """
    element_count = len(json_list)

    def build_chunk(position, element):
        if isinstance(element, dict):
            return self._create_chunk_from_dict(element, doc, position, element_count)
        # Nested lists are kept as their JSON text; other values pass through as-is
        payload = json.dumps(element) if isinstance(element, list) else element
        return self._create_chunk_from_primitive(
            payload,
            doc,
            chunk_index=position,
            total_chunks=element_count
        )

    return [build_chunk(position, element) for position, element in enumerate(json_list)]
152
+
153
def _process_json_dict(self, json_dict: Dict, doc: Document) -> List[ProcessedChunk]:
    """Split a single JSON object into one chunk per top-level field.

    Fields are selected with the configured include/exclude lists. Each
    resulting chunk records its field name under ``metadata["field_name"]``.

    Args:
        json_dict: JSON object to split; a JSON string is decoded first
        doc: Original document

    Returns:
        List of processed chunks, or a single error chunk if a string
        argument is not valid JSON
    """
    # Ensure we're working with a dictionary
    if isinstance(json_dict, str):
        try:
            json_dict = json.loads(json_dict)
        except json.JSONDecodeError:
            logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
            return [self._create_error_chunk(doc, "Invalid JSON string")]

    # Keep a field when it passes the include list (if any) and is not excluded
    selected = {
        name: value
        for name, value in json_dict.items()
        if (not self.config.include_fields or name in self.config.include_fields)
        and name not in self.config.exclude_fields
    }
    field_count = len(selected)

    result = []
    for position, (name, value) in enumerate(selected.items()):
        text = self._format_field_content(name, value)

        # Per-chunk metadata, tagged with the originating field
        chunk_metadata = self._prepare_chunk_metadata(doc.id, position, doc.metadata)
        chunk_metadata["field_name"] = name

        # The whole object (not just this field) is offered for metadata extraction
        self._extract_fields_to_metadata(json_dict, chunk_metadata)

        identifier = self._generate_chunk_id(
            chunk_index=position,
            total_chunks=field_count,
            start_char=0,
            end_char=len(text),
            provided_id=doc.id,
            content_column=self.config.content_column
        )

        result.append(
            ProcessedChunk(
                id=identifier,
                content=text,
                metadata=chunk_metadata
            )
        )

    return result
205
+
206
def _create_chunk_from_dict(self,
                            json_dict: Dict,
                            doc: Document,
                            chunk_index: int,
                            total_chunks: int) -> ProcessedChunk:
    """Build a single chunk representing a whole JSON object.

    With ``flatten_nested`` enabled the object is flattened, filtered and
    rendered as text; otherwise its top-level fields are filtered and the
    result is dumped as indented JSON.

    Args:
        json_dict: JSON object to convert; a JSON string is decoded first
        doc: Original document
        chunk_index: Position of this chunk within the document
        total_chunks: Total number of chunks produced for the document

    Returns:
        The processed chunk, or an error chunk if a string argument is not
        valid JSON
    """
    # Ensure we're working with a dictionary
    if isinstance(json_dict, str):
        try:
            json_dict = json.loads(json_dict)
        except json.JSONDecodeError:
            logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
            return self._create_error_chunk(doc, "Invalid JSON string")

    # Render the content
    if not self.config.flatten_nested:
        kept = {}
        for name, value in json_dict.items():
            if self.config.include_fields and name not in self.config.include_fields:
                continue
            if name in self.config.exclude_fields:
                continue
            kept[name] = value
        content = json.dumps(kept, indent=2)
    else:
        flat = self._flatten_dict(json_dict, self.config.nested_delimiter)
        kept = self._filter_fields(flat)
        content = self._dict_to_text(kept)

    # Metadata is built from the document, then enriched from the unfiltered object
    chunk_metadata = self._prepare_chunk_metadata(doc.id, chunk_index, doc.metadata)
    self._extract_fields_to_metadata(json_dict, chunk_metadata)

    identifier = self._generate_chunk_id(
        chunk_index=chunk_index,
        total_chunks=total_chunks,
        start_char=0,
        end_char=len(content),
        provided_id=doc.id,
        content_column=self.config.content_column
    )

    return ProcessedChunk(
        id=identifier,
        content=content,
        metadata=chunk_metadata
    )
252
+
253
+ def _filter_fields(self, flattened_dict: Dict) -> Dict:
254
+ """Filter fields based on include/exclude configuration"""
255
+ # If include_fields is specified, only keep those fields
256
+ if self.config.include_fields:
257
+ filtered_dict = {k: v for k, v in flattened_dict.items()
258
+ if any(k == field or k.startswith(field + self.config.nested_delimiter)
259
+ for field in self.config.include_fields)}
260
+ else:
261
+ filtered_dict = flattened_dict.copy()
262
+
263
+ # Apply exclude_fields
264
+ if self.config.exclude_fields:
265
+ for exclude_field in self.config.exclude_fields:
266
+ # Remove exact field match
267
+ if exclude_field in filtered_dict:
268
+ filtered_dict.pop(exclude_field)
269
+
270
+ # Remove any nested fields
271
+ nested_prefix = exclude_field + self.config.nested_delimiter
272
+ keys_to_remove = [k for k in filtered_dict if k.startswith(nested_prefix)]
273
+ for key in keys_to_remove:
274
+ filtered_dict.pop(key)
275
+
276
+ return filtered_dict
277
+
278
def _create_chunk_from_primitive(
    self,
    value: Any,
    doc: Document,
    chunk_index: int = 0,
    total_chunks: int = 1
) -> ProcessedChunk:
    """Build one chunk whose text is the string form of a primitive value.

    :param value: the value to store; rendered with ``str``
    :param doc: source document, supplies the id and the base metadata
    :param chunk_index: position of this chunk among the document's chunks
    :param total_chunks: number of chunks produced for the document
    :return: the assembled chunk
    """
    text = str(value)
    cfg = self.config

    metadata = self._prepare_chunk_metadata(doc.id, chunk_index, doc.metadata)

    # There is no JSON object to pull fields from; when configured, keep the
    # raw value itself so it can still be used for filtering
    if cfg.extract_all_primitives:
        metadata["field_value"] = value

    # The chunk always spans the whole rendered text
    ident = self._generate_chunk_id(
        chunk_index=chunk_index,
        total_chunks=total_chunks,
        start_char=0,
        end_char=len(text),
        provided_id=doc.id,
        content_column=cfg.content_column
    )

    return ProcessedChunk(id=ident, content=text, metadata=metadata)
311
+
312
+ def _flatten_dict(self, d: Dict, delimiter: str = '.', prefix: str = '') -> Dict:
313
+ """Flatten a nested dictionary structure"""
314
+ result = {}
315
+ for k, v in d.items():
316
+ new_key = f"{prefix}{delimiter}{k}" if prefix else k
317
+ if isinstance(v, dict):
318
+ result.update(self._flatten_dict(v, delimiter, new_key))
319
+ elif isinstance(v, list) and all(isinstance(item, dict) for item in v):
320
+ # Handle lists of dictionaries
321
+ for i, item in enumerate(v):
322
+ result.update(self._flatten_dict(item, delimiter, f"{new_key}[{i}]"))
323
+ else:
324
+ result[new_key] = v
325
+ return result
326
+
327
+ def _dict_to_text(self, d: Dict) -> str:
328
+ """Convert a dictionary to a human-readable text format"""
329
+ lines = []
330
+ for key, value in d.items():
331
+ if value is None:
332
+ continue
333
+ if isinstance(value, list):
334
+ if not value:
335
+ continue
336
+ if all(isinstance(item, dict) for item in value):
337
+ # Format list of dictionaries
338
+ lines.append(f"{key}:")
339
+ for i, item in enumerate(value):
340
+ lines.append(f" Item {i+1}:")
341
+ for k, v in item.items():
342
+ lines.append(f" {k}: {v}")
343
+ else:
344
+ # Format list of primitives
345
+ value_str = ", ".join(str(item) for item in value)
346
+ lines.append(f"{key}: {value_str}")
347
+ else:
348
+ lines.append(f"{key}: {value}")
349
+
350
+ return "\n".join(lines)
351
+
352
+ def _format_field_content(self, key: str, value: Any) -> str:
353
+ """Format a field's content for inclusion in a chunk"""
354
+ if isinstance(value, dict):
355
+ if self.config.flatten_nested:
356
+ flattened = self._flatten_dict(value, self.config.nested_delimiter, key)
357
+ return self._dict_to_text(flattened)
358
+ else:
359
+ return f"{key}: {json.dumps(value, indent=2)}"
360
+ elif isinstance(value, list):
361
+ if all(isinstance(item, dict) for item in value):
362
+ # Format list of dictionaries
363
+ lines = [f"{key}:"]
364
+ for i, item in enumerate(value):
365
+ lines.append(f" Item {i+1}:")
366
+ for k, v in item.items():
367
+ lines.append(f" {k}: {v}")
368
+ return "\n".join(lines)
369
+ else:
370
+ # Format list of primitives
371
+ value_str = ", ".join(str(item) for item in value if item is not None)
372
+ return f"{key}: {value_str}"
373
+ else:
374
+ return f"{key}: {value}"
375
+
376
+ def _extract_fields_to_metadata(self, json_dict: Dict, metadata: Dict) -> None:
377
+ """Extract specified fields from JSON to metadata for filtering"""
378
+ # Ensure we're working with a dictionary
379
+ if isinstance(json_dict, str):
380
+ try:
381
+ json_dict = json.loads(json_dict)
382
+ except json.JSONDecodeError:
383
+ logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
384
+ return
385
+
386
+ # Always flatten the dictionary for metadata extraction
387
+ flattened = self._flatten_dict(json_dict, self.config.nested_delimiter)
388
+
389
+ # If extract_all_primitives is True, extract all primitive values
390
+ if self.config.extract_all_primitives:
391
+ for key, value in flattened.items():
392
+ if isinstance(value, (str, int, float, bool)) and value is not None:
393
+ metadata[f"field_{key}"] = value
394
+ return
395
+
396
+ # If metadata_fields is empty and extract_all_primitives is False,
397
+ # assume all fields should be extracted
398
+ if not self.config.metadata_fields:
399
+ # First try to extract top-level primitive fields
400
+ has_primitives = False
401
+ for key, value in json_dict.items():
402
+ if isinstance(value, (str, int, float, bool)) and value is not None:
403
+ metadata[f"field_{key}"] = value
404
+ has_primitives = True
405
+
406
+ # If no top-level primitives were found, extract all primitives from flattened dict
407
+ if not has_primitives:
408
+ for key, value in flattened.items():
409
+ if isinstance(value, (str, int, float, bool)) and value is not None:
410
+ metadata[f"field_{key}"] = value
411
+ else:
412
+ # Extract only the specified fields
413
+ for field in self.config.metadata_fields:
414
+ if field in flattened and flattened[field] is not None:
415
+ metadata[f"field_{field}"] = flattened[field]
416
+ else:
417
+ # Try to navigate the nested structure manually
418
+ parts = field.split(self.config.nested_delimiter)
419
+ current = json_dict
420
+ found = True
421
+
422
+ for part in parts:
423
+ if isinstance(current, dict) and part in current:
424
+ current = current[part]
425
+ else:
426
+ found = False
427
+ break
428
+
429
+ if found and current is not None:
430
+ metadata[f"field_{field}"] = current
431
+
432
def to_dataframe(self, chunks: List[ProcessedChunk]) -> pd.DataFrame:
    """Build a dataframe with one row per chunk, taken from each chunk's ``model_dump()``."""
    rows = []
    for item in chunks:
        rows.append(item.model_dump())
    return pd.DataFrame(rows)
@@ -13,6 +13,7 @@ from mindsdb.integrations.utilities.rag.settings import LLMConfig
13
13
class PreprocessorType(Enum):
    """Names of the available preprocessing strategies."""
    CONTEXTUAL = "contextual"
    TEXT_CHUNKING = "text_chunking"
    JSON_CHUNKING = "json_chunking"
16
17
 
17
18
 
18
19
  class BasePreprocessingConfig(BaseModel):
@@ -62,6 +63,51 @@ class TextChunkingConfig(BaseModel):
62
63
  arbitrary_types_allowed = True
63
64
 
64
65
 
66
class JSONChunkingConfig(BasePreprocessingConfig):
    """Configuration for JSON chunking preprocessor

    Every option declared here has a default value.
    """
    # --- how the JSON structure is turned into chunk content ---
    flatten_nested: bool = Field(
        default=True,
        description="Whether to flatten nested JSON structures"
    )
    include_metadata: bool = Field(
        default=True,
        description="Whether to include original metadata in chunks"
    )
    chunk_by_object: bool = Field(
        default=True,
        description="Whether to chunk by top-level objects (True) or create a single document (False)"
    )

    # --- which fields end up in the chunk content ---
    exclude_fields: List[str] = Field(
        default_factory=list,
        description="List of fields to exclude from chunking"
    )
    include_fields: List[str] = Field(
        default_factory=list,
        description="List of fields to include in chunking (if empty, all fields except excluded ones are included)"
    )

    # --- which fields are copied into chunk metadata ---
    metadata_fields: List[str] = Field(
        default_factory=list,
        description="List of fields to extract into metadata for filtering "
                    "(can include nested fields using dot notation). "
                    "If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure)."
    )
    extract_all_primitives: bool = Field(
        default=False,
        description="Whether to extract all primitive values (strings, numbers, booleans) into metadata"
    )

    # --- naming ---
    nested_delimiter: str = Field(
        default=".",
        description="Delimiter for flattened nested field names"
    )
    content_column: str = Field(
        default="content",
        description="Name of the content column for chunk ID generation"
    )

    class Config:
        arbitrary_types_allowed = True
109
+
110
+
65
111
  class PreprocessingConfig(BaseModel):
66
112
  """Complete preprocessing configuration"""
67
113
  type: PreprocessorType = Field(
@@ -76,6 +122,10 @@ class PreprocessingConfig(BaseModel):
76
122
  default=None,
77
123
  description="Configuration for text chunking preprocessing"
78
124
  )
125
+ json_chunking_config: Optional[JSONChunkingConfig] = Field(
126
+ default=None,
127
+ description="Configuration for JSON chunking preprocessing"
128
+ )
79
129
 
80
130
  @model_validator(mode='after')
81
131
  def validate_config_presence(self) -> 'PreprocessingConfig':
@@ -84,6 +134,10 @@ class PreprocessingConfig(BaseModel):
84
134
  self.contextual_config = ContextualConfig()
85
135
  if self.type == PreprocessorType.TEXT_CHUNKING and not self.text_chunking_config:
86
136
  self.text_chunking_config = TextChunkingConfig()
137
+ if self.type == PreprocessorType.JSON_CHUNKING and not self.json_chunking_config:
138
+ # Import here to avoid circular imports
139
+ from mindsdb.interfaces.knowledge_base.preprocessing.json_chunker import JSONChunkingConfig
140
+ self.json_chunking_config = JSONChunkingConfig()
87
141
  return self
88
142
 
89
143
 
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional, Iterable
2
2
  import pickle
3
3
  import datetime as dt
4
4
 
@@ -32,7 +32,41 @@ class RunningQuery:
32
32
  self.sql = record.sql
33
33
  self.database = record.database or config.get('default_project')
34
34
 
35
- def get_partition_query(self, step_num: int, query: Select) -> Select:
35
def get_partitions(self, dn, step_call, query: Select) -> Iterable:
    """
    Yield the result of the query piece by piece, so that the following planner
    steps can be executed on every piece.

    When the datanode reports stream support, the pieces come from
    `dn.query_stream`; otherwise the partition query is re-issued until it
    returns no rows.

    :param dn: datanode to execute query
    :param step_call: instance of StepCall; current step number and session are taken from it
    :param query: AST query to execute
    :return: generator with query results (dataframes)
    """
    if dn.has_support_stream():
        stream_query = self.get_partition_query(step_call.current_step_num, query, stream=True)

        for chunk in dn.query_stream(stream_query, fetch_size=self.batch_size):
            # the track value is taken before the consumer receives the chunk,
            # progress is stored only after the consumer asks for the next one
            track_value = self.get_max_track_value(chunk)
            yield chunk
            self.set_progress(chunk, track_value)
        return

    while True:
        # the partition query is rebuilt on every pass
        page_query = self.get_partition_query(step_call.current_step_num, query, stream=False)

        chunk = dn.query(
            query=page_query,
            session=step_call.session
        ).data_frame

        # no more rows: fetching is finished
        if chunk is None or len(chunk) == 0:
            return

        track_value = self.get_max_track_value(chunk)
        yield chunk
        self.set_progress(chunk, track_value)
68
+
69
+ def get_partition_query(self, step_num: int, query: Select, stream=False) -> Select:
36
70
  """
37
71
  Generate query for fetching the next partition
38
72
  It wraps query to
@@ -41,16 +75,34 @@ class RunningQuery:
41
75
  order by track_column
42
76
  limit size {batch_size}
43
77
  And fill track_column, previous_value, batch_size
78
+
79
+ If stream is true:
80
+ - if track_column is defined:
81
+ - don't add limit
82
+ - else:
83
+ - return user query without modifications
44
84
  """
45
85
 
46
- track_column = self.record.parameters['track_column']
86
+ track_column = self.record.parameters.get('track_column')
87
+ if track_column is None and stream:
88
+ # if no track column for stream fetching: it is not resumable query, execute original query
89
+
90
+ # check if it is first run of the query
91
+ if self.record.processed_rows > 0:
92
+ raise RuntimeError("Can't resume query without track_column")
93
+ return query
94
+
95
+ if not stream and track_column is None:
96
+ raise ValueError('Track column is not defined')
47
97
 
48
98
  query = Select(
49
99
  targets=[Star()],
50
100
  from_table=query,
51
101
  order_by=[OrderBy(Identifier(track_column))],
52
- limit=Constant(self.batch_size)
102
+
53
103
  )
104
+ if not stream:
105
+ query.limit = Constant(self.batch_size)
54
106
 
55
107
  track_value = self.record.context.get('track_value')
56
108
  # is it different step?
@@ -114,8 +166,6 @@ class RunningQuery:
114
166
  Store parameters of the step which is about to be split into partitions
115
167
  """
116
168
 
117
- if 'track_column' not in params:
118
- raise ValueError('Track column is not defined')
119
169
  if 'batch_size' not in params:
120
170
  params['batch_size'] = 1000
121
171
 
@@ -123,15 +173,18 @@ class RunningQuery:
123
173
  self.batch_size = self.record.parameters['batch_size']
124
174
  db.session.commit()
125
175
 
126
- def get_max_track_value(self, df: pd.DataFrame) -> pd.DataFrame:
176
def get_max_track_value(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Return max value to use in `set_progress`.
    This function is called before execution of substeps,
    `set_progress` function - after.

    NOTE(review): the value returned is `df[track_column].max()`, i.e. a single
    value rather than a dataframe; the return annotation is kept as-is for
    compatibility and should be revisited.

    :param df: dataframe with the fetched partition
    :return: max value of the track column, or None when no track column is
        set (stream mode)
    """
    # Same lookup as in `get_partition_query`: a missing key and an explicit
    # None both mean "no track column"
    track_column = self.record.parameters.get('track_column')
    if track_column is None:
        # stream mode
        return None
    return df[track_column].max()
135
188
 
136
189
  def set_progress(self, df: pd.DataFrame, max_track_value: int):
137
190
  """
@@ -508,6 +561,7 @@ class QueryContextController:
508
561
  db.Queries.finished_at < (dt.datetime.now() - dt.timedelta(days=1))
509
562
  )
510
563
  for rec in remove_query.all():
564
+ self.get_query(rec.id).remove_from_task()
511
565
  db.session.delete(rec)
512
566
 
513
567
  rec = db.Queries(
@@ -544,6 +598,8 @@ class QueryContextController:
544
598
  if rec is None:
545
599
  raise RuntimeError(f'Query not found: {query_id}')
546
600
 
601
+ self.get_query(rec.id).remove_from_task()
602
+
547
603
  # the query in progress will fail when it tries to update status
548
604
  db.session.delete(rec)
549
605
  db.session.commit()