MindsDB 25.5.3.0__py3-none-any.whl → 25.5.4.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (313)
  1. mindsdb/__about__.py +8 -8
  2. mindsdb/__main__.py +127 -79
  3. mindsdb/api/a2a/__init__.py +0 -0
  4. mindsdb/api/a2a/__main__.py +144 -0
  5. mindsdb/api/a2a/agent.py +308 -0
  6. mindsdb/api/a2a/common/__init__.py +0 -0
  7. mindsdb/api/a2a/common/server/__init__.py +4 -0
  8. mindsdb/api/a2a/common/server/server.py +164 -0
  9. mindsdb/api/a2a/common/server/task_manager.py +287 -0
  10. mindsdb/api/a2a/common/server/utils.py +28 -0
  11. mindsdb/api/a2a/common/types.py +365 -0
  12. mindsdb/api/a2a/constants.py +9 -0
  13. mindsdb/api/a2a/run_a2a.py +86 -0
  14. mindsdb/api/a2a/task_manager.py +560 -0
  15. mindsdb/api/executor/command_executor.py +185 -309
  16. mindsdb/api/executor/datahub/classes/response.py +5 -2
  17. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
  18. mindsdb/api/executor/planner/query_planner.py +10 -1
  19. mindsdb/api/executor/sql_query/result_set.py +185 -52
  20. mindsdb/api/executor/sql_query/sql_query.py +1 -1
  21. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +9 -12
  22. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
  23. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
  24. mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
  25. mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
  26. mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
  27. mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
  28. mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
  29. mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
  30. mindsdb/api/http/initialize.py +99 -83
  31. mindsdb/api/http/namespaces/analysis.py +3 -3
  32. mindsdb/api/http/namespaces/config.py +61 -86
  33. mindsdb/api/http/namespaces/file.py +8 -2
  34. mindsdb/api/http/namespaces/sql.py +13 -27
  35. mindsdb/api/mcp/start.py +42 -5
  36. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
  37. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
  38. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
  39. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
  40. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +82 -115
  41. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
  42. mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
  43. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
  44. mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
  45. mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
  46. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
  47. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
  48. mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
  49. mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
  50. mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
  51. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  52. mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
  53. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
  54. mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
  55. mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
  56. mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
  57. mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
  58. mindsdb/integrations/handlers/byom_handler/requirements.txt +1 -2
  59. mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
  60. mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
  61. mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
  62. mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
  63. mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
  64. mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
  65. mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
  66. mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
  67. mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
  68. mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
  69. mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
  70. mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
  71. mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
  72. mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
  73. mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
  74. mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
  75. mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
  76. mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
  77. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
  78. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
  79. mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
  80. mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
  81. mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
  82. mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
  83. mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
  84. mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
  85. mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
  86. mindsdb/integrations/handlers/email_handler/settings.py +0 -1
  87. mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
  88. mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
  89. mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
  90. mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
  91. mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
  92. mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
  93. mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
  94. mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
  95. mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
  96. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
  97. mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
  98. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
  99. mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
  100. mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
  101. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
  102. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
  103. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
  104. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
  105. mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
  106. mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
  107. mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
  108. mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
  109. mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
  110. mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
  111. mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
  112. mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
  113. mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
  114. mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
  115. mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
  116. mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
  117. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
  118. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
  119. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
  120. mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
  121. mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
  122. mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
  123. mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
  124. mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
  125. mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
  126. mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
  127. mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
  128. mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
  129. mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
  130. mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
  131. mindsdb/integrations/handlers/jira_handler/jira_handler.py +4 -4
  132. mindsdb/integrations/handlers/jira_handler/jira_tables.py +9 -9
  133. mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
  134. mindsdb/integrations/handlers/lancedb_handler/requirements.txt +0 -1
  135. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
  136. mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
  137. mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
  138. mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
  139. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -1
  140. mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
  141. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +37 -20
  142. mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
  143. mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
  144. mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
  145. mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
  146. mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
  147. mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
  148. mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
  149. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
  150. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
  151. mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
  152. mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
  153. mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
  154. mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
  155. mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
  156. mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
  157. mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
  158. mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
  159. mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
  160. mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
  161. mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
  162. mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
  163. mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
  164. mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
  165. mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
  166. mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
  167. mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
  168. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
  169. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  170. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
  171. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
  172. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
  173. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
  174. mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
  175. mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
  176. mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
  177. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
  178. mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
  179. mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
  180. mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
  181. mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
  182. mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
  183. mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
  184. mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
  185. mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
  186. mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
  187. mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
  188. mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
  189. mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
  190. mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
  191. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
  192. mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
  193. mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
  194. mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
  195. mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
  196. mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
  197. mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
  198. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
  199. mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
  200. mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
  201. mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
  202. mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
  203. mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
  204. mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
  205. mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
  206. mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
  207. mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
  208. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
  209. mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
  210. mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
  211. mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
  212. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
  213. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
  214. mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
  215. mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
  216. mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
  217. mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
  218. mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
  219. mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
  220. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
  221. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
  222. mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
  223. mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
  224. mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
  225. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
  226. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
  227. mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
  228. mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
  229. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
  230. mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
  231. mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
  232. mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
  233. mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
  234. mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
  235. mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
  236. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
  237. mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
  238. mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
  239. mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
  240. mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
  241. mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
  242. mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
  243. mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
  244. mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
  245. mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
  246. mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
  247. mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
  248. mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
  249. mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
  250. mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
  251. mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
  252. mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
  253. mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
  254. mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
  255. mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
  256. mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
  257. mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
  258. mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
  259. mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
  260. mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
  261. mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
  262. mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
  263. mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
  264. mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
  265. mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
  266. mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
  267. mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
  268. mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
  269. mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
  270. mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
  271. mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
  272. mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
  273. mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
  274. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
  275. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
  276. mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
  277. mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
  278. mindsdb/integrations/libs/llm/config.py +13 -0
  279. mindsdb/integrations/libs/llm/utils.py +37 -65
  280. mindsdb/integrations/libs/response.py +67 -52
  281. mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
  282. mindsdb/integrations/utilities/handler_utils.py +15 -3
  283. mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
  284. mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
  285. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +230 -227
  286. mindsdb/integrations/utilities/utils.py +3 -3
  287. mindsdb/interfaces/agents/agents_controller.py +164 -1
  288. mindsdb/interfaces/agents/constants.py +32 -13
  289. mindsdb/interfaces/agents/langchain_agent.py +106 -95
  290. mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
  291. mindsdb/interfaces/knowledge_base/controller.py +250 -216
  292. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +13 -10
  293. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
  294. mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
  295. mindsdb/interfaces/query_context/context_controller.py +66 -10
  296. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
  297. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
  298. mindsdb/interfaces/skills/skill_tool.py +202 -57
  299. mindsdb/interfaces/skills/sql_agent.py +205 -17
  300. mindsdb/interfaces/storage/fs.py +1 -0
  301. mindsdb/interfaces/variables/__init__.py +0 -0
  302. mindsdb/interfaces/variables/variables_controller.py +97 -0
  303. mindsdb/migrations/env.py +5 -7
  304. mindsdb/migrations/migrate.py +47 -7
  305. mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
  306. mindsdb/utilities/config.py +287 -216
  307. mindsdb/utilities/starters.py +13 -0
  308. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/METADATA +646 -698
  309. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/RECORD +312 -295
  310. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/WHEEL +1 -1
  311. mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
  312. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/licenses/LICENSE +0 -0
  313. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/top_level.txt +0 -0
@@ -5,15 +5,7 @@ from typing import Dict, List, Optional
5
5
  import pandas as pd
6
6
  import numpy as np
7
7
 
8
- from mindsdb_sql_parser.ast import (
9
- BinaryOperation,
10
- Constant,
11
- Identifier,
12
- Select,
13
- Update,
14
- Delete,
15
- Star
16
- )
8
+ from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
17
9
  from mindsdb_sql_parser.ast.mindsdb import CreatePredictor
18
10
 
19
11
  from mindsdb.integrations.utilities.query_traversal import query_traversal
@@ -27,11 +19,14 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
27
19
  from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
28
20
  from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
29
21
  from mindsdb.integrations.utilities.handler_utils import get_api_key
30
- from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
22
+ from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
23
+ construct_model_from_args,
24
+ )
31
25
 
32
26
  from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
33
27
  from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
34
28
  from mindsdb.interfaces.database.projects import ProjectController
29
+ from mindsdb.interfaces.variables.variables_controller import variables_controller
35
30
  from mindsdb.interfaces.knowledge_base.preprocessing.models import PreprocessingConfig, Document
36
31
  from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import PreprocessorFactory
37
32
  from mindsdb.interfaces.model.functions import PredictorRecordNotFound
@@ -47,11 +42,7 @@ from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMRe
47
42
 
48
43
  logger = log.getLogger(__name__)
49
44
 
50
- KB_TO_VECTORDB_COLUMNS = {
51
- 'id': 'original_doc_id',
52
- 'chunk_id': 'id',
53
- 'chunk_content': 'content'
54
- }
45
+ KB_TO_VECTORDB_COLUMNS = {"id": "original_doc_id", "chunk_id": "id", "chunk_content": "content"}
55
46
 
56
47
 
57
48
  def get_model_params(model_params: dict, default_config_key: str):
@@ -71,23 +62,23 @@ def get_embedding_model_from_params(embedding_model_params: dict):
71
62
  Create embedding model from parameters.
72
63
  """
73
64
  params_copy = copy.deepcopy(embedding_model_params)
74
- provider = params_copy.pop('provider', None).lower()
75
- api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get('api_key')
65
+ provider = params_copy.pop("provider", None).lower()
66
+ api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get("api_key")
76
67
  # Underscores are replaced because the provider name ultimately gets mapped to a class name.
77
68
  # This is mostly to support Azure OpenAI (azure_openai); the mapped class name is 'AzureOpenAIEmbeddings'.
78
- params_copy['class'] = provider.replace('_', '')
79
- if provider == 'azure_openai':
69
+ params_copy["class"] = provider.replace("_", "")
70
+ if provider == "azure_openai":
80
71
  # Azure OpenAI expects the api_key to be passed as 'openai_api_key'.
81
- params_copy['openai_api_key'] = api_key
82
- params_copy['azure_endpoint'] = params_copy.pop('base_url')
83
- if 'chunk_size' not in params_copy:
84
- params_copy['chunk_size'] = 2048
85
- if 'api_version' in params_copy:
86
- params_copy['openai_api_version'] = params_copy['api_version']
72
+ params_copy["openai_api_key"] = api_key
73
+ params_copy["azure_endpoint"] = params_copy.pop("base_url")
74
+ if "chunk_size" not in params_copy:
75
+ params_copy["chunk_size"] = 2048
76
+ if "api_version" in params_copy:
77
+ params_copy["openai_api_version"] = params_copy["api_version"]
87
78
  else:
88
79
  params_copy[f"{provider}_api_key"] = api_key
89
- params_copy.pop('api_key', None)
90
- params_copy['model'] = params_copy.pop('model_name', None)
80
+ params_copy.pop("api_key", None)
81
+ params_copy["model"] = params_copy.pop("model_name", None)
91
82
 
92
83
  return construct_model_from_args(params_copy)
93
84
 
@@ -97,15 +88,26 @@ def get_reranking_model_from_params(reranking_model_params: dict):
97
88
  Create reranking model from parameters.
98
89
  """
99
90
  params_copy = copy.deepcopy(reranking_model_params)
100
- provider = params_copy.get('provider', "openai").lower()
91
+ provider = params_copy.get("provider", "openai").lower()
101
92
 
102
93
  if "api_key" not in params_copy:
103
94
  params_copy["api_key"] = get_api_key(provider, params_copy, strict=False)
104
- params_copy['model'] = params_copy.pop('model_name', None)
95
+ params_copy["model"] = params_copy.pop("model_name", None)
105
96
 
106
97
  return BaseLLMReranker(**params_copy)
107
98
 
108
99
 
100
+ def safe_pandas_is_datetime(value: str) -> bool:
101
+ """
102
+ Check if the value can be parsed as a datetime.
103
+ """
104
+ try:
105
+ result = pd.api.types.is_datetime64_any_dtype(value)
106
+ return result
107
+ except ValueError:
108
+ return False
109
+
110
+
109
111
  class KnowledgeBaseTable:
110
112
  """
111
113
  Knowledge base table interface
@@ -125,6 +127,11 @@ class KnowledgeBaseTable:
125
127
  logger.debug(f"Configuring preprocessing with config: {config}")
126
128
  self.document_preprocessor = None # Reset existing preprocessor
127
129
  if config is not None:
130
+ # Ensure content_column is set for JSON chunking if not already specified
131
+ if config.get("type") == "json_chunking" and config.get("json_chunking_config"):
132
+ if "content_column" not in config["json_chunking_config"]:
133
+ config["json_chunking_config"]["content_column"] = "content"
134
+
128
135
  preprocessing_config = PreprocessingConfig(**config)
129
136
  self.document_preprocessor = PreprocessorFactory.create_preprocessor(preprocessing_config)
130
137
  logger.debug(f"Created preprocessor of type: {type(self.document_preprocessor)}")
@@ -186,11 +193,13 @@ class KnowledgeBaseTable:
186
193
  query_text = item.value
187
194
 
188
195
  # replace content with embeddings
189
- conditions.append(FilterCondition(
190
- column=TableField.EMBEDDINGS.value,
191
- value=self._content_to_embeddings(item.value),
192
- op=FilterOperator.EQUAL,
193
- ))
196
+ conditions.append(
197
+ FilterCondition(
198
+ column=TableField.EMBEDDINGS.value,
199
+ value=self._content_to_embeddings(item.value),
200
+ op=FilterOperator.EQUAL,
201
+ )
202
+ )
194
203
  else:
195
204
  conditions.append(item)
196
205
 
@@ -232,7 +241,7 @@ class KnowledgeBaseTable:
232
241
  def add_relevance(self, df, query_text, relevance_threshold=None):
233
242
  relevance_column = TableField.RELEVANCE.value
234
243
 
235
- reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "default_llm")
244
+ reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "default_reranking_model")
236
245
  if reranking_model_params and query_text and len(df) > 0:
237
246
  # Use reranker for relevance score
238
247
  try:
@@ -244,7 +253,7 @@ class KnowledgeBaseTable:
244
253
 
245
254
  reranker = get_reranking_model_from_params(reranking_model_params)
246
255
  # Get documents to rerank
247
- documents = df['chunk_content'].tolist()
256
+ documents = df["chunk_content"].tolist()
248
257
  # Use the get_scores method with disable_events=True
249
258
  scores = reranker.get_scores(query_text, documents)
250
259
  # Add scores as the relevance column
@@ -257,21 +266,21 @@ class KnowledgeBaseTable:
257
266
  except Exception as e:
258
267
  logger.error(f"Error during reranking: {str(e)}")
259
268
  # Fallback to distance-based relevance
260
- if 'distance' in df.columns:
261
- df[relevance_column] = 1 / (1 + df['distance'])
269
+ if "distance" in df.columns:
270
+ df[relevance_column] = 1 / (1 + df["distance"])
262
271
  else:
263
272
  logger.info("No distance or reranker available")
264
273
 
265
- elif 'distance' in df.columns:
274
+ elif "distance" in df.columns:
266
275
  # Calculate relevance from distance
267
276
  logger.info("Calculating relevance from vector distance")
268
- df[relevance_column] = 1 / (1 + df['distance'])
277
+ df[relevance_column] = 1 / (1 + df["distance"])
269
278
  if relevance_threshold is not None:
270
279
  df = df[df[relevance_column] > relevance_threshold]
271
280
 
272
281
  else:
273
282
  df[relevance_column] = None
274
- df['distance'] = None
283
+ df["distance"] = None
275
284
  # Sort by relevance
276
285
  df = df.sort_values(by=relevance_column, ascending=False)
277
286
  return df
@@ -294,7 +303,7 @@ class KnowledgeBaseTable:
294
303
  columns = list(df.columns)
295
304
  # update id, get from metadata
296
305
  df[TableField.ID.value] = df[TableField.METADATA.value].apply(
297
- lambda m: None if m is None else m.get('original_doc_id')
306
+ lambda m: None if m is None else m.get("original_doc_id")
298
307
  )
299
308
 
300
309
  # id on first place
@@ -309,23 +318,14 @@ class KnowledgeBaseTable:
309
318
  if documents:
310
319
  self.insert_documents(documents)
311
320
 
312
- def insert_web_pages(
313
- self,
314
- urls: List[str],
315
- crawl_depth: int,
316
- limit: int,
317
- filters: List[str] = None
318
- ):
321
+ def insert_web_pages(self, urls: List[str], crawl_depth: int, limit: int, filters: List[str] = None):
319
322
  """Process and insert web pages"""
320
323
  if not self.document_loader:
321
324
  raise ValueError("Document loader not configured")
322
325
 
323
- documents = list(self.document_loader.load_web_pages(
324
- urls,
325
- limit=limit,
326
- crawl_depth=crawl_depth,
327
- filters=filters
328
- ))
326
+ documents = list(
327
+ self.document_loader.load_web_pages(urls, limit=limit, crawl_depth=crawl_depth, filters=filters)
328
+ )
329
329
  if documents:
330
330
  self.insert_documents(documents)
331
331
 
@@ -343,11 +343,9 @@ class KnowledgeBaseTable:
343
343
  if not rows:
344
344
  return
345
345
 
346
- documents = [Document(
347
- content=row.get('content', ''),
348
- id=row.get('id'),
349
- metadata=row.get('metadata', {})
350
- ) for row in rows]
346
+ documents = [
347
+ Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
348
+ ]
351
349
 
352
350
  self.insert_documents(documents)
353
351
 
@@ -368,7 +366,7 @@ class KnowledgeBaseTable:
368
366
  conditions = db_handler.extract_conditions(query.where)
369
367
  doc_id = None
370
368
  for condition in conditions:
371
- if condition.column == 'chunk_id' and condition.op == FilterOperator.EQUAL:
369
+ if condition.column == "chunk_id" and condition.op == FilterOperator.EQUAL:
372
370
  doc_id = condition.value
373
371
 
374
372
  if cont_col in query.update_columns:
@@ -379,7 +377,7 @@ class KnowledgeBaseTable:
379
377
  doc = Document(
380
378
  id=doc_id,
381
379
  content=content.value,
382
- metadata={} # Empty metadata for content-only updates
380
+ metadata={}, # Empty metadata for content-only updates
383
381
  )
384
382
  processed_chunks = self.document_preprocessor.process_documents([doc])
385
383
  if processed_chunks:
@@ -418,7 +416,7 @@ class KnowledgeBaseTable:
418
416
  query: str,
419
417
  keywords: List[str] = None,
420
418
  metadata: Dict[str, str] = None,
421
- distance_function=DistanceFunction.COSINE_DISTANCE
419
+ distance_function=DistanceFunction.COSINE_DISTANCE,
422
420
  ) -> pd.DataFrame:
423
421
  query_df = pd.DataFrame.from_records([{TableField.CONTENT.value: query}])
424
422
  embeddings_df = self._df_to_embeddings(query_df)
@@ -427,14 +425,14 @@ class KnowledgeBaseTable:
427
425
  embeddings = embeddings_df.iloc[0][TableField.EMBEDDINGS.value]
428
426
  keywords_query = None
429
427
  if keywords is not None:
430
- keywords_query = ' '.join(keywords)
428
+ keywords_query = " ".join(keywords)
431
429
  db_handler = self.get_vector_db()
432
430
  return db_handler.hybrid_search(
433
431
  self._kb.vector_database_table,
434
432
  embeddings,
435
433
  query=keywords_query,
436
434
  metadata=metadata,
437
- distance_function=distance_function
435
+ distance_function=distance_function,
438
436
  )
439
437
 
440
438
  def clear(self):
@@ -467,7 +465,7 @@ class KnowledgeBaseTable:
467
465
 
468
466
  # First adapt column names to identify content and metadata columns
469
467
  adapted_df = self._adapt_column_names(df)
470
- content_columns = self._kb.params.get('content_columns', [TableField.CONTENT.value])
468
+ content_columns = self._kb.params.get("content_columns", [TableField.CONTENT.value])
471
469
 
472
470
  # Convert DataFrame rows to documents, creating separate documents for each content column
473
471
  raw_documents = []
@@ -485,15 +483,11 @@ class KnowledgeBaseTable:
485
483
 
486
484
  metadata = {
487
485
  **base_metadata,
488
- 'original_row_index': str(idx), # provide link to original row index
489
- 'content_column': col,
486
+ "original_row_index": str(idx), # provide link to original row index
487
+ "content_column": col,
490
488
  }
491
489
 
492
- raw_documents.append(Document(
493
- content=content_str,
494
- id=doc_id,
495
- metadata=metadata
496
- ))
490
+ raw_documents.append(Document(content=content_str, id=doc_id, metadata=metadata))
497
491
 
498
492
  # Apply preprocessing to all documents if preprocessor exists
499
493
  if self.document_preprocessor:
@@ -502,11 +496,16 @@ class KnowledgeBaseTable:
502
496
  processed_chunks = raw_documents # Use raw documents if no preprocessing
503
497
 
504
498
  # Convert processed chunks back to DataFrame with standard structure
505
- df = pd.DataFrame([{
506
- TableField.CONTENT.value: chunk.content,
507
- TableField.ID.value: chunk.id,
508
- TableField.METADATA.value: chunk.metadata
509
- } for chunk in processed_chunks])
499
+ df = pd.DataFrame(
500
+ [
501
+ {
502
+ TableField.CONTENT.value: chunk.content,
503
+ TableField.ID.value: chunk.id,
504
+ TableField.METADATA.value: chunk.metadata,
505
+ }
506
+ for chunk in processed_chunks
507
+ ]
508
+ )
510
509
 
511
510
  if df.empty:
512
511
  logger.warning("No valid content found in any content columns")
@@ -517,17 +516,17 @@ class KnowledgeBaseTable:
517
516
  df = pd.concat([df, df_emb], axis=1)
518
517
  db_handler = self.get_vector_db()
519
518
 
520
- if params is not None and params.get('kb_no_upsert', False):
519
+ if params is not None and params.get("kb_no_upsert", False):
521
520
  # speed up inserting by disable checking existing records
522
521
  db_handler.insert(self._kb.vector_database_table, df)
523
522
  else:
524
523
  db_handler.do_upsert(self._kb.vector_database_table, df)
525
524
 
526
525
  def _adapt_column_names(self, df: pd.DataFrame) -> pd.DataFrame:
527
- '''
526
+ """
528
527
  Convert input columns for vector db input
529
528
  - id, content and metadata
530
- '''
529
+ """
531
530
  # Debug incoming data
532
531
  logger.debug(f"Input DataFrame columns: {df.columns}")
533
532
  logger.debug(f"Input DataFrame first row: {df.iloc[0].to_dict()}")
@@ -536,7 +535,7 @@ class KnowledgeBaseTable:
536
535
  columns = list(df.columns)
537
536
 
538
537
  # -- prepare id --
539
- id_column = params.get('id_column')
538
+ id_column = params.get("id_column")
540
539
  if id_column is not None and id_column not in columns:
541
540
  id_column = None
542
541
 
@@ -546,8 +545,8 @@ class KnowledgeBaseTable:
546
545
  # Also check for case-insensitive 'id' column
547
546
  if id_column is None:
548
547
  column_map = {col.lower(): col for col in columns}
549
- if 'id' in column_map:
550
- id_column = column_map['id']
548
+ if "id" in column_map:
549
+ id_column = column_map["id"]
551
550
 
552
551
  if id_column is not None:
553
552
  columns.remove(id_column)
@@ -562,8 +561,8 @@ class KnowledgeBaseTable:
562
561
  logger.debug(f"Added IDs: {df_out[TableField.ID.value].tolist()}")
563
562
 
564
563
  # -- prepare content and metadata --
565
- content_columns = params.get('content_columns', [TableField.CONTENT.value])
566
- metadata_columns = params.get('metadata_columns')
564
+ content_columns = params.get("content_columns", [TableField.CONTENT.value])
565
+ metadata_columns = params.get("metadata_columns")
567
566
 
568
567
  logger.debug(f"Processing with: content_columns={content_columns}, metadata_columns={metadata_columns}")
569
568
 
@@ -571,25 +570,19 @@ class KnowledgeBaseTable:
571
570
  if content_columns:
572
571
  # Ensure content columns are case-insensitive
573
572
  column_map = {col.lower(): col for col in columns}
574
- content_columns = [
575
- column_map.get(col.lower(), col)
576
- for col in content_columns
577
- ]
573
+ content_columns = [column_map.get(col.lower(), col) for col in content_columns]
578
574
  logger.debug(f"Mapped content columns: {content_columns}")
579
575
 
580
576
  if metadata_columns:
581
577
  # Ensure metadata columns are case-insensitive
582
578
  column_map = {col.lower(): col for col in columns}
583
- metadata_columns = [
584
- column_map.get(col.lower(), col)
585
- for col in metadata_columns
586
- ]
579
+ metadata_columns = [column_map.get(col.lower(), col) for col in metadata_columns]
587
580
  logger.debug(f"Mapped metadata columns: {metadata_columns}")
588
581
 
589
582
  if content_columns is not None:
590
583
  content_columns = list(set(content_columns).intersection(columns))
591
584
  if len(content_columns) == 0:
592
- raise ValueError(f'Content columns {params.get("content_columns")} not found in dataset: {columns}')
585
+ raise ValueError(f"Content columns {params.get('content_columns')} not found in dataset: {columns}")
593
586
 
594
587
  if metadata_columns is not None:
595
588
  metadata_columns = list(set(metadata_columns).intersection(columns))
@@ -603,12 +596,13 @@ class KnowledgeBaseTable:
603
596
 
604
597
  # Add metadata
605
598
  if metadata_columns and len(metadata_columns) > 0:
599
+
606
600
  def convert_row_to_metadata(row):
607
601
  metadata = {}
608
602
  for col in metadata_columns:
609
603
  value = row[col]
610
604
  # Convert numpy/pandas types to Python native types
611
- if pd.api.types.is_datetime64_any_dtype(value) or isinstance(value, pd.Timestamp):
605
+ if safe_pandas_is_datetime(value) or isinstance(value, pd.Timestamp):
612
606
  value = str(value)
613
607
  elif pd.api.types.is_integer_dtype(value):
614
608
  value = int(value)
@@ -648,7 +642,7 @@ class KnowledgeBaseTable:
648
642
  if self._vector_db is None:
649
643
  database = db.Integration.query.get(self._kb.vector_database_id)
650
644
  if database is None:
651
- raise ValueError('Vector database not found. Is it deleted?')
645
+ raise ValueError("Vector database not found. Is it deleted?")
652
646
  database_name = database.name
653
647
  self._vector_db = self.session.integration_controller.get_data_handler(database_name)
654
648
  return self._vector_db
@@ -673,6 +667,15 @@ class KnowledgeBaseTable:
673
667
 
674
668
  model_id = self._kb.embedding_model_id
675
669
 
670
+ if model_id is None:
671
+ # call litellm handler
672
+ messages = list(df[TableField.CONTENT.value])
673
+ embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
674
+ embedding_params.update(self._kb.params["embedding_model"])
675
+ results = self.call_litellm_embedding(self.session, embedding_params, messages)
676
+ results = [[val] for val in results]
677
+ return pd.DataFrame(results, columns=[TableField.EMBEDDINGS.value])
678
+
676
679
  # get the input columns
677
680
  model_rec = db.session.query(db.Predictor).filter_by(id=model_id).first()
678
681
 
@@ -681,19 +684,15 @@ class KnowledgeBaseTable:
681
684
 
682
685
  project_datanode = self.session.datahub.get(model_project.name)
683
686
 
684
- model_using = model_rec.learn_args.get('using', {})
685
- input_col = model_using.get('question_column')
687
+ model_using = model_rec.learn_args.get("using", {})
688
+ input_col = model_using.get("question_column")
686
689
  if input_col is None:
687
- input_col = model_using.get('input_column')
690
+ input_col = model_using.get("input_column")
688
691
 
689
692
  if input_col is not None and input_col != TableField.CONTENT.value:
690
693
  df = df.rename(columns={TableField.CONTENT.value: input_col})
691
694
 
692
- df_out = project_datanode.predict(
693
- model_name=model_rec.name,
694
- df=df,
695
- params=self.model_params
696
- )
695
+ df_out = project_datanode.predict(model_name=model_rec.name, df=df, params=self.model_params)
697
696
 
698
697
  target = model_rec.to_predict[0]
699
698
  if target != TableField.EMBEDDINGS.value:
@@ -714,6 +713,23 @@ class KnowledgeBaseTable:
714
713
  res = self._df_to_embeddings(df)
715
714
  return res[TableField.EMBEDDINGS.value][0]
716
715
 
716
@staticmethod
def call_litellm_embedding(session, model_params, messages):
    """
    Compute embeddings for ``messages`` through the LiteLLM handler.

    :param session: session object; its ``integration_controller`` is used to look up
        the "litellm" handler module
    :param model_params: embedding model settings. ``provider`` and ``model_name`` are
        required and are combined into ``"<provider>/<model_name>"``; ``base_url`` (if
        present) is forwarded as ``api_base``; every remaining key is passed to the
        handler unchanged. The input dict is never modified.
    :param messages: list of texts to embed
    :return: the value returned by ``Handler.embeddings`` for the model and messages
    :raises ValueError: if ``provider`` or ``model_name`` is missing, or if the LiteLLM
        handler is not installed
    """
    # Work on a private copy so the caller's (possibly persisted) params stay intact.
    args = copy.deepcopy(model_params) if model_params else {}

    # Report missing settings explicitly instead of leaking a bare KeyError from pop().
    missing = [key for key in ("model_name", "provider") if not args.get(key)]
    if missing:
        raise ValueError(
            f"Embedding model parameters are missing required field(s): {', '.join(missing)}"
        )

    llm_model = args.pop("model_name")
    engine = args.pop("provider")

    # LiteLLM addresses models as "<provider>/<model>".
    llm_model = f"{engine}/{llm_model}"

    # LiteLLM expects the endpoint under "api_base" rather than "base_url".
    if "base_url" in args:
        args["api_base"] = args.pop("base_url")

    module = session.integration_controller.get_handler_module("litellm")
    if module is None or module.Handler is None:
        raise ValueError(f'Unable to use "{engine}" provider. Litellm handler is not installed')
    return module.Handler.embeddings(llm_model, messages, args)
732
+
717
733
  def build_rag_pipeline(self, retrieval_config: dict):
718
734
  """
719
735
  Builds a RAG pipeline with returned sources
@@ -729,10 +745,10 @@ class KnowledgeBaseTable:
729
745
  """
730
746
  # Get embedding model from knowledge base
731
747
  embeddings_model = None
732
- embedding_model_params = get_model_params(self._kb.params.get('embedding_model', {}), 'default_embedding_model')
748
+ embedding_model_params = get_model_params(self._kb.params.get("embedding_model", {}), "default_embedding_model")
733
749
  if self._kb.embedding_model:
734
750
  # Extract embedding model args from knowledge base table
735
- embedding_args = self._kb.embedding_model.learn_args.get('using', {})
751
+ embedding_args = self._kb.embedding_model.learn_args.get("using", {})
736
752
  # Construct the embedding model directly
737
753
  embeddings_model = construct_model_from_args(embedding_args)
738
754
  logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
@@ -744,21 +760,17 @@ class KnowledgeBaseTable:
744
760
  logger.debug("Using default embedding model as knowledge base has no embedding model")
745
761
 
746
762
  # Update retrieval config with knowledge base parameters
747
- kb_params = {
748
- 'vector_store_config': {
749
- 'kb_table': self
750
- }
751
- }
763
+ kb_params = {"vector_store_config": {"kb_table": self}}
752
764
 
753
765
  # Load and validate config
754
766
  try:
755
767
  rag_config = load_rag_config(retrieval_config, kb_params, embeddings_model)
756
768
 
757
769
  # Build LLM if specified
758
- if 'llm_model_name' in rag_config:
770
+ if "llm_model_name" in rag_config:
759
771
  llm_args = {"model_name": rag_config.llm_model_name}
760
772
  if not rag_config.llm_provider:
761
- llm_args['provider'] = get_llm_provider(llm_args)
773
+ llm_args["provider"] = get_llm_provider(llm_args)
762
774
  else:
763
775
  llm_args["provider"] = rag_config.llm_provider
764
776
  rag_config.llm = create_chat_model(llm_args)
@@ -779,6 +791,7 @@ class KnowledgeBaseTable:
779
791
  if isinstance(base_metadata, str):
780
792
  try:
781
793
  import ast
794
+
782
795
  return ast.literal_eval(base_metadata)
783
796
  except (SyntaxError, ValueError):
784
797
  logger.warning(f"Could not parse metadata: {base_metadata}. Using empty dict.")
@@ -788,6 +801,7 @@ class KnowledgeBaseTable:
788
801
  def _generate_document_id(self, content: str, content_column: str, provided_id: str = None) -> str:
789
802
  """Generate a deterministic document ID using the utility function."""
790
803
  from mindsdb.interfaces.knowledge_base.utils import generate_document_id
804
+
791
805
  return generate_document_id(content=content, provided_id=provided_id)
792
806
 
793
807
  def _convert_metadata_value(self, value):
@@ -820,6 +834,15 @@ class KnowledgeBaseTable:
820
834
  # Convert everything else to string
821
835
  return str(value)
822
836
 
837
def create_index(self):
    """
    Create an index on the vector-store table backing this knowledge base.

    Takes no arguments: the vector database handler is obtained from
    ``get_vector_db()`` and asked to index ``self._kb.vector_database_table``.
    """
    self.get_vector_db().create_index(self._kb.vector_database_table)
845
+
823
846
 
824
847
  class KnowledgeBaseController:
825
848
  """
@@ -831,14 +854,14 @@ class KnowledgeBaseController:
831
854
  self.session = session
832
855
 
833
856
  def add(
834
- self,
835
- name: str,
836
- project_name: str,
837
- embedding_model: Identifier,
838
- storage: Identifier,
839
- params: dict,
840
- preprocessing_config: Optional[dict] = None,
841
- if_not_exists: bool = False
857
+ self,
858
+ name: str,
859
+ project_name: str,
860
+ storage: Identifier,
861
+ params: dict,
862
+ preprocessing_config: Optional[dict] = None,
863
+ if_not_exists: bool = False,
864
+ # embedding_model: Identifier = None, # Legacy: Allow MindsDB models to be passed as embedding_model.
842
865
  ) -> db.KnowledgeBase:
843
866
  """
844
867
  Add a new knowledge base to the database
@@ -846,15 +869,18 @@ class KnowledgeBaseController:
846
869
  :param is_sparse: Whether to use sparse vectors for embeddings
847
870
  :param vector_size: Optional size specification for vectors, required when is_sparse=True
848
871
  """
872
+ # fill variables
873
+ params = variables_controller.fill_parameters(params)
874
+
849
875
  # Validate preprocessing config first if provided
850
876
  if preprocessing_config is not None:
851
877
  PreprocessingConfig(**preprocessing_config) # Validate before storing
852
878
  params = params or {}
853
- params['preprocessing'] = preprocessing_config
879
+ params["preprocessing"] = preprocessing_config
854
880
 
855
881
  # Check if vector_size is provided when using sparse vectors
856
- is_sparse = params.get('is_sparse')
857
- vector_size = params.get('vector_size')
882
+ is_sparse = params.get("is_sparse")
883
+ vector_size = params.get("vector_size")
858
884
  if is_sparse and vector_size is None:
859
885
  raise ValueError("vector_size is required when is_sparse=True")
860
886
 
@@ -871,41 +897,45 @@ class KnowledgeBaseController:
871
897
  return kb
872
898
  raise EntityExistsError("Knowledge base already exists", name)
873
899
 
874
- embedding_params = copy.deepcopy(config.get('default_embedding_model', {}))
875
-
876
- model_name = None
877
- model_project = project
878
- if embedding_model:
879
- model_name = embedding_model.parts[-1]
880
- if len(embedding_model.parts) > 1:
881
- model_project = self.session.database_controller.get_project(embedding_model.parts[-2])
882
-
883
- elif 'embedding_model' in params:
884
- if isinstance(params['embedding_model'], str):
885
- # it is model name
886
- model_name = params['embedding_model']
887
- else:
888
- # it is params for model
889
- embedding_params.update(params['embedding_model'])
890
-
891
- if model_name is None:
892
- model_name = self._create_embedding_model(
893
- project.name,
894
- params=embedding_params,
895
- kb_name=name,
896
- )
897
- params['created_embedding_model'] = model_name
900
+ embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
901
+
902
+ # Legacy
903
+ # model_name = None
904
+ # model_project = project
905
+ # if embedding_model:
906
+ # model_name = embedding_model.parts[-1]
907
+ # if len(embedding_model.parts) > 1:
908
+ # model_project = self.session.database_controller.get_project(embedding_model.parts[-2])
909
+
910
+ # elif "embedding_model" in params:
911
+ # if isinstance(params["embedding_model"], str):
912
+ # # it is model name
913
+ # model_name = params["embedding_model"]
914
+ # else:
915
+ # # it is params for model
916
+ # embedding_params.update(params["embedding_model"])
917
+
918
+ if "embedding_model" in params:
919
+ if not isinstance(params["embedding_model"], dict):
920
+ raise ValueError("embedding_model should be JSON object with model parameters.")
921
+ embedding_params.update(params["embedding_model"])
922
+
923
+ # if model_name is None: # Legacy
924
+ model_name = self._create_embedding_model(
925
+ project.name,
926
+ params=embedding_params,
927
+ kb_name=name,
928
+ )
929
+ if model_name is not None:
930
+ params["created_embedding_model"] = model_name
898
931
 
899
932
  embedding_model_id = None
900
933
  if model_name is not None:
901
- model = self.session.model_controller.get_model(
902
- name=model_name,
903
- project_name=model_project.name
904
- )
905
- model_record = db.Predictor.query.get(model['id'])
934
+ model = self.session.model_controller.get_model(name=model_name, project_name=project.name)
935
+ model_record = db.Predictor.query.get(model["id"])
906
936
  embedding_model_id = model_record.id
907
937
 
908
- reranking_model_params = get_model_params(params.get('reranking_model', {}), 'default_llm')
938
+ reranking_model_params = get_model_params(params.get("reranking_model", {}), "default_reranking_model")
909
939
  if reranking_model_params:
910
940
  # Get reranking model from params.
911
941
  # This is called here to check validity of the parameters.
@@ -913,17 +943,17 @@ class KnowledgeBaseController:
913
943
 
914
944
  # search for the vector database table
915
945
  if storage is None:
916
- cloud_pg_vector = os.environ.get('KB_PGVECTOR_URL')
946
+ cloud_pg_vector = os.environ.get("KB_PGVECTOR_URL")
917
947
  if cloud_pg_vector:
918
948
  vector_table_name = name
919
949
  # Add sparse vector support for pgvector
920
950
  vector_db_params = {}
921
951
  # Check both explicit parameter and model configuration
922
- is_sparse = is_sparse or model_record.learn_args.get('using', {}).get('sparse')
952
+ is_sparse = is_sparse or model_record.learn_args.get("using", {}).get("sparse")
923
953
  if is_sparse:
924
- vector_db_params['is_sparse'] = True
954
+ vector_db_params["is_sparse"] = True
925
955
  if vector_size is not None:
926
- vector_db_params['vector_size'] = vector_size
956
+ vector_db_params["vector_size"] = vector_size
927
957
  vector_db_name = self._create_persistent_pgvector(vector_db_params)
928
958
 
929
959
  else:
@@ -931,26 +961,22 @@ class KnowledgeBaseController:
931
961
  vector_table_name = "default_collection"
932
962
  vector_db_name = self._create_persistent_chroma(name)
933
963
  # memorize to remove it later
934
- params['default_vector_storage'] = vector_db_name
964
+ params["default_vector_storage"] = vector_db_name
935
965
  elif len(storage.parts) != 2:
936
- raise ValueError('Storage param has to be vector db with table')
966
+ raise ValueError("Storage param has to be vector db with table")
937
967
  else:
938
968
  vector_db_name, vector_table_name = storage.parts
939
969
 
940
970
  # create table in vectordb before creating KB
941
- self.session.datahub.get(vector_db_name).integration_handler.create_table(
942
- vector_table_name
943
- )
944
- vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
971
+ self.session.datahub.get(vector_db_name).integration_handler.create_table(vector_table_name)
972
+ vector_database_id = self.session.integration_controller.get(vector_db_name)["id"]
945
973
 
946
974
  # Store sparse vector settings in params if specified
947
975
  if is_sparse:
948
976
  params = params or {}
949
- params['vector_config'] = {
950
- 'is_sparse': is_sparse
951
- }
977
+ params["vector_config"] = {"is_sparse": is_sparse}
952
978
  if vector_size is not None:
953
- params['vector_config']['vector_size'] = vector_size
979
+ params["vector_config"]["vector_size"] = vector_size
954
980
 
955
981
  kb = db.KnowledgeBase(
956
982
  name=name,
@@ -972,7 +998,7 @@ class KnowledgeBaseController:
972
998
  if self.session.integration_controller.get(vector_store_name):
973
999
  return vector_store_name
974
1000
 
975
- self.session.integration_controller.add(vector_store_name, 'pgvector', params or {})
1001
+ self.session.integration_controller.add(vector_store_name, "pgvector", params or {})
976
1002
  return vector_store_name
977
1003
 
978
1004
  def _create_persistent_chroma(self, kb_name, engine="chromadb"):
@@ -990,7 +1016,7 @@ class KnowledgeBaseController:
990
1016
  self.session.integration_controller.add(vector_store_name, engine, connection_args)
991
1017
  return vector_store_name
992
1018
 
993
- def _create_embedding_model(self, project_name, engine="openai", params: dict = None, kb_name=''):
1019
+ def _create_embedding_model(self, project_name, engine="openai", params: dict = None, kb_name=""):
994
1020
  """create a default embedding model for knowledge base, if not specified"""
995
1021
  model_name = f"kb_embedding_{kb_name}"
996
1022
 
@@ -1002,42 +1028,47 @@ class KnowledgeBaseController:
1002
1028
  except PredictorRecordNotFound:
1003
1029
  pass
1004
1030
 
1005
- if 'provider' in params:
1006
- engine = params.pop('provider').lower()
1031
+ if params.get("provider", None) not in ("openai", "azure"):
1032
+ # try use litellm
1033
+ KnowledgeBaseTable.call_litellm_embedding(self.session, params, ["test"])
1034
+ return
1007
1035
 
1008
- api_key = get_api_key(engine, params, strict=False) or params.pop('api_key')
1036
+ if "provider" in params:
1037
+ engine = params.pop("provider").lower()
1009
1038
 
1010
- if engine == 'azure_openai':
1011
- engine = 'openai'
1012
- params['provider'] = 'azure'
1039
+ api_key = get_api_key(engine, params, strict=False) or params.pop("api_key")
1013
1040
 
1014
- if engine == 'openai':
1015
- if 'question_column' not in params:
1016
- params['question_column'] = 'content'
1041
+ if engine == "azure_openai":
1042
+ engine = "openai"
1043
+ params["provider"] = "azure"
1044
+
1045
+ if engine == "openai":
1046
+ if "question_column" not in params:
1047
+ params["question_column"] = "content"
1017
1048
  if api_key:
1018
1049
  params[f"{engine}_api_key"] = api_key
1019
- if 'base_url' in params:
1020
- params['api_base'] = params.pop('base_url')
1050
+ if "api_key" in params:
1051
+ params.pop("api_key")
1052
+ if "base_url" in params:
1053
+ params["api_base"] = params.pop("base_url")
1021
1054
 
1022
- params['engine'] = engine
1023
- params['join_learn_process'] = True
1024
- params['mode'] = 'embedding'
1055
+ params["engine"] = engine
1056
+ params["join_learn_process"] = True
1057
+ params["mode"] = "embedding"
1025
1058
 
1026
1059
  # Include API key if provided.
1027
1060
  statement = CreatePredictor(
1028
1061
  name=Identifier(parts=[project_name, model_name]),
1029
1062
  using=params,
1030
- targets=[
1031
- Identifier(parts=[TableField.EMBEDDINGS.value])
1032
- ]
1063
+ targets=[Identifier(parts=[TableField.EMBEDDINGS.value])],
1033
1064
  )
1034
1065
 
1035
1066
  command_executor = ExecuteCommands(self.session)
1036
1067
  resp = command_executor.answer_create_predictor(statement, project_name)
1037
1068
  # check model status
1038
1069
  record = resp.data.records[0]
1039
- if record['STATUS'] == 'error':
1040
- raise ValueError('Embedding model error:' + record['ERROR'])
1070
+ if record["STATUS"] == "error":
1071
+ raise ValueError("Embedding model error:" + record["ERROR"])
1041
1072
  return model_name
1042
1073
 
1043
1074
  def delete(self, name: str, project_name: int, if_exists: bool = False) -> None:
@@ -1064,16 +1095,16 @@ class KnowledgeBaseController:
1064
1095
  db.session.commit()
1065
1096
 
1066
1097
  # drop objects if they were created automatically
1067
- if 'default_vector_storage' in kb.params:
1098
+ if "default_vector_storage" in kb.params:
1068
1099
  try:
1069
- handler = self.session.datahub.get(kb.params['default_vector_storage']).integration_handler
1100
+ handler = self.session.datahub.get(kb.params["default_vector_storage"]).integration_handler
1070
1101
  handler.drop_table(kb.vector_database_table)
1071
- self.session.integration_controller.delete(kb.params['default_vector_storage'])
1102
+ self.session.integration_controller.delete(kb.params["default_vector_storage"])
1072
1103
  except EntityNotExistsError:
1073
1104
  pass
1074
- if 'created_embedding_model' in kb.params:
1105
+ if "created_embedding_model" in kb.params:
1075
1106
  try:
1076
- self.session.model_controller.delete_model(kb.params['created_embedding_model'], project_name)
1107
+ self.session.model_controller.delete_model(kb.params["created_embedding_model"], project_name)
1077
1108
  except EntityNotExistsError:
1078
1109
  pass
1079
1110
 
@@ -1104,11 +1135,11 @@ class KnowledgeBaseController:
1104
1135
  if kb is not None:
1105
1136
  table = KnowledgeBaseTable(kb, self.session)
1106
1137
  if params:
1107
- table.model_params = params.get('model')
1138
+ table.model_params = params.get("model")
1108
1139
 
1109
1140
  # Always configure preprocessing - either from params or default
1110
- if kb.params and 'preprocessing' in kb.params:
1111
- table.configure_preprocessing(kb.params['preprocessing'])
1141
+ if kb.params and "preprocessing" in kb.params:
1142
+ table.configure_preprocessing(kb.params["preprocessing"])
1112
1143
  else:
1113
1144
  table.configure_preprocessing(None) # This ensures default preprocessor is created
1114
1145
 
@@ -1124,35 +1155,38 @@ class KnowledgeBaseController:
1124
1155
  if project_name is not None:
1125
1156
  projects = [p for p in projects if p.name == project_name]
1126
1157
 
1127
- query = (
1128
- db.session.query(db.KnowledgeBase)
1129
- .filter(db.KnowledgeBase.project_id.in_(list([p.id for p in projects])))
1158
+ query = db.session.query(db.KnowledgeBase).filter(
1159
+ db.KnowledgeBase.project_id.in_(list([p.id for p in projects]))
1130
1160
  )
1131
1161
 
1132
1162
  data = []
1133
- project_names = {
1134
- i.id: i.name
1135
- for i in project_controller.get_list()
1136
- }
1163
+ project_names = {i.id: i.name for i in project_controller.get_list()}
1137
1164
 
1138
1165
  for record in query:
1139
1166
  vector_database = record.vector_database
1140
1167
  embedding_model = record.embedding_model
1141
1168
 
1142
- data.append({
1143
- 'id': record.id,
1144
- 'name': record.name,
1145
- 'project_id': record.project_id,
1146
- 'project_name': project_names[record.project_id],
1147
- 'embedding_model': embedding_model.name if embedding_model is not None else None,
1148
- 'vector_database': None if vector_database is None else vector_database.name,
1149
- 'vector_database_table': record.vector_database_table,
1150
- 'query_id': record.query_id,
1151
- 'params': record.params
1152
- })
1169
+ data.append(
1170
+ {
1171
+ "id": record.id,
1172
+ "name": record.name,
1173
+ "project_id": record.project_id,
1174
+ "project_name": project_names[record.project_id],
1175
+ "embedding_model": embedding_model.name if embedding_model is not None else None,
1176
+ "vector_database": None if vector_database is None else vector_database.name,
1177
+ "vector_database_table": record.vector_database_table,
1178
+ "query_id": record.query_id,
1179
+ "params": record.params,
1180
+ }
1181
+ )
1153
1182
 
1154
1183
  return data
1155
1184
 
1185
+ def create_index(self, table_name, project_name):
1186
+ project_id = self.session.database_controller.get_project(project_name).id
1187
+ kb_table = self.get_table(table_name, project_id)
1188
+ kb_table.create_index()
1189
+
1156
1190
  def update(self, name: str, project_id: int, **kwargs) -> db.KnowledgeBase:
1157
1191
  """
1158
1192
  Update a knowledge base record