MindsDB 25.4.5.0__py3-none-any.whl → 25.5.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (350)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +215 -185
  3. mindsdb/api/a2a/__init__.py +0 -0
  4. mindsdb/api/a2a/__main__.py +114 -0
  5. mindsdb/api/a2a/a2a_client.py +439 -0
  6. mindsdb/api/a2a/agent.py +308 -0
  7. mindsdb/api/a2a/common/__init__.py +0 -0
  8. mindsdb/api/a2a/common/client/__init__.py +4 -0
  9. mindsdb/api/a2a/common/client/card_resolver.py +21 -0
  10. mindsdb/api/a2a/common/client/client.py +86 -0
  11. mindsdb/api/a2a/common/server/__init__.py +4 -0
  12. mindsdb/api/a2a/common/server/server.py +164 -0
  13. mindsdb/api/a2a/common/server/task_manager.py +287 -0
  14. mindsdb/api/a2a/common/server/utils.py +28 -0
  15. mindsdb/api/a2a/common/types.py +365 -0
  16. mindsdb/api/a2a/constants.py +9 -0
  17. mindsdb/api/a2a/run_a2a.py +129 -0
  18. mindsdb/api/a2a/task_manager.py +594 -0
  19. mindsdb/api/executor/command_executor.py +49 -28
  20. mindsdb/api/executor/datahub/classes/response.py +5 -2
  21. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
  22. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
  23. mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
  24. mindsdb/api/executor/planner/query_planner.py +14 -2
  25. mindsdb/api/executor/sql_query/result_set.py +185 -52
  26. mindsdb/api/executor/sql_query/sql_query.py +1 -1
  27. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +11 -13
  28. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
  29. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
  30. mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
  31. mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
  32. mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
  33. mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
  34. mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
  35. mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
  36. mindsdb/api/http/initialize.py +118 -85
  37. mindsdb/api/http/namespaces/analysis.py +17 -4
  38. mindsdb/api/http/namespaces/file.py +8 -2
  39. mindsdb/api/http/namespaces/sql.py +13 -27
  40. mindsdb/api/http/namespaces/tree.py +1 -1
  41. mindsdb/api/http/start.py +7 -2
  42. mindsdb/api/mcp/start.py +42 -5
  43. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
  44. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
  45. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
  46. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
  47. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +86 -123
  48. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
  49. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
  50. mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
  51. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
  52. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
  53. mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
  54. mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
  55. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
  56. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
  57. mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
  58. mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
  59. mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
  60. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  61. mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
  62. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
  63. mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
  64. mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
  65. mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
  66. mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
  67. mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
  68. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
  69. mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
  70. mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
  71. mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
  72. mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
  73. mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
  74. mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
  75. mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
  76. mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
  77. mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
  78. mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
  79. mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
  80. mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
  81. mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
  82. mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
  83. mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
  84. mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
  85. mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
  86. mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
  87. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
  88. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
  89. mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
  90. mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
  91. mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
  92. mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
  93. mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
  94. mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
  95. mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
  96. mindsdb/integrations/handlers/email_handler/settings.py +0 -1
  97. mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
  98. mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
  99. mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
  100. mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
  101. mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
  102. mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
  103. mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
  104. mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
  105. mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
  106. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
  107. mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
  108. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
  109. mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
  110. mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
  111. mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
  112. mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
  113. mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
  114. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
  115. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
  116. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
  117. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
  118. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
  119. mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
  120. mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
  121. mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
  122. mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
  123. mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
  124. mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
  125. mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
  126. mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
  127. mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
  128. mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
  129. mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
  130. mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
  131. mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
  132. mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
  133. mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
  134. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
  135. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
  136. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
  137. mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
  138. mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
  139. mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
  140. mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
  141. mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
  142. mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
  143. mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
  144. mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
  145. mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
  146. mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
  147. mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
  148. mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
  149. mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
  150. mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
  151. mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
  152. mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
  153. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
  154. mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
  155. mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
  156. mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
  157. mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
  158. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -3
  159. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  160. mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
  161. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
  162. mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
  163. mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
  164. mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
  165. mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
  166. mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
  167. mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
  168. mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
  169. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
  170. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
  171. mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
  172. mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
  173. mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
  174. mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
  175. mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
  176. mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
  177. mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
  178. mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
  179. mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
  180. mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
  181. mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
  182. mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
  183. mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
  184. mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
  185. mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
  186. mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
  187. mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
  188. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
  189. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  190. mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
  191. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
  192. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
  193. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
  194. mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
  195. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
  196. mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
  197. mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
  198. mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
  199. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
  200. mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
  201. mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
  202. mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
  203. mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
  204. mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
  205. mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
  206. mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
  207. mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
  208. mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
  209. mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
  210. mindsdb/integrations/handlers/openai_handler/openai_handler.py +5 -4
  211. mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
  212. mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
  213. mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
  214. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
  215. mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
  216. mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
  217. mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
  218. mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
  219. mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
  220. mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
  221. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
  222. mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
  223. mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
  224. mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
  225. mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
  226. mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
  227. mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
  228. mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
  229. mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
  230. mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
  231. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
  232. mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
  233. mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
  234. mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
  235. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
  236. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
  237. mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
  238. mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
  239. mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
  240. mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
  241. mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
  242. mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
  243. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
  244. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
  245. mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
  246. mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
  247. mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
  248. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
  249. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
  250. mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
  251. mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
  252. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
  253. mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
  254. mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
  255. mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
  256. mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
  257. mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
  258. mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
  259. mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
  260. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
  261. mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
  262. mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
  263. mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
  264. mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
  265. mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
  266. mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
  267. mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
  268. mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
  269. mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
  270. mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
  271. mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
  272. mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
  273. mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
  274. mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
  275. mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
  276. mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
  277. mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
  278. mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
  279. mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
  280. mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
  281. mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
  282. mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
  283. mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
  284. mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
  285. mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
  286. mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
  287. mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
  288. mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
  289. mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
  290. mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
  291. mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
  292. mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
  293. mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
  294. mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
  295. mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
  296. mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
  297. mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
  298. mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
  299. mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
  300. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
  301. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
  302. mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
  303. mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
  304. mindsdb/integrations/libs/response.py +67 -52
  305. mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
  306. mindsdb/integrations/utilities/files/file_reader.py +5 -2
  307. mindsdb/integrations/utilities/handler_utils.py +15 -3
  308. mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
  309. mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
  310. mindsdb/integrations/utilities/utils.py +3 -3
  311. mindsdb/interfaces/agents/agents_controller.py +164 -1
  312. mindsdb/interfaces/agents/constants.py +29 -2
  313. mindsdb/interfaces/agents/langchain_agent.py +18 -8
  314. mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
  315. mindsdb/interfaces/database/projects.py +1 -7
  316. mindsdb/interfaces/functions/controller.py +11 -14
  317. mindsdb/interfaces/functions/to_markdown.py +9 -124
  318. mindsdb/interfaces/knowledge_base/controller.py +47 -19
  319. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +41 -15
  320. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
  321. mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
  322. mindsdb/interfaces/knowledge_base/utils.py +10 -15
  323. mindsdb/interfaces/model/model_controller.py +0 -2
  324. mindsdb/interfaces/query_context/context_controller.py +66 -10
  325. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
  326. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
  327. mindsdb/interfaces/skills/skill_tool.py +202 -57
  328. mindsdb/interfaces/skills/sql_agent.py +238 -28
  329. mindsdb/interfaces/storage/fs.py +1 -0
  330. mindsdb/interfaces/variables/__init__.py +0 -0
  331. mindsdb/interfaces/variables/variables_controller.py +97 -0
  332. mindsdb/migrations/env.py +5 -7
  333. mindsdb/migrations/migrate.py +47 -9
  334. mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
  335. mindsdb/utilities/config.py +333 -220
  336. mindsdb/utilities/context.py +1 -1
  337. mindsdb/utilities/functions.py +0 -36
  338. mindsdb/utilities/langfuse.py +19 -10
  339. mindsdb/utilities/otel/__init__.py +9 -193
  340. mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
  341. mindsdb/utilities/otel/prepare.py +198 -0
  342. mindsdb/utilities/sql.py +83 -0
  343. mindsdb/utilities/starters.py +13 -0
  344. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/METADATA +351 -338
  345. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/RECORD +348 -322
  346. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/WHEEL +1 -1
  347. mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
  348. mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
  349. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/licenses/LICENSE +0 -0
  350. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  import enum
2
2
  import inspect
3
+
3
4
  from dataclasses import dataclass
4
5
  from collections import defaultdict
5
6
  from typing import List, Dict, Optional
@@ -14,7 +15,7 @@ from mindsdb.utilities.config import config
14
15
  from mindsdb.interfaces.storage import db
15
16
  from mindsdb.interfaces.skills.sql_agent import SQLAgent
16
17
  from mindsdb.integrations.libs.vectordatabase_handler import TableField
17
-
18
+ from mindsdb.interfaces.agents.constants import DEFAULT_TEXT2SQL_DATABASE
18
19
 
19
20
  _DEFAULT_TOP_K_SIMILARITY_SEARCH = 5
20
21
  _MAX_CACHE_SIZE = 1000
@@ -120,7 +121,6 @@ class SkillToolController:
120
121
  try:
121
122
  from mindsdb.interfaces.agents.mindsdb_database_agent import MindsDBSQL
122
123
  from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_toolkit import MindsDBSQLToolkit
123
- from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
124
124
  except ImportError:
125
125
  raise ImportError(
126
126
  'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')
@@ -132,75 +132,220 @@ class SkillToolController:
132
132
  return f'`{name}`'
133
133
 
134
134
  tables_list = []
135
+ knowledge_bases_list = []
136
+ ignore_knowledge_bases_list = []
137
+
138
+ # Track databases extracted from dot notation
139
+ extracted_databases = set()
140
+
141
+ # Initialize knowledge_base_database with default value
142
+ knowledge_base_database = DEFAULT_TEXT2SQL_DATABASE # Default to mindsdb project
143
+
144
+ # First pass: collect all database and knowledge base parameters
135
145
  for skill in skills:
136
- database = skill.params['database']
137
- restriction_on_tables = skill.restriction_on_tables
138
- if restriction_on_tables is None:
139
- handler = command_executor.session.integration_controller.get_data_handler(database)
140
- if 'all' in inspect.signature(handler.get_tables).parameters:
141
- response = handler.get_tables(all=True)
142
- else:
143
- response = handler.get_tables()
144
- # no restrictions
145
- columns = [c.lower() for c in response.data_frame.columns]
146
- name_idx = columns.index('table_name') if 'table_name' in columns else 0
147
-
148
- if 'table_schema' in response.data_frame.columns:
149
- for _, row in response.data_frame.iterrows():
150
- tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
151
- else:
152
- for table_name in response.data_frame.iloc[:, name_idx]:
153
- tables_list.append(f"{database}.{escape_table_name(table_name)}")
146
+ # Update knowledge_base_database if specified in any skill
147
+ if skill.params.get('knowledge_base_database'):
148
+ knowledge_base_database = skill.params.get('knowledge_base_database')
149
+
150
+ # Extract databases from include_tables with dot notation
151
+ if skill.params.get('include_tables'):
152
+ include_tables = skill.params.get('include_tables')
153
+ if isinstance(include_tables, str):
154
+ include_tables = [t.strip() for t in include_tables.split(',')]
155
+
156
+ # Extract database names from dot notation
157
+ for table in include_tables:
158
+ if '.' in table:
159
+ db_name = table.split('.')[0]
160
+ extracted_databases.add(db_name)
161
+
162
+ # Extract databases from include_knowledge_bases with dot notation
163
+ if skill.params.get('include_knowledge_bases'):
164
+ include_kbs = skill.params.get('include_knowledge_bases')
165
+ if isinstance(include_kbs, str):
166
+ include_kbs = [kb.strip() for kb in include_kbs.split(',')]
167
+
168
+ # Extract database names from dot notation
169
+ for kb in include_kbs:
170
+ if '.' in kb:
171
+ db_name = kb.split('.')[0]
172
+ if db_name != knowledge_base_database:
173
+ # Only update if it's different from the default
174
+ knowledge_base_database = db_name
175
+
176
+ # Second pass: collect all tables and knowledge base restrictions
177
+ for skill in skills:
178
+ # Get database for tables (this is an actual database connection)
179
+ database = skill.params.get('database', DEFAULT_TEXT2SQL_DATABASE)
180
+
181
+ # Add databases extracted from dot notation if no explicit database is provided
182
+ if not database and extracted_databases:
183
+ # Use the first extracted database if no explicit database is provided
184
+ database = next(iter(extracted_databases))
185
+ # Update the skill params with the extracted database
186
+ skill.params['database'] = database
187
+
188
+ # Extract knowledge base restrictions if they exist in the skill params
189
+ if skill.params.get('include_knowledge_bases'):
190
+ # Convert to list if it's a string
191
+ include_kbs = skill.params.get('include_knowledge_bases')
192
+ if isinstance(include_kbs, str):
193
+ include_kbs = [kb.strip() for kb in include_kbs.split(',')]
194
+
195
+ # Process each knowledge base name
196
+ for kb in include_kbs:
197
+ # If it doesn't have a dot, prefix it with the knowledge_base_database
198
+ if '.' not in kb:
199
+ knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
200
+ else:
201
+ knowledge_bases_list.append(kb)
202
+
203
+ # Collect ignore_knowledge_bases
204
+ if skill.params.get('ignore_knowledge_bases'):
205
+ # Convert to list if it's a string
206
+ ignore_kbs = skill.params.get('ignore_knowledge_bases')
207
+ if isinstance(ignore_kbs, str):
208
+ ignore_kbs = [kb.strip() for kb in ignore_kbs.split(',')]
209
+
210
+ # Process each knowledge base name to ignore
211
+ for kb in ignore_kbs:
212
+ # If it doesn't have a dot, prefix it with the knowledge_base_database
213
+ if '.' not in kb:
214
+ ignore_knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
215
+ else:
216
+ ignore_knowledge_bases_list.append(kb)
217
+
218
+ # Skip if no database specified
219
+ if not database:
154
220
  continue
155
- for schema_name, tables in restriction_on_tables.items():
156
- for table in tables:
157
- if schema_name is None:
158
- tables_list.append(f'{database}.{escape_table_name(table)}')
221
+
222
+ # Process include_tables with dot notation
223
+ if skill.params.get('include_tables'):
224
+ include_tables = skill.params.get('include_tables')
225
+ if isinstance(include_tables, str):
226
+ include_tables = [t.strip() for t in include_tables.split(',')]
227
+
228
+ for table in include_tables:
229
+ # If table already has a database prefix, use it as is
230
+ if '.' in table:
231
+ # Check if the table already has backticks
232
+ if '`' in table:
233
+ tables_list.append(table)
234
+ else:
235
+ # Apply escape_table_name only to the table part
236
+ parts = table.split('.')
237
+ if len(parts) == 2:
238
+ # Format: database.table
239
+ tables_list.append(f"{parts[0]}.{escape_table_name(parts[1])}")
240
+ elif len(parts) == 3:
241
+ # Format: database.schema.table
242
+ tables_list.append(f"{parts[0]}.{parts[1]}.{escape_table_name(parts[2])}")
243
+ else:
244
+ # Unusual format, escape the whole thing
245
+ tables_list.append(escape_table_name(table))
159
246
  else:
160
- tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
247
+ # Otherwise, prefix with the database
248
+ tables_list.append(f"{database}.{escape_table_name(table)}")
249
+
250
+ # Skip further table processing if include_tables is specified
251
+ continue
252
+
253
+ restriction_on_tables = skill.restriction_on_tables
254
+
255
+ if restriction_on_tables is None and database:
256
+ try:
257
+ handler = command_executor.session.integration_controller.get_data_handler(database)
258
+ if 'all' in inspect.signature(handler.get_tables).parameters:
259
+ response = handler.get_tables(all=True)
260
+ else:
261
+ response = handler.get_tables()
262
+ # no restrictions
263
+ columns = [c.lower() for c in response.data_frame.columns]
264
+ name_idx = columns.index('table_name') if 'table_name' in columns else 0
265
+
266
+ if 'table_schema' in response.data_frame.columns:
267
+ for _, row in response.data_frame.iterrows():
268
+ tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
269
+ else:
270
+ for table_name in response.data_frame.iloc[:, name_idx]:
271
+ tables_list.append(f"{database}.{escape_table_name(table_name)}")
272
+ except Exception as e:
273
+ logger.warning(f"Could not get tables from database {database}: {str(e)}")
274
+ continue
275
+
276
+ # Handle table restrictions
277
+ if restriction_on_tables and database:
278
+ for schema_name, tables in restriction_on_tables.items():
279
+ for table in tables:
280
+ # Check if the table already has dot notation (e.g., 'postgresql_conn.home_rentals')
281
+ if '.' in table:
282
+ # Table already has database prefix, add it directly
283
+ tables_list.append(escape_table_name(table))
284
+ else:
285
+ # No dot notation, apply schema and database as needed
286
+ if schema_name is None:
287
+ tables_list.append(f'{database}.{escape_table_name(table)}')
288
+ else:
289
+ tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
290
+ continue
291
+
292
+ # Remove duplicates from lists
293
+ tables_list = list(set(tables_list))
294
+ knowledge_bases_list = list(set(knowledge_bases_list))
295
+ ignore_knowledge_bases_list = list(set(ignore_knowledge_bases_list))
296
+
297
+ # Determine knowledge base parameters to pass to SQLAgent
298
+ include_knowledge_bases = knowledge_bases_list if knowledge_bases_list else None
299
+ ignore_knowledge_bases = ignore_knowledge_bases_list if ignore_knowledge_bases_list else None
300
+
301
+ # If both include and ignore lists exist, include takes precedence
302
+ if include_knowledge_bases:
303
+ ignore_knowledge_bases = None
304
+
305
+ # # Get all databases from skills and extracted databases
306
+ # all_databases = list(set([s.params.get('database', DEFAULT_TEXT2SQL_DATABASE) for s in skills if s.params.get('database')] + list(extracted_databases)))
307
+ #
308
+ #
309
+ # # If no databases were specified or extracted, use 'mindsdb' as a default
310
+ # if not all_databases:
311
+ # all_databases = [DEFAULT_TEXT2SQL_DATABASE]
312
+ #
313
+
314
+ all_databases = []
315
+ # Filter out None values
316
+ all_databases = [db for db in all_databases if db is not None]
317
+
318
+ # Create a databases_struct dictionary that includes all extracted databases
319
+ databases_struct = {}
320
+
321
+ # First, add databases from skills with explicit database parameters
322
+ for skill in skills:
323
+ if skill.params.get('database'):
324
+ databases_struct[skill.params['database']] = skill.restriction_on_tables
325
+
326
+ # Then, add all extracted databases with no restrictions
327
+ for db_name in extracted_databases:
328
+ if db_name not in databases_struct:
329
+ databases_struct[db_name] = None
161
330
 
162
331
  sql_agent = SQLAgent(
163
332
  command_executor=command_executor,
164
- databases=list(set(s.params['database'] for s in skills)),
165
- databases_struct={
166
- skill.params['database']: skill.restriction_on_tables
167
- for skill in skills
168
- },
333
+ databases=all_databases,
334
+ databases_struct=databases_struct,
169
335
  include_tables=tables_list,
170
336
  ignore_tables=None,
337
+ include_knowledge_bases=include_knowledge_bases,
338
+ ignore_knowledge_bases=ignore_knowledge_bases,
339
+ knowledge_base_database=knowledge_base_database,
171
340
  sample_rows_in_table_info=3,
341
+
172
342
  cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
173
343
  )
174
344
  db = MindsDBSQL.custom_init(
175
345
  sql_agent=sql_agent
176
346
  )
177
-
178
- # Users probably don't need to configure this for now.
179
- sql_database_tools = MindsDBSQLToolkit(db=db, llm=llm).get_tools()
180
- descriptions = []
181
- for skill in skills:
182
- description = skill.params.get('description', '')
183
- if description:
184
- descriptions.append(description)
185
-
186
- for i, tool in enumerate(sql_database_tools):
187
- if isinstance(tool, QuerySQLDataBaseTool):
188
- # Add our own custom description so our agent knows when to query this table.
189
- original_description = tool.description
190
- tool.description = ''
191
- if len(descriptions) > 0:
192
- tool.description += f'Use this tool if you need data about {" OR ".join(descriptions)}.\n'
193
- tool.description += 'Use the conversation context to decide which table to query.\n'
194
- if len(tables_list) > 0:
195
- f'These are the available tables: {",".join(tables_list)}.\n'
196
- tool.description += (
197
- 'ALWAYS consider these special cases:\n'
198
- ' - For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")\n'
199
- 'Here are the rest of the instructions:\n'
200
- f'{original_description}'
201
- )
202
- sql_database_tools[i] = tool
203
- return sql_database_tools
347
+ toolkit = MindsDBSQLToolkit(db=db, llm=llm)
348
+ return toolkit.get_tools()
204
349
 
205
350
  def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
206
351
  """
@@ -1,7 +1,7 @@
1
-
2
1
  import re
3
2
  import csv
4
3
  import inspect
4
+ import traceback
5
5
  from io import StringIO
6
6
  from typing import Iterable, List, Optional, Any
7
7
 
@@ -13,6 +13,7 @@ from mindsdb.utilities import log
13
13
  from mindsdb.utilities.context import context as ctx
14
14
  from mindsdb.integrations.utilities.query_traversal import query_traversal
15
15
  from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
16
+ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
16
17
 
17
18
  logger = log.getLogger(__name__)
18
19
 
@@ -34,7 +35,7 @@ def list_to_csv_str(array: List[List[Any]]) -> str:
34
35
 
35
36
 
36
37
  def split_table_name(table_name: str) -> List[str]:
37
- """Split table name from llm to parst
38
+ """Split table name from llm to parts
38
39
 
39
40
  Args:
40
41
  table_name (str): input table name
@@ -72,34 +73,69 @@ def split_table_name(table_name: str) -> List[str]:
72
73
  if current:
73
74
  result.append(current.strip('`'))
74
75
 
76
+ # ensure we split the table name
77
+ result = [r.split(".") for r in result][0]
78
+
75
79
  return result
76
80
 
77
81
 
78
82
  class SQLAgent:
83
+ """
84
+ SQLAgent is a class that handles SQL queries for agents.
85
+ """
86
+
79
87
  def __init__(
80
- self,
81
- command_executor,
82
- databases: List[str],
83
- databases_struct: dict,
84
- include_tables: Optional[List[str]] = None,
85
- ignore_tables: Optional[List[str]] = None,
86
- sample_rows_in_table_info: int = 3,
87
- cache: Optional[dict] = None
88
+ self,
89
+ command_executor,
90
+ databases: List[str],
91
+ databases_struct: dict,
92
+ knowledge_base_database: str = 'mindsdb',
93
+ include_tables: Optional[List[str]] = None,
94
+ ignore_tables: Optional[List[str]] = None,
95
+ include_knowledge_bases: Optional[List[str]] = None,
96
+ ignore_knowledge_bases: Optional[List[str]] = None,
97
+ sample_rows_in_table_info: int = 3,
98
+ cache: Optional[dict] = None
88
99
  ):
100
+ """
101
+ Initialize SQLAgent.
102
+
103
+ Args:
104
+ command_executor: Executor for SQL commands
105
+ databases (List[str]): List of databases to use
106
+ databases_struct (dict): Dictionary of database structures
107
+ knowledge_base_database (str): Project name where knowledge bases are stored (defaults to 'mindsdb')
108
+ include_tables (List[str]): Tables to include
109
+ ignore_tables (List[str]): Tables to ignore
110
+ include_knowledge_bases (List[str]): Knowledge bases to include
111
+ ignore_knowledge_bases (List[str]): Knowledge bases to ignore
112
+ sample_rows_in_table_info (int): Number of sample rows to include in table info
113
+ cache (Optional[dict]): Cache for query results
114
+ """
89
115
  self._command_executor = command_executor
90
116
  self._mindsdb_db_struct = databases_struct
91
-
117
+ self.knowledge_base_database = knowledge_base_database # This is a project name, not a database connection
92
118
  self._sample_rows_in_table_info = int(sample_rows_in_table_info)
93
119
 
94
120
  self._tables_to_include = include_tables
95
121
  self._tables_to_ignore = []
122
+ self._knowledge_bases_to_include = include_knowledge_bases
123
+ self._knowledge_bases_to_ignore = []
96
124
  self._databases = databases
97
125
  if not self._tables_to_include:
98
126
  # ignore_tables and include_tables should not be used together.
99
127
  # include_tables takes priority if it's set.
100
128
  self._tables_to_ignore = ignore_tables or []
129
+ if not self._knowledge_bases_to_include:
130
+ # ignore_knowledge_bases and include_knowledge_bases should not be used together.
131
+ # include_knowledge_bases takes priority if it's set.
132
+ self._knowledge_bases_to_ignore = ignore_knowledge_bases or []
101
133
  self._cache = cache
102
134
 
135
+ from mindsdb.interfaces.skills.skill_tool import SkillToolController
136
+ # Initialize the skill tool controller from MindsDB
137
+ self.skill_tool = SkillToolController()
138
+
103
139
  def _call_engine(self, query: str, database=None):
104
140
  # switch database
105
141
  ast_query = parse_sql(query.strip('`'))
@@ -107,7 +143,10 @@ class SQLAgent:
107
143
 
108
144
  if database is None:
109
145
  # if we use tables with prefixes it should work for any database
110
- database = self._databases[0]
146
+ if self._databases is not None:
147
+ # if we have multiple databases, we need to check which one to use
148
+ # for now, we will just use the first one
149
+ database = self._databases[0] if self._databases else "mindsdb"
111
150
 
112
151
  ret = self._command_executor.execute_command(
113
152
  ast_query,
@@ -131,9 +170,28 @@ class SQLAgent:
131
170
 
132
171
  def _check_f(node, is_table=None, **kwargs):
133
172
  if is_table and isinstance(node, Identifier):
134
- if node.parts not in tables_parts:
135
- raise ValueError(f"Table {'.'.join(node.parts)} not found. Available tables: {', '.join(self._tables_to_include)}")
136
-
173
+ table_name = '.'.join(node.parts)
174
+
175
+ # Get the list of available knowledge bases
176
+ kb_names = self.get_usable_knowledge_base_names()
177
+
178
+ # Check if this table is a knowledge base
179
+ is_kb = table_name in kb_names
180
+
181
+ # If it's a knowledge base and we have knowledge base restrictions
182
+ if is_kb and self._knowledge_bases_to_include:
183
+ kb_parts = [split_table_name(x) for x in self._knowledge_bases_to_include]
184
+ if node.parts not in kb_parts:
185
+ raise ValueError(f"Knowledge base {table_name} not found. Available knowledge bases: {', '.join(self._knowledge_bases_to_include)}")
186
+ # Regular table check
187
+ elif not is_kb and self._tables_to_include and node.parts not in tables_parts:
188
+ raise ValueError(f"Table {table_name} not found. Available tables: {', '.join(self._tables_to_include)}")
189
+ # Check if it's a restricted knowledge base
190
+ elif is_kb and table_name in self._knowledge_bases_to_ignore:
191
+ raise ValueError(f"Knowledge base {table_name} is not allowed.")
192
+ # Check if it's a restricted table
193
+ elif not is_kb and table_name in self._tables_to_ignore:
194
+ raise ValueError(f"Table {table_name} is not allowed.")
137
195
  query_traversal(ast_query, _check_f)
138
196
 
139
197
  def get_usable_table_names(self) -> Iterable[str]:
@@ -200,6 +258,78 @@ class SQLAgent:
200
258
  self._cache.set(cache_key, set(result_tables))
201
259
  return result_tables
202
260
 
261
+ def get_usable_knowledge_base_names(self) -> Iterable[str]:
262
+ """Get a list of knowledge bases that the agent has access to.
263
+
264
+ Returns:
265
+ Iterable[str]: list with knowledge base names
266
+ """
267
+ cache_key = f'{ctx.company_id}_{self.knowledge_base_database}_knowledge_bases'
268
+
269
+ # todo we need to fix the cache, file cache can potentially store out of data information
270
+ # # first check cache and return if found
271
+ # if self._cache:
272
+ # cached_kbs = self._cache.get(cache_key)
273
+ # if cached_kbs:
274
+ # return cached_kbs
275
+
276
+ if self._knowledge_bases_to_include:
277
+ return self._knowledge_bases_to_include
278
+
279
+ try:
280
+ # Query to get all knowledge bases
281
+ query = f"SHOW KNOWLEDGE_BASES FROM {self.knowledge_base_database};"
282
+ try:
283
+ result = self._call_engine(query, database=self.knowledge_base_database)
284
+ except Exception as e:
285
+ # If the direct query fails, try a different approach
286
+ # This handles the case where knowledge_base_database is not a valid integration
287
+ logger.warning(f"Error querying knowledge bases from {self.knowledge_base_database}: {str(e)}")
288
+ # Try to get knowledge bases directly from the project database
289
+ try:
290
+ # Get knowledge bases from the project database
291
+ kb_controller = self._command_executor.session.kb_controller
292
+ kb_names = [kb['name'] for kb in kb_controller.list()]
293
+
294
+ # Filter knowledge bases based on include list
295
+ if self._knowledge_bases_to_include:
296
+ kb_names = [kb_name for kb_name in kb_names if kb_name in self._knowledge_bases_to_include]
297
+ if not kb_names:
298
+ logger.warning(f"No knowledge bases found in the include list: {self._knowledge_bases_to_include}")
299
+ return []
300
+
301
+ return kb_names
302
+
303
+ # Filter knowledge bases based on ignore list
304
+ kb_names = [kb_name for kb_name in kb_names if kb_name not in self._knowledge_bases_to_ignore]
305
+
306
+ if self._cache:
307
+ self._cache.set(cache_key, set(kb_names))
308
+
309
+ return kb_names
310
+ except Exception as inner_e:
311
+ logger.error(f"Error getting knowledge bases from kb_controller: {str(inner_e)}")
312
+ return []
313
+
314
+ if not result:
315
+ return []
316
+
317
+ # Filter knowledge bases based on ignore list
318
+ kb_names = []
319
+ for row in result:
320
+ kb_name = row['name']
321
+ if kb_name not in self._knowledge_bases_to_ignore:
322
+ kb_names.append(kb_name)
323
+
324
+ if self._cache:
325
+ self._cache.set(cache_key, set(kb_names))
326
+
327
+ return kb_names
328
+ except Exception as e:
329
+ # If there's an error, log it and return an empty list
330
+ logger.error(f"Error in get_usable_knowledge_base_names: {str(e)}")
331
+ return []
332
+
203
333
  def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
204
334
  """
205
335
  Tries to find table (which comes directly from an LLM) by its name
@@ -237,6 +367,26 @@ class SQLAgent:
237
367
 
238
368
  return tables
239
369
 
370
+ def get_knowledge_base_info(self, kb_names: Optional[List[str]] = None) -> str:
371
+ """ Get information about specified knowledge bases.
372
+ Follows best practices as specified in: Rajkumar et al, 2022 (https://arxiv.org/abs/2204.00498)
373
+ If `sample_rows_in_table_info`, the specified number of sample rows will be
374
+ appended to each table description. This can increase performance as demonstrated in the paper.
375
+ """
376
+
377
+ kbs_info = []
378
+ for kb in kb_names:
379
+ key = f"{ctx.company_id}_{kb}_info"
380
+ kb_info = self._cache.get(key) if self._cache else None
381
+ if True or kb_info is None:
382
+ kb_info = self.get_kb_sample_rows(kb)
383
+ if self._cache:
384
+ self._cache.set(key, kb_info)
385
+
386
+ kbs_info.append(kb_info)
387
+
388
+ return "\n\n".join(kbs_info)
389
+
240
390
  def get_table_info(self, table_names: Optional[List[str]] = None) -> str:
241
391
  """ Get information about specified tables.
242
392
  Follows best practices as specified in: Rajkumar et al, 2022 (https://arxiv.org/abs/2204.00498)
@@ -244,16 +394,25 @@ class SQLAgent:
244
394
  appended to each table description. This can increase performance as demonstrated in the paper.
245
395
  """
246
396
 
247
- all_tables = [Identifier(name) for name in self.get_usable_table_names()]
397
+ all_tables = []
398
+ for name in self.get_usable_table_names():
399
+ # remove backticks
400
+ name = name.replace("`", "")
248
401
 
249
- if table_names is not None:
250
- all_tables = self._resolve_table_names(table_names, all_tables)
402
+ split = name.split(".")
403
+ if len(split) > 1:
404
+ all_tables.append(Identifier(parts=[split[0], split[1]]))
405
+ else:
406
+ all_tables.append(Identifier(name))
407
+
408
+ # if table_names is not None:
409
+ # all_tables = self._resolve_table_names(table_names, all_tables)
251
410
 
252
411
  tables_info = []
253
412
  for table in all_tables:
254
413
  key = f"{ctx.company_id}_{table}_info"
255
414
  table_info = self._cache.get(key) if self._cache else None
256
- if table_info is None:
415
+ if True or table_info is None:
257
416
  table_info = self._get_single_table_info(table)
258
417
  if self._cache:
259
418
  self._cache.set(key, table_info)
@@ -262,6 +421,34 @@ class SQLAgent:
262
421
 
263
422
  return "\n\n".join(tables_info)
264
423
 
424
+ def get_kb_sample_rows(self, kb_name: str) -> str:
425
+ """Get sample rows from a knowledge base.
426
+
427
+ Args:
428
+ kb_name (str): The name of the knowledge base.
429
+
430
+ Returns:
431
+ str: A string containing the sample rows from the knowledge base.
432
+ """
433
+ logger.info(f'_get_sample_rows: knowledge base={kb_name}')
434
+ command = f"select * from {kb_name} limit 10;"
435
+ try:
436
+ ret = self._call_engine(command)
437
+ sample_rows = ret.data.to_lists()
438
+
439
+ def truncate_value(val):
440
+ str_val = str(val)
441
+ return str_val if len(str_val) < 100 else (str_val[:100] + '...')
442
+
443
+ sample_rows = list(
444
+ map(lambda row: [truncate_value(value) for value in row], sample_rows))
445
+ sample_rows_str = "\n" + f"{kb_name}:" + list_to_csv_str(sample_rows)
446
+ except Exception as e:
447
+ logger.info(f'_get_sample_rows error: {e}')
448
+ sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
449
+
450
+ return sample_rows_str
451
+
265
452
  def _get_single_table_info(self, table: Identifier) -> str:
266
453
  if len(table.parts) < 2:
267
454
  raise ValueError(f"Database is required for table: {table}")
@@ -276,19 +463,41 @@ class SQLAgent:
276
463
  dn = self._command_executor.session.datahub.get(integration)
277
464
 
278
465
  fields, dtypes = [], []
279
- for df in dn.get_table_columns_df(table_name, schema_name):
280
- df_records = df.to_dict(orient='records')
281
- fields.append(df_records[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME])
282
- if df_records[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] is not None:
283
- dtypes.append(df_records[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE].value)
284
- else:
285
- dtypes.append(df_records[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE])
466
+ try:
467
+ df = dn.get_table_columns_df(table_name, schema_name)
468
+ if not isinstance(df, pd.DataFrame) or df.empty:
469
+ logger.warning(f"Received empty or invalid DataFrame for table columns of {table_str}")
470
+ return f"Table named `{table_str}`:\n [No column information available]"
471
+
472
+ fields = df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list()
473
+ dtypes = [
474
+ mysql_data_type.value if isinstance(mysql_data_type, MYSQL_DATA_TYPE) else (data_type or 'UNKNOWN')
475
+ for mysql_data_type, data_type
476
+ in zip(
477
+ df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE],
478
+ df[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE]
479
+ )
480
+ ]
481
+ except Exception as e:
482
+ logger.error(f"Failed processing column info for {table_str}: {e}", exc_info=True)
483
+ raise ValueError(f"Failed to process column info for {table_str}") from e
484
+
485
+ if not fields:
486
+ logger.error(f"Could not extract column fields for {table_str}.")
487
+ return f"Table named `{table_str}`:\n [Could not extract column information]"
488
+
489
+ try:
490
+ sample_rows_info = self._get_sample_rows(table_str, fields)
491
+ except Exception as e:
492
+ logger.warning(f"Could not get sample rows for {table_str}: {e}")
493
+ sample_rows_info = "\n\t [error] Couldn't retrieve sample rows!"
286
494
 
287
495
  info = f'Table named `{table_str}`:\n'
288
496
  info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str} in CSV format (dialect is 'excel'):\n"
289
- info += self._get_sample_rows(table_str, fields) + "\n"
497
+ info += sample_rows_info + "\n"
290
498
  info += '\nColumn data types: ' + ",\t".join(
291
- [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
499
+ [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]
500
+ ) + '\n'
292
501
  return info
293
502
 
294
503
  def _get_sample_rows(self, table: str, fields: List[str]) -> str:
@@ -364,6 +573,7 @@ class SQLAgent:
364
573
  logger.info(f'query_safe (fetch={fetch}): {command}')
365
574
  return self.query(command, fetch)
366
575
  except Exception as e:
576
+ logger.error(f"Error in query_safe: {str(e)}\n{traceback.format_exc()}")
367
577
  logger.info(f'query_safe error: {e}')
368
578
  msg = f"Error: {e}"
369
579
  if 'does not exist' in msg and ' relation ' in msg: