MindsDB 25.5.3.0__py3-none-any.whl → 25.5.4.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (313) hide show
  1. mindsdb/__about__.py +8 -8
  2. mindsdb/__main__.py +127 -79
  3. mindsdb/api/a2a/__init__.py +0 -0
  4. mindsdb/api/a2a/__main__.py +144 -0
  5. mindsdb/api/a2a/agent.py +308 -0
  6. mindsdb/api/a2a/common/__init__.py +0 -0
  7. mindsdb/api/a2a/common/server/__init__.py +4 -0
  8. mindsdb/api/a2a/common/server/server.py +164 -0
  9. mindsdb/api/a2a/common/server/task_manager.py +287 -0
  10. mindsdb/api/a2a/common/server/utils.py +28 -0
  11. mindsdb/api/a2a/common/types.py +365 -0
  12. mindsdb/api/a2a/constants.py +9 -0
  13. mindsdb/api/a2a/run_a2a.py +86 -0
  14. mindsdb/api/a2a/task_manager.py +560 -0
  15. mindsdb/api/executor/command_executor.py +185 -309
  16. mindsdb/api/executor/datahub/classes/response.py +5 -2
  17. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
  18. mindsdb/api/executor/planner/query_planner.py +10 -1
  19. mindsdb/api/executor/sql_query/result_set.py +185 -52
  20. mindsdb/api/executor/sql_query/sql_query.py +1 -1
  21. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +9 -12
  22. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
  23. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
  24. mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
  25. mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
  26. mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
  27. mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
  28. mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
  29. mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
  30. mindsdb/api/http/initialize.py +99 -83
  31. mindsdb/api/http/namespaces/analysis.py +3 -3
  32. mindsdb/api/http/namespaces/config.py +61 -86
  33. mindsdb/api/http/namespaces/file.py +8 -2
  34. mindsdb/api/http/namespaces/sql.py +13 -27
  35. mindsdb/api/mcp/start.py +42 -5
  36. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
  37. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
  38. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
  39. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
  40. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +82 -115
  41. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
  42. mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
  43. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
  44. mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
  45. mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
  46. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
  47. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
  48. mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
  49. mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
  50. mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
  51. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  52. mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
  53. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
  54. mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
  55. mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
  56. mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
  57. mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
  58. mindsdb/integrations/handlers/byom_handler/requirements.txt +1 -2
  59. mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
  60. mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
  61. mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
  62. mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
  63. mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
  64. mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
  65. mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
  66. mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
  67. mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
  68. mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
  69. mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
  70. mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
  71. mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
  72. mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
  73. mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
  74. mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
  75. mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
  76. mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
  77. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
  78. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
  79. mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
  80. mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
  81. mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
  82. mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
  83. mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
  84. mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
  85. mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
  86. mindsdb/integrations/handlers/email_handler/settings.py +0 -1
  87. mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
  88. mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
  89. mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
  90. mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
  91. mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
  92. mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
  93. mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
  94. mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
  95. mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
  96. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
  97. mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
  98. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
  99. mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
  100. mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
  101. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
  102. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
  103. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
  104. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
  105. mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
  106. mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
  107. mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
  108. mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
  109. mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
  110. mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
  111. mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
  112. mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
  113. mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
  114. mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
  115. mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
  116. mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
  117. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
  118. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
  119. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
  120. mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
  121. mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
  122. mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
  123. mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
  124. mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
  125. mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
  126. mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
  127. mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
  128. mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
  129. mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
  130. mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
  131. mindsdb/integrations/handlers/jira_handler/jira_handler.py +4 -4
  132. mindsdb/integrations/handlers/jira_handler/jira_tables.py +9 -9
  133. mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
  134. mindsdb/integrations/handlers/lancedb_handler/requirements.txt +0 -1
  135. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
  136. mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
  137. mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
  138. mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
  139. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -1
  140. mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
  141. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +37 -20
  142. mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
  143. mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
  144. mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
  145. mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
  146. mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
  147. mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
  148. mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
  149. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
  150. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
  151. mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
  152. mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
  153. mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
  154. mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
  155. mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
  156. mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
  157. mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
  158. mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
  159. mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
  160. mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
  161. mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
  162. mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
  163. mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
  164. mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
  165. mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
  166. mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
  167. mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
  168. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
  169. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  170. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
  171. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
  172. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
  173. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
  174. mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
  175. mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
  176. mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
  177. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
  178. mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
  179. mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
  180. mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
  181. mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
  182. mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
  183. mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
  184. mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
  185. mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
  186. mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
  187. mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
  188. mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
  189. mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
  190. mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
  191. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
  192. mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
  193. mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
  194. mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
  195. mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
  196. mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
  197. mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
  198. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
  199. mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
  200. mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
  201. mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
  202. mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
  203. mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
  204. mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
  205. mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
  206. mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
  207. mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
  208. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
  209. mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
  210. mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
  211. mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
  212. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
  213. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
  214. mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
  215. mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
  216. mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
  217. mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
  218. mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
  219. mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
  220. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
  221. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
  222. mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
  223. mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
  224. mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
  225. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
  226. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
  227. mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
  228. mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
  229. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
  230. mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
  231. mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
  232. mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
  233. mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
  234. mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
  235. mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
  236. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
  237. mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
  238. mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
  239. mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
  240. mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
  241. mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
  242. mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
  243. mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
  244. mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
  245. mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
  246. mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
  247. mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
  248. mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
  249. mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
  250. mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
  251. mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
  252. mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
  253. mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
  254. mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
  255. mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
  256. mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
  257. mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
  258. mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
  259. mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
  260. mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
  261. mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
  262. mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
  263. mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
  264. mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
  265. mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
  266. mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
  267. mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
  268. mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
  269. mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
  270. mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
  271. mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
  272. mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
  273. mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
  274. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
  275. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
  276. mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
  277. mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
  278. mindsdb/integrations/libs/llm/config.py +13 -0
  279. mindsdb/integrations/libs/llm/utils.py +37 -65
  280. mindsdb/integrations/libs/response.py +67 -52
  281. mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
  282. mindsdb/integrations/utilities/handler_utils.py +15 -3
  283. mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
  284. mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
  285. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +230 -227
  286. mindsdb/integrations/utilities/utils.py +3 -3
  287. mindsdb/interfaces/agents/agents_controller.py +164 -1
  288. mindsdb/interfaces/agents/constants.py +32 -13
  289. mindsdb/interfaces/agents/langchain_agent.py +106 -95
  290. mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
  291. mindsdb/interfaces/knowledge_base/controller.py +250 -216
  292. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +13 -10
  293. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
  294. mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
  295. mindsdb/interfaces/query_context/context_controller.py +66 -10
  296. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
  297. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
  298. mindsdb/interfaces/skills/skill_tool.py +202 -57
  299. mindsdb/interfaces/skills/sql_agent.py +205 -17
  300. mindsdb/interfaces/storage/fs.py +1 -0
  301. mindsdb/interfaces/variables/__init__.py +0 -0
  302. mindsdb/interfaces/variables/variables_controller.py +97 -0
  303. mindsdb/migrations/env.py +5 -7
  304. mindsdb/migrations/migrate.py +47 -7
  305. mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
  306. mindsdb/utilities/config.py +287 -216
  307. mindsdb/utilities/starters.py +13 -0
  308. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/METADATA +646 -698
  309. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/RECORD +312 -295
  310. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/WHEEL +1 -1
  311. mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
  312. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/licenses/LICENSE +0 -0
  313. {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,27 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
3
4
  import asyncio
4
5
  import logging
5
6
  import math
6
7
  import os
7
8
  import random
8
9
  from abc import ABC
10
+ from textwrap import dedent
9
11
  from typing import Any, List, Optional, Tuple
10
12
 
11
13
  from openai import AsyncOpenAI, AsyncAzureOpenAI
12
- from pydantic import field_validator
13
14
  from pydantic import BaseModel
14
15
 
15
16
  from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKING_MODEL, DEFAULT_LLM_ENDPOINT
17
+ from mindsdb.integrations.libs.base import BaseMLEngine
16
18
 
17
19
  log = logging.getLogger(__name__)
18
20
 
19
21
 
20
22
  class BaseLLMReranker(BaseModel, ABC):
21
-
22
23
  filtering_threshold: float = 0.0 # Default threshold for filtering
23
- provider: str = 'openai'
24
+ provider: str = "openai"
24
25
  model: str = DEFAULT_RERANKING_MODEL # Model to use for reranking
25
26
  temperature: float = 0.0 # Temperature for the model
26
27
  api_key: Optional[str] = None
@@ -29,7 +30,7 @@ class BaseLLMReranker(BaseModel, ABC):
29
30
  num_docs_to_keep: Optional[int] = None # How many of the top documents to keep after reranking & compressing.
30
31
  method: str = "multi-class" # Scoring method: 'multi-class' or 'binary'
31
32
  _api_key_var: str = "OPENAI_API_KEY"
32
- client: Optional[AsyncOpenAI] = None
33
+ client: Optional[AsyncOpenAI | BaseMLEngine] = None
33
34
  _semaphore: Optional[asyncio.Semaphore] = None
34
35
  max_concurrent_requests: int = 20
35
36
  max_retries: int = 3
@@ -40,33 +41,26 @@ class BaseLLMReranker(BaseModel, ABC):
40
41
 
41
42
  class Config:
42
43
  arbitrary_types_allowed = True
43
-
44
- @field_validator('provider')
45
- @classmethod
46
- def validate_provider(cls, v: str) -> str:
47
- allowed = {'openai', 'azure_openai'}
48
- v_lower = v.lower()
49
- if v_lower not in allowed:
50
- raise ValueError(f"Unsupported provider: {v}.")
51
- return v_lower
44
+ extra = "allow"
52
45
 
53
46
  def __init__(self, **kwargs):
54
47
  super().__init__(**kwargs)
55
48
  self._semaphore = asyncio.Semaphore(self.max_concurrent_requests)
49
+ self._init_client()
56
50
 
57
- async def _init_client(self):
51
+ def _init_client(self):
58
52
  if self.client is None:
59
-
60
53
  if self.provider == "azure_openai":
61
-
62
54
  azure_api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
63
55
  azure_api_endpoint = self.base_url or os.environ.get("AZURE_OPENAI_ENDPOINT")
64
56
  azure_api_version = self.api_version or os.environ.get("AZURE_OPENAI_API_VERSION")
65
- self.client = AsyncAzureOpenAI(api_key=azure_api_key,
66
- azure_endpoint=azure_api_endpoint,
67
- api_version=azure_api_version,
68
- timeout=self.request_timeout,
69
- max_retries=2)
57
+ self.client = AsyncAzureOpenAI(
58
+ api_key=azure_api_key,
59
+ azure_endpoint=azure_api_endpoint,
60
+ api_version=azure_api_version,
61
+ timeout=self.request_timeout,
62
+ max_retries=2,
63
+ )
70
64
  elif self.provider == "openai":
71
65
  api_key_var: str = "OPENAI_API_KEY"
72
66
  openai_api_key = self.api_key or os.getenv(api_key_var)
@@ -74,48 +68,39 @@ class BaseLLMReranker(BaseModel, ABC):
74
68
  raise ValueError(f"OpenAI API key not found in environment variable {api_key_var}")
75
69
 
76
70
  base_url = self.base_url or DEFAULT_LLM_ENDPOINT
77
- self.client = AsyncOpenAI(api_key=openai_api_key, base_url=base_url, timeout=self.request_timeout, max_retries=2)
71
+ self.client = AsyncOpenAI(
72
+ api_key=openai_api_key, base_url=base_url, timeout=self.request_timeout, max_retries=2
73
+ )
78
74
 
79
- async def search_relevancy(self, query: str, document: str, rerank_callback=None) -> Any:
80
- await self._init_client()
75
+ else:
76
+ # try to use litellm
77
+ from mindsdb.api.executor.controllers.session_controller import SessionController
81
78
 
82
- async with self._semaphore:
83
- for attempt in range(self.max_retries):
84
- try:
85
- response = await self.client.chat.completions.create(
86
- model=self.model,
87
- messages=[
88
- {"role": "system", "content": "Rate the relevance of the document to the query. Respond with 'yes' or 'no'."},
89
- {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"}
90
- ],
91
- temperature=self.temperature,
92
- n=1,
93
- logprobs=True,
94
- max_tokens=1
95
- )
96
-
97
- # Extract response and logprobs
98
- answer = response.choices[0].message.content
99
- logprob = response.choices[0].logprobs.content[0].logprob
100
- rerank_data = {
101
- "document": document,
102
- "answer": answer,
103
- "logprob": logprob
104
- }
105
-
106
- # Stream reranking update.
107
- if rerank_callback is not None:
108
- rerank_callback(rerank_data)
79
+ session = SessionController()
80
+ module = session.integration_controller.get_handler_module("litellm")
109
81
 
110
- return rerank_data
82
+ if module is None or module.Handler is None:
83
+ raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
111
84
 
112
- except Exception as e:
113
- if attempt == self.max_retries - 1:
114
- log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
115
- raise
116
- # Exponential backoff with jitter
117
- retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
118
- await asyncio.sleep(retry_delay)
85
+ self.client = module.Handler
86
+ self.method = "no-logprobs"
87
+
88
+ async def _call_llm(self, messages):
89
+ if self.provider in ("azure_openai", "openai"):
90
+ return await self.client.chat.completions.create(
91
+ model=self.model,
92
+ messages=messages,
93
+ )
94
+ else:
95
+ kwargs = self.model_extra.copy()
96
+
97
+ if self.base_url is not None:
98
+ kwargs["api_base"] = self.base_url
99
+
100
+ if self.api_key is not None:
101
+ kwargs["api_key"] = self.api_key
102
+
103
+ return await self.client.acompletion(model=f"{self.provider}/{self.model}", messages=messages, args=kwargs)
119
104
 
120
105
  async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]:
121
106
  ranked_results = []
@@ -123,30 +108,23 @@ class BaseLLMReranker(BaseModel, ABC):
123
108
  # Process in larger batches for better throughput
124
109
  batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
125
110
  for i in range(0, len(query_document_pairs), batch_size):
126
- batch = query_document_pairs[i:i + batch_size]
111
+ batch = query_document_pairs[i : i + batch_size]
127
112
  try:
128
113
  results = await asyncio.gather(
129
- *[self.search_relevancy(query=query, document=document, rerank_callback=rerank_callback) for (query, document) in batch],
130
- return_exceptions=True
114
+ *[
115
+ self._backoff_wrapper(query=query, document=document, rerank_callback=rerank_callback)
116
+ for (query, document) in batch
117
+ ],
118
+ return_exceptions=True,
131
119
  )
132
120
 
133
121
  for idx, result in enumerate(results):
134
122
  if isinstance(result, Exception):
135
- log.error(f"Error processing document {i+idx}: {str(result)}")
123
+ log.error(f"Error processing document {i + idx}: {str(result)}")
136
124
  ranked_results.append((batch[idx][1], 0.0))
137
125
  continue
138
126
 
139
- answer = result["answer"]
140
- logprob = result["logprob"]
141
- prob = math.exp(logprob)
142
-
143
- # Convert answer to score using the model's confidence
144
- if answer.lower().strip() == "yes":
145
- score = prob # If yes, use the model's confidence
146
- elif answer.lower().strip() == "no":
147
- score = 1 - prob # If no, invert the confidence
148
- else:
149
- score = 0.5 * prob # For unclear answers, reduce confidence
127
+ score = result["relevance_score"]
150
128
 
151
129
  ranked_results.append((batch[idx][1], score))
152
130
 
@@ -161,7 +139,9 @@ class BaseLLMReranker(BaseModel, ABC):
161
139
  )
162
140
 
163
141
  if can_stop_early:
164
- log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
142
+ log.info(
143
+ f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence"
144
+ )
165
145
  return ranked_results
166
146
  except Exception as e:
167
147
  # Don't let early stopping errors stop the whole process
@@ -172,114 +152,18 @@ class BaseLLMReranker(BaseModel, ABC):
172
152
  continue
173
153
  return ranked_results
174
154
 
175
- async def search_relevancy_score(self, query: str, document: str) -> Any:
176
- await self._init_client()
177
-
155
+ async def _backoff_wrapper(self, query: str, document: str, rerank_callback=None) -> Any:
178
156
  async with self._semaphore:
179
157
  for attempt in range(self.max_retries):
180
158
  try:
181
- response = await self.client.chat.completions.create(
182
- model=self.model,
183
- messages=[
184
- {"role": "system", "content": """
185
- You are an intelligent assistant that evaluates how relevant a given document chunk is to a user's search query.
186
- Your task is to analyze the similarity between the search query and the document chunk, and return **only the class label** that best represents the relevance:
187
-
188
- - "class_1": Not relevant (score between 0.0 and 0.25)
189
- - "class_2": Slightly relevant (score between 0.25 and 0.5)
190
- - "class_3": Moderately relevant (score between 0.5 and 0.75)
191
- - "class_4": Highly relevant (score between 0.75 and 1.0)
192
-
193
- Respond with only one of: "class_1", "class_2", "class_3", or "class_4".
194
-
195
- Examples:
196
-
197
- Search query: "How to reset a router to factory settings?"
198
- Document chunk: "Computers often come with customizable parental control settings."
199
- Score: class_1
200
-
201
- Search query: "Symptoms of vitamin D deficiency"
202
- Document chunk: "Vitamin D deficiency has been linked to fatigue, bone pain, and muscle weakness."
203
- Score: class_4
204
-
205
- Search query: "Best practices for onboarding remote employees"
206
- Document chunk: "An employee handbook can be useful for new hires, outlining company policies and benefits."
207
- Score: class_2
208
-
209
- Search query: "Benefits of mindfulness meditation"
210
- Document chunk: "Practicing mindfulness has shown to reduce stress and improve focus in multiple studies."
211
- Score: class_3
212
-
213
- Search query: "What is Kubernetes used for?"
214
- Document chunk: "Kubernetes is an open-source system for automating deployment, scaling, and management of containerized applications."
215
- Score: class_4
216
-
217
- Search query: "How to bake sourdough bread at home"
218
- Document chunk: "The French Revolution began in 1789 and radically transformed society."
219
- Score: class_1
220
-
221
- Search query: "Machine learning algorithms for image classification"
222
- Document chunk: "Convolutional Neural Networks (CNNs) are particularly effective in image classification tasks."
223
- Score: class_4
224
-
225
- Search query: "How to improve focus while working remotely"
226
- Document chunk: "Creating a dedicated workspace and setting a consistent schedule can significantly improve focus during remote work."
227
- Score: class_4
228
-
229
- Search query: "Carbon emissions from electric vehicles vs gas cars"
230
- Document chunk: "Electric vehicles produce zero emissions while driving, but battery production has environmental impacts."
231
- Score: class_3
232
-
233
- Search query: "Time zones in the United States"
234
- Document chunk: "The U.S. is divided into six primary time zones: Eastern, Central, Mountain, Pacific, Alaska, and Hawaii-Aleutian."
235
- Score: class_4
236
- """},
237
-
238
- {"role": "user", "content": f"""
239
- Now evaluate the following pair:
240
-
241
- Search query: {query}
242
- Document chunk: {document}
243
-
244
- Which class best represents the relevance?
245
- """}
246
- ],
247
- temperature=self.temperature,
248
- n=1,
249
- logprobs=True,
250
- top_logprobs=4,
251
- max_tokens=3
252
- )
253
-
254
- # Extract response and logprobs
255
- class_label = response.choices[0].message.content.strip()
256
- token_logprobs = response.choices[0].logprobs.content
257
- # Reconstruct the prediction and extract the top logprobs from the final token (e.g., "1")
258
- final_token_logprob = token_logprobs[-1]
259
- top_logprobs = final_token_logprob.top_logprobs
260
- # Create a map of 'class_1' -> probability, using token combinations
261
- class_probs = {}
262
- for top_token in top_logprobs:
263
- full_label = f"class_{top_token.token}"
264
- prob = math.exp(top_token.logprob)
265
- class_probs[full_label] = prob
266
- # Optional: normalize in case some are missing
267
- total_prob = sum(class_probs.values())
268
- class_probs = {k: v / total_prob for k, v in class_probs.items()}
269
- # Assign weights to classes
270
- class_weights = {
271
- "class_1": 0.25,
272
- "class_2": 0.5,
273
- "class_3": 0.75,
274
- "class_4": 1.0
275
- }
276
- # Compute the final smooth score
277
- relevance_score = sum(class_weights.get(class_label, 0) * prob for class_label, prob in class_probs.items())
278
- rerank_data = {
279
- "document": document,
280
- "answer": class_label,
281
- "relevance_score": relevance_score
282
- }
159
+ if self.method == "multi-class":
160
+ rerank_data = await self.search_relevancy_score(query, document)
161
+ elif self.method == "no-logprobs":
162
+ rerank_data = await self.search_relevancy_no_logprob(query, document)
163
+ else:
164
+ rerank_data = await self.search_relevancy(query, document)
165
+ if rerank_callback is not None:
166
+ rerank_callback(rerank_data)
283
167
  return rerank_data
284
168
 
285
169
  except Exception as e:
@@ -287,63 +171,185 @@ class BaseLLMReranker(BaseModel, ABC):
287
171
  log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
288
172
  raise
289
173
  # Exponential backoff with jitter
290
- retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
174
+ retry_delay = self.retry_delay * (2**attempt) + random.uniform(0, 0.1)
291
175
  await asyncio.sleep(retry_delay)
292
176
 
293
- async def _rank_score(self, query_document_pairs: List[Tuple[str, str]]) -> List[Tuple[str, float]]:
294
- ranked_results = []
177
+ async def search_relevancy(self, query: str, document: str) -> Any:
178
+ response = await self.client.chat.completions.create(
179
+ model=self.model,
180
+ messages=[
181
+ {
182
+ "role": "system",
183
+ "content": "Rate the relevance of the document to the query. Respond with 'yes' or 'no'.",
184
+ },
185
+ {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"},
186
+ ],
187
+ temperature=self.temperature,
188
+ n=1,
189
+ logprobs=True,
190
+ max_tokens=1,
191
+ )
192
+
193
+ # Extract response and logprobs
194
+ answer = response.choices[0].message.content
195
+ logprob = response.choices[0].logprobs.content[0].logprob
196
+
197
+ # Convert answer to score using the model's confidence
198
+ if answer.lower().strip() == "yes":
199
+ score = logprob # If yes, use the model's confidence
200
+ elif answer.lower().strip() == "no":
201
+ score = 1 - logprob # If no, invert the confidence
202
+ else:
203
+ score = 0.5 * logprob # For unclear answers, reduce confidence
295
204
 
296
- # Process in larger batches for better throughput
297
- batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
298
- for i in range(0, len(query_document_pairs), batch_size):
299
- batch = query_document_pairs[i:i + batch_size]
300
- try:
301
- results = await asyncio.gather(
302
- *[self.search_relevancy_score(query=query, document=document) for (query, document) in batch],
303
- return_exceptions=True
304
- )
205
+ rerank_data = {
206
+ "document": document,
207
+ "relevance_score": score,
208
+ }
305
209
 
306
- for idx, result in enumerate(results):
307
- if isinstance(result, Exception):
308
- log.error(f"Error processing document {i+idx}: {str(result)}")
309
- ranked_results.append((batch[idx][1], 0.0))
310
- continue
210
+ return rerank_data
311
211
 
312
- score = result["relevance_score"]
313
- if score is not None:
314
- if score > 1.0:
315
- score = 1.0
316
- elif score < 0.0:
317
- score = 0.0
212
+ async def search_relevancy_no_logprob(self, query: str, document: str) -> Any:
213
+ prompt = dedent(
214
+ f"""
215
+ Score the relevance between search query and user message on scale between 0 and 100 per cents.
216
+ Consider semantic meaning, key concepts, and contextual relevance.
217
+ Return ONLY a numerical score between 0 and 100 per cents. No other text. Stop after sending a number
218
+ Search query: {query}
219
+ """
220
+ )
318
221
 
319
- ranked_results.append((batch[idx][1], score))
320
- # Check if we should stop early
321
- try:
322
- high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
323
- can_stop_early = (
324
- self.early_stop # Early stopping is enabled
325
- and self.num_docs_to_keep # We have a target number of docs
326
- and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs
327
- and score >= self.early_stop_threshold # Current doc is good enough
328
- )
222
+ response = await self._call_llm(
223
+ messages=[{"role": "system", "content": prompt}, {"role": "user", "content": document}],
224
+ )
329
225
 
330
- if can_stop_early:
331
- log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
332
- return ranked_results
333
- except Exception as e:
334
- # Don't let early stopping errors stop the whole process
335
- log.warning(f"Error in early stopping check: {str(e)}")
226
+ answer = response.choices[0].message.content
336
227
 
337
- except Exception as e:
338
- log.error(f"Batch processing error: {str(e)}")
339
- continue
228
+ try:
229
+ value = re.findall(r"[\d]+", answer)[0]
230
+ score = float(value) / 100
231
+ score = max(0.0, min(score, 1.0))
232
+ except (ValueError, IndexError):
233
+ score = 0.0
340
234
 
341
- return ranked_results
235
+ rerank_data = {
236
+ "document": document,
237
+ "relevance_score": score,
238
+ }
239
+
240
+ return rerank_data
241
+
242
+ async def search_relevancy_score(self, query: str, document: str) -> Any:
243
+ response = await self.client.chat.completions.create(
244
+ model=self.model,
245
+ messages=[
246
+ {
247
+ "role": "system",
248
+ "content": """
249
+ You are an intelligent assistant that evaluates how relevant a given document chunk is to a user's search query.
250
+ Your task is to analyze the similarity between the search query and the document chunk, and return **only the class label** that best represents the relevance:
251
+
252
+ - "class_1": Not relevant (score between 0.0 and 0.25)
253
+ - "class_2": Slightly relevant (score between 0.25 and 0.5)
254
+ - "class_3": Moderately relevant (score between 0.5 and 0.75)
255
+ - "class_4": Highly relevant (score between 0.75 and 1.0)
256
+
257
+ Respond with only one of: "class_1", "class_2", "class_3", or "class_4".
258
+
259
+ Examples:
260
+
261
+ Search query: "How to reset a router to factory settings?"
262
+ Document chunk: "Computers often come with customizable parental control settings."
263
+ Score: class_1
264
+
265
+ Search query: "Symptoms of vitamin D deficiency"
266
+ Document chunk: "Vitamin D deficiency has been linked to fatigue, bone pain, and muscle weakness."
267
+ Score: class_4
268
+
269
+ Search query: "Best practices for onboarding remote employees"
270
+ Document chunk: "An employee handbook can be useful for new hires, outlining company policies and benefits."
271
+ Score: class_2
272
+
273
+ Search query: "Benefits of mindfulness meditation"
274
+ Document chunk: "Practicing mindfulness has shown to reduce stress and improve focus in multiple studies."
275
+ Score: class_3
276
+
277
+ Search query: "What is Kubernetes used for?"
278
+ Document chunk: "Kubernetes is an open-source system for automating deployment, scaling, and management of containerized applications."
279
+ Score: class_4
280
+
281
+ Search query: "How to bake sourdough bread at home"
282
+ Document chunk: "The French Revolution began in 1789 and radically transformed society."
283
+ Score: class_1
284
+
285
+ Search query: "Machine learning algorithms for image classification"
286
+ Document chunk: "Convolutional Neural Networks (CNNs) are particularly effective in image classification tasks."
287
+ Score: class_4
288
+
289
+ Search query: "How to improve focus while working remotely"
290
+ Document chunk: "Creating a dedicated workspace and setting a consistent schedule can significantly improve focus during remote work."
291
+ Score: class_4
292
+
293
+ Search query: "Carbon emissions from electric vehicles vs gas cars"
294
+ Document chunk: "Electric vehicles produce zero emissions while driving, but battery production has environmental impacts."
295
+ Score: class_3
296
+
297
+ Search query: "Time zones in the United States"
298
+ Document chunk: "The U.S. is divided into six primary time zones: Eastern, Central, Mountain, Pacific, Alaska, and Hawaii-Aleutian."
299
+ Score: class_4
300
+ """,
301
+ },
302
+ {
303
+ "role": "user",
304
+ "content": f"""
305
+ Now evaluate the following pair:
306
+
307
+ Search query: {query}
308
+ Document chunk: {document}
309
+
310
+ Which class best represents the relevance?
311
+ """,
312
+ },
313
+ ],
314
+ temperature=self.temperature,
315
+ n=1,
316
+ logprobs=True,
317
+ top_logprobs=4,
318
+ max_tokens=3,
319
+ )
320
+
321
+ # Extract response and logprobs
322
+ token_logprobs = response.choices[0].logprobs.content
323
+ # Reconstruct the prediction and extract the top logprobs from the final token (e.g., "1")
324
+ final_token_logprob = token_logprobs[-1]
325
+ top_logprobs = final_token_logprob.top_logprobs
326
+ # Create a map of 'class_1' -> probability, using token combinations
327
+ class_probs = {}
328
+ for top_token in top_logprobs:
329
+ full_label = f"class_{top_token.token}"
330
+ prob = math.exp(top_token.logprob)
331
+ class_probs[full_label] = prob
332
+ # Optional: normalize in case some are missing
333
+ total_prob = sum(class_probs.values())
334
+ class_probs = {k: v / total_prob for k, v in class_probs.items()}
335
+ # Assign weights to classes
336
+ class_weights = {"class_1": 0.25, "class_2": 0.5, "class_3": 0.75, "class_4": 1.0}
337
+ # Compute the final smooth score
338
+ score = sum(class_weights.get(class_label, 0) * prob for class_label, prob in class_probs.items())
339
+ if score is not None:
340
+ if score > 1.0:
341
+ score = 1.0
342
+ elif score < 0.0:
343
+ score = 0.0
344
+
345
+ rerank_data = {"document": document, "relevance_score": score}
346
+ return rerank_data
342
347
 
343
348
  def get_scores(self, query: str, documents: list[str]):
344
349
  query_document_pairs = [(query, doc) for doc in documents]
345
350
  # Create event loop and run async code
346
351
  import asyncio
352
+
347
353
  try:
348
354
  loop = asyncio.get_running_loop()
349
355
  except RuntimeError:
@@ -351,10 +357,7 @@ class BaseLLMReranker(BaseModel, ABC):
351
357
  loop = asyncio.new_event_loop()
352
358
  asyncio.set_event_loop(loop)
353
359
 
354
- if self.method == "multi-class": # default 'multi-class' method
355
- documents_and_scores = loop.run_until_complete(self._rank_score(query_document_pairs))
356
- else:
357
- documents_and_scores = loop.run_until_complete(self._rank(query_document_pairs))
360
+ documents_and_scores = loop.run_until_complete(self._rank(query_document_pairs))
358
361
 
359
362
  scores = [score for _, score in documents_and_scores]
360
363
  return scores
@@ -8,7 +8,7 @@ def format_exception_error(exception):
8
8
  exception_type, _exception_object, exception_traceback = sys.exc_info()
9
9
  filename = exception_traceback.tb_frame.f_code.co_filename
10
10
  line_number = exception_traceback.tb_lineno
11
- error_message = f'{exception_type.__name__}: {exception}, raised at: {filename}#{line_number}'
11
+ error_message = f"{exception_type.__name__}: {exception}, raised at: {filename}#{line_number}"
12
12
  except Exception:
13
13
  error_message = str(exception)
14
14
  return error_message
@@ -26,7 +26,7 @@ def dict_to_yaml(d, indent=0):
26
26
 
27
27
 
28
28
  # Mocks won't always have 'name' attribute.
29
- def get_class_name(instance: Any, default: str = 'unknown'):
30
- if hasattr(instance.__class__, 'name'):
29
+ def get_class_name(instance: Any, default: str = "unknown"):
30
+ if hasattr(instance.__class__, "name"):
31
31
  return instance.__class__.name
32
32
  return default