MindsDB 25.4.5.0__py3-none-any.whl → 25.5.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (350)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +215 -185
  3. mindsdb/api/a2a/__init__.py +0 -0
  4. mindsdb/api/a2a/__main__.py +114 -0
  5. mindsdb/api/a2a/a2a_client.py +439 -0
  6. mindsdb/api/a2a/agent.py +308 -0
  7. mindsdb/api/a2a/common/__init__.py +0 -0
  8. mindsdb/api/a2a/common/client/__init__.py +4 -0
  9. mindsdb/api/a2a/common/client/card_resolver.py +21 -0
  10. mindsdb/api/a2a/common/client/client.py +86 -0
  11. mindsdb/api/a2a/common/server/__init__.py +4 -0
  12. mindsdb/api/a2a/common/server/server.py +164 -0
  13. mindsdb/api/a2a/common/server/task_manager.py +287 -0
  14. mindsdb/api/a2a/common/server/utils.py +28 -0
  15. mindsdb/api/a2a/common/types.py +365 -0
  16. mindsdb/api/a2a/constants.py +9 -0
  17. mindsdb/api/a2a/run_a2a.py +129 -0
  18. mindsdb/api/a2a/task_manager.py +594 -0
  19. mindsdb/api/executor/command_executor.py +49 -28
  20. mindsdb/api/executor/datahub/classes/response.py +5 -2
  21. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
  22. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
  23. mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
  24. mindsdb/api/executor/planner/query_planner.py +14 -2
  25. mindsdb/api/executor/sql_query/result_set.py +185 -52
  26. mindsdb/api/executor/sql_query/sql_query.py +1 -1
  27. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +11 -13
  28. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
  29. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
  30. mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
  31. mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
  32. mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
  33. mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
  34. mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
  35. mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
  36. mindsdb/api/http/initialize.py +118 -85
  37. mindsdb/api/http/namespaces/analysis.py +17 -4
  38. mindsdb/api/http/namespaces/file.py +8 -2
  39. mindsdb/api/http/namespaces/sql.py +13 -27
  40. mindsdb/api/http/namespaces/tree.py +1 -1
  41. mindsdb/api/http/start.py +7 -2
  42. mindsdb/api/mcp/start.py +42 -5
  43. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
  44. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
  45. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
  46. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
  47. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +86 -123
  48. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
  49. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
  50. mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
  51. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
  52. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
  53. mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
  54. mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
  55. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
  56. mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
  57. mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
  58. mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
  59. mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
  60. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  61. mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
  62. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
  63. mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
  64. mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
  65. mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
  66. mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
  67. mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
  68. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
  69. mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
  70. mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
  71. mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
  72. mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
  73. mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
  74. mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
  75. mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
  76. mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
  77. mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
  78. mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
  79. mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
  80. mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
  81. mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
  82. mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
  83. mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
  84. mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
  85. mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
  86. mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
  87. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
  88. mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
  89. mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
  90. mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
  91. mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
  92. mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
  93. mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
  94. mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
  95. mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
  96. mindsdb/integrations/handlers/email_handler/settings.py +0 -1
  97. mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
  98. mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
  99. mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
  100. mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
  101. mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
  102. mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
  103. mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
  104. mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
  105. mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
  106. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
  107. mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
  108. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
  109. mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
  110. mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
  111. mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
  112. mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
  113. mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
  114. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
  115. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
  116. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
  117. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
  118. mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
  119. mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
  120. mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
  121. mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
  122. mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
  123. mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
  124. mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
  125. mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
  126. mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
  127. mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
  128. mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
  129. mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
  130. mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
  131. mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
  132. mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
  133. mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
  134. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
  135. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
  136. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
  137. mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
  138. mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
  139. mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
  140. mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
  141. mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
  142. mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
  143. mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
  144. mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
  145. mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
  146. mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
  147. mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
  148. mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
  149. mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
  150. mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
  151. mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
  152. mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
  153. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
  154. mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
  155. mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
  156. mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
  157. mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
  158. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -3
  159. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  160. mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
  161. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
  162. mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
  163. mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
  164. mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
  165. mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
  166. mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
  167. mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
  168. mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
  169. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
  170. mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
  171. mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
  172. mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
  173. mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
  174. mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
  175. mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
  176. mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
  177. mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
  178. mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
  179. mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
  180. mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
  181. mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
  182. mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
  183. mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
  184. mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
  185. mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
  186. mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
  187. mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
  188. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
  189. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  190. mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
  191. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
  192. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
  193. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
  194. mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
  195. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
  196. mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
  197. mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
  198. mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
  199. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
  200. mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
  201. mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
  202. mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
  203. mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
  204. mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
  205. mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
  206. mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
  207. mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
  208. mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
  209. mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
  210. mindsdb/integrations/handlers/openai_handler/openai_handler.py +5 -4
  211. mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
  212. mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
  213. mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
  214. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
  215. mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
  216. mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
  217. mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
  218. mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
  219. mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
  220. mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
  221. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
  222. mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
  223. mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
  224. mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
  225. mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
  226. mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
  227. mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
  228. mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
  229. mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
  230. mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
  231. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
  232. mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
  233. mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
  234. mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
  235. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
  236. mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
  237. mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
  238. mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
  239. mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
  240. mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
  241. mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
  242. mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
  243. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
  244. mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
  245. mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
  246. mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
  247. mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
  248. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
  249. mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
  250. mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
  251. mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
  252. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
  253. mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
  254. mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
  255. mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
  256. mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
  257. mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
  258. mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
  259. mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
  260. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
  261. mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
  262. mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
  263. mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
  264. mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
  265. mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
  266. mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
  267. mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
  268. mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
  269. mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
  270. mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
  271. mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
  272. mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
  273. mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
  274. mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
  275. mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
  276. mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
  277. mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
  278. mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
  279. mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
  280. mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
  281. mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
  282. mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
  283. mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
  284. mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
  285. mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
  286. mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
  287. mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
  288. mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
  289. mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
  290. mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
  291. mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
  292. mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
  293. mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
  294. mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
  295. mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
  296. mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
  297. mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
  298. mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
  299. mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
  300. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
  301. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
  302. mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
  303. mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
  304. mindsdb/integrations/libs/response.py +67 -52
  305. mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
  306. mindsdb/integrations/utilities/files/file_reader.py +5 -2
  307. mindsdb/integrations/utilities/handler_utils.py +15 -3
  308. mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
  309. mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
  310. mindsdb/integrations/utilities/utils.py +3 -3
  311. mindsdb/interfaces/agents/agents_controller.py +164 -1
  312. mindsdb/interfaces/agents/constants.py +29 -2
  313. mindsdb/interfaces/agents/langchain_agent.py +18 -8
  314. mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
  315. mindsdb/interfaces/database/projects.py +1 -7
  316. mindsdb/interfaces/functions/controller.py +11 -14
  317. mindsdb/interfaces/functions/to_markdown.py +9 -124
  318. mindsdb/interfaces/knowledge_base/controller.py +47 -19
  319. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +41 -15
  320. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
  321. mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
  322. mindsdb/interfaces/knowledge_base/utils.py +10 -15
  323. mindsdb/interfaces/model/model_controller.py +0 -2
  324. mindsdb/interfaces/query_context/context_controller.py +66 -10
  325. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
  326. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
  327. mindsdb/interfaces/skills/skill_tool.py +202 -57
  328. mindsdb/interfaces/skills/sql_agent.py +238 -28
  329. mindsdb/interfaces/storage/fs.py +1 -0
  330. mindsdb/interfaces/variables/__init__.py +0 -0
  331. mindsdb/interfaces/variables/variables_controller.py +97 -0
  332. mindsdb/migrations/env.py +5 -7
  333. mindsdb/migrations/migrate.py +47 -9
  334. mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
  335. mindsdb/utilities/config.py +333 -220
  336. mindsdb/utilities/context.py +1 -1
  337. mindsdb/utilities/functions.py +0 -36
  338. mindsdb/utilities/langfuse.py +19 -10
  339. mindsdb/utilities/otel/__init__.py +9 -193
  340. mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
  341. mindsdb/utilities/otel/prepare.py +198 -0
  342. mindsdb/utilities/sql.py +83 -0
  343. mindsdb/utilities/starters.py +13 -0
  344. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/METADATA +351 -338
  345. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/RECORD +348 -322
  346. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/WHEEL +1 -1
  347. mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
  348. mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
  349. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/licenses/LICENSE +0 -0
  350. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,434 @@
1
+ from typing import List, Dict, Any, Optional
2
+ import json
3
+ import pandas as pd
4
+ import ast
5
+
6
+ from mindsdb.interfaces.knowledge_base.preprocessing.models import (
7
+ Document,
8
+ ProcessedChunk,
9
+ JSONChunkingConfig
10
+ )
11
+ from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import DocumentPreprocessor
12
+ from mindsdb.utilities import log
13
+
14
+ # Set up logger
15
+ logger = log.getLogger(__name__)
16
+
17
+
18
+ class JSONChunkingPreprocessor(DocumentPreprocessor):
19
+ """JSON chunking preprocessor for handling JSON data structures"""
20
+
21
def __init__(self, config: Optional[JSONChunkingConfig] = None):
    """Set up the preprocessor with a JSON chunking configuration.

    Args:
        config: Chunking options. When a falsy value (such as ``None``)
            is given, a default ``JSONChunkingConfig()`` is created.
    """
    super().__init__()
    # Chunk boundaries come from the JSON structure itself, so no text
    # splitter is created here.
    if config:
        self.config = config
    else:
        self.config = JSONChunkingConfig()
26
+
27
def process_documents(self, documents: List[Document]) -> List[ProcessedChunk]:
    """Turn each JSON document into chunks, collecting them in one list.

    A document whose content cannot be parsed, or whose processing raises,
    contributes a single error chunk instead of aborting the whole batch.

    Args:
        documents: List of documents containing JSON content

    Returns:
        List of processed chunks (including error chunks, if any)
    """
    collected = []

    for doc in documents:
        try:
            parsed = self._parse_document_content(doc)
            if parsed is not None:
                # Chunking strategy depends on the shape of the parsed data
                collected.extend(self._process_json_data(parsed, doc))
            else:
                # Parser signalled failure by returning None
                failure_reason = "Content is neither valid JSON nor a valid Python literal."
                collected.append(self._create_error_chunk(doc, failure_reason))
        except Exception as e:
            # Per-document boundary: record the failure and keep going
            logger.error(f"Error processing document {doc.id}: {e}")
            collected.append(self._create_error_chunk(doc, str(e)))

    return collected
58
+
59
def _parse_document_content(self, doc: Document) -> Optional[Any]:
    """Parse document content into a Python object.

    Non-string content is returned unchanged. String content is decoded as
    JSON first; if that fails it is evaluated as a Python literal.

    Args:
        doc: Document with content to parse

    Returns:
        Parsed content as a Python object, or None if parsing failed.
        NOTE: the JSON document ``null`` also decodes to None, so callers
        cannot tell it apart from a parsing failure.
    """
    content = doc.content

    # Anything that is not a string is assumed to be parsed already
    if not isinstance(content, str):
        return content

    # Preferred format: JSON
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        pass

    # Fallback: Python literal syntax (single quotes, tuples, ...).
    # ast.literal_eval may raise TypeError (e.g. unhashable dict key),
    # MemoryError or RecursionError in addition to ValueError/SyntaxError,
    # so all of them are treated as "could not parse".
    try:
        return ast.literal_eval(content)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as e:
        logger.error(f"Error parsing content for document {doc.id}: {e}")
        # The caller (process_documents) builds the error chunk
        return None
83
+
84
def _process_json_data(self, json_data: Any, doc: Document) -> List[ProcessedChunk]:
    """Dispatch parsed JSON data to the chunker matching its structure.

    Args:
        json_data: Parsed JSON data as a Python object
        doc: Original document

    Returns:
        List of processed chunks
    """
    # A list is chunked element by element
    if isinstance(json_data, list):
        return self._process_json_list(json_data, doc)

    # Anything that is neither list nor dict becomes one primitive chunk
    if not isinstance(json_data, dict):
        return [self._create_chunk_from_primitive(json_data, doc)]

    # Single object: either one chunk for the whole object, or one per field
    if self.config.chunk_by_object:
        return [self._create_chunk_from_dict(json_data, doc, 0, 1)]
    return self._process_json_dict(json_data, doc)
106
+
107
def _create_error_chunk(self, doc: Document, error_message: str) -> ProcessedChunk:
    """Build a chunk that records a processing failure for a document.

    Args:
        doc: Original document
        error_message: Error message to include in the chunk

    Returns:
        ProcessedChunk whose id is the document id suffixed with
        ``_error`` and whose content carries the error message
    """
    error_id = f"{doc.id}_error"
    error_text = f"Error processing document: {error_message}"
    # Error chunks always use chunk index 0
    error_metadata = self._prepare_chunk_metadata(doc.id, 0, doc.metadata)
    return ProcessedChunk(id=error_id, content=error_text, metadata=error_metadata)
122
+
123
def _process_json_list(self, json_list: List, doc: Document) -> List[ProcessedChunk]:
    """Create one chunk per element of a JSON list.

    Dict elements are chunked as objects; nested lists are serialised to
    a JSON string first; every other element is chunked as a primitive.
    """
    element_count = len(json_list)
    result = []

    for position, element in enumerate(json_list):
        if isinstance(element, dict):
            result.append(
                self._create_chunk_from_dict(element, doc, position, element_count)
            )
            continue

        # Nested lists are stored via their string representation
        payload = json.dumps(element) if isinstance(element, list) else element
        result.append(
            self._create_chunk_from_primitive(
                payload,
                doc,
                chunk_index=position,
                total_chunks=element_count
            )
        )

    return result
152
+
153
def _process_json_dict(self, json_dict: Dict, doc: Document) -> List[ProcessedChunk]:
    """Process a single JSON object into chunks, one per selected field.

    Args:
        json_dict: The JSON object to split. A JSON-encoded string is
            also accepted and decoded first.
        doc: Original document; supplies the id and base metadata

    Returns:
        One chunk per field that passes the include/exclude filters of
        the configuration, or a single error chunk when a string input
        is not valid JSON or does not decode to an object.
    """
    # Ensure we're working with a dictionary
    if isinstance(json_dict, str):
        try:
            decoded = json.loads(json_dict)
        except json.JSONDecodeError:
            logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
            return [self._create_error_chunk(doc, "Invalid JSON string")]
        # json.loads also accepts arrays and scalars; those have no fields
        # to iterate, so report them instead of failing on .items() below.
        if not isinstance(decoded, dict):
            logger.error(f"JSON string is not an object: {json_dict[:100]}...")
            return [self._create_error_chunk(doc, "JSON string is not an object")]
        json_dict = decoded

    # Filter fields based on include/exclude lists: an empty include list
    # means "all fields"; the exclude list is applied afterwards.
    include_fields = self.config.include_fields
    exclude_fields = self.config.exclude_fields
    fields_to_process = {
        key: value
        for key, value in json_dict.items()
        if (not include_fields or key in include_fields)
        and key not in exclude_fields
    }

    # Create a chunk for each field
    chunks = []
    total_fields = len(fields_to_process)
    for i, (key, value) in enumerate(fields_to_process.items()):
        field_content = self._format_field_content(key, value)

        # Create chunk metadata and remember which field it came from
        metadata = self._prepare_chunk_metadata(doc.id, i, doc.metadata)
        metadata["field_name"] = key

        # Extract fields to metadata for filtering (uses the whole object,
        # not only the current field)
        self._extract_fields_to_metadata(json_dict, metadata)

        # Generate chunk ID; the character span covers the whole field text
        chunk_id = self._generate_chunk_id(
            chunk_index=i,
            total_chunks=total_fields,
            start_char=0,
            end_char=len(field_content),
            provided_id=doc.id,
            content_column=self.config.content_column
        )

        chunks.append(
            ProcessedChunk(
                id=chunk_id,
                content=field_content,
                metadata=metadata
            )
        )

    return chunks
205
+
206
def _create_chunk_from_dict(self,
                            json_dict: Dict,
                            doc: Document,
                            chunk_index: int,
                            total_chunks: int) -> ProcessedChunk:
    """Create one chunk that represents a whole JSON object.

    Args:
        json_dict: The JSON object, or a string holding its JSON encoding.
        doc: Source document; supplies the id and base metadata.
        chunk_index: Position of this chunk among its siblings.
        total_chunks: Number of sibling chunks produced for the document.

    Returns:
        ProcessedChunk with the rendered object as content, or an error chunk
        when the input is not a JSON object.
    """
    # Accept a JSON-encoded string as well as an already decoded object.
    if isinstance(json_dict, str):
        try:
            json_dict = json.loads(json_dict)
        except json.JSONDecodeError:
            logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
            return self._create_error_chunk(doc, "Invalid JSON string")

    # A string can decode to a list, number, bool or None; everything below
    # needs a mapping, so report those as errors instead of crashing.
    if not isinstance(json_dict, dict):
        logger.error(f"Expected a JSON object, got {type(json_dict).__name__}")
        return self._create_error_chunk(doc, "JSON value is not an object")

    # Format the content
    if self.config.flatten_nested:
        flattened = self._flatten_dict(json_dict, self.config.nested_delimiter)
        filtered_dict = self._filter_fields(flattened)
        content = self._dict_to_text(filtered_dict)
    else:
        # Without flattening, include/exclude only apply to top-level keys.
        filtered_dict = {k: v for k, v in json_dict.items()
                         if (not self.config.include_fields or k in self.config.include_fields)
                         and k not in self.config.exclude_fields}
        content = json.dumps(filtered_dict, indent=2)

    # Create metadata
    metadata = self._prepare_chunk_metadata(doc.id, chunk_index, doc.metadata)

    # Extract fields to metadata for filtering (uses the unfiltered object)
    self._extract_fields_to_metadata(json_dict, metadata)

    # Generate chunk ID
    chunk_id = self._generate_chunk_id(
        chunk_index=chunk_index,
        total_chunks=total_chunks,
        start_char=0,
        end_char=len(content),
        provided_id=doc.id,
        content_column=self.config.content_column
    )

    return ProcessedChunk(
        id=chunk_id,
        content=content,
        metadata=metadata
    )
252
+
253
+ def _filter_fields(self, flattened_dict: Dict) -> Dict:
254
+ """Filter fields based on include/exclude configuration"""
255
+ # If include_fields is specified, only keep those fields
256
+ if self.config.include_fields:
257
+ filtered_dict = {k: v for k, v in flattened_dict.items()
258
+ if any(k == field or k.startswith(field + self.config.nested_delimiter)
259
+ for field in self.config.include_fields)}
260
+ else:
261
+ filtered_dict = flattened_dict.copy()
262
+
263
+ # Apply exclude_fields
264
+ if self.config.exclude_fields:
265
+ for exclude_field in self.config.exclude_fields:
266
+ # Remove exact field match
267
+ if exclude_field in filtered_dict:
268
+ filtered_dict.pop(exclude_field)
269
+
270
+ # Remove any nested fields
271
+ nested_prefix = exclude_field + self.config.nested_delimiter
272
+ keys_to_remove = [k for k in filtered_dict if k.startswith(nested_prefix)]
273
+ for key in keys_to_remove:
274
+ filtered_dict.pop(key)
275
+
276
+ return filtered_dict
277
+
278
def _create_chunk_from_primitive(
    self,
    value: Any,
    doc: Document,
    chunk_index: int = 0,
    total_chunks: int = 1
) -> ProcessedChunk:
    """Wrap a single non-object value in a chunk whose content is ``str(value)``.

    Args:
        value: The value to store.
        doc: Source document; supplies the id and base metadata.
        chunk_index: Position of this chunk among its siblings.
        total_chunks: Number of sibling chunks produced for the document.

    Returns:
        ProcessedChunk holding the stringified value.
    """
    text = str(value)
    chunk_metadata = self._prepare_chunk_metadata(doc.id, chunk_index, doc.metadata)

    # There is no JSON object to pull fields from here; when configured, the
    # raw value itself is kept as a metadata entry instead.
    if self.config.extract_all_primitives:
        chunk_metadata["field_value"] = value

    return ProcessedChunk(
        id=self._generate_chunk_id(
            chunk_index=chunk_index,
            total_chunks=total_chunks,
            start_char=0,
            end_char=len(text),
            provided_id=doc.id,
            content_column=self.config.content_column
        ),
        content=text,
        metadata=chunk_metadata
    )
311
+
312
+ def _flatten_dict(self, d: Dict, delimiter: str = '.', prefix: str = '') -> Dict:
313
+ """Flatten a nested dictionary structure"""
314
+ result = {}
315
+ for k, v in d.items():
316
+ new_key = f"{prefix}{delimiter}{k}" if prefix else k
317
+ if isinstance(v, dict):
318
+ result.update(self._flatten_dict(v, delimiter, new_key))
319
+ elif isinstance(v, list) and all(isinstance(item, dict) for item in v):
320
+ # Handle lists of dictionaries
321
+ for i, item in enumerate(v):
322
+ result.update(self._flatten_dict(item, delimiter, f"{new_key}[{i}]"))
323
+ else:
324
+ result[new_key] = v
325
+ return result
326
+
327
+ def _dict_to_text(self, d: Dict) -> str:
328
+ """Convert a dictionary to a human-readable text format"""
329
+ lines = []
330
+ for key, value in d.items():
331
+ if value is None:
332
+ continue
333
+ if isinstance(value, list):
334
+ if not value:
335
+ continue
336
+ if all(isinstance(item, dict) for item in value):
337
+ # Format list of dictionaries
338
+ lines.append(f"{key}:")
339
+ for i, item in enumerate(value):
340
+ lines.append(f" Item {i+1}:")
341
+ for k, v in item.items():
342
+ lines.append(f" {k}: {v}")
343
+ else:
344
+ # Format list of primitives
345
+ value_str = ", ".join(str(item) for item in value)
346
+ lines.append(f"{key}: {value_str}")
347
+ else:
348
+ lines.append(f"{key}: {value}")
349
+
350
+ return "\n".join(lines)
351
+
352
+ def _format_field_content(self, key: str, value: Any) -> str:
353
+ """Format a field's content for inclusion in a chunk"""
354
+ if isinstance(value, dict):
355
+ if self.config.flatten_nested:
356
+ flattened = self._flatten_dict(value, self.config.nested_delimiter, key)
357
+ return self._dict_to_text(flattened)
358
+ else:
359
+ return f"{key}: {json.dumps(value, indent=2)}"
360
+ elif isinstance(value, list):
361
+ if all(isinstance(item, dict) for item in value):
362
+ # Format list of dictionaries
363
+ lines = [f"{key}:"]
364
+ for i, item in enumerate(value):
365
+ lines.append(f" Item {i+1}:")
366
+ for k, v in item.items():
367
+ lines.append(f" {k}: {v}")
368
+ return "\n".join(lines)
369
+ else:
370
+ # Format list of primitives
371
+ value_str = ", ".join(str(item) for item in value if item is not None)
372
+ return f"{key}: {value_str}"
373
+ else:
374
+ return f"{key}: {value}"
375
+
376
def _extract_fields_to_metadata(self, json_dict: Dict, metadata: Dict) -> None:
    """Copy selected JSON fields into ``metadata`` as ``field_<name>`` entries.

    Selection rules, in order:
      * ``config.extract_all_primitives``: every primitive of the flattened object.
      * ``config.metadata_fields`` empty: the top-level primitives, or, when
        there are none, every primitive of the flattened object.
      * otherwise: only the listed fields, looked up in the flattened object
        first and by walking the nested structure as a fallback.

    Args:
        json_dict: The JSON object, or a string holding its JSON encoding.
        metadata: Dictionary updated in place.

    Returns:
        None. Nothing is written when the input is not a JSON object.
    """
    # Accept a JSON-encoded string as well as an already decoded object.
    if isinstance(json_dict, str):
        try:
            json_dict = json.loads(json_dict)
        except json.JSONDecodeError:
            logger.error(f"Error parsing JSON string: {json_dict[:100]}...")
            return

    # A string can decode to a list, number, bool or None; flattening those
    # would raise AttributeError, so there is simply nothing to extract.
    if not isinstance(json_dict, dict):
        logger.error(f"Expected a JSON object, got {type(json_dict).__name__}")
        return

    def _copy_primitives(source: Dict) -> bool:
        """Copy str/int/float/bool values of ``source``; report whether any was copied."""
        copied = False
        for key, value in source.items():
            # None never passes the isinstance check, so no separate test is needed.
            if isinstance(value, (str, int, float, bool)):
                metadata[f"field_{key}"] = value
                copied = True
        return copied

    # Always flatten the dictionary for metadata extraction
    flattened = self._flatten_dict(json_dict, self.config.nested_delimiter)

    if self.config.extract_all_primitives:
        _copy_primitives(flattened)
        return

    if not self.config.metadata_fields:
        # Prefer top-level primitives; fall back to nested ones only when the
        # top level has none.
        if not _copy_primitives(json_dict):
            _copy_primitives(flattened)
        return

    # Extract only the specified fields
    for field in self.config.metadata_fields:
        if field in flattened and flattened[field] is not None:
            metadata[f"field_{field}"] = flattened[field]
            continue

        # Not a flattened leaf (or None there): walk the nested structure
        # part by part instead.
        current = json_dict
        for part in field.split(self.config.nested_delimiter):
            if not (isinstance(current, dict) and part in current):
                break
            current = current[part]
        else:
            # Every path part was found
            if current is not None:
                metadata[f"field_{field}"] = current
431
+
432
def to_dataframe(self, chunks: List[ProcessedChunk]) -> pd.DataFrame:
    """Build a DataFrame with one row per chunk.

    Args:
        chunks: Chunks to convert; each row is the chunk's ``model_dump()``.

    Returns:
        DataFrame built from the dumped chunks.
    """
    records = []
    for chunk in chunks:
        records.append(chunk.model_dump())
    return pd.DataFrame(records)
@@ -13,6 +13,7 @@ from mindsdb.integrations.utilities.rag.settings import LLMConfig
13
13
class PreprocessorType(Enum):
    """Identifiers of the available preprocessing strategies."""
    CONTEXTUAL = "contextual"
    TEXT_CHUNKING = "text_chunking"
    # Selects JSON chunking
    JSON_CHUNKING = "json_chunking"
16
17
 
17
18
 
18
19
  class BasePreprocessingConfig(BaseModel):
@@ -62,6 +63,51 @@ class TextChunkingConfig(BaseModel):
62
63
  arbitrary_types_allowed = True
63
64
 
64
65
 
66
class JSONChunkingConfig(BasePreprocessingConfig):
    """Configuration for JSON chunking preprocessor

    Every option has a default, so the config can be created without arguments.
    """
    # Flatten nested objects into delimiter-joined keys instead of emitting raw JSON
    flatten_nested: bool = Field(
        default=True,
        description="Whether to flatten nested JSON structures"
    )
    include_metadata: bool = Field(
        default=True,
        description="Whether to include original metadata in chunks"
    )
    chunk_by_object: bool = Field(
        default=True,
        description="Whether to chunk by top-level objects (True) or create a single document (False)"
    )
    exclude_fields: List[str] = Field(
        default_factory=list,
        description="List of fields to exclude from chunking"
    )
    # An empty list means "no include filter"
    include_fields: List[str] = Field(
        default_factory=list,
        description="List of fields to include in chunking (if empty, all fields except excluded ones are included)"
    )
    metadata_fields: List[str] = Field(
        default_factory=list,
        description="List of fields to extract into metadata for filtering "
                    "(can include nested fields using dot notation). "
                    "If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure)."
    )
    extract_all_primitives: bool = Field(
        default=False,
        description="Whether to extract all primitive values (strings, numbers, booleans) into metadata"
    )
    # Separator placed between the parts of a flattened key, e.g. "a.b"
    nested_delimiter: str = Field(
        default=".",
        description="Delimiter for flattened nested field names"
    )
    content_column: str = Field(
        default="content",
        description="Name of the content column for chunk ID generation"
    )

    class Config:
        arbitrary_types_allowed = True
109
+
110
+
65
111
  class PreprocessingConfig(BaseModel):
66
112
  """Complete preprocessing configuration"""
67
113
  type: PreprocessorType = Field(
@@ -76,6 +122,10 @@ class PreprocessingConfig(BaseModel):
76
122
  default=None,
77
123
  description="Configuration for text chunking preprocessing"
78
124
  )
125
+ json_chunking_config: Optional[JSONChunkingConfig] = Field(
126
+ default=None,
127
+ description="Configuration for JSON chunking preprocessing"
128
+ )
79
129
 
80
130
  @model_validator(mode='after')
81
131
  def validate_config_presence(self) -> 'PreprocessingConfig':
@@ -84,6 +134,10 @@ class PreprocessingConfig(BaseModel):
84
134
  self.contextual_config = ContextualConfig()
85
135
  if self.type == PreprocessorType.TEXT_CHUNKING and not self.text_chunking_config:
86
136
  self.text_chunking_config = TextChunkingConfig()
137
+ if self.type == PreprocessorType.JSON_CHUNKING and not self.json_chunking_config:
138
+ # Import here to avoid circular imports
139
+ from mindsdb.interfaces.knowledge_base.preprocessing.json_chunker import JSONChunkingConfig
140
+ self.json_chunking_config = JSONChunkingConfig()
87
141
  return self
88
142
 
89
143
 
@@ -2,27 +2,22 @@
2
2
  import hashlib
3
3
 
4
4
 
5
def generate_document_id(content: str, content_column: str = None, provided_id: str = None) -> str:
    """
    Return a deterministic ID for a document.

    A caller-supplied ID is returned unchanged; otherwise the ID is derived
    from the content alone.

    Args:
        content: The content string
        content_column: Name of the content column (not used in ID generation, kept for backward compatibility)
        provided_id: Optional user-provided ID
    Returns:
        ``provided_id`` when given, else the first 16 hex characters of the
        MD5 digest of the content
    """
    if provided_id is None:
        # Short 16-character hash based only on the content
        digest = hashlib.md5(content.encode()).hexdigest()
        return digest[:16]

    return provided_id
@@ -1,4 +1,3 @@
1
- import sys
2
1
  import copy
3
2
  import datetime as dt
4
3
  from copy import deepcopy
@@ -28,7 +27,6 @@ from mindsdb.utilities import log
28
27
 
29
28
  logger = log.getLogger(__name__)
30
29
 
31
- IS_PY36 = sys.version_info[1] <= 6
32
30
 
33
31
  default_project = config.get('default_project')
34
32