MindsDB 25.5.3.0__py3-none-any.whl → 25.5.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +8 -8
- mindsdb/__main__.py +127 -79
- mindsdb/api/a2a/__init__.py +0 -0
- mindsdb/api/a2a/__main__.py +144 -0
- mindsdb/api/a2a/agent.py +308 -0
- mindsdb/api/a2a/common/__init__.py +0 -0
- mindsdb/api/a2a/common/server/__init__.py +4 -0
- mindsdb/api/a2a/common/server/server.py +164 -0
- mindsdb/api/a2a/common/server/task_manager.py +287 -0
- mindsdb/api/a2a/common/server/utils.py +28 -0
- mindsdb/api/a2a/common/types.py +365 -0
- mindsdb/api/a2a/constants.py +9 -0
- mindsdb/api/a2a/run_a2a.py +86 -0
- mindsdb/api/a2a/task_manager.py +560 -0
- mindsdb/api/executor/command_executor.py +185 -309
- mindsdb/api/executor/datahub/classes/response.py +5 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
- mindsdb/api/executor/planner/query_planner.py +10 -1
- mindsdb/api/executor/sql_query/result_set.py +185 -52
- mindsdb/api/executor/sql_query/sql_query.py +1 -1
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +9 -12
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
- mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
- mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
- mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
- mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
- mindsdb/api/http/initialize.py +99 -83
- mindsdb/api/http/namespaces/analysis.py +3 -3
- mindsdb/api/http/namespaces/config.py +61 -86
- mindsdb/api/http/namespaces/file.py +8 -2
- mindsdb/api/http/namespaces/sql.py +13 -27
- mindsdb/api/mcp/start.py +42 -5
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +82 -115
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
- mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
- mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
- mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
- mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
- mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
- mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
- mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
- mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
- mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
- mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
- mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
- mindsdb/integrations/handlers/byom_handler/requirements.txt +1 -2
- mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
- mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
- mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
- mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
- mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
- mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
- mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
- mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
- mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
- mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
- mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
- mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
- mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
- mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
- mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
- mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
- mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
- mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
- mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
- mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
- mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
- mindsdb/integrations/handlers/email_handler/settings.py +0 -1
- mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
- mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
- mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
- mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
- mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
- mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
- mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
- mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
- mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
- mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
- mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
- mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
- mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
- mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
- mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
- mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
- mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
- mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
- mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
- mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
- mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
- mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
- mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
- mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
- mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
- mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
- mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
- mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
- mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
- mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
- mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
- mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
- mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
- mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
- mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
- mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +4 -4
- mindsdb/integrations/handlers/jira_handler/jira_tables.py +9 -9
- mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
- mindsdb/integrations/handlers/lancedb_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
- mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
- mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
- mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -1
- mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +37 -20
- mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
- mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
- mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
- mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
- mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
- mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
- mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
- mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
- mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
- mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
- mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
- mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
- mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
- mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
- mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
- mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
- mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
- mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
- mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
- mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
- mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
- mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
- mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
- mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
- mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
- mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
- mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
- mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
- mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
- mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
- mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
- mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
- mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
- mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
- mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
- mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
- mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
- mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
- mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
- mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
- mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
- mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
- mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
- mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
- mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
- mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
- mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
- mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
- mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
- mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
- mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
- mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
- mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
- mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
- mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
- mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
- mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
- mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
- mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
- mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
- mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
- mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
- mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
- mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
- mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
- mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
- mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
- mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
- mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
- mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
- mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
- mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
- mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
- mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
- mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
- mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
- mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
- mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
- mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
- mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
- mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
- mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
- mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
- mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
- mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
- mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
- mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
- mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
- mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
- mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
- mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
- mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
- mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
- mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
- mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
- mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
- mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
- mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
- mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
- mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
- mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
- mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
- mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
- mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
- mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
- mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
- mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
- mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
- mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
- mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
- mindsdb/integrations/libs/llm/config.py +13 -0
- mindsdb/integrations/libs/llm/utils.py +37 -65
- mindsdb/integrations/libs/response.py +67 -52
- mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
- mindsdb/integrations/utilities/handler_utils.py +15 -3
- mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +230 -227
- mindsdb/integrations/utilities/utils.py +3 -3
- mindsdb/interfaces/agents/agents_controller.py +164 -1
- mindsdb/interfaces/agents/constants.py +32 -13
- mindsdb/interfaces/agents/langchain_agent.py +106 -95
- mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
- mindsdb/interfaces/knowledge_base/controller.py +250 -216
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +13 -10
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
- mindsdb/interfaces/query_context/context_controller.py +66 -10
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
- mindsdb/interfaces/skills/skill_tool.py +202 -57
- mindsdb/interfaces/skills/sql_agent.py +205 -17
- mindsdb/interfaces/storage/fs.py +1 -0
- mindsdb/interfaces/variables/__init__.py +0 -0
- mindsdb/interfaces/variables/variables_controller.py +97 -0
- mindsdb/migrations/env.py +5 -7
- mindsdb/migrations/migrate.py +47 -7
- mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
- mindsdb/utilities/config.py +287 -216
- mindsdb/utilities/starters.py +13 -0
- {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/METADATA +646 -698
- {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/RECORD +312 -295
- {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
- {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.3.0.dist-info → mindsdb-25.5.4.1.dist-info}/top_level.txt +0 -0
|
@@ -5,15 +5,7 @@ from typing import Dict, List, Optional
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
|
-
from mindsdb_sql_parser.ast import (
|
|
9
|
-
BinaryOperation,
|
|
10
|
-
Constant,
|
|
11
|
-
Identifier,
|
|
12
|
-
Select,
|
|
13
|
-
Update,
|
|
14
|
-
Delete,
|
|
15
|
-
Star
|
|
16
|
-
)
|
|
8
|
+
from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
|
|
17
9
|
from mindsdb_sql_parser.ast.mindsdb import CreatePredictor
|
|
18
10
|
|
|
19
11
|
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
@@ -27,11 +19,14 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
|
|
|
27
19
|
from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
|
|
28
20
|
from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
|
|
29
21
|
from mindsdb.integrations.utilities.handler_utils import get_api_key
|
|
30
|
-
from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
|
|
22
|
+
from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
|
|
23
|
+
construct_model_from_args,
|
|
24
|
+
)
|
|
31
25
|
|
|
32
26
|
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
|
|
33
27
|
from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
|
|
34
28
|
from mindsdb.interfaces.database.projects import ProjectController
|
|
29
|
+
from mindsdb.interfaces.variables.variables_controller import variables_controller
|
|
35
30
|
from mindsdb.interfaces.knowledge_base.preprocessing.models import PreprocessingConfig, Document
|
|
36
31
|
from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import PreprocessorFactory
|
|
37
32
|
from mindsdb.interfaces.model.functions import PredictorRecordNotFound
|
|
@@ -47,11 +42,7 @@ from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMRe
|
|
|
47
42
|
|
|
48
43
|
logger = log.getLogger(__name__)
|
|
49
44
|
|
|
50
|
-
KB_TO_VECTORDB_COLUMNS = {
|
|
51
|
-
'id': 'original_doc_id',
|
|
52
|
-
'chunk_id': 'id',
|
|
53
|
-
'chunk_content': 'content'
|
|
54
|
-
}
|
|
45
|
+
KB_TO_VECTORDB_COLUMNS = {"id": "original_doc_id", "chunk_id": "id", "chunk_content": "content"}
|
|
55
46
|
|
|
56
47
|
|
|
57
48
|
def get_model_params(model_params: dict, default_config_key: str):
|
|
@@ -71,23 +62,23 @@ def get_embedding_model_from_params(embedding_model_params: dict):
|
|
|
71
62
|
Create embedding model from parameters.
|
|
72
63
|
"""
|
|
73
64
|
params_copy = copy.deepcopy(embedding_model_params)
|
|
74
|
-
provider = params_copy.pop(
|
|
75
|
-
api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get(
|
|
65
|
+
provider = params_copy.pop("provider", None).lower()
|
|
66
|
+
api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get("api_key")
|
|
76
67
|
# Underscores are replaced because the provider name ultimately gets mapped to a class name.
|
|
77
68
|
# This is mostly to support Azure OpenAI (azure_openai); the mapped class name is 'AzureOpenAIEmbeddings'.
|
|
78
|
-
params_copy[
|
|
79
|
-
if provider ==
|
|
69
|
+
params_copy["class"] = provider.replace("_", "")
|
|
70
|
+
if provider == "azure_openai":
|
|
80
71
|
# Azure OpenAI expects the api_key to be passed as 'openai_api_key'.
|
|
81
|
-
params_copy[
|
|
82
|
-
params_copy[
|
|
83
|
-
if
|
|
84
|
-
params_copy[
|
|
85
|
-
if
|
|
86
|
-
params_copy[
|
|
72
|
+
params_copy["openai_api_key"] = api_key
|
|
73
|
+
params_copy["azure_endpoint"] = params_copy.pop("base_url")
|
|
74
|
+
if "chunk_size" not in params_copy:
|
|
75
|
+
params_copy["chunk_size"] = 2048
|
|
76
|
+
if "api_version" in params_copy:
|
|
77
|
+
params_copy["openai_api_version"] = params_copy["api_version"]
|
|
87
78
|
else:
|
|
88
79
|
params_copy[f"{provider}_api_key"] = api_key
|
|
89
|
-
params_copy.pop(
|
|
90
|
-
params_copy[
|
|
80
|
+
params_copy.pop("api_key", None)
|
|
81
|
+
params_copy["model"] = params_copy.pop("model_name", None)
|
|
91
82
|
|
|
92
83
|
return construct_model_from_args(params_copy)
|
|
93
84
|
|
|
@@ -97,15 +88,26 @@ def get_reranking_model_from_params(reranking_model_params: dict):
|
|
|
97
88
|
Create reranking model from parameters.
|
|
98
89
|
"""
|
|
99
90
|
params_copy = copy.deepcopy(reranking_model_params)
|
|
100
|
-
provider = params_copy.get(
|
|
91
|
+
provider = params_copy.get("provider", "openai").lower()
|
|
101
92
|
|
|
102
93
|
if "api_key" not in params_copy:
|
|
103
94
|
params_copy["api_key"] = get_api_key(provider, params_copy, strict=False)
|
|
104
|
-
params_copy[
|
|
95
|
+
params_copy["model"] = params_copy.pop("model_name", None)
|
|
105
96
|
|
|
106
97
|
return BaseLLMReranker(**params_copy)
|
|
107
98
|
|
|
108
99
|
|
|
100
|
+
def safe_pandas_is_datetime(value: str) -> bool:
|
|
101
|
+
"""
|
|
102
|
+
Check if the value can be parsed as a datetime.
|
|
103
|
+
"""
|
|
104
|
+
try:
|
|
105
|
+
result = pd.api.types.is_datetime64_any_dtype(value)
|
|
106
|
+
return result
|
|
107
|
+
except ValueError:
|
|
108
|
+
return False
|
|
109
|
+
|
|
110
|
+
|
|
109
111
|
class KnowledgeBaseTable:
|
|
110
112
|
"""
|
|
111
113
|
Knowledge base table interface
|
|
@@ -125,6 +127,11 @@ class KnowledgeBaseTable:
|
|
|
125
127
|
logger.debug(f"Configuring preprocessing with config: {config}")
|
|
126
128
|
self.document_preprocessor = None # Reset existing preprocessor
|
|
127
129
|
if config is not None:
|
|
130
|
+
# Ensure content_column is set for JSON chunking if not already specified
|
|
131
|
+
if config.get("type") == "json_chunking" and config.get("json_chunking_config"):
|
|
132
|
+
if "content_column" not in config["json_chunking_config"]:
|
|
133
|
+
config["json_chunking_config"]["content_column"] = "content"
|
|
134
|
+
|
|
128
135
|
preprocessing_config = PreprocessingConfig(**config)
|
|
129
136
|
self.document_preprocessor = PreprocessorFactory.create_preprocessor(preprocessing_config)
|
|
130
137
|
logger.debug(f"Created preprocessor of type: {type(self.document_preprocessor)}")
|
|
@@ -186,11 +193,13 @@ class KnowledgeBaseTable:
|
|
|
186
193
|
query_text = item.value
|
|
187
194
|
|
|
188
195
|
# replace content with embeddings
|
|
189
|
-
conditions.append(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
196
|
+
conditions.append(
|
|
197
|
+
FilterCondition(
|
|
198
|
+
column=TableField.EMBEDDINGS.value,
|
|
199
|
+
value=self._content_to_embeddings(item.value),
|
|
200
|
+
op=FilterOperator.EQUAL,
|
|
201
|
+
)
|
|
202
|
+
)
|
|
194
203
|
else:
|
|
195
204
|
conditions.append(item)
|
|
196
205
|
|
|
@@ -232,7 +241,7 @@ class KnowledgeBaseTable:
|
|
|
232
241
|
def add_relevance(self, df, query_text, relevance_threshold=None):
|
|
233
242
|
relevance_column = TableField.RELEVANCE.value
|
|
234
243
|
|
|
235
|
-
reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "
|
|
244
|
+
reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "default_reranking_model")
|
|
236
245
|
if reranking_model_params and query_text and len(df) > 0:
|
|
237
246
|
# Use reranker for relevance score
|
|
238
247
|
try:
|
|
@@ -244,7 +253,7 @@ class KnowledgeBaseTable:
|
|
|
244
253
|
|
|
245
254
|
reranker = get_reranking_model_from_params(reranking_model_params)
|
|
246
255
|
# Get documents to rerank
|
|
247
|
-
documents = df[
|
|
256
|
+
documents = df["chunk_content"].tolist()
|
|
248
257
|
# Use the get_scores method with disable_events=True
|
|
249
258
|
scores = reranker.get_scores(query_text, documents)
|
|
250
259
|
# Add scores as the relevance column
|
|
@@ -257,21 +266,21 @@ class KnowledgeBaseTable:
|
|
|
257
266
|
except Exception as e:
|
|
258
267
|
logger.error(f"Error during reranking: {str(e)}")
|
|
259
268
|
# Fallback to distance-based relevance
|
|
260
|
-
if
|
|
261
|
-
df[relevance_column] = 1 / (1 + df[
|
|
269
|
+
if "distance" in df.columns:
|
|
270
|
+
df[relevance_column] = 1 / (1 + df["distance"])
|
|
262
271
|
else:
|
|
263
272
|
logger.info("No distance or reranker available")
|
|
264
273
|
|
|
265
|
-
elif
|
|
274
|
+
elif "distance" in df.columns:
|
|
266
275
|
# Calculate relevance from distance
|
|
267
276
|
logger.info("Calculating relevance from vector distance")
|
|
268
|
-
df[relevance_column] = 1 / (1 + df[
|
|
277
|
+
df[relevance_column] = 1 / (1 + df["distance"])
|
|
269
278
|
if relevance_threshold is not None:
|
|
270
279
|
df = df[df[relevance_column] > relevance_threshold]
|
|
271
280
|
|
|
272
281
|
else:
|
|
273
282
|
df[relevance_column] = None
|
|
274
|
-
df[
|
|
283
|
+
df["distance"] = None
|
|
275
284
|
# Sort by relevance
|
|
276
285
|
df = df.sort_values(by=relevance_column, ascending=False)
|
|
277
286
|
return df
|
|
@@ -294,7 +303,7 @@ class KnowledgeBaseTable:
|
|
|
294
303
|
columns = list(df.columns)
|
|
295
304
|
# update id, get from metadata
|
|
296
305
|
df[TableField.ID.value] = df[TableField.METADATA.value].apply(
|
|
297
|
-
lambda m: None if m is None else m.get(
|
|
306
|
+
lambda m: None if m is None else m.get("original_doc_id")
|
|
298
307
|
)
|
|
299
308
|
|
|
300
309
|
# id on first place
|
|
@@ -309,23 +318,14 @@ class KnowledgeBaseTable:
|
|
|
309
318
|
if documents:
|
|
310
319
|
self.insert_documents(documents)
|
|
311
320
|
|
|
312
|
-
def insert_web_pages(
|
|
313
|
-
self,
|
|
314
|
-
urls: List[str],
|
|
315
|
-
crawl_depth: int,
|
|
316
|
-
limit: int,
|
|
317
|
-
filters: List[str] = None
|
|
318
|
-
):
|
|
321
|
+
def insert_web_pages(self, urls: List[str], crawl_depth: int, limit: int, filters: List[str] = None):
|
|
319
322
|
"""Process and insert web pages"""
|
|
320
323
|
if not self.document_loader:
|
|
321
324
|
raise ValueError("Document loader not configured")
|
|
322
325
|
|
|
323
|
-
documents = list(
|
|
324
|
-
urls,
|
|
325
|
-
|
|
326
|
-
crawl_depth=crawl_depth,
|
|
327
|
-
filters=filters
|
|
328
|
-
))
|
|
326
|
+
documents = list(
|
|
327
|
+
self.document_loader.load_web_pages(urls, limit=limit, crawl_depth=crawl_depth, filters=filters)
|
|
328
|
+
)
|
|
329
329
|
if documents:
|
|
330
330
|
self.insert_documents(documents)
|
|
331
331
|
|
|
@@ -343,11 +343,9 @@ class KnowledgeBaseTable:
|
|
|
343
343
|
if not rows:
|
|
344
344
|
return
|
|
345
345
|
|
|
346
|
-
documents = [
|
|
347
|
-
content=row.get(
|
|
348
|
-
|
|
349
|
-
metadata=row.get('metadata', {})
|
|
350
|
-
) for row in rows]
|
|
346
|
+
documents = [
|
|
347
|
+
Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
|
|
348
|
+
]
|
|
351
349
|
|
|
352
350
|
self.insert_documents(documents)
|
|
353
351
|
|
|
@@ -368,7 +366,7 @@ class KnowledgeBaseTable:
|
|
|
368
366
|
conditions = db_handler.extract_conditions(query.where)
|
|
369
367
|
doc_id = None
|
|
370
368
|
for condition in conditions:
|
|
371
|
-
if condition.column ==
|
|
369
|
+
if condition.column == "chunk_id" and condition.op == FilterOperator.EQUAL:
|
|
372
370
|
doc_id = condition.value
|
|
373
371
|
|
|
374
372
|
if cont_col in query.update_columns:
|
|
@@ -379,7 +377,7 @@ class KnowledgeBaseTable:
|
|
|
379
377
|
doc = Document(
|
|
380
378
|
id=doc_id,
|
|
381
379
|
content=content.value,
|
|
382
|
-
metadata={} # Empty metadata for content-only updates
|
|
380
|
+
metadata={}, # Empty metadata for content-only updates
|
|
383
381
|
)
|
|
384
382
|
processed_chunks = self.document_preprocessor.process_documents([doc])
|
|
385
383
|
if processed_chunks:
|
|
@@ -418,7 +416,7 @@ class KnowledgeBaseTable:
|
|
|
418
416
|
query: str,
|
|
419
417
|
keywords: List[str] = None,
|
|
420
418
|
metadata: Dict[str, str] = None,
|
|
421
|
-
distance_function=DistanceFunction.COSINE_DISTANCE
|
|
419
|
+
distance_function=DistanceFunction.COSINE_DISTANCE,
|
|
422
420
|
) -> pd.DataFrame:
|
|
423
421
|
query_df = pd.DataFrame.from_records([{TableField.CONTENT.value: query}])
|
|
424
422
|
embeddings_df = self._df_to_embeddings(query_df)
|
|
@@ -427,14 +425,14 @@ class KnowledgeBaseTable:
|
|
|
427
425
|
embeddings = embeddings_df.iloc[0][TableField.EMBEDDINGS.value]
|
|
428
426
|
keywords_query = None
|
|
429
427
|
if keywords is not None:
|
|
430
|
-
keywords_query =
|
|
428
|
+
keywords_query = " ".join(keywords)
|
|
431
429
|
db_handler = self.get_vector_db()
|
|
432
430
|
return db_handler.hybrid_search(
|
|
433
431
|
self._kb.vector_database_table,
|
|
434
432
|
embeddings,
|
|
435
433
|
query=keywords_query,
|
|
436
434
|
metadata=metadata,
|
|
437
|
-
distance_function=distance_function
|
|
435
|
+
distance_function=distance_function,
|
|
438
436
|
)
|
|
439
437
|
|
|
440
438
|
def clear(self):
|
|
@@ -467,7 +465,7 @@ class KnowledgeBaseTable:
|
|
|
467
465
|
|
|
468
466
|
# First adapt column names to identify content and metadata columns
|
|
469
467
|
adapted_df = self._adapt_column_names(df)
|
|
470
|
-
content_columns = self._kb.params.get(
|
|
468
|
+
content_columns = self._kb.params.get("content_columns", [TableField.CONTENT.value])
|
|
471
469
|
|
|
472
470
|
# Convert DataFrame rows to documents, creating separate documents for each content column
|
|
473
471
|
raw_documents = []
|
|
@@ -485,15 +483,11 @@ class KnowledgeBaseTable:
|
|
|
485
483
|
|
|
486
484
|
metadata = {
|
|
487
485
|
**base_metadata,
|
|
488
|
-
|
|
489
|
-
|
|
486
|
+
"original_row_index": str(idx), # provide link to original row index
|
|
487
|
+
"content_column": col,
|
|
490
488
|
}
|
|
491
489
|
|
|
492
|
-
raw_documents.append(Document(
|
|
493
|
-
content=content_str,
|
|
494
|
-
id=doc_id,
|
|
495
|
-
metadata=metadata
|
|
496
|
-
))
|
|
490
|
+
raw_documents.append(Document(content=content_str, id=doc_id, metadata=metadata))
|
|
497
491
|
|
|
498
492
|
# Apply preprocessing to all documents if preprocessor exists
|
|
499
493
|
if self.document_preprocessor:
|
|
@@ -502,11 +496,16 @@ class KnowledgeBaseTable:
|
|
|
502
496
|
processed_chunks = raw_documents # Use raw documents if no preprocessing
|
|
503
497
|
|
|
504
498
|
# Convert processed chunks back to DataFrame with standard structure
|
|
505
|
-
df = pd.DataFrame(
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
499
|
+
df = pd.DataFrame(
|
|
500
|
+
[
|
|
501
|
+
{
|
|
502
|
+
TableField.CONTENT.value: chunk.content,
|
|
503
|
+
TableField.ID.value: chunk.id,
|
|
504
|
+
TableField.METADATA.value: chunk.metadata,
|
|
505
|
+
}
|
|
506
|
+
for chunk in processed_chunks
|
|
507
|
+
]
|
|
508
|
+
)
|
|
510
509
|
|
|
511
510
|
if df.empty:
|
|
512
511
|
logger.warning("No valid content found in any content columns")
|
|
@@ -517,17 +516,17 @@ class KnowledgeBaseTable:
|
|
|
517
516
|
df = pd.concat([df, df_emb], axis=1)
|
|
518
517
|
db_handler = self.get_vector_db()
|
|
519
518
|
|
|
520
|
-
if params is not None and params.get(
|
|
519
|
+
if params is not None and params.get("kb_no_upsert", False):
|
|
521
520
|
# speed up inserting by disable checking existing records
|
|
522
521
|
db_handler.insert(self._kb.vector_database_table, df)
|
|
523
522
|
else:
|
|
524
523
|
db_handler.do_upsert(self._kb.vector_database_table, df)
|
|
525
524
|
|
|
526
525
|
def _adapt_column_names(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
527
|
-
|
|
526
|
+
"""
|
|
528
527
|
Convert input columns for vector db input
|
|
529
528
|
- id, content and metadata
|
|
530
|
-
|
|
529
|
+
"""
|
|
531
530
|
# Debug incoming data
|
|
532
531
|
logger.debug(f"Input DataFrame columns: {df.columns}")
|
|
533
532
|
logger.debug(f"Input DataFrame first row: {df.iloc[0].to_dict()}")
|
|
@@ -536,7 +535,7 @@ class KnowledgeBaseTable:
|
|
|
536
535
|
columns = list(df.columns)
|
|
537
536
|
|
|
538
537
|
# -- prepare id --
|
|
539
|
-
id_column = params.get(
|
|
538
|
+
id_column = params.get("id_column")
|
|
540
539
|
if id_column is not None and id_column not in columns:
|
|
541
540
|
id_column = None
|
|
542
541
|
|
|
@@ -546,8 +545,8 @@ class KnowledgeBaseTable:
|
|
|
546
545
|
# Also check for case-insensitive 'id' column
|
|
547
546
|
if id_column is None:
|
|
548
547
|
column_map = {col.lower(): col for col in columns}
|
|
549
|
-
if
|
|
550
|
-
id_column = column_map[
|
|
548
|
+
if "id" in column_map:
|
|
549
|
+
id_column = column_map["id"]
|
|
551
550
|
|
|
552
551
|
if id_column is not None:
|
|
553
552
|
columns.remove(id_column)
|
|
@@ -562,8 +561,8 @@ class KnowledgeBaseTable:
|
|
|
562
561
|
logger.debug(f"Added IDs: {df_out[TableField.ID.value].tolist()}")
|
|
563
562
|
|
|
564
563
|
# -- prepare content and metadata --
|
|
565
|
-
content_columns = params.get(
|
|
566
|
-
metadata_columns = params.get(
|
|
564
|
+
content_columns = params.get("content_columns", [TableField.CONTENT.value])
|
|
565
|
+
metadata_columns = params.get("metadata_columns")
|
|
567
566
|
|
|
568
567
|
logger.debug(f"Processing with: content_columns={content_columns}, metadata_columns={metadata_columns}")
|
|
569
568
|
|
|
@@ -571,25 +570,19 @@ class KnowledgeBaseTable:
|
|
|
571
570
|
if content_columns:
|
|
572
571
|
# Ensure content columns are case-insensitive
|
|
573
572
|
column_map = {col.lower(): col for col in columns}
|
|
574
|
-
content_columns = [
|
|
575
|
-
column_map.get(col.lower(), col)
|
|
576
|
-
for col in content_columns
|
|
577
|
-
]
|
|
573
|
+
content_columns = [column_map.get(col.lower(), col) for col in content_columns]
|
|
578
574
|
logger.debug(f"Mapped content columns: {content_columns}")
|
|
579
575
|
|
|
580
576
|
if metadata_columns:
|
|
581
577
|
# Ensure metadata columns are case-insensitive
|
|
582
578
|
column_map = {col.lower(): col for col in columns}
|
|
583
|
-
metadata_columns = [
|
|
584
|
-
column_map.get(col.lower(), col)
|
|
585
|
-
for col in metadata_columns
|
|
586
|
-
]
|
|
579
|
+
metadata_columns = [column_map.get(col.lower(), col) for col in metadata_columns]
|
|
587
580
|
logger.debug(f"Mapped metadata columns: {metadata_columns}")
|
|
588
581
|
|
|
589
582
|
if content_columns is not None:
|
|
590
583
|
content_columns = list(set(content_columns).intersection(columns))
|
|
591
584
|
if len(content_columns) == 0:
|
|
592
|
-
raise ValueError(f
|
|
585
|
+
raise ValueError(f"Content columns {params.get('content_columns')} not found in dataset: {columns}")
|
|
593
586
|
|
|
594
587
|
if metadata_columns is not None:
|
|
595
588
|
metadata_columns = list(set(metadata_columns).intersection(columns))
|
|
@@ -603,12 +596,13 @@ class KnowledgeBaseTable:
|
|
|
603
596
|
|
|
604
597
|
# Add metadata
|
|
605
598
|
if metadata_columns and len(metadata_columns) > 0:
|
|
599
|
+
|
|
606
600
|
def convert_row_to_metadata(row):
|
|
607
601
|
metadata = {}
|
|
608
602
|
for col in metadata_columns:
|
|
609
603
|
value = row[col]
|
|
610
604
|
# Convert numpy/pandas types to Python native types
|
|
611
|
-
if
|
|
605
|
+
if safe_pandas_is_datetime(value) or isinstance(value, pd.Timestamp):
|
|
612
606
|
value = str(value)
|
|
613
607
|
elif pd.api.types.is_integer_dtype(value):
|
|
614
608
|
value = int(value)
|
|
@@ -648,7 +642,7 @@ class KnowledgeBaseTable:
|
|
|
648
642
|
if self._vector_db is None:
|
|
649
643
|
database = db.Integration.query.get(self._kb.vector_database_id)
|
|
650
644
|
if database is None:
|
|
651
|
-
raise ValueError(
|
|
645
|
+
raise ValueError("Vector database not found. Is it deleted?")
|
|
652
646
|
database_name = database.name
|
|
653
647
|
self._vector_db = self.session.integration_controller.get_data_handler(database_name)
|
|
654
648
|
return self._vector_db
|
|
@@ -673,6 +667,15 @@ class KnowledgeBaseTable:
|
|
|
673
667
|
|
|
674
668
|
model_id = self._kb.embedding_model_id
|
|
675
669
|
|
|
670
|
+
if model_id is None:
|
|
671
|
+
# call litellm handler
|
|
672
|
+
messages = list(df[TableField.CONTENT.value])
|
|
673
|
+
embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
|
|
674
|
+
embedding_params.update(self._kb.params["embedding_model"])
|
|
675
|
+
results = self.call_litellm_embedding(self.session, embedding_params, messages)
|
|
676
|
+
results = [[val] for val in results]
|
|
677
|
+
return pd.DataFrame(results, columns=[TableField.EMBEDDINGS.value])
|
|
678
|
+
|
|
676
679
|
# get the input columns
|
|
677
680
|
model_rec = db.session.query(db.Predictor).filter_by(id=model_id).first()
|
|
678
681
|
|
|
@@ -681,19 +684,15 @@ class KnowledgeBaseTable:
|
|
|
681
684
|
|
|
682
685
|
project_datanode = self.session.datahub.get(model_project.name)
|
|
683
686
|
|
|
684
|
-
model_using = model_rec.learn_args.get(
|
|
685
|
-
input_col = model_using.get(
|
|
687
|
+
model_using = model_rec.learn_args.get("using", {})
|
|
688
|
+
input_col = model_using.get("question_column")
|
|
686
689
|
if input_col is None:
|
|
687
|
-
input_col = model_using.get(
|
|
690
|
+
input_col = model_using.get("input_column")
|
|
688
691
|
|
|
689
692
|
if input_col is not None and input_col != TableField.CONTENT.value:
|
|
690
693
|
df = df.rename(columns={TableField.CONTENT.value: input_col})
|
|
691
694
|
|
|
692
|
-
df_out = project_datanode.predict(
|
|
693
|
-
model_name=model_rec.name,
|
|
694
|
-
df=df,
|
|
695
|
-
params=self.model_params
|
|
696
|
-
)
|
|
695
|
+
df_out = project_datanode.predict(model_name=model_rec.name, df=df, params=self.model_params)
|
|
697
696
|
|
|
698
697
|
target = model_rec.to_predict[0]
|
|
699
698
|
if target != TableField.EMBEDDINGS.value:
|
|
@@ -714,6 +713,23 @@ class KnowledgeBaseTable:
|
|
|
714
713
|
res = self._df_to_embeddings(df)
|
|
715
714
|
return res[TableField.EMBEDDINGS.value][0]
|
|
716
715
|
|
|
716
|
+
@staticmethod
|
|
717
|
+
def call_litellm_embedding(session, model_params, messages):
|
|
718
|
+
args = copy.deepcopy(model_params)
|
|
719
|
+
|
|
720
|
+
llm_model = args.pop("model_name")
|
|
721
|
+
engine = args.pop("provider")
|
|
722
|
+
|
|
723
|
+
llm_model = f"{engine}/{llm_model}"
|
|
724
|
+
|
|
725
|
+
if "base_url" in args:
|
|
726
|
+
args["api_base"] = args.pop("base_url")
|
|
727
|
+
|
|
728
|
+
module = session.integration_controller.get_handler_module("litellm")
|
|
729
|
+
if module is None or module.Handler is None:
|
|
730
|
+
raise ValueError(f'Unable to use "{engine}" provider. Litellm handler is not installed')
|
|
731
|
+
return module.Handler.embeddings(llm_model, messages, args)
|
|
732
|
+
|
|
717
733
|
def build_rag_pipeline(self, retrieval_config: dict):
|
|
718
734
|
"""
|
|
719
735
|
Builds a RAG pipeline with returned sources
|
|
@@ -729,10 +745,10 @@ class KnowledgeBaseTable:
|
|
|
729
745
|
"""
|
|
730
746
|
# Get embedding model from knowledge base
|
|
731
747
|
embeddings_model = None
|
|
732
|
-
embedding_model_params = get_model_params(self._kb.params.get(
|
|
748
|
+
embedding_model_params = get_model_params(self._kb.params.get("embedding_model", {}), "default_embedding_model")
|
|
733
749
|
if self._kb.embedding_model:
|
|
734
750
|
# Extract embedding model args from knowledge base table
|
|
735
|
-
embedding_args = self._kb.embedding_model.learn_args.get(
|
|
751
|
+
embedding_args = self._kb.embedding_model.learn_args.get("using", {})
|
|
736
752
|
# Construct the embedding model directly
|
|
737
753
|
embeddings_model = construct_model_from_args(embedding_args)
|
|
738
754
|
logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
|
|
@@ -744,21 +760,17 @@ class KnowledgeBaseTable:
|
|
|
744
760
|
logger.debug("Using default embedding model as knowledge base has no embedding model")
|
|
745
761
|
|
|
746
762
|
# Update retrieval config with knowledge base parameters
|
|
747
|
-
kb_params = {
|
|
748
|
-
'vector_store_config': {
|
|
749
|
-
'kb_table': self
|
|
750
|
-
}
|
|
751
|
-
}
|
|
763
|
+
kb_params = {"vector_store_config": {"kb_table": self}}
|
|
752
764
|
|
|
753
765
|
# Load and validate config
|
|
754
766
|
try:
|
|
755
767
|
rag_config = load_rag_config(retrieval_config, kb_params, embeddings_model)
|
|
756
768
|
|
|
757
769
|
# Build LLM if specified
|
|
758
|
-
if
|
|
770
|
+
if "llm_model_name" in rag_config:
|
|
759
771
|
llm_args = {"model_name": rag_config.llm_model_name}
|
|
760
772
|
if not rag_config.llm_provider:
|
|
761
|
-
llm_args[
|
|
773
|
+
llm_args["provider"] = get_llm_provider(llm_args)
|
|
762
774
|
else:
|
|
763
775
|
llm_args["provider"] = rag_config.llm_provider
|
|
764
776
|
rag_config.llm = create_chat_model(llm_args)
|
|
@@ -779,6 +791,7 @@ class KnowledgeBaseTable:
|
|
|
779
791
|
if isinstance(base_metadata, str):
|
|
780
792
|
try:
|
|
781
793
|
import ast
|
|
794
|
+
|
|
782
795
|
return ast.literal_eval(base_metadata)
|
|
783
796
|
except (SyntaxError, ValueError):
|
|
784
797
|
logger.warning(f"Could not parse metadata: {base_metadata}. Using empty dict.")
|
|
@@ -788,6 +801,7 @@ class KnowledgeBaseTable:
|
|
|
788
801
|
def _generate_document_id(self, content: str, content_column: str, provided_id: str = None) -> str:
|
|
789
802
|
"""Generate a deterministic document ID using the utility function."""
|
|
790
803
|
from mindsdb.interfaces.knowledge_base.utils import generate_document_id
|
|
804
|
+
|
|
791
805
|
return generate_document_id(content=content, provided_id=provided_id)
|
|
792
806
|
|
|
793
807
|
def _convert_metadata_value(self, value):
|
|
@@ -820,6 +834,15 @@ class KnowledgeBaseTable:
|
|
|
820
834
|
# Convert everything else to string
|
|
821
835
|
return str(value)
|
|
822
836
|
|
|
837
|
+
def create_index(self):
|
|
838
|
+
"""
|
|
839
|
+
Create an index on the knowledge base table
|
|
840
|
+
:param index_name: name of the index
|
|
841
|
+
:param params: parameters for the index
|
|
842
|
+
"""
|
|
843
|
+
db_handler = self.get_vector_db()
|
|
844
|
+
db_handler.create_index(self._kb.vector_database_table)
|
|
845
|
+
|
|
823
846
|
|
|
824
847
|
class KnowledgeBaseController:
|
|
825
848
|
"""
|
|
@@ -831,14 +854,14 @@ class KnowledgeBaseController:
|
|
|
831
854
|
self.session = session
|
|
832
855
|
|
|
833
856
|
def add(
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
857
|
+
self,
|
|
858
|
+
name: str,
|
|
859
|
+
project_name: str,
|
|
860
|
+
storage: Identifier,
|
|
861
|
+
params: dict,
|
|
862
|
+
preprocessing_config: Optional[dict] = None,
|
|
863
|
+
if_not_exists: bool = False,
|
|
864
|
+
# embedding_model: Identifier = None, # Legacy: Allow MindsDB models to be passed as embedding_model.
|
|
842
865
|
) -> db.KnowledgeBase:
|
|
843
866
|
"""
|
|
844
867
|
Add a new knowledge base to the database
|
|
@@ -846,15 +869,18 @@ class KnowledgeBaseController:
|
|
|
846
869
|
:param is_sparse: Whether to use sparse vectors for embeddings
|
|
847
870
|
:param vector_size: Optional size specification for vectors, required when is_sparse=True
|
|
848
871
|
"""
|
|
872
|
+
# fill variables
|
|
873
|
+
params = variables_controller.fill_parameters(params)
|
|
874
|
+
|
|
849
875
|
# Validate preprocessing config first if provided
|
|
850
876
|
if preprocessing_config is not None:
|
|
851
877
|
PreprocessingConfig(**preprocessing_config) # Validate before storing
|
|
852
878
|
params = params or {}
|
|
853
|
-
params[
|
|
879
|
+
params["preprocessing"] = preprocessing_config
|
|
854
880
|
|
|
855
881
|
# Check if vector_size is provided when using sparse vectors
|
|
856
|
-
is_sparse = params.get(
|
|
857
|
-
vector_size = params.get(
|
|
882
|
+
is_sparse = params.get("is_sparse")
|
|
883
|
+
vector_size = params.get("vector_size")
|
|
858
884
|
if is_sparse and vector_size is None:
|
|
859
885
|
raise ValueError("vector_size is required when is_sparse=True")
|
|
860
886
|
|
|
@@ -871,41 +897,45 @@ class KnowledgeBaseController:
|
|
|
871
897
|
return kb
|
|
872
898
|
raise EntityExistsError("Knowledge base already exists", name)
|
|
873
899
|
|
|
874
|
-
embedding_params = copy.deepcopy(config.get(
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
900
|
+
embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
|
|
901
|
+
|
|
902
|
+
# Legacy
|
|
903
|
+
# model_name = None
|
|
904
|
+
# model_project = project
|
|
905
|
+
# if embedding_model:
|
|
906
|
+
# model_name = embedding_model.parts[-1]
|
|
907
|
+
# if len(embedding_model.parts) > 1:
|
|
908
|
+
# model_project = self.session.database_controller.get_project(embedding_model.parts[-2])
|
|
909
|
+
|
|
910
|
+
# elif "embedding_model" in params:
|
|
911
|
+
# if isinstance(params["embedding_model"], str):
|
|
912
|
+
# # it is model name
|
|
913
|
+
# model_name = params["embedding_model"]
|
|
914
|
+
# else:
|
|
915
|
+
# # it is params for model
|
|
916
|
+
# embedding_params.update(params["embedding_model"])
|
|
917
|
+
|
|
918
|
+
if "embedding_model" in params:
|
|
919
|
+
if not isinstance(params["embedding_model"], dict):
|
|
920
|
+
raise ValueError("embedding_model should be JSON object with model parameters.")
|
|
921
|
+
embedding_params.update(params["embedding_model"])
|
|
922
|
+
|
|
923
|
+
# if model_name is None: # Legacy
|
|
924
|
+
model_name = self._create_embedding_model(
|
|
925
|
+
project.name,
|
|
926
|
+
params=embedding_params,
|
|
927
|
+
kb_name=name,
|
|
928
|
+
)
|
|
929
|
+
if model_name is not None:
|
|
930
|
+
params["created_embedding_model"] = model_name
|
|
898
931
|
|
|
899
932
|
embedding_model_id = None
|
|
900
933
|
if model_name is not None:
|
|
901
|
-
model = self.session.model_controller.get_model(
|
|
902
|
-
|
|
903
|
-
project_name=model_project.name
|
|
904
|
-
)
|
|
905
|
-
model_record = db.Predictor.query.get(model['id'])
|
|
934
|
+
model = self.session.model_controller.get_model(name=model_name, project_name=project.name)
|
|
935
|
+
model_record = db.Predictor.query.get(model["id"])
|
|
906
936
|
embedding_model_id = model_record.id
|
|
907
937
|
|
|
908
|
-
reranking_model_params = get_model_params(params.get(
|
|
938
|
+
reranking_model_params = get_model_params(params.get("reranking_model", {}), "default_reranking_model")
|
|
909
939
|
if reranking_model_params:
|
|
910
940
|
# Get reranking model from params.
|
|
911
941
|
# This is called here to check validaity of the parameters.
|
|
@@ -913,17 +943,17 @@ class KnowledgeBaseController:
|
|
|
913
943
|
|
|
914
944
|
# search for the vector database table
|
|
915
945
|
if storage is None:
|
|
916
|
-
cloud_pg_vector = os.environ.get(
|
|
946
|
+
cloud_pg_vector = os.environ.get("KB_PGVECTOR_URL")
|
|
917
947
|
if cloud_pg_vector:
|
|
918
948
|
vector_table_name = name
|
|
919
949
|
# Add sparse vector support for pgvector
|
|
920
950
|
vector_db_params = {}
|
|
921
951
|
# Check both explicit parameter and model configuration
|
|
922
|
-
is_sparse = is_sparse or model_record.learn_args.get(
|
|
952
|
+
is_sparse = is_sparse or model_record.learn_args.get("using", {}).get("sparse")
|
|
923
953
|
if is_sparse:
|
|
924
|
-
vector_db_params[
|
|
954
|
+
vector_db_params["is_sparse"] = True
|
|
925
955
|
if vector_size is not None:
|
|
926
|
-
vector_db_params[
|
|
956
|
+
vector_db_params["vector_size"] = vector_size
|
|
927
957
|
vector_db_name = self._create_persistent_pgvector(vector_db_params)
|
|
928
958
|
|
|
929
959
|
else:
|
|
@@ -931,26 +961,22 @@ class KnowledgeBaseController:
|
|
|
931
961
|
vector_table_name = "default_collection"
|
|
932
962
|
vector_db_name = self._create_persistent_chroma(name)
|
|
933
963
|
# memorize to remove it later
|
|
934
|
-
params[
|
|
964
|
+
params["default_vector_storage"] = vector_db_name
|
|
935
965
|
elif len(storage.parts) != 2:
|
|
936
|
-
raise ValueError(
|
|
966
|
+
raise ValueError("Storage param has to be vector db with table")
|
|
937
967
|
else:
|
|
938
968
|
vector_db_name, vector_table_name = storage.parts
|
|
939
969
|
|
|
940
970
|
# create table in vectordb before creating KB
|
|
941
|
-
self.session.datahub.get(vector_db_name).integration_handler.create_table(
|
|
942
|
-
|
|
943
|
-
)
|
|
944
|
-
vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
|
|
971
|
+
self.session.datahub.get(vector_db_name).integration_handler.create_table(vector_table_name)
|
|
972
|
+
vector_database_id = self.session.integration_controller.get(vector_db_name)["id"]
|
|
945
973
|
|
|
946
974
|
# Store sparse vector settings in params if specified
|
|
947
975
|
if is_sparse:
|
|
948
976
|
params = params or {}
|
|
949
|
-
params[
|
|
950
|
-
'is_sparse': is_sparse
|
|
951
|
-
}
|
|
977
|
+
params["vector_config"] = {"is_sparse": is_sparse}
|
|
952
978
|
if vector_size is not None:
|
|
953
|
-
params[
|
|
979
|
+
params["vector_config"]["vector_size"] = vector_size
|
|
954
980
|
|
|
955
981
|
kb = db.KnowledgeBase(
|
|
956
982
|
name=name,
|
|
@@ -972,7 +998,7 @@ class KnowledgeBaseController:
|
|
|
972
998
|
if self.session.integration_controller.get(vector_store_name):
|
|
973
999
|
return vector_store_name
|
|
974
1000
|
|
|
975
|
-
self.session.integration_controller.add(vector_store_name,
|
|
1001
|
+
self.session.integration_controller.add(vector_store_name, "pgvector", params or {})
|
|
976
1002
|
return vector_store_name
|
|
977
1003
|
|
|
978
1004
|
def _create_persistent_chroma(self, kb_name, engine="chromadb"):
|
|
@@ -990,7 +1016,7 @@ class KnowledgeBaseController:
|
|
|
990
1016
|
self.session.integration_controller.add(vector_store_name, engine, connection_args)
|
|
991
1017
|
return vector_store_name
|
|
992
1018
|
|
|
993
|
-
def _create_embedding_model(self, project_name, engine="openai", params: dict = None, kb_name=
|
|
1019
|
+
def _create_embedding_model(self, project_name, engine="openai", params: dict = None, kb_name=""):
|
|
994
1020
|
"""create a default embedding model for knowledge base, if not specified"""
|
|
995
1021
|
model_name = f"kb_embedding_{kb_name}"
|
|
996
1022
|
|
|
@@ -1002,42 +1028,47 @@ class KnowledgeBaseController:
|
|
|
1002
1028
|
except PredictorRecordNotFound:
|
|
1003
1029
|
pass
|
|
1004
1030
|
|
|
1005
|
-
if
|
|
1006
|
-
|
|
1031
|
+
if params.get("provider", None) not in ("openai", "azure"):
|
|
1032
|
+
# try use litellm
|
|
1033
|
+
KnowledgeBaseTable.call_litellm_embedding(self.session, params, ["test"])
|
|
1034
|
+
return
|
|
1007
1035
|
|
|
1008
|
-
|
|
1036
|
+
if "provider" in params:
|
|
1037
|
+
engine = params.pop("provider").lower()
|
|
1009
1038
|
|
|
1010
|
-
|
|
1011
|
-
engine = 'openai'
|
|
1012
|
-
params['provider'] = 'azure'
|
|
1039
|
+
api_key = get_api_key(engine, params, strict=False) or params.pop("api_key")
|
|
1013
1040
|
|
|
1014
|
-
if engine ==
|
|
1015
|
-
|
|
1016
|
-
|
|
1041
|
+
if engine == "azure_openai":
|
|
1042
|
+
engine = "openai"
|
|
1043
|
+
params["provider"] = "azure"
|
|
1044
|
+
|
|
1045
|
+
if engine == "openai":
|
|
1046
|
+
if "question_column" not in params:
|
|
1047
|
+
params["question_column"] = "content"
|
|
1017
1048
|
if api_key:
|
|
1018
1049
|
params[f"{engine}_api_key"] = api_key
|
|
1019
|
-
|
|
1020
|
-
|
|
1050
|
+
if "api_key" in params:
|
|
1051
|
+
params.pop("api_key")
|
|
1052
|
+
if "base_url" in params:
|
|
1053
|
+
params["api_base"] = params.pop("base_url")
|
|
1021
1054
|
|
|
1022
|
-
params[
|
|
1023
|
-
params[
|
|
1024
|
-
params[
|
|
1055
|
+
params["engine"] = engine
|
|
1056
|
+
params["join_learn_process"] = True
|
|
1057
|
+
params["mode"] = "embedding"
|
|
1025
1058
|
|
|
1026
1059
|
# Include API key if provided.
|
|
1027
1060
|
statement = CreatePredictor(
|
|
1028
1061
|
name=Identifier(parts=[project_name, model_name]),
|
|
1029
1062
|
using=params,
|
|
1030
|
-
targets=[
|
|
1031
|
-
Identifier(parts=[TableField.EMBEDDINGS.value])
|
|
1032
|
-
]
|
|
1063
|
+
targets=[Identifier(parts=[TableField.EMBEDDINGS.value])],
|
|
1033
1064
|
)
|
|
1034
1065
|
|
|
1035
1066
|
command_executor = ExecuteCommands(self.session)
|
|
1036
1067
|
resp = command_executor.answer_create_predictor(statement, project_name)
|
|
1037
1068
|
# check model status
|
|
1038
1069
|
record = resp.data.records[0]
|
|
1039
|
-
if record[
|
|
1040
|
-
raise ValueError(
|
|
1070
|
+
if record["STATUS"] == "error":
|
|
1071
|
+
raise ValueError("Embedding model error:" + record["ERROR"])
|
|
1041
1072
|
return model_name
|
|
1042
1073
|
|
|
1043
1074
|
def delete(self, name: str, project_name: int, if_exists: bool = False) -> None:
|
|
@@ -1064,16 +1095,16 @@ class KnowledgeBaseController:
|
|
|
1064
1095
|
db.session.commit()
|
|
1065
1096
|
|
|
1066
1097
|
# drop objects if they were created automatically
|
|
1067
|
-
if
|
|
1098
|
+
if "default_vector_storage" in kb.params:
|
|
1068
1099
|
try:
|
|
1069
|
-
handler = self.session.datahub.get(kb.params[
|
|
1100
|
+
handler = self.session.datahub.get(kb.params["default_vector_storage"]).integration_handler
|
|
1070
1101
|
handler.drop_table(kb.vector_database_table)
|
|
1071
|
-
self.session.integration_controller.delete(kb.params[
|
|
1102
|
+
self.session.integration_controller.delete(kb.params["default_vector_storage"])
|
|
1072
1103
|
except EntityNotExistsError:
|
|
1073
1104
|
pass
|
|
1074
|
-
if
|
|
1105
|
+
if "created_embedding_model" in kb.params:
|
|
1075
1106
|
try:
|
|
1076
|
-
self.session.model_controller.delete_model(kb.params[
|
|
1107
|
+
self.session.model_controller.delete_model(kb.params["created_embedding_model"], project_name)
|
|
1077
1108
|
except EntityNotExistsError:
|
|
1078
1109
|
pass
|
|
1079
1110
|
|
|
@@ -1104,11 +1135,11 @@ class KnowledgeBaseController:
|
|
|
1104
1135
|
if kb is not None:
|
|
1105
1136
|
table = KnowledgeBaseTable(kb, self.session)
|
|
1106
1137
|
if params:
|
|
1107
|
-
table.model_params = params.get(
|
|
1138
|
+
table.model_params = params.get("model")
|
|
1108
1139
|
|
|
1109
1140
|
# Always configure preprocessing - either from params or default
|
|
1110
|
-
if kb.params and
|
|
1111
|
-
table.configure_preprocessing(kb.params[
|
|
1141
|
+
if kb.params and "preprocessing" in kb.params:
|
|
1142
|
+
table.configure_preprocessing(kb.params["preprocessing"])
|
|
1112
1143
|
else:
|
|
1113
1144
|
table.configure_preprocessing(None) # This ensures default preprocessor is created
|
|
1114
1145
|
|
|
@@ -1124,35 +1155,38 @@ class KnowledgeBaseController:
|
|
|
1124
1155
|
if project_name is not None:
|
|
1125
1156
|
projects = [p for p in projects if p.name == project_name]
|
|
1126
1157
|
|
|
1127
|
-
query = (
|
|
1128
|
-
db.
|
|
1129
|
-
.filter(db.KnowledgeBase.project_id.in_(list([p.id for p in projects])))
|
|
1158
|
+
query = db.session.query(db.KnowledgeBase).filter(
|
|
1159
|
+
db.KnowledgeBase.project_id.in_(list([p.id for p in projects]))
|
|
1130
1160
|
)
|
|
1131
1161
|
|
|
1132
1162
|
data = []
|
|
1133
|
-
project_names = {
|
|
1134
|
-
i.id: i.name
|
|
1135
|
-
for i in project_controller.get_list()
|
|
1136
|
-
}
|
|
1163
|
+
project_names = {i.id: i.name for i in project_controller.get_list()}
|
|
1137
1164
|
|
|
1138
1165
|
for record in query:
|
|
1139
1166
|
vector_database = record.vector_database
|
|
1140
1167
|
embedding_model = record.embedding_model
|
|
1141
1168
|
|
|
1142
|
-
data.append(
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1169
|
+
data.append(
|
|
1170
|
+
{
|
|
1171
|
+
"id": record.id,
|
|
1172
|
+
"name": record.name,
|
|
1173
|
+
"project_id": record.project_id,
|
|
1174
|
+
"project_name": project_names[record.project_id],
|
|
1175
|
+
"embedding_model": embedding_model.name if embedding_model is not None else None,
|
|
1176
|
+
"vector_database": None if vector_database is None else vector_database.name,
|
|
1177
|
+
"vector_database_table": record.vector_database_table,
|
|
1178
|
+
"query_id": record.query_id,
|
|
1179
|
+
"params": record.params,
|
|
1180
|
+
}
|
|
1181
|
+
)
|
|
1153
1182
|
|
|
1154
1183
|
return data
|
|
1155
1184
|
|
|
1185
|
+
def create_index(self, table_name, project_name):
|
|
1186
|
+
project_id = self.session.database_controller.get_project(project_name).id
|
|
1187
|
+
kb_table = self.get_table(table_name, project_id)
|
|
1188
|
+
kb_table.create_index()
|
|
1189
|
+
|
|
1156
1190
|
def update(self, name: str, project_id: int, **kwargs) -> db.KnowledgeBase:
|
|
1157
1191
|
"""
|
|
1158
1192
|
Update a knowledge base record
|