MindsDB 25.4.5.0__py3-none-any.whl → 25.5.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +215 -185
- mindsdb/api/a2a/__init__.py +0 -0
- mindsdb/api/a2a/__main__.py +114 -0
- mindsdb/api/a2a/a2a_client.py +439 -0
- mindsdb/api/a2a/agent.py +308 -0
- mindsdb/api/a2a/common/__init__.py +0 -0
- mindsdb/api/a2a/common/client/__init__.py +4 -0
- mindsdb/api/a2a/common/client/card_resolver.py +21 -0
- mindsdb/api/a2a/common/client/client.py +86 -0
- mindsdb/api/a2a/common/server/__init__.py +4 -0
- mindsdb/api/a2a/common/server/server.py +164 -0
- mindsdb/api/a2a/common/server/task_manager.py +287 -0
- mindsdb/api/a2a/common/server/utils.py +28 -0
- mindsdb/api/a2a/common/types.py +365 -0
- mindsdb/api/a2a/constants.py +9 -0
- mindsdb/api/a2a/run_a2a.py +129 -0
- mindsdb/api/a2a/task_manager.py +594 -0
- mindsdb/api/executor/command_executor.py +49 -28
- mindsdb/api/executor/datahub/classes/response.py +5 -2
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +39 -72
- mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
- mindsdb/api/executor/planner/query_planner.py +14 -2
- mindsdb/api/executor/sql_query/result_set.py +185 -52
- mindsdb/api/executor/sql_query/sql_query.py +1 -1
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +11 -13
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +8 -10
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +5 -44
- mindsdb/api/executor/sql_query/steps/insert_step.py +24 -15
- mindsdb/api/executor/sql_query/steps/join_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/project_step.py +1 -1
- mindsdb/api/executor/sql_query/steps/sql_steps.py +1 -1
- mindsdb/api/executor/sql_query/steps/subselect_step.py +4 -8
- mindsdb/api/executor/sql_query/steps/union_step.py +1 -3
- mindsdb/api/http/initialize.py +118 -85
- mindsdb/api/http/namespaces/analysis.py +17 -4
- mindsdb/api/http/namespaces/file.py +8 -2
- mindsdb/api/http/namespaces/sql.py +13 -27
- mindsdb/api/http/namespaces/tree.py +1 -1
- mindsdb/api/http/start.py +7 -2
- mindsdb/api/mcp/start.py +42 -5
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packet.py +0 -1
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +52 -19
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +8 -10
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +54 -38
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +86 -123
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +351 -0
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +1 -1
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +5 -6
- mindsdb/integrations/handlers/altibase_handler/altibase_handler.py +26 -27
- mindsdb/integrations/handlers/altibase_handler/connection_args.py +13 -13
- mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler.py +8 -8
- mindsdb/integrations/handlers/altibase_handler/tests/test_altibase_handler_dsn.py +13 -13
- mindsdb/integrations/handlers/anthropic_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/anthropic_handler/anthropic_handler.py +1 -3
- mindsdb/integrations/handlers/aurora_handler/aurora_handler.py +1 -0
- mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/config.py +0 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +1 -1
- mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/bigquery_handler/tests/test_bigquery_handler.py +1 -1
- mindsdb/integrations/handlers/binance_handler/binance_handler.py +1 -0
- mindsdb/integrations/handlers/binance_handler/binance_tables.py +3 -4
- mindsdb/integrations/handlers/byom_handler/__init__.py +0 -1
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/ckan_handler/ckan_handler.py +3 -0
- mindsdb/integrations/handlers/clickhouse_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/cloud_spanner_handler/tests/test_cloud_spanner_handler.py +0 -2
- mindsdb/integrations/handlers/cloud_sql_handler/cloud_sql_handler.py +0 -1
- mindsdb/integrations/handlers/cohere_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/cohere_handler/cohere_handler.py +11 -13
- mindsdb/integrations/handlers/confluence_handler/confluence_tables.py +6 -0
- mindsdb/integrations/handlers/databend_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/databend_handler/databend_handler.py +4 -4
- mindsdb/integrations/handlers/databend_handler/tests/__init__.py +0 -1
- mindsdb/integrations/handlers/databend_handler/tests/test_databend_handler.py +1 -1
- mindsdb/integrations/handlers/derby_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/derby_handler/derby_handler.py +14 -22
- mindsdb/integrations/handlers/derby_handler/tests/test_derby_handler.py +6 -6
- mindsdb/integrations/handlers/discord_handler/discord_handler.py +5 -5
- mindsdb/integrations/handlers/discord_handler/discord_tables.py +3 -3
- mindsdb/integrations/handlers/discord_handler/tests/test_discord.py +5 -3
- mindsdb/integrations/handlers/dockerhub_handler/dockerhub.py +3 -3
- mindsdb/integrations/handlers/dockerhub_handler/dockerhub_handler.py +2 -2
- mindsdb/integrations/handlers/dockerhub_handler/dockerhub_tables.py +57 -54
- mindsdb/integrations/handlers/dremio_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/druid_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +2 -2
- mindsdb/integrations/handlers/edgelessdb_handler/tests/test_edgelessdb_handler.py +9 -9
- mindsdb/integrations/handlers/email_handler/email_client.py +1 -1
- mindsdb/integrations/handlers/email_handler/email_ingestor.py +1 -1
- mindsdb/integrations/handlers/email_handler/email_tables.py +0 -1
- mindsdb/integrations/handlers/email_handler/settings.py +0 -1
- mindsdb/integrations/handlers/eventstoredb_handler/eventstoredb_handler.py +2 -1
- mindsdb/integrations/handlers/firebird_handler/firebird_handler.py +1 -1
- mindsdb/integrations/handlers/flaml_handler/flaml_handler.py +9 -9
- mindsdb/integrations/handlers/frappe_handler/frappe_client.py +5 -5
- mindsdb/integrations/handlers/frappe_handler/frappe_handler.py +6 -5
- mindsdb/integrations/handlers/frappe_handler/frappe_tables.py +2 -2
- mindsdb/integrations/handlers/github_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/github_handler/github_handler.py +1 -8
- mindsdb/integrations/handlers/github_handler/github_tables.py +13 -24
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +2 -1
- mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py +1 -4
- mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +6 -13
- mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
- mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +2 -1
- mindsdb/integrations/handlers/google_books_handler/google_books_tables.py +0 -3
- mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +4 -4
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +2 -6
- mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +3 -2
- mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_tables.py +0 -3
- mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +10 -12
- mindsdb/integrations/handlers/google_fit_handler/google_fit_tables.py +11 -13
- mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
- mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +2 -1
- mindsdb/integrations/handlers/google_search_handler/google_search_tables.py +0 -3
- mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/groq_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/hackernews_handler/hn_handler.py +5 -7
- mindsdb/integrations/handlers/hackernews_handler/hn_table.py +6 -7
- mindsdb/integrations/handlers/hive_handler/tests/test_hive_handler.py +1 -1
- mindsdb/integrations/handlers/hsqldb_handler/connection_args.py +6 -6
- mindsdb/integrations/handlers/hsqldb_handler/hsqldb_handler.py +4 -3
- mindsdb/integrations/handlers/huggingface_api_handler/exceptions.py +1 -1
- mindsdb/integrations/handlers/huggingface_api_handler/huggingface_api_handler.py +1 -8
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +6 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
- mindsdb/integrations/handlers/ignite_handler/ignite_handler.py +2 -1
- mindsdb/integrations/handlers/impala_handler/impala_handler.py +9 -12
- mindsdb/integrations/handlers/impala_handler/tests/test_impala_handler.py +11 -11
- mindsdb/integrations/handlers/influxdb_handler/influxdb_handler.py +10 -13
- mindsdb/integrations/handlers/influxdb_handler/influxdb_tables.py +20 -20
- mindsdb/integrations/handlers/informix_handler/__about__.py +8 -8
- mindsdb/integrations/handlers/informix_handler/__init__.py +12 -5
- mindsdb/integrations/handlers/informix_handler/informix_handler.py +99 -133
- mindsdb/integrations/handlers/informix_handler/tests/test_informix_handler.py +13 -11
- mindsdb/integrations/handlers/ingres_handler/__about__.py +0 -1
- mindsdb/integrations/handlers/ingres_handler/ingres_handler.py +1 -0
- mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
- mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
- mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/kinetica_handler/__init__.py +0 -1
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +4 -4
- mindsdb/integrations/handlers/langchain_handler/tools.py +9 -10
- mindsdb/integrations/handlers/leonardoai_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +2 -2
- mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -3
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/lightwood_handler/tests/test_lightwood_handler.py +11 -11
- mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +4 -4
- mindsdb/integrations/handlers/llama_index_handler/settings.py +10 -9
- mindsdb/integrations/handlers/materialize_handler/tests/test_materialize_handler.py +8 -10
- mindsdb/integrations/handlers/matrixone_handler/matrixone_handler.py +4 -4
- mindsdb/integrations/handlers/matrixone_handler/tests/test_matrixone_handler.py +8 -9
- mindsdb/integrations/handlers/maxdb_handler/connection_args.py +25 -25
- mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +1 -0
- mindsdb/integrations/handlers/mediawiki_handler/mediawiki_handler.py +3 -2
- mindsdb/integrations/handlers/mediawiki_handler/mediawiki_tables.py +1 -1
- mindsdb/integrations/handlers/mendeley_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/mendeley_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/mendeley_handler/mendeley_handler.py +48 -56
- mindsdb/integrations/handlers/mendeley_handler/mendeley_tables.py +24 -29
- mindsdb/integrations/handlers/mendeley_handler/tests/test_mendeley_handler.py +19 -17
- mindsdb/integrations/handlers/merlion_handler/merlion_handler.py +5 -4
- mindsdb/integrations/handlers/minds_endpoint_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/mlflow_handler/mlflow_handler.py +58 -36
- mindsdb/integrations/handlers/monetdb_handler/__about__.py +8 -8
- mindsdb/integrations/handlers/monetdb_handler/__init__.py +15 -5
- mindsdb/integrations/handlers/monetdb_handler/connection_args.py +17 -18
- mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +40 -57
- mindsdb/integrations/handlers/monetdb_handler/tests/test_monetdb_handler.py +7 -8
- mindsdb/integrations/handlers/monetdb_handler/utils/monet_get_id.py +13 -14
- mindsdb/integrations/handlers/monkeylearn_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/monkeylearn_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/monkeylearn_handler/monkeylearn_handler.py +2 -5
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +1 -0
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
- mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +23 -23
- mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +3 -3
- mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +10 -5
- mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +73 -8
- mindsdb/integrations/handlers/mysql_handler/__about__.py +8 -8
- mindsdb/integrations/handlers/mysql_handler/__init__.py +15 -5
- mindsdb/integrations/handlers/mysql_handler/connection_args.py +43 -47
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +101 -34
- mindsdb/integrations/handlers/mysql_handler/settings.py +15 -13
- mindsdb/integrations/handlers/neuralforecast_handler/neuralforecast_handler.py +1 -1
- mindsdb/integrations/handlers/newsapi_handler/newsapi_handler.py +1 -1
- mindsdb/integrations/handlers/newsapi_handler/tests/test_newsapi_handler.py +4 -4
- mindsdb/integrations/handlers/nuo_jdbc_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/nuo_jdbc_handler/nuo_jdbc_handler.py +28 -36
- mindsdb/integrations/handlers/nuo_jdbc_handler/tests/test_nuo_handler.py +5 -5
- mindsdb/integrations/handlers/oceanbase_handler/oceanbase_handler.py +0 -1
- mindsdb/integrations/handlers/oceanbase_handler/tests/test_oceanbase_handler.py +8 -10
- mindsdb/integrations/handlers/ollama_handler/ollama_handler.py +3 -3
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +5 -4
- mindsdb/integrations/handlers/opengauss_handler/tests/test_opengauss_handler.py +1 -2
- mindsdb/integrations/handlers/openstreetmap_handler/__init__.py +7 -7
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +6 -0
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +77 -11
- mindsdb/integrations/handlers/orioledb_handler/tests/test_orioledb_handler.py +8 -10
- mindsdb/integrations/handlers/palm_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/palm_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/palm_handler/palm_handler.py +1 -3
- mindsdb/integrations/handlers/paypal_handler/paypal_handler.py +2 -2
- mindsdb/integrations/handlers/paypal_handler/paypal_tables.py +15 -14
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +53 -10
- mindsdb/integrations/handlers/phoenix_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/phoenix_handler/phoenix_handler.py +1 -0
- mindsdb/integrations/handlers/pinot_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/pinot_handler/pinot_handler.py +3 -2
- mindsdb/integrations/handlers/plaid_handler/plaid_handler.py +13 -13
- mindsdb/integrations/handlers/plaid_handler/plaid_tables.py +10 -12
- mindsdb/integrations/handlers/plaid_handler/utils.py +4 -6
- mindsdb/integrations/handlers/planetscale_handler/planetscale_handler.py +1 -4
- mindsdb/integrations/handlers/portkey_handler/__init__.py +2 -2
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +105 -24
- mindsdb/integrations/handlers/postgres_handler/tests/test_postgres_handler.py +11 -6
- mindsdb/integrations/handlers/questdb_handler/questdb_handler.py +1 -2
- mindsdb/integrations/handlers/questdb_handler/tests/test_questdb_handler.py +2 -3
- mindsdb/integrations/handlers/quickbooks_handler/quickbooks_handler.py +6 -8
- mindsdb/integrations/handlers/quickbooks_handler/quickbooks_table.py +10 -10
- mindsdb/integrations/handlers/rag_handler/ingest.py +2 -2
- mindsdb/integrations/handlers/rag_handler/rag_handler.py +1 -1
- mindsdb/integrations/handlers/rag_handler/settings.py +1 -1
- mindsdb/integrations/handlers/reddit_handler/reddit_handler.py +2 -7
- mindsdb/integrations/handlers/reddit_handler/reddit_tables.py +2 -3
- mindsdb/integrations/handlers/replicate_handler/replicate_handler.py +6 -6
- mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_handler.py +1 -2
- mindsdb/integrations/handlers/rocket_chat_handler/rocket_chat_tables.py +0 -3
- mindsdb/integrations/handlers/rockset_handler/connection_args.py +14 -14
- mindsdb/integrations/handlers/rockset_handler/tests/test_rockset_handler.py +1 -0
- mindsdb/integrations/handlers/scylla_handler/scylla_handler.py +6 -5
- mindsdb/integrations/handlers/sendinblue_handler/sendinblue_handler.py +2 -1
- mindsdb/integrations/handlers/sendinblue_handler/sendinblue_tables.py +16 -16
- mindsdb/integrations/handlers/sentence_transformers_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/sheets_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +7 -6
- mindsdb/integrations/handlers/shopify_handler/shopify_tables.py +38 -41
- mindsdb/integrations/handlers/singlestore_handler/__about__.py +1 -1
- mindsdb/integrations/handlers/singlestore_handler/__init__.py +0 -1
- mindsdb/integrations/handlers/singlestore_handler/singlestore_handler.py +1 -0
- mindsdb/integrations/handlers/singlestore_handler/tests/test_singlestore_handler.py +3 -3
- mindsdb/integrations/handlers/slack_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +100 -6
- mindsdb/integrations/handlers/solr_handler/connection_args.py +7 -7
- mindsdb/integrations/handlers/solr_handler/solr_handler.py +2 -1
- mindsdb/integrations/handlers/solr_handler/tests/test_solr_handler.py +2 -1
- mindsdb/integrations/handlers/sqlany_handler/sqlany_handler.py +3 -2
- mindsdb/integrations/handlers/sqlite_handler/sqlite_handler.py +1 -0
- mindsdb/integrations/handlers/sqreamdb_handler/connection_args.py +1 -1
- mindsdb/integrations/handlers/sqreamdb_handler/sqreamdb_handler.py +15 -20
- mindsdb/integrations/handlers/sqreamdb_handler/tests/test_sqreamdb_handler.py +4 -4
- mindsdb/integrations/handlers/stabilityai_handler/__init__.py +1 -1
- mindsdb/integrations/handlers/starrocks_handler/starrocks_handler.py +0 -1
- mindsdb/integrations/handlers/starrocks_handler/tests/test_starrocks_handler.py +8 -10
- mindsdb/integrations/handlers/statsforecast_handler/statsforecast_handler.py +2 -2
- mindsdb/integrations/handlers/strava_handler/strava_handler.py +4 -8
- mindsdb/integrations/handlers/strava_handler/strava_tables.py +22 -30
- mindsdb/integrations/handlers/stripe_handler/stripe_handler.py +3 -2
- mindsdb/integrations/handlers/stripe_handler/stripe_tables.py +11 -27
- mindsdb/integrations/handlers/supabase_handler/tests/test_supabase_handler.py +1 -1
- mindsdb/integrations/handlers/surrealdb_handler/surrealdb_handler.py +4 -4
- mindsdb/integrations/handlers/tdengine_handler/tdengine_handler.py +25 -27
- mindsdb/integrations/handlers/tdengine_handler/tests/test_tdengine_handler.py +8 -8
- mindsdb/integrations/handlers/tidb_handler/tests/test_tidb_handler.py +1 -2
- mindsdb/integrations/handlers/timegpt_handler/timegpt_handler.py +5 -5
- mindsdb/integrations/handlers/tpot_handler/tpot_handler.py +21 -26
- mindsdb/integrations/handlers/trino_handler/trino_handler.py +14 -14
- mindsdb/integrations/handlers/twitter_handler/twitter_handler.py +2 -4
- mindsdb/integrations/handlers/unify_handler/tests/test_unify_handler.py +7 -8
- mindsdb/integrations/handlers/unify_handler/unify_handler.py +9 -9
- mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/vertex_handler/vertex_client.py +1 -1
- mindsdb/integrations/handlers/vertica_handler/tests/test_vertica_handler.py +11 -11
- mindsdb/integrations/handlers/vertica_handler/vertica_handler.py +11 -14
- mindsdb/integrations/handlers/vitess_handler/tests/test_vitess_handler.py +9 -11
- mindsdb/integrations/handlers/vitess_handler/vitess_handler.py +0 -1
- mindsdb/integrations/handlers/web_handler/web_handler.py +1 -0
- mindsdb/integrations/handlers/whatsapp_handler/__init__.py +3 -3
- mindsdb/integrations/handlers/writer_handler/evaluate.py +1 -1
- mindsdb/integrations/handlers/writer_handler/settings.py +0 -1
- mindsdb/integrations/handlers/writer_handler/writer_handler.py +1 -0
- mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +5 -5
- mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +26 -27
- mindsdb/integrations/handlers/yugabyte_handler/tests/test_yugabyte_handler.py +3 -3
- mindsdb/integrations/handlers/yugabyte_handler/yugabyte_handler.py +0 -6
- mindsdb/integrations/libs/response.py +67 -52
- mindsdb/integrations/libs/vectordatabase_handler.py +6 -0
- mindsdb/integrations/utilities/files/file_reader.py +5 -2
- mindsdb/integrations/utilities/handler_utils.py +15 -3
- mindsdb/integrations/utilities/handlers/api_utilities/__init__.py +0 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +0 -2
- mindsdb/integrations/utilities/utils.py +3 -3
- mindsdb/interfaces/agents/agents_controller.py +164 -1
- mindsdb/interfaces/agents/constants.py +29 -2
- mindsdb/interfaces/agents/langchain_agent.py +18 -8
- mindsdb/interfaces/agents/mindsdb_database_agent.py +101 -2
- mindsdb/interfaces/database/projects.py +1 -7
- mindsdb/interfaces/functions/controller.py +11 -14
- mindsdb/interfaces/functions/to_markdown.py +9 -124
- mindsdb/interfaces/knowledge_base/controller.py +47 -19
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +41 -15
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +434 -0
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +54 -0
- mindsdb/interfaces/knowledge_base/utils.py +10 -15
- mindsdb/interfaces/model/model_controller.py +0 -2
- mindsdb/interfaces/query_context/context_controller.py +66 -10
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +190 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +92 -0
- mindsdb/interfaces/skills/skill_tool.py +202 -57
- mindsdb/interfaces/skills/sql_agent.py +238 -28
- mindsdb/interfaces/storage/fs.py +1 -0
- mindsdb/interfaces/variables/__init__.py +0 -0
- mindsdb/interfaces/variables/variables_controller.py +97 -0
- mindsdb/migrations/env.py +5 -7
- mindsdb/migrations/migrate.py +47 -9
- mindsdb/migrations/versions/2025-05-21_9f150e4f9a05_checkpoint_1.py +360 -0
- mindsdb/utilities/config.py +333 -220
- mindsdb/utilities/context.py +1 -1
- mindsdb/utilities/functions.py +0 -36
- mindsdb/utilities/langfuse.py +19 -10
- mindsdb/utilities/otel/__init__.py +9 -193
- mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
- mindsdb/utilities/otel/prepare.py +198 -0
- mindsdb/utilities/sql.py +83 -0
- mindsdb/utilities/starters.py +13 -0
- {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/METADATA +351 -338
- {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/RECORD +348 -322
- {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/WHEEL +1 -1
- mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
- mindsdb/integrations/handlers/monkeylearn_handler/requirements.txt +0 -1
- {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import enum
|
|
2
2
|
import inspect
|
|
3
|
+
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from collections import defaultdict
|
|
5
6
|
from typing import List, Dict, Optional
|
|
@@ -14,7 +15,7 @@ from mindsdb.utilities.config import config
|
|
|
14
15
|
from mindsdb.interfaces.storage import db
|
|
15
16
|
from mindsdb.interfaces.skills.sql_agent import SQLAgent
|
|
16
17
|
from mindsdb.integrations.libs.vectordatabase_handler import TableField
|
|
17
|
-
|
|
18
|
+
from mindsdb.interfaces.agents.constants import DEFAULT_TEXT2SQL_DATABASE
|
|
18
19
|
|
|
19
20
|
_DEFAULT_TOP_K_SIMILARITY_SEARCH = 5
|
|
20
21
|
_MAX_CACHE_SIZE = 1000
|
|
@@ -120,7 +121,6 @@ class SkillToolController:
|
|
|
120
121
|
try:
|
|
121
122
|
from mindsdb.interfaces.agents.mindsdb_database_agent import MindsDBSQL
|
|
122
123
|
from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_toolkit import MindsDBSQLToolkit
|
|
123
|
-
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
|
|
124
124
|
except ImportError:
|
|
125
125
|
raise ImportError(
|
|
126
126
|
'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')
|
|
@@ -132,75 +132,220 @@ class SkillToolController:
|
|
|
132
132
|
return f'`{name}`'
|
|
133
133
|
|
|
134
134
|
tables_list = []
|
|
135
|
+
knowledge_bases_list = []
|
|
136
|
+
ignore_knowledge_bases_list = []
|
|
137
|
+
|
|
138
|
+
# Track databases extracted from dot notation
|
|
139
|
+
extracted_databases = set()
|
|
140
|
+
|
|
141
|
+
# Initialize knowledge_base_database with default value
|
|
142
|
+
knowledge_base_database = DEFAULT_TEXT2SQL_DATABASE # Default to mindsdb project
|
|
143
|
+
|
|
144
|
+
# First pass: collect all database and knowledge base parameters
|
|
135
145
|
for skill in skills:
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
146
|
+
# Update knowledge_base_database if specified in any skill
|
|
147
|
+
if skill.params.get('knowledge_base_database'):
|
|
148
|
+
knowledge_base_database = skill.params.get('knowledge_base_database')
|
|
149
|
+
|
|
150
|
+
# Extract databases from include_tables with dot notation
|
|
151
|
+
if skill.params.get('include_tables'):
|
|
152
|
+
include_tables = skill.params.get('include_tables')
|
|
153
|
+
if isinstance(include_tables, str):
|
|
154
|
+
include_tables = [t.strip() for t in include_tables.split(',')]
|
|
155
|
+
|
|
156
|
+
# Extract database names from dot notation
|
|
157
|
+
for table in include_tables:
|
|
158
|
+
if '.' in table:
|
|
159
|
+
db_name = table.split('.')[0]
|
|
160
|
+
extracted_databases.add(db_name)
|
|
161
|
+
|
|
162
|
+
# Extract databases from include_knowledge_bases with dot notation
|
|
163
|
+
if skill.params.get('include_knowledge_bases'):
|
|
164
|
+
include_kbs = skill.params.get('include_knowledge_bases')
|
|
165
|
+
if isinstance(include_kbs, str):
|
|
166
|
+
include_kbs = [kb.strip() for kb in include_kbs.split(',')]
|
|
167
|
+
|
|
168
|
+
# Extract database names from dot notation
|
|
169
|
+
for kb in include_kbs:
|
|
170
|
+
if '.' in kb:
|
|
171
|
+
db_name = kb.split('.')[0]
|
|
172
|
+
if db_name != knowledge_base_database:
|
|
173
|
+
# Only update if it's different from the default
|
|
174
|
+
knowledge_base_database = db_name
|
|
175
|
+
|
|
176
|
+
# Second pass: collect all tables and knowledge base restrictions
|
|
177
|
+
for skill in skills:
|
|
178
|
+
# Get database for tables (this is an actual database connection)
|
|
179
|
+
database = skill.params.get('database', DEFAULT_TEXT2SQL_DATABASE)
|
|
180
|
+
|
|
181
|
+
# Add databases extracted from dot notation if no explicit database is provided
|
|
182
|
+
if not database and extracted_databases:
|
|
183
|
+
# Use the first extracted database if no explicit database is provided
|
|
184
|
+
database = next(iter(extracted_databases))
|
|
185
|
+
# Update the skill params with the extracted database
|
|
186
|
+
skill.params['database'] = database
|
|
187
|
+
|
|
188
|
+
# Extract knowledge base restrictions if they exist in the skill params
|
|
189
|
+
if skill.params.get('include_knowledge_bases'):
|
|
190
|
+
# Convert to list if it's a string
|
|
191
|
+
include_kbs = skill.params.get('include_knowledge_bases')
|
|
192
|
+
if isinstance(include_kbs, str):
|
|
193
|
+
include_kbs = [kb.strip() for kb in include_kbs.split(',')]
|
|
194
|
+
|
|
195
|
+
# Process each knowledge base name
|
|
196
|
+
for kb in include_kbs:
|
|
197
|
+
# If it doesn't have a dot, prefix it with the knowledge_base_database
|
|
198
|
+
if '.' not in kb:
|
|
199
|
+
knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
|
|
200
|
+
else:
|
|
201
|
+
knowledge_bases_list.append(kb)
|
|
202
|
+
|
|
203
|
+
# Collect ignore_knowledge_bases
|
|
204
|
+
if skill.params.get('ignore_knowledge_bases'):
|
|
205
|
+
# Convert to list if it's a string
|
|
206
|
+
ignore_kbs = skill.params.get('ignore_knowledge_bases')
|
|
207
|
+
if isinstance(ignore_kbs, str):
|
|
208
|
+
ignore_kbs = [kb.strip() for kb in ignore_kbs.split(',')]
|
|
209
|
+
|
|
210
|
+
# Process each knowledge base name to ignore
|
|
211
|
+
for kb in ignore_kbs:
|
|
212
|
+
# If it doesn't have a dot, prefix it with the knowledge_base_database
|
|
213
|
+
if '.' not in kb:
|
|
214
|
+
ignore_knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
|
|
215
|
+
else:
|
|
216
|
+
ignore_knowledge_bases_list.append(kb)
|
|
217
|
+
|
|
218
|
+
# Skip if no database specified
|
|
219
|
+
if not database:
|
|
154
220
|
continue
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
221
|
+
|
|
222
|
+
# Process include_tables with dot notation
|
|
223
|
+
if skill.params.get('include_tables'):
|
|
224
|
+
include_tables = skill.params.get('include_tables')
|
|
225
|
+
if isinstance(include_tables, str):
|
|
226
|
+
include_tables = [t.strip() for t in include_tables.split(',')]
|
|
227
|
+
|
|
228
|
+
for table in include_tables:
|
|
229
|
+
# If table already has a database prefix, use it as is
|
|
230
|
+
if '.' in table:
|
|
231
|
+
# Check if the table already has backticks
|
|
232
|
+
if '`' in table:
|
|
233
|
+
tables_list.append(table)
|
|
234
|
+
else:
|
|
235
|
+
# Apply escape_table_name only to the table part
|
|
236
|
+
parts = table.split('.')
|
|
237
|
+
if len(parts) == 2:
|
|
238
|
+
# Format: database.table
|
|
239
|
+
tables_list.append(f"{parts[0]}.{escape_table_name(parts[1])}")
|
|
240
|
+
elif len(parts) == 3:
|
|
241
|
+
# Format: database.schema.table
|
|
242
|
+
tables_list.append(f"{parts[0]}.{parts[1]}.{escape_table_name(parts[2])}")
|
|
243
|
+
else:
|
|
244
|
+
# Unusual format, escape the whole thing
|
|
245
|
+
tables_list.append(escape_table_name(table))
|
|
159
246
|
else:
|
|
160
|
-
|
|
247
|
+
# Otherwise, prefix with the database
|
|
248
|
+
tables_list.append(f"{database}.{escape_table_name(table)}")
|
|
249
|
+
|
|
250
|
+
# Skip further table processing if include_tables is specified
|
|
251
|
+
continue
|
|
252
|
+
|
|
253
|
+
restriction_on_tables = skill.restriction_on_tables
|
|
254
|
+
|
|
255
|
+
if restriction_on_tables is None and database:
|
|
256
|
+
try:
|
|
257
|
+
handler = command_executor.session.integration_controller.get_data_handler(database)
|
|
258
|
+
if 'all' in inspect.signature(handler.get_tables).parameters:
|
|
259
|
+
response = handler.get_tables(all=True)
|
|
260
|
+
else:
|
|
261
|
+
response = handler.get_tables()
|
|
262
|
+
# no restrictions
|
|
263
|
+
columns = [c.lower() for c in response.data_frame.columns]
|
|
264
|
+
name_idx = columns.index('table_name') if 'table_name' in columns else 0
|
|
265
|
+
|
|
266
|
+
if 'table_schema' in response.data_frame.columns:
|
|
267
|
+
for _, row in response.data_frame.iterrows():
|
|
268
|
+
tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
|
|
269
|
+
else:
|
|
270
|
+
for table_name in response.data_frame.iloc[:, name_idx]:
|
|
271
|
+
tables_list.append(f"{database}.{escape_table_name(table_name)}")
|
|
272
|
+
except Exception as e:
|
|
273
|
+
logger.warning(f"Could not get tables from database {database}: {str(e)}")
|
|
274
|
+
continue
|
|
275
|
+
|
|
276
|
+
# Handle table restrictions
|
|
277
|
+
if restriction_on_tables and database:
|
|
278
|
+
for schema_name, tables in restriction_on_tables.items():
|
|
279
|
+
for table in tables:
|
|
280
|
+
# Check if the table already has dot notation (e.g., 'postgresql_conn.home_rentals')
|
|
281
|
+
if '.' in table:
|
|
282
|
+
# Table already has database prefix, add it directly
|
|
283
|
+
tables_list.append(escape_table_name(table))
|
|
284
|
+
else:
|
|
285
|
+
# No dot notation, apply schema and database as needed
|
|
286
|
+
if schema_name is None:
|
|
287
|
+
tables_list.append(f'{database}.{escape_table_name(table)}')
|
|
288
|
+
else:
|
|
289
|
+
tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
|
|
290
|
+
continue
|
|
291
|
+
|
|
292
|
+
# Remove duplicates from lists
|
|
293
|
+
tables_list = list(set(tables_list))
|
|
294
|
+
knowledge_bases_list = list(set(knowledge_bases_list))
|
|
295
|
+
ignore_knowledge_bases_list = list(set(ignore_knowledge_bases_list))
|
|
296
|
+
|
|
297
|
+
# Determine knowledge base parameters to pass to SQLAgent
|
|
298
|
+
include_knowledge_bases = knowledge_bases_list if knowledge_bases_list else None
|
|
299
|
+
ignore_knowledge_bases = ignore_knowledge_bases_list if ignore_knowledge_bases_list else None
|
|
300
|
+
|
|
301
|
+
# If both include and ignore lists exist, include takes precedence
|
|
302
|
+
if include_knowledge_bases:
|
|
303
|
+
ignore_knowledge_bases = None
|
|
304
|
+
|
|
305
|
+
# # Get all databases from skills and extracted databases
|
|
306
|
+
# all_databases = list(set([s.params.get('database', DEFAULT_TEXT2SQL_DATABASE) for s in skills if s.params.get('database')] + list(extracted_databases)))
|
|
307
|
+
#
|
|
308
|
+
#
|
|
309
|
+
# # If no databases were specified or extracted, use 'mindsdb' as a default
|
|
310
|
+
# if not all_databases:
|
|
311
|
+
# all_databases = [DEFAULT_TEXT2SQL_DATABASE]
|
|
312
|
+
#
|
|
313
|
+
|
|
314
|
+
all_databases = []
|
|
315
|
+
# Filter out None values
|
|
316
|
+
all_databases = [db for db in all_databases if db is not None]
|
|
317
|
+
|
|
318
|
+
# Create a databases_struct dictionary that includes all extracted databases
|
|
319
|
+
databases_struct = {}
|
|
320
|
+
|
|
321
|
+
# First, add databases from skills with explicit database parameters
|
|
322
|
+
for skill in skills:
|
|
323
|
+
if skill.params.get('database'):
|
|
324
|
+
databases_struct[skill.params['database']] = skill.restriction_on_tables
|
|
325
|
+
|
|
326
|
+
# Then, add all extracted databases with no restrictions
|
|
327
|
+
for db_name in extracted_databases:
|
|
328
|
+
if db_name not in databases_struct:
|
|
329
|
+
databases_struct[db_name] = None
|
|
161
330
|
|
|
162
331
|
sql_agent = SQLAgent(
|
|
163
332
|
command_executor=command_executor,
|
|
164
|
-
databases=
|
|
165
|
-
databases_struct=
|
|
166
|
-
skill.params['database']: skill.restriction_on_tables
|
|
167
|
-
for skill in skills
|
|
168
|
-
},
|
|
333
|
+
databases=all_databases,
|
|
334
|
+
databases_struct=databases_struct,
|
|
169
335
|
include_tables=tables_list,
|
|
170
336
|
ignore_tables=None,
|
|
337
|
+
include_knowledge_bases=include_knowledge_bases,
|
|
338
|
+
ignore_knowledge_bases=ignore_knowledge_bases,
|
|
339
|
+
knowledge_base_database=knowledge_base_database,
|
|
171
340
|
sample_rows_in_table_info=3,
|
|
341
|
+
|
|
172
342
|
cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
|
|
173
343
|
)
|
|
174
344
|
db = MindsDBSQL.custom_init(
|
|
175
345
|
sql_agent=sql_agent
|
|
176
346
|
)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
sql_database_tools = MindsDBSQLToolkit(db=db, llm=llm).get_tools()
|
|
180
|
-
descriptions = []
|
|
181
|
-
for skill in skills:
|
|
182
|
-
description = skill.params.get('description', '')
|
|
183
|
-
if description:
|
|
184
|
-
descriptions.append(description)
|
|
185
|
-
|
|
186
|
-
for i, tool in enumerate(sql_database_tools):
|
|
187
|
-
if isinstance(tool, QuerySQLDataBaseTool):
|
|
188
|
-
# Add our own custom description so our agent knows when to query this table.
|
|
189
|
-
original_description = tool.description
|
|
190
|
-
tool.description = ''
|
|
191
|
-
if len(descriptions) > 0:
|
|
192
|
-
tool.description += f'Use this tool if you need data about {" OR ".join(descriptions)}.\n'
|
|
193
|
-
tool.description += 'Use the conversation context to decide which table to query.\n'
|
|
194
|
-
if len(tables_list) > 0:
|
|
195
|
-
f'These are the available tables: {",".join(tables_list)}.\n'
|
|
196
|
-
tool.description += (
|
|
197
|
-
'ALWAYS consider these special cases:\n'
|
|
198
|
-
' - For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")\n'
|
|
199
|
-
'Here are the rest of the instructions:\n'
|
|
200
|
-
f'{original_description}'
|
|
201
|
-
)
|
|
202
|
-
sql_database_tools[i] = tool
|
|
203
|
-
return sql_database_tools
|
|
347
|
+
toolkit = MindsDBSQLToolkit(db=db, llm=llm)
|
|
348
|
+
return toolkit.get_tools()
|
|
204
349
|
|
|
205
350
|
def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
|
|
206
351
|
"""
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
|
|
2
1
|
import re
|
|
3
2
|
import csv
|
|
4
3
|
import inspect
|
|
4
|
+
import traceback
|
|
5
5
|
from io import StringIO
|
|
6
6
|
from typing import Iterable, List, Optional, Any
|
|
7
7
|
|
|
@@ -13,6 +13,7 @@ from mindsdb.utilities import log
|
|
|
13
13
|
from mindsdb.utilities.context import context as ctx
|
|
14
14
|
from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
15
15
|
from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
|
|
16
|
+
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
|
|
16
17
|
|
|
17
18
|
logger = log.getLogger(__name__)
|
|
18
19
|
|
|
@@ -34,7 +35,7 @@ def list_to_csv_str(array: List[List[Any]]) -> str:
|
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
def split_table_name(table_name: str) -> List[str]:
|
|
37
|
-
"""Split table name from llm to
|
|
38
|
+
"""Split table name from llm to parts
|
|
38
39
|
|
|
39
40
|
Args:
|
|
40
41
|
table_name (str): input table name
|
|
@@ -72,34 +73,69 @@ def split_table_name(table_name: str) -> List[str]:
|
|
|
72
73
|
if current:
|
|
73
74
|
result.append(current.strip('`'))
|
|
74
75
|
|
|
76
|
+
# ensure we split the table name
|
|
77
|
+
result = [r.split(".") for r in result][0]
|
|
78
|
+
|
|
75
79
|
return result
|
|
76
80
|
|
|
77
81
|
|
|
78
82
|
class SQLAgent:
|
|
83
|
+
"""
|
|
84
|
+
SQLAgent is a class that handles SQL queries for agents.
|
|
85
|
+
"""
|
|
86
|
+
|
|
79
87
|
def __init__(
    self,
    command_executor,
    databases: List[str],
    databases_struct: dict,
    knowledge_base_database: str = 'mindsdb',
    include_tables: Optional[List[str]] = None,
    ignore_tables: Optional[List[str]] = None,
    include_knowledge_bases: Optional[List[str]] = None,
    ignore_knowledge_bases: Optional[List[str]] = None,
    sample_rows_in_table_info: int = 3,
    cache: Optional[dict] = None
):
    """Store the agent configuration and resolve include/ignore precedence.

    Args:
        command_executor: Object used to execute parsed SQL commands.
        databases (List[str]): Databases the agent works with.
        databases_struct (dict): Mapping kept as ``_mindsdb_db_struct``.
        knowledge_base_database (str): Project name that holds the
            knowledge bases (a project, not a database connection).
        include_tables (List[str]): Tables the agent may use. When this is
            non-empty, ``ignore_tables`` is discarded.
        ignore_tables (List[str]): Tables the agent must not use; honoured
            only when ``include_tables`` is empty or None.
        include_knowledge_bases (List[str]): Knowledge bases the agent may
            use. When non-empty, ``ignore_knowledge_bases`` is discarded.
        ignore_knowledge_bases (List[str]): Knowledge bases the agent must
            not use; honoured only when the include list is empty or None.
        sample_rows_in_table_info (int): Number of sample rows for table
            descriptions; coerced with ``int()``.
        cache: Optional cache object.
            NOTE(review): other methods call ``.get(key)`` and
            ``.set(key, value)`` on it, so a plain ``dict`` does not satisfy
            the annotation's apparent promise - confirm the intended type.
    """
    # Plain configuration.
    self._command_executor = command_executor
    self._databases = databases
    self._mindsdb_db_struct = databases_struct
    self.knowledge_base_database = knowledge_base_database
    self._sample_rows_in_table_info = int(sample_rows_in_table_info)
    self._cache = cache

    # Include and ignore lists are mutually exclusive; the include list
    # takes priority whenever it is set, leaving the ignore list empty.
    self._tables_to_include = include_tables
    self._tables_to_ignore = [] if include_tables else (ignore_tables or [])

    self._knowledge_bases_to_include = include_knowledge_bases
    self._knowledge_bases_to_ignore = (
        [] if include_knowledge_bases else (ignore_knowledge_bases or [])
    )

    # Imported inside the method, as in the original.
    # presumably this avoids a circular import - TODO confirm
    from mindsdb.interfaces.skills.skill_tool import SkillToolController
    self.skill_tool = SkillToolController()
|
|
138
|
+
|
|
103
139
|
def _call_engine(self, query: str, database=None):
|
|
104
140
|
# switch database
|
|
105
141
|
ast_query = parse_sql(query.strip('`'))
|
|
@@ -107,7 +143,10 @@ class SQLAgent:
|
|
|
107
143
|
|
|
108
144
|
if database is None:
|
|
109
145
|
# if we use tables with prefixes it should work for any database
|
|
110
|
-
|
|
146
|
+
if self._databases is not None:
|
|
147
|
+
# if we have multiple databases, we need to check which one to use
|
|
148
|
+
# for now, we will just use the first one
|
|
149
|
+
database = self._databases[0] if self._databases else "mindsdb"
|
|
111
150
|
|
|
112
151
|
ret = self._command_executor.execute_command(
|
|
113
152
|
ast_query,
|
|
@@ -131,9 +170,28 @@ class SQLAgent:
|
|
|
131
170
|
|
|
132
171
|
def _check_f(node, is_table=None, **kwargs):
|
|
133
172
|
if is_table and isinstance(node, Identifier):
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
173
|
+
table_name = '.'.join(node.parts)
|
|
174
|
+
|
|
175
|
+
# Get the list of available knowledge bases
|
|
176
|
+
kb_names = self.get_usable_knowledge_base_names()
|
|
177
|
+
|
|
178
|
+
# Check if this table is a knowledge base
|
|
179
|
+
is_kb = table_name in kb_names
|
|
180
|
+
|
|
181
|
+
# If it's a knowledge base and we have knowledge base restrictions
|
|
182
|
+
if is_kb and self._knowledge_bases_to_include:
|
|
183
|
+
kb_parts = [split_table_name(x) for x in self._knowledge_bases_to_include]
|
|
184
|
+
if node.parts not in kb_parts:
|
|
185
|
+
raise ValueError(f"Knowledge base {table_name} not found. Available knowledge bases: {', '.join(self._knowledge_bases_to_include)}")
|
|
186
|
+
# Regular table check
|
|
187
|
+
elif not is_kb and self._tables_to_include and node.parts not in tables_parts:
|
|
188
|
+
raise ValueError(f"Table {table_name} not found. Available tables: {', '.join(self._tables_to_include)}")
|
|
189
|
+
# Check if it's a restricted knowledge base
|
|
190
|
+
elif is_kb and table_name in self._knowledge_bases_to_ignore:
|
|
191
|
+
raise ValueError(f"Knowledge base {table_name} is not allowed.")
|
|
192
|
+
# Check if it's a restricted table
|
|
193
|
+
elif not is_kb and table_name in self._tables_to_ignore:
|
|
194
|
+
raise ValueError(f"Table {table_name} is not allowed.")
|
|
137
195
|
query_traversal(ast_query, _check_f)
|
|
138
196
|
|
|
139
197
|
def get_usable_table_names(self) -> Iterable[str]:
|
|
@@ -200,6 +258,78 @@ class SQLAgent:
|
|
|
200
258
|
self._cache.set(cache_key, set(result_tables))
|
|
201
259
|
return result_tables
|
|
202
260
|
|
|
261
|
+
def get_usable_knowledge_base_names(self) -> Iterable[str]:
    """Get the names of the knowledge bases the agent has access to.

    An explicit include list short-circuits everything: it is returned
    unchanged and nothing is queried. Otherwise the names are read with
    ``SHOW KNOWLEDGE_BASES``; if that query raises, the session's
    ``kb_controller`` is used instead. In both cases names present in the
    ignore list are dropped and the result is written to the cache.

    Returns:
        Iterable[str]: Knowledge base names. An empty list is returned when
        the query result is empty or when any error occurs (errors are
        logged, never raised).
    """
    if self._knowledge_bases_to_include:
        return self._knowledge_bases_to_include

    # Only needed for the cache write below, so built after the early return.
    cache_key = f'{ctx.company_id}_{self.knowledge_base_database}_knowledge_bases'

    # TODO: cache reads are intentionally disabled until the cache is fixed;
    # the file cache can potentially serve out-of-date information.

    try:
        query = f"SHOW KNOWLEDGE_BASES FROM {self.knowledge_base_database};"
        try:
            result = self._call_engine(query, database=self.knowledge_base_database)
        except Exception as e:
            # The direct query can fail, e.g. when knowledge_base_database is
            # not a valid integration; fall back to the project's controller.
            logger.warning(f"Error querying knowledge bases from {self.knowledge_base_database}: {str(e)}")
            try:
                kb_controller = self._command_executor.session.kb_controller
                kb_names = [kb['name'] for kb in kb_controller.list()]
                # The include list is known to be empty here (see the early
                # return), so only the ignore list has to be applied.
                kb_names = [name for name in kb_names if name not in self._knowledge_bases_to_ignore]

                if self._cache:
                    self._cache.set(cache_key, set(kb_names))

                return kb_names
            except Exception as inner_e:
                logger.error(f"Error getting knowledge bases from kb_controller: {str(inner_e)}")
                return []

        if not result:
            return []

        # NOTE(review): this iterates the engine result directly and indexes
        # rows by 'name', while get_kb_sample_rows reads `ret.data.to_lists()`
        # from the same call - confirm the result really yields mappings.
        kb_names = [
            row['name'] for row in result
            if row['name'] not in self._knowledge_bases_to_ignore
        ]

        if self._cache:
            self._cache.set(cache_key, set(kb_names))

        return kb_names
    except Exception as e:
        # Best effort: log the failure and report no knowledge bases.
        logger.error(f"Error in get_usable_knowledge_base_names: {str(e)}")
        return []
|
|
332
|
+
|
|
203
333
|
def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
|
|
204
334
|
"""
|
|
205
335
|
Tries to find table (which comes directly from an LLM) by its name
|
|
@@ -237,6 +367,26 @@ class SQLAgent:
|
|
|
237
367
|
|
|
238
368
|
return tables
|
|
239
369
|
|
|
370
|
+
def get_knowledge_base_info(self, kb_names: Optional[List[str]] = None) -> str:
    """Get sample-row information about the specified knowledge bases.

    Args:
        kb_names (Optional[List[str]]): Knowledge bases to describe. When
            None, every name from ``get_usable_knowledge_base_names()`` is
            used, mirroring how ``get_table_info`` falls back to all usable
            tables.

    Returns:
        str: The per-knowledge-base sample-row blocks joined by blank
        lines; an empty string when there is nothing to describe.
    """
    if kb_names is None:
        # The default used to be iterated directly, raising TypeError.
        kb_names = self.get_usable_knowledge_base_names()

    kbs_info = []
    for kb in kb_names:
        # TODO: cache reads are disabled (the original condition was
        # `True or ...`), so the info is always recomputed and the cache is
        # only refreshed, never consulted.
        kb_info = self.get_kb_sample_rows(kb)
        if self._cache:
            self._cache.set(f"{ctx.company_id}_{kb}_info", kb_info)

        kbs_info.append(kb_info)

    return "\n\n".join(kbs_info)
|
|
389
|
+
|
|
240
390
|
def get_table_info(self, table_names: Optional[List[str]] = None) -> str:
|
|
241
391
|
""" Get information about specified tables.
|
|
242
392
|
Follows best practices as specified in: Rajkumar et al, 2022 (https://arxiv.org/abs/2204.00498)
|
|
@@ -244,16 +394,25 @@ class SQLAgent:
|
|
|
244
394
|
appended to each table description. This can increase performance as demonstrated in the paper.
|
|
245
395
|
"""
|
|
246
396
|
|
|
247
|
-
all_tables = [
|
|
397
|
+
all_tables = []
|
|
398
|
+
for name in self.get_usable_table_names():
|
|
399
|
+
# remove backticks
|
|
400
|
+
name = name.replace("`", "")
|
|
248
401
|
|
|
249
|
-
|
|
250
|
-
|
|
402
|
+
split = name.split(".")
|
|
403
|
+
if len(split) > 1:
|
|
404
|
+
all_tables.append(Identifier(parts=[split[0], split[1]]))
|
|
405
|
+
else:
|
|
406
|
+
all_tables.append(Identifier(name))
|
|
407
|
+
|
|
408
|
+
# if table_names is not None:
|
|
409
|
+
# all_tables = self._resolve_table_names(table_names, all_tables)
|
|
251
410
|
|
|
252
411
|
tables_info = []
|
|
253
412
|
for table in all_tables:
|
|
254
413
|
key = f"{ctx.company_id}_{table}_info"
|
|
255
414
|
table_info = self._cache.get(key) if self._cache else None
|
|
256
|
-
if table_info is None:
|
|
415
|
+
if True or table_info is None:
|
|
257
416
|
table_info = self._get_single_table_info(table)
|
|
258
417
|
if self._cache:
|
|
259
418
|
self._cache.set(key, table_info)
|
|
@@ -262,6 +421,34 @@ class SQLAgent:
|
|
|
262
421
|
|
|
263
422
|
return "\n\n".join(tables_info)
|
|
264
423
|
|
|
424
|
+
def get_kb_sample_rows(self, kb_name: str) -> str:
    """Fetch a few rows from a knowledge base and render them as text.

    Runs ``select * from <kb_name> limit 10;`` through the engine, shortens
    every cell whose string form is 100 characters or longer to its first
    100 characters plus ``'...'``, and formats the rows with
    ``list_to_csv_str``.

    Args:
        kb_name (str): Name of the knowledge base. It is interpolated into
            the SQL text as-is.

    Returns:
        str: A newline, ``"<kb_name>:"`` and the CSV-formatted rows, or a
        fixed error line when anything in the lookup fails.
    """
    logger.info(f'_get_sample_rows: knowledge base={kb_name}')
    sql = f"select * from {kb_name} limit 10;"

    def _shorten(cell):
        # Keep long cells from flooding the output.
        text = str(cell)
        if len(text) < 100:
            return text
        return text[:100] + '...'

    try:
        answer = self._call_engine(sql)
        rows = [[_shorten(cell) for cell in row] for row in answer.data.to_lists()]
        return f"\n{kb_name}:" + list_to_csv_str(rows)
    except Exception as err:
        # Best effort: a failed lookup yields a placeholder, not an error.
        logger.info(f'_get_sample_rows error: {err}')
        return "\n\t [error] Couldn't retrieve sample rows!"
|
|
451
|
+
|
|
265
452
|
def _get_single_table_info(self, table: Identifier) -> str:
|
|
266
453
|
if len(table.parts) < 2:
|
|
267
454
|
raise ValueError(f"Database is required for table: {table}")
|
|
@@ -276,19 +463,41 @@ class SQLAgent:
|
|
|
276
463
|
dn = self._command_executor.session.datahub.get(integration)
|
|
277
464
|
|
|
278
465
|
fields, dtypes = [], []
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
466
|
+
try:
|
|
467
|
+
df = dn.get_table_columns_df(table_name, schema_name)
|
|
468
|
+
if not isinstance(df, pd.DataFrame) or df.empty:
|
|
469
|
+
logger.warning(f"Received empty or invalid DataFrame for table columns of {table_str}")
|
|
470
|
+
return f"Table named `{table_str}`:\n [No column information available]"
|
|
471
|
+
|
|
472
|
+
fields = df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list()
|
|
473
|
+
dtypes = [
|
|
474
|
+
mysql_data_type.value if isinstance(mysql_data_type, MYSQL_DATA_TYPE) else (data_type or 'UNKNOWN')
|
|
475
|
+
for mysql_data_type, data_type
|
|
476
|
+
in zip(
|
|
477
|
+
df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE],
|
|
478
|
+
df[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE]
|
|
479
|
+
)
|
|
480
|
+
]
|
|
481
|
+
except Exception as e:
|
|
482
|
+
logger.error(f"Failed processing column info for {table_str}: {e}", exc_info=True)
|
|
483
|
+
raise ValueError(f"Failed to process column info for {table_str}") from e
|
|
484
|
+
|
|
485
|
+
if not fields:
|
|
486
|
+
logger.error(f"Could not extract column fields for {table_str}.")
|
|
487
|
+
return f"Table named `{table_str}`:\n [Could not extract column information]"
|
|
488
|
+
|
|
489
|
+
try:
|
|
490
|
+
sample_rows_info = self._get_sample_rows(table_str, fields)
|
|
491
|
+
except Exception as e:
|
|
492
|
+
logger.warning(f"Could not get sample rows for {table_str}: {e}")
|
|
493
|
+
sample_rows_info = "\n\t [error] Couldn't retrieve sample rows!"
|
|
286
494
|
|
|
287
495
|
info = f'Table named `{table_str}`:\n'
|
|
288
496
|
info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str} in CSV format (dialect is 'excel'):\n"
|
|
289
|
-
info +=
|
|
497
|
+
info += sample_rows_info + "\n"
|
|
290
498
|
info += '\nColumn data types: ' + ",\t".join(
|
|
291
|
-
[f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]
|
|
499
|
+
[f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]
|
|
500
|
+
) + '\n'
|
|
292
501
|
return info
|
|
293
502
|
|
|
294
503
|
def _get_sample_rows(self, table: str, fields: List[str]) -> str:
|
|
@@ -364,6 +573,7 @@ class SQLAgent:
|
|
|
364
573
|
logger.info(f'query_safe (fetch={fetch}): {command}')
|
|
365
574
|
return self.query(command, fetch)
|
|
366
575
|
except Exception as e:
|
|
576
|
+
logger.error(f"Error in query_safe: {str(e)}\n{traceback.format_exc()}")
|
|
367
577
|
logger.info(f'query_safe error: {e}')
|
|
368
578
|
msg = f"Error: {e}"
|
|
369
579
|
if 'does not exist' in msg and ' relation ' in msg:
|