MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +40 -29
- mindsdb/api/a2a/__init__.py +1 -1
- mindsdb/api/a2a/agent.py +16 -10
- mindsdb/api/a2a/common/server/server.py +7 -3
- mindsdb/api/a2a/common/server/task_manager.py +12 -5
- mindsdb/api/a2a/common/types.py +66 -0
- mindsdb/api/a2a/task_manager.py +65 -17
- mindsdb/api/common/middleware.py +10 -12
- mindsdb/api/executor/command_executor.py +51 -40
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/planner/query_prepare.py +2 -20
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +37 -20
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +75 -61
- mindsdb/api/http/namespaces/agents.py +10 -15
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/chatbots.py +0 -5
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +17 -4
- mindsdb/api/http/namespaces/handlers.py +17 -7
- mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +16 -10
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
- mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
- mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
- mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
- mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -3
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
- mindsdb/interfaces/database/data_handlers_cache.py +190 -0
- mindsdb/interfaces/database/database.py +3 -3
- mindsdb/interfaces/database/integrations.py +7 -110
- mindsdb/interfaces/database/projects.py +2 -6
- mindsdb/interfaces/database/views.py +1 -4
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -9
- mindsdb/interfaces/jobs/scheduler.py +3 -9
- mindsdb/interfaces/knowledge_base/controller.py +244 -128
- mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
- mindsdb/interfaces/knowledge_base/executor.py +11 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +172 -168
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/skills_controller.py +1 -4
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/db.py +16 -6
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -52
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +68 -2
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/json_encoder.py +24 -10
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +22 -20
- mindsdb/utilities/starters.py +0 -10
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +293 -276
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +144 -158
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- mindsdb/api/postgres/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
- mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
- mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
- mindsdb/api/postgres/start.py +0 -11
- mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
- mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -55,7 +55,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
55
55
|
f"""\
|
|
56
56
|
Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
|
|
57
57
|
Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
|
|
58
|
-
This system is a highly intelligent and reliable
|
|
58
|
+
This system is a highly intelligent and reliable SQL skill designed to work with databases.
|
|
59
59
|
Follow these instructions with utmost precision:
|
|
60
60
|
1. Final Response Format:
|
|
61
61
|
- Assume the frontend fully supports Markdown unless the user specifies otherwise.
|
|
@@ -73,7 +73,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
73
73
|
- Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
|
|
74
74
|
5. Date Handling:
|
|
75
75
|
- **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
|
|
76
|
-
- **Always** use
|
|
76
|
+
- **Always** use `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
|
|
77
77
|
- For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
|
|
78
78
|
- Do not compare date values without casting columns to date.
|
|
79
79
|
- For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
|
|
@@ -95,6 +95,8 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
95
95
|
8. Identity and Purpose:
|
|
96
96
|
- When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
|
|
97
97
|
- When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
|
|
98
|
+
9. Important: you can use only mysql quoting rules to compose queries: backticks (`) for identifiers, and single quotes (') for constants
|
|
99
|
+
|
|
98
100
|
Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
|
|
99
101
|
"""
|
|
100
102
|
)
|
|
@@ -110,7 +112,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
110
112
|
"If the query is correct, it will be parsed and returned. "
|
|
111
113
|
f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
|
|
112
114
|
)
|
|
113
|
-
mindsdb_sql_parser_tool = MindsDBSQLParserTool(
|
|
115
|
+
mindsdb_sql_parser_tool = MindsDBSQLParserTool( # noqa: F841
|
|
114
116
|
name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
|
|
115
117
|
)
|
|
116
118
|
|
|
@@ -118,7 +120,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
118
120
|
query_sql_database_tool,
|
|
119
121
|
info_sql_database_tool,
|
|
120
122
|
list_sql_database_tool,
|
|
121
|
-
mindsdb_sql_parser_tool,
|
|
123
|
+
# mindsdb_sql_parser_tool,
|
|
122
124
|
]
|
|
123
125
|
if not self.include_knowledge_base_tools:
|
|
124
126
|
return sql_tools
|
|
@@ -175,29 +177,23 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
|
|
|
175
177
|
|
|
176
178
|
Query Types and Examples:
|
|
177
179
|
1. Basic semantic search:
|
|
178
|
-
kb_query_tool("SELECT * FROM kb_name WHERE
|
|
180
|
+
kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'your search query';")
|
|
179
181
|
|
|
180
182
|
2. Metadata filtering:
|
|
181
183
|
kb_query_tool("SELECT * FROM kb_name WHERE metadata_field = 'value';")
|
|
182
184
|
|
|
183
185
|
3. Combined search:
|
|
184
|
-
kb_query_tool("SELECT * FROM kb_name WHERE
|
|
186
|
+
kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND metadata_field = 'value';")
|
|
185
187
|
|
|
186
188
|
4. Setting relevance threshold:
|
|
187
|
-
kb_query_tool("SELECT * FROM kb_name WHERE
|
|
189
|
+
kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND relevance_threshold = 0.7;")
|
|
188
190
|
|
|
189
191
|
5. Limiting results:
|
|
190
|
-
kb_query_tool("SELECT * FROM kb_name WHERE
|
|
192
|
+
kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' LIMIT 5;")
|
|
191
193
|
|
|
192
194
|
6. Getting sample data:
|
|
193
195
|
kb_query_tool("SELECT * FROM kb_name LIMIT 3;")
|
|
194
196
|
|
|
195
|
-
7. Don't use LIKE operator on content filter ie semantic search:
|
|
196
|
-
SELECT * FROM `test_kb` WHERE content LIKE '%population of New York%' $STOP$
|
|
197
|
-
|
|
198
|
-
Like is not supported, use the following instead:
|
|
199
|
-
SELECT * FROM `test_kb` WHERE content = 'population of New York'
|
|
200
|
-
|
|
201
197
|
Result Format:
|
|
202
198
|
- Results include: id, chunk_id, chunk_content, metadata, distance, and relevance columns
|
|
203
199
|
- The metadata column contains a JSON object with all metadata fields
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
import
|
|
1
|
+
from langchain_core.documents import Document
|
|
2
|
+
from langchain_core.tools import Tool
|
|
2
3
|
|
|
3
4
|
from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
|
|
4
5
|
from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
|
|
5
6
|
from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
|
|
6
7
|
from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
|
|
7
|
-
|
|
8
|
+
from mindsdb.integrations.libs.response import RESPONSE_TYPE
|
|
9
|
+
from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
|
|
10
|
+
construct_model_from_args,
|
|
11
|
+
)
|
|
8
12
|
from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
|
|
9
13
|
from mindsdb.interfaces.skills.skill_tool import skill_tool
|
|
10
14
|
from mindsdb.interfaces.storage import db
|
|
11
15
|
from mindsdb.interfaces.storage.db import KnowledgeBase
|
|
12
16
|
from mindsdb.utilities import log
|
|
13
|
-
from langchain_core.documents import Document
|
|
14
|
-
from langchain_core.tools import Tool
|
|
15
|
-
from mindsdb.integrations.libs.response import RESPONSE_TYPE
|
|
16
|
-
from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
|
|
17
17
|
|
|
18
18
|
logger = log.getLogger(__name__)
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
|
|
22
|
-
tools_config = tool[
|
|
22
|
+
tools_config = tool["config"]
|
|
23
23
|
tools_config.update(pred_args)
|
|
24
24
|
|
|
25
25
|
kb_params = {}
|
|
26
26
|
embeddings_model = None
|
|
27
27
|
|
|
28
|
-
if
|
|
29
|
-
kb_name = tool[
|
|
28
|
+
if "source" in tool:
|
|
29
|
+
kb_name = tool["source"]
|
|
30
30
|
executor = skill_tool.get_command_executor()
|
|
31
31
|
kb = _get_knowledge_base(kb_name, skill.project_id, executor)
|
|
32
32
|
|
|
@@ -34,30 +34,26 @@ def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipeli
|
|
|
34
34
|
raise ValueError(f"Knowledge base not found: {kb_name}")
|
|
35
35
|
|
|
36
36
|
kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
|
|
37
|
-
vector_store_config = {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
is_sparse = tools_config.pop('is_sparse', None)
|
|
41
|
-
vector_size = tools_config.pop('vector_size', None)
|
|
37
|
+
vector_store_config = {"kb_table": kb_table}
|
|
38
|
+
is_sparse = tools_config.pop("is_sparse", None)
|
|
39
|
+
vector_size = tools_config.pop("vector_size", None)
|
|
42
40
|
if is_sparse is not None:
|
|
43
|
-
vector_store_config[
|
|
41
|
+
vector_store_config["is_sparse"] = is_sparse
|
|
44
42
|
if vector_size is not None:
|
|
45
|
-
vector_store_config[
|
|
46
|
-
kb_params = {
|
|
47
|
-
'vector_store_config': vector_store_config
|
|
48
|
-
}
|
|
43
|
+
vector_store_config["vector_size"] = vector_size
|
|
44
|
+
kb_params = {"vector_store_config": vector_store_config}
|
|
49
45
|
|
|
50
46
|
# Get embedding model from knowledge base table
|
|
51
47
|
if kb_table._kb.embedding_model:
|
|
52
48
|
# Extract embedding model args from knowledge base table
|
|
53
|
-
embedding_args = kb_table._kb.embedding_model.learn_args.get(
|
|
49
|
+
embedding_args = kb_table._kb.embedding_model.learn_args.get("using", {})
|
|
54
50
|
# Construct the embedding model directly
|
|
55
51
|
embeddings_model = construct_model_from_args(embedding_args)
|
|
56
52
|
logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
|
|
57
53
|
else:
|
|
58
54
|
embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
|
|
59
55
|
logger.debug("Using default embedding model as knowledge base has no embedding model")
|
|
60
|
-
elif
|
|
56
|
+
elif "embedding_model" not in tools_config:
|
|
61
57
|
embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
|
|
62
58
|
logger.debug("Using default embedding model as no knowledge base provided")
|
|
63
59
|
|
|
@@ -75,29 +71,28 @@ def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
75
71
|
try:
|
|
76
72
|
result = rag_pipeline(query)
|
|
77
73
|
logger.debug(f"RAG pipeline result: {result}")
|
|
78
|
-
return result[
|
|
74
|
+
return result["answer"]
|
|
79
75
|
except Exception as e:
|
|
80
|
-
logger.
|
|
81
|
-
logger.error(traceback.format_exc())
|
|
76
|
+
logger.exception("Error in RAG pipeline:")
|
|
82
77
|
return f"Error in retrieval: {str(e)}"
|
|
83
78
|
|
|
84
79
|
# Create RAG tool
|
|
85
|
-
tools_config = tool[
|
|
80
|
+
tools_config = tool["config"]
|
|
86
81
|
tools_config.update(pred_args)
|
|
87
82
|
return Tool(
|
|
88
83
|
func=rag_wrapper,
|
|
89
|
-
name=tool[
|
|
90
|
-
description=tool[
|
|
91
|
-
response_format=
|
|
84
|
+
name=tool["name"],
|
|
85
|
+
description=tool["description"],
|
|
86
|
+
response_format="content",
|
|
92
87
|
# Return directly by default since we already use an LLM against retrieved context to generate a response.
|
|
93
|
-
return_direct=tools_config.get(
|
|
88
|
+
return_direct=tools_config.get("return_direct", True),
|
|
94
89
|
)
|
|
95
90
|
|
|
96
91
|
|
|
97
92
|
def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
98
|
-
if
|
|
93
|
+
if "source" not in tool:
|
|
99
94
|
raise ValueError("Knowledge base for tool not found")
|
|
100
|
-
kb_name = tool[
|
|
95
|
+
kb_name = tool["source"]
|
|
101
96
|
executor = skill_tool.get_command_executor()
|
|
102
97
|
kb = _get_knowledge_base(kb_name, skill.project_id, executor)
|
|
103
98
|
if not kb:
|
|
@@ -110,16 +105,16 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
110
105
|
|
|
111
106
|
def _get_document_by_name(name: str):
|
|
112
107
|
if metadata_config.name_column_index is not None:
|
|
113
|
-
tsquery_str =
|
|
108
|
+
tsquery_str = " & ".join(name.split(" "))
|
|
114
109
|
documents_response = vector_db_handler.native_query(
|
|
115
|
-
f
|
|
110
|
+
f"SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery('{tsquery_str}') LIMIT 1;"
|
|
116
111
|
)
|
|
117
112
|
else:
|
|
118
113
|
documents_response = vector_db_handler.native_query(
|
|
119
|
-
f
|
|
114
|
+
f"SELECT * FROM {metadata_config.table} WHERE \"{metadata_config.name_column}\" ILIKE '%{name}%' LIMIT 1;"
|
|
120
115
|
)
|
|
121
116
|
if documents_response.resp_type == RESPONSE_TYPE.ERROR:
|
|
122
|
-
raise RuntimeError(f
|
|
117
|
+
raise RuntimeError(f"There was an error looking up documents: {documents_response.error_message}")
|
|
123
118
|
if documents_response.data_frame.empty:
|
|
124
119
|
return None
|
|
125
120
|
document_row = documents_response.data_frame.head(1)
|
|
@@ -127,38 +122,36 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
127
122
|
id_filter_condition = FilterCondition(
|
|
128
123
|
f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
|
|
129
124
|
FilterOperator.EQUAL,
|
|
130
|
-
str(document_row.get(metadata_config.id_column).item())
|
|
125
|
+
str(document_row.get(metadata_config.id_column).item()),
|
|
131
126
|
)
|
|
132
127
|
document_chunks_df = vector_db_handler.select(
|
|
133
|
-
metadata_config.embeddings_table,
|
|
134
|
-
conditions=[id_filter_condition]
|
|
128
|
+
metadata_config.embeddings_table, conditions=[id_filter_condition]
|
|
135
129
|
)
|
|
136
130
|
if document_chunks_df.empty:
|
|
137
131
|
return None
|
|
138
|
-
sort_col =
|
|
132
|
+
sort_col = "chunk_id" if "chunk_id" in document_chunks_df.columns else "id"
|
|
139
133
|
document_chunks_df.sort_values(by=sort_col)
|
|
140
|
-
content =
|
|
134
|
+
content = ""
|
|
141
135
|
for _, chunk in document_chunks_df.iterrows():
|
|
142
136
|
if len(content) > metadata_config.max_document_context:
|
|
143
137
|
break
|
|
144
|
-
content += chunk.get(metadata_config.content_column,
|
|
138
|
+
content += chunk.get(metadata_config.content_column, "")
|
|
145
139
|
|
|
146
|
-
return Document(
|
|
147
|
-
page_content=content,
|
|
148
|
-
metadata=document_row.to_dict(orient='records')[0]
|
|
149
|
-
)
|
|
140
|
+
return Document(page_content=content, metadata=document_row.to_dict(orient="records")[0])
|
|
150
141
|
|
|
151
142
|
def _lookup_document_by_name(name: str):
|
|
152
143
|
found_document = _get_document_by_name(name)
|
|
153
144
|
if found_document is None:
|
|
154
|
-
return
|
|
145
|
+
return (
|
|
146
|
+
f"I could not find any document with name {name}. Please make sure the document name matches exactly."
|
|
147
|
+
)
|
|
155
148
|
return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
|
|
156
149
|
|
|
157
150
|
return Tool(
|
|
158
151
|
func=_lookup_document_by_name,
|
|
159
|
-
name=tool.get(
|
|
160
|
-
description=
|
|
161
|
-
return_direct=False
|
|
152
|
+
name=tool.get("name", "") + "_name_lookup",
|
|
153
|
+
description="You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.",
|
|
154
|
+
return_direct=False,
|
|
162
155
|
)
|
|
163
156
|
|
|
164
157
|
|
|
@@ -181,7 +174,7 @@ def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
|
|
|
181
174
|
try:
|
|
182
175
|
rag_config = _load_rag_config(tool, pred_args, skill)
|
|
183
176
|
except Exception as e:
|
|
184
|
-
logger.
|
|
177
|
+
logger.exception("Error building RAG pipeline:")
|
|
185
178
|
raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
|
|
186
179
|
tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
|
|
187
180
|
if rag_config.metadata_config is None:
|
|
@@ -274,8 +274,8 @@ class SkillToolController:
|
|
|
274
274
|
else:
|
|
275
275
|
for table_name in response.data_frame.iloc[:, name_idx]:
|
|
276
276
|
tables_list.append(f"{database}.{escape_table_name(table_name)}")
|
|
277
|
-
except Exception
|
|
278
|
-
logger.warning(f"Could not get tables from database {database}:
|
|
277
|
+
except Exception:
|
|
278
|
+
logger.warning(f"Could not get tables from database {database}:", exc_info=True)
|
|
279
279
|
continue
|
|
280
280
|
|
|
281
281
|
# Handle table restrictions
|
|
@@ -100,10 +100,7 @@ class SkillsController:
|
|
|
100
100
|
project_name = default_project
|
|
101
101
|
project = self.project_controller.get(name=project_name)
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
raise ValueError(f"The name must be in lower case: {name}")
|
|
105
|
-
|
|
106
|
-
skill = self.get_skill(name, project_name)
|
|
103
|
+
skill = self.get_skill(name, project_name, strict_case=True)
|
|
107
104
|
|
|
108
105
|
if skill is not None:
|
|
109
106
|
raise ValueError(f"Skill with name already exists: {name}")
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import csv
|
|
3
3
|
import inspect
|
|
4
|
-
import traceback
|
|
5
4
|
from io import StringIO
|
|
6
5
|
from typing import Iterable, List, Optional, Any, Tuple
|
|
7
6
|
from collections import defaultdict
|
|
@@ -254,8 +253,16 @@ class SQLAgent:
|
|
|
254
253
|
self.check_table_permission(node)
|
|
255
254
|
except ValueError as origin_exc:
|
|
256
255
|
# was it badly quoted by llm?
|
|
257
|
-
|
|
258
|
-
|
|
256
|
+
#
|
|
257
|
+
if "." in node.parts[0]:
|
|
258
|
+
# extract quoted parts (with dots) to sub-parts
|
|
259
|
+
parts = []
|
|
260
|
+
for i, item in enumerate(node.parts):
|
|
261
|
+
if node.is_quoted[i] and "." in item:
|
|
262
|
+
parts.extend(Identifier(item).parts)
|
|
263
|
+
else:
|
|
264
|
+
parts.append(item)
|
|
265
|
+
node2 = Identifier(parts=parts)
|
|
259
266
|
try:
|
|
260
267
|
_check_f(node2, is_table=True)
|
|
261
268
|
return node2
|
|
@@ -382,9 +389,9 @@ class SQLAgent:
|
|
|
382
389
|
# self._cache.set(cache_key, set(kb_names))
|
|
383
390
|
|
|
384
391
|
return kb_names
|
|
385
|
-
except Exception
|
|
392
|
+
except Exception:
|
|
386
393
|
# If there's an error, log it and return an empty list
|
|
387
|
-
logger.
|
|
394
|
+
logger.exception("Error in get_usable_knowledge_base_names")
|
|
388
395
|
return []
|
|
389
396
|
|
|
390
397
|
def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
|
|
@@ -483,9 +490,9 @@ class SQLAgent:
|
|
|
483
490
|
# remove backticks
|
|
484
491
|
name = name.replace("`", "")
|
|
485
492
|
|
|
486
|
-
|
|
487
|
-
if len(
|
|
488
|
-
all_tables.append(Identifier(parts=
|
|
493
|
+
parts = name.split(".")
|
|
494
|
+
if len(parts) > 1:
|
|
495
|
+
all_tables.append(Identifier(parts=parts))
|
|
489
496
|
else:
|
|
490
497
|
all_tables.append(Identifier(name))
|
|
491
498
|
|
|
@@ -526,8 +533,8 @@ class SQLAgent:
|
|
|
526
533
|
|
|
527
534
|
sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
|
|
528
535
|
sample_rows_str = "\n" + f"{kb_name}:" + list_to_csv_str(sample_rows)
|
|
529
|
-
except Exception
|
|
530
|
-
logger.info(
|
|
536
|
+
except Exception:
|
|
537
|
+
logger.info("_get_sample_rows error:", exc_info=True)
|
|
531
538
|
sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
|
|
532
539
|
|
|
533
540
|
return sample_rows_str
|
|
@@ -560,7 +567,7 @@ class SQLAgent:
|
|
|
560
567
|
)
|
|
561
568
|
]
|
|
562
569
|
except Exception as e:
|
|
563
|
-
logger.
|
|
570
|
+
logger.exception(f"Failed processing column info for {table_str}:")
|
|
564
571
|
raise ValueError(f"Failed to process column info for {table_str}") from e
|
|
565
572
|
|
|
566
573
|
if not fields:
|
|
@@ -569,8 +576,8 @@ class SQLAgent:
|
|
|
569
576
|
|
|
570
577
|
try:
|
|
571
578
|
sample_rows_info = self._get_sample_rows(table_str, fields)
|
|
572
|
-
except Exception
|
|
573
|
-
logger.warning(f"Could not get sample rows for {table_str}:
|
|
579
|
+
except Exception:
|
|
580
|
+
logger.warning(f"Could not get sample rows for {table_str}:", exc_info=True)
|
|
574
581
|
sample_rows_info = "\n\t [error] Couldn't retrieve sample rows!"
|
|
575
582
|
|
|
576
583
|
info = f"Table named `{table_str}`:\n"
|
|
@@ -585,7 +592,7 @@ class SQLAgent:
|
|
|
585
592
|
|
|
586
593
|
def _get_sample_rows(self, table: str, fields: List[str]) -> str:
|
|
587
594
|
logger.info(f"_get_sample_rows: table={table} fields={fields}")
|
|
588
|
-
command = f"select
|
|
595
|
+
command = f"select * from {table} limit {self._sample_rows_in_table_info};"
|
|
589
596
|
try:
|
|
590
597
|
ret = self._call_engine(command)
|
|
591
598
|
sample_rows = ret.data.to_lists()
|
|
@@ -596,8 +603,8 @@ class SQLAgent:
|
|
|
596
603
|
|
|
597
604
|
sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
|
|
598
605
|
sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
|
|
599
|
-
except Exception
|
|
600
|
-
logger.info(
|
|
606
|
+
except Exception:
|
|
607
|
+
logger.info("_get_sample_rows error:", exc_info=True)
|
|
601
608
|
sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
|
|
602
609
|
|
|
603
610
|
return sample_rows_str
|
|
@@ -647,7 +654,7 @@ class SQLAgent:
|
|
|
647
654
|
logger.info(f"get_table_info_safe: {table_names}")
|
|
648
655
|
return self.get_table_info(table_names)
|
|
649
656
|
except Exception as e:
|
|
650
|
-
logger.info(
|
|
657
|
+
logger.info("get_table_info_safe error:", exc_info=True)
|
|
651
658
|
return f"Error: {e}"
|
|
652
659
|
|
|
653
660
|
def query_safe(self, command: str, fetch: str = "all") -> str:
|
|
@@ -655,8 +662,7 @@ class SQLAgent:
|
|
|
655
662
|
logger.info(f"query_safe (fetch={fetch}): {command}")
|
|
656
663
|
return self.query(command, fetch)
|
|
657
664
|
except Exception as e:
|
|
658
|
-
logger.
|
|
659
|
-
logger.info(f"query_safe error: {e}")
|
|
665
|
+
logger.exception("Error in query_safe:")
|
|
660
666
|
msg = f"Error: {e}"
|
|
661
667
|
if "does not exist" in msg and " relation " in msg:
|
|
662
668
|
msg += "\nAvailable tables: " + ", ".join(self.get_usable_table_names())
|
mindsdb/interfaces/storage/db.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import orjson
|
|
2
3
|
import datetime
|
|
3
4
|
from typing import Dict, List, Optional
|
|
4
5
|
|
|
@@ -47,10 +48,20 @@ def init(connection_str: str = None):
|
|
|
47
48
|
global Base, session, engine
|
|
48
49
|
if connection_str is None:
|
|
49
50
|
connection_str = config["storage_db"]
|
|
51
|
+
# Use orjson with our CustomJSONEncoder.default for JSON serialization
|
|
52
|
+
_default_json = CustomJSONEncoder().default
|
|
53
|
+
|
|
54
|
+
def _json_serializer(value):
|
|
55
|
+
return orjson.dumps(
|
|
56
|
+
value,
|
|
57
|
+
default=_default_json,
|
|
58
|
+
option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME,
|
|
59
|
+
).decode("utf-8")
|
|
60
|
+
|
|
50
61
|
base_args = {
|
|
51
62
|
"pool_size": 30,
|
|
52
63
|
"max_overflow": 200,
|
|
53
|
-
"json_serializer":
|
|
64
|
+
"json_serializer": _json_serializer,
|
|
54
65
|
}
|
|
55
66
|
engine = create_engine(connection_str, echo=False, **base_args)
|
|
56
67
|
session = scoped_session(sessionmaker(bind=engine, autoflush=True))
|
|
@@ -534,11 +545,10 @@ class KnowledgeBase(Base):
|
|
|
534
545
|
reranking_model = params.pop("reranking_model", None)
|
|
535
546
|
|
|
536
547
|
if not with_secrets:
|
|
537
|
-
|
|
538
|
-
embedding_model
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
reranking_model["api_key"] = "******"
|
|
548
|
+
for key in ("api_key", "private_key"):
|
|
549
|
+
for el in (embedding_model, reranking_model):
|
|
550
|
+
if el and key in el:
|
|
551
|
+
el[key] = "******"
|
|
542
552
|
|
|
543
553
|
return {
|
|
544
554
|
"id": self.id,
|