MindsDB 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. More details are available on the original diff page.

Files changed (28):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +71 -43
  3. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +16 -1
  4. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
  6. mindsdb/api/executor/planner/plan_join.py +1 -1
  7. mindsdb/api/executor/planner/query_planner.py +7 -1
  8. mindsdb/api/executor/utilities/sql.py +18 -19
  9. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -1
  10. mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
  11. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
  12. mindsdb/integrations/libs/api_handler.py +6 -7
  13. mindsdb/interfaces/agents/constants.py +44 -0
  14. mindsdb/interfaces/agents/langchain_agent.py +8 -1
  15. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -17
  16. mindsdb/interfaces/data_catalog/data_catalog_reader.py +19 -2
  17. mindsdb/interfaces/knowledge_base/controller.py +23 -13
  18. mindsdb/interfaces/knowledge_base/evaluate.py +3 -3
  19. mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +17 -86
  20. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +30 -3
  21. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
  22. mindsdb/interfaces/skills/skill_tool.py +91 -88
  23. mindsdb/interfaces/skills/sql_agent.py +1 -1
  24. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/METADATA +255 -253
  25. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/RECORD +28 -28
  26. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/WHEEL +0 -0
  27. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/licenses/LICENSE +0 -0
  28. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/top_level.txt +0 -0
@@ -433,16 +433,15 @@ class APIHandler(BaseHandler):
433
433
  Args:
434
434
  name (str): the handler name
435
435
  """
436
-
437
436
  self._tables = {}
438
437
 
439
438
  def _register_table(self, table_name: str, table_class: Any):
440
439
  """
441
440
  Register the data resource. For e.g if you are using Twitter API it registers the `tweets` resource from `/api/v2/tweets`.
442
441
  """
443
- if table_name in self._tables:
442
+ if table_name.lower() in self._tables:
444
443
  raise TableAlreadyExists(f"Table with name {table_name} already exists for this handler")
445
- self._tables[table_name] = table_class
444
+ self._tables[table_name.lower()] = table_class
446
445
 
447
446
  def _get_table(self, name: Identifier):
448
447
  """
@@ -450,10 +449,10 @@ class APIHandler(BaseHandler):
450
449
  Args:
451
450
  name (Identifier): the table name
452
451
  """
453
- name = name.parts[-1]
454
- if name not in self._tables:
455
- raise TableNotFound(f"Table not found: {name}")
456
- return self._tables[name]
452
+ name = name.parts[-1].lower()
453
+ if name in self._tables:
454
+ return self._tables[name]
455
+ raise TableNotFound(f"Table not found: {name}")
457
456
 
458
457
  def query(self, query: ASTNode):
459
458
  if isinstance(query, Select):
@@ -171,6 +171,8 @@ NVIDIA_NIM_CHAT_MODELS = (
171
171
  )
172
172
 
173
173
  GOOGLE_GEMINI_CHAT_MODELS = (
174
+ "gemini-2.5-pro",
175
+ "gemini-2.5-flash",
174
176
  "gemini-2.5-pro-preview-03-25",
175
177
  "gemini-2.0-flash",
176
178
  "gemini-2.0-flash-lite",
@@ -228,3 +230,45 @@ You are an AI assistant powered by MindsDB. When answering questions, follow the
228
230
  For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
229
231
 
230
232
  """
233
+
234
+ MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
235
+
236
+ 1. For questions about database tables and their contents:
237
+ - Use the sql_db_query to query the tables directly
238
+ - You can join tables if needed to get comprehensive information
239
+ - You are running on a federated query engine, so joins across multiple databases are allowed and supported
240
+ - **Important Rule for SQL Queries:** If you formulate an SQL query as part of answering a user's question, you *must* then use the `sql_db_query` tool to execute that query and get its results. The SQL query string itself is NOT the final answer to the user unless the user has specifically asked for the query. Your final AI response should be based on the *results* obtained from executing the query.
241
+
242
+ 2. For factual questions about specific topics, use the knowledge base tools, if available, in this sequence:
243
+ - First use kb_list_tool to see available knowledge bases
244
+ - Then use kb_info_tool to understand the structure of relevant knowledge bases
245
+ - Finally use kb_query_tool to query the knowledge base for specific information
246
+
247
+ For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
248
+
249
+ Here is the user's question: {{question}}
250
+
251
+ TOOLS:
252
+ ------
253
+
254
+ Assistant has access to the following tools:"""
255
+
256
+ EXPLICIT_FORMAT_INSTRUCTIONS = """
257
+ << TOOL CALLING INSTRUCTIONS >>
258
+
259
+ **It is critical you use the following format to call a tool**
260
+
261
+ ```
262
+ Thought: Do I need to use a tool? Yes
263
+ Action: the action to take, should be one of [{tool_names}]
264
+ Action Input: the input to the action
265
+ Observation: the result of the action
266
+ ```
267
+
268
+ When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
269
+
270
+ ```
271
+ Thought: Do I need to use a tool? No
272
+ {ai_prefix}: [your response here]
273
+ ```
274
+ """
@@ -58,6 +58,8 @@ from mindsdb.interfaces.agents.constants import (
58
58
  TRACE_ID_COLUMN,
59
59
  DEFAULT_AGENT_SYSTEM_PROMPT,
60
60
  WRITER_CHAT_MODELS,
61
+ MINDSDB_PREFIX,
62
+ EXPLICIT_FORMAT_INSTRUCTIONS,
61
63
  )
62
64
  from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
63
65
  from langchain_anthropic import ChatAnthropic
@@ -426,7 +428,12 @@ class LangchainAgent:
426
428
  llm,
427
429
  agent=agent_type,
428
430
  # Use custom output parser to handle flaky LLMs that don't ALWAYS conform to output format.
429
- agent_kwargs={"output_parser": SafeOutputParser()},
431
+ agent_kwargs={
432
+ "output_parser": SafeOutputParser(),
433
+ "prefix": MINDSDB_PREFIX, # Override default "Assistant is a large language model..." text
434
+ "format_instructions": EXPLICIT_FORMAT_INSTRUCTIONS, # More explicit tool calling instructions
435
+ "ai_prefix": "AI",
436
+ },
430
437
  # Calls the agent's LLM Chain one final time to generate a final answer based on the previous steps
431
438
  early_stopping_method="generate",
432
439
  handle_parsing_errors=self._handle_parsing_errors,
@@ -111,24 +111,12 @@ class MindsDBSQL(SQLDatabase):
111
111
  )
112
112
 
113
113
  # Convert ExecuteAnswer to a DataFrame for easier manipulation
114
- df = None
115
- if hasattr(result, "data") and hasattr(result.data, "data_frame"):
116
- df = result.data.data_frame
114
+ if result.data is not None:
115
+ df = result.data.to_df()
116
+ return df.to_string(index=False)
117
+
117
118
  else:
118
- # Fallback to to_df when data_frame attr not available
119
- try:
120
- df = result.data.to_df()
121
- except Exception:
122
- df = None
123
-
124
- # Default behaviour (string)
125
- if df is not None:
126
- if not df.empty:
127
- return df.to_string(index=False)
128
- else:
129
- return "Query executed successfully, but returned no data."
130
-
131
- return str(result)
119
+ return "Query executed successfully, but returned no data."
132
120
 
133
121
  except Exception as e:
134
122
  logger.error(f"Error executing SQL command: {str(e)}\n{traceback.format_exc()}")
@@ -11,8 +11,6 @@ class DataCatalogReader(BaseDataCatalog):
11
11
  """
12
12
  Read the metadata from the data catalog and return it as a string.
13
13
  """
14
- if not self.is_data_catalog_supported():
15
- return f"Data catalog is not supported for database '{self.database_name}'."
16
14
  tables = self._read_metadata()
17
15
  if not tables:
18
16
  self.logger.warning(f"No metadata found for database '{self.database_name}'")
@@ -26,10 +24,29 @@ class DataCatalogReader(BaseDataCatalog):
26
24
  metadata_str += table.as_string() + "\n\n"
27
25
  return metadata_str
28
26
 
27
+ def read_metadata_as_records(self) -> list:
28
+ """
29
+ Read the metadata from the data catalog and return it as a list of database records.
30
+ """
31
+ tables = self._read_metadata()
32
+ if not tables:
33
+ self.logger.warning(f"No metadata found for database '{self.database_name}'")
34
+ return []
35
+ return tables
36
+
37
+ def get_handler_info(self) -> str:
38
+ """
39
+ Get the handler info for the database.
40
+ """
41
+ return self.data_handler.meta_get_handler_info()
42
+
29
43
  def _read_metadata(self) -> list:
30
44
  """
31
45
  Read the metadata from the data catalog and return it in a structured format.
32
46
  """
47
+ if not self.is_data_catalog_supported():
48
+ return f"Data catalog is not supported for database '{self.database_name}'."
49
+
33
50
  query = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id)
34
51
  if self.table_names:
35
52
  cleaned_table_names = [name.strip("`").split(".")[-1] for name in self.table_names]
@@ -9,6 +9,7 @@ import numpy as np
9
9
 
10
10
  from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
11
11
  from mindsdb_sql_parser.ast.mindsdb import CreatePredictor
12
+ from mindsdb_sql_parser import parse_sql
12
13
 
13
14
  from mindsdb.integrations.utilities.query_traversal import query_traversal
14
15
 
@@ -55,8 +56,13 @@ def get_model_params(model_params: dict, default_config_key: str):
55
56
  combined_model_params = copy.deepcopy(config.get(default_config_key, {}))
56
57
 
57
58
  if model_params:
59
+ if not isinstance(model_params, dict):
60
+ raise ValueError("Model parameters must be passed as a JSON object")
61
+
58
62
  combined_model_params.update(model_params)
59
63
 
64
+ combined_model_params.pop("use_default_llm", None)
65
+
60
66
  return combined_model_params
61
67
 
62
68
 
@@ -359,23 +365,30 @@ class KnowledgeBaseTable:
359
365
 
360
366
  def insert_query_result(self, query: str, project_name: str):
361
367
  """Process and insert SQL query results"""
362
- if not self.document_loader:
363
- raise ValueError("Document loader not configured")
368
+ ast_query = parse_sql(query)
364
369
 
365
- documents = list(self.document_loader.load_query_result(query, project_name))
366
- if documents:
367
- self.insert_documents(documents)
370
+ command_executor = ExecuteCommands(self.session)
371
+ response = command_executor.execute_command(ast_query, project_name)
372
+
373
+ if response.error_code is not None:
374
+ raise ValueError(f"Error executing query: {response.error_message}")
375
+
376
+ if response.data is None:
377
+ raise ValueError("Query returned no data")
378
+
379
+ records = response.data.records
380
+ df = pd.DataFrame(records)
381
+
382
+ self.insert(df)
368
383
 
369
384
  def insert_rows(self, rows: List[Dict]):
370
385
  """Process and insert raw data rows"""
371
386
  if not rows:
372
387
  return
373
388
 
374
- documents = [
375
- Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
376
- ]
389
+ df = pd.DataFrame(rows)
377
390
 
378
- self.insert_documents(documents)
391
+ self.insert(df)
379
392
 
380
393
  def insert_documents(self, documents: List[Document]):
381
394
  """Process and insert documents with preprocessing if configured"""
@@ -944,10 +957,7 @@ class KnowledgeBaseController:
944
957
  # # it is params for model
945
958
  # embedding_params.update(params["embedding_model"])
946
959
 
947
- if "embedding_model" in params:
948
- if not isinstance(params["embedding_model"], dict):
949
- raise ValueError("embedding_model should be JSON object with model parameters.")
950
- embedding_params.update(params["embedding_model"])
960
+ embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
951
961
 
952
962
  # if model_name is None: # Legacy
953
963
  model_name = self._create_embedding_model(
@@ -168,13 +168,13 @@ class EvaluateBase:
168
168
  test_data = self.generate_test_data(gen_params)
169
169
 
170
170
  self.save_to_table(test_table, test_data, is_replace=True)
171
- else:
172
- test_data = self.read_from_table(test_table)
173
171
 
174
172
  if params.get("evaluate", True) is False:
175
173
  # no evaluate is required
176
174
  return pd.DataFrame()
177
175
 
176
+ test_data = self.read_from_table(test_table)
177
+
178
178
  scores = self.evaluate(test_data)
179
179
  scores["name"] = self.name
180
180
  scores["created_at"] = dt.datetime.now()
@@ -511,6 +511,6 @@ class EvaluateDocID(EvaluateBase):
511
511
  "total": total_questions,
512
512
  "total_found": total_found,
513
513
  "retrieved_in_top_10": accurate_in_top_10,
514
- "cumulative_recall": cumulative_recall,
514
+ "cumulative_recall": json.dumps(cumulative_recall),
515
515
  "avg_query_time": avg_query_time,
516
516
  }
@@ -2,7 +2,6 @@ import os
2
2
  from typing import List, Iterator
3
3
  from langchain_core.documents import Document as LangchainDocument
4
4
  from langchain_text_splitters import MarkdownHeaderTextSplitter
5
- import pandas as pd
6
5
 
7
6
  from mindsdb.interfaces.file.file_controller import FileController
8
7
  from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
@@ -20,12 +19,12 @@ class DocumentLoader:
20
19
  """Handles loading documents from various sources including SQL queries"""
21
20
 
22
21
  def __init__(
23
- self,
24
- file_controller: FileController,
25
- file_splitter: FileSplitter,
26
- markdown_splitter: MarkdownHeaderTextSplitter,
27
- file_loader_class=FileLoader,
28
- mysql_proxy=None
22
+ self,
23
+ file_controller: FileController,
24
+ file_splitter: FileSplitter,
25
+ markdown_splitter: MarkdownHeaderTextSplitter,
26
+ file_loader_class=FileLoader,
27
+ mysql_proxy=None,
29
28
  ):
30
29
  """
31
30
  Initialize with required dependencies
@@ -52,8 +51,8 @@ class DocumentLoader:
52
51
  for doc in loader.lazy_load():
53
52
  # Add file extension to metadata for proper splitting
54
53
  extension = os.path.splitext(file_path)[1].lower()
55
- doc.metadata['extension'] = extension
56
- doc.metadata['source'] = file_name
54
+ doc.metadata["extension"] = extension
55
+ doc.metadata["source"] = file_name
57
56
 
58
57
  # Use FileSplitter to handle the document based on its type
59
58
  split_docs = self.file_splitter.split_documents([doc])
@@ -62,34 +61,22 @@ class DocumentLoader:
62
61
  metadata = doc.metadata.copy()
63
62
  metadata.update(split_doc.metadata or {})
64
63
 
65
- yield Document(
66
- content=split_doc.page_content,
67
- metadata=metadata
68
- )
64
+ yield Document(content=split_doc.page_content, metadata=metadata)
69
65
 
70
66
  def load_web_pages(
71
- self,
72
- urls: List[str],
73
- crawl_depth: int,
74
- limit: int,
75
- filters: List[str] = None,
67
+ self,
68
+ urls: List[str],
69
+ crawl_depth: int,
70
+ limit: int,
71
+ filters: List[str] = None,
76
72
  ) -> Iterator[Document]:
77
73
  """Load and split documents from web pages"""
78
- websites_df = get_all_websites(
79
- urls,
80
- crawl_depth=crawl_depth,
81
- limit=limit,
82
- filters=filters
83
- )
74
+ websites_df = get_all_websites(urls, crawl_depth=crawl_depth, limit=limit, filters=filters)
84
75
 
85
76
  for _, row in websites_df.iterrows():
86
77
  # Create a document with HTML extension for proper splitting
87
78
  doc = LangchainDocument(
88
- page_content=row['text_content'],
89
- metadata={
90
- 'extension': '.html',
91
- 'url': row['url']
92
- }
79
+ page_content=row["text_content"], metadata={"extension": ".html", "url": row["url"]}
93
80
  )
94
81
 
95
82
  # Use FileSplitter to handle HTML content
@@ -98,60 +85,4 @@ class DocumentLoader:
98
85
  metadata = doc.metadata.copy()
99
86
  metadata.update(split_doc.metadata or {})
100
87
 
101
- yield Document(
102
- content=split_doc.page_content,
103
- metadata=metadata
104
- )
105
-
106
- def load_query_result(self, query: str, project_name: str) -> Iterator[Document]:
107
- """
108
- Load documents from SQL query results
109
-
110
- Args:
111
- query: SQL query to execute
112
- project_name: Name of the project context
113
-
114
- Returns:
115
- Iterator of Document objects
116
-
117
- Raises:
118
- ValueError: If mysql_proxy is not configured or query returns no data
119
- """
120
- if not self.mysql_proxy:
121
- raise ValueError("MySQL proxy not configured")
122
-
123
- if not query:
124
- return
125
-
126
- # Set project context and execute query
127
- self.mysql_proxy.set_context({'db': project_name})
128
- query_result = self.mysql_proxy.process_query(query)
129
-
130
- if query_result.type != 'table':
131
- raise ValueError('Query returned no data')
132
-
133
- # Convert query result to DataFrame
134
- df = query_result.data.to_df()
135
-
136
- # Process each row into a Document
137
- for _, row in df.iterrows():
138
- # Extract id, content and metadata
139
- content = str(row.get('content', ''))
140
- id = row.get('id', None)
141
-
142
- # Convert remaining columns to metadata
143
- metadata = {
144
- col: str(row[col])
145
- for col in df.columns
146
- if col != 'content' and not pd.isna(row[col])
147
- }
148
- metadata['source'] = 'query'
149
-
150
- # Split content using recursive splitter
151
- if content:
152
-
153
- yield Document(
154
- id=id,
155
- content=content,
156
- metadata=metadata
157
- )
88
+ yield Document(content=split_doc.page_content, metadata=metadata)
@@ -3,6 +3,28 @@ import re
3
3
  import json
4
4
  from pydantic import BaseModel, Field
5
5
  from langchain_core.tools import BaseTool
6
+ from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star
7
+
8
+
9
+ def llm_str_strip(s):
10
+ length = -1
11
+ while length != len(s):
12
+ length = len(s)
13
+
14
+ # remove ```
15
+ if s.startswith("```"):
16
+ s = s[3:]
17
+ if s.endswith("```"):
18
+ s = s[:-3]
19
+
20
+ # remove trailing new lines
21
+ s = s.strip("\n")
22
+
23
+ # remove extra quotes
24
+ for q in ('"', "'", "`"):
25
+ if s.count(q) == 1:
26
+ s = s.strip(q)
27
+ return s
6
28
 
7
29
 
8
30
  class KnowledgeBaseListToolInput(BaseModel):
@@ -63,12 +85,14 @@ class KnowledgeBaseInfoTool(BaseTool):
63
85
  return [kb.strip() for kb in tool_input.split(",")]
64
86
  # If it's just a single string without formatting, return it as a single item
65
87
  if tool_input.strip():
66
- return [tool_input.strip()]
88
+ return [llm_str_strip(tool_input)]
67
89
  return []
68
90
 
69
91
  # Extract and clean the knowledge base names
70
92
  kb_names_str = match.group(1).strip()
71
93
  kb_names = re.findall(r"`([^`]+)`", kb_names_str)
94
+
95
+ kb_names = [llm_str_strip(n) for n in kb_names]
72
96
  return kb_names
73
97
 
74
98
  def _run(self, tool_input: str) -> str:
@@ -83,7 +107,7 @@ class KnowledgeBaseInfoTool(BaseTool):
83
107
  for kb_name in kb_names:
84
108
  try:
85
109
  # Get knowledge base schema
86
- schema_result = self.db.run_no_throw(f"DESCRIBE KNOWLEDGE_BASE `{kb_name}`;")
110
+ schema_result = self.db.run_no_throw(str(Describe(kb_name, type="knowledge_base")))
87
111
 
88
112
  if not schema_result:
89
113
  results.append(f"Knowledge base `{kb_name}` not found or has no schema information.")
@@ -111,7 +135,9 @@ class KnowledgeBaseInfoTool(BaseTool):
111
135
  kb_info += "```\n\n"
112
136
 
113
137
  # Get sample data
114
- sample_data = self.db.run_no_throw(f"SELECT * FROM `{kb_name}` LIMIT 10;")
138
+ sample_data = self.db.run_no_throw(
139
+ str(Select(targets=[Star()], from_table=Identifier(kb_name), limit=Constant(20)))
140
+ )
115
141
 
116
142
  # Sample data
117
143
  kb_info += "### Sample Data:\n"
@@ -196,6 +222,7 @@ class KnowledgeBaseQueryTool(BaseTool):
196
222
 
197
223
  try:
198
224
  # Execute the query
225
+ query = llm_str_strip(query)
199
226
  result = self.db.run_no_throw(query)
200
227
 
201
228
  if not result:
@@ -10,25 +10,27 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQ
10
10
  from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
11
11
  KnowledgeBaseListTool,
12
12
  KnowledgeBaseInfoTool,
13
- KnowledgeBaseQueryTool
13
+ KnowledgeBaseQueryTool,
14
14
  )
15
15
 
16
16
 
17
17
  class MindsDBSQLToolkit(SQLDatabaseToolkit):
18
+ include_knowledge_base_tools: bool = True
18
19
 
19
- def get_tools(self, prefix='') -> List[BaseTool]:
20
-
20
+ def get_tools(self, prefix="") -> List[BaseTool]:
21
21
  current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")
22
22
 
23
23
  """Get the tools in the toolkit."""
24
24
  list_sql_database_tool = ListSQLDatabaseTool(
25
- name=f'sql_db_list_tables{prefix}',
25
+ name=f"sql_db_list_tables{prefix}",
26
26
  db=self.db,
27
- description=dedent("""\n
27
+ description=dedent(
28
+ """\n
28
29
  Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks.
29
30
  Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`.
30
31
  Table names in response to the user must be escaped using backticks.
31
- """)
32
+ """
33
+ ),
32
34
  )
33
35
 
34
36
  info_sql_database_tool_description = (
@@ -45,11 +47,11 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
45
47
  " $START$ table1 table2 table3 $STOP$\n"
46
48
  )
47
49
  info_sql_database_tool = InfoSQLDatabaseTool(
48
- name=f'sql_db_schema{prefix}',
49
- db=self.db, description=info_sql_database_tool_description
50
+ name=f"sql_db_schema{prefix}", db=self.db, description=info_sql_database_tool_description
50
51
  )
51
52
 
52
- query_sql_database_tool_description = dedent(f"""\
53
+ query_sql_database_tool_description = dedent(
54
+ f"""\
53
55
  Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
54
56
  Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
55
57
  This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
@@ -93,11 +95,11 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
93
95
  - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
94
96
  - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
95
97
  Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
96
- """)
98
+ """
99
+ )
97
100
 
98
101
  query_sql_database_tool = QuerySQLDataBaseTool(
99
- name=f'sql_db_query{prefix}',
100
- db=self.db, description=query_sql_database_tool_description
102
+ name=f"sql_db_query{prefix}", db=self.db, description=query_sql_database_tool_description
101
103
  )
102
104
 
103
105
  mindsdb_sql_parser_tool_description = (
@@ -108,15 +110,24 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
108
110
  f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
109
111
  )
110
112
  mindsdb_sql_parser_tool = MindsDBSQLParserTool(
111
- name=f'mindsdb_sql_parser_tool{prefix}',
112
- description=mindsdb_sql_parser_tool_description
113
+ name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
113
114
  )
114
115
 
116
+ sql_tools = [
117
+ query_sql_database_tool,
118
+ info_sql_database_tool,
119
+ list_sql_database_tool,
120
+ mindsdb_sql_parser_tool,
121
+ ]
122
+ if not self.include_knowledge_base_tools:
123
+ return sql_tools
124
+
115
125
  # Knowledge base tools
116
126
  kb_list_tool = KnowledgeBaseListTool(
117
- name=f'kb_list_tool{prefix}',
127
+ name=f"kb_list_tool{prefix}",
118
128
  db=self.db,
119
- description=dedent("""\
129
+ description=dedent(
130
+ """\
120
131
  Lists all available knowledge bases that can be queried.
121
132
  Input: No input required, just call the tool directly.
122
133
  Output: A table of all available knowledge bases with their names and creation dates.
@@ -125,13 +136,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
125
136
  Each knowledge base name is escaped using backticks.
126
137
 
127
138
  Example usage: kb_list_tool()
128
- """)
139
+ """
140
+ ),
129
141
  )
130
142
 
131
143
  kb_info_tool = KnowledgeBaseInfoTool(
132
- name=f'kb_info_tool{prefix}',
144
+ name=f"kb_info_tool{prefix}",
133
145
  db=self.db,
134
- description=dedent(f"""\
146
+ description=dedent(
147
+ f"""\
135
148
  Gets detailed information about specific knowledge bases including their structure and metadata fields.
136
149
 
137
150
  Input: A knowledge base name as a simple string.
@@ -143,13 +156,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
143
156
  Example usage: kb_info_tool("kb_name")
144
157
 
145
158
  Make sure the knowledge base exists by calling {kb_list_tool.name} first.
146
- """)
159
+ """
160
+ ),
147
161
  )
148
162
 
149
163
  kb_query_tool = KnowledgeBaseQueryTool(
150
- name=f'kb_query_tool{prefix}',
164
+ name=f"kb_query_tool{prefix}",
151
165
  db=self.db,
152
- description=dedent(f"""\
166
+ description=dedent(
167
+ f"""\
153
168
  Queries knowledge bases using SQL syntax to retrieve relevant information.
154
169
 
155
170
  Input: A SQL query string that targets a knowledge base.
@@ -192,15 +207,12 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
192
207
  - Always include a semicolon at the end of your SQL query
193
208
 
194
209
  For factual questions, use this tool to retrieve information rather than relying on the model's knowledge.
195
- """)
210
+ """
211
+ ),
196
212
  )
197
213
 
198
214
  # Return standard SQL tools and knowledge base tools
199
- return [
200
- query_sql_database_tool,
201
- info_sql_database_tool,
202
- list_sql_database_tool,
203
- mindsdb_sql_parser_tool,
215
+ return sql_tools + [
204
216
  kb_list_tool,
205
217
  kb_info_tool,
206
218
  kb_query_tool,