MindsDB 25.5.4.2__py3-none-any.whl → 25.6.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (69)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/executor/command_executor.py +69 -14
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  7. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  8. mindsdb/api/executor/planner/plan_join.py +67 -77
  9. mindsdb/api/executor/planner/query_planner.py +176 -155
  10. mindsdb/api/executor/planner/steps.py +37 -12
  11. mindsdb/api/executor/sql_query/result_set.py +45 -64
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  14. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  15. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  16. mindsdb/api/executor/utilities/sql.py +42 -48
  17. mindsdb/api/http/namespaces/config.py +1 -1
  18. mindsdb/api/http/namespaces/file.py +14 -23
  19. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  22. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  23. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  24. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  25. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  27. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  28. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  29. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  32. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  33. mindsdb/integrations/libs/api_handler.py +261 -57
  34. mindsdb/integrations/libs/base.py +100 -29
  35. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  36. mindsdb/integrations/utilities/handler_utils.py +23 -8
  37. mindsdb/integrations/utilities/sql_utils.py +35 -40
  38. mindsdb/interfaces/agents/agents_controller.py +196 -192
  39. mindsdb/interfaces/agents/constants.py +7 -1
  40. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  41. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  42. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  43. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  44. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  46. mindsdb/interfaces/database/database.py +81 -57
  47. mindsdb/interfaces/database/integrations.py +220 -234
  48. mindsdb/interfaces/database/log.py +72 -104
  49. mindsdb/interfaces/database/projects.py +156 -193
  50. mindsdb/interfaces/file/file_controller.py +21 -65
  51. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  52. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  53. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  54. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  55. mindsdb/interfaces/skills/skills_controller.py +54 -36
  56. mindsdb/interfaces/skills/sql_agent.py +109 -86
  57. mindsdb/interfaces/storage/db.py +223 -79
  58. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  59. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  60. mindsdb/utilities/config.py +9 -2
  61. mindsdb/utilities/log.py +35 -26
  62. mindsdb/utilities/ml_task_queue/task.py +19 -22
  63. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  64. mindsdb/utilities/starters.py +40 -0
  65. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
  66. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
  67. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
@@ -6,9 +6,7 @@ from langchain_core.tools import BaseTool
6
6
 
7
7
 
8
8
  class KnowledgeBaseListToolInput(BaseModel):
9
- tool_input: str = Field(
10
- "", description="An empty string to list all knowledge bases."
11
- )
9
+ tool_input: str = Field("", description="An empty string to list all knowledge bases.")
12
10
 
13
11
 
14
12
  class KnowledgeBaseListTool(BaseTool):
@@ -21,7 +19,11 @@ class KnowledgeBaseListTool(BaseTool):
21
19
 
22
20
  def _run(self, tool_input: str) -> str:
23
21
  """List all knowledge bases."""
24
- return self.db.get_usable_knowledge_base_names()
22
+ kb_names = self.db.get_usable_knowledge_base_names()
23
+ # Convert list to a formatted string for better readability
24
+ if not kb_names:
25
+ return "No knowledge bases found."
26
+ return json.dumps(kb_names)
25
27
 
26
28
 
27
29
  class KnowledgeBaseInfoToolInput(BaseModel):
@@ -41,8 +43,27 @@ class KnowledgeBaseInfoTool(BaseTool):
41
43
 
42
44
  def _extract_kb_names(self, tool_input: str) -> List[str]:
43
45
  """Extract knowledge base names from the tool input."""
46
+ # First, check if the input is already a list (passed directly from include_knowledge_bases)
47
+ if isinstance(tool_input, list):
48
+ return tool_input
49
+
50
+ # Next, try to parse it as JSON in case it was serialized as a JSON string
51
+ try:
52
+ parsed_input = json.loads(tool_input)
53
+ if isinstance(parsed_input, list):
54
+ return parsed_input
55
+ except (json.JSONDecodeError, TypeError):
56
+ pass
57
+
58
+ # Finally, try the original regex pattern for $START$ and $STOP$ markers
44
59
  match = re.search(r"\$START\$(.*?)\$STOP\$", tool_input, re.DOTALL)
45
60
  if not match:
61
+ # If no markers found, check if it's a simple comma-separated string
62
+ if "," in tool_input:
63
+ return [kb.strip() for kb in tool_input.split(",")]
64
+ # If it's just a single string without formatting, return it as a single item
65
+ if tool_input.strip():
66
+ return [tool_input.strip()]
46
67
  return []
47
68
 
48
69
  # Extract and clean the knowledge base names
@@ -55,66 +76,84 @@ class KnowledgeBaseInfoTool(BaseTool):
55
76
  kb_names = self._extract_kb_names(tool_input)
56
77
 
57
78
  if not kb_names:
58
- return "No valid knowledge base names provided. Please provide names enclosed in backticks between $START$ and $STOP$."
79
+ return "No valid knowledge base names provided. Please provide knowledge base names as a list, comma-separated string, or enclosed in backticks between $START$ and $STOP$."
59
80
 
60
81
  results = []
61
82
 
62
83
  for kb_name in kb_names:
63
84
  try:
64
85
  # Get knowledge base schema
65
- schema_result = self.db.run_no_throw(
66
- f"DESCRIBE KNOWLEDGE_BASE `{kb_name}`;"
67
- )
86
+ schema_result = self.db.run_no_throw(f"DESCRIBE KNOWLEDGE_BASE `{kb_name}`;")
68
87
 
69
88
  if not schema_result:
70
- results.append(
71
- f"Knowledge base `{kb_name}` not found or has no schema information."
72
- )
89
+ results.append(f"Knowledge base `{kb_name}` not found or has no schema information.")
73
90
  continue
74
91
 
75
- # Get sample data
76
- sample_data = self.db.run_no_throw(
77
- f"SELECT * FROM `{kb_name}` LIMIT 10;"
78
- )
79
-
80
92
  # Format the results
81
93
  kb_info = f"## Knowledge Base: `{kb_name}`\n\n"
82
94
 
83
95
  # Schema information
84
96
  kb_info += "### Schema Information:\n"
85
97
  kb_info += "```\n"
86
- for row in schema_result:
87
- kb_info += f"{json.dumps(row, indent=2)}\n"
98
+
99
+ # Handle different return types for schema_result
100
+ if isinstance(schema_result, str):
101
+ kb_info += f"{schema_result}\n"
102
+ elif isinstance(schema_result, list):
103
+ for row in schema_result:
104
+ if isinstance(row, dict):
105
+ kb_info += f"{json.dumps(row, indent=2)}\n"
106
+ else:
107
+ kb_info += f"{str(row)}\n"
108
+ else:
109
+ kb_info += f"{str(schema_result)}\n"
110
+
88
111
  kb_info += "```\n\n"
89
112
 
113
+ # Get sample data
114
+ sample_data = self.db.run_no_throw(f"SELECT * FROM `{kb_name}` LIMIT 10;")
115
+
90
116
  # Sample data
91
117
  kb_info += "### Sample Data:\n"
92
- if sample_data:
93
- # Extract column names
94
- columns = list(sample_data[0].keys())
95
-
96
- # Create markdown table header
97
- kb_info += "| " + " | ".join(columns) + " |\n"
98
- kb_info += "| " + " | ".join(["---" for _ in columns]) + " |\n"
99
-
100
- # Add rows
101
- for row in sample_data:
102
- formatted_row = []
103
- for col in columns:
104
- cell_value = row[col]
105
- if isinstance(cell_value, dict):
106
- cell_value = json.dumps(cell_value, ensure_ascii=False)
107
- formatted_row.append(str(cell_value).replace("|", "\\|"))
108
- kb_info += "| " + " | ".join(formatted_row) + " |\n"
109
- else:
118
+
119
+ # Handle different return types for sample_data
120
+ if not sample_data:
110
121
  kb_info += "No sample data available.\n"
122
+ elif isinstance(sample_data, str):
123
+ kb_info += f"```\n{sample_data}\n```\n"
124
+ elif isinstance(sample_data, list) and len(sample_data) > 0:
125
+ # Only try to extract columns if we have a list of dictionaries
126
+ if isinstance(sample_data[0], dict):
127
+ # Extract column names
128
+ columns = list(sample_data[0].keys())
129
+
130
+ # Create markdown table header
131
+ kb_info += "| " + " | ".join(columns) + " |\n"
132
+ kb_info += "| " + " | ".join(["---" for _ in columns]) + " |\n"
133
+
134
+ # Add rows
135
+ for row in sample_data:
136
+ formatted_row = []
137
+ for col in columns:
138
+ cell_value = row[col]
139
+ if isinstance(cell_value, dict):
140
+ cell_value = json.dumps(cell_value, ensure_ascii=False)
141
+ formatted_row.append(str(cell_value).replace("|", "\\|"))
142
+ kb_info += "| " + " | ".join(formatted_row) + " |\n"
143
+ else:
144
+ # If it's a list but not of dictionaries, just format as text
145
+ kb_info += "```\n"
146
+ for item in sample_data:
147
+ kb_info += f"{str(item)}\n"
148
+ kb_info += "```\n"
149
+ else:
150
+ # For any other type, just convert to string
151
+ kb_info += f"```\n{str(sample_data)}\n```\n"
111
152
 
112
153
  results.append(kb_info)
113
154
 
114
155
  except Exception as e:
115
- results.append(
116
- f"Error getting information for knowledge base `{kb_name}`: {str(e)}"
117
- )
156
+ results.append(f"Error getting information for knowledge base `{kb_name}`: {str(e)}")
118
157
 
119
158
  return "\n\n".join(results)
120
159
 
@@ -143,9 +182,7 @@ class KnowledgeBaseQueryTool(BaseTool):
143
182
 
144
183
  # If not wrapped in delimiters, use the input directly
145
184
  # Check for SQL keywords to validate it's likely a query
146
- if re.search(
147
- r"\b(SELECT|FROM|WHERE|LIMIT|ORDER BY)\b", tool_input, re.IGNORECASE
148
- ):
185
+ if re.search(r"\b(SELECT|FROM|WHERE|LIMIT|ORDER BY)\b", tool_input, re.IGNORECASE):
149
186
  return tool_input.strip()
150
187
 
151
188
  return ""
@@ -185,6 +222,9 @@ class KnowledgeBaseQueryTool(BaseTool):
185
222
 
186
223
  return table
187
224
 
188
- return result
225
+ # Ensure we always return a string
226
+ if isinstance(result, (list, dict)):
227
+ return json.dumps(result, indent=2)
228
+ return str(result)
189
229
  except Exception as e:
190
230
  return f"Error executing query: {str(e)}"
@@ -6,14 +6,19 @@ from sqlalchemy.orm.attributes import flag_modified
6
6
 
7
7
  from mindsdb.interfaces.storage import db
8
8
  from mindsdb.interfaces.database.projects import ProjectController
9
+ from mindsdb.interfaces.data_catalog.data_catalog_loader import DataCatalogLoader
10
+ from mindsdb.interfaces.skills.skill_tool import SkillType
9
11
  from mindsdb.utilities.config import config
12
+ from mindsdb.utilities import log
10
13
 
11
14
 
12
- default_project = config.get('default_project')
15
+ logger = log.getLogger(__name__)
16
+
17
+ default_project = config.get("default_project")
13
18
 
14
19
 
15
20
  class SkillsController:
16
- '''Handles CRUD operations at the database level for Skills'''
21
+ """Handles CRUD operations at the database level for Skills"""
17
22
 
18
23
  def __init__(self, project_controller: ProjectController = None):
19
24
  if project_controller is None:
@@ -21,7 +26,7 @@ class SkillsController:
21
26
  self.project_controller = project_controller
22
27
 
23
28
  def get_skill(self, skill_name: str, project_name: str = default_project) -> Optional[db.Skills]:
24
- '''
29
+ """
25
30
  Gets a skill by name. Skills are expected to have unique names.
26
31
 
27
32
  Parameters:
@@ -33,17 +38,17 @@ class SkillsController:
33
38
 
34
39
  Raises:
35
40
  ValueError: If `project_name` does not exist
36
- '''
41
+ """
37
42
 
38
43
  project = self.project_controller.get(name=project_name)
39
44
  return db.Skills.query.filter(
40
45
  func.lower(db.Skills.name) == func.lower(skill_name),
41
46
  db.Skills.project_id == project.id,
42
- db.Skills.deleted_at == null()
47
+ db.Skills.deleted_at == null(),
43
48
  ).first()
44
49
 
45
50
  def get_skills(self, project_name: Optional[str]) -> List[dict]:
46
- '''
51
+ """
47
52
  Gets all skills in a project.
48
53
 
49
54
  Parameters:
@@ -54,7 +59,7 @@ class SkillsController:
54
59
 
55
60
  Raises:
56
61
  ValueError: If `project_name` does not exist
57
- '''
62
+ """
58
63
 
59
64
  if project_name is None:
60
65
  projects = self.project_controller.get_list()
@@ -63,23 +68,14 @@ class SkillsController:
63
68
  project = self.project_controller.get(name=project_name)
64
69
  project_ids = [project.id]
65
70
 
66
- query = (
67
- db.session.query(db.Skills)
68
- .filter(
69
- db.Skills.project_id.in_(project_ids),
70
- db.Skills.deleted_at == null()
71
- )
71
+ query = db.session.query(db.Skills).filter(
72
+ db.Skills.project_id.in_(project_ids), db.Skills.deleted_at == null()
72
73
  )
73
74
 
74
75
  return query.all()
75
76
 
76
- def add_skill(
77
- self,
78
- name: str,
79
- project_name: str,
80
- type: str,
81
- params: Dict[str, str] = {}) -> db.Skills:
82
- '''
77
+ def add_skill(self, name: str, project_name: str, type: str, params: Dict[str, str] = {}) -> db.Skills:
78
+ """
83
79
  Adds a skill to the database.
84
80
 
85
81
  Parameters:
@@ -93,7 +89,7 @@ class SkillsController:
93
89
 
94
90
  Raises:
95
91
  ValueError: If `project_name` does not exist or skill already exists
96
- '''
92
+ """
97
93
  if project_name is None:
98
94
  project_name = default_project
99
95
  project = self.project_controller.get(name=project_name)
@@ -101,7 +97,28 @@ class SkillsController:
101
97
  skill = self.get_skill(name, project_name)
102
98
 
103
99
  if skill is not None:
104
- raise ValueError(f'Skill with name already exists: {name}')
100
+ raise ValueError(f"Skill with name already exists: {name}")
101
+
102
+ # Load metadata to data catalog (if enabled) if the skill is Text-to-SQL.
103
+ if config.get("data_catalog", {}).get("enabled", False):
104
+ if type == SkillType.TEXT2SQL.value and "include_tables" in params:
105
+ # TODO: Is it possible to create a skill with complete access to the database with the new agent syntax?
106
+ # TODO: Handle the case where `ignore_tables` is provided. Is this a valid parameter?
107
+ # TODO: Knowledge Bases?
108
+ database_table_map = {}
109
+ for table in params["include_tables"]:
110
+ parts = table.split(".", 1)
111
+ database_table_map[parts[0]] = database_table_map.get(parts[0], []) + [parts[1]]
112
+
113
+ for database_name, table_names in database_table_map.items():
114
+ data_catalog_loader = DataCatalogLoader(database_name=database_name, table_names=table_names)
115
+ data_catalog_loader.load_metadata()
116
+
117
+ elif type in [SkillType.TEXT2SQL.value, SkillType.TEXT2SQL_LEGACY.value] and "database" in params:
118
+ data_catalog_loader = DataCatalogLoader(
119
+ database_name=params["database"], table_names=params["tables"] if "tables" in params else None
120
+ )
121
+ data_catalog_loader.load_metadata()
105
122
 
106
123
  new_skill = db.Skills(
107
124
  name=name,
@@ -115,13 +132,14 @@ class SkillsController:
115
132
  return new_skill
116
133
 
117
134
  def update_skill(
118
- self,
119
- skill_name: str,
120
- new_name: str = None,
121
- project_name: str = default_project,
122
- type: str = None,
123
- params: Dict[str, str] = None):
124
- '''
135
+ self,
136
+ skill_name: str,
137
+ new_name: str = None,
138
+ project_name: str = default_project,
139
+ type: str = None,
140
+ params: Dict[str, str] = None,
141
+ ):
142
+ """
125
143
  Updates an existing skill in the database.
126
144
 
127
145
  Parameters:
@@ -136,12 +154,12 @@ class SkillsController:
136
154
 
137
155
  Raises:
138
156
  ValueError: If `project_name` does not exist or skill doesn't exist
139
- '''
157
+ """
140
158
 
141
159
  existing_skill = self.get_skill(skill_name, project_name)
142
160
  if existing_skill is None:
143
- raise ValueError(f'Skill with name not found: {skill_name}')
144
- if isinstance(existing_skill.params, dict) and existing_skill.params.get('is_demo') is True:
161
+ raise ValueError(f"Skill with name not found: {skill_name}")
162
+ if isinstance(existing_skill.params, dict) and existing_skill.params.get("is_demo") is True:
145
163
  raise ValueError("It is forbidden to change properties of the demo object")
146
164
 
147
165
  if new_name is not None:
@@ -157,14 +175,14 @@ class SkillsController:
157
175
  existing_skill.params = params
158
176
  # Some versions of SQL Alchemy won't handle JSON updates correctly without this.
159
177
  # See: https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.attributes.flag_modified
160
- flag_modified(existing_skill, 'params')
178
+ flag_modified(existing_skill, "params")
161
179
 
162
180
  db.session.commit()
163
181
 
164
182
  return existing_skill
165
183
 
166
184
  def delete_skill(self, skill_name: str, project_name: str = default_project):
167
- '''
185
+ """
168
186
  Deletes a skill by name.
169
187
 
170
188
  Parameters:
@@ -173,12 +191,12 @@ class SkillsController:
173
191
 
174
192
  Raises:
175
193
  ValueError: If `project_name` does not exist or skill doesn't exist
176
- '''
194
+ """
177
195
 
178
196
  skill = self.get_skill(skill_name, project_name)
179
197
  if skill is None:
180
198
  raise ValueError(f"Skill with name doesn't exist: {skill_name}")
181
- if isinstance(skill.params, dict) and skill.params.get('is_demo') is True:
199
+ if isinstance(skill.params, dict) and skill.params.get("is_demo") is True:
182
200
  raise ValueError("Unable to delete demo object")
183
201
  skill.deleted_at = datetime.datetime.now()
184
202
  db.session.commit()