MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic; see the registry's release advisory for more details.

Files changed (55):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
  5. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  6. mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
  7. mindsdb/api/executor/planner/plan_join.py +1 -1
  8. mindsdb/api/executor/planner/query_planner.py +7 -1
  9. mindsdb/api/executor/planner/query_prepare.py +68 -87
  10. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  11. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  12. mindsdb/api/http/namespaces/file.py +49 -24
  13. mindsdb/api/mcp/start.py +45 -31
  14. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  15. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  16. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  17. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  18. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  19. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  20. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  21. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  22. mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  24. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  25. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
  26. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  27. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  28. mindsdb/integrations/libs/api_handler.py +6 -7
  29. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  30. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  31. mindsdb/interfaces/agents/agents_controller.py +29 -9
  32. mindsdb/interfaces/agents/constants.py +44 -0
  33. mindsdb/interfaces/agents/langchain_agent.py +15 -6
  34. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  35. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  36. mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
  37. mindsdb/interfaces/knowledge_base/controller.py +121 -102
  38. mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
  39. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  40. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  41. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  42. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  43. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
  44. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
  45. mindsdb/interfaces/skills/skill_tool.py +91 -88
  46. mindsdb/interfaces/skills/sql_agent.py +181 -130
  47. mindsdb/interfaces/storage/db.py +9 -7
  48. mindsdb/utilities/config.py +12 -1
  49. mindsdb/utilities/exception.py +47 -7
  50. mindsdb/utilities/security.py +54 -11
  51. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
  52. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
  53. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  54. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  55. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
@@ -18,45 +18,31 @@ class PreprocessorType(Enum):
18
18
 
19
19
  class BasePreprocessingConfig(BaseModel):
20
20
  """Base configuration for preprocessing"""
21
+
21
22
  chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Size of document chunks")
22
23
  chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP, description="Overlap between chunks")
24
+ doc_id_column_name: str = Field(default="_original_doc_id", description="Name of doc_id columns in metadata")
23
25
 
24
26
 
25
27
  class ContextualConfig(BasePreprocessingConfig):
26
28
  """Configuration specific to contextual preprocessing"""
29
+
27
30
  llm_config: LLMConfig = Field(
28
- default_factory=LLMConfig,
29
- description="LLM configuration to use for context generation"
30
- )
31
- context_template: Optional[str] = Field(
32
- default=None,
33
- description="Custom template for context generation"
34
- )
35
- summarize: Optional[bool] = Field(
36
- default=False,
37
- description="Whether to return chunks as summarizations"
31
+ default_factory=LLMConfig, description="LLM configuration to use for context generation"
38
32
  )
33
+ context_template: Optional[str] = Field(default=None, description="Custom template for context generation")
34
+ summarize: Optional[bool] = Field(default=False, description="Whether to return chunks as summarizations")
39
35
 
40
36
 
41
- class TextChunkingConfig(BaseModel):
37
+ class TextChunkingConfig(BasePreprocessingConfig):
42
38
  """Configuration for text chunking preprocessor using Pydantic"""
43
- chunk_size: int = Field(
44
- default=1000,
45
- description="The target size of each text chunk",
46
- gt=0
47
- )
48
- chunk_overlap: int = Field(
49
- default=200,
50
- description="The number of characters to overlap between chunks",
51
- ge=0
52
- )
53
- length_function: Callable = Field(
54
- default=len,
55
- description="Function to measure text length"
56
- )
39
+
40
+ chunk_size: int = Field(default=1000, description="The target size of each text chunk", gt=0)
41
+ chunk_overlap: int = Field(default=200, description="The number of characters to overlap between chunks", ge=0)
42
+ length_function: Callable = Field(default=len, description="Function to measure text length")
57
43
  separators: List[str] = Field(
58
44
  default=["\n\n", "\n", " ", ""],
59
- description="List of separators to use for splitting text, in order of priority"
45
+ description="List of separators to use for splitting text, in order of priority",
60
46
  )
61
47
 
62
48
  class Config:
@@ -65,44 +51,28 @@ class TextChunkingConfig(BaseModel):
65
51
 
66
52
  class JSONChunkingConfig(BasePreprocessingConfig):
67
53
  """Configuration for JSON chunking preprocessor"""
68
- flatten_nested: bool = Field(
69
- default=True,
70
- description="Whether to flatten nested JSON structures"
71
- )
72
- include_metadata: bool = Field(
73
- default=True,
74
- description="Whether to include original metadata in chunks"
75
- )
54
+
55
+ flatten_nested: bool = Field(default=True, description="Whether to flatten nested JSON structures")
56
+ include_metadata: bool = Field(default=True, description="Whether to include original metadata in chunks")
76
57
  chunk_by_object: bool = Field(
77
- default=True,
78
- description="Whether to chunk by top-level objects (True) or create a single document (False)"
79
- )
80
- exclude_fields: List[str] = Field(
81
- default_factory=list,
82
- description="List of fields to exclude from chunking"
58
+ default=True, description="Whether to chunk by top-level objects (True) or create a single document (False)"
83
59
  )
60
+ exclude_fields: List[str] = Field(default_factory=list, description="List of fields to exclude from chunking")
84
61
  include_fields: List[str] = Field(
85
62
  default_factory=list,
86
- description="List of fields to include in chunking (if empty, all fields except excluded ones are included)"
63
+ description="List of fields to include in chunking (if empty, all fields except excluded ones are included)",
87
64
  )
88
65
  metadata_fields: List[str] = Field(
89
66
  default_factory=list,
90
67
  description="List of fields to extract into metadata for filtering "
91
- "(can include nested fields using dot notation). "
92
- "If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure)."
68
+ "(can include nested fields using dot notation). "
69
+ "If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure).",
93
70
  )
94
71
  extract_all_primitives: bool = Field(
95
- default=False,
96
- description="Whether to extract all primitive values (strings, numbers, booleans) into metadata"
97
- )
98
- nested_delimiter: str = Field(
99
- default=".",
100
- description="Delimiter for flattened nested field names"
101
- )
102
- content_column: str = Field(
103
- default="content",
104
- description="Name of the content column for chunk ID generation"
72
+ default=False, description="Whether to extract all primitive values (strings, numbers, booleans) into metadata"
105
73
  )
74
+ nested_delimiter: str = Field(default=".", description="Delimiter for flattened nested field names")
75
+ content_column: str = Field(default="content", description="Name of the content column for chunk ID generation")
106
76
 
107
77
  class Config:
108
78
  arbitrary_types_allowed = True
@@ -110,25 +80,20 @@ class JSONChunkingConfig(BasePreprocessingConfig):
110
80
 
111
81
  class PreprocessingConfig(BaseModel):
112
82
  """Complete preprocessing configuration"""
113
- type: PreprocessorType = Field(
114
- default=PreprocessorType.TEXT_CHUNKING,
115
- description="Type of preprocessing to apply"
116
- )
83
+
84
+ type: PreprocessorType = Field(default=PreprocessorType.TEXT_CHUNKING, description="Type of preprocessing to apply")
117
85
  contextual_config: Optional[ContextualConfig] = Field(
118
- default=None,
119
- description="Configuration for contextual preprocessing"
86
+ default=None, description="Configuration for contextual preprocessing"
120
87
  )
121
88
  text_chunking_config: Optional[TextChunkingConfig] = Field(
122
- default=None,
123
- description="Configuration for text chunking preprocessing"
89
+ default=None, description="Configuration for text chunking preprocessing"
124
90
  )
125
91
  json_chunking_config: Optional[JSONChunkingConfig] = Field(
126
- default=None,
127
- description="Configuration for JSON chunking preprocessing"
92
+ default=None, description="Configuration for JSON chunking preprocessing"
128
93
  )
129
94
 
130
- @model_validator(mode='after')
131
- def validate_config_presence(self) -> 'PreprocessingConfig':
95
+ @model_validator(mode="after")
96
+ def validate_config_presence(self) -> "PreprocessingConfig":
132
97
  """Ensure the appropriate config is present for the chosen type"""
133
98
  if self.type == PreprocessorType.CONTEXTUAL and not self.contextual_config:
134
99
  self.contextual_config = ContextualConfig()
@@ -137,26 +102,28 @@ class PreprocessingConfig(BaseModel):
137
102
  if self.type == PreprocessorType.JSON_CHUNKING and not self.json_chunking_config:
138
103
  # Import here to avoid circular imports
139
104
  from mindsdb.interfaces.knowledge_base.preprocessing.json_chunker import JSONChunkingConfig
105
+
140
106
  self.json_chunking_config = JSONChunkingConfig()
141
107
  return self
142
108
 
143
109
 
144
110
  class Document(BaseModel):
145
-
146
111
  """Document model with default metadata handling"""
112
+
147
113
  id: Optional[Union[int, str]] = Field(default=None, description="Unique identifier for the document")
148
114
  content: str = Field(description="The document content")
149
115
  embeddings: Optional[List[float]] = Field(default=None, description="Vector embeddings of the content")
150
116
  metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional document metadata")
151
117
 
152
- @model_validator(mode='after')
153
- def validate_metadata(self) -> 'Document':
118
+ @model_validator(mode="after")
119
+ def validate_metadata(self) -> "Document":
154
120
  """Ensure metadata is present and valid"""
155
121
  if not self.metadata:
156
- self.metadata = {'source': 'default'}
122
+ self.metadata = {"source": "default"}
157
123
  return self
158
124
 
159
125
 
160
126
  class ProcessedChunk(Document):
161
127
  """Processed chunk that aligns with VectorStoreHandler schema"""
128
+
162
129
  pass
@@ -6,6 +6,27 @@ from langchain_core.tools import BaseTool
6
6
  from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star
7
7
 
8
8
 
9
+ def llm_str_strip(s):
10
+ length = -1
11
+ while length != len(s):
12
+ length = len(s)
13
+
14
+ # remove ```
15
+ if s.startswith("```"):
16
+ s = s[3:]
17
+ if s.endswith("```"):
18
+ s = s[:-3]
19
+
20
+ # remove trailing new lines
21
+ s = s.strip("\n")
22
+
23
+ # remove extra quotes
24
+ for q in ('"', "'", "`"):
25
+ if s.count(q) == 1:
26
+ s = s.strip(q)
27
+ return s
28
+
29
+
9
30
  class KnowledgeBaseListToolInput(BaseModel):
10
31
  tool_input: str = Field("", description="An empty string to list all knowledge bases.")
11
32
 
@@ -56,26 +77,6 @@ class KnowledgeBaseInfoTool(BaseTool):
56
77
  except (json.JSONDecodeError, TypeError):
57
78
  pass
58
79
 
59
- def strip(s):
60
- length = -1
61
- while length != len(s):
62
- length = len(s)
63
-
64
- # remove ```
65
- if s.startswith("```"):
66
- s = s[3:]
67
- if s.endswith("```"):
68
- s = s[:-3]
69
-
70
- # remove trailing new lines
71
- s = s.strip("\n")
72
-
73
- # remove extra quotes
74
- for q in ('"', "'", "`"):
75
- if s.count(q) == 1:
76
- s = s.strip(q)
77
- return s
78
-
79
80
  # Finally, try the original regex pattern for $START$ and $STOP$ markers
80
81
  match = re.search(r"\$START\$(.*?)\$STOP\$", tool_input, re.DOTALL)
81
82
  if not match:
@@ -84,14 +85,14 @@ class KnowledgeBaseInfoTool(BaseTool):
84
85
  return [kb.strip() for kb in tool_input.split(",")]
85
86
  # If it's just a single string without formatting, return it as a single item
86
87
  if tool_input.strip():
87
- return [strip(tool_input)]
88
+ return [llm_str_strip(tool_input)]
88
89
  return []
89
90
 
90
91
  # Extract and clean the knowledge base names
91
92
  kb_names_str = match.group(1).strip()
92
93
  kb_names = re.findall(r"`([^`]+)`", kb_names_str)
93
94
 
94
- kb_names = [strip(n) for n in kb_names]
95
+ kb_names = [llm_str_strip(n) for n in kb_names]
95
96
  return kb_names
96
97
 
97
98
  def _run(self, tool_input: str) -> str:
@@ -105,6 +106,8 @@ class KnowledgeBaseInfoTool(BaseTool):
105
106
 
106
107
  for kb_name in kb_names:
107
108
  try:
109
+ self.db.check_knowledge_base_permission(Identifier(kb_name))
110
+
108
111
  # Get knowledge base schema
109
112
  schema_result = self.db.run_no_throw(str(Describe(kb_name, type="knowledge_base")))
110
113
 
@@ -221,6 +224,7 @@ class KnowledgeBaseQueryTool(BaseTool):
221
224
 
222
225
  try:
223
226
  # Execute the query
227
+ query = llm_str_strip(query)
224
228
  result = self.db.run_no_throw(query)
225
229
 
226
230
  if not result:
@@ -10,25 +10,27 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQ
10
10
  from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
11
11
  KnowledgeBaseListTool,
12
12
  KnowledgeBaseInfoTool,
13
- KnowledgeBaseQueryTool
13
+ KnowledgeBaseQueryTool,
14
14
  )
15
15
 
16
16
 
17
17
  class MindsDBSQLToolkit(SQLDatabaseToolkit):
18
+ include_knowledge_base_tools: bool = True
18
19
 
19
- def get_tools(self, prefix='') -> List[BaseTool]:
20
-
20
+ def get_tools(self, prefix="") -> List[BaseTool]:
21
21
  current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")
22
22
 
23
23
  """Get the tools in the toolkit."""
24
24
  list_sql_database_tool = ListSQLDatabaseTool(
25
- name=f'sql_db_list_tables{prefix}',
25
+ name=f"sql_db_list_tables{prefix}",
26
26
  db=self.db,
27
- description=dedent("""\n
27
+ description=dedent(
28
+ """\n
28
29
  Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks.
29
30
  Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`.
30
31
  Table names in response to the user must be escaped using backticks.
31
- """)
32
+ """
33
+ ),
32
34
  )
33
35
 
34
36
  info_sql_database_tool_description = (
@@ -45,11 +47,11 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
45
47
  " $START$ table1 table2 table3 $STOP$\n"
46
48
  )
47
49
  info_sql_database_tool = InfoSQLDatabaseTool(
48
- name=f'sql_db_schema{prefix}',
49
- db=self.db, description=info_sql_database_tool_description
50
+ name=f"sql_db_schema{prefix}", db=self.db, description=info_sql_database_tool_description
50
51
  )
51
52
 
52
- query_sql_database_tool_description = dedent(f"""\
53
+ query_sql_database_tool_description = dedent(
54
+ f"""\
53
55
  Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
54
56
  Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
55
57
  This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
@@ -93,11 +95,11 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
93
95
  - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
94
96
  - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
95
97
  Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
96
- """)
98
+ """
99
+ )
97
100
 
98
101
  query_sql_database_tool = QuerySQLDataBaseTool(
99
- name=f'sql_db_query{prefix}',
100
- db=self.db, description=query_sql_database_tool_description
102
+ name=f"sql_db_query{prefix}", db=self.db, description=query_sql_database_tool_description
101
103
  )
102
104
 
103
105
  mindsdb_sql_parser_tool_description = (
@@ -108,15 +110,24 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
108
110
  f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
109
111
  )
110
112
  mindsdb_sql_parser_tool = MindsDBSQLParserTool(
111
- name=f'mindsdb_sql_parser_tool{prefix}',
112
- description=mindsdb_sql_parser_tool_description
113
+ name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
113
114
  )
114
115
 
116
+ sql_tools = [
117
+ query_sql_database_tool,
118
+ info_sql_database_tool,
119
+ list_sql_database_tool,
120
+ mindsdb_sql_parser_tool,
121
+ ]
122
+ if not self.include_knowledge_base_tools:
123
+ return sql_tools
124
+
115
125
  # Knowledge base tools
116
126
  kb_list_tool = KnowledgeBaseListTool(
117
- name=f'kb_list_tool{prefix}',
127
+ name=f"kb_list_tool{prefix}",
118
128
  db=self.db,
119
- description=dedent("""\
129
+ description=dedent(
130
+ """\
120
131
  Lists all available knowledge bases that can be queried.
121
132
  Input: No input required, just call the tool directly.
122
133
  Output: A table of all available knowledge bases with their names and creation dates.
@@ -125,13 +136,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
125
136
  Each knowledge base name is escaped using backticks.
126
137
 
127
138
  Example usage: kb_list_tool()
128
- """)
139
+ """
140
+ ),
129
141
  )
130
142
 
131
143
  kb_info_tool = KnowledgeBaseInfoTool(
132
- name=f'kb_info_tool{prefix}',
144
+ name=f"kb_info_tool{prefix}",
133
145
  db=self.db,
134
- description=dedent(f"""\
146
+ description=dedent(
147
+ f"""\
135
148
  Gets detailed information about specific knowledge bases including their structure and metadata fields.
136
149
 
137
150
  Input: A knowledge base name as a simple string.
@@ -143,13 +156,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
143
156
  Example usage: kb_info_tool("kb_name")
144
157
 
145
158
  Make sure the knowledge base exists by calling {kb_list_tool.name} first.
146
- """)
159
+ """
160
+ ),
147
161
  )
148
162
 
149
163
  kb_query_tool = KnowledgeBaseQueryTool(
150
- name=f'kb_query_tool{prefix}',
164
+ name=f"kb_query_tool{prefix}",
151
165
  db=self.db,
152
- description=dedent(f"""\
166
+ description=dedent(
167
+ f"""\
153
168
  Queries knowledge bases using SQL syntax to retrieve relevant information.
154
169
 
155
170
  Input: A SQL query string that targets a knowledge base.
@@ -192,15 +207,12 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
192
207
  - Always include a semicolon at the end of your SQL query
193
208
 
194
209
  For factual questions, use this tool to retrieve information rather than relying on the model's knowledge.
195
- """)
210
+ """
211
+ ),
196
212
  )
197
213
 
198
214
  # Return standard SQL tools and knowledge base tools
199
- return [
200
- query_sql_database_tool,
201
- info_sql_database_tool,
202
- list_sql_database_tool,
203
- mindsdb_sql_parser_tool,
215
+ return sql_tools + [
204
216
  kb_list_tool,
205
217
  kb_info_tool,
206
218
  kb_query_tool,