MindsDB 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (28)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +71 -43
  3. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +16 -1
  4. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
  6. mindsdb/api/executor/planner/plan_join.py +1 -1
  7. mindsdb/api/executor/planner/query_planner.py +7 -1
  8. mindsdb/api/executor/utilities/sql.py +18 -19
  9. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -1
  10. mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
  11. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
  12. mindsdb/integrations/libs/api_handler.py +6 -7
  13. mindsdb/interfaces/agents/constants.py +44 -0
  14. mindsdb/interfaces/agents/langchain_agent.py +8 -1
  15. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -17
  16. mindsdb/interfaces/data_catalog/data_catalog_reader.py +19 -2
  17. mindsdb/interfaces/knowledge_base/controller.py +23 -13
  18. mindsdb/interfaces/knowledge_base/evaluate.py +3 -3
  19. mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +17 -86
  20. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +30 -3
  21. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
  22. mindsdb/interfaces/skills/skill_tool.py +91 -88
  23. mindsdb/interfaces/skills/sql_agent.py +1 -1
  24. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/METADATA +255 -253
  25. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/RECORD +28 -28
  26. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/WHEEL +0 -0
  27. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/licenses/LICENSE +0 -0
  28. {mindsdb-25.6.3.0.dist-info → mindsdb-25.6.4.0.dist-info}/top_level.txt +0 -0
@@ -24,10 +24,10 @@ logger = log.getLogger(__name__)
24
24
 
25
25
 
26
26
  class SkillType(enum.Enum):
27
- TEXT2SQL_LEGACY = 'text2sql'
28
- TEXT2SQL = 'sql'
29
- KNOWLEDGE_BASE = 'knowledge_base'
30
- RETRIEVAL = 'retrieval'
27
+ TEXT2SQL_LEGACY = "text2sql"
28
+ TEXT2SQL = "sql"
29
+ KNOWLEDGE_BASE = "knowledge_base"
30
+ RETRIEVAL = "retrieval"
31
31
 
32
32
 
33
33
  @dataclass
@@ -41,6 +41,7 @@ class SkillData:
41
41
  project_id (int): id of the project
42
42
  agent_tables_list (Optional[List[str]]): the restriction on available tables for an agent using the skill
43
43
  """
44
+
44
45
  name: str
45
46
  type: str
46
47
  params: dict
@@ -59,6 +60,7 @@ class SkillData:
59
60
  ValueError: if there is no intersection between skill's and agent's list.
60
61
  This means that all tables restricted for use.
61
62
  """
63
+
62
64
  def list_to_map(input: List) -> Dict:
63
65
  agent_tables_map = defaultdict(set)
64
66
  for x in input:
@@ -66,15 +68,15 @@ class SkillData:
66
68
  table_name = x
67
69
  schema_name = None
68
70
  elif isinstance(x, dict):
69
- table_name = x['table']
70
- schema_name = x.get('schema')
71
+ table_name = x["table"]
72
+ schema_name = x.get("schema")
71
73
  else:
72
- raise ValueError(f'Unexpected value in tables list: {x}')
74
+ raise ValueError(f"Unexpected value in tables list: {x}")
73
75
  agent_tables_map[schema_name].add(table_name)
74
76
  return agent_tables_map
75
77
 
76
78
  agent_tables_map = list_to_map(self.agent_tables_list or [])
77
- skill_tables_map = list_to_map(self.params.get('tables', []))
79
+ skill_tables_map = list_to_map(self.params.get("tables", []))
78
80
 
79
81
  if len(agent_tables_map) > 0 and len(skill_tables_map) > 0:
80
82
  if len(set(agent_tables_map) & set(skill_tables_map)) == 0:
@@ -105,31 +107,34 @@ class SkillToolController:
105
107
  def get_command_executor(self):
106
108
  if self.command_executor is None:
107
109
  from mindsdb.api.executor.command_executor import ExecuteCommands
108
- from mindsdb.api.executor.controllers import SessionController # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304) # noqa
110
+ from mindsdb.api.executor.controllers import (
111
+ SessionController,
112
+ ) # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304) # noqa
109
113
 
110
114
  sql_session = SessionController()
111
- sql_session.database = config.get('default_project')
115
+ sql_session.database = config.get("default_project")
112
116
 
113
117
  self.command_executor = ExecuteCommands(sql_session)
114
118
  return self.command_executor
115
119
 
116
120
  def _make_text_to_sql_tools(self, skills: List[db.Skills], llm) -> List:
117
- '''
118
- Uses SQLAgent to execute tool
119
- '''
121
+ """
122
+ Uses SQLAgent to execute tool
123
+ """
120
124
  # To prevent dependency on Langchain unless an actual tool uses it.
121
125
  try:
122
126
  from mindsdb.interfaces.agents.mindsdb_database_agent import MindsDBSQL
123
127
  from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_toolkit import MindsDBSQLToolkit
124
128
  except ImportError:
125
129
  raise ImportError(
126
- 'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')
130
+ "To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`"
131
+ )
127
132
 
128
133
  command_executor = self.get_command_executor()
129
134
 
130
135
  def escape_table_name(name: str) -> str:
131
- name = name.strip(' `')
132
- return f'`{name}`'
136
+ name = name.strip(" `")
137
+ return f"`{name}`"
133
138
 
134
139
  tables_list = []
135
140
  knowledge_bases_list = []
@@ -144,31 +149,31 @@ class SkillToolController:
144
149
  # First pass: collect all database and knowledge base parameters
145
150
  for skill in skills:
146
151
  # Update knowledge_base_database if specified in any skill
147
- if skill.params.get('knowledge_base_database'):
148
- knowledge_base_database = skill.params.get('knowledge_base_database')
152
+ if skill.params.get("knowledge_base_database"):
153
+ knowledge_base_database = skill.params.get("knowledge_base_database")
149
154
 
150
155
  # Extract databases from include_tables with dot notation
151
- if skill.params.get('include_tables'):
152
- include_tables = skill.params.get('include_tables')
156
+ if skill.params.get("include_tables"):
157
+ include_tables = skill.params.get("include_tables")
153
158
  if isinstance(include_tables, str):
154
- include_tables = [t.strip() for t in include_tables.split(',')]
159
+ include_tables = [t.strip() for t in include_tables.split(",")]
155
160
 
156
161
  # Extract database names from dot notation
157
162
  for table in include_tables:
158
- if '.' in table:
159
- db_name = table.split('.')[0]
163
+ if "." in table:
164
+ db_name = table.split(".")[0]
160
165
  extracted_databases.add(db_name)
161
166
 
162
167
  # Extract databases from include_knowledge_bases with dot notation
163
- if skill.params.get('include_knowledge_bases'):
164
- include_kbs = skill.params.get('include_knowledge_bases')
168
+ if skill.params.get("include_knowledge_bases"):
169
+ include_kbs = skill.params.get("include_knowledge_bases")
165
170
  if isinstance(include_kbs, str):
166
- include_kbs = [kb.strip() for kb in include_kbs.split(',')]
171
+ include_kbs = [kb.strip() for kb in include_kbs.split(",")]
167
172
 
168
173
  # Extract database names from dot notation
169
174
  for kb in include_kbs:
170
- if '.' in kb:
171
- db_name = kb.split('.')[0]
175
+ if "." in kb:
176
+ db_name = kb.split(".")[0]
172
177
  if db_name != knowledge_base_database:
173
178
  # Only update if it's different from the default
174
179
  knowledge_base_database = db_name
@@ -176,41 +181,41 @@ class SkillToolController:
176
181
  # Second pass: collect all tables and knowledge base restrictions
177
182
  for skill in skills:
178
183
  # Get database for tables (this is an actual database connection)
179
- database = skill.params.get('database', DEFAULT_TEXT2SQL_DATABASE)
184
+ database = skill.params.get("database", DEFAULT_TEXT2SQL_DATABASE)
180
185
 
181
186
  # Add databases extracted from dot notation if no explicit database is provided
182
187
  if not database and extracted_databases:
183
188
  # Use the first extracted database if no explicit database is provided
184
189
  database = next(iter(extracted_databases))
185
190
  # Update the skill params with the extracted database
186
- skill.params['database'] = database
191
+ skill.params["database"] = database
187
192
 
188
193
  # Extract knowledge base restrictions if they exist in the skill params
189
- if skill.params.get('include_knowledge_bases'):
194
+ if skill.params.get("include_knowledge_bases"):
190
195
  # Convert to list if it's a string
191
- include_kbs = skill.params.get('include_knowledge_bases')
196
+ include_kbs = skill.params.get("include_knowledge_bases")
192
197
  if isinstance(include_kbs, str):
193
- include_kbs = [kb.strip() for kb in include_kbs.split(',')]
198
+ include_kbs = [kb.strip() for kb in include_kbs.split(",")]
194
199
 
195
200
  # Process each knowledge base name
196
201
  for kb in include_kbs:
197
202
  # If it doesn't have a dot, prefix it with the knowledge_base_database
198
- if '.' not in kb:
203
+ if "." not in kb:
199
204
  knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
200
205
  else:
201
206
  knowledge_bases_list.append(kb)
202
207
 
203
208
  # Collect ignore_knowledge_bases
204
- if skill.params.get('ignore_knowledge_bases'):
209
+ if skill.params.get("ignore_knowledge_bases"):
205
210
  # Convert to list if it's a string
206
- ignore_kbs = skill.params.get('ignore_knowledge_bases')
211
+ ignore_kbs = skill.params.get("ignore_knowledge_bases")
207
212
  if isinstance(ignore_kbs, str):
208
- ignore_kbs = [kb.strip() for kb in ignore_kbs.split(',')]
213
+ ignore_kbs = [kb.strip() for kb in ignore_kbs.split(",")]
209
214
 
210
215
  # Process each knowledge base name to ignore
211
216
  for kb in ignore_kbs:
212
217
  # If it doesn't have a dot, prefix it with the knowledge_base_database
213
- if '.' not in kb:
218
+ if "." not in kb:
214
219
  ignore_knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
215
220
  else:
216
221
  ignore_knowledge_bases_list.append(kb)
@@ -220,20 +225,20 @@ class SkillToolController:
220
225
  continue
221
226
 
222
227
  # Process include_tables with dot notation
223
- if skill.params.get('include_tables'):
224
- include_tables = skill.params.get('include_tables')
228
+ if skill.params.get("include_tables"):
229
+ include_tables = skill.params.get("include_tables")
225
230
  if isinstance(include_tables, str):
226
- include_tables = [t.strip() for t in include_tables.split(',')]
231
+ include_tables = [t.strip() for t in include_tables.split(",")]
227
232
 
228
233
  for table in include_tables:
229
234
  # If table already has a database prefix, use it as is
230
- if '.' in table:
235
+ if "." in table:
231
236
  # Check if the table already has backticks
232
- if '`' in table:
237
+ if "`" in table:
233
238
  tables_list.append(table)
234
239
  else:
235
240
  # Apply escape_table_name only to the table part
236
- parts = table.split('.')
241
+ parts = table.split(".")
237
242
  if len(parts) == 2:
238
243
  # Format: database.table
239
244
  tables_list.append(f"{parts[0]}.{escape_table_name(parts[1])}")
@@ -255,15 +260,15 @@ class SkillToolController:
255
260
  if restriction_on_tables is None and database:
256
261
  try:
257
262
  handler = command_executor.session.integration_controller.get_data_handler(database)
258
- if 'all' in inspect.signature(handler.get_tables).parameters:
263
+ if "all" in inspect.signature(handler.get_tables).parameters:
259
264
  response = handler.get_tables(all=True)
260
265
  else:
261
266
  response = handler.get_tables()
262
267
  # no restrictions
263
268
  columns = [c.lower() for c in response.data_frame.columns]
264
- name_idx = columns.index('table_name') if 'table_name' in columns else 0
269
+ name_idx = columns.index("table_name") if "table_name" in columns else 0
265
270
 
266
- if 'table_schema' in response.data_frame.columns:
271
+ if "table_schema" in response.data_frame.columns:
267
272
  for _, row in response.data_frame.iterrows():
268
273
  tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
269
274
  else:
@@ -278,15 +283,15 @@ class SkillToolController:
278
283
  for schema_name, tables in restriction_on_tables.items():
279
284
  for table in tables:
280
285
  # Check if the table already has dot notation (e.g., 'postgresql_conn.home_rentals')
281
- if '.' in table:
286
+ if "." in table:
282
287
  # Table already has database prefix, add it directly
283
288
  tables_list.append(escape_table_name(table))
284
289
  else:
285
290
  # No dot notation, apply schema and database as needed
286
291
  if schema_name is None:
287
- tables_list.append(f'{database}.{escape_table_name(table)}')
292
+ tables_list.append(f"{database}.{escape_table_name(table)}")
288
293
  else:
289
- tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
294
+ tables_list.append(f"{database}.{schema_name}.{escape_table_name(table)}")
290
295
  continue
291
296
 
292
297
  # Remove duplicates from lists
@@ -320,8 +325,8 @@ class SkillToolController:
320
325
 
321
326
  # First, add databases from skills with explicit database parameters
322
327
  for skill in skills:
323
- if skill.params.get('database'):
324
- databases_struct[skill.params['database']] = skill.restriction_on_tables
328
+ if skill.params.get("database"):
329
+ databases_struct[skill.params["database"]] = skill.restriction_on_tables
325
330
 
326
331
  # Then, add all extracted databases with no restrictions
327
332
  for db_name in extracted_databases:
@@ -338,13 +343,11 @@ class SkillToolController:
338
343
  ignore_knowledge_bases=ignore_knowledge_bases,
339
344
  knowledge_base_database=knowledge_base_database,
340
345
  sample_rows_in_table_info=3,
341
-
342
- cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
343
- )
344
- db = MindsDBSQL.custom_init(
345
- sql_agent=sql_agent
346
+ cache=get_cache("agent", max_size=_MAX_CACHE_SIZE),
346
347
  )
347
- toolkit = MindsDBSQLToolkit(db=db, llm=llm)
348
+ db = MindsDBSQL.custom_init(sql_agent=sql_agent)
349
+ should_include_kb_tools = include_knowledge_bases is not None and len(include_knowledge_bases) > 0
350
+ toolkit = MindsDBSQLToolkit(db=db, llm=llm, include_knowledge_base_tools=should_include_kb_tools)
348
351
  return toolkit.get_tools()
349
352
 
350
353
  def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
@@ -352,47 +355,46 @@ class SkillToolController:
352
355
  creates advanced retrieval tool i.e. RAG
353
356
  """
354
357
  params = skill.params
355
- config = params.get('config', {})
356
- if 'llm' not in config:
358
+ config = params.get("config", {})
359
+ if "llm" not in config:
357
360
  # Set LLM if not explicitly provided in configs.
358
- config['llm'] = llm
361
+ config["llm"] = llm
359
362
  tool = dict(
360
- name=params.get('name', skill.name),
361
- source=params.get('source', None),
363
+ name=params.get("name", skill.name),
364
+ source=params.get("source", None),
362
365
  config=config,
363
- description=f'You must use this tool to get more context or information '
364
- f'to answer a question about {params["description"]}. '
365
- f'The input should be the exact question the user is asking.',
366
- type=skill.type
366
+ description=f"You must use this tool to get more context or information "
367
+ f"to answer a question about {params['description']}. "
368
+ f"The input should be the exact question the user is asking.",
369
+ type=skill.type,
367
370
  )
368
371
  pred_args = {}
369
- pred_args['llm'] = llm
372
+ pred_args["llm"] = llm
370
373
 
371
374
  from .retrieval_tool import build_retrieval_tools
375
+
372
376
  return build_retrieval_tools(tool, pred_args, skill)
373
377
 
374
378
  def _get_rag_query_function(self, skill: db.Skills):
375
379
  session_controller = self.get_command_executor().session
376
380
 
377
381
  def _answer_question(question: str) -> str:
378
- knowledge_base_name = skill.params['source']
382
+ knowledge_base_name = skill.params["source"]
379
383
 
380
384
  # make select in KB table
381
385
  query = Select(
382
386
  targets=[Star()],
383
- where=BinaryOperation(op='=', args=[
384
- Identifier(TableField.CONTENT.value), Constant(question)
385
- ]),
387
+ where=BinaryOperation(op="=", args=[Identifier(TableField.CONTENT.value), Constant(question)]),
386
388
  limit=Constant(_DEFAULT_TOP_K_SIMILARITY_SEARCH),
387
389
  )
388
390
  kb_table = session_controller.kb_controller.get_table(knowledge_base_name, skill.project_id)
389
391
 
390
392
  res = kb_table.select_query(query)
391
393
  # Handle both chunk_content and content column names
392
- if hasattr(res, 'chunk_content'):
393
- return '\n'.join(res.chunk_content)
394
- elif hasattr(res, 'content'):
395
- return '\n'.join(res.content)
394
+ if hasattr(res, "chunk_content"):
395
+ return "\n".join(res.chunk_content)
396
+ elif hasattr(res, "content"):
397
+ return "\n".join(res.content)
396
398
  else:
397
399
  return "No content or chunk_content found in knowledge base response"
398
400
 
@@ -400,19 +402,22 @@ class SkillToolController:
400
402
 
401
403
  def _make_knowledge_base_tools(self, skill: db.Skills) -> dict:
402
404
  # To prevent dependency on Langchain unless an actual tool uses it.
403
- description = skill.params.get('description', '')
405
+ description = skill.params.get("description", "")
404
406
 
405
- logger.warning("This skill is deprecated and will be removed in the future. "
406
- "Please use `retrieval` skill instead ")
407
+ logger.warning(
408
+ "This skill is deprecated and will be removed in the future. Please use `retrieval` skill instead "
409
+ )
407
410
 
408
411
  return dict(
409
- name='Knowledge Base Retrieval',
412
+ name="Knowledge Base Retrieval",
410
413
  func=self._get_rag_query_function(skill),
411
- description=f'Use this tool to get more context or information to answer a question about {description}. The input should be the exact question the user is asking.',
412
- type=skill.type
414
+ description=f"Use this tool to get more context or information to answer a question about {description}. The input should be the exact question the user is asking.",
415
+ type=skill.type,
413
416
  )
414
417
 
415
- def get_tools_from_skills(self, skills_data: List[SkillData], llm: BaseChatModel, embedding_model: Embeddings) -> dict:
418
+ def get_tools_from_skills(
419
+ self, skills_data: List[SkillData], llm: BaseChatModel, embedding_model: Embeddings
420
+ ) -> dict:
416
421
  """Creates function for skill and metadata (name, description)
417
422
 
418
423
  Args:
@@ -431,7 +436,8 @@ class SkillToolController:
431
436
  skill_type = SkillType(skill.type)
432
437
  except ValueError:
433
438
  raise NotImplementedError(
434
- f'skill of type {skill.type} is not supported as a tool, supported types are: {list(SkillType._member_names_)}')
439
+ f"skill of type {skill.type} is not supported as a tool, supported types are: {list(SkillType._member_names_)}"
440
+ )
435
441
 
436
442
  if skill_type == SkillType.TEXT2SQL_LEGACY:
437
443
  skill_type = SkillType.TEXT2SQL
@@ -442,10 +448,7 @@ class SkillToolController:
442
448
  if skill_type == SkillType.TEXT2SQL:
443
449
  tools[skill_type] = self._make_text_to_sql_tools(skills, llm)
444
450
  elif skill_type == SkillType.KNOWLEDGE_BASE:
445
- tools[skill_type] = [
446
- self._make_knowledge_base_tools(skill)
447
- for skill in skills
448
- ]
451
+ tools[skill_type] = [self._make_knowledge_base_tools(skill) for skill in skills]
449
452
  elif skill_type == SkillType.RETRIEVAL:
450
453
  tools[skill_type] = []
451
454
  for skill in skills:
@@ -76,7 +76,7 @@ def split_table_name(table_name: str) -> List[str]:
76
76
  result.append(current.strip("`"))
77
77
 
78
78
  # ensure we split the table name
79
- result = [r.split(".") for r in result][0]
79
+ # result = [r.split(".") for r in result][0]
80
80
 
81
81
  return result
82
82