MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic; see the package registry's advisory for more details.

Files changed (55):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
  5. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  6. mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
  7. mindsdb/api/executor/planner/plan_join.py +1 -1
  8. mindsdb/api/executor/planner/query_planner.py +7 -1
  9. mindsdb/api/executor/planner/query_prepare.py +68 -87
  10. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  11. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  12. mindsdb/api/http/namespaces/file.py +49 -24
  13. mindsdb/api/mcp/start.py +45 -31
  14. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  15. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  16. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  17. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  18. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  19. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  20. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  21. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  22. mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  24. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  25. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
  26. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  27. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  28. mindsdb/integrations/libs/api_handler.py +6 -7
  29. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  30. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  31. mindsdb/interfaces/agents/agents_controller.py +29 -9
  32. mindsdb/interfaces/agents/constants.py +44 -0
  33. mindsdb/interfaces/agents/langchain_agent.py +15 -6
  34. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  35. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  36. mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
  37. mindsdb/interfaces/knowledge_base/controller.py +121 -102
  38. mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
  39. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  40. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  41. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  42. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  43. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
  44. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
  45. mindsdb/interfaces/skills/skill_tool.py +91 -88
  46. mindsdb/interfaces/skills/sql_agent.py +181 -130
  47. mindsdb/interfaces/storage/db.py +9 -7
  48. mindsdb/utilities/config.py +12 -1
  49. mindsdb/utilities/exception.py +47 -7
  50. mindsdb/utilities/security.py +54 -11
  51. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
  52. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
  53. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  54. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  55. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
@@ -91,9 +91,7 @@ class ChromaDBHandler(VectorStoreHandler):
91
91
  self.persist_directory = config.persist_directory
92
92
  elif not self.handler_storage.is_temporal:
93
93
  # get full persistence directory from handler storage
94
- self.persist_directory = self.handler_storage.folder_get(
95
- config.persist_directory
96
- )
94
+ self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
97
95
  self._use_handler_storage = True
98
96
 
99
97
  return config
@@ -141,7 +139,7 @@ class ChromaDBHandler(VectorStoreHandler):
141
139
  def disconnect(self):
142
140
  """Close the database connection."""
143
141
  if self.is_connected:
144
- if hasattr(self._client, 'close'):
142
+ if hasattr(self._client, "close"):
145
143
  self._client.close() # Some ChromaDB clients have a close method
146
144
  self._client = None
147
145
  self.is_connected = False
@@ -182,9 +180,7 @@ class ChromaDBHandler(VectorStoreHandler):
182
180
 
183
181
  return mapping[operator]
184
182
 
185
- def _translate_metadata_condition(
186
- self, conditions: List[FilterCondition]
187
- ) -> Optional[dict]:
183
+ def _translate_metadata_condition(self, conditions: List[FilterCondition]) -> Optional[dict]:
188
184
  """
189
185
  Translate a list of FilterCondition objects a dict that can be used by ChromaDB.
190
186
  E.g.,
@@ -212,9 +208,7 @@ class ChromaDBHandler(VectorStoreHandler):
212
208
  if conditions is None:
213
209
  return None
214
210
  metadata_conditions = [
215
- condition
216
- for condition in conditions
217
- if condition.column.startswith(TableField.METADATA.value)
211
+ condition for condition in conditions if condition.column.startswith(TableField.METADATA.value)
218
212
  ]
219
213
  if len(metadata_conditions) == 0:
220
214
  return None
@@ -224,19 +218,11 @@ class ChromaDBHandler(VectorStoreHandler):
224
218
  for condition in metadata_conditions:
225
219
  metadata_key = condition.column.split(".")[-1]
226
220
 
227
- chroma_db_conditions.append(
228
- {
229
- metadata_key: {
230
- self._get_chromadb_operator(condition.op): condition.value
231
- }
232
- }
233
- )
221
+ chroma_db_conditions.append({metadata_key: {self._get_chromadb_operator(condition.op): condition.value}})
234
222
 
235
223
  # we combine all metadata conditions into a single dict
236
224
  metadata_condition = (
237
- {"$and": chroma_db_conditions}
238
- if len(chroma_db_conditions) > 1
239
- else chroma_db_conditions[0]
225
+ {"$and": chroma_db_conditions} if len(chroma_db_conditions) > 1 else chroma_db_conditions[0]
240
226
  )
241
227
  return metadata_condition
242
228
 
@@ -248,7 +234,6 @@ class ChromaDBHandler(VectorStoreHandler):
248
234
  offset: int = None,
249
235
  limit: int = None,
250
236
  ) -> pd.DataFrame:
251
-
252
237
  collection = self._client.get_collection(table_name)
253
238
  filters = self._translate_metadata_condition(conditions)
254
239
 
@@ -258,38 +243,43 @@ class ChromaDBHandler(VectorStoreHandler):
258
243
  vector_filter = (
259
244
  []
260
245
  if conditions is None
261
- else [
262
- condition
263
- for condition in conditions
264
- if condition.column == TableField.EMBEDDINGS.value
265
- ]
246
+ else [condition for condition in conditions if condition.column == TableField.EMBEDDINGS.value]
266
247
  )
267
248
 
268
249
  if len(vector_filter) > 0:
269
250
  vector_filter = vector_filter[0]
270
251
  else:
271
252
  vector_filter = None
272
- id_filters = []
253
+ ids_include = []
254
+ ids_exclude = []
255
+
273
256
  if conditions is not None:
274
257
  for condition in conditions:
275
258
  if condition.column != TableField.ID.value:
276
259
  continue
277
260
  if condition.op == FilterOperator.EQUAL:
278
- id_filters.append(condition.value)
261
+ ids_include.append(condition.value)
279
262
  elif condition.op == FilterOperator.IN:
280
- id_filters.extend(condition.value)
263
+ ids_include.extend(condition.value)
264
+ elif condition.op == FilterOperator.NOT_EQUAL:
265
+ ids_exclude.append(condition.value)
266
+ elif condition.op == FilterOperator.NOT_IN:
267
+ ids_exclude.extend(condition.value)
281
268
 
282
269
  if vector_filter is not None:
283
270
  # similarity search
284
271
  query_payload = {
285
272
  "where": filters,
286
- "query_embeddings": vector_filter.value
287
- if vector_filter is not None
288
- else None,
273
+ "query_embeddings": vector_filter.value if vector_filter is not None else None,
289
274
  "include": include + ["distances"],
290
275
  }
276
+
291
277
  if limit is not None:
292
- query_payload["n_results"] = limit
278
+ if len(ids_include) == 0 and len(ids_exclude) == 0:
279
+ query_payload["n_results"] = limit
280
+ else:
281
+ # get more results if we have filters by id
282
+ query_payload["n_results"] = limit * 10
293
283
 
294
284
  result = collection.query(**query_payload)
295
285
  ids = result["ids"][0]
@@ -301,7 +291,7 @@ class ChromaDBHandler(VectorStoreHandler):
301
291
  else:
302
292
  # general get query
303
293
  result = collection.get(
304
- ids=id_filters or None,
294
+ ids=ids_include or None,
305
295
  where=filters,
306
296
  limit=limit,
307
297
  offset=offset,
@@ -337,13 +327,21 @@ class ChromaDBHandler(VectorStoreHandler):
337
327
  break
338
328
 
339
329
  df = pd.DataFrame(payload)
330
+ if ids_exclude or ids_include:
331
+ if ids_exclude:
332
+ df = df[~df[TableField.ID.value].isin(ids_exclude)]
333
+ if ids_include:
334
+ df = df[df[TableField.ID.value].isin(ids_include)]
335
+ if limit is not None:
336
+ df = df[:limit]
337
+
340
338
  if distance_filter is not None:
341
339
  op_map = {
342
- '<': '__lt__',
343
- '<=': '__le__',
344
- '>': '__gt__',
345
- '>=': '__ge__',
346
- '=': '__eq__',
340
+ "<": "__lt__",
341
+ "<=": "__le__",
342
+ ">": "__gt__",
343
+ ">=": "__ge__",
344
+ "=": "__eq__",
347
345
  }
348
346
  op = op_map.get(distance_filter.op.value)
349
347
  if op:
@@ -393,7 +391,7 @@ class ChromaDBHandler(VectorStoreHandler):
393
391
  else:
394
392
  # Convert IDs to strings and remove any duplicates
395
393
  df[TableField.ID.value] = df[TableField.ID.value].astype(str)
396
- df = df.drop_duplicates(subset=[TableField.ID.value], keep='last')
394
+ df = df.drop_duplicates(subset=[TableField.ID.value], keep="last")
397
395
 
398
396
  return df
399
397
 
@@ -413,7 +411,7 @@ class ChromaDBHandler(VectorStoreHandler):
413
411
  df = df.dropna(subset=[TableField.METADATA.value])
414
412
 
415
413
  # Convert embeddings from string to list if they are strings
416
- if TableField.EMBEDDINGS.value in df.columns and df[TableField.EMBEDDINGS.value].dtype == 'object':
414
+ if TableField.EMBEDDINGS.value in df.columns and df[TableField.EMBEDDINGS.value].dtype == "object":
417
415
  df[TableField.EMBEDDINGS.value] = df[TableField.EMBEDDINGS.value].apply(
418
416
  lambda x: ast.literal_eval(x) if isinstance(x, str) else x
419
417
  )
@@ -429,7 +427,7 @@ class ChromaDBHandler(VectorStoreHandler):
429
427
  ids=data_dict[TableField.ID.value],
430
428
  documents=data_dict[TableField.CONTENT.value],
431
429
  embeddings=data_dict.get(TableField.EMBEDDINGS.value, None),
432
- metadatas=data_dict.get(TableField.METADATA.value, None)
430
+ metadatas=data_dict.get(TableField.METADATA.value, None),
433
431
  )
434
432
  self._sync()
435
433
  except Exception as e:
@@ -467,16 +465,10 @@ class ChromaDBHandler(VectorStoreHandler):
467
465
  )
468
466
  self._sync()
469
467
 
470
- def delete(
471
- self, table_name: str, conditions: List[FilterCondition] = None
472
- ):
468
+ def delete(self, table_name: str, conditions: List[FilterCondition] = None):
473
469
  filters = self._translate_metadata_condition(conditions)
474
470
  # get id filters
475
- id_filters = [
476
- condition.value
477
- for condition in conditions
478
- if condition.column == TableField.ID.value
479
- ] or None
471
+ id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
480
472
 
481
473
  if filters is None and id_filters is None:
482
474
  raise Exception("Delete query must have at least one condition!")
@@ -488,8 +480,9 @@ class ChromaDBHandler(VectorStoreHandler):
488
480
  """
489
481
  Create a collection with the given name in the ChromaDB database.
490
482
  """
491
- self._client.create_collection(table_name, get_or_create=if_not_exists,
492
- metadata=self.create_collection_metadata)
483
+ self._client.create_collection(
484
+ table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
485
+ )
493
486
  self._sync()
494
487
 
495
488
  def drop_table(self, table_name: str, if_exists=True):
@@ -1,20 +1,25 @@
1
1
  from mindsdb.integrations.libs.const import HANDLER_TYPE
2
2
 
3
3
  from .__about__ import __version__ as version, __description__ as description
4
- try:
5
- from .huggingface_handler import HuggingFaceHandler as Handler
6
- import_error = None
7
- except Exception as e:
8
- Handler = None
9
- import_error = e
4
+ # try:
5
+ # from .huggingface_handler import HuggingFaceHandler as Handler
6
+ # import_error = None
7
+ # except Exception as e:
8
+ # Handler = None
9
+ # import_error = e
10
10
 
11
- title = 'Hugging Face'
12
- name = 'huggingface'
11
+ # NOTE: security vulnerability is in `pytorch` v2.7.1, revert changes here and in
12
+ # requirements.txt/requirements_cpu.txt when new version is released
13
+ Handler = None
14
+ import_error = """
15
+ The `huggingface_handler` is temporary disabled in current version of MindsDB due to security vulnerability.
16
+ """
17
+
18
+ title = "Hugging Face"
19
+ name = "huggingface"
13
20
  type = HANDLER_TYPE.ML
14
21
  icon_path = "icon.svg"
15
22
  permanent = False
16
- execution_method = 'subprocess_keep'
23
+ execution_method = "subprocess_keep"
17
24
 
18
- __all__ = [
19
- 'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path'
20
- ]
25
+ __all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"]