MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
- mindsdb/api/executor/planner/plan_join.py +1 -1
- mindsdb/api/executor/planner/query_planner.py +7 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/api_handler.py +6 -7
- mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/constants.py +44 -0
- mindsdb/interfaces/agents/langchain_agent.py +15 -6
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
- mindsdb/interfaces/knowledge_base/controller.py +121 -102
- mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
- mindsdb/interfaces/skills/skill_tool.py +91 -88
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +12 -1
- mindsdb/utilities/exception.py +47 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
|
@@ -91,9 +91,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
91
91
|
self.persist_directory = config.persist_directory
|
|
92
92
|
elif not self.handler_storage.is_temporal:
|
|
93
93
|
# get full persistence directory from handler storage
|
|
94
|
-
self.persist_directory = self.handler_storage.folder_get(
|
|
95
|
-
config.persist_directory
|
|
96
|
-
)
|
|
94
|
+
self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
|
|
97
95
|
self._use_handler_storage = True
|
|
98
96
|
|
|
99
97
|
return config
|
|
@@ -141,7 +139,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
141
139
|
def disconnect(self):
|
|
142
140
|
"""Close the database connection."""
|
|
143
141
|
if self.is_connected:
|
|
144
|
-
if hasattr(self._client,
|
|
142
|
+
if hasattr(self._client, "close"):
|
|
145
143
|
self._client.close() # Some ChromaDB clients have a close method
|
|
146
144
|
self._client = None
|
|
147
145
|
self.is_connected = False
|
|
@@ -182,9 +180,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
182
180
|
|
|
183
181
|
return mapping[operator]
|
|
184
182
|
|
|
185
|
-
def _translate_metadata_condition(
|
|
186
|
-
self, conditions: List[FilterCondition]
|
|
187
|
-
) -> Optional[dict]:
|
|
183
|
+
def _translate_metadata_condition(self, conditions: List[FilterCondition]) -> Optional[dict]:
|
|
188
184
|
"""
|
|
189
185
|
Translate a list of FilterCondition objects a dict that can be used by ChromaDB.
|
|
190
186
|
E.g.,
|
|
@@ -212,9 +208,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
212
208
|
if conditions is None:
|
|
213
209
|
return None
|
|
214
210
|
metadata_conditions = [
|
|
215
|
-
condition
|
|
216
|
-
for condition in conditions
|
|
217
|
-
if condition.column.startswith(TableField.METADATA.value)
|
|
211
|
+
condition for condition in conditions if condition.column.startswith(TableField.METADATA.value)
|
|
218
212
|
]
|
|
219
213
|
if len(metadata_conditions) == 0:
|
|
220
214
|
return None
|
|
@@ -224,19 +218,11 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
224
218
|
for condition in metadata_conditions:
|
|
225
219
|
metadata_key = condition.column.split(".")[-1]
|
|
226
220
|
|
|
227
|
-
chroma_db_conditions.append(
|
|
228
|
-
{
|
|
229
|
-
metadata_key: {
|
|
230
|
-
self._get_chromadb_operator(condition.op): condition.value
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
)
|
|
221
|
+
chroma_db_conditions.append({metadata_key: {self._get_chromadb_operator(condition.op): condition.value}})
|
|
234
222
|
|
|
235
223
|
# we combine all metadata conditions into a single dict
|
|
236
224
|
metadata_condition = (
|
|
237
|
-
{"$and": chroma_db_conditions}
|
|
238
|
-
if len(chroma_db_conditions) > 1
|
|
239
|
-
else chroma_db_conditions[0]
|
|
225
|
+
{"$and": chroma_db_conditions} if len(chroma_db_conditions) > 1 else chroma_db_conditions[0]
|
|
240
226
|
)
|
|
241
227
|
return metadata_condition
|
|
242
228
|
|
|
@@ -248,7 +234,6 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
248
234
|
offset: int = None,
|
|
249
235
|
limit: int = None,
|
|
250
236
|
) -> pd.DataFrame:
|
|
251
|
-
|
|
252
237
|
collection = self._client.get_collection(table_name)
|
|
253
238
|
filters = self._translate_metadata_condition(conditions)
|
|
254
239
|
|
|
@@ -258,38 +243,43 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
258
243
|
vector_filter = (
|
|
259
244
|
[]
|
|
260
245
|
if conditions is None
|
|
261
|
-
else [
|
|
262
|
-
condition
|
|
263
|
-
for condition in conditions
|
|
264
|
-
if condition.column == TableField.EMBEDDINGS.value
|
|
265
|
-
]
|
|
246
|
+
else [condition for condition in conditions if condition.column == TableField.EMBEDDINGS.value]
|
|
266
247
|
)
|
|
267
248
|
|
|
268
249
|
if len(vector_filter) > 0:
|
|
269
250
|
vector_filter = vector_filter[0]
|
|
270
251
|
else:
|
|
271
252
|
vector_filter = None
|
|
272
|
-
|
|
253
|
+
ids_include = []
|
|
254
|
+
ids_exclude = []
|
|
255
|
+
|
|
273
256
|
if conditions is not None:
|
|
274
257
|
for condition in conditions:
|
|
275
258
|
if condition.column != TableField.ID.value:
|
|
276
259
|
continue
|
|
277
260
|
if condition.op == FilterOperator.EQUAL:
|
|
278
|
-
|
|
261
|
+
ids_include.append(condition.value)
|
|
279
262
|
elif condition.op == FilterOperator.IN:
|
|
280
|
-
|
|
263
|
+
ids_include.extend(condition.value)
|
|
264
|
+
elif condition.op == FilterOperator.NOT_EQUAL:
|
|
265
|
+
ids_exclude.append(condition.value)
|
|
266
|
+
elif condition.op == FilterOperator.NOT_IN:
|
|
267
|
+
ids_exclude.extend(condition.value)
|
|
281
268
|
|
|
282
269
|
if vector_filter is not None:
|
|
283
270
|
# similarity search
|
|
284
271
|
query_payload = {
|
|
285
272
|
"where": filters,
|
|
286
|
-
"query_embeddings": vector_filter.value
|
|
287
|
-
if vector_filter is not None
|
|
288
|
-
else None,
|
|
273
|
+
"query_embeddings": vector_filter.value if vector_filter is not None else None,
|
|
289
274
|
"include": include + ["distances"],
|
|
290
275
|
}
|
|
276
|
+
|
|
291
277
|
if limit is not None:
|
|
292
|
-
|
|
278
|
+
if len(ids_include) == 0 and len(ids_exclude) == 0:
|
|
279
|
+
query_payload["n_results"] = limit
|
|
280
|
+
else:
|
|
281
|
+
# get more results if we have filters by id
|
|
282
|
+
query_payload["n_results"] = limit * 10
|
|
293
283
|
|
|
294
284
|
result = collection.query(**query_payload)
|
|
295
285
|
ids = result["ids"][0]
|
|
@@ -301,7 +291,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
301
291
|
else:
|
|
302
292
|
# general get query
|
|
303
293
|
result = collection.get(
|
|
304
|
-
ids=
|
|
294
|
+
ids=ids_include or None,
|
|
305
295
|
where=filters,
|
|
306
296
|
limit=limit,
|
|
307
297
|
offset=offset,
|
|
@@ -337,13 +327,21 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
337
327
|
break
|
|
338
328
|
|
|
339
329
|
df = pd.DataFrame(payload)
|
|
330
|
+
if ids_exclude or ids_include:
|
|
331
|
+
if ids_exclude:
|
|
332
|
+
df = df[~df[TableField.ID.value].isin(ids_exclude)]
|
|
333
|
+
if ids_include:
|
|
334
|
+
df = df[df[TableField.ID.value].isin(ids_include)]
|
|
335
|
+
if limit is not None:
|
|
336
|
+
df = df[:limit]
|
|
337
|
+
|
|
340
338
|
if distance_filter is not None:
|
|
341
339
|
op_map = {
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
340
|
+
"<": "__lt__",
|
|
341
|
+
"<=": "__le__",
|
|
342
|
+
">": "__gt__",
|
|
343
|
+
">=": "__ge__",
|
|
344
|
+
"=": "__eq__",
|
|
347
345
|
}
|
|
348
346
|
op = op_map.get(distance_filter.op.value)
|
|
349
347
|
if op:
|
|
@@ -393,7 +391,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
393
391
|
else:
|
|
394
392
|
# Convert IDs to strings and remove any duplicates
|
|
395
393
|
df[TableField.ID.value] = df[TableField.ID.value].astype(str)
|
|
396
|
-
df = df.drop_duplicates(subset=[TableField.ID.value], keep=
|
|
394
|
+
df = df.drop_duplicates(subset=[TableField.ID.value], keep="last")
|
|
397
395
|
|
|
398
396
|
return df
|
|
399
397
|
|
|
@@ -413,7 +411,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
413
411
|
df = df.dropna(subset=[TableField.METADATA.value])
|
|
414
412
|
|
|
415
413
|
# Convert embeddings from string to list if they are strings
|
|
416
|
-
if TableField.EMBEDDINGS.value in df.columns and df[TableField.EMBEDDINGS.value].dtype ==
|
|
414
|
+
if TableField.EMBEDDINGS.value in df.columns and df[TableField.EMBEDDINGS.value].dtype == "object":
|
|
417
415
|
df[TableField.EMBEDDINGS.value] = df[TableField.EMBEDDINGS.value].apply(
|
|
418
416
|
lambda x: ast.literal_eval(x) if isinstance(x, str) else x
|
|
419
417
|
)
|
|
@@ -429,7 +427,7 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
429
427
|
ids=data_dict[TableField.ID.value],
|
|
430
428
|
documents=data_dict[TableField.CONTENT.value],
|
|
431
429
|
embeddings=data_dict.get(TableField.EMBEDDINGS.value, None),
|
|
432
|
-
metadatas=data_dict.get(TableField.METADATA.value, None)
|
|
430
|
+
metadatas=data_dict.get(TableField.METADATA.value, None),
|
|
433
431
|
)
|
|
434
432
|
self._sync()
|
|
435
433
|
except Exception as e:
|
|
@@ -467,16 +465,10 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
467
465
|
)
|
|
468
466
|
self._sync()
|
|
469
467
|
|
|
470
|
-
def delete(
|
|
471
|
-
self, table_name: str, conditions: List[FilterCondition] = None
|
|
472
|
-
):
|
|
468
|
+
def delete(self, table_name: str, conditions: List[FilterCondition] = None):
|
|
473
469
|
filters = self._translate_metadata_condition(conditions)
|
|
474
470
|
# get id filters
|
|
475
|
-
id_filters = [
|
|
476
|
-
condition.value
|
|
477
|
-
for condition in conditions
|
|
478
|
-
if condition.column == TableField.ID.value
|
|
479
|
-
] or None
|
|
471
|
+
id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
|
|
480
472
|
|
|
481
473
|
if filters is None and id_filters is None:
|
|
482
474
|
raise Exception("Delete query must have at least one condition!")
|
|
@@ -488,8 +480,9 @@ class ChromaDBHandler(VectorStoreHandler):
|
|
|
488
480
|
"""
|
|
489
481
|
Create a collection with the given name in the ChromaDB database.
|
|
490
482
|
"""
|
|
491
|
-
self._client.create_collection(
|
|
492
|
-
|
|
483
|
+
self._client.create_collection(
|
|
484
|
+
table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
|
|
485
|
+
)
|
|
493
486
|
self._sync()
|
|
494
487
|
|
|
495
488
|
def drop_table(self, table_name: str, if_exists=True):
|
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
from mindsdb.integrations.libs.const import HANDLER_TYPE
|
|
2
2
|
|
|
3
3
|
from .__about__ import __version__ as version, __description__ as description
|
|
4
|
-
try:
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
except Exception as e:
|
|
8
|
-
|
|
9
|
-
|
|
4
|
+
# try:
|
|
5
|
+
# from .huggingface_handler import HuggingFaceHandler as Handler
|
|
6
|
+
# import_error = None
|
|
7
|
+
# except Exception as e:
|
|
8
|
+
# Handler = None
|
|
9
|
+
# import_error = e
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
# NOTE: security vulnerability is in `pytorch` v2.7.1, revert changes here and in
|
|
12
|
+
# requirements.txt/requirements_cpu.txt when new version is released
|
|
13
|
+
Handler = None
|
|
14
|
+
import_error = """
|
|
15
|
+
The `huggingface_handler` is temporary disabled in current version of MindsDB due to security vulnerability.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
title = "Hugging Face"
|
|
19
|
+
name = "huggingface"
|
|
13
20
|
type = HANDLER_TYPE.ML
|
|
14
21
|
icon_path = "icon.svg"
|
|
15
22
|
permanent = False
|
|
16
|
-
execution_method =
|
|
23
|
+
execution_method = "subprocess_keep"
|
|
17
24
|
|
|
18
|
-
__all__ = [
|
|
19
|
-
'Handler', 'version', 'name', 'type', 'title', 'description', 'import_error', 'icon_path'
|
|
20
|
-
]
|
|
25
|
+
__all__ = ["Handler", "version", "name", "type", "title", "description", "import_error", "icon_path"]
|