mindsdb-25.5.4.0-py3-none-any.whl → mindsdb-25.5.4.2-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +8 -8
- mindsdb/api/a2a/__main__.py +38 -8
- mindsdb/api/a2a/run_a2a.py +10 -53
- mindsdb/api/a2a/task_manager.py +19 -53
- mindsdb/api/executor/command_executor.py +147 -291
- mindsdb/api/http/namespaces/config.py +61 -86
- mindsdb/integrations/handlers/byom_handler/requirements.txt +1 -2
- mindsdb/integrations/handlers/lancedb_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +37 -20
- mindsdb/integrations/libs/llm/config.py +13 -0
- mindsdb/integrations/libs/llm/utils.py +37 -65
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +230 -227
- mindsdb/interfaces/agents/constants.py +17 -13
- mindsdb/interfaces/agents/langchain_agent.py +93 -94
- mindsdb/interfaces/knowledge_base/controller.py +230 -221
- mindsdb/utilities/config.py +43 -84
- mindsdb/utilities/starters.py +9 -1
- {mindsdb-25.5.4.0.dist-info → mindsdb-25.5.4.2.dist-info}/METADATA +268 -266
- {mindsdb-25.5.4.0.dist-info → mindsdb-25.5.4.2.dist-info}/RECORD +22 -26
- mindsdb/api/a2a/a2a_client.py +0 -439
- mindsdb/api/a2a/common/client/__init__.py +0 -4
- mindsdb/api/a2a/common/client/card_resolver.py +0 -21
- mindsdb/api/a2a/common/client/client.py +0 -86
- {mindsdb-25.5.4.0.dist-info → mindsdb-25.5.4.2.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.0.dist-info → mindsdb-25.5.4.2.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.0.dist-info → mindsdb-25.5.4.2.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/knowledge_base/controller.py

@@ -5,15 +5,7 @@ from typing import Dict, List, Optional
 import pandas as pd
 import numpy as np

-from mindsdb_sql_parser.ast import (
-    BinaryOperation,
-    Constant,
-    Identifier,
-    Select,
-    Update,
-    Delete,
-    Star
-)
+from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
 from mindsdb_sql_parser.ast.mindsdb import CreatePredictor

 from mindsdb.integrations.utilities.query_traversal import query_traversal
@@ -27,7 +19,9 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
 from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
 from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
 from mindsdb.integrations.utilities.handler_utils import get_api_key
-from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
+from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
+    construct_model_from_args,
+)

 from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
 from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
@@ -48,11 +42,7 @@ from mindsdb.integrations.utilities.rag.rerankers.base_reranker import BaseLLMReranker

 logger = log.getLogger(__name__)

-KB_TO_VECTORDB_COLUMNS = {
-    'id': 'original_doc_id',
-    'chunk_id': 'id',
-    'chunk_content': 'content'
-}
+KB_TO_VECTORDB_COLUMNS = {"id": "original_doc_id", "chunk_id": "id", "chunk_content": "content"}


 def get_model_params(model_params: dict, default_config_key: str):
@@ -72,23 +62,23 @@ def get_embedding_model_from_params(embedding_model_params: dict):
     Create embedding model from parameters.
     """
     params_copy = copy.deepcopy(embedding_model_params)
-    provider = params_copy.pop(
-    api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get(
+    provider = params_copy.pop("provider", None).lower()
+    api_key = get_api_key(provider, params_copy, strict=False) or params_copy.get("api_key")
     # Underscores are replaced because the provider name ultimately gets mapped to a class name.
     # This is mostly to support Azure OpenAI (azure_openai); the mapped class name is 'AzureOpenAIEmbeddings'.
-    params_copy[
-    if provider ==
+    params_copy["class"] = provider.replace("_", "")
+    if provider == "azure_openai":
         # Azure OpenAI expects the api_key to be passed as 'openai_api_key'.
-        params_copy[
-        params_copy[
-        if
-            params_copy[
-        if
-            params_copy[
+        params_copy["openai_api_key"] = api_key
+        params_copy["azure_endpoint"] = params_copy.pop("base_url")
+        if "chunk_size" not in params_copy:
+            params_copy["chunk_size"] = 2048
+        if "api_version" in params_copy:
+            params_copy["openai_api_version"] = params_copy["api_version"]
     else:
         params_copy[f"{provider}_api_key"] = api_key
-        params_copy.pop(
-        params_copy[
+        params_copy.pop("api_key", None)
+        params_copy["model"] = params_copy.pop("model_name", None)

     return construct_model_from_args(params_copy)

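For reference, a minimal standalone sketch of the parameter translation this function performs, with `get_api_key` replaced by a direct dict lookup and the final `construct_model_from_args` call replaced by returning the kwargs dict so the mapping itself can be inspected. The sample input values are hypothetical:

```python
import copy

def map_embedding_provider_params(embedding_model_params: dict) -> dict:
    # Mirrors the branching in the hunk above; returns the kwargs dict instead of a model.
    params = copy.deepcopy(embedding_model_params)
    provider = params.pop("provider", None).lower()
    api_key = params.get("api_key")
    params["class"] = provider.replace("_", "")  # 'azure_openai' -> 'azureopenai'
    if provider == "azure_openai":
        params["openai_api_key"] = api_key
        params["azure_endpoint"] = params.pop("base_url")
        params.setdefault("chunk_size", 2048)
        if "api_version" in params:
            params["openai_api_version"] = params["api_version"]
    else:
        params[f"{provider}_api_key"] = api_key
        params.pop("api_key", None)
        params["model"] = params.pop("model_name", None)
    return params

# Hypothetical input, for illustration only.
print(map_embedding_provider_params({
    "provider": "azure_openai",
    "model_name": "text-embedding-3-small",
    "api_key": "sk-...",
    "base_url": "https://example.openai.azure.com",
    "api_version": "2024-02-01",
}))
```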
@@ -98,15 +88,26 @@ def get_reranking_model_from_params(reranking_model_params: dict):
     Create reranking model from parameters.
     """
     params_copy = copy.deepcopy(reranking_model_params)
-    provider = params_copy.get(
+    provider = params_copy.get("provider", "openai").lower()

     if "api_key" not in params_copy:
         params_copy["api_key"] = get_api_key(provider, params_copy, strict=False)
-    params_copy[
+    params_copy["model"] = params_copy.pop("model_name", None)

     return BaseLLMReranker(**params_copy)


+def safe_pandas_is_datetime(value: str) -> bool:
+    """
+    Check if the value can be parsed as a datetime.
+    """
+    try:
+        result = pd.api.types.is_datetime64_any_dtype(value)
+        return result
+    except ValueError:
+        return False
+
+
 class KnowledgeBaseTable:
     """
     Knowledge base table interface
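The new `safe_pandas_is_datetime` helper wraps `pd.api.types.is_datetime64_any_dtype`, which inspects dtypes rather than parsing values and can raise `ValueError` for some scalar inputs. A small sketch of the behavior it smooths over; note that the dtype check is False for a scalar `pd.Timestamp`, which is why the metadata-conversion code later in this diff also tests `isinstance(value, pd.Timestamp)`:

```python
import pandas as pd

def safe_pandas_is_datetime(value) -> bool:
    # Same logic as the helper added above.
    try:
        return pd.api.types.is_datetime64_any_dtype(value)
    except ValueError:
        return False

print(safe_pandas_is_datetime(pd.Series(pd.to_datetime(["2024-01-01"]))))  # True: datetime64 series
print(safe_pandas_is_datetime(pd.Timestamp("2024-01-01")))  # False: scalar, not a datetime64 dtype
print(safe_pandas_is_datetime("not a date"))  # False: plain string
```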
@@ -127,9 +128,9 @@ class KnowledgeBaseTable:
         self.document_preprocessor = None  # Reset existing preprocessor
         if config is not None:
             # Ensure content_column is set for JSON chunking if not already specified
-            if config.get(
-                if
-                    config[
+            if config.get("type") == "json_chunking" and config.get("json_chunking_config"):
+                if "content_column" not in config["json_chunking_config"]:
+                    config["json_chunking_config"]["content_column"] = "content"

             preprocessing_config = PreprocessingConfig(**config)
             self.document_preprocessor = PreprocessorFactory.create_preprocessor(preprocessing_config)
@@ -192,11 +193,13 @@ class KnowledgeBaseTable:
                 query_text = item.value

                 # replace content with embeddings
-                conditions.append(
-
-
-
-
+                conditions.append(
+                    FilterCondition(
+                        column=TableField.EMBEDDINGS.value,
+                        value=self._content_to_embeddings(item.value),
+                        op=FilterOperator.EQUAL,
+                    )
+                )
             else:
                 conditions.append(item)

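A self-contained sketch of the transformation this hunk reformats: an equality filter on the content column is rewritten into an equality filter on the embeddings column, computed from the query text. The `FilterCondition`/`FilterOperator` stand-ins below are simplified; the real classes come from `mindsdb.integrations.libs.vectordatabase_handler`, and `embed` is a hypothetical stand-in for `self._content_to_embeddings`:

```python
from dataclasses import dataclass
from enum import Enum

class FilterOperator(Enum):  # simplified stand-in
    EQUAL = "="

@dataclass
class FilterCondition:  # simplified stand-in
    column: str
    value: object
    op: FilterOperator

def embed(text: str):  # hypothetical stand-in for _content_to_embeddings
    return [float(len(text))]

conditions = [
    FilterCondition(column="embeddings", value=embed("what is mindsdb?"), op=FilterOperator.EQUAL)
]
print(conditions)
```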
@@ -238,7 +241,7 @@ class KnowledgeBaseTable:
     def add_relevance(self, df, query_text, relevance_threshold=None):
         relevance_column = TableField.RELEVANCE.value

-        reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "
+        reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "default_reranking_model")
         if reranking_model_params and query_text and len(df) > 0:
             # Use reranker for relevance score
             try:
@@ -250,7 +253,7 @@ class KnowledgeBaseTable:

                 reranker = get_reranking_model_from_params(reranking_model_params)
                 # Get documents to rerank
-                documents = df[
+                documents = df["chunk_content"].tolist()
                 # Use the get_scores method with disable_events=True
                 scores = reranker.get_scores(query_text, documents)
                 # Add scores as the relevance column
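A sketch of the reranking step around this hunk, with a dummy scorer standing in for `BaseLLMReranker.get_scores` (the real implementation asks an LLM to score each document against the query):

```python
import pandas as pd

df = pd.DataFrame({"chunk_content": ["doc a", "doc b", "doc c"], "distance": [0.1, 0.4, 0.9]})

def get_scores(query: str, documents: list) -> list:
    # dummy stand-in for the LLM-based reranker
    return [0.9, 0.2, 0.6][: len(documents)]

documents = df["chunk_content"].tolist()
df["relevance"] = get_scores("what is mindsdb?", documents)
print(df.sort_values(by="relevance", ascending=False))
```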
@@ -263,21 +266,21 @@ class KnowledgeBaseTable:
             except Exception as e:
                 logger.error(f"Error during reranking: {str(e)}")
                 # Fallback to distance-based relevance
-                if
-                    df[relevance_column] = 1 / (1 + df[
+                if "distance" in df.columns:
+                    df[relevance_column] = 1 / (1 + df["distance"])
                 else:
                     logger.info("No distance or reranker available")

-        elif
+        elif "distance" in df.columns:
             # Calculate relevance from distance
             logger.info("Calculating relevance from vector distance")
-            df[relevance_column] = 1 / (1 + df[
+            df[relevance_column] = 1 / (1 + df["distance"])
             if relevance_threshold is not None:
                 df = df[df[relevance_column] > relevance_threshold]

         else:
             df[relevance_column] = None
-            df[
+            df["distance"] = None
         # Sort by relevance
         df = df.sort_values(by=relevance_column, ascending=False)
         return df
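When no reranker is configured (or reranking fails), relevance is derived from vector distance as `relevance = 1 / (1 + distance)`: a distance of 0 maps to 1.0 and larger distances decay toward 0. A runnable sketch of this fallback, including the optional threshold filter:

```python
import pandas as pd

df = pd.DataFrame({"chunk_content": ["a", "b", "c"], "distance": [0.0, 0.5, 2.0]})
df["relevance"] = 1 / (1 + df["distance"])  # 1.00, ~0.67, ~0.33

threshold = 0.4
df = df[df["relevance"] > threshold].sort_values(by="relevance", ascending=False)
print(df)  # rows 'a' and 'b' survive the threshold
```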
@@ -300,7 +303,7 @@ class KnowledgeBaseTable:
         columns = list(df.columns)
         # update id, get from metadata
         df[TableField.ID.value] = df[TableField.METADATA.value].apply(
-            lambda m: None if m is None else m.get(
+            lambda m: None if m is None else m.get("original_doc_id")
         )

         # id on first place
@@ -315,23 +318,14 @@ class KnowledgeBaseTable:
         if documents:
             self.insert_documents(documents)

-    def insert_web_pages(
-        self,
-        urls: List[str],
-        crawl_depth: int,
-        limit: int,
-        filters: List[str] = None
-    ):
+    def insert_web_pages(self, urls: List[str], crawl_depth: int, limit: int, filters: List[str] = None):
         """Process and insert web pages"""
         if not self.document_loader:
             raise ValueError("Document loader not configured")

-        documents = list(
-            urls,
-
-            crawl_depth=crawl_depth,
-            filters=filters
-        ))
+        documents = list(
+            self.document_loader.load_web_pages(urls, limit=limit, crawl_depth=crawl_depth, filters=filters)
+        )
         if documents:
             self.insert_documents(documents)

@@ -349,11 +343,9 @@ class KnowledgeBaseTable:
         if not rows:
             return

-        documents = [
-            content=row.get(
-
-            metadata=row.get('metadata', {})
-        ) for row in rows]
+        documents = [
+            Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
+        ]

         self.insert_documents(documents)

@@ -374,7 +366,7 @@ class KnowledgeBaseTable:
         conditions = db_handler.extract_conditions(query.where)
         doc_id = None
         for condition in conditions:
-            if condition.column ==
+            if condition.column == "chunk_id" and condition.op == FilterOperator.EQUAL:
                 doc_id = condition.value

         if cont_col in query.update_columns:
@@ -385,7 +377,7 @@ class KnowledgeBaseTable:
             doc = Document(
                 id=doc_id,
                 content=content.value,
-                metadata={}  # Empty metadata for content-only updates
+                metadata={},  # Empty metadata for content-only updates
             )
             processed_chunks = self.document_preprocessor.process_documents([doc])
             if processed_chunks:
@@ -424,7 +416,7 @@ class KnowledgeBaseTable:
         query: str,
         keywords: List[str] = None,
         metadata: Dict[str, str] = None,
-        distance_function=DistanceFunction.COSINE_DISTANCE
+        distance_function=DistanceFunction.COSINE_DISTANCE,
     ) -> pd.DataFrame:
         query_df = pd.DataFrame.from_records([{TableField.CONTENT.value: query}])
         embeddings_df = self._df_to_embeddings(query_df)
@@ -433,14 +425,14 @@ class KnowledgeBaseTable:
         embeddings = embeddings_df.iloc[0][TableField.EMBEDDINGS.value]
         keywords_query = None
         if keywords is not None:
-            keywords_query =
+            keywords_query = " ".join(keywords)
         db_handler = self.get_vector_db()
         return db_handler.hybrid_search(
             self._kb.vector_database_table,
             embeddings,
             query=keywords_query,
             metadata=metadata,
-            distance_function=distance_function
+            distance_function=distance_function,
         )

     def clear(self):
@@ -473,7 +465,7 @@ class KnowledgeBaseTable:

         # First adapt column names to identify content and metadata columns
         adapted_df = self._adapt_column_names(df)
-        content_columns = self._kb.params.get(
+        content_columns = self._kb.params.get("content_columns", [TableField.CONTENT.value])

         # Convert DataFrame rows to documents, creating separate documents for each content column
         raw_documents = []
@@ -491,15 +483,11 @@ class KnowledgeBaseTable:

                 metadata = {
                     **base_metadata,
-
-
+                    "original_row_index": str(idx),  # provide link to original row index
+                    "content_column": col,
                 }

-                raw_documents.append(Document(
-                    content=content_str,
-                    id=doc_id,
-                    metadata=metadata
-                ))
+                raw_documents.append(Document(content=content_str, id=doc_id, metadata=metadata))

         # Apply preprocessing to all documents if preprocessor exists
         if self.document_preprocessor:
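A minimal sketch of the document fan-out this section performs: one `Document` per (row, content column) pair, with metadata recording the source row index and column. The `Document` class and the id scheme below are simplified stand-ins for illustration:

```python
import pandas as pd
from dataclasses import dataclass

@dataclass
class Document:  # simplified stand-in for the real Document class
    content: str
    id: str
    metadata: dict

df = pd.DataFrame({"id": ["row-1"], "title": ["a title"], "body": ["some text"]})
content_columns = ["title", "body"]

raw_documents = []
for idx, row in df.iterrows():
    for col in content_columns:
        raw_documents.append(
            Document(
                content=str(row[col]),
                id=f"{row['id']}:{col}",  # hypothetical id scheme for the sketch
                metadata={"original_row_index": str(idx), "content_column": col},
            )
        )

print(len(raw_documents))  # 2: one document per (row, content column) pair
```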
@@ -508,11 +496,16 @@ class KnowledgeBaseTable:
             processed_chunks = raw_documents  # Use raw documents if no preprocessing

         # Convert processed chunks back to DataFrame with standard structure
-        df = pd.DataFrame(
-
-
-
-
+        df = pd.DataFrame(
+            [
+                {
+                    TableField.CONTENT.value: chunk.content,
+                    TableField.ID.value: chunk.id,
+                    TableField.METADATA.value: chunk.metadata,
+                }
+                for chunk in processed_chunks
+            ]
+        )

         if df.empty:
             logger.warning("No valid content found in any content columns")
@@ -523,17 +516,17 @@ class KnowledgeBaseTable:
         df = pd.concat([df, df_emb], axis=1)
         db_handler = self.get_vector_db()

-        if params is not None and params.get(
+        if params is not None and params.get("kb_no_upsert", False):
             # speed up inserting by disable checking existing records
             db_handler.insert(self._kb.vector_database_table, df)
         else:
             db_handler.do_upsert(self._kb.vector_database_table, df)

     def _adapt_column_names(self, df: pd.DataFrame) -> pd.DataFrame:
-
+        """
         Convert input columns for vector db input
         - id, content and metadata
-
+        """
         # Debug incoming data
         logger.debug(f"Input DataFrame columns: {df.columns}")
         logger.debug(f"Input DataFrame first row: {df.iloc[0].to_dict()}")
@@ -542,7 +535,7 @@ class KnowledgeBaseTable:
         columns = list(df.columns)

         # -- prepare id --
-        id_column = params.get(
+        id_column = params.get("id_column")
         if id_column is not None and id_column not in columns:
             id_column = None

@@ -552,8 +545,8 @@ class KnowledgeBaseTable:
         # Also check for case-insensitive 'id' column
         if id_column is None:
             column_map = {col.lower(): col for col in columns}
-            if
-                id_column = column_map[
+            if "id" in column_map:
+                id_column = column_map["id"]

         if id_column is not None:
             columns.remove(id_column)
@@ -568,8 +561,8 @@ class KnowledgeBaseTable:
         logger.debug(f"Added IDs: {df_out[TableField.ID.value].tolist()}")

         # -- prepare content and metadata --
-        content_columns = params.get(
-        metadata_columns = params.get(
+        content_columns = params.get("content_columns", [TableField.CONTENT.value])
+        metadata_columns = params.get("metadata_columns")

         logger.debug(f"Processing with: content_columns={content_columns}, metadata_columns={metadata_columns}")

@@ -577,25 +570,19 @@ class KnowledgeBaseTable:
         if content_columns:
             # Ensure content columns are case-insensitive
             column_map = {col.lower(): col for col in columns}
-            content_columns = [
-                column_map.get(col.lower(), col)
-                for col in content_columns
-            ]
+            content_columns = [column_map.get(col.lower(), col) for col in content_columns]
             logger.debug(f"Mapped content columns: {content_columns}")

         if metadata_columns:
             # Ensure metadata columns are case-insensitive
             column_map = {col.lower(): col for col in columns}
-            metadata_columns = [
-                column_map.get(col.lower(), col)
-                for col in metadata_columns
-            ]
+            metadata_columns = [column_map.get(col.lower(), col) for col in metadata_columns]
             logger.debug(f"Mapped metadata columns: {metadata_columns}")

         if content_columns is not None:
             content_columns = list(set(content_columns).intersection(columns))
             if len(content_columns) == 0:
-                raise ValueError(f
+                raise ValueError(f"Content columns {params.get('content_columns')} not found in dataset: {columns}")

         if metadata_columns is not None:
             metadata_columns = list(set(metadata_columns).intersection(columns))
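The case-insensitive column resolution used in both branches above, as a standalone sketch; names with no match pass through unchanged and are later dropped by the `intersection` check:

```python
columns = ["ID", "Title", "Body"]
requested = ["title", "body", "summary"]

column_map = {col.lower(): col for col in columns}
resolved = [column_map.get(col.lower(), col) for col in requested]
print(resolved)  # ['Title', 'Body', 'summary']
print(list(set(resolved).intersection(columns)))  # ['Title', 'Body'] (order not guaranteed)
```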
@@ -609,12 +596,13 @@ class KnowledgeBaseTable:

         # Add metadata
         if metadata_columns and len(metadata_columns) > 0:
+
             def convert_row_to_metadata(row):
                 metadata = {}
                 for col in metadata_columns:
                     value = row[col]
                     # Convert numpy/pandas types to Python native types
-                    if
+                    if safe_pandas_is_datetime(value) or isinstance(value, pd.Timestamp):
                         value = str(value)
                     elif pd.api.types.is_integer_dtype(value):
                         value = int(value)
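A sketch of the per-value conversion inside `convert_row_to_metadata`: datetimes become strings and numpy integers become plain `int`, keeping the metadata JSON-serializable. `safe_is_datetime` is a local copy of the module-level helper added earlier in this diff:

```python
import numpy as np
import pandas as pd

def safe_is_datetime(value) -> bool:
    # local copy of the module-level safe_pandas_is_datetime helper
    try:
        return pd.api.types.is_datetime64_any_dtype(value)
    except ValueError:
        return False

def convert_metadata_value(value):
    # same conversion order as convert_row_to_metadata above
    if safe_is_datetime(value) or isinstance(value, pd.Timestamp):
        return str(value)  # timestamps serialize as strings
    if pd.api.types.is_integer_dtype(value):
        return int(value)  # numpy integers -> plain Python int
    return value

print(convert_metadata_value(pd.Timestamp("2024-01-01")))  # '2024-01-01 00:00:00'
print(convert_metadata_value(np.int64(7)))  # 7
```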
@@ -654,7 +642,7 @@ class KnowledgeBaseTable:
         if self._vector_db is None:
             database = db.Integration.query.get(self._kb.vector_database_id)
             if database is None:
-                raise ValueError(
+                raise ValueError("Vector database not found. Is it deleted?")
             database_name = database.name
             self._vector_db = self.session.integration_controller.get_data_handler(database_name)
         return self._vector_db
@@ -679,6 +667,15 @@ class KnowledgeBaseTable:

         model_id = self._kb.embedding_model_id

+        if model_id is None:
+            # call litellm handler
+            messages = list(df[TableField.CONTENT.value])
+            embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
+            embedding_params.update(self._kb.params["embedding_model"])
+            results = self.call_litellm_embedding(self.session, embedding_params, messages)
+            results = [[val] for val in results]
+            return pd.DataFrame(results, columns=[TableField.EMBEDDINGS.value])
+
         # get the input columns
         model_rec = db.session.query(db.Predictor).filter_by(id=model_id).first()

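When the knowledge base has no MindsDB embedding-model record (`model_id is None`), contents go through the litellm path, and each returned vector is wrapped in a one-element list so `pd.DataFrame` builds a single `embeddings` column holding whole vectors rather than one column per vector component. A sketch with hypothetical vectors:

```python
import pandas as pd

contents = ["first chunk", "second chunk"]
# hypothetical embedding results: one vector per input text
results = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]

# wrap each vector so the DataFrame has one row per text, one cell per vector
rows = [[vec] for vec in results]
df = pd.DataFrame(rows, columns=["embeddings"])
print(df["embeddings"].tolist())  # [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
```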
@@ -687,19 +684,15 @@ class KnowledgeBaseTable:

         project_datanode = self.session.datahub.get(model_project.name)

-        model_using = model_rec.learn_args.get(
-        input_col = model_using.get(
+        model_using = model_rec.learn_args.get("using", {})
+        input_col = model_using.get("question_column")
         if input_col is None:
-            input_col = model_using.get(
+            input_col = model_using.get("input_column")

         if input_col is not None and input_col != TableField.CONTENT.value:
             df = df.rename(columns={TableField.CONTENT.value: input_col})

-        df_out = project_datanode.predict(
-            model_name=model_rec.name,
-            df=df,
-            params=self.model_params
-        )
+        df_out = project_datanode.predict(model_name=model_rec.name, df=df, params=self.model_params)

         target = model_rec.to_predict[0]
         if target != TableField.EMBEDDINGS.value:
@@ -720,6 +713,23 @@ class KnowledgeBaseTable:
         res = self._df_to_embeddings(df)
         return res[TableField.EMBEDDINGS.value][0]

+    @staticmethod
+    def call_litellm_embedding(session, model_params, messages):
+        args = copy.deepcopy(model_params)
+
+        llm_model = args.pop("model_name")
+        engine = args.pop("provider")
+
+        llm_model = f"{engine}/{llm_model}"
+
+        if "base_url" in args:
+            args["api_base"] = args.pop("base_url")
+
+        module = session.integration_controller.get_handler_module("litellm")
+        if module is None or module.Handler is None:
+            raise ValueError(f'Unable to use "{engine}" provider. Litellm handler is not installed')
+        return module.Handler.embeddings(llm_model, messages, args)
+
     def build_rag_pipeline(self, retrieval_config: dict):
         """
         Builds a RAG pipeline with returned sources
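`call_litellm_embedding` defers to MindsDB's litellm handler; a rough equivalent using the litellm library directly is sketched below (this assumes `litellm` is installed and a valid API key is configured; the model and input values are hypothetical):

```python
import litellm

provider = "openai"                    # corresponds to args.pop("provider")
model_name = "text-embedding-3-small"  # corresponds to args.pop("model_name")

response = litellm.embedding(
    model=f"{provider}/{model_name}",  # litellm routes on the provider/model prefix
    input=["first chunk", "second chunk"],
    # api_base="https://my-gateway.example",  # the handler maps base_url -> api_base
)
vectors = [item["embedding"] for item in response.data]
print(len(vectors))  # one vector per input text
```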
@@ -735,10 +745,10 @@ class KnowledgeBaseTable:
         """
         # Get embedding model from knowledge base
         embeddings_model = None
-        embedding_model_params = get_model_params(self._kb.params.get(
+        embedding_model_params = get_model_params(self._kb.params.get("embedding_model", {}), "default_embedding_model")
         if self._kb.embedding_model:
             # Extract embedding model args from knowledge base table
-            embedding_args = self._kb.embedding_model.learn_args.get(
+            embedding_args = self._kb.embedding_model.learn_args.get("using", {})
             # Construct the embedding model directly
             embeddings_model = construct_model_from_args(embedding_args)
             logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
@@ -750,21 +760,17 @@ class KnowledgeBaseTable:
             logger.debug("Using default embedding model as knowledge base has no embedding model")

         # Update retrieval config with knowledge base parameters
-        kb_params = {
-            'vector_store_config': {
-                'kb_table': self
-            }
-        }
+        kb_params = {"vector_store_config": {"kb_table": self}}

         # Load and validate config
         try:
             rag_config = load_rag_config(retrieval_config, kb_params, embeddings_model)

             # Build LLM if specified
-            if
+            if "llm_model_name" in rag_config:
                 llm_args = {"model_name": rag_config.llm_model_name}
                 if not rag_config.llm_provider:
-                    llm_args[
+                    llm_args["provider"] = get_llm_provider(llm_args)
                 else:
                     llm_args["provider"] = rag_config.llm_provider
                 rag_config.llm = create_chat_model(llm_args)
@@ -785,6 +791,7 @@ class KnowledgeBaseTable:
         if isinstance(base_metadata, str):
             try:
                 import ast
+
                 return ast.literal_eval(base_metadata)
             except (SyntaxError, ValueError):
                 logger.warning(f"Could not parse metadata: {base_metadata}. Using empty dict.")
@@ -794,6 +801,7 @@ class KnowledgeBaseTable:
     def _generate_document_id(self, content: str, content_column: str, provided_id: str = None) -> str:
         """Generate a deterministic document ID using the utility function."""
         from mindsdb.interfaces.knowledge_base.utils import generate_document_id
+
         return generate_document_id(content=content, provided_id=provided_id)

     def _convert_metadata_value(self, value):
@@ -846,14 +854,14 @@ class KnowledgeBaseController:
         self.session = session

     def add(
-
-
-
-
-
-
-
-
+        self,
+        name: str,
+        project_name: str,
+        storage: Identifier,
+        params: dict,
+        preprocessing_config: Optional[dict] = None,
+        if_not_exists: bool = False,
+        # embedding_model: Identifier = None,  # Legacy: Allow MindsDB models to be passed as embedding_model.
     ) -> db.KnowledgeBase:
         """
         Add a new knowledge base to the database
@@ -868,11 +876,11 @@ class KnowledgeBaseController:
         if preprocessing_config is not None:
             PreprocessingConfig(**preprocessing_config)  # Validate before storing
             params = params or {}
-            params[
+            params["preprocessing"] = preprocessing_config

         # Check if vector_size is provided when using sparse vectors
-        is_sparse = params.get(
-        vector_size = params.get(
+        is_sparse = params.get("is_sparse")
+        vector_size = params.get("vector_size")
         if is_sparse and vector_size is None:
             raise ValueError("vector_size is required when is_sparse=True")

@@ -889,41 +897,45 @@ class KnowledgeBaseController:
             return kb
         raise EntityExistsError("Knowledge base already exists", name)

-        embedding_params = copy.deepcopy(config.get(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
+
+        # Legacy
+        # model_name = None
+        # model_project = project
+        # if embedding_model:
+        #     model_name = embedding_model.parts[-1]
+        #     if len(embedding_model.parts) > 1:
+        #         model_project = self.session.database_controller.get_project(embedding_model.parts[-2])
+
+        # elif "embedding_model" in params:
+        #     if isinstance(params["embedding_model"], str):
+        #         # it is model name
+        #         model_name = params["embedding_model"]
+        #     else:
+        #         # it is params for model
+        #         embedding_params.update(params["embedding_model"])
+
+        if "embedding_model" in params:
+            if not isinstance(params["embedding_model"], dict):
+                raise ValueError("embedding_model should be JSON object with model parameters.")
+            embedding_params.update(params["embedding_model"])
+
+        # if model_name is None:  # Legacy
+        model_name = self._create_embedding_model(
+            project.name,
+            params=embedding_params,
+            kb_name=name,
+        )
+        if model_name is not None:
+            params["created_embedding_model"] = model_name

         embedding_model_id = None
         if model_name is not None:
-            model = self.session.model_controller.get_model(
-
-                project_name=model_project.name
-            )
-            model_record = db.Predictor.query.get(model['id'])
+            model = self.session.model_controller.get_model(name=model_name, project_name=project.name)
+            model_record = db.Predictor.query.get(model["id"])
             embedding_model_id = model_record.id

-        reranking_model_params = get_model_params(params.get(
+        reranking_model_params = get_model_params(params.get("reranking_model", {}), "default_reranking_model")
         if reranking_model_params:
             # Get reranking model from params.
             # This is called here to check validaity of the parameters.
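The layering applied here, as a standalone sketch: server-level `default_embedding_model` settings are deep-copied (so shared defaults are never mutated) and then overridden by the per-KB `embedding_model` object; non-dict values are rejected. The config values below are hypothetical:

```python
import copy

# hypothetical server-level defaults
config = {"default_embedding_model": {"provider": "openai", "model_name": "text-embedding-3-small"}}
# per-KB parameters supplied at knowledge-base creation
params = {"embedding_model": {"model_name": "text-embedding-3-large"}}

embedding_params = copy.deepcopy(config.get("default_embedding_model", {}))
if "embedding_model" in params:
    if not isinstance(params["embedding_model"], dict):
        raise ValueError("embedding_model should be JSON object with model parameters.")
    embedding_params.update(params["embedding_model"])

print(embedding_params)  # {'provider': 'openai', 'model_name': 'text-embedding-3-large'}
```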
@@ -931,17 +943,17 @@ class KnowledgeBaseController:

         # search for the vector database table
         if storage is None:
-            cloud_pg_vector = os.environ.get(
+            cloud_pg_vector = os.environ.get("KB_PGVECTOR_URL")
             if cloud_pg_vector:
                 vector_table_name = name
                 # Add sparse vector support for pgvector
                 vector_db_params = {}
                 # Check both explicit parameter and model configuration
-                is_sparse = is_sparse or model_record.learn_args.get(
+                is_sparse = is_sparse or model_record.learn_args.get("using", {}).get("sparse")
                 if is_sparse:
-                    vector_db_params[
+                    vector_db_params["is_sparse"] = True
                 if vector_size is not None:
-                    vector_db_params[
+                    vector_db_params["vector_size"] = vector_size
                 vector_db_name = self._create_persistent_pgvector(vector_db_params)

             else:
@@ -949,26 +961,22 @@ class KnowledgeBaseController:
                 vector_table_name = "default_collection"
                 vector_db_name = self._create_persistent_chroma(name)
                 # memorize to remove it later
-                params[
+                params["default_vector_storage"] = vector_db_name
         elif len(storage.parts) != 2:
-            raise ValueError(
+            raise ValueError("Storage param has to be vector db with table")
         else:
             vector_db_name, vector_table_name = storage.parts

         # create table in vectordb before creating KB
-        self.session.datahub.get(vector_db_name).integration_handler.create_table(
-
-        )
-        vector_database_id = self.session.integration_controller.get(vector_db_name)['id']
+        self.session.datahub.get(vector_db_name).integration_handler.create_table(vector_table_name)
+        vector_database_id = self.session.integration_controller.get(vector_db_name)["id"]

         # Store sparse vector settings in params if specified
         if is_sparse:
             params = params or {}
-            params[
-                'is_sparse': is_sparse
-            }
+            params["vector_config"] = {"is_sparse": is_sparse}
             if vector_size is not None:
-                params[
+                params["vector_config"]["vector_size"] = vector_size

         kb = db.KnowledgeBase(
             name=name,
@@ -990,7 +998,7 @@ class KnowledgeBaseController:
         if self.session.integration_controller.get(vector_store_name):
            return vector_store_name

-        self.session.integration_controller.add(vector_store_name,
+        self.session.integration_controller.add(vector_store_name, "pgvector", params or {})
         return vector_store_name

     def _create_persistent_chroma(self, kb_name, engine="chromadb"):
@@ -1008,7 +1016,7 @@ class KnowledgeBaseController:
         self.session.integration_controller.add(vector_store_name, engine, connection_args)
         return vector_store_name

-    def _create_embedding_model(self, project_name, engine="openai", params: dict = None, kb_name=
+    def _create_embedding_model(self, project_name, engine="openai", params: dict = None, kb_name=""):
         """create a default embedding model for knowledge base, if not specified"""
         model_name = f"kb_embedding_{kb_name}"

@@ -1020,44 +1028,47 @@ class KnowledgeBaseController:
         except PredictorRecordNotFound:
             pass

-        if
-
+        if params.get("provider", None) not in ("openai", "azure"):
+            # try use litellm
+            KnowledgeBaseTable.call_litellm_embedding(self.session, params, ["test"])
+            return

-
+        if "provider" in params:
+            engine = params.pop("provider").lower()

-
-            engine = 'openai'
-            params['provider'] = 'azure'
+        api_key = get_api_key(engine, params, strict=False) or params.pop("api_key")

-        if engine ==
-
-
+        if engine == "azure_openai":
+            engine = "openai"
+            params["provider"] = "azure"
+
+        if engine == "openai":
+            if "question_column" not in params:
+                params["question_column"] = "content"
         if api_key:
             params[f"{engine}_api_key"] = api_key
-        if
-            params.pop(
-        if
-            params[
+        if "api_key" in params:
+            params.pop("api_key")
+        if "base_url" in params:
+            params["api_base"] = params.pop("base_url")

-        params[
-        params[
-        params[
+        params["engine"] = engine
+        params["join_learn_process"] = True
+        params["mode"] = "embedding"

         # Include API key if provided.
         statement = CreatePredictor(
             name=Identifier(parts=[project_name, model_name]),
             using=params,
-            targets=[
-                Identifier(parts=[TableField.EMBEDDINGS.value])
-            ]
+            targets=[Identifier(parts=[TableField.EMBEDDINGS.value])],
         )

         command_executor = ExecuteCommands(self.session)
         resp = command_executor.answer_create_predictor(statement, project_name)
         # check model status
         record = resp.data.records[0]
-        if record[
-            raise ValueError(
+        if record["STATUS"] == "error":
+            raise ValueError("Embedding model error:" + record["ERROR"])
         return model_name

     def delete(self, name: str, project_name: int, if_exists: bool = False) -> None:
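The routing decision in `_create_embedding_model`, reduced to a sketch: only `openai` and `azure` providers create a MindsDB embedding-model record; any other provider is validated with a one-off test embedding through litellm and no model record is created:

```python
def choose_embedding_path(params: dict) -> str:
    # sketch: OpenAI/Azure go through the built-in openai engine;
    # every other provider is exercised once through litellm instead
    if params.get("provider") not in ("openai", "azure"):
        return "litellm"
    return "openai engine"

print(choose_embedding_path({"provider": "openai"}))  # openai engine
print(choose_embedding_path({"provider": "cohere"}))  # litellm
```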
@@ -1084,16 +1095,16 @@ class KnowledgeBaseController:
         db.session.commit()

         # drop objects if they were created automatically
-        if
+        if "default_vector_storage" in kb.params:
             try:
-                handler = self.session.datahub.get(kb.params[
+                handler = self.session.datahub.get(kb.params["default_vector_storage"]).integration_handler
                 handler.drop_table(kb.vector_database_table)
-                self.session.integration_controller.delete(kb.params[
+                self.session.integration_controller.delete(kb.params["default_vector_storage"])
             except EntityNotExistsError:
                 pass
-        if
+        if "created_embedding_model" in kb.params:
             try:
-                self.session.model_controller.delete_model(kb.params[
+                self.session.model_controller.delete_model(kb.params["created_embedding_model"], project_name)
             except EntityNotExistsError:
                 pass

@@ -1124,11 +1135,11 @@ class KnowledgeBaseController:
         if kb is not None:
             table = KnowledgeBaseTable(kb, self.session)
             if params:
-                table.model_params = params.get(
+                table.model_params = params.get("model")

             # Always configure preprocessing - either from params or default
-            if kb.params and
-                table.configure_preprocessing(kb.params[
+            if kb.params and "preprocessing" in kb.params:
+                table.configure_preprocessing(kb.params["preprocessing"])
             else:
                 table.configure_preprocessing(None)  # This ensures default preprocessor is created

@@ -1144,32 +1155,30 @@ class KnowledgeBaseController:
         if project_name is not None:
             projects = [p for p in projects if p.name == project_name]

-        query = (
-            db.
-            .filter(db.KnowledgeBase.project_id.in_(list([p.id for p in projects])))
+        query = db.session.query(db.KnowledgeBase).filter(
+            db.KnowledgeBase.project_id.in_(list([p.id for p in projects]))
         )

         data = []
-        project_names = {
-            i.id: i.name
-            for i in project_controller.get_list()
-        }
+        project_names = {i.id: i.name for i in project_controller.get_list()}

         for record in query:
             vector_database = record.vector_database
             embedding_model = record.embedding_model

-            data.append(
-
-
-
-
-
-
-
-
-
-
+            data.append(
+                {
+                    "id": record.id,
+                    "name": record.name,
+                    "project_id": record.project_id,
+                    "project_name": project_names[record.project_id],
+                    "embedding_model": embedding_model.name if embedding_model is not None else None,
+                    "vector_database": None if vector_database is None else vector_database.name,
+                    "vector_database_table": record.vector_database_table,
+                    "query_id": record.query_id,
+                    "params": record.params,
+                }
+            )

         return data
