MindsDB 25.6.2.0-py3-none-any.whl → 25.6.3.1-py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of MindsDB has been flagged as potentially problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +25 -4
- mindsdb/api/a2a/task_manager.py +68 -6
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
- mindsdb/api/executor/utilities/sql.py +18 -19
- mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
- mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -0
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +94 -8
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +19 -1
- mindsdb/integrations/libs/api_handler.py +19 -1
- mindsdb/integrations/libs/base.py +86 -2
- mindsdb/interfaces/agents/agents_controller.py +32 -6
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/mindsdb_database_agent.py +27 -34
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -6
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +4 -0
- mindsdb/interfaces/database/integrations.py +4 -2
- mindsdb/interfaces/knowledge_base/controller.py +29 -24
- mindsdb/interfaces/knowledge_base/evaluate.py +0 -3
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +17 -86
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +28 -3
- mindsdb/interfaces/skills/skills_controller.py +0 -23
- mindsdb/interfaces/skills/sql_agent.py +9 -5
- mindsdb/interfaces/storage/db.py +20 -4
- mindsdb/utilities/config.py +5 -1
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/METADATA +247 -247
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/RECORD +35 -35
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/knowledge_base/controller.py
CHANGED

@@ -9,6 +9,7 @@ import numpy as np
 
 from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
 from mindsdb_sql_parser.ast.mindsdb import CreatePredictor
+from mindsdb_sql_parser import parse_sql
 
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 
@@ -52,6 +53,13 @@ def get_model_params(model_params: dict, default_config_key: str):
     """
     Get model parameters by combining default config with user provided parameters.
     """
+    # If the default config key is for reranking and the switch to use the default LLM is enabled,
+    # switch to the default LLM model.
+    if default_config_key == "default_reranking_model" and config.get("default_reranking_model").get(
+        "use_default_llm", False
+    ):
+        default_config_key = "default_llm_model"
+
     combined_model_params = copy.deepcopy(config.get(default_config_key, {}))
 
     if model_params:
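The new branch redirects reranking defaults to the default LLM configuration when the `use_default_llm` switch is set. A minimal sketch of that fallback, using a plain dict in place of MindsDB's `config` object (the config contents and the merge step are simplified for illustration):

import copy

# Invented config: the reranking entry carries only the switch.
config = {
    "default_llm_model": {"provider": "openai", "model_name": "gpt-4o"},
    "default_reranking_model": {"use_default_llm": True},
}

def get_model_params(model_params: dict, default_config_key: str) -> dict:
    # Same redirect as the hunk above: reranking defaults fall back to LLM defaults.
    if default_config_key == "default_reranking_model" and config.get(
        "default_reranking_model", {}
    ).get("use_default_llm", False):
        default_config_key = "default_llm_model"

    combined = copy.deepcopy(config.get(default_config_key, {}))
    if model_params:
        combined.update(model_params)  # merge simplified; the real code does more
    return combined

print(get_model_params({}, "default_reranking_model"))
# -> {'provider': 'openai', 'model_name': 'gpt-4o'}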
@@ -97,6 +105,8 @@ def get_reranking_model_from_params(reranking_model_params: dict):
     params_copy["api_key"] = get_api_key(provider, params_copy, strict=False)
     params_copy["model"] = params_copy.pop("model_name", None)
 
+    params_copy.pop("use_default_llm", None)
+
     return BaseLLMReranker(**params_copy)
 
 
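Popping `use_default_llm` matters because the remaining keys are splatted into the reranker constructor; the switch is a control flag, not a model parameter. A short illustration (values invented):

# Sketch: the control flag is dropped so only model kwargs reach the constructor.
params_copy = {"model": "gpt-4o", "api_key": "<key>", "use_default_llm": True}
params_copy.pop("use_default_llm", None)
# BaseLLMReranker(**params_copy)  # no unexpected 'use_default_llm' keyword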
@@ -359,23 +369,30 @@ class KnowledgeBaseTable:
 
     def insert_query_result(self, query: str, project_name: str):
         """Process and insert SQL query results"""
-
-            raise ValueError("Document loader not configured")
+        ast_query = parse_sql(query)
 
-
-
-
+        command_executor = ExecuteCommands(self.session)
+        response = command_executor.execute_command(ast_query, project_name)
+
+        if response.error_code is not None:
+            raise ValueError(f"Error executing query: {response.error_message}")
+
+        if response.data is None:
+            raise ValueError("Query returned no data")
+
+        records = response.data.records
+        df = pd.DataFrame(records)
+
+        self.insert(df)
 
     def insert_rows(self, rows: List[Dict]):
         """Process and insert raw data rows"""
         if not rows:
             return
 
-
-            Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
-        ]
+        df = pd.DataFrame(rows)
 
-        self.
+        self.insert(df)
 
     def insert_documents(self, documents: List[Document]):
         """Process and insert documents with preprocessing if configured"""
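Both insert paths now funnel data through a pandas DataFrame before calling `self.insert(df)`. What `insert_rows` now does with raw dicts, shown with invented example rows:

import pandas as pd

rows = [
    {"id": "a1", "content": "first chunk", "metadata": {"source": "manual"}},
    {"id": "a2", "content": "second chunk", "metadata": {"source": "manual"}},
]
# One DataFrame, so downstream insertion is identical to the query-result path.
df = pd.DataFrame(rows)
print(df.columns.tolist())  # ['id', 'content', 'metadata']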
@@ -1201,22 +1218,10 @@ class KnowledgeBaseController:
         project_names = {i.id: i.name for i in project_controller.get_list()}
 
         for record in query:
-
-
+            kb = record.as_dict(with_secrets=self.session.show_secrets)
+            kb["project_name"] = project_names[record.project_id]
 
-            data.append(
-                {
-                    "id": record.id,
-                    "name": record.name,
-                    "project_id": record.project_id,
-                    "project_name": project_names[record.project_id],
-                    "embedding_model": embedding_model.name if embedding_model is not None else None,
-                    "vector_database": None if vector_database is None else vector_database.name,
-                    "vector_database_table": record.vector_database_table,
-                    "query_id": record.query_id,
-                    "params": record.params,
-                }
-            )
+            data.append(kb)
 
         return data
 
mindsdb/interfaces/knowledge_base/evaluate.py
CHANGED

@@ -492,8 +492,6 @@ class EvaluateDocID(EvaluateBase):
         total_questions = len(stats)
         total_found = sum([1 for stat in stats if stat["doc_found"]])
 
-        total_accurately_retrieved = sum([1 for stat in stats if stat["doc_found"]])
-
         accurate_in_top_10 = sum([1 for stat in stats if stat["doc_found"] and stat["doc_position"] < 10])
 
         # calculate recall curve by position
@@ -512,7 +510,6 @@ class EvaluateDocID(EvaluateBase):
         return {
             "total": total_questions,
             "total_found": total_found,
-            "retrieved_in_top_k": total_accurately_retrieved,
             "retrieved_in_top_10": accurate_in_top_10,
             "cumulative_recall": cumulative_recall,
             "avg_query_time": avg_query_time,
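Note that the removed `total_accurately_retrieved` was computed with exactly the same expression as `total_found`, so the dropped `retrieved_in_top_k` key duplicated `total_found` rather than measuring anything distinct.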
mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py
CHANGED

@@ -2,7 +2,6 @@ import os
 from typing import List, Iterator
 from langchain_core.documents import Document as LangchainDocument
 from langchain_text_splitters import MarkdownHeaderTextSplitter
-import pandas as pd
 
 from mindsdb.interfaces.file.file_controller import FileController
 from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
@@ -20,12 +19,12 @@ class DocumentLoader:
     """Handles loading documents from various sources including SQL queries"""
 
     def __init__(
-
-
-
-
-
-
+        self,
+        file_controller: FileController,
+        file_splitter: FileSplitter,
+        markdown_splitter: MarkdownHeaderTextSplitter,
+        file_loader_class=FileLoader,
+        mysql_proxy=None,
     ):
         """
         Initialize with required dependencies
@@ -52,8 +51,8 @@ class DocumentLoader:
         for doc in loader.lazy_load():
             # Add file extension to metadata for proper splitting
             extension = os.path.splitext(file_path)[1].lower()
-            doc.metadata[
-            doc.metadata[
+            doc.metadata["extension"] = extension
+            doc.metadata["source"] = file_name
 
             # Use FileSplitter to handle the document based on its type
             split_docs = self.file_splitter.split_documents([doc])
@@ -62,34 +61,22 @@ class DocumentLoader:
                 metadata = doc.metadata.copy()
                 metadata.update(split_doc.metadata or {})
 
-                yield Document(
-                    content=split_doc.page_content,
-                    metadata=metadata
-                )
+                yield Document(content=split_doc.page_content, metadata=metadata)
 
     def load_web_pages(
-
-
-
-
-
+        self,
+        urls: List[str],
+        crawl_depth: int,
+        limit: int,
+        filters: List[str] = None,
     ) -> Iterator[Document]:
         """Load and split documents from web pages"""
-        websites_df = get_all_websites(
-            urls,
-            crawl_depth=crawl_depth,
-            limit=limit,
-            filters=filters
-        )
+        websites_df = get_all_websites(urls, crawl_depth=crawl_depth, limit=limit, filters=filters)
 
         for _, row in websites_df.iterrows():
             # Create a document with HTML extension for proper splitting
             doc = LangchainDocument(
-                page_content=row[
-                metadata={
-                    'extension': '.html',
-                    'url': row['url']
-                }
+                page_content=row["text_content"], metadata={"extension": ".html", "url": row["url"]}
             )
 
             # Use FileSplitter to handle HTML content
@@ -98,60 +85,4 @@ class DocumentLoader:
                 metadata = doc.metadata.copy()
                 metadata.update(split_doc.metadata or {})
 
-                yield Document(
-                    content=split_doc.page_content,
-                    metadata=metadata
-                )
-
-    def load_query_result(self, query: str, project_name: str) -> Iterator[Document]:
-        """
-        Load documents from SQL query results
-
-        Args:
-            query: SQL query to execute
-            project_name: Name of the project context
-
-        Returns:
-            Iterator of Document objects
-
-        Raises:
-            ValueError: If mysql_proxy is not configured or query returns no data
-        """
-        if not self.mysql_proxy:
-            raise ValueError("MySQL proxy not configured")
-
-        if not query:
-            return
-
-        # Set project context and execute query
-        self.mysql_proxy.set_context({'db': project_name})
-        query_result = self.mysql_proxy.process_query(query)
-
-        if query_result.type != 'table':
-            raise ValueError('Query returned no data')
-
-        # Convert query result to DataFrame
-        df = query_result.data.to_df()
-
-        # Process each row into a Document
-        for _, row in df.iterrows():
-            # Extract id, content and metadata
-            content = str(row.get('content', ''))
-            id = row.get('id', None)
-
-            # Convert remaining columns to metadata
-            metadata = {
-                col: str(row[col])
-                for col in df.columns
-                if col != 'content' and not pd.isna(row[col])
-            }
-            metadata['source'] = 'query'
-
-            # Split content using recursive splitter
-            if content:
-
-                yield Document(
-                    id=id,
-                    content=content,
-                    metadata=metadata
-                )
+                yield Document(content=split_doc.page_content, metadata=metadata)
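With this removal, the SQL-query loading path moves out of DocumentLoader entirely; its replacement is KnowledgeBaseTable.insert_query_result in the controller.py hunk above, which executes the query through ExecuteCommands and inserts the resulting DataFrame directly. That also explains why the pandas import was dropped from this module.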
mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py
CHANGED

@@ -3,6 +3,7 @@ import re
 import json
 from pydantic import BaseModel, Field
 from langchain_core.tools import BaseTool
+from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star
 
 
 class KnowledgeBaseListToolInput(BaseModel):
@@ -55,6 +56,26 @@ class KnowledgeBaseInfoTool(BaseTool):
         except (json.JSONDecodeError, TypeError):
             pass
 
+        def strip(s):
+            length = -1
+            while length != len(s):
+                length = len(s)
+
+                # remove ```
+                if s.startswith("```"):
+                    s = s[3:]
+                if s.endswith("```"):
+                    s = s[:-3]
+
+                # remove trailing new lines
+                s = s.strip("\n")
+
+                # remove extra quotes
+                for q in ('"', "'", "`"):
+                    if s.count(q) == 1:
+                        s = s.strip(q)
+            return s
+
         # Finally, try the original regex pattern for $START$ and $STOP$ markers
         match = re.search(r"\$START\$(.*?)\$STOP\$", tool_input, re.DOTALL)
         if not match:
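The nested `strip` helper loops until the string stops changing, so stacked wrappers come off in a single call, and a quote is only removed when it appears exactly once (that is, when it is unbalanced). A quick check, assuming the helper above is in scope:

print(strip("```\nmy_kb`\n```"))  # fences, newlines, lone backtick removed -> my_kb
print(strip("'my_kb'"))           # the quote appears twice, so it is kept -> 'my_kb'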
@@ -63,12 +84,14 @@ class KnowledgeBaseInfoTool(BaseTool):
                 return [kb.strip() for kb in tool_input.split(",")]
             # If it's just a single string without formatting, return it as a single item
             if tool_input.strip():
-                return [
+                return [strip(tool_input)]
             return []
 
         # Extract and clean the knowledge base names
         kb_names_str = match.group(1).strip()
         kb_names = re.findall(r"`([^`]+)`", kb_names_str)
+
+        kb_names = [strip(n) for n in kb_names]
         return kb_names
 
     def _run(self, tool_input: str) -> str:
@@ -83,7 +106,7 @@ class KnowledgeBaseInfoTool(BaseTool):
         for kb_name in kb_names:
             try:
                 # Get knowledge base schema
-                schema_result = self.db.run_no_throw(
+                schema_result = self.db.run_no_throw(str(Describe(kb_name, type="knowledge_base")))
 
                 if not schema_result:
                     results.append(f"Knowledge base `{kb_name}` not found or has no schema information.")
@@ -111,7 +134,9 @@ class KnowledgeBaseInfoTool(BaseTool):
                 kb_info += "```\n\n"
 
                 # Get sample data
-                sample_data = self.db.run_no_throw(
+                sample_data = self.db.run_no_throw(
+                    str(Select(targets=[Star()], from_table=Identifier(kb_name), limit=Constant(20)))
+                )
 
                 # Sample data
                 kb_info += "### Sample Data:\n"
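Both call sites now build their SQL through mindsdb_sql_parser AST nodes and render with `str(...)` rather than concatenating strings, which sidesteps quoting problems with model-supplied knowledge base names. A sketch mirroring the two calls above (printed output is approximate; exact formatting depends on the parser's renderer):

from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star

print(str(Describe("my_kb", type="knowledge_base")))
# roughly: DESCRIBE KNOWLEDGE_BASE my_kb
print(str(Select(targets=[Star()], from_table=Identifier("my_kb"), limit=Constant(20))))
# roughly: SELECT * FROM my_kb LIMIT 20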
mindsdb/interfaces/skills/skills_controller.py
CHANGED

@@ -6,8 +6,6 @@ from sqlalchemy.orm.attributes import flag_modified
 
 from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.database.projects import ProjectController
-from mindsdb.interfaces.data_catalog.data_catalog_loader import DataCatalogLoader
-from mindsdb.interfaces.skills.skill_tool import SkillType
 from mindsdb.utilities.config import config
 from mindsdb.utilities import log
 
@@ -99,27 +97,6 @@ class SkillsController:
         if skill is not None:
             raise ValueError(f"Skill with name already exists: {name}")
 
-        # Load metadata to data catalog (if enabled) if the skill is Text-to-SQL.
-        if config.get("data_catalog", {}).get("enabled", False):
-            if type == SkillType.TEXT2SQL.value and "include_tables" in params:
-                # TODO: Is it possible to create a skill with complete access to the database with the new agent syntax?
-                # TODO: Handle the case where `ignore_tables` is provided. Is this a valid parameter?
-                # TODO: Knowledge Bases?
-                database_table_map = {}
-                for table in params["include_tables"]:
-                    parts = table.split(".", 1)
-                    database_table_map[parts[0]] = database_table_map.get(parts[0], []) + [parts[1]]
-
-                for database_name, table_names in database_table_map.items():
-                    data_catalog_loader = DataCatalogLoader(database_name=database_name, table_names=table_names)
-                    data_catalog_loader.load_metadata()
-
-            elif type in [SkillType.TEXT2SQL.value, SkillType.TEXT2SQL_LEGACY.value] and "database" in params:
-                data_catalog_loader = DataCatalogLoader(
-                    database_name=params["database"], table_names=params["tables"] if "tables" in params else None
-                )
-                data_catalog_loader.load_metadata()
-
         new_skill = db.Skills(
             name=name,
             project_id=project.id,
mindsdb/interfaces/skills/sql_agent.py
CHANGED

@@ -76,7 +76,7 @@ def split_table_name(table_name: str) -> List[str]:
     result.append(current.strip("`"))
 
     # ensure we split the table name
-    result = [r.split(".") for r in result][0]
+    # result = [r.split(".") for r in result][0]
 
     return result
 
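The disabled line replaced `result` with the dot-split of its first element only (the trailing `[0]` discards everything else), so commenting it out makes `split_table_name` return all collected identifier parts unchanged.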
@@ -402,11 +402,15 @@ class SQLAgent:
         """
        if config.get("data_catalog", {}).get("enabled", False):
             database_table_map = {}
-            for name in self.get_usable_table_names():
+            for name in table_names or self.get_usable_table_names():
                 name = name.replace("`", "")
 
-                # TODO: Can there be situations where the database name is returned from the above method?
                 parts = name.split(".", 1)
+                # TODO: Will there be situations where parts has more than 2 elements? Like a schema?
+                # This is unlikely given that we default to a single schema per database.
+                if len(parts) == 1:
+                    raise ValueError(f"Invalid table name: {name}. Expected format is 'database.table'.")
+
                 database_table_map[parts[0]] = database_table_map.get(parts[0], []) + [parts[1]]
 
             data_catalog_str = ""
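A distilled form of the new check: every table name fed to the data catalog must be qualified as 'database.table'. The helper name here is ours, for illustration only:

def split_db_table(name: str) -> tuple:
    # Same steps as the loop body above: strip backticks, split once on '.',
    # and reject unqualified names.
    name = name.replace("`", "")
    parts = name.split(".", 1)
    if len(parts) == 1:
        raise ValueError(f"Invalid table name: {name}. Expected format is 'database.table'.")
    return parts[0], parts[1]

print(split_db_table("mydb.sales"))  # -> ('mydb', 'sales')
# split_db_table("sales") now raises ValueError instead of producing a bad map entry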
@@ -430,8 +434,8 @@ class SQLAgent:
             else:
                 all_tables.append(Identifier(name))
 
-
-
+        if table_names is not None:
+            all_tables = self._resolve_table_names(table_names, all_tables)
 
         tables_info = []
         for table in all_tables:
mindsdb/interfaces/storage/db.py
CHANGED
@@ -1,6 +1,6 @@
 import json
 import datetime
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import numpy as np
 from sqlalchemy import (
@@ -494,17 +494,33 @@ class KnowledgeBase(Base):
 
     __table_args__ = (UniqueConstraint("name", "project_id", name="unique_knowledge_base_name_project_id"),)
 
-    def as_dict(self) -> Dict:
+    def as_dict(self, with_secrets: Optional[bool] = True) -> Dict:
+        params = self.params.copy()
+        embedding_model = params.pop("embedding_model", None)
+        reranking_model = params.pop("reranking_model", None)
+
+        if not with_secrets:
+            if embedding_model and "api_key" in embedding_model:
+                embedding_model["api_key"] = "******"
+
+            if reranking_model and "api_key" in reranking_model:
+                reranking_model["api_key"] = "******"
+
         return {
             "id": self.id,
             "name": self.name,
             "project_id": self.project_id,
-            "embedding_model": None if self.embedding_model is None else self.embedding_model.name,
             "vector_database": None if self.vector_database is None else self.vector_database.name,
             "vector_database_table": self.vector_database_table,
             "updated_at": self.updated_at,
             "created_at": self.created_at,
-            "
+            "query_id": self.query_id,
+            "embedding_model": embedding_model,
+            "reranking_model": reranking_model,
+            "metadata_columns": params.pop("metadata_columns", None),
+            "content_columns": params.pop("content_columns", None),
+            "id_column": params.pop("id_column", None),
+            "params": params,
         }
 
 
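The `with_secrets` flag masks API keys in the serialized model settings before they leave the record; the KnowledgeBaseController listing shown earlier passes `with_secrets=self.session.show_secrets`. A simplified sketch of the masking rule (the real method also lifts model and column settings out of `params`, as shown above):

def mask_api_key(model_params, with_secrets):
    # Mirrors the masking above; copies instead of mutating, for illustration.
    if model_params and not with_secrets and "api_key" in model_params:
        model_params = {**model_params, "api_key": "******"}
    return model_params

print(mask_api_key({"provider": "openai", "api_key": "sk-secret"}, with_secrets=False))
# -> {'provider': 'openai', 'api_key': '******'}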
mindsdb/utilities/config.py
CHANGED
@@ -400,7 +400,11 @@ class Config:
         bool: True if config was loaded or updated
         """
 
-        if
+        if (
+            self.auto_config_path.is_file()
+            and self.auto_config_path.read_text() != ""
+            and self.auto_config_mtime != self.auto_config_path.stat().st_mtime
+        ):
             try:
                 self._auto_config = json.loads(self.auto_config_path.read_text())
             except json.JSONDecodeError as e: