MindsDB: mindsdb-25.6.2.0-py3-none-any.whl → mindsdb-25.6.3.1-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +25 -4
- mindsdb/api/a2a/task_manager.py +68 -6
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
- mindsdb/api/executor/utilities/sql.py +18 -19
- mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
- mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -0
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +94 -8
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +19 -1
- mindsdb/integrations/libs/api_handler.py +19 -1
- mindsdb/integrations/libs/base.py +86 -2
- mindsdb/interfaces/agents/agents_controller.py +32 -6
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/mindsdb_database_agent.py +27 -34
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +22 -6
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +4 -0
- mindsdb/interfaces/database/integrations.py +4 -2
- mindsdb/interfaces/knowledge_base/controller.py +29 -24
- mindsdb/interfaces/knowledge_base/evaluate.py +0 -3
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +17 -86
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +28 -3
- mindsdb/interfaces/skills/skills_controller.py +0 -23
- mindsdb/interfaces/skills/sql_agent.py +9 -5
- mindsdb/interfaces/storage/db.py +20 -4
- mindsdb/utilities/config.py +5 -1
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/METADATA +247 -247
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/RECORD +35 -35
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.2.0.dist-info → mindsdb-25.6.3.1.dist-info}/top_level.txt +0 -0
|
@@ -14,8 +14,10 @@ from mindsdb.integrations.utilities.rag.splitters.file_splitter import FileSplit
|
|
|
14
14
|
from mindsdb.interfaces.file.file_controller import FileController
|
|
15
15
|
from mindsdb.interfaces.knowledge_base.preprocessing.constants import (
|
|
16
16
|
DEFAULT_CONTEXT_DOCUMENT_LIMIT,
|
|
17
|
-
DEFAULT_CRAWL_DEPTH,
|
|
18
|
-
|
|
17
|
+
DEFAULT_CRAWL_DEPTH,
|
|
18
|
+
DEFAULT_WEB_FILTERS,
|
|
19
|
+
DEFAULT_MARKDOWN_HEADERS,
|
|
20
|
+
DEFAULT_WEB_CRAWL_LIMIT,
|
|
19
21
|
)
|
|
20
22
|
from mindsdb.interfaces.knowledge_base.preprocessing.document_loader import DocumentLoader
|
|
21
23
|
from mindsdb.metrics.metrics import api_endpoint_metrics
|
|
@@ -31,12 +33,12 @@ from mindsdb_sql_parser.ast import Identifier
|
|
|
31
33
|
logger = log.getLogger(__name__)
|
|
32
34
|
|
|
33
35
|
|
|
34
|
-
@ns_conf.route(
|
|
36
|
+
@ns_conf.route("/<project_name>/knowledge_bases")
|
|
35
37
|
class KnowledgeBasesResource(Resource):
|
|
36
|
-
@ns_conf.doc(
|
|
37
|
-
@api_endpoint_metrics(
|
|
38
|
+
@ns_conf.doc("list_knowledge_bases")
|
|
39
|
+
@api_endpoint_metrics("GET", "/knowledge_bases")
|
|
38
40
|
def get(self, project_name):
|
|
39
|
-
|
|
41
|
+
"""List all knowledge bases"""
|
|
40
42
|
session = SessionController()
|
|
41
43
|
project_controller = ProjectController()
|
|
42
44
|
try:
|
|
@@ -44,43 +46,37 @@ class KnowledgeBasesResource(Resource):
|
|
|
44
46
|
except EntityNotExistsError:
|
|
45
47
|
# Project must exist.
|
|
46
48
|
return http_error(
|
|
47
|
-
HTTPStatus.NOT_FOUND,
|
|
48
|
-
'Project not found',
|
|
49
|
-
f'Project with name {project_name} does not exist'
|
|
49
|
+
HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
# KB Controller already returns dict.
|
|
53
53
|
return session.kb_controller.list(project_name)
|
|
54
54
|
|
|
55
|
-
@ns_conf.doc(
|
|
56
|
-
@api_endpoint_metrics(
|
|
55
|
+
@ns_conf.doc("create_knowledge_base")
|
|
56
|
+
@api_endpoint_metrics("POST", "/knowledge_bases")
|
|
57
57
|
def post(self, project_name):
|
|
58
|
-
|
|
58
|
+
"""Create a knowledge base"""
|
|
59
59
|
|
|
60
60
|
# Check for required parameters.
|
|
61
|
-
if
|
|
61
|
+
if "knowledge_base" not in request.json:
|
|
62
62
|
return http_error(
|
|
63
|
-
HTTPStatus.BAD_REQUEST,
|
|
64
|
-
'Missing parameter',
|
|
65
|
-
'Must provide "knowledge_base" parameter in POST body'
|
|
63
|
+
HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "knowledge_base" parameter in POST body'
|
|
66
64
|
)
|
|
67
65
|
|
|
68
|
-
knowledge_base = request.json[
|
|
66
|
+
knowledge_base = request.json["knowledge_base"]
|
|
69
67
|
# Explicitly require embedding model & vector database.
|
|
70
|
-
required_fields = [
|
|
68
|
+
required_fields = ["name"]
|
|
71
69
|
for field in required_fields:
|
|
72
70
|
if field not in knowledge_base:
|
|
73
71
|
return http_error(
|
|
74
|
-
HTTPStatus.BAD_REQUEST,
|
|
75
|
-
'Missing parameter',
|
|
76
|
-
f'Must provide "{field}" field in "knowledge_base"'
|
|
72
|
+
HTTPStatus.BAD_REQUEST, "Missing parameter", f'Must provide "{field}" field in "knowledge_base"'
|
|
77
73
|
)
|
|
78
|
-
if
|
|
79
|
-
if
|
|
74
|
+
if "storage" in knowledge_base:
|
|
75
|
+
if "database" not in knowledge_base["storage"] or "table" not in knowledge_base["storage"]:
|
|
80
76
|
return http_error(
|
|
81
77
|
HTTPStatus.BAD_REQUEST,
|
|
82
|
-
|
|
83
|
-
'Must provide "database" and "table" field in "storage" param'
|
|
78
|
+
"Missing parameter",
|
|
79
|
+
'Must provide "database" and "table" field in "storage" param',
|
|
84
80
|
)
|
|
85
81
|
|
|
86
82
|
session = SessionController()
|
|
@@ -90,57 +86,65 @@ class KnowledgeBasesResource(Resource):
|
|
|
90
86
|
except EntityNotExistsError:
|
|
91
87
|
# Project must exist.
|
|
92
88
|
return http_error(
|
|
93
|
-
HTTPStatus.NOT_FOUND,
|
|
94
|
-
'Project not found',
|
|
95
|
-
f'Project with name {project_name} does not exist'
|
|
89
|
+
HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
|
|
96
90
|
)
|
|
97
91
|
|
|
98
|
-
kb_name = knowledge_base.get(
|
|
92
|
+
kb_name = knowledge_base.get("name")
|
|
99
93
|
existing_kb = session.kb_controller.get(kb_name, project.id)
|
|
100
94
|
if existing_kb is not None:
|
|
101
95
|
# Knowledge Base must not exist.
|
|
102
96
|
return http_error(
|
|
103
97
|
HTTPStatus.CONFLICT,
|
|
104
|
-
|
|
105
|
-
f
|
|
98
|
+
"Knowledge Base already exists",
|
|
99
|
+
f"Knowledge Base with name {kb_name} already exists",
|
|
106
100
|
)
|
|
107
101
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
102
|
+
# Legacy: Support for embedding model identifier.
|
|
103
|
+
# embedding_model_identifier = None
|
|
104
|
+
# if knowledge_base.get('model'):
|
|
105
|
+
# embedding_model_identifier = Identifier(parts=[knowledge_base['model']])
|
|
111
106
|
|
|
112
|
-
storage = knowledge_base.get(
|
|
107
|
+
storage = knowledge_base.get("storage")
|
|
113
108
|
embedding_table_identifier = None
|
|
114
109
|
if storage is not None:
|
|
115
|
-
embedding_table_identifier = Identifier(parts=[storage[
|
|
110
|
+
embedding_table_identifier = Identifier(parts=[storage["database"], storage["table"]])
|
|
111
|
+
|
|
112
|
+
params = knowledge_base.get("params", {})
|
|
113
|
+
|
|
114
|
+
optional_parameter_fields = [
|
|
115
|
+
"embedding_model",
|
|
116
|
+
"reranking_model",
|
|
117
|
+
"content_columns",
|
|
118
|
+
"metadata_columns",
|
|
119
|
+
"id_column",
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
for field in optional_parameter_fields:
|
|
123
|
+
if field in knowledge_base:
|
|
124
|
+
params[field] = knowledge_base[field]
|
|
116
125
|
|
|
117
126
|
try:
|
|
118
127
|
new_kb = session.kb_controller.add(
|
|
119
128
|
kb_name,
|
|
120
129
|
project.name,
|
|
121
|
-
embedding_model_identifier,
|
|
122
130
|
embedding_table_identifier,
|
|
123
|
-
params=
|
|
124
|
-
preprocessing_config=knowledge_base.get(
|
|
131
|
+
params=params,
|
|
132
|
+
preprocessing_config=knowledge_base.get("preprocessing"),
|
|
125
133
|
)
|
|
126
134
|
except ValueError as e:
|
|
127
|
-
return http_error(
|
|
128
|
-
HTTPStatus.BAD_REQUEST,
|
|
129
|
-
'Invalid preprocessing configuration',
|
|
130
|
-
str(e)
|
|
131
|
-
)
|
|
135
|
+
return http_error(HTTPStatus.BAD_REQUEST, "Invalid preprocessing configuration", str(e))
|
|
132
136
|
|
|
133
|
-
return new_kb.as_dict(), HTTPStatus.CREATED
|
|
137
|
+
return new_kb.as_dict(session.show_secrets), HTTPStatus.CREATED
|
|
134
138
|
|
|
135
139
|
|
|
136
|
-
@ns_conf.route(
|
|
137
|
-
@ns_conf.param(
|
|
138
|
-
@ns_conf.param(
|
|
140
|
+
@ns_conf.route("/<project_name>/knowledge_bases/<knowledge_base_name>")
|
|
141
|
+
@ns_conf.param("project_name", "Name of the project")
|
|
142
|
+
@ns_conf.param("knowledge_base_name", "Name of the knowledge_base")
|
|
139
143
|
class KnowledgeBaseResource(Resource):
|
|
140
|
-
@ns_conf.doc(
|
|
141
|
-
@api_endpoint_metrics(
|
|
144
|
+
@ns_conf.doc("get_knowledge_base")
|
|
145
|
+
@api_endpoint_metrics("GET", "/knowledge_bases/knowledge_base")
|
|
142
146
|
def get(self, project_name, knowledge_base_name):
|
|
143
|
-
|
|
147
|
+
"""Gets a knowledge base by name"""
|
|
144
148
|
session = SessionController()
|
|
145
149
|
project_controller = ProjectController()
|
|
146
150
|
try:
|
|
@@ -148,31 +152,27 @@ class KnowledgeBaseResource(Resource):
|
|
|
148
152
|
except EntityNotExistsError:
|
|
149
153
|
# Project must exist.
|
|
150
154
|
return http_error(
|
|
151
|
-
HTTPStatus.NOT_FOUND,
|
|
152
|
-
'Project not found',
|
|
153
|
-
f'Project with name {project_name} does not exist'
|
|
155
|
+
HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
|
|
154
156
|
)
|
|
155
157
|
|
|
156
158
|
existing_kb = session.kb_controller.get(knowledge_base_name, project.id)
|
|
157
159
|
if existing_kb is None:
|
|
158
160
|
return http_error(
|
|
159
161
|
HTTPStatus.NOT_FOUND,
|
|
160
|
-
|
|
161
|
-
f
|
|
162
|
+
"Knowledge Base not found",
|
|
163
|
+
f"Knowledge Base with name {knowledge_base_name} does not exist",
|
|
162
164
|
)
|
|
163
|
-
return existing_kb.as_dict()
|
|
165
|
+
return existing_kb.as_dict(session.show_secrets), HTTPStatus.OK
|
|
164
166
|
|
|
165
|
-
@ns_conf.doc(
|
|
166
|
-
@api_endpoint_metrics(
|
|
167
|
+
@ns_conf.doc("update_knowledge_base")
|
|
168
|
+
@api_endpoint_metrics("PUT", "/knowledge_bases/knowledge_base")
|
|
167
169
|
def put(self, project_name: str, knowledge_base_name: str):
|
|
168
|
-
|
|
170
|
+
"""Updates a knowledge base with optional preprocessing."""
|
|
169
171
|
|
|
170
172
|
# Check for required parameters
|
|
171
|
-
if
|
|
173
|
+
if "knowledge_base" not in request.json:
|
|
172
174
|
return http_error(
|
|
173
|
-
HTTPStatus.BAD_REQUEST,
|
|
174
|
-
'Missing parameter',
|
|
175
|
-
'Must provide "knowledge_base" parameter in PUT body'
|
|
175
|
+
HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "knowledge_base" parameter in PUT body'
|
|
176
176
|
)
|
|
177
177
|
|
|
178
178
|
session = SessionController()
|
|
@@ -182,21 +182,19 @@ class KnowledgeBaseResource(Resource):
|
|
|
182
182
|
project = project_controller.get(name=project_name)
|
|
183
183
|
except EntityNotExistsError:
|
|
184
184
|
return http_error(
|
|
185
|
-
HTTPStatus.NOT_FOUND,
|
|
186
|
-
'Project not found',
|
|
187
|
-
f'Project with name {project_name} does not exist'
|
|
185
|
+
HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
|
|
188
186
|
)
|
|
189
187
|
|
|
190
188
|
try:
|
|
191
|
-
kb_data = request.json[
|
|
189
|
+
kb_data = request.json["knowledge_base"]
|
|
192
190
|
|
|
193
191
|
# Retrieve the knowledge base table for updates
|
|
194
|
-
table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get(
|
|
192
|
+
table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get("params"))
|
|
195
193
|
if table is None:
|
|
196
194
|
return http_error(
|
|
197
195
|
HTTPStatus.NOT_FOUND,
|
|
198
|
-
|
|
199
|
-
f
|
|
196
|
+
"Knowledge Base not found",
|
|
197
|
+
f"Knowledge Base with name {knowledge_base_name} does not exist",
|
|
200
198
|
)
|
|
201
199
|
|
|
202
200
|
# Set up dependencies for DocumentLoader
|
|
@@ -211,68 +209,64 @@ class KnowledgeBaseResource(Resource):
|
|
|
211
209
|
file_controller=file_controller,
|
|
212
210
|
file_splitter=file_splitter,
|
|
213
211
|
markdown_splitter=markdown_splitter,
|
|
214
|
-
mysql_proxy=mysql_proxy
|
|
212
|
+
mysql_proxy=mysql_proxy,
|
|
215
213
|
)
|
|
216
214
|
|
|
217
215
|
# Configure table with dependencies
|
|
218
216
|
table.document_loader = document_loader
|
|
219
217
|
|
|
220
218
|
# Update preprocessing configuration if provided
|
|
221
|
-
if
|
|
222
|
-
table.configure_preprocessing(kb_data[
|
|
219
|
+
if "preprocessing" in kb_data:
|
|
220
|
+
table.configure_preprocessing(kb_data["preprocessing"])
|
|
223
221
|
|
|
224
222
|
# Process raw data rows if provided
|
|
225
|
-
if kb_data.get(
|
|
226
|
-
table.insert_rows(kb_data[
|
|
223
|
+
if kb_data.get("rows"):
|
|
224
|
+
table.insert_rows(kb_data["rows"])
|
|
227
225
|
|
|
228
226
|
# Process files if specified
|
|
229
|
-
if kb_data.get(
|
|
230
|
-
table.insert_files(kb_data[
|
|
227
|
+
if kb_data.get("files"):
|
|
228
|
+
table.insert_files(kb_data["files"])
|
|
231
229
|
|
|
232
230
|
# Process web pages if URLs provided
|
|
233
|
-
if kb_data.get(
|
|
231
|
+
if kb_data.get("urls"):
|
|
234
232
|
table.insert_web_pages(
|
|
235
|
-
urls=kb_data[
|
|
236
|
-
limit=kb_data.get(
|
|
237
|
-
crawl_depth=kb_data.get(
|
|
238
|
-
filters=kb_data.get(
|
|
233
|
+
urls=kb_data["urls"],
|
|
234
|
+
limit=kb_data.get("limit") or DEFAULT_WEB_CRAWL_LIMIT,
|
|
235
|
+
crawl_depth=kb_data.get("crawl_depth", DEFAULT_CRAWL_DEPTH),
|
|
236
|
+
filters=kb_data.get("filters", DEFAULT_WEB_FILTERS),
|
|
239
237
|
)
|
|
240
238
|
|
|
241
239
|
# Process query if provided
|
|
242
|
-
if kb_data.get(
|
|
243
|
-
table.insert_query_result(kb_data[
|
|
240
|
+
if kb_data.get("query"):
|
|
241
|
+
table.insert_query_result(kb_data["query"], project_name)
|
|
244
242
|
|
|
245
243
|
except ExecutorException as e:
|
|
246
|
-
logger.error(f
|
|
244
|
+
logger.error(f"Error during preprocessing and insertion: {str(e)}")
|
|
247
245
|
return http_error(
|
|
248
246
|
HTTPStatus.BAD_REQUEST,
|
|
249
|
-
|
|
250
|
-
f'Executing "query" failed. Needs to be a valid SELECT statement that returns data: {str(e)}'
|
|
247
|
+
"Invalid SELECT query",
|
|
248
|
+
f'Executing "query" failed. Needs to be a valid SELECT statement that returns data: {str(e)}',
|
|
251
249
|
)
|
|
252
250
|
|
|
253
251
|
except Exception as e:
|
|
254
|
-
logger.error(f
|
|
252
|
+
logger.error(f"Error during preprocessing and insertion: {str(e)}")
|
|
255
253
|
return http_error(
|
|
256
|
-
HTTPStatus.BAD_REQUEST,
|
|
257
|
-
'Preprocessing Error',
|
|
258
|
-
f'Error during preprocessing and insertion: {str(e)}'
|
|
254
|
+
HTTPStatus.BAD_REQUEST, "Preprocessing Error", f"Error during preprocessing and insertion: {str(e)}"
|
|
259
255
|
)
|
|
260
256
|
|
|
261
|
-
return
|
|
257
|
+
return "", HTTPStatus.OK
|
|
262
258
|
|
|
263
|
-
@ns_conf.doc(
|
|
264
|
-
@api_endpoint_metrics(
|
|
259
|
+
@ns_conf.doc("delete_knowledge_base")
|
|
260
|
+
@api_endpoint_metrics("DELETE", "/knowledge_bases/knowledge_base")
|
|
265
261
|
def delete(self, project_name: str, knowledge_base_name: str):
|
|
266
|
-
|
|
262
|
+
"""Deletes a knowledge base."""
|
|
267
263
|
project_controller = ProjectController()
|
|
268
264
|
try:
|
|
269
265
|
project = project_controller.get(name=project_name)
|
|
270
266
|
except EntityNotExistsError:
|
|
271
267
|
# Project must exist.
|
|
272
268
|
return http_error(
|
|
273
|
-
HTTPStatus.NOT_FOUND,
|
|
274
|
-
'Project not found',
|
|
275
|
-
f'Project with name {project_name} does not exist'
|
|
269
|
+
HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
|
|
276
270
|
)
|
|
277
271
|
|
|
278
272
|
session_controller = SessionController()
|
|
@@ -281,43 +275,45 @@ class KnowledgeBaseResource(Resource):
|
|
|
281
275
|
# Knowledge Base must exist.
|
|
282
276
|
return http_error(
|
|
283
277
|
HTTPStatus.NOT_FOUND,
|
|
284
|
-
|
|
285
|
-
f
|
|
278
|
+
"Knowledge Base not found",
|
|
279
|
+
f"Knowledge Base with name {knowledge_base_name} does not exist",
|
|
286
280
|
)
|
|
287
281
|
|
|
288
282
|
session_controller.kb_controller.delete(knowledge_base_name, project_name)
|
|
289
|
-
return
|
|
283
|
+
return "", HTTPStatus.NO_CONTENT
|
|
290
284
|
|
|
291
285
|
|
|
292
286
|
def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request):
|
|
293
287
|
# Check for required parameters
|
|
294
|
-
query = request.json.get(
|
|
288
|
+
query = request.json.get("query")
|
|
295
289
|
|
|
296
|
-
llm_model = request.json.get(
|
|
290
|
+
llm_model = request.json.get("llm_model")
|
|
297
291
|
if llm_model is None:
|
|
298
292
|
logger.warning(f'Missing parameter "llm_model" in POST body, using default llm_model {DEFAULT_LLM_MODEL}')
|
|
299
293
|
|
|
300
|
-
prompt_template = request.json.get(
|
|
294
|
+
prompt_template = request.json.get("prompt_template")
|
|
301
295
|
if prompt_template is None:
|
|
302
|
-
logger.warning(
|
|
296
|
+
logger.warning(
|
|
297
|
+
f'Missing parameter "prompt_template" in POST body, using default prompt template {DEFAULT_RAG_PROMPT_TEMPLATE}'
|
|
298
|
+
)
|
|
303
299
|
|
|
304
300
|
# Get retrieval config, if set
|
|
305
|
-
retrieval_config = request.json.get(
|
|
301
|
+
retrieval_config = request.json.get("retrieval_config", {})
|
|
306
302
|
if not retrieval_config:
|
|
307
|
-
logger.warning(
|
|
303
|
+
logger.warning("No retrieval config provided, using default retrieval config")
|
|
308
304
|
|
|
309
305
|
# add llm model to retrieval config
|
|
310
306
|
if llm_model is not None:
|
|
311
|
-
retrieval_config[
|
|
307
|
+
retrieval_config["llm_model_name"] = llm_model
|
|
312
308
|
|
|
313
309
|
# add prompt template to retrieval config
|
|
314
310
|
if prompt_template is not None:
|
|
315
|
-
retrieval_config[
|
|
311
|
+
retrieval_config["rag_prompt_template"] = prompt_template
|
|
316
312
|
|
|
317
313
|
# add llm provider to retrieval config if set
|
|
318
|
-
llm_provider = request.json.get(
|
|
314
|
+
llm_provider = request.json.get("model_provider")
|
|
319
315
|
if llm_provider is not None:
|
|
320
|
-
retrieval_config[
|
|
316
|
+
retrieval_config["llm_provider"] = llm_provider
|
|
321
317
|
|
|
322
318
|
# build rag pipeline
|
|
323
319
|
rag_pipeline = knowledge_base_table.build_rag_pipeline(retrieval_config)
|
|
@@ -325,11 +321,7 @@ def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request):
|
|
|
325
321
|
# get response from rag pipeline
|
|
326
322
|
rag_response = rag_pipeline(query)
|
|
327
323
|
response = {
|
|
328
|
-
|
|
329
|
-
'content': rag_response.get('answer'),
|
|
330
|
-
'context': rag_response.get('context'),
|
|
331
|
-
'role': 'assistant'
|
|
332
|
-
}
|
|
324
|
+
"message": {"content": rag_response.get("answer"), "context": rag_response.get("context"), "role": "assistant"}
|
|
333
325
|
}
|
|
334
326
|
|
|
335
327
|
return response
|
|
@@ -337,20 +329,16 @@ def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request):
|
|
|
337
329
|
|
|
338
330
|
def _handle_context_completion(knowledge_base_table: KnowledgeBaseTable, request):
|
|
339
331
|
# Used for semantic search.
|
|
340
|
-
query = request.json.get(
|
|
332
|
+
query = request.json.get("query")
|
|
341
333
|
# Keyword search.
|
|
342
|
-
keywords = request.json.get(
|
|
334
|
+
keywords = request.json.get("keywords")
|
|
343
335
|
# Metadata search.
|
|
344
|
-
metadata = request.json.get(
|
|
336
|
+
metadata = request.json.get("metadata")
|
|
345
337
|
# Maximum amount of documents to return as context.
|
|
346
|
-
limit = request.json.get(
|
|
338
|
+
limit = request.json.get("limit", DEFAULT_CONTEXT_DOCUMENT_LIMIT)
|
|
347
339
|
|
|
348
340
|
# Use default distance function & column names for ID, content, & metadata, to keep things simple.
|
|
349
|
-
hybrid_search_df = knowledge_base_table.hybrid_search(
|
|
350
|
-
query,
|
|
351
|
-
keywords=keywords,
|
|
352
|
-
metadata=metadata
|
|
353
|
-
)
|
|
341
|
+
hybrid_search_df = knowledge_base_table.hybrid_search(query, keywords=keywords, metadata=metadata)
|
|
354
342
|
|
|
355
343
|
num_documents = len(hybrid_search_df.index)
|
|
356
344
|
context_documents = []
|
|
@@ -358,34 +346,26 @@ def _handle_context_completion(knowledge_base_table: KnowledgeBaseTable, request
|
|
|
358
346
|
if i >= num_documents:
|
|
359
347
|
break
|
|
360
348
|
row = hybrid_search_df.iloc[i]
|
|
361
|
-
context_documents.append({
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
'rank': row['rank']
|
|
365
|
-
})
|
|
366
|
-
|
|
367
|
-
return {
|
|
368
|
-
'documents': context_documents
|
|
369
|
-
}
|
|
349
|
+
context_documents.append({"id": row["id"], "content": row["content"], "rank": row["rank"]})
|
|
350
|
+
|
|
351
|
+
return {"documents": context_documents}
|
|
370
352
|
|
|
371
353
|
|
|
372
|
-
@ns_conf.route(
|
|
373
|
-
@ns_conf.param(
|
|
374
|
-
@ns_conf.param(
|
|
354
|
+
@ns_conf.route("/<project_name>/knowledge_bases/<knowledge_base_name>/completions")
|
|
355
|
+
@ns_conf.param("project_name", "Name of the project")
|
|
356
|
+
@ns_conf.param("knowledge_base_name", "Name of the knowledge_base")
|
|
375
357
|
class KnowledgeBaseCompletions(Resource):
|
|
376
|
-
@ns_conf.doc(
|
|
377
|
-
@api_endpoint_metrics(
|
|
358
|
+
@ns_conf.doc("knowledge_base_completions")
|
|
359
|
+
@api_endpoint_metrics("POST", "/knowledge_bases/knowledge_base/completions")
|
|
378
360
|
def post(self, project_name, knowledge_base_name):
|
|
379
361
|
"""
|
|
380
362
|
Add support for LLM generation on the response from knowledge base. Default completion type is 'chat' unless specified.
|
|
381
363
|
"""
|
|
382
|
-
if request.json.get(
|
|
364
|
+
if request.json.get("query") is None:
|
|
383
365
|
# "query" is used for semantic search for both completion types.
|
|
384
366
|
logger.error('Missing parameter "query" in POST body')
|
|
385
367
|
return http_error(
|
|
386
|
-
HTTPStatus.BAD_REQUEST,
|
|
387
|
-
'Missing parameter',
|
|
388
|
-
'Must provide "query" parameter in POST body'
|
|
368
|
+
HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "query" parameter in POST body'
|
|
389
369
|
)
|
|
390
370
|
|
|
391
371
|
project_controller = ProjectController()
|
|
@@ -395,9 +375,7 @@ class KnowledgeBaseCompletions(Resource):
|
|
|
395
375
|
# Project must exist.
|
|
396
376
|
logger.error("Project not found, please check the project name exists")
|
|
397
377
|
return http_error(
|
|
398
|
-
HTTPStatus.NOT_FOUND,
|
|
399
|
-
'Project not found',
|
|
400
|
-
f'Project with name {project_name} does not exist'
|
|
378
|
+
HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
|
|
401
379
|
)
|
|
402
380
|
|
|
403
381
|
session = SessionController()
|
|
@@ -407,17 +385,17 @@ class KnowledgeBaseCompletions(Resource):
|
|
|
407
385
|
logger.error("Knowledge Base not found, please check the knowledge base name exists")
|
|
408
386
|
return http_error(
|
|
409
387
|
HTTPStatus.NOT_FOUND,
|
|
410
|
-
|
|
411
|
-
f
|
|
388
|
+
"Knowledge Base not found",
|
|
389
|
+
f"Knowledge Base with name {knowledge_base_name} does not exist",
|
|
412
390
|
)
|
|
413
391
|
|
|
414
|
-
completion_type = request.json.get(
|
|
415
|
-
if completion_type ==
|
|
392
|
+
completion_type = request.json.get("type", "chat")
|
|
393
|
+
if completion_type == "context":
|
|
416
394
|
return _handle_context_completion(table, request)
|
|
417
|
-
if completion_type ==
|
|
395
|
+
if completion_type == "chat":
|
|
418
396
|
return _handle_chat_completion(table, request)
|
|
419
397
|
return http_error(
|
|
420
398
|
HTTPStatus.BAD_REQUEST,
|
|
421
|
-
|
|
422
|
-
f'Completion type must be one of: "context", "chat". Received {completion_type}'
|
|
399
|
+
"Invalid parameter",
|
|
400
|
+
f'Completion type must be one of: "context", "chat". Received {completion_type}',
|
|
423
401
|
)
|