MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (76)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +50 -26
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/task_manager.py +68 -6
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
  8. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  9. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  10. mindsdb/api/executor/planner/plan_join.py +67 -77
  11. mindsdb/api/executor/planner/query_planner.py +176 -155
  12. mindsdb/api/executor/planner/steps.py +37 -12
  13. mindsdb/api/executor/sql_query/result_set.py +45 -64
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  15. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  16. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  18. mindsdb/api/executor/utilities/sql.py +42 -48
  19. mindsdb/api/http/namespaces/config.py +1 -1
  20. mindsdb/api/http/namespaces/file.py +14 -23
  21. mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  23. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  24. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  25. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  26. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  27. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
  28. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  29. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  30. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  31. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  32. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
  33. mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
  34. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  35. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  36. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
  37. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  38. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
  39. mindsdb/integrations/libs/api_handler.py +279 -57
  40. mindsdb/integrations/libs/base.py +185 -30
  41. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  42. mindsdb/integrations/utilities/handler_utils.py +23 -8
  43. mindsdb/integrations/utilities/sql_utils.py +35 -40
  44. mindsdb/interfaces/agents/agents_controller.py +226 -196
  45. mindsdb/interfaces/agents/constants.py +8 -1
  46. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  47. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  48. mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
  49. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  50. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  51. mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
  52. mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
  53. mindsdb/interfaces/database/database.py +81 -57
  54. mindsdb/interfaces/database/integrations.py +222 -234
  55. mindsdb/interfaces/database/log.py +72 -104
  56. mindsdb/interfaces/database/projects.py +156 -193
  57. mindsdb/interfaces/file/file_controller.py +21 -65
  58. mindsdb/interfaces/knowledge_base/controller.py +66 -25
  59. mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
  60. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  61. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  62. mindsdb/interfaces/skills/skills_controller.py +31 -36
  63. mindsdb/interfaces/skills/sql_agent.py +113 -86
  64. mindsdb/interfaces/storage/db.py +242 -82
  65. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  66. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  67. mindsdb/utilities/config.py +13 -2
  68. mindsdb/utilities/log.py +35 -26
  69. mindsdb/utilities/ml_task_queue/task.py +19 -22
  70. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  71. mindsdb/utilities/starters.py +40 -0
  72. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
  73. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
  74. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
  75. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
  76. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0
mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
@@ -14,8 +14,10 @@ from mindsdb.integrations.utilities.rag.splitters.file_splitter import FileSplit
14
14
  from mindsdb.interfaces.file.file_controller import FileController
15
15
  from mindsdb.interfaces.knowledge_base.preprocessing.constants import (
16
16
  DEFAULT_CONTEXT_DOCUMENT_LIMIT,
17
- DEFAULT_CRAWL_DEPTH, DEFAULT_WEB_FILTERS,
18
- DEFAULT_MARKDOWN_HEADERS, DEFAULT_WEB_CRAWL_LIMIT
17
+ DEFAULT_CRAWL_DEPTH,
18
+ DEFAULT_WEB_FILTERS,
19
+ DEFAULT_MARKDOWN_HEADERS,
20
+ DEFAULT_WEB_CRAWL_LIMIT,
19
21
  )
20
22
  from mindsdb.interfaces.knowledge_base.preprocessing.document_loader import DocumentLoader
21
23
  from mindsdb.metrics.metrics import api_endpoint_metrics
@@ -31,12 +33,12 @@ from mindsdb_sql_parser.ast import Identifier
31
33
  logger = log.getLogger(__name__)
32
34
 
33
35
 
34
- @ns_conf.route('/<project_name>/knowledge_bases')
36
+ @ns_conf.route("/<project_name>/knowledge_bases")
35
37
  class KnowledgeBasesResource(Resource):
36
- @ns_conf.doc('list_knowledge_bases')
37
- @api_endpoint_metrics('GET', '/knowledge_bases')
38
+ @ns_conf.doc("list_knowledge_bases")
39
+ @api_endpoint_metrics("GET", "/knowledge_bases")
38
40
  def get(self, project_name):
39
- '''List all knowledge bases'''
41
+ """List all knowledge bases"""
40
42
  session = SessionController()
41
43
  project_controller = ProjectController()
42
44
  try:
@@ -44,43 +46,37 @@ class KnowledgeBasesResource(Resource):
44
46
  except EntityNotExistsError:
45
47
  # Project must exist.
46
48
  return http_error(
47
- HTTPStatus.NOT_FOUND,
48
- 'Project not found',
49
- f'Project with name {project_name} does not exist'
49
+ HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
50
50
  )
51
51
 
52
52
  # KB Controller already returns dict.
53
53
  return session.kb_controller.list(project_name)
54
54
 
55
- @ns_conf.doc('create_knowledge_base')
56
- @api_endpoint_metrics('POST', '/knowledge_bases')
55
+ @ns_conf.doc("create_knowledge_base")
56
+ @api_endpoint_metrics("POST", "/knowledge_bases")
57
57
  def post(self, project_name):
58
- '''Create a knowledge base'''
58
+ """Create a knowledge base"""
59
59
 
60
60
  # Check for required parameters.
61
- if 'knowledge_base' not in request.json:
61
+ if "knowledge_base" not in request.json:
62
62
  return http_error(
63
- HTTPStatus.BAD_REQUEST,
64
- 'Missing parameter',
65
- 'Must provide "knowledge_base" parameter in POST body'
63
+ HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "knowledge_base" parameter in POST body'
66
64
  )
67
65
 
68
- knowledge_base = request.json['knowledge_base']
66
+ knowledge_base = request.json["knowledge_base"]
69
67
  # Explicitly require embedding model & vector database.
70
- required_fields = ['name', 'model']
68
+ required_fields = ["name"]
71
69
  for field in required_fields:
72
70
  if field not in knowledge_base:
73
71
  return http_error(
74
- HTTPStatus.BAD_REQUEST,
75
- 'Missing parameter',
76
- f'Must provide "{field}" field in "knowledge_base"'
72
+ HTTPStatus.BAD_REQUEST, "Missing parameter", f'Must provide "{field}" field in "knowledge_base"'
77
73
  )
78
- if 'storage' in knowledge_base:
79
- if 'database' not in knowledge_base['storage'] or 'table' not in knowledge_base['storage']:
74
+ if "storage" in knowledge_base:
75
+ if "database" not in knowledge_base["storage"] or "table" not in knowledge_base["storage"]:
80
76
  return http_error(
81
77
  HTTPStatus.BAD_REQUEST,
82
- 'Missing parameter',
83
- 'Must provide "database" and "table" field in "storage" param'
78
+ "Missing parameter",
79
+ 'Must provide "database" and "table" field in "storage" param',
84
80
  )
85
81
 
86
82
  session = SessionController()
@@ -90,57 +86,65 @@ class KnowledgeBasesResource(Resource):
90
86
  except EntityNotExistsError:
91
87
  # Project must exist.
92
88
  return http_error(
93
- HTTPStatus.NOT_FOUND,
94
- 'Project not found',
95
- f'Project with name {project_name} does not exist'
89
+ HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
96
90
  )
97
91
 
98
- kb_name = knowledge_base.get('name')
92
+ kb_name = knowledge_base.get("name")
99
93
  existing_kb = session.kb_controller.get(kb_name, project.id)
100
94
  if existing_kb is not None:
101
95
  # Knowledge Base must not exist.
102
96
  return http_error(
103
97
  HTTPStatus.CONFLICT,
104
- 'Knowledge Base already exists',
105
- f'Knowledge Base with name {kb_name} already exists'
98
+ "Knowledge Base already exists",
99
+ f"Knowledge Base with name {kb_name} already exists",
106
100
  )
107
101
 
108
- embedding_model_identifier = None
109
- if knowledge_base.get('model'):
110
- embedding_model_identifier = Identifier(parts=[knowledge_base['model']])
102
+ # Legacy: Support for embedding model identifier.
103
+ # embedding_model_identifier = None
104
+ # if knowledge_base.get('model'):
105
+ # embedding_model_identifier = Identifier(parts=[knowledge_base['model']])
111
106
 
112
- storage = knowledge_base.get('storage')
107
+ storage = knowledge_base.get("storage")
113
108
  embedding_table_identifier = None
114
109
  if storage is not None:
115
- embedding_table_identifier = Identifier(parts=[storage['database'], storage['table']])
110
+ embedding_table_identifier = Identifier(parts=[storage["database"], storage["table"]])
111
+
112
+ params = knowledge_base.get("params", {})
113
+
114
+ optional_parameter_fields = [
115
+ "embedding_model",
116
+ "reranking_model",
117
+ "content_columns",
118
+ "metadata_columns",
119
+ "id_column",
120
+ ]
121
+
122
+ for field in optional_parameter_fields:
123
+ if field in knowledge_base:
124
+ params[field] = knowledge_base[field]
116
125
 
117
126
  try:
118
127
  new_kb = session.kb_controller.add(
119
128
  kb_name,
120
129
  project.name,
121
- embedding_model_identifier,
122
130
  embedding_table_identifier,
123
- params=knowledge_base.get('params', {}),
124
- preprocessing_config=knowledge_base.get('preprocessing')
131
+ params=params,
132
+ preprocessing_config=knowledge_base.get("preprocessing"),
125
133
  )
126
134
  except ValueError as e:
127
- return http_error(
128
- HTTPStatus.BAD_REQUEST,
129
- 'Invalid preprocessing configuration',
130
- str(e)
131
- )
135
+ return http_error(HTTPStatus.BAD_REQUEST, "Invalid preprocessing configuration", str(e))
132
136
 
133
- return new_kb.as_dict(), HTTPStatus.CREATED
137
+ return new_kb.as_dict(session.show_secrets), HTTPStatus.CREATED
134
138
 
135
139
 
136
- @ns_conf.route('/<project_name>/knowledge_bases/<knowledge_base_name>')
137
- @ns_conf.param('project_name', 'Name of the project')
138
- @ns_conf.param('knowledge_base_name', 'Name of the knowledge_base')
140
+ @ns_conf.route("/<project_name>/knowledge_bases/<knowledge_base_name>")
141
+ @ns_conf.param("project_name", "Name of the project")
142
+ @ns_conf.param("knowledge_base_name", "Name of the knowledge_base")
139
143
  class KnowledgeBaseResource(Resource):
140
- @ns_conf.doc('get_knowledge_base')
141
- @api_endpoint_metrics('GET', '/knowledge_bases/knowledge_base')
144
+ @ns_conf.doc("get_knowledge_base")
145
+ @api_endpoint_metrics("GET", "/knowledge_bases/knowledge_base")
142
146
  def get(self, project_name, knowledge_base_name):
143
- '''Gets a knowledge base by name'''
147
+ """Gets a knowledge base by name"""
144
148
  session = SessionController()
145
149
  project_controller = ProjectController()
146
150
  try:
@@ -148,31 +152,27 @@ class KnowledgeBaseResource(Resource):
148
152
  except EntityNotExistsError:
149
153
  # Project must exist.
150
154
  return http_error(
151
- HTTPStatus.NOT_FOUND,
152
- 'Project not found',
153
- f'Project with name {project_name} does not exist'
155
+ HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
154
156
  )
155
157
 
156
158
  existing_kb = session.kb_controller.get(knowledge_base_name, project.id)
157
159
  if existing_kb is None:
158
160
  return http_error(
159
161
  HTTPStatus.NOT_FOUND,
160
- 'Knowledge Base not found',
161
- f'Knowledge Base with name {knowledge_base_name} does not exist'
162
+ "Knowledge Base not found",
163
+ f"Knowledge Base with name {knowledge_base_name} does not exist",
162
164
  )
163
- return existing_kb.as_dict()
165
+ return existing_kb.as_dict(session.show_secrets), HTTPStatus.OK
164
166
 
165
- @ns_conf.doc('update_knowledge_base')
166
- @api_endpoint_metrics('PUT', '/knowledge_bases/knowledge_base')
167
+ @ns_conf.doc("update_knowledge_base")
168
+ @api_endpoint_metrics("PUT", "/knowledge_bases/knowledge_base")
167
169
  def put(self, project_name: str, knowledge_base_name: str):
168
- '''Updates a knowledge base with optional preprocessing.'''
170
+ """Updates a knowledge base with optional preprocessing."""
169
171
 
170
172
  # Check for required parameters
171
- if 'knowledge_base' not in request.json:
173
+ if "knowledge_base" not in request.json:
172
174
  return http_error(
173
- HTTPStatus.BAD_REQUEST,
174
- 'Missing parameter',
175
- 'Must provide "knowledge_base" parameter in PUT body'
175
+ HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "knowledge_base" parameter in PUT body'
176
176
  )
177
177
 
178
178
  session = SessionController()
@@ -182,21 +182,19 @@ class KnowledgeBaseResource(Resource):
182
182
  project = project_controller.get(name=project_name)
183
183
  except EntityNotExistsError:
184
184
  return http_error(
185
- HTTPStatus.NOT_FOUND,
186
- 'Project not found',
187
- f'Project with name {project_name} does not exist'
185
+ HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
188
186
  )
189
187
 
190
188
  try:
191
- kb_data = request.json['knowledge_base']
189
+ kb_data = request.json["knowledge_base"]
192
190
 
193
191
  # Retrieve the knowledge base table for updates
194
- table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get('params'))
192
+ table = session.kb_controller.get_table(knowledge_base_name, project.id, params=kb_data.get("params"))
195
193
  if table is None:
196
194
  return http_error(
197
195
  HTTPStatus.NOT_FOUND,
198
- 'Knowledge Base not found',
199
- f'Knowledge Base with name {knowledge_base_name} does not exist'
196
+ "Knowledge Base not found",
197
+ f"Knowledge Base with name {knowledge_base_name} does not exist",
200
198
  )
201
199
 
202
200
  # Set up dependencies for DocumentLoader
@@ -211,68 +209,64 @@ class KnowledgeBaseResource(Resource):
211
209
  file_controller=file_controller,
212
210
  file_splitter=file_splitter,
213
211
  markdown_splitter=markdown_splitter,
214
- mysql_proxy=mysql_proxy
212
+ mysql_proxy=mysql_proxy,
215
213
  )
216
214
 
217
215
  # Configure table with dependencies
218
216
  table.document_loader = document_loader
219
217
 
220
218
  # Update preprocessing configuration if provided
221
- if 'preprocessing' in kb_data:
222
- table.configure_preprocessing(kb_data['preprocessing'])
219
+ if "preprocessing" in kb_data:
220
+ table.configure_preprocessing(kb_data["preprocessing"])
223
221
 
224
222
  # Process raw data rows if provided
225
- if kb_data.get('rows'):
226
- table.insert_rows(kb_data['rows'])
223
+ if kb_data.get("rows"):
224
+ table.insert_rows(kb_data["rows"])
227
225
 
228
226
  # Process files if specified
229
- if kb_data.get('files'):
230
- table.insert_files(kb_data['files'])
227
+ if kb_data.get("files"):
228
+ table.insert_files(kb_data["files"])
231
229
 
232
230
  # Process web pages if URLs provided
233
- if kb_data.get('urls'):
231
+ if kb_data.get("urls"):
234
232
  table.insert_web_pages(
235
- urls=kb_data['urls'],
236
- limit=kb_data.get('limit') or DEFAULT_WEB_CRAWL_LIMIT,
237
- crawl_depth=kb_data.get('crawl_depth', DEFAULT_CRAWL_DEPTH),
238
- filters=kb_data.get('filters', DEFAULT_WEB_FILTERS)
233
+ urls=kb_data["urls"],
234
+ limit=kb_data.get("limit") or DEFAULT_WEB_CRAWL_LIMIT,
235
+ crawl_depth=kb_data.get("crawl_depth", DEFAULT_CRAWL_DEPTH),
236
+ filters=kb_data.get("filters", DEFAULT_WEB_FILTERS),
239
237
  )
240
238
 
241
239
  # Process query if provided
242
- if kb_data.get('query'):
243
- table.insert_query_result(kb_data['query'], project_name)
240
+ if kb_data.get("query"):
241
+ table.insert_query_result(kb_data["query"], project_name)
244
242
 
245
243
  except ExecutorException as e:
246
- logger.error(f'Error during preprocessing and insertion: {str(e)}')
244
+ logger.error(f"Error during preprocessing and insertion: {str(e)}")
247
245
  return http_error(
248
246
  HTTPStatus.BAD_REQUEST,
249
- 'Invalid SELECT query',
250
- f'Executing "query" failed. Needs to be a valid SELECT statement that returns data: {str(e)}'
247
+ "Invalid SELECT query",
248
+ f'Executing "query" failed. Needs to be a valid SELECT statement that returns data: {str(e)}',
251
249
  )
252
250
 
253
251
  except Exception as e:
254
- logger.error(f'Error during preprocessing and insertion: {str(e)}')
252
+ logger.error(f"Error during preprocessing and insertion: {str(e)}")
255
253
  return http_error(
256
- HTTPStatus.BAD_REQUEST,
257
- 'Preprocessing Error',
258
- f'Error during preprocessing and insertion: {str(e)}'
254
+ HTTPStatus.BAD_REQUEST, "Preprocessing Error", f"Error during preprocessing and insertion: {str(e)}"
259
255
  )
260
256
 
261
- return '', HTTPStatus.OK
257
+ return "", HTTPStatus.OK
262
258
 
263
- @ns_conf.doc('delete_knowledge_base')
264
- @api_endpoint_metrics('DELETE', '/knowledge_bases/knowledge_base')
259
+ @ns_conf.doc("delete_knowledge_base")
260
+ @api_endpoint_metrics("DELETE", "/knowledge_bases/knowledge_base")
265
261
  def delete(self, project_name: str, knowledge_base_name: str):
266
- '''Deletes a knowledge base.'''
262
+ """Deletes a knowledge base."""
267
263
  project_controller = ProjectController()
268
264
  try:
269
265
  project = project_controller.get(name=project_name)
270
266
  except EntityNotExistsError:
271
267
  # Project must exist.
272
268
  return http_error(
273
- HTTPStatus.NOT_FOUND,
274
- 'Project not found',
275
- f'Project with name {project_name} does not exist'
269
+ HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
276
270
  )
277
271
 
278
272
  session_controller = SessionController()
@@ -281,43 +275,45 @@ class KnowledgeBaseResource(Resource):
281
275
  # Knowledge Base must exist.
282
276
  return http_error(
283
277
  HTTPStatus.NOT_FOUND,
284
- 'Knowledge Base not found',
285
- f'Knowledge Base with name {knowledge_base_name} does not exist'
278
+ "Knowledge Base not found",
279
+ f"Knowledge Base with name {knowledge_base_name} does not exist",
286
280
  )
287
281
 
288
282
  session_controller.kb_controller.delete(knowledge_base_name, project_name)
289
- return '', HTTPStatus.NO_CONTENT
283
+ return "", HTTPStatus.NO_CONTENT
290
284
 
291
285
 
292
286
  def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request):
293
287
  # Check for required parameters
294
- query = request.json.get('query')
288
+ query = request.json.get("query")
295
289
 
296
- llm_model = request.json.get('llm_model')
290
+ llm_model = request.json.get("llm_model")
297
291
  if llm_model is None:
298
292
  logger.warning(f'Missing parameter "llm_model" in POST body, using default llm_model {DEFAULT_LLM_MODEL}')
299
293
 
300
- prompt_template = request.json.get('prompt_template')
294
+ prompt_template = request.json.get("prompt_template")
301
295
  if prompt_template is None:
302
- logger.warning(f'Missing parameter "prompt_template" in POST body, using default prompt template {DEFAULT_RAG_PROMPT_TEMPLATE}')
296
+ logger.warning(
297
+ f'Missing parameter "prompt_template" in POST body, using default prompt template {DEFAULT_RAG_PROMPT_TEMPLATE}'
298
+ )
303
299
 
304
300
  # Get retrieval config, if set
305
- retrieval_config = request.json.get('retrieval_config', {})
301
+ retrieval_config = request.json.get("retrieval_config", {})
306
302
  if not retrieval_config:
307
- logger.warning('No retrieval config provided, using default retrieval config')
303
+ logger.warning("No retrieval config provided, using default retrieval config")
308
304
 
309
305
  # add llm model to retrieval config
310
306
  if llm_model is not None:
311
- retrieval_config['llm_model_name'] = llm_model
307
+ retrieval_config["llm_model_name"] = llm_model
312
308
 
313
309
  # add prompt template to retrieval config
314
310
  if prompt_template is not None:
315
- retrieval_config['rag_prompt_template'] = prompt_template
311
+ retrieval_config["rag_prompt_template"] = prompt_template
316
312
 
317
313
  # add llm provider to retrieval config if set
318
- llm_provider = request.json.get('model_provider')
314
+ llm_provider = request.json.get("model_provider")
319
315
  if llm_provider is not None:
320
- retrieval_config['llm_provider'] = llm_provider
316
+ retrieval_config["llm_provider"] = llm_provider
321
317
 
322
318
  # build rag pipeline
323
319
  rag_pipeline = knowledge_base_table.build_rag_pipeline(retrieval_config)
@@ -325,11 +321,7 @@ def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request):
325
321
  # get response from rag pipeline
326
322
  rag_response = rag_pipeline(query)
327
323
  response = {
328
- 'message': {
329
- 'content': rag_response.get('answer'),
330
- 'context': rag_response.get('context'),
331
- 'role': 'assistant'
332
- }
324
+ "message": {"content": rag_response.get("answer"), "context": rag_response.get("context"), "role": "assistant"}
333
325
  }
334
326
 
335
327
  return response
@@ -337,20 +329,16 @@ def _handle_chat_completion(knowledge_base_table: KnowledgeBaseTable, request):
337
329
 
338
330
  def _handle_context_completion(knowledge_base_table: KnowledgeBaseTable, request):
339
331
  # Used for semantic search.
340
- query = request.json.get('query')
332
+ query = request.json.get("query")
341
333
  # Keyword search.
342
- keywords = request.json.get('keywords')
334
+ keywords = request.json.get("keywords")
343
335
  # Metadata search.
344
- metadata = request.json.get('metadata')
336
+ metadata = request.json.get("metadata")
345
337
  # Maximum amount of documents to return as context.
346
- limit = request.json.get('limit', DEFAULT_CONTEXT_DOCUMENT_LIMIT)
338
+ limit = request.json.get("limit", DEFAULT_CONTEXT_DOCUMENT_LIMIT)
347
339
 
348
340
  # Use default distance function & column names for ID, content, & metadata, to keep things simple.
349
- hybrid_search_df = knowledge_base_table.hybrid_search(
350
- query,
351
- keywords=keywords,
352
- metadata=metadata
353
- )
341
+ hybrid_search_df = knowledge_base_table.hybrid_search(query, keywords=keywords, metadata=metadata)
354
342
 
355
343
  num_documents = len(hybrid_search_df.index)
356
344
  context_documents = []
@@ -358,34 +346,26 @@ def _handle_context_completion(knowledge_base_table: KnowledgeBaseTable, request
358
346
  if i >= num_documents:
359
347
  break
360
348
  row = hybrid_search_df.iloc[i]
361
- context_documents.append({
362
- 'id': row['id'],
363
- 'content': row['content'],
364
- 'rank': row['rank']
365
- })
366
-
367
- return {
368
- 'documents': context_documents
369
- }
349
+ context_documents.append({"id": row["id"], "content": row["content"], "rank": row["rank"]})
350
+
351
+ return {"documents": context_documents}
370
352
 
371
353
 
372
- @ns_conf.route('/<project_name>/knowledge_bases/<knowledge_base_name>/completions')
373
- @ns_conf.param('project_name', 'Name of the project')
374
- @ns_conf.param('knowledge_base_name', 'Name of the knowledge_base')
354
+ @ns_conf.route("/<project_name>/knowledge_bases/<knowledge_base_name>/completions")
355
+ @ns_conf.param("project_name", "Name of the project")
356
+ @ns_conf.param("knowledge_base_name", "Name of the knowledge_base")
375
357
  class KnowledgeBaseCompletions(Resource):
376
- @ns_conf.doc('knowledge_base_completions')
377
- @api_endpoint_metrics('POST', '/knowledge_bases/knowledge_base/completions')
358
+ @ns_conf.doc("knowledge_base_completions")
359
+ @api_endpoint_metrics("POST", "/knowledge_bases/knowledge_base/completions")
378
360
  def post(self, project_name, knowledge_base_name):
379
361
  """
380
362
  Add support for LLM generation on the response from knowledge base. Default completion type is 'chat' unless specified.
381
363
  """
382
- if request.json.get('query') is None:
364
+ if request.json.get("query") is None:
383
365
  # "query" is used for semantic search for both completion types.
384
366
  logger.error('Missing parameter "query" in POST body')
385
367
  return http_error(
386
- HTTPStatus.BAD_REQUEST,
387
- 'Missing parameter',
388
- 'Must provide "query" parameter in POST body'
368
+ HTTPStatus.BAD_REQUEST, "Missing parameter", 'Must provide "query" parameter in POST body'
389
369
  )
390
370
 
391
371
  project_controller = ProjectController()
@@ -395,9 +375,7 @@ class KnowledgeBaseCompletions(Resource):
395
375
  # Project must exist.
396
376
  logger.error("Project not found, please check the project name exists")
397
377
  return http_error(
398
- HTTPStatus.NOT_FOUND,
399
- 'Project not found',
400
- f'Project with name {project_name} does not exist'
378
+ HTTPStatus.NOT_FOUND, "Project not found", f"Project with name {project_name} does not exist"
401
379
  )
402
380
 
403
381
  session = SessionController()
@@ -407,17 +385,17 @@ class KnowledgeBaseCompletions(Resource):
407
385
  logger.error("Knowledge Base not found, please check the knowledge base name exists")
408
386
  return http_error(
409
387
  HTTPStatus.NOT_FOUND,
410
- 'Knowledge Base not found',
411
- f'Knowledge Base with name {knowledge_base_name} does not exist'
388
+ "Knowledge Base not found",
389
+ f"Knowledge Base with name {knowledge_base_name} does not exist",
412
390
  )
413
391
 
414
- completion_type = request.json.get('type', 'chat')
415
- if completion_type == 'context':
392
+ completion_type = request.json.get("type", "chat")
393
+ if completion_type == "context":
416
394
  return _handle_context_completion(table, request)
417
- if completion_type == 'chat':
395
+ if completion_type == "chat":
418
396
  return _handle_chat_completion(table, request)
419
397
  return http_error(
420
398
  HTTPStatus.BAD_REQUEST,
421
- 'Invalid parameter',
422
- f'Completion type must be one of: "context", "chat". Received {completion_type}'
399
+ "Invalid parameter",
400
+ f'Completion type must be one of: "context", "chat". Received {completion_type}',
423
401
  )
mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
@@ -8,6 +8,7 @@
8
8
  * permission of MindsDB Inc
9
9
  *******************************************************
10
10
  """
11
+
11
12
  import struct
12
13
 
13
14
  from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
@@ -25,16 +26,16 @@ NULL_VALUE_INT = ord(NULL_VALUE)
25
26
 
26
27
 
27
28
  class Datum:
28
- __slots__ = ['value', 'var_type', 'var_len']
29
+ __slots__ = ["value", "var_type", "var_len"]
29
30
 
30
31
  def __init__(self, var_type, value=None, var_len=None):
31
32
  # TODO other types: float, timestamp
32
33
  self.value = b""
33
34
 
34
35
  if var_len is None:
35
- idx = var_type.find('<')
36
- var_len = var_type[idx + 1: -1]
37
- var_type = var_type[: idx]
36
+ idx = var_type.find("<")
37
+ var_len = var_type[idx + 1 : -1]
38
+ var_type = var_type[:idx]
38
39
  self.var_type = var_type
39
40
  self.var_len = var_len
40
41
 
@@ -128,7 +129,7 @@ class Datum:
128
129
  return self.get_serializer()(self.value)
129
130
 
130
131
  def get_serializer(self):
131
- if self.var_type == "string":
132
+ if self.var_type in ("string", "byte"):
132
133
  if self.var_len == "lenenc":
133
134
  if isinstance(self.value, bytes):
134
135
  return self.serialize_bytes
@@ -140,15 +141,13 @@ class Datum:
140
141
  if self.var_len == "packet":
141
142
  return lambda v: v.get_packet_string()
142
143
  else:
143
- return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[
144
- :int(self.var_len)
145
- ]
144
+ return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[: int(self.var_len)]
146
145
 
147
146
  if self.var_type == "int":
148
147
  if self.var_len == "lenenc":
149
148
  return self.serialize_int
150
149
  else:
151
- return lambda v: struct.pack("Q", v)[:int(self.var_len)]
150
+ return lambda v: struct.pack("Q", v)[: int(self.var_len)]
152
151
 
153
152
  @classmethod
154
153
  def serialize_str_eof(cls, value):
@@ -157,9 +156,7 @@ class Datum:
157
156
  if length == 0:
158
157
  return b""
159
158
  else:
160
- return struct.pack(
161
- "{len}s".format(len=var_len), bytes(value, "utf-8")
162
- )[:length]
159
+ return struct.pack("{len}s".format(len=var_len), bytes(value, "utf-8"))[:length]
163
160
 
164
161
  # def serialize_obj(self, value):
165
162
  # return self.serialize_str(str(value))
@@ -170,7 +167,6 @@ class Datum:
170
167
 
171
168
  @classmethod
172
169
  def serialize_bytes(cls, value):
173
-
174
170
  val_len = len(value)
175
171
 
176
172
  if val_len == 0:
@@ -181,23 +177,11 @@ class Datum:
181
177
 
182
178
  byte_count = -(val_len.bit_length() // (-8))
183
179
  if byte_count <= 2:
184
- return (
185
- TWO_BYTE_ENC
186
- + struct.pack("H", val_len)
187
- + value
188
- )
180
+ return TWO_BYTE_ENC + struct.pack("H", val_len) + value
189
181
  if byte_count <= 3:
190
- return (
191
- THREE_BYTE_ENC
192
- + struct.pack("i", val_len)[:3]
193
- + value
194
- )
182
+ return THREE_BYTE_ENC + struct.pack("i", val_len)[:3] + value
195
183
  if byte_count <= 8:
196
- return (
197
- THREE_BYTE_ENC
198
- + struct.pack("Q", val_len)
199
- + value
200
- )
184
+ return THREE_BYTE_ENC + struct.pack("Q", val_len) + value
201
185
 
202
186
 
203
187
  def test():