MindsDB mindsdb-25.5.4.2-py3-none-any.whl → mindsdb-25.6.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB has been flagged and may be problematic.

Files changed (69)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/executor/command_executor.py +69 -14
  5. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  6. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  7. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  8. mindsdb/api/executor/planner/plan_join.py +67 -77
  9. mindsdb/api/executor/planner/query_planner.py +176 -155
  10. mindsdb/api/executor/planner/steps.py +37 -12
  11. mindsdb/api/executor/sql_query/result_set.py +45 -64
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  14. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  15. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  16. mindsdb/api/executor/utilities/sql.py +42 -48
  17. mindsdb/api/http/namespaces/config.py +1 -1
  18. mindsdb/api/http/namespaces/file.py +14 -23
  19. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  22. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  23. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  24. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  25. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  27. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  28. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  29. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  32. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  33. mindsdb/integrations/libs/api_handler.py +261 -57
  34. mindsdb/integrations/libs/base.py +100 -29
  35. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  36. mindsdb/integrations/utilities/handler_utils.py +23 -8
  37. mindsdb/integrations/utilities/sql_utils.py +35 -40
  38. mindsdb/interfaces/agents/agents_controller.py +196 -192
  39. mindsdb/interfaces/agents/constants.py +7 -1
  40. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  41. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  42. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  43. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  44. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  46. mindsdb/interfaces/database/database.py +81 -57
  47. mindsdb/interfaces/database/integrations.py +220 -234
  48. mindsdb/interfaces/database/log.py +72 -104
  49. mindsdb/interfaces/database/projects.py +156 -193
  50. mindsdb/interfaces/file/file_controller.py +21 -65
  51. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  52. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  53. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  54. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  55. mindsdb/interfaces/skills/skills_controller.py +54 -36
  56. mindsdb/interfaces/skills/sql_agent.py +109 -86
  57. mindsdb/interfaces/storage/db.py +223 -79
  58. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  59. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  60. mindsdb/utilities/config.py +9 -2
  61. mindsdb/utilities/log.py +35 -26
  62. mindsdb/utilities/ml_task_queue/task.py +19 -22
  63. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  64. mindsdb/utilities/starters.py +40 -0
  65. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
  66. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
  67. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  68. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  69. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/knowledge_base/evaluate.py (new file)
@@ -0,0 +1,519 @@
+ import json
+ import math
+ import time
+ from typing import List
+
+ import pandas as pd
+ import datetime as dt
+
+ from mindsdb.api.executor.sql_query.result_set import ResultSet
+ from mindsdb_sql_parser import Identifier, Select, Constant, Star, parse_sql
+ from mindsdb.utilities import log
+
+ from mindsdb.interfaces.knowledge_base.llm_client import LLMClient
+
+ logger = log.getLogger(__name__)
+
+
+ GENERATE_QA_SYSTEM_PROMPT = """
+ Your task is to generate question and answer pairs for a search engine.
+ The search engine will take your query and return a list of documents.
+ You will be given a text and you need to generate a question that can be answered using the information in the text.
+ Your questions will be used to evaluate the search engine.
+ Question should always have enough clues to identify the specific text that this question is generated from.
+ Never ask questions like "What license number is associated with Amend 6" because Amend 6 could be found in many documents and the question is not specific enough.
+ Example output 1: {\"query\": \"What processor does the HP 2023 14\" FHD IPS Laptop use?\", \"reference_answer\": \"Ryzen 3 5300U\"}
+ Example output 2: {\"query\": \"What is the name of the river in Paris?\", \"reference_answer\": \"Seine\"}
+ Don't generate questions like "What is being amended in the application?" because these questions cannot be answered using the text and without knowing which document it refers to.
+ The question should be answerable without the text, but the answer should be present in the text.
+ Return ONLY a json response. No other text.
+ """
+
+
+ def calc_entropy(values: List[float]) -> float:
+     """
+     Alternative of scipy.stats.entropy, to not add `scipy` dependency
+     :param values: Input distribution
+     :return: The calculated entropy.
+     """
+     # normalize & filter
+     total = sum(values)
+     values = [i / total for i in values if i > 0]
+     # calc
+     return -sum([pk * math.log(pk) for pk in values])
+
+
+ class EvaluateBase:
+     DEFAULT_QUESTION_COUNT = 20
+     DEFAULT_SAMPLE_SIZE = 10000
+
+     def __init__(self, session, knowledge_base):
+         self.kb = knowledge_base
+         self.name = knowledge_base._kb.name
+         self.session = session
+
+         self._llm_client = None
+
+     def generate(self, sampled_df: pd.DataFrame) -> pd.DataFrame:
+         # generate test data from sample
+         raise NotImplementedError
+
+     def evaluate(self, test_data: pd.DataFrame) -> pd.DataFrame:
+         # create evaluate metric from test data
+         raise NotImplementedError
+
+     def _set_llm_client(self, llm_params: dict):
+         """
+         Logic to get LLM setting:
+         - first get `llm` setting of ‘evaluate’ command
+         - if not defined, look at the knowledge base reranker config
+         """
+         if llm_params is None:
+             llm_params = self.kb._kb.params.get("reranking_model")
+
+         self.llm_client = LLMClient(llm_params)
+
+     def generate_test_data(self, gen_params: dict) -> pd.DataFrame:
+         # Extract source data (from users query or from KB itself) and call `generate` to get test data
+
+         if "from_sql" in gen_params:
+             # get data from sql
+             query = parse_sql(gen_params["from_sql"])
+             if not isinstance(query, Select) or query.from_table is None:
+                 raise ValueError(f"Query not supported {gen_params['from_sql']}")
+
+             dn, table_name = self._get_dn_table(query.from_table)
+             query.from_table = table_name
+             query.limit = Constant(self.DEFAULT_SAMPLE_SIZE)
+
+             response = dn.query(query=query, session=self.session)
+             df = response.data_frame
+
+             if "content" not in df.columns:
+                 raise ValueError("`content` column isn't found in source data")
+
+             df.rename(columns={"content": "chunk_content"}, inplace=True)
+         else:
+             # get data from knowledge base
+             df = self.kb.select_query(
+                 Select(
+                     targets=[Identifier("chunk_content"), Identifier("id")], limit=Constant(self.DEFAULT_SAMPLE_SIZE)
+                 )
+             )
+
+         if "count" in gen_params:
+             number_of_questions = gen_params["count"]
+         else:
+             number_of_questions = self.DEFAULT_QUESTION_COUNT
+
+         number_of_questions = min(number_of_questions, len(df))
+         sampled_df = df.sample(n=number_of_questions)
+
+         return self.generate(sampled_df)
+
+     def read_from_table(self, test_table: Identifier) -> pd.DataFrame:
+         # read data from table
+
+         dn, table_name = self._get_dn_table(test_table)
+
+         query = Select(
+             targets=[Star()],
+             from_table=table_name,
+         )
+         response = dn.query(query=query, session=self.session)
+         return response.data_frame
+
+     def _get_dn_table(self, table_name: Identifier):
+         if len(table_name.parts) < 2:
+             raise ValueError(f"Can't find database, table name must have at least 2 parts: {table_name}")
+
+         integration_name = table_name.parts[0]
+         table_name = Identifier(parts=table_name.parts[1:])
+         dn = self.session.datahub.get(integration_name)
+         return dn, table_name
+
+     def save_to_table(self, table_name: Identifier, df: pd.DataFrame, is_replace=False):
+         # save data to table
+
+         dn, table_name = self._get_dn_table(table_name)
+
+         data = ResultSet.from_df(df)
+
+         dn.create_table(
+             table_name=table_name,
+             result_set=data,
+             is_replace=is_replace,
+             is_create=True,
+             raise_if_exists=False,
+         )
+
+     def run_evaluate(self, params: dict) -> pd.DataFrame:
+         # evaluate function entry point
+
+         self._set_llm_client(params.get("llm"))
+
+         if "test_table" not in params:
+             raise ValueError('The table with has to be defined in "test_table" parameter')
+
+         test_table = params["test_table"]
+
+         if isinstance(test_table, str):
+             test_table = Identifier(test_table)
+
+         if "generate_data" in params:
+             # generate question / answers using llm
+             gen_params = params["generate_data"]
+             if not isinstance(gen_params, dict):
+                 gen_params = {}
+             test_data = self.generate_test_data(gen_params)
+
+             self.save_to_table(test_table, test_data, is_replace=True)
+         else:
+             test_data = self.read_from_table(test_table)
+
+         if params.get("evaluate", True) is False:
+             # no evaluate is required
+             return pd.DataFrame()
+
+         scores = self.evaluate(test_data)
+         scores["name"] = self.name
+         scores["created_at"] = dt.datetime.now()
+
+         # save scores
+         if "save_to" in params:
+             to_table = params["save_to"]
+             if isinstance(to_table, str):
+                 to_table = Identifier(to_table)
+             self.save_to_table(to_table, scores)
+
+         return scores
+
+     @staticmethod
+     def run(session, kb_table, params) -> pd.DataFrame:
+         # choose the evaluator version according to the 'version' parameter in config
+
+         evaluate_version = params.get("version", "doc_id")
+
+         if evaluate_version == "llm_relevancy":
+             cls = EvaluateRerank
+         elif evaluate_version == "doc_id":
+             cls = EvaluateDocID
+         else:
+             raise NotImplementedError(f"Version of evaluator is not implemented: {evaluate_version}")
+
+         return cls(session, kb_table).run_evaluate(params)
+
+
+ class EvaluateRerank(EvaluateBase):
+     """
+     Rank responses from KB using LLM (by calling KB reranker function)
+     """
+
+     TOP_K = 10
+
+     def generate(self, sampled_df: pd.DataFrame) -> pd.DataFrame:
+         qa_data = []
+         count_errors = 0
+         for chunk_content in sampled_df["chunk_content"]:
+             try:
+                 question, answer = self.generate_question_answer(chunk_content)
+             except ValueError as e:
+                 # allow some numbers of error
+                 count_errors += 1
+                 if count_errors > 5:
+                     raise e
+                 continue
+
+             qa_data.append({"text": chunk_content, "question": question, "answer": answer})
+
+         df = pd.DataFrame(qa_data)
+         df["id"] = df.index
+         return df
+
+     def generate_question_answer(self, text: str) -> (str, str):
+         messages = [
+             {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
+             {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
+         ]
+         answer = self.llm_client.completion(messages)
+         try:
+             output = json.loads(answer)
+         except json.JSONDecodeError:
+             raise ValueError(f"Could not parse response from LLM: {answer}")
+
+         if "query" not in output or "reference_answer" not in output:
+             raise ValueError("Cant find question/answer in LLM response")
+
+         return output.get("query"), output.get("reference_answer")
+
+     def evaluate(self, test_data: pd.DataFrame) -> pd.DataFrame:
+         json_to_log_list = []
+         questions = test_data.to_dict("records")
+
+         for i, item in enumerate(questions):
+             question = item["question"]
+             ground_truth = item["answer"]
+
+             start_time = time.time()
+             logger.debug(f"Querying [{i + 1}/{len(questions)}]: {question}")
+             df_answers = self.kb.select_query(Select(targets=[Identifier("chunk_content")], limit=Constant(self.TOP_K)))
+             query_time = time.time() - start_time
+
+             proposed_responses = list(df_answers["chunk_content"])
+
+             # generate answer using llm
+             relevance_score_list = self.kb.score_documents(question, proposed_responses, self.llm_client.params)
+
+             # set binary relevancy
+             binary_relevancy_list = [1 if score >= 0.5 else 0 for score in relevance_score_list]
+
+             # calculate first relevant position
+             first_relevant_position = next((i for i, x in enumerate(binary_relevancy_list) if x == 1), None)
+             json_to_log = {
+                 "question": question,
+                 "ground_truth": ground_truth,
+                 # "relevancy_at_k": relevancy_at_k,
+                 "binary_relevancy_list": binary_relevancy_list,
+                 "relevance_score_list": relevance_score_list,
+                 "first_relevant_position": first_relevant_position,
+                 "query_time": query_time,
+             }
+             json_to_log_list.append(json_to_log)
+
+         evaluation_results = self.evaluate_retrieval_metrics(json_to_log_list)
+         return pd.DataFrame([evaluation_results])
+
+     def evaluate_retrieval_metrics(self, json_to_log_list):
+         """
+         Computes retrieval evaluation metrics from the result log.
+
+         Metrics computed:
+         - Average Relevancy (mean soft relevance score)
+         - Average Relevancy@k (soft score)
+         - Average First Relevant Position
+         - Mean Reciprocal Rank (MRR)
+         - Hit@k
+         - Binary Precision@k
+         - Average Entropy of Relevance Scores
+         - Average nDCG
+
+         Args:
+             json_to_log_list (list): List of evaluation logs per query.
+
+         Returns:
+             dict: A dictionary containing all computed metrics.
+         """
+
+         mrr_list = []
+         hit_at_k_matrix = []
+         binary_precision_at_k_matrix = []
+         ndcg_list = []
+         entropy_list = []
+
+         total_relevancy = 0
+         relevance_score_matrix = []
+         first_relevant_positions = []
+
+         for item in json_to_log_list:
+             binary_relevancy = item["binary_relevancy_list"]
+             relevance_scores = item["relevance_score_list"]
+
+             # Skip if empty
+             if not relevance_scores:
+                 continue
+
+             # Mean relevancy per query
+             query_relevancy = sum(relevance_scores) / len(relevance_scores)
+             total_relevancy += query_relevancy
+
+             # Build score matrix for later average@k
+             relevance_score_matrix.append(relevance_scores)
+
+             # First relevant position
+             pos = item["first_relevant_position"]
+             if pos is not None:
+                 first_relevant_positions.append(pos)
+
+             # MRR
+             reciprocal_rank = 1 / (pos + 1) if pos is not None else 0
+             mrr_list.append(reciprocal_rank)
+
+             # Hit@k and Binary Precision@k
+             hit_row = []
+             precision_row = []
+             for k in range(1, len(binary_relevancy) + 1):
+                 hit = int(any(binary_relevancy[:k]))
+                 precision = sum(binary_relevancy[:k]) / k
+                 hit_row.append(hit)
+                 precision_row.append(precision)
+             hit_at_k_matrix.append(hit_row)
+             binary_precision_at_k_matrix.append(precision_row)
+
+             # Entropy
+
+             entropy = calc_entropy(relevance_scores) if len(relevance_scores) > 1 else 0
+             entropy_list.append(entropy)
+
+             # nDCG
+             def dcg(scores):
+                 return sum(score / math.log2(idx + 2) for idx, score in enumerate(scores))
+
+             ideal = sorted(relevance_scores, reverse=True)
+             actual_dcg = dcg(relevance_scores)
+             ideal_dcg = dcg(ideal)
+             ndcg = actual_dcg / ideal_dcg if ideal_dcg > 0 else 0
+             ndcg_list.append(ndcg)
+
+         # Aggregated metrics
+         num_queries = len(json_to_log_list)
+         average_relevancy = total_relevancy / num_queries if num_queries else 0
+
+         # Relevancy@k
+         average_relevance_score_by_k = []
+         if relevance_score_matrix:
+             relevance_score_matrix = list(zip(*relevance_score_matrix))
+             for col in relevance_score_matrix:
+                 avg_k = sum(col) / len(col)
+                 average_relevance_score_by_k.append(round(avg_k, 2))
+
+         average_first_relevant_position = (
+             sum(first_relevant_positions) / len(first_relevant_positions) if first_relevant_positions else None
+         )
+
+         mean_mrr = sum(mrr_list) / len(mrr_list) if mrr_list else 0
+         hit_at_k_avg = [round(sum(col) / len(col), 2) for col in zip(*hit_at_k_matrix)] if hit_at_k_matrix else []
+         binary_precision_at_k_avg = (
+             [round(sum(col) / len(col), 2) for col in zip(*binary_precision_at_k_matrix)]
+             if binary_precision_at_k_matrix
+             else []
+         )
+         avg_entropy = sum(entropy_list) / len(entropy_list) if entropy_list else 0
+         avg_ndcg = sum(ndcg_list) / len(ndcg_list) if ndcg_list else 0
+
+         avg_query_time = sum(item["query_time"] for item in json_to_log_list) / num_queries
+
+         return {
+             "avg_relevancy": average_relevancy,
+             "avg_relevance_score_by_k": average_relevance_score_by_k,
+             "avg_first_relevant_position": average_first_relevant_position,
+             "mean_mrr": mean_mrr,
+             "hit_at_k": hit_at_k_avg,
+             "bin_precision_at_k": binary_precision_at_k_avg,
+             "avg_entropy": avg_entropy,
+             "avg_ndcg": avg_ndcg,
+             "avg_query_time": avg_query_time,
+         }
+
+
+ class EvaluateDocID(EvaluateBase):
+     """
+     Checks if ID in response from KB is matched with doc ID in test dataset
+     """
+
+     TOP_K = 100
+
+     def generate(self, sampled_df: pd.DataFrame) -> pd.DataFrame:
+         if "id" not in sampled_df.columns:
+             raise ValueError("'id' column is required for generating test dataset")
+
+         qa_data = []
+         count_errors = 0
+         for _, item in sampled_df.iterrows():
+             chunk_content = item["chunk_content"]
+             try:
+                 question, answer = self.generate_question_answer(chunk_content)
+             except ValueError as e:
+                 # allow some numbers of error
+                 count_errors += 1
+                 if count_errors > 5:
+                     raise e
+                 continue
+
+             qa_data.append({"text": chunk_content, "question": question, "answer": answer, "doc_id": item["id"]})
+         if len(qa_data) == 0:
+             raise ValueError("No data in generated test dataset")
+         df = pd.DataFrame(qa_data)
+         return df
+
+     def generate_question_answer(self, text: str) -> (str, str):
+         messages = [
+             {"role": "system", "content": GENERATE_QA_SYSTEM_PROMPT},
+             {"role": "user", "content": f"\n\nText:\n{text}\n\n"},
+         ]
+         answer = self.llm_client.completion(messages)
+         try:
+             output = json.loads(answer)
+         except json.JSONDecodeError:
+             raise ValueError(f"Could not parse response from LLM: {answer}")
+
+         if "query" not in output or "reference_answer" not in output:
+             raise ValueError("Cant find question/answer in LLM response")
+
+         return output.get("query"), output.get("reference_answer")
+
+     def evaluate(self, test_data: pd.DataFrame) -> pd.DataFrame:
+         stats = []
+         questions = test_data.to_dict("records")
+
+         for i, item in enumerate(questions):
+             question = item["question"]
+             doc_id = item["doc_id"]
+
+             start_time = time.time()
+             logger.debug(f"Querying [{i + 1}/{len(questions)}]: {question}")
+             df_answers = self.kb.select_query(
+                 Select(targets=[Identifier("chunk_content"), Identifier("id")], limit=Constant(self.TOP_K))
+             )
+             query_time = time.time() - start_time
+
+             retrieved_doc_ids = list(df_answers["id"])
+
+             if doc_id in retrieved_doc_ids:
+                 doc_found = True
+                 doc_position = retrieved_doc_ids.index(doc_id)
+             else:
+                 doc_found = False
+                 doc_position = -1
+
+             stats.append(
+                 {
+                     "question": question,
+                     "doc_id": doc_id,
+                     "doc_found": doc_found,
+                     "doc_position": doc_position,
+                     "query_time": query_time,
+                 }
+             )
+
+         evaluation_results = self.summarize_results(stats)
+         return pd.DataFrame([evaluation_results])
+
+     def summarize_results(self, stats):
+         total_questions = len(stats)
+         total_found = sum([1 for stat in stats if stat["doc_found"]])
+
+         total_accurately_retrieved = sum([1 for stat in stats if stat["doc_found"]])
+
+         accurate_in_top_10 = sum([1 for stat in stats if stat["doc_found"] and stat["doc_position"] < 10])
+
+         # calculate recall curve by position
+         recall_curve = {}
+         for i in range(self.TOP_K):
+             recall_curve[i] = sum([1 for stat in stats if stat["doc_found"] and stat["doc_position"] == i])
+         # convert to proportion of total questions
+         for i in range(self.TOP_K):
+             recall_curve[i] = recall_curve[i] / total_questions
+         # calculate cumulative recall
+         cumulative_recall = {}
+         for i in range(self.TOP_K):
+             cumulative_recall[i] = sum([recall_curve[j] for j in range(i + 1)])
+
+         avg_query_time = sum(item["query_time"] for item in stats) / total_questions
+         return {
+             "total": total_questions,
+             "total_found": total_found,
+             "retrieved_in_top_k": total_accurately_retrieved,
+             "retrieved_in_top_10": accurate_in_top_10,
+             "cumulative_recall": cumulative_recall,
+             "avg_query_time": avg_query_time,
+         }
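
For readers skimming the new evaluator, here is a minimal standalone sketch of the per-query arithmetic that EvaluateRerank.evaluate_retrieval_metrics applies (reciprocal rank, Hit@k, binary Precision@k, score entropy, nDCG). The relevance scores below are made-up illustration values, not output from MindsDB:

import math

# One query's reranker scores (illustrative values only, not from MindsDB)
relevance_scores = [0.9, 0.2, 0.7, 0.1]
binary_relevancy = [1 if s >= 0.5 else 0 for s in relevance_scores]  # [1, 0, 1, 0]

# Reciprocal rank: 1 / (1 + index of the first relevant hit)
pos = next((i for i, x in enumerate(binary_relevancy) if x == 1), None)
reciprocal_rank = 1 / (pos + 1) if pos is not None else 0  # 1.0 here

# Hit@k and binary Precision@k for k = 1..len(results)
hit_at_k = [int(any(binary_relevancy[:k])) for k in range(1, len(binary_relevancy) + 1)]
precision_at_k = [sum(binary_relevancy[:k]) / k for k in range(1, len(binary_relevancy) + 1)]

# Entropy of the normalized score distribution (same arithmetic as calc_entropy above)
total = sum(relevance_scores)
probs = [s / total for s in relevance_scores if s > 0]
entropy = -sum(p * math.log(p) for p in probs)

# nDCG: DCG of the returned order divided by DCG of the ideal (sorted) order
def dcg(scores):
    return sum(score / math.log2(idx + 2) for idx, score in enumerate(scores))

ndcg = dcg(relevance_scores) / dcg(sorted(relevance_scores, reverse=True))

print(reciprocal_rank, hit_at_k, precision_at_k, round(entropy, 3), round(ndcg, 3))

The evaluator then averages these per-query values across the whole test set, which is what the avg_* values, mean_mrr, hit_at_k and bin_precision_at_k entries in the returned score row hold.
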
mindsdb/interfaces/knowledge_base/llm_client.py (new file)
@@ -0,0 +1,75 @@
+ import copy
+ import os
+ from typing import List
+
+ from openai import OpenAI, AzureOpenAI
+
+ from mindsdb.integrations.utilities.handler_utils import get_api_key
+ from mindsdb.utilities.config import config
+
+
+ class LLMClient:
+     """
+     Class for accession to LLM.
+     It chooses openai client or litellm handler depending on the config
+     """
+
+     def __init__(self, llm_params: dict = None):
+         params = copy.deepcopy(config.get("default_llm", {}))
+
+         if llm_params:
+             params.update(llm_params)
+
+         self.params = params
+
+         self.provider = params.get("provider", "openai")
+
+         if "api_key" not in params:
+             params["api_key"] = get_api_key(self.provider, params, strict=False)
+
+         if self.provider == "azure_openai":
+             azure_api_key = params.get("api_key") or os.getenv("AZURE_OPENAI_API_KEY")
+             azure_api_endpoint = params.get("base_url") or os.environ.get("AZURE_OPENAI_ENDPOINT")
+             azure_api_version = params.get("api_version") or os.environ.get("AZURE_OPENAI_API_VERSION")
+             self._llm_client = AzureOpenAI(
+                 api_key=azure_api_key, azure_endpoint=azure_api_endpoint, api_version=azure_api_version, max_retries=2
+             )
+         elif self.provider == "openai":
+             openai_api_key = params.get("api_key") or os.getenv("OPENAI_API_KEY")
+             base_url = params.get("base_url")
+             self.client = OpenAI(api_key=openai_api_key, base_url=base_url, max_retries=2)
+
+         else:
+             # try to use litellm
+             from mindsdb.api.executor.controllers.session_controller import SessionController
+
+             session = SessionController()
+             module = session.integration_controller.get_handler_module("litellm")
+
+             if module is None or module.Handler is None:
+                 raise ValueError(f'Unable to use "{self.provider}" provider. Litellm handler is not installed')
+
+             self.client = module.Handler
+
+     def completion(self, messages: List[dict]) -> str:
+         """
+         Call LLM completion and get response
+         """
+         params = self.params
+
+         if self.provider in ("azure_openai", "openai"):
+             response = self.client.chat.completions.create(
+                 model=params["model_name"],
+                 messages=messages,
+             )
+             return response.choices[0].message.content
+         else:
+             kwargs = params.copy()
+             model = kwargs.pop("model_name")
+
+             base_url = params.pop("base_url", None)
+             if base_url is not None:
+                 kwargs["api_base"] = base_url
+
+             response = self.client.completion(model=f"{self.provider}/{model}", messages=messages, args=kwargs)
+             return response.choices[0].message.content
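
As a rough usage sketch of the new LLMClient, assuming an OpenAI-compatible setup (either a default_llm entry in the MindsDB config or OPENAI_API_KEY in the environment); the model name, chunk text, and the shortened system prompt standing in for GENERATE_QA_SYSTEM_PROMPT are placeholders:

import json

from mindsdb.interfaces.knowledge_base.llm_client import LLMClient

# Placeholder provider/model; LLMClient falls back to the `default_llm` config
# and to get_api_key()/OPENAI_API_KEY when no api_key is passed explicitly.
client = LLMClient({"provider": "openai", "model_name": "gpt-4o"})

chunk = "The Seine is the river that flows through Paris to the English Channel."
messages = [
    # Stand-in for GENERATE_QA_SYSTEM_PROMPT defined in evaluate.py
    {"role": "system", "content": "Return ONLY a JSON object with keys 'query' and 'reference_answer'."},
    {"role": "user", "content": f"\n\nText:\n{chunk}\n\n"},
]

raw = client.completion(messages)  # plain string returned by the provider
qa = json.loads(raw)               # expected: {"query": "...", "reference_answer": "..."}
print(qa["query"], "->", qa["reference_answer"])

This mirrors how generate_question_answer in evaluate.py builds its message list and parses the JSON reply before the evaluator scores the retrieved chunks.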