MindsDB 25.1.4.0__py3-none-any.whl → 25.1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (44)
  1. {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/METADATA +235 -246
  2. {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/RECORD +44 -42
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
  5. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +1 -1
  7. mindsdb/api/executor/datahub/datanodes/project_datanode.py +2 -26
  8. mindsdb/api/http/namespaces/agents.py +3 -1
  9. mindsdb/api/http/namespaces/knowledge_bases.py +4 -1
  10. mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
  11. mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
  12. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
  13. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
  14. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +4 -2
  15. mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
  16. mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
  17. mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
  18. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
  19. mindsdb/integrations/libs/vectordatabase_handler.py +4 -3
  20. mindsdb/integrations/utilities/files/__init__.py +0 -0
  21. mindsdb/integrations/utilities/files/file_reader.py +258 -0
  22. mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
  23. mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
  24. mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
  25. mindsdb/integrations/utilities/rag/pipelines/rag.py +1 -3
  26. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +97 -89
  27. mindsdb/integrations/utilities/rag/settings.py +29 -14
  28. mindsdb/interfaces/agents/agents_controller.py +15 -3
  29. mindsdb/interfaces/agents/constants.py +1 -0
  30. mindsdb/interfaces/agents/langchain_agent.py +15 -10
  31. mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -0
  32. mindsdb/interfaces/agents/mindsdb_database_agent.py +14 -0
  33. mindsdb/interfaces/database/integrations.py +5 -1
  34. mindsdb/interfaces/database/projects.py +38 -1
  35. mindsdb/interfaces/knowledge_base/controller.py +26 -11
  36. mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
  37. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +18 -10
  38. mindsdb/interfaces/skills/skill_tool.py +12 -6
  39. mindsdb/interfaces/skills/skills_controller.py +7 -3
  40. mindsdb/interfaces/skills/sql_agent.py +81 -18
  41. mindsdb/utilities/langfuse.py +15 -0
  42. {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/LICENSE +0 -0
  43. {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/WHEEL +0 -0
  44. {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/skills/sql_agent.py +81 -18
@@ -1,7 +1,9 @@
1
1
 
2
2
  import re
3
+ import csv
3
4
  import inspect
4
- from typing import Iterable, List, Optional
5
+ from io import StringIO
6
+ from typing import Iterable, List, Optional, Any
5
7
 
6
8
  import pandas as pd
7
9
  from mindsdb_sql_parser import parse_sql
@@ -14,6 +16,64 @@ from mindsdb.integrations.utilities.query_traversal import query_traversal
14
16
  logger = log.getLogger(__name__)
15
17
 
16
18
 
19
+ def list_to_csv_str(array: List[List[Any]]) -> str:
20
+ """Convert a 2D array into a CSV string.
21
+
22
+ Args:
23
+ array (List[List[Any]]): A 2D array/list of values to convert to CSV format
24
+
25
+ Returns:
26
+ str: The array formatted as a CSV string using Excel dialect
27
+ """
28
+ output = StringIO()
29
+ writer = csv.writer(output, dialect='excel')
30
+ str_array = [[str(item) for item in row] for row in array]
31
+ writer.writerows(str_array)
32
+ return output.getvalue()
33
+
34
+
35
+ def split_table_name(table_name: str) -> List[str]:
36
+ """Split table name from llm to parst
37
+
38
+ Args:
39
+ table_name (str): input table name
40
+
41
+ Returns:
42
+ List[str]: parts of table identifier like ['database', 'schema', 'table']
43
+
44
+ Example:
45
+ 'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
46
+ 'input': '`aaa`.`bbb`.`ccc`', 'output': ['aaa', 'bbb', 'ccc']
47
+ 'input': 'aaa.bbb', 'output': ['aaa', 'bbb']
48
+ 'input': '`aaa.bbb`', 'output': ['aaa.bbb']
49
+ 'input': '`aaa.bbb.ccc`', 'output': ['aaa.bbb.ccc']
50
+ 'input': 'aaa.`bbb`', 'output': ['aaa', 'bbb']
51
+ 'input': 'aaa.bbb.ccc', 'output': ['aaa', 'bbb', 'ccc']
52
+ 'input': 'aaa.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
53
+ 'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
54
+ """
55
+ result = []
56
+ current = ''
57
+ in_backticks = False
58
+
59
+ i = 0
60
+ while i < len(table_name):
61
+ if table_name[i] == '`':
62
+ in_backticks = not in_backticks
63
+ elif table_name[i] == '.' and not in_backticks:
64
+ if current:
65
+ result.append(current.strip('`'))
66
+ current = ''
67
+ else:
68
+ current += table_name[i]
69
+ i += 1
70
+
71
+ if current:
72
+ result.append(current.strip('`'))
73
+
74
+ return result
75
+
76
+
17
77
  class SQLAgent:
18
78
  def __init__(
19
79
  self,
@@ -161,15 +221,17 @@ class SQLAgent:
161
221
  continue
162
222
 
163
223
  # Some LLMs (e.g. gpt-4o) may include backticks or quotes when invoking tools.
164
- table_name = table_name.strip(' `"\'\n\r')
165
- table = Identifier(table_name)
224
+ table_parts = split_table_name(table_name)
225
+ if len(table_parts) == 1:
226
+ # most likely LLM enclosed all table name in backticks `database.table`
227
+ table_parts = split_table_name(table_name)
166
228
 
167
229
  # resolved table
168
- table2 = tables_idx.get(tuple(table.parts))
230
+ table_identifier = tables_idx.get(tuple(table_parts))
169
231
 
170
- if table2 is None:
171
- raise ValueError(f"Table {table} not found in database")
172
- tables.append(table2)
232
+ if table_identifier is None:
233
+ raise ValueError(f"Table {table} not found in the database")
234
+ tables.append(table_identifier)
173
235
 
174
236
  return tables
175
237
 
@@ -217,8 +279,7 @@ class SQLAgent:
217
279
  dtypes.append(column.get('type', ''))
218
280
 
219
281
  info = f'Table named `{table_str}`:\n'
220
- info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
221
- info += "\t".join([field for field in fields])
282
+ info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str} in CSV format (dialect is 'excel'). Table contains 99 rows. Only 3 rows are shown from the entire table.:\n"
222
283
  info += self._get_sample_rows(table_str, fields) + "\n"
223
284
  info += '\nColumn data types: ' + ",\t".join(
224
285
  [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
@@ -229,9 +290,14 @@ class SQLAgent:
229
290
  try:
230
291
  ret = self._call_engine(command)
231
292
  sample_rows = ret.data.to_lists()
293
+
294
+ def truncate_value(val):
295
+ str_val = str(val)
296
+ return str_val if len(str_val) < 100 else (str_val[:100] + '...')
297
+
232
298
  sample_rows = list(
233
- map(lambda ls: [str(i) if len(str(i)) < 100 else str[:100] + '...' for i in ls], sample_rows))
234
- sample_rows_str = "\n" + "\n".join(["\t".join(row) for row in sample_rows])
299
+ map(lambda row: [truncate_value(value) for value in row], sample_rows))
300
+ sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
235
301
  except Exception as e:
236
302
  logger.warning(e)
237
303
  sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
@@ -249,9 +315,6 @@ class SQLAgent:
249
315
  If the statement returns no rows, an empty string is returned.
250
316
  """
251
317
 
252
- def _tidy(result: List) -> str:
253
- return '\n'.join(['\t'.join([str(value) for value in row]) for row in result])
254
-
255
318
  def _repr_result(ret):
256
319
  limit_rows = 30
257
320
 
@@ -267,16 +330,16 @@ class SQLAgent:
267
330
  res += f'First {limit_rows} rows:\n'
268
331
 
269
332
  else:
270
- res += 'Result:\n'
271
-
272
- res += _tidy(data[:limit_rows])
333
+ res += "Result in CSV format (dialect is 'excel'):\n"
334
+ res += list_to_csv_str([[col.name for col in ret.columns]] + data[:limit_rows])
273
335
  return res
274
336
 
275
337
  ret = self._call_engine(self._clean_query(command))
276
338
  if fetch == "all":
277
339
  result = _repr_result(ret.data)
278
340
  elif fetch == "one":
279
- result = _tidy(ret.data.to_lists()[0])
341
+ result = "Result in CSV format (dialect is 'excel'):\n"
342
+ result += list_to_csv_str([[col.name for col in ret.data.columns]] + [ret.data.to_lists()[0]])
280
343
  else:
281
344
  raise ValueError("Fetch parameter must be either 'one' or 'all'")
282
345
  return str(result)
mindsdb/utilities/langfuse.py +15 -0
@@ -147,6 +147,21 @@ class LangfuseClientWrapper:
147
147
 
148
148
  logger.info(f"Langfuse trace configured with ID: {self.trace.id}")
149
149
 
150
+ def get_trace_id(self) -> typing.Optional[str]:
151
+ """
152
+ Get trace ID. If Langfuse is disabled, returns None.
153
+ """
154
+
155
+ if self.client is None:
156
+ logger.debug("Langfuse is disabled.")
157
+ return ""
158
+
159
+ if self.trace is None:
160
+ logger.debug("Langfuse trace is not setup.")
161
+ return ""
162
+
163
+ return self.trace.id
164
+
150
165
  def start_span(self,
151
166
  name: str,
152
167
  input: typing.Optional[typing.Any] = None) -> typing.Optional[StatefulSpanClient]: