MindsDB 25.1.4.0__py3-none-any.whl → 25.1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/METADATA +235 -246
- {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/RECORD +44 -42
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/datahub/datanodes/datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +2 -26
- mindsdb/api/http/namespaces/agents.py +3 -1
- mindsdb/api/http/namespaces/knowledge_bases.py +4 -1
- mindsdb/integrations/handlers/databricks_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/requirements.txt +0 -4
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_handler.py +1 -1
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +8 -0
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +4 -2
- mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +5 -3
- mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/requirements.txt +0 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/libs/vectordatabase_handler.py +4 -3
- mindsdb/integrations/utilities/files/__init__.py +0 -0
- mindsdb/integrations/utilities/files/file_reader.py +258 -0
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +8 -3
- mindsdb/integrations/utilities/rag/chains/map_reduce_summarizer_chain.py +5 -9
- mindsdb/integrations/utilities/rag/pipelines/rag.py +1 -3
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +97 -89
- mindsdb/integrations/utilities/rag/settings.py +29 -14
- mindsdb/interfaces/agents/agents_controller.py +15 -3
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +15 -10
- mindsdb/interfaces/agents/langfuse_callback_handler.py +4 -0
- mindsdb/interfaces/agents/mindsdb_database_agent.py +14 -0
- mindsdb/interfaces/database/integrations.py +5 -1
- mindsdb/interfaces/database/projects.py +38 -1
- mindsdb/interfaces/knowledge_base/controller.py +26 -11
- mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py +7 -26
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +18 -10
- mindsdb/interfaces/skills/skill_tool.py +12 -6
- mindsdb/interfaces/skills/skills_controller.py +7 -3
- mindsdb/interfaces/skills/sql_agent.py +81 -18
- mindsdb/utilities/langfuse.py +15 -0
- {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.4.0.dist-info → MindsDB-25.1.5.1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
import re
|
|
3
|
+
import csv
|
|
3
4
|
import inspect
|
|
4
|
-
from
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from typing import Iterable, List, Optional, Any
|
|
5
7
|
|
|
6
8
|
import pandas as pd
|
|
7
9
|
from mindsdb_sql_parser import parse_sql
|
|
@@ -14,6 +16,64 @@ from mindsdb.integrations.utilities.query_traversal import query_traversal
|
|
|
14
16
|
logger = log.getLogger(__name__)
|
|
15
17
|
|
|
16
18
|
|
|
19
|
+
def list_to_csv_str(array: Iterable[Iterable[Any]]) -> str:
    """Convert a 2D array into a CSV string.

    Every value is passed through ``str()`` before it is written, so ``None``
    is rendered as the text ``None`` rather than as an empty field.

    Args:
        array (Iterable[Iterable[Any]]): rows of values to convert to CSV
            format; any iterable of iterables is accepted, including a plain
            list of lists.

    Returns:
        str: The rows formatted as CSV using the 'excel' dialect (comma
            separated, minimal quoting, each row terminated by ``\\r\\n``).
            An empty input produces an empty string.
    """
    output = StringIO()
    writer = csv.writer(output, dialect='excel')
    # Stream rows to the writer one at a time instead of materialising a
    # second, fully stringified copy of the whole table first.
    writer.writerows([str(item) for item in row] for row in array)
    return output.getvalue()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def split_table_name(table_name: str) -> List[str]:
    """Split a table name received from an LLM into its identifier parts.

    A dot separates two parts unless it appears inside a backtick-quoted
    section. Backticks only toggle the quoted state and are never copied to
    the output. Empty parts (for example from a leading, trailing or doubled
    dot) are skipped.

    Args:
        table_name (str): input table name

    Returns:
        List[str]: parts of table identifier like ['database', 'schema', 'table']

    Example:
        'input': '`aaa`.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
        'input': '`aaa`.`bbb`.`ccc`', 'output': ['aaa', 'bbb', 'ccc']
        'input': 'aaa.bbb', 'output': ['aaa', 'bbb']
        'input': '`aaa.bbb`', 'output': ['aaa.bbb']
        'input': '`aaa.bbb.ccc`', 'output': ['aaa.bbb.ccc']
        'input': 'aaa.`bbb`', 'output': ['aaa', 'bbb']
        'input': 'aaa.bbb.ccc', 'output': ['aaa', 'bbb', 'ccc']
        'input': 'aaa.`bbb.ccc`', 'output': ['aaa', 'bbb.ccc']
    """
    result = []
    current = []  # characters of the part being collected
    in_backticks = False

    for char in table_name:
        if char == '`':
            # A backtick only flips the quoting state; it is not part of the name.
            in_backticks = not in_backticks
        elif char == '.' and not in_backticks:
            # Unquoted dot: close the current part, ignoring empty ones.
            if current:
                result.append(''.join(current))
                current = []
        else:
            current.append(char)

    if current:
        result.append(''.join(current))

    return result
|
|
75
|
+
|
|
76
|
+
|
|
17
77
|
class SQLAgent:
|
|
18
78
|
def __init__(
|
|
19
79
|
self,
|
|
@@ -161,15 +221,17 @@ class SQLAgent:
|
|
|
161
221
|
continue
|
|
162
222
|
|
|
163
223
|
# Some LLMs (e.g. gpt-4o) may include backticks or quotes when invoking tools.
|
|
164
|
-
|
|
165
|
-
|
|
224
|
+
table_parts = split_table_name(table_name)
|
|
225
|
+
if len(table_parts) == 1:
|
|
226
|
+
# most likely LLM enclosed all table name in backticks `database.table`
|
|
227
|
+
table_parts = split_table_name(table_name)
|
|
166
228
|
|
|
167
229
|
# resolved table
|
|
168
|
-
|
|
230
|
+
table_identifier = tables_idx.get(tuple(table_parts))
|
|
169
231
|
|
|
170
|
-
if
|
|
171
|
-
raise ValueError(f"Table {table} not found in database")
|
|
172
|
-
tables.append(
|
|
232
|
+
if table_identifier is None:
|
|
233
|
+
raise ValueError(f"Table {table} not found in the database")
|
|
234
|
+
tables.append(table_identifier)
|
|
173
235
|
|
|
174
236
|
return tables
|
|
175
237
|
|
|
@@ -217,8 +279,7 @@ class SQLAgent:
|
|
|
217
279
|
dtypes.append(column.get('type', ''))
|
|
218
280
|
|
|
219
281
|
info = f'Table named `{table_str}`:\n'
|
|
220
|
-
info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}
|
|
221
|
-
info += "\t".join([field for field in fields])
|
|
282
|
+
info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str} in CSV format (dialect is 'excel'). Table contains 99 rows. Only 3 rows are shown from the entire table.:\n"
|
|
222
283
|
info += self._get_sample_rows(table_str, fields) + "\n"
|
|
223
284
|
info += '\nColumn data types: ' + ",\t".join(
|
|
224
285
|
[f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
|
|
@@ -229,9 +290,14 @@ class SQLAgent:
|
|
|
229
290
|
try:
|
|
230
291
|
ret = self._call_engine(command)
|
|
231
292
|
sample_rows = ret.data.to_lists()
|
|
293
|
+
|
|
294
|
+
def truncate_value(val):
|
|
295
|
+
str_val = str(val)
|
|
296
|
+
return str_val if len(str_val) < 100 else (str_val[:100] + '...')
|
|
297
|
+
|
|
232
298
|
sample_rows = list(
|
|
233
|
-
map(lambda
|
|
234
|
-
sample_rows_str = "\n" +
|
|
299
|
+
map(lambda row: [truncate_value(value) for value in row], sample_rows))
|
|
300
|
+
sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
|
|
235
301
|
except Exception as e:
|
|
236
302
|
logger.warning(e)
|
|
237
303
|
sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
|
|
@@ -249,9 +315,6 @@ class SQLAgent:
|
|
|
249
315
|
If the statement returns no rows, an empty string is returned.
|
|
250
316
|
"""
|
|
251
317
|
|
|
252
|
-
def _tidy(result: List) -> str:
|
|
253
|
-
return '\n'.join(['\t'.join([str(value) for value in row]) for row in result])
|
|
254
|
-
|
|
255
318
|
def _repr_result(ret):
|
|
256
319
|
limit_rows = 30
|
|
257
320
|
|
|
@@ -267,16 +330,16 @@ class SQLAgent:
|
|
|
267
330
|
res += f'First {limit_rows} rows:\n'
|
|
268
331
|
|
|
269
332
|
else:
|
|
270
|
-
res += '
|
|
271
|
-
|
|
272
|
-
res += _tidy(data[:limit_rows])
|
|
333
|
+
res += "Result in CSV format (dialect is 'excel'):\n"
|
|
334
|
+
res += list_to_csv_str([[col.name for col in ret.columns]] + data[:limit_rows])
|
|
273
335
|
return res
|
|
274
336
|
|
|
275
337
|
ret = self._call_engine(self._clean_query(command))
|
|
276
338
|
if fetch == "all":
|
|
277
339
|
result = _repr_result(ret.data)
|
|
278
340
|
elif fetch == "one":
|
|
279
|
-
result =
|
|
341
|
+
result = "Result in CSV format (dialect is 'excel'):\n"
|
|
342
|
+
result += list_to_csv_str([[col.name for col in ret.data.columns]] + [ret.data.to_lists()[0]])
|
|
280
343
|
else:
|
|
281
344
|
raise ValueError("Fetch parameter must be either 'one' or 'all'")
|
|
282
345
|
return str(result)
|
mindsdb/utilities/langfuse.py
CHANGED
|
@@ -147,6 +147,21 @@ class LangfuseClientWrapper:
|
|
|
147
147
|
|
|
148
148
|
logger.info(f"Langfuse trace configured with ID: {self.trace.id}")
|
|
149
149
|
|
|
150
|
+
def get_trace_id(self) -> typing.Optional[str]:
    """
    Get the ID of the current Langfuse trace.

    Returns:
        ``self.trace.id`` when both ``self.client`` and ``self.trace`` are
        set. If either of them is None, a debug message is logged and an
        empty string (not None) is returned.
    """

    if self.client is None:
        logger.debug("Langfuse is disabled.")
        return ""

    if self.trace is None:
        logger.debug("Langfuse trace is not setup.")
        return ""

    return self.trace.id
|
|
164
|
+
|
|
150
165
|
def start_span(self,
|
|
151
166
|
name: str,
|
|
152
167
|
input: typing.Optional[typing.Any] = None) -> typing.Optional[StatefulSpanClient]:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|