MindsDB-25.2.1.2-py3-none-any.whl → MindsDB-25.2.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/METADATA +234 -230
- {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/RECORD +33 -33
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +1 -57
- mindsdb/api/executor/datahub/datanodes/system_tables.py +34 -33
- mindsdb/api/executor/planner/query_planner.py +7 -2
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +19 -11
- mindsdb/api/executor/sql_query/steps/subselect_step.py +44 -2
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +1 -1
- mindsdb/integrations/handlers/byom_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/file_handler.py +13 -320
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +60 -156
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
- mindsdb/integrations/handlers/lancedb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +3 -3
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +3 -3
- mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +2 -20
- mindsdb/integrations/handlers/salesforce_handler/connection_args.py +9 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +2 -1
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +1 -1
- mindsdb/integrations/handlers/writer_handler/requirements.txt +1 -1
- mindsdb/integrations/utilities/files/file_reader.py +120 -61
- mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +1 -8
- mindsdb/integrations/utilities/query_traversal.py +42 -37
- mindsdb/interfaces/agents/langfuse_callback_handler.py +205 -27
- mindsdb/interfaces/file/file_controller.py +1 -1
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +12 -2
- mindsdb/utilities/config.py +2 -2
- mindsdb/utilities/render/sqlalchemy_render.py +52 -19
- {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/WHEEL +0 -0
- {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/utilities/query_traversal.py CHANGED

@@ -1,7 +1,7 @@
 from mindsdb_sql_parser import ast
 
 
-def query_traversal(node, callback, is_table=False, is_target=False, parent_query=None):
+def query_traversal(node, callback, is_table=False, is_target=False, parent_query=None, stack=None):
     """
     :param node: element
     :param callback: function applied to every element
@@ -26,20 +26,25 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
 
     """
 
-    res = callback(node, is_table=is_table, is_target=is_target, parent_query=parent_query)
+    if stack is None:
+        stack = []
+
+    res = callback(node, is_table=is_table, is_target=is_target, parent_query=parent_query, callstack=stack)
+    stack2 = [node] + stack
+
     if res is not None:
         # node is going to be replaced
         return res
 
     if isinstance(node, ast.Select):
         if node.from_table is not None:
-            node_out = query_traversal(node.from_table, callback, is_table=True, parent_query=node)
+            node_out = query_traversal(node.from_table, callback, is_table=True, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.from_table = node_out
 
         array = []
         for node2 in node.targets:
-            node_out = query_traversal(node2, callback, parent_query=node, is_target=True) or node2
+            node_out = query_traversal(node2, callback, parent_query=node, is_target=True, stack=stack2) or node2
             if isinstance(node_out, list):
                 array.extend(node_out)
             else:
@@ -49,51 +54,51 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
         if node.cte is not None:
             array = []
             for cte in node.cte:
-                node_out = query_traversal(cte.query, callback, parent_query=node) or cte
+                node_out = query_traversal(cte.query, callback, parent_query=node, stack=stack2) or cte
                 array.append(node_out)
             node.cte = array
 
         if node.where is not None:
-            node_out = query_traversal(node.where, callback, parent_query=node)
+            node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.where = node_out
 
         if node.group_by is not None:
             array = []
             for node2 in node.group_by:
-                node_out = query_traversal(node2, callback, parent_query=node) or node2
+                node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2
                 array.append(node_out)
             node.group_by = array
 
         if node.having is not None:
-            node_out = query_traversal(node.having, callback, parent_query=node)
+            node_out = query_traversal(node.having, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.having = node_out
 
         if node.order_by is not None:
             array = []
             for node2 in node.order_by:
-                node_out = query_traversal(node2, callback, parent_query=node) or node2
+                node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2
                 array.append(node_out)
             node.order_by = array
 
     elif isinstance(node, (ast.Union, ast.Intersect, ast.Except)):
-        node_out = query_traversal(node.left, callback, parent_query=node)
+        node_out = query_traversal(node.left, callback, parent_query=node, stack=stack2)
         if node_out is not None:
             node.left = node_out
-        node_out = query_traversal(node.right, callback, parent_query=node)
+        node_out = query_traversal(node.right, callback, parent_query=node, stack=stack2)
         if node_out is not None:
             node.right = node_out
 
     elif isinstance(node, ast.Join):
-        node_out = query_traversal(node.right, callback, is_table=True, parent_query=parent_query)
+        node_out = query_traversal(node.right, callback, is_table=True, parent_query=parent_query, stack=stack2)
         if node_out is not None:
             node.right = node_out
-        node_out = query_traversal(node.left, callback, is_table=True, parent_query=parent_query)
+        node_out = query_traversal(node.left, callback, is_table=True, parent_query=parent_query, stack=stack2)
         if node_out is not None:
             node.left = node_out
         if node.condition is not None:
-            node_out = query_traversal(node.condition, callback, parent_query=parent_query)
+            node_out = query_traversal(node.condition, callback, parent_query=parent_query, stack=stack2)
             if node_out is not None:
                 node.condition = node_out
 
@@ -101,46 +106,46 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
                            ast.Exists, ast.NotExists)):
         array = []
         for arg in node.args:
-            node_out = query_traversal(arg, callback, parent_query=parent_query) or arg
+            node_out = query_traversal(arg, callback, parent_query=parent_query, stack=stack2) or arg
             array.append(node_out)
         node.args = array
 
         if isinstance(node, ast.Function):
             if node.from_arg is not None:
-                node_out = query_traversal(node.from_arg, callback, parent_query=parent_query)
+                node_out = query_traversal(node.from_arg, callback, parent_query=parent_query, stack=stack2)
                 if node_out is not None:
                     node.from_arg = node_out
 
     elif isinstance(node, ast.WindowFunction):
-        query_traversal(node.function, callback, parent_query=parent_query)
+        query_traversal(node.function, callback, parent_query=parent_query, stack=stack2)
         if node.partition is not None:
             array = []
             for node2 in node.partition:
-                node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
+                node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
                 array.append(node_out)
             node.partition = array
         if node.order_by is not None:
            array = []
            for node2 in node.order_by:
-                node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
+                node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
                array.append(node_out)
            node.order_by = array
 
     elif isinstance(node, ast.TypeCast):
-        node_out = query_traversal(node.arg, callback, parent_query=parent_query)
+        node_out = query_traversal(node.arg, callback, parent_query=parent_query, stack=stack2)
         if node_out is not None:
             node.arg = node_out
 
     elif isinstance(node, ast.Tuple):
         array = []
         for node2 in node.items:
-            node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
+            node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
             array.append(node_out)
         node.items = array
 
     elif isinstance(node, ast.Insert):
         if node.table is not None:
-            node_out = query_traversal(node.table, callback, is_table=True, parent_query=node)
+            node_out = query_traversal(node.table, callback, is_table=True, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.table = node_out
 
@@ -149,38 +154,38 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
         for row in node.values:
             items = []
             for item in row:
-                item2 = query_traversal(item, callback, parent_query=node) or item
+                item2 = query_traversal(item, callback, parent_query=node, stack=stack2) or item
                 items.append(item2)
             rows.append(items)
         node.values = rows
 
         if node.from_select is not None:
-            node_out = query_traversal(node.from_select, callback, parent_query=node)
+            node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.from_select = node_out
 
     elif isinstance(node, ast.Update):
         if node.table is not None:
-            node_out = query_traversal(node.table, callback, is_table=True, parent_query=node)
+            node_out = query_traversal(node.table, callback, is_table=True, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.table = node_out
 
         if node.where is not None:
-            node_out = query_traversal(node.where, callback, parent_query=node)
+            node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.where = node_out
 
         if node.update_columns is not None:
             changes = {}
             for k, v in node.update_columns.items():
-                v2 = query_traversal(v, callback, parent_query=node)
+                v2 = query_traversal(v, callback, parent_query=node, stack=stack2)
                 if v2 is not None:
                     changes[k] = v2
             if changes:
                 node.update_columns.update(changes)
 
         if node.from_select is not None:
-            node_out = query_traversal(node.from_select, callback, parent_query=node)
+            node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.from_select = node_out
 
@@ -188,50 +193,50 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
         array = []
         if node.columns is not None:
             for node2 in node.columns:
-                node_out = query_traversal(node2, callback, parent_query=node) or node2
+                node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2
                 array.append(node_out)
             node.columns = array
 
         if node.name is not None:
-            node_out = query_traversal(node.name, callback, is_table=True, parent_query=node)
+            node_out = query_traversal(node.name, callback, is_table=True, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.name = node_out
 
         if node.from_select is not None:
-            node_out = query_traversal(node.from_select, callback, parent_query=node)
+            node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.from_select = node_out
 
     elif isinstance(node, ast.Delete):
         if node.where is not None:
-            node_out = query_traversal(node.where, callback, parent_query=node)
+            node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2)
             if node_out is not None:
                 node.where = node_out
 
     elif isinstance(node, ast.OrderBy):
         if node.field is not None:
-            node_out = query_traversal(node.field, callback, parent_query=parent_query)
+            node_out = query_traversal(node.field, callback, parent_query=parent_query, stack=stack2)
             if node_out is not None:
                 node.field = node_out
 
     elif isinstance(node, ast.Case):
         rules = []
         for condition, result in node.rules:
-            condition2 = query_traversal(condition, callback, parent_query=parent_query)
-            result2 = query_traversal(result, callback, parent_query=parent_query)
+            condition2 = query_traversal(condition, callback, parent_query=parent_query, stack=stack2)
+            result2 = query_traversal(result, callback, parent_query=parent_query, stack=stack2)
 
             condition = condition if condition2 is None else condition2
             result = result if result2 is None else result2
             rules.append([condition, result])
         node.rules = rules
-        default = query_traversal(node.default, callback, parent_query=parent_query)
+        default = query_traversal(node.default, callback, parent_query=parent_query, stack=stack2)
         if default is not None:
             node.default = default
 
     elif isinstance(node, list):
         array = []
         for node2 in node:
-            node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
+            node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
             array.append(node_out)
         return array
 
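To show what the new `stack` argument buys a caller: every callback invocation now receives the chain of ancestor nodes as the `callstack` keyword, innermost first, so a callback can tell where in the query a given node sits. The following is a minimal sketch, not MindsDB's own code; it assumes the mindsdb_sql_parser package is installed and relies only on the behavior visible in the hunks above (a callback that returns None leaves the node unchanged).

    from mindsdb_sql_parser import ast
    from mindsdb.integrations.utilities.query_traversal import query_traversal


    def collect_identifiers_with_depth(query):
        """Return (identifier, ancestor_count) pairs found anywhere in `query`."""
        found = []

        def callback(node, is_table=False, is_target=False, parent_query=None, callstack=None):
            # `callstack` holds the ancestors of `node`, innermost first; its length
            # is a rough measure of how deeply nested the node is.
            if isinstance(node, ast.Identifier):
                found.append((node, len(callstack or [])))
            # returning None keeps the node as-is

        query_traversal(query, callback)
        return found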
mindsdb/interfaces/agents/langfuse_callback_handler.py CHANGED

@@ -1,6 +1,7 @@
 from typing import Any, Dict, Union, Optional, List
 from uuid import uuid4
 import datetime
+import json
 
 from langchain_core.callbacks.base import BaseCallbackHandler
 
@@ -21,6 +22,10 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         # if these are not available, we generate some UUIDs
         self.trace_id = trace_id or uuid4().hex
         self.observation_id = observation_id or uuid4().hex
+        # Track metrics about tools and chains
+        self.tool_metrics = {}
+        self.chain_metrics = {}
+        self.current_chain = None
 
     def on_tool_start(
         self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
@@ -30,9 +35,28 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         action_span = self.action_uuid_to_span.get(parent_run_uuid)
         if action_span is None:
             return
+
+        tool_name = serialized.get("name", "tool")
+        start_time = datetime.datetime.now()
+
+        # Initialize or update tool metrics
+        if tool_name not in self.tool_metrics:
+            self.tool_metrics[tool_name] = {
+                'count': 0,
+                'total_time': 0,
+                'errors': 0,
+                'last_error': None,
+                'inputs': []
+            }
+
+        self.tool_metrics[tool_name]['count'] += 1
+        self.tool_metrics[tool_name]['inputs'].append(input_str)
+
         metadata = {
-            'tool_name': …
-            'started': …
+            'tool_name': tool_name,
+            'started': start_time.isoformat(),
+            'start_timestamp': start_time.timestamp(),
+            'input_length': len(input_str) if input_str else 0
         }
         action_span.update(metadata=metadata)
 
@@ -42,9 +66,25 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         action_span = self.action_uuid_to_span.get(parent_run_uuid)
         if action_span is None:
             return
+
+        end_time = datetime.datetime.now()
+        tool_name = action_span.metadata.get('tool_name', 'unknown')
+        start_timestamp = action_span.metadata.get('start_timestamp')
+
+        if start_timestamp:
+            duration = end_time.timestamp() - start_timestamp
+            if tool_name in self.tool_metrics:
+                self.tool_metrics[tool_name]['total_time'] += duration
+
+        metadata = {
+            'finished': end_time.isoformat(),
+            'duration_seconds': duration if start_timestamp else None,
+            'output_length': len(output) if output else 0
+        }
+
         action_span.update(
             output=output,  # tool output is action output (unless superseded by a global action output)
-            metadata=…
+            metadata=metadata
         )
 
     def on_tool_error(
@@ -55,75 +95,213 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         action_span = self.action_uuid_to_span.get(parent_run_uuid)
         if action_span is None:
             return
+
         try:
             error_str = str(error)
         except Exception:
             error_str = "Couldn't get error string."
-        … (removed line; content not captured in this rendering)
+
+        tool_name = action_span.metadata.get('tool_name', 'unknown')
+        if tool_name in self.tool_metrics:
+            self.tool_metrics[tool_name]['errors'] += 1
+            self.tool_metrics[tool_name]['last_error'] = error_str
+
+        metadata = {
+            'error_description': error_str,
+            'error_type': error.__class__.__name__,
+            'error_time': datetime.datetime.now().isoformat()
+        }
+        action_span.update(metadata=metadata)
 
     def on_chain_start(
         self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
     ) -> Any:
         """Run when chain starts running."""
+        if self.langfuse is None:
+            return
+
         run_uuid = kwargs.get('run_id', uuid4()).hex
 
         if serialized is None:
             serialized = {}
 
-        … (7 removed lines; content not captured in this rendering)
+        chain_name = serialized.get("name", "chain")
+        start_time = datetime.datetime.now()
+
+        # Initialize or update chain metrics
+        if chain_name not in self.chain_metrics:
+            self.chain_metrics[chain_name] = {
+                'count': 0,
+                'total_time': 0,
+                'errors': 0,
+                'last_error': None
+            }
+
+        self.chain_metrics[chain_name]['count'] += 1
+        self.current_chain = chain_name
+
+        try:
+            chain_span = self.langfuse.span(
+                name=f'{chain_name}-{run_uuid}',
+                trace_id=self.trace_id,
+                parent_observation_id=self.observation_id,
+                input=json.dumps(inputs, indent=2)
+            )
+
+            metadata = {
+                'chain_name': chain_name,
+                'started': start_time.isoformat(),
+                'start_timestamp': start_time.timestamp(),
+                'input_keys': list(inputs.keys()) if isinstance(inputs, dict) else None,
+                'input_size': len(inputs) if isinstance(inputs, dict) else len(str(inputs))
+            }
+            chain_span.update(metadata=metadata)
+            self.chain_uuid_to_span[run_uuid] = chain_span
+        except Exception as e:
+            logger.warning(f"Error creating Langfuse span: {str(e)}")
 
     def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
         """Run when chain ends running."""
+        if self.langfuse is None:
+            return
+
         chain_uuid = kwargs.get('run_id', uuid4()).hex
         if chain_uuid not in self.chain_uuid_to_span:
             return
         chain_span = self.chain_uuid_to_span.pop(chain_uuid)
         if chain_span is None:
             return
-        … (2 removed lines; content not captured in this rendering)
+
+        try:
+            end_time = datetime.datetime.now()
+            chain_name = chain_span.metadata.get('chain_name', 'unknown')
+            start_timestamp = chain_span.metadata.get('start_timestamp')
+
+            if start_timestamp and chain_name in self.chain_metrics:
+                duration = end_time.timestamp() - start_timestamp
+                self.chain_metrics[chain_name]['total_time'] += duration
+
+            metadata = {
+                'finished': end_time.isoformat(),
+                'duration_seconds': duration if start_timestamp else None,
+                'output_keys': list(outputs.keys()) if isinstance(outputs, dict) else None,
+                'output_size': len(outputs) if isinstance(outputs, dict) else len(str(outputs))
+            }
+            chain_span.update(output=json.dumps(outputs, indent=2), metadata=metadata)
+            chain_span.end()
+        except Exception as e:
+            logger.warning(f"Error updating Langfuse span: {str(e)}")
 
     def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
         """Run when chain errors."""
-        … (2 removed lines; content not captured in this rendering)
+        chain_uuid = kwargs.get('run_id', uuid4()).hex
+        if chain_uuid not in self.chain_uuid_to_span:
+            return
+        chain_span = self.chain_uuid_to_span.get(chain_uuid)
+        if chain_span is None:
+            return
+
+        try:
+            error_str = str(error)
+        except Exception:
+            error_str = "Couldn't get error string."
+
+        chain_name = chain_span.metadata.get('chain_name', 'unknown')
+        if chain_name in self.chain_metrics:
+            self.chain_metrics[chain_name]['errors'] += 1
+            self.chain_metrics[chain_name]['last_error'] = error_str
+
+        metadata = {
+            'error_description': error_str,
+            'error_type': error.__class__.__name__,
+            'error_time': datetime.datetime.now().isoformat()
+        }
+        chain_span.update(metadata=metadata)
 
     def on_agent_action(self, action, **kwargs: Any) -> Any:
         """Run on agent action."""
-        … (removed line; content not captured in this rendering)
+        if self.langfuse is None:
+            return
+
         run_uuid = kwargs.get('run_id', uuid4()).hex
-        … (7 removed lines; content not captured in this rendering)
+        try:
+            action_span = self.langfuse.span(
+                name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
+                trace_id=self.trace_id,
+                parent_observation_id=self.observation_id,
+                input=str(action)
+            )
+            self.action_uuid_to_span[run_uuid] = action_span
+        except Exception as e:
+            logger.warning(f"Error creating Langfuse span for agent action: {str(e)}")
 
     def on_agent_finish(self, finish, **kwargs: Any) -> Any:
         """Run on agent end."""
-        … (removed line; content not captured in this rendering)
+        if self.langfuse is None:
+            return
+
         run_uuid = kwargs.get('run_id', uuid4()).hex
         if run_uuid not in self.action_uuid_to_span:
             return
         action_span = self.action_uuid_to_span.pop(run_uuid)
         if action_span is None:
             return
-        … (3 removed lines; content not captured in this rendering)
+
+        try:
+            if finish is not None:
+                action_span.update(output=finish)  # supersedes tool output
+            action_span.end()
+        except Exception as e:
+            logger.warning(f"Error updating Langfuse span: {str(e)}")
 
     def auth_check(self):
         if self.langfuse is not None:
             return self.langfuse.auth_check()
         return False
 
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get collected metrics about tools and chains.
+
+        Returns:
+            Dict containing:
+            - tool_metrics: Statistics about tool usage, errors, and timing
+            - chain_metrics: Statistics about chain execution, errors, and timing
+            For each tool/chain, includes:
+            - count: Number of times used
+            - total_time: Total execution time
+            - errors: Number of errors
+            - last_error: Most recent error message
+            - avg_duration: Average execution time
+        """
+        metrics = {
+            'tool_metrics': {},
+            'chain_metrics': {}
+        }
+
+        # Process tool metrics
+        for tool_name, data in self.tool_metrics.items():
+            metrics['tool_metrics'][tool_name] = {
+                'count': data['count'],
+                'total_time': data['total_time'],
+                'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
+                'errors': data['errors'],
+                'last_error': data['last_error'],
+                'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
+            }
+
+        # Process chain metrics
+        for chain_name, data in self.chain_metrics.items():
+            metrics['chain_metrics'][chain_name] = {
+                'count': data['count'],
+                'total_time': data['total_time'],
+                'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
+                'errors': data['errors'],
+                'last_error': data['last_error'],
+                'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
+            }
+
+        return metrics
+
 
 def get_skills(agent: db.Agents) -> List:
     """ Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
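For downstream code, a hedged sketch of reading the statistics that the new get_metrics() method exposes after an agent run; how the handler is constructed and attached to the agent is outside this diff, so the `handler` argument below is simply assumed to be an instance that was already used as a LangChain callback.

    from mindsdb.interfaces.agents.langfuse_callback_handler import LangfuseCallbackHandler


    def summarize_run(handler: LangfuseCallbackHandler) -> None:
        # The keys used here (count, avg_duration, error_rate, last_error) come from the diff above.
        metrics = handler.get_metrics()

        for name, stats in metrics['tool_metrics'].items():
            print(f"tool {name}: used {stats['count']}x, "
                  f"avg {stats['avg_duration']:.2f}s, error rate {stats['error_rate']:.0%}")

        for name, stats in metrics['chain_metrics'].items():
            print(f"chain {name}: used {stats['count']}x, "
                  f"avg {stats['avg_duration']:.2f}s, last error: {stats['last_error']}")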
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py CHANGED

@@ -1,5 +1,6 @@
 from typing import List
 from textwrap import dedent
+from datetime import datetime
 
 from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
 from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,6 +12,9 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQ
 class MindsDBSQLToolkit(SQLDatabaseToolkit):
 
     def get_tools(self, prefix='') -> List[BaseTool]:
+
+        current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")
+
         """Get the tools in the toolkit."""
         list_sql_database_tool = ListSQLDatabaseTool(
             name=f'sql_db_list_tables{prefix}',
@@ -45,8 +49,9 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
         Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
         This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
         Follow these instructions with utmost precision:
-        1. …
-        - …
+        1. Final Response Format:
+        - Assume the frontend fully supports Markdown unless the user specifies otherwise.
+        - When the response contains data that fits a table format, present it as a properly formatted Markdown table
         - Ensure clarity and proper structure for easy readability.
         2. Sample Data:
         - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
@@ -59,6 +64,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
         - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
         - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
         5. Date Handling:
+        - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
         - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
         - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
         - Do not compare date values without casting columns to date.
@@ -67,6 +73,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
         SELECT NOW() - INTERVAL 3 HOUR;
         SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
         SELECT NOW() - INTERVAL 1 YEAR;
+        - Always run SELECT NOW() to retrieve the current date when answering current or relative to current date-related questions.
         6. Query Best Practices:
         - Always send only one query at a time.
         - Always enclose the names of tables, schemas, and databases in backticks.
@@ -77,6 +84,9 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
         7. Error Handling:
         - For errors, rewrite and retry the query.
         - For 'Unknown column' errors, check table fields using info_sql_database_tool.
+        8. Identity and Purpose:
+        - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
+        - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
         Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
         """)
 
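Taken in isolation, the pattern this change relies on looks like the sketch below: the timestamp is computed once when get_tools() builds the prompt and is interpolated into the tool description, so the agent sees a fixed snapshot of the server clock instead of guessing the date. This is a standalone illustration, not the toolkit's full prompt.

    from datetime import datetime
    from textwrap import dedent

    current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")

    # The placeholder resolves at prompt-construction time because the template is an f-string.
    date_handling_section = dedent(f"""\
        5. Date Handling:
        - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
        - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates.
        - Always run SELECT NOW() to retrieve the current date when answering date-related questions.
        """)

    print(date_handling_section)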
mindsdb/utilities/config.py CHANGED

@@ -142,7 +142,8 @@ class Config:
             },
             'auth': {
                 'http_auth_enabled': False,
-                "http_permanent_session_lifetime": datetime.timedelta(days=31)
+                "http_permanent_session_lifetime": datetime.timedelta(days=31),
+                "username": "mindsdb"
             },
             "logging": {
                 "handlers": {
@@ -183,7 +184,6 @@ class Config:
             },
             "mysql": {
                 "host": api_host,
-                "password": "",
                 "port": "47335",
                 "database": "mindsdb",
                 "ssl": True,