MindsDB 25.2.1.2__py3-none-any.whl → 25.2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB has been flagged as potentially problematic. (The original page links to further details; the link target is not reproduced in this text.)

Files changed (33):
  1. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/METADATA +234 -230
  2. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/RECORD +33 -33
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/api/executor/command_executor.py +1 -57
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +34 -33
  6. mindsdb/api/executor/planner/query_planner.py +7 -2
  7. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +19 -11
  8. mindsdb/api/executor/sql_query/steps/subselect_step.py +44 -2
  9. mindsdb/integrations/handlers/byom_handler/byom_handler.py +1 -1
  10. mindsdb/integrations/handlers/byom_handler/requirements.txt +1 -1
  11. mindsdb/integrations/handlers/file_handler/file_handler.py +13 -320
  12. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +60 -156
  13. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +1 -1
  14. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +1 -1
  15. mindsdb/integrations/handlers/lancedb_handler/requirements.txt +1 -1
  16. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +3 -3
  17. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +3 -3
  18. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +2 -20
  19. mindsdb/integrations/handlers/salesforce_handler/connection_args.py +9 -1
  20. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +2 -1
  21. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +1 -1
  22. mindsdb/integrations/handlers/writer_handler/requirements.txt +1 -1
  23. mindsdb/integrations/utilities/files/file_reader.py +120 -61
  24. mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +1 -8
  25. mindsdb/integrations/utilities/query_traversal.py +42 -37
  26. mindsdb/interfaces/agents/langfuse_callback_handler.py +205 -27
  27. mindsdb/interfaces/file/file_controller.py +1 -1
  28. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +12 -2
  29. mindsdb/utilities/config.py +2 -2
  30. mindsdb/utilities/render/sqlalchemy_render.py +52 -19
  31. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/LICENSE +0 -0
  32. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/WHEEL +0 -0
  33. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.1.dist-info}/top_level.txt +0 -0
File 25: mindsdb/integrations/utilities/query_traversal.py (+42 -37)
@@ -1,7 +1,7 @@
1
1
  from mindsdb_sql_parser import ast
2
2
 
3
3
 
4
- def query_traversal(node, callback, is_table=False, is_target=False, parent_query=None):
4
+ def query_traversal(node, callback, is_table=False, is_target=False, parent_query=None, stack=None):
5
5
  """
6
6
  :param node: element
7
7
  :param callback: function applied to every element
@@ -26,20 +26,25 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
26
26
 
27
27
  """
28
28
 
29
- res = callback(node, is_table=is_table, is_target=is_target, parent_query=parent_query)
29
+ if stack is None:
30
+ stack = []
31
+
32
+ res = callback(node, is_table=is_table, is_target=is_target, parent_query=parent_query, callstack=stack)
33
+ stack2 = [node] + stack
34
+
30
35
  if res is not None:
31
36
  # node is going to be replaced
32
37
  return res
33
38
 
34
39
  if isinstance(node, ast.Select):
35
40
  if node.from_table is not None:
36
- node_out = query_traversal(node.from_table, callback, is_table=True, parent_query=node)
41
+ node_out = query_traversal(node.from_table, callback, is_table=True, parent_query=node, stack=stack2)
37
42
  if node_out is not None:
38
43
  node.from_table = node_out
39
44
 
40
45
  array = []
41
46
  for node2 in node.targets:
42
- node_out = query_traversal(node2, callback, parent_query=node, is_target=True) or node2
47
+ node_out = query_traversal(node2, callback, parent_query=node, is_target=True, stack=stack2) or node2
43
48
  if isinstance(node_out, list):
44
49
  array.extend(node_out)
45
50
  else:
@@ -49,51 +54,51 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
49
54
  if node.cte is not None:
50
55
  array = []
51
56
  for cte in node.cte:
52
- node_out = query_traversal(cte.query, callback, parent_query=node) or cte
57
+ node_out = query_traversal(cte.query, callback, parent_query=node, stack=stack2) or cte
53
58
  array.append(node_out)
54
59
  node.cte = array
55
60
 
56
61
  if node.where is not None:
57
- node_out = query_traversal(node.where, callback, parent_query=node)
62
+ node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2)
58
63
  if node_out is not None:
59
64
  node.where = node_out
60
65
 
61
66
  if node.group_by is not None:
62
67
  array = []
63
68
  for node2 in node.group_by:
64
- node_out = query_traversal(node2, callback, parent_query=node) or node2
69
+ node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2
65
70
  array.append(node_out)
66
71
  node.group_by = array
67
72
 
68
73
  if node.having is not None:
69
- node_out = query_traversal(node.having, callback, parent_query=node)
74
+ node_out = query_traversal(node.having, callback, parent_query=node, stack=stack2)
70
75
  if node_out is not None:
71
76
  node.having = node_out
72
77
 
73
78
  if node.order_by is not None:
74
79
  array = []
75
80
  for node2 in node.order_by:
76
- node_out = query_traversal(node2, callback, parent_query=node) or node2
81
+ node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2
77
82
  array.append(node_out)
78
83
  node.order_by = array
79
84
 
80
85
  elif isinstance(node, (ast.Union, ast.Intersect, ast.Except)):
81
- node_out = query_traversal(node.left, callback, parent_query=node)
86
+ node_out = query_traversal(node.left, callback, parent_query=node, stack=stack2)
82
87
  if node_out is not None:
83
88
  node.left = node_out
84
- node_out = query_traversal(node.right, callback, parent_query=node)
89
+ node_out = query_traversal(node.right, callback, parent_query=node, stack=stack2)
85
90
  if node_out is not None:
86
91
  node.right = node_out
87
92
 
88
93
  elif isinstance(node, ast.Join):
89
- node_out = query_traversal(node.right, callback, is_table=True, parent_query=parent_query)
94
+ node_out = query_traversal(node.right, callback, is_table=True, parent_query=parent_query, stack=stack2)
90
95
  if node_out is not None:
91
96
  node.right = node_out
92
- node_out = query_traversal(node.left, callback, is_table=True, parent_query=parent_query)
97
+ node_out = query_traversal(node.left, callback, is_table=True, parent_query=parent_query, stack=stack2)
93
98
  if node_out is not None:
94
99
  node.left = node_out
95
100
  if node.condition is not None:
96
- node_out = query_traversal(node.condition, callback, parent_query=parent_query)
101
+ node_out = query_traversal(node.condition, callback, parent_query=parent_query, stack=stack2)
97
102
  if node_out is not None:
98
103
  node.condition = node_out
99
104
 
@@ -101,46 +106,46 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
101
106
  ast.Exists, ast.NotExists)):
102
107
  array = []
103
108
  for arg in node.args:
104
- node_out = query_traversal(arg, callback, parent_query=parent_query) or arg
109
+ node_out = query_traversal(arg, callback, parent_query=parent_query, stack=stack2) or arg
105
110
  array.append(node_out)
106
111
  node.args = array
107
112
 
108
113
  if isinstance(node, ast.Function):
109
114
  if node.from_arg is not None:
110
- node_out = query_traversal(node.from_arg, callback, parent_query=parent_query)
115
+ node_out = query_traversal(node.from_arg, callback, parent_query=parent_query, stack=stack2)
111
116
  if node_out is not None:
112
117
  node.from_arg = node_out
113
118
 
114
119
  elif isinstance(node, ast.WindowFunction):
115
- query_traversal(node.function, callback, parent_query=parent_query)
120
+ query_traversal(node.function, callback, parent_query=parent_query, stack=stack2)
116
121
  if node.partition is not None:
117
122
  array = []
118
123
  for node2 in node.partition:
119
- node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
124
+ node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
120
125
  array.append(node_out)
121
126
  node.partition = array
122
127
  if node.order_by is not None:
123
128
  array = []
124
129
  for node2 in node.order_by:
125
- node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
130
+ node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
126
131
  array.append(node_out)
127
132
  node.order_by = array
128
133
 
129
134
  elif isinstance(node, ast.TypeCast):
130
- node_out = query_traversal(node.arg, callback, parent_query=parent_query)
135
+ node_out = query_traversal(node.arg, callback, parent_query=parent_query, stack=stack2)
131
136
  if node_out is not None:
132
137
  node.arg = node_out
133
138
 
134
139
  elif isinstance(node, ast.Tuple):
135
140
  array = []
136
141
  for node2 in node.items:
137
- node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
142
+ node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
138
143
  array.append(node_out)
139
144
  node.items = array
140
145
 
141
146
  elif isinstance(node, ast.Insert):
142
147
  if node.table is not None:
143
- node_out = query_traversal(node.table, callback, is_table=True, parent_query=node)
148
+ node_out = query_traversal(node.table, callback, is_table=True, parent_query=node, stack=stack2)
144
149
  if node_out is not None:
145
150
  node.table = node_out
146
151
 
@@ -149,38 +154,38 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
149
154
  for row in node.values:
150
155
  items = []
151
156
  for item in row:
152
- item2 = query_traversal(item, callback, parent_query=node) or item
157
+ item2 = query_traversal(item, callback, parent_query=node, stack=stack2) or item
153
158
  items.append(item2)
154
159
  rows.append(items)
155
160
  node.values = rows
156
161
 
157
162
  if node.from_select is not None:
158
- node_out = query_traversal(node.from_select, callback, parent_query=node)
163
+ node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2)
159
164
  if node_out is not None:
160
165
  node.from_select = node_out
161
166
 
162
167
  elif isinstance(node, ast.Update):
163
168
  if node.table is not None:
164
- node_out = query_traversal(node.table, callback, is_table=True, parent_query=node)
169
+ node_out = query_traversal(node.table, callback, is_table=True, parent_query=node, stack=stack2)
165
170
  if node_out is not None:
166
171
  node.table = node_out
167
172
 
168
173
  if node.where is not None:
169
- node_out = query_traversal(node.where, callback, parent_query=node)
174
+ node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2)
170
175
  if node_out is not None:
171
176
  node.where = node_out
172
177
 
173
178
  if node.update_columns is not None:
174
179
  changes = {}
175
180
  for k, v in node.update_columns.items():
176
- v2 = query_traversal(v, callback, parent_query=node)
181
+ v2 = query_traversal(v, callback, parent_query=node, stack=stack2)
177
182
  if v2 is not None:
178
183
  changes[k] = v2
179
184
  if changes:
180
185
  node.update_columns.update(changes)
181
186
 
182
187
  if node.from_select is not None:
183
- node_out = query_traversal(node.from_select, callback, parent_query=node)
188
+ node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2)
184
189
  if node_out is not None:
185
190
  node.from_select = node_out
186
191
 
@@ -188,50 +193,50 @@ def query_traversal(node, callback, is_table=False, is_target=False, parent_quer
188
193
  array = []
189
194
  if node.columns is not None:
190
195
  for node2 in node.columns:
191
- node_out = query_traversal(node2, callback, parent_query=node) or node2
196
+ node_out = query_traversal(node2, callback, parent_query=node, stack=stack2) or node2
192
197
  array.append(node_out)
193
198
  node.columns = array
194
199
 
195
200
  if node.name is not None:
196
- node_out = query_traversal(node.name, callback, is_table=True, parent_query=node)
201
+ node_out = query_traversal(node.name, callback, is_table=True, parent_query=node, stack=stack2)
197
202
  if node_out is not None:
198
203
  node.name = node_out
199
204
 
200
205
  if node.from_select is not None:
201
- node_out = query_traversal(node.from_select, callback, parent_query=node)
206
+ node_out = query_traversal(node.from_select, callback, parent_query=node, stack=stack2)
202
207
  if node_out is not None:
203
208
  node.from_select = node_out
204
209
 
205
210
  elif isinstance(node, ast.Delete):
206
211
  if node.where is not None:
207
- node_out = query_traversal(node.where, callback, parent_query=node)
212
+ node_out = query_traversal(node.where, callback, parent_query=node, stack=stack2)
208
213
  if node_out is not None:
209
214
  node.where = node_out
210
215
 
211
216
  elif isinstance(node, ast.OrderBy):
212
217
  if node.field is not None:
213
- node_out = query_traversal(node.field, callback, parent_query=parent_query)
218
+ node_out = query_traversal(node.field, callback, parent_query=parent_query, stack=stack2)
214
219
  if node_out is not None:
215
220
  node.field = node_out
216
221
 
217
222
  elif isinstance(node, ast.Case):
218
223
  rules = []
219
224
  for condition, result in node.rules:
220
- condition2 = query_traversal(condition, callback, parent_query=parent_query)
221
- result2 = query_traversal(result, callback, parent_query=parent_query)
225
+ condition2 = query_traversal(condition, callback, parent_query=parent_query, stack=stack2)
226
+ result2 = query_traversal(result, callback, parent_query=parent_query, stack=stack2)
222
227
 
223
228
  condition = condition if condition2 is None else condition2
224
229
  result = result if result2 is None else result2
225
230
  rules.append([condition, result])
226
231
  node.rules = rules
227
- default = query_traversal(node.default, callback, parent_query=parent_query)
232
+ default = query_traversal(node.default, callback, parent_query=parent_query, stack=stack2)
228
233
  if default is not None:
229
234
  node.default = default
230
235
 
231
236
  elif isinstance(node, list):
232
237
  array = []
233
238
  for node2 in node:
234
- node_out = query_traversal(node2, callback, parent_query=parent_query) or node2
239
+ node_out = query_traversal(node2, callback, parent_query=parent_query, stack=stack2) or node2
235
240
  array.append(node_out)
236
241
  return array
237
242
 
File 26: mindsdb/interfaces/agents/langfuse_callback_handler.py (+205 -27)
@@ -1,6 +1,7 @@
1
1
  from typing import Any, Dict, Union, Optional, List
2
2
  from uuid import uuid4
3
3
  import datetime
4
+ import json
4
5
 
5
6
  from langchain_core.callbacks.base import BaseCallbackHandler
6
7
 
@@ -21,6 +22,10 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
21
22
  # if these are not available, we generate some UUIDs
22
23
  self.trace_id = trace_id or uuid4().hex
23
24
  self.observation_id = observation_id or uuid4().hex
25
+ # Track metrics about tools and chains
26
+ self.tool_metrics = {}
27
+ self.chain_metrics = {}
28
+ self.current_chain = None
24
29
 
25
30
  def on_tool_start(
26
31
  self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
@@ -30,9 +35,28 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
30
35
  action_span = self.action_uuid_to_span.get(parent_run_uuid)
31
36
  if action_span is None:
32
37
  return
38
+
39
+ tool_name = serialized.get("name", "tool")
40
+ start_time = datetime.datetime.now()
41
+
42
+ # Initialize or update tool metrics
43
+ if tool_name not in self.tool_metrics:
44
+ self.tool_metrics[tool_name] = {
45
+ 'count': 0,
46
+ 'total_time': 0,
47
+ 'errors': 0,
48
+ 'last_error': None,
49
+ 'inputs': []
50
+ }
51
+
52
+ self.tool_metrics[tool_name]['count'] += 1
53
+ self.tool_metrics[tool_name]['inputs'].append(input_str)
54
+
33
55
  metadata = {
34
- 'tool_name': serialized.get("name", "tool"),
35
- 'started': datetime.datetime.now().isoformat()
56
+ 'tool_name': tool_name,
57
+ 'started': start_time.isoformat(),
58
+ 'start_timestamp': start_time.timestamp(),
59
+ 'input_length': len(input_str) if input_str else 0
36
60
  }
37
61
  action_span.update(metadata=metadata)
38
62
 
@@ -42,9 +66,25 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
42
66
  action_span = self.action_uuid_to_span.get(parent_run_uuid)
43
67
  if action_span is None:
44
68
  return
69
+
70
+ end_time = datetime.datetime.now()
71
+ tool_name = action_span.metadata.get('tool_name', 'unknown')
72
+ start_timestamp = action_span.metadata.get('start_timestamp')
73
+
74
+ if start_timestamp:
75
+ duration = end_time.timestamp() - start_timestamp
76
+ if tool_name in self.tool_metrics:
77
+ self.tool_metrics[tool_name]['total_time'] += duration
78
+
79
+ metadata = {
80
+ 'finished': end_time.isoformat(),
81
+ 'duration_seconds': duration if start_timestamp else None,
82
+ 'output_length': len(output) if output else 0
83
+ }
84
+
45
85
  action_span.update(
46
86
  output=output, # tool output is action output (unless superseded by a global action output)
47
- metadata={'finished': datetime.datetime.now().isoformat()}
87
+ metadata=metadata
48
88
  )
49
89
 
50
90
  def on_tool_error(
@@ -55,75 +95,213 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
55
95
  action_span = self.action_uuid_to_span.get(parent_run_uuid)
56
96
  if action_span is None:
57
97
  return
98
+
58
99
  try:
59
100
  error_str = str(error)
60
101
  except Exception:
61
102
  error_str = "Couldn't get error string."
62
- action_span.update(metadata={'error_description': error_str})
103
+
104
+ tool_name = action_span.metadata.get('tool_name', 'unknown')
105
+ if tool_name in self.tool_metrics:
106
+ self.tool_metrics[tool_name]['errors'] += 1
107
+ self.tool_metrics[tool_name]['last_error'] = error_str
108
+
109
+ metadata = {
110
+ 'error_description': error_str,
111
+ 'error_type': error.__class__.__name__,
112
+ 'error_time': datetime.datetime.now().isoformat()
113
+ }
114
+ action_span.update(metadata=metadata)
63
115
 
64
116
  def on_chain_start(
65
117
  self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
66
118
  ) -> Any:
67
119
  """Run when chain starts running."""
120
+ if self.langfuse is None:
121
+ return
122
+
68
123
  run_uuid = kwargs.get('run_id', uuid4()).hex
69
124
 
70
125
  if serialized is None:
71
126
  serialized = {}
72
127
 
73
- chain_span = self.langfuse.span(
74
- name=f'{serialized.get("name", "chain")}-{run_uuid}',
75
- trace_id=self.trace_id,
76
- parent_observation_id=self.observation_id,
77
- input=str(inputs)
78
- )
79
- self.chain_uuid_to_span[run_uuid] = chain_span
128
+ chain_name = serialized.get("name", "chain")
129
+ start_time = datetime.datetime.now()
130
+
131
+ # Initialize or update chain metrics
132
+ if chain_name not in self.chain_metrics:
133
+ self.chain_metrics[chain_name] = {
134
+ 'count': 0,
135
+ 'total_time': 0,
136
+ 'errors': 0,
137
+ 'last_error': None
138
+ }
139
+
140
+ self.chain_metrics[chain_name]['count'] += 1
141
+ self.current_chain = chain_name
142
+
143
+ try:
144
+ chain_span = self.langfuse.span(
145
+ name=f'{chain_name}-{run_uuid}',
146
+ trace_id=self.trace_id,
147
+ parent_observation_id=self.observation_id,
148
+ input=json.dumps(inputs, indent=2)
149
+ )
150
+
151
+ metadata = {
152
+ 'chain_name': chain_name,
153
+ 'started': start_time.isoformat(),
154
+ 'start_timestamp': start_time.timestamp(),
155
+ 'input_keys': list(inputs.keys()) if isinstance(inputs, dict) else None,
156
+ 'input_size': len(inputs) if isinstance(inputs, dict) else len(str(inputs))
157
+ }
158
+ chain_span.update(metadata=metadata)
159
+ self.chain_uuid_to_span[run_uuid] = chain_span
160
+ except Exception as e:
161
+ logger.warning(f"Error creating Langfuse span: {str(e)}")
80
162
 
81
163
  def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
82
164
  """Run when chain ends running."""
165
+ if self.langfuse is None:
166
+ return
167
+
83
168
  chain_uuid = kwargs.get('run_id', uuid4()).hex
84
169
  if chain_uuid not in self.chain_uuid_to_span:
85
170
  return
86
171
  chain_span = self.chain_uuid_to_span.pop(chain_uuid)
87
172
  if chain_span is None:
88
173
  return
89
- chain_span.update(output=str(outputs))
90
- chain_span.end()
174
+
175
+ try:
176
+ end_time = datetime.datetime.now()
177
+ chain_name = chain_span.metadata.get('chain_name', 'unknown')
178
+ start_timestamp = chain_span.metadata.get('start_timestamp')
179
+
180
+ if start_timestamp and chain_name in self.chain_metrics:
181
+ duration = end_time.timestamp() - start_timestamp
182
+ self.chain_metrics[chain_name]['total_time'] += duration
183
+
184
+ metadata = {
185
+ 'finished': end_time.isoformat(),
186
+ 'duration_seconds': duration if start_timestamp else None,
187
+ 'output_keys': list(outputs.keys()) if isinstance(outputs, dict) else None,
188
+ 'output_size': len(outputs) if isinstance(outputs, dict) else len(str(outputs))
189
+ }
190
+ chain_span.update(output=json.dumps(outputs, indent=2), metadata=metadata)
191
+ chain_span.end()
192
+ except Exception as e:
193
+ logger.warning(f"Error updating Langfuse span: {str(e)}")
91
194
 
92
195
  def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
93
196
  """Run when chain errors."""
94
- # Do nothing for now.
95
- pass
197
+ chain_uuid = kwargs.get('run_id', uuid4()).hex
198
+ if chain_uuid not in self.chain_uuid_to_span:
199
+ return
200
+ chain_span = self.chain_uuid_to_span.get(chain_uuid)
201
+ if chain_span is None:
202
+ return
203
+
204
+ try:
205
+ error_str = str(error)
206
+ except Exception:
207
+ error_str = "Couldn't get error string."
208
+
209
+ chain_name = chain_span.metadata.get('chain_name', 'unknown')
210
+ if chain_name in self.chain_metrics:
211
+ self.chain_metrics[chain_name]['errors'] += 1
212
+ self.chain_metrics[chain_name]['last_error'] = error_str
213
+
214
+ metadata = {
215
+ 'error_description': error_str,
216
+ 'error_type': error.__class__.__name__,
217
+ 'error_time': datetime.datetime.now().isoformat()
218
+ }
219
+ chain_span.update(metadata=metadata)
96
220
 
97
221
  def on_agent_action(self, action, **kwargs: Any) -> Any:
98
222
  """Run on agent action."""
99
- # Do nothing for now.
223
+ if self.langfuse is None:
224
+ return
225
+
100
226
  run_uuid = kwargs.get('run_id', uuid4()).hex
101
- action_span = self.langfuse.span(
102
- name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
103
- trace_id=self.trace_id,
104
- parent_observation_id=self.observation_id,
105
- input=str(action)
106
- )
107
- self.action_uuid_to_span[run_uuid] = action_span
227
+ try:
228
+ action_span = self.langfuse.span(
229
+ name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
230
+ trace_id=self.trace_id,
231
+ parent_observation_id=self.observation_id,
232
+ input=str(action)
233
+ )
234
+ self.action_uuid_to_span[run_uuid] = action_span
235
+ except Exception as e:
236
+ logger.warning(f"Error creating Langfuse span for agent action: {str(e)}")
108
237
 
109
238
  def on_agent_finish(self, finish, **kwargs: Any) -> Any:
110
239
  """Run on agent end."""
111
- # Do nothing for now.
240
+ if self.langfuse is None:
241
+ return
242
+
112
243
  run_uuid = kwargs.get('run_id', uuid4()).hex
113
244
  if run_uuid not in self.action_uuid_to_span:
114
245
  return
115
246
  action_span = self.action_uuid_to_span.pop(run_uuid)
116
247
  if action_span is None:
117
248
  return
118
- if finish is not None:
119
- action_span.update(output=finish) # supersedes tool output
120
- action_span.end()
249
+
250
+ try:
251
+ if finish is not None:
252
+ action_span.update(output=finish) # supersedes tool output
253
+ action_span.end()
254
+ except Exception as e:
255
+ logger.warning(f"Error updating Langfuse span: {str(e)}")
121
256
 
122
257
  def auth_check(self):
123
258
  if self.langfuse is not None:
124
259
  return self.langfuse.auth_check()
125
260
  return False
126
261
 
262
+ def get_metrics(self) -> Dict[str, Any]:
263
+ """Get collected metrics about tools and chains.
264
+
265
+ Returns:
266
+ Dict containing:
267
+ - tool_metrics: Statistics about tool usage, errors, and timing
268
+ - chain_metrics: Statistics about chain execution, errors, and timing
269
+ For each tool/chain, includes:
270
+ - count: Number of times used
271
+ - total_time: Total execution time
272
+ - errors: Number of errors
273
+ - last_error: Most recent error message
274
+ - avg_duration: Average execution time
275
+ """
276
+ metrics = {
277
+ 'tool_metrics': {},
278
+ 'chain_metrics': {}
279
+ }
280
+
281
+ # Process tool metrics
282
+ for tool_name, data in self.tool_metrics.items():
283
+ metrics['tool_metrics'][tool_name] = {
284
+ 'count': data['count'],
285
+ 'total_time': data['total_time'],
286
+ 'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
287
+ 'errors': data['errors'],
288
+ 'last_error': data['last_error'],
289
+ 'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
290
+ }
291
+
292
+ # Process chain metrics
293
+ for chain_name, data in self.chain_metrics.items():
294
+ metrics['chain_metrics'][chain_name] = {
295
+ 'count': data['count'],
296
+ 'total_time': data['total_time'],
297
+ 'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
298
+ 'errors': data['errors'],
299
+ 'last_error': data['last_error'],
300
+ 'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
301
+ }
302
+
303
+ return metrics
304
+
127
305
 
128
306
  def get_skills(agent: db.Agents) -> List:
129
307
  """ Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
File 27: mindsdb/interfaces/file/file_controller.py (+1 -1)
@@ -82,7 +82,7 @@ class FileController:
82
82
 
83
83
  file_dir = None
84
84
  try:
85
- df, _col_map = FileHandler._handle_source(file_path)
85
+ df = FileHandler.handle_source(file_path)
86
86
 
87
87
  ds_meta = {"row_count": len(df), "column_names": list(df.columns)}
88
88
 
File 28: mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py (+12 -2)
@@ -1,5 +1,6 @@
1
1
  from typing import List
2
2
  from textwrap import dedent
3
+ from datetime import datetime
3
4
 
4
5
  from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
5
6
  from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,6 +12,9 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQ
11
12
  class MindsDBSQLToolkit(SQLDatabaseToolkit):
12
13
 
13
14
  def get_tools(self, prefix='') -> List[BaseTool]:
15
+
16
+ current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")
17
+
14
18
  """Get the tools in the toolkit."""
15
19
  list_sql_database_tool = ListSQLDatabaseTool(
16
20
  name=f'sql_db_list_tables{prefix}',
@@ -45,8 +49,9 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
45
49
  Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
46
50
  This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
47
51
  Follow these instructions with utmost precision:
48
- 1. Query Output Format:
49
- - Always return results in well-formatted **Markdown tables**.
52
+ 1. Final Response Format:
53
+ - Assume the frontend fully supports Markdown unless the user specifies otherwise.
54
+ - When the response contains data that fits a table format, present it as a properly formatted Markdown table
50
55
  - Ensure clarity and proper structure for easy readability.
51
56
  2. Sample Data:
52
57
  - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
@@ -59,6 +64,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
59
64
  - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
60
65
  - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
61
66
  5. Date Handling:
67
+ - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
62
68
  - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
63
69
  - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
64
70
  - Do not compare date values without casting columns to date.
@@ -67,6 +73,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
67
73
  SELECT NOW() - INTERVAL 3 HOUR;
68
74
  SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
69
75
  SELECT NOW() - INTERVAL 1 YEAR;
76
+ - Always run SELECT NOW() to retrieve the current date when answering current or relative to current date-related questions.
70
77
  6. Query Best Practices:
71
78
  - Always send only one query at a time.
72
79
  - Always enclose the names of tables, schemas, and databases in backticks.
@@ -77,6 +84,9 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
77
84
  7. Error Handling:
78
85
  - For errors, rewrite and retry the query.
79
86
  - For 'Unknown column' errors, check table fields using info_sql_database_tool.
87
+ 8. Identity and Purpose:
88
+ - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
89
+ - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
80
90
  Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
81
91
  """)
82
92
 
File 29: mindsdb/utilities/config.py (+2 -2)
@@ -142,7 +142,8 @@ class Config:
142
142
  },
143
143
  'auth': {
144
144
  'http_auth_enabled': False,
145
- "http_permanent_session_lifetime": datetime.timedelta(days=31)
145
+ "http_permanent_session_lifetime": datetime.timedelta(days=31),
146
+ "username": "mindsdb"
146
147
  },
147
148
  "logging": {
148
149
  "handlers": {
@@ -183,7 +184,6 @@ class Config:
183
184
  },
184
185
  "mysql": {
185
186
  "host": api_host,
186
- "password": "",
187
187
  "port": "47335",
188
188
  "database": "mindsdb",
189
189
  "ssl": True,