MindsDB 25.2.1.2__py3-none-any.whl → 25.2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (23):
  1. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.0.dist-info}/METADATA +223 -223
  2. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.0.dist-info}/RECORD +23 -23
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/api/executor/command_executor.py +0 -56
  5. mindsdb/api/executor/planner/query_planner.py +7 -2
  6. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +19 -11
  7. mindsdb/api/executor/sql_query/steps/subselect_step.py +44 -2
  8. mindsdb/integrations/handlers/file_handler/file_handler.py +13 -320
  9. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +60 -156
  10. mindsdb/integrations/handlers/ms_one_drive_handler/ms_graph_api_one_drive_client.py +3 -3
  11. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +2 -20
  12. mindsdb/integrations/handlers/salesforce_handler/connection_args.py +9 -1
  13. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +2 -1
  14. mindsdb/integrations/utilities/files/file_reader.py +120 -61
  15. mindsdb/integrations/utilities/handlers/api_utilities/microsoft/ms_graph_api_utilities.py +1 -8
  16. mindsdb/integrations/utilities/query_traversal.py +42 -37
  17. mindsdb/interfaces/agents/langfuse_callback_handler.py +205 -27
  18. mindsdb/interfaces/file/file_controller.py +1 -1
  19. mindsdb/utilities/config.py +2 -2
  20. mindsdb/utilities/render/sqlalchemy_render.py +52 -19
  21. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.0.dist-info}/LICENSE +0 -0
  22. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.0.dist-info}/WHEEL +0 -0
  23. {MindsDB-25.2.1.2.dist-info → MindsDB-25.2.2.0.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/agents/langfuse_callback_handler.py
@@ -1,6 +1,7 @@
 from typing import Any, Dict, Union, Optional, List
 from uuid import uuid4
 import datetime
+import json
 
 from langchain_core.callbacks.base import BaseCallbackHandler
 
@@ -21,6 +22,10 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         # if these are not available, we generate some UUIDs
         self.trace_id = trace_id or uuid4().hex
         self.observation_id = observation_id or uuid4().hex
+        # Track metrics about tools and chains
+        self.tool_metrics = {}
+        self.chain_metrics = {}
+        self.current_chain = None
 
     def on_tool_start(
         self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
@@ -30,9 +35,28 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         action_span = self.action_uuid_to_span.get(parent_run_uuid)
         if action_span is None:
             return
+
+        tool_name = serialized.get("name", "tool")
+        start_time = datetime.datetime.now()
+
+        # Initialize or update tool metrics
+        if tool_name not in self.tool_metrics:
+            self.tool_metrics[tool_name] = {
+                'count': 0,
+                'total_time': 0,
+                'errors': 0,
+                'last_error': None,
+                'inputs': []
+            }
+
+        self.tool_metrics[tool_name]['count'] += 1
+        self.tool_metrics[tool_name]['inputs'].append(input_str)
+
         metadata = {
-            'tool_name': serialized.get("name", "tool"),
-            'started': datetime.datetime.now().isoformat()
+            'tool_name': tool_name,
+            'started': start_time.isoformat(),
+            'start_timestamp': start_time.timestamp(),
+            'input_length': len(input_str) if input_str else 0
         }
         action_span.update(metadata=metadata)
 
@@ -42,9 +66,25 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         action_span = self.action_uuid_to_span.get(parent_run_uuid)
         if action_span is None:
             return
+
+        end_time = datetime.datetime.now()
+        tool_name = action_span.metadata.get('tool_name', 'unknown')
+        start_timestamp = action_span.metadata.get('start_timestamp')
+
+        if start_timestamp:
+            duration = end_time.timestamp() - start_timestamp
+            if tool_name in self.tool_metrics:
+                self.tool_metrics[tool_name]['total_time'] += duration
+
+        metadata = {
+            'finished': end_time.isoformat(),
+            'duration_seconds': duration if start_timestamp else None,
+            'output_length': len(output) if output else 0
+        }
+
         action_span.update(
             output=output,  # tool output is action output (unless superseded by a global action output)
-            metadata={'finished': datetime.datetime.now().isoformat()}
+            metadata=metadata
         )
 
     def on_tool_error(
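
Taken together, the two hunks above make on_tool_start and on_tool_end accumulate per-tool counters and wall-clock durations in self.tool_metrics. A minimal sketch of the structure that results; the tool name "sql_db_query" and all values are illustrative, not taken from MindsDB:

# Illustrative only: shape of self.tool_metrics after two runs of a hypothetical
# "sql_db_query" tool, one of which later fails (see on_tool_error below).
tool_metrics = {
    "sql_db_query": {
        "count": 2,                          # incremented on every on_tool_start
        "total_time": 1.40,                  # seconds, summed in on_tool_end from start_timestamp
        "errors": 1,                         # incremented in on_tool_error
        "last_error": "connection reset",
        "inputs": ["SELECT 1", "SELECT 2"],
    }
}
average = tool_metrics["sql_db_query"]["total_time"] / tool_metrics["sql_db_query"]["count"]
print(f"avg duration: {average:.2f}s")  # avg duration: 0.70s
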
@@ -55,75 +95,213 @@ class LangfuseCallbackHandler(BaseCallbackHandler):
         action_span = self.action_uuid_to_span.get(parent_run_uuid)
         if action_span is None:
             return
+
         try:
             error_str = str(error)
         except Exception:
             error_str = "Couldn't get error string."
-        action_span.update(metadata={'error_description': error_str})
+
+        tool_name = action_span.metadata.get('tool_name', 'unknown')
+        if tool_name in self.tool_metrics:
+            self.tool_metrics[tool_name]['errors'] += 1
+            self.tool_metrics[tool_name]['last_error'] = error_str
+
+        metadata = {
+            'error_description': error_str,
+            'error_type': error.__class__.__name__,
+            'error_time': datetime.datetime.now().isoformat()
+        }
+        action_span.update(metadata=metadata)
 
     def on_chain_start(
         self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
     ) -> Any:
         """Run when chain starts running."""
+        if self.langfuse is None:
+            return
+
         run_uuid = kwargs.get('run_id', uuid4()).hex
 
         if serialized is None:
             serialized = {}
 
-        chain_span = self.langfuse.span(
-            name=f'{serialized.get("name", "chain")}-{run_uuid}',
-            trace_id=self.trace_id,
-            parent_observation_id=self.observation_id,
-            input=str(inputs)
-        )
-        self.chain_uuid_to_span[run_uuid] = chain_span
+        chain_name = serialized.get("name", "chain")
+        start_time = datetime.datetime.now()
+
+        # Initialize or update chain metrics
+        if chain_name not in self.chain_metrics:
+            self.chain_metrics[chain_name] = {
+                'count': 0,
+                'total_time': 0,
+                'errors': 0,
+                'last_error': None
+            }
+
+        self.chain_metrics[chain_name]['count'] += 1
+        self.current_chain = chain_name
+
+        try:
+            chain_span = self.langfuse.span(
+                name=f'{chain_name}-{run_uuid}',
+                trace_id=self.trace_id,
+                parent_observation_id=self.observation_id,
+                input=json.dumps(inputs, indent=2)
+            )
+
+            metadata = {
+                'chain_name': chain_name,
+                'started': start_time.isoformat(),
+                'start_timestamp': start_time.timestamp(),
+                'input_keys': list(inputs.keys()) if isinstance(inputs, dict) else None,
+                'input_size': len(inputs) if isinstance(inputs, dict) else len(str(inputs))
+            }
+            chain_span.update(metadata=metadata)
+            self.chain_uuid_to_span[run_uuid] = chain_span
+        except Exception as e:
+            logger.warning(f"Error creating Langfuse span: {str(e)}")
 
     def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
         """Run when chain ends running."""
+        if self.langfuse is None:
+            return
+
         chain_uuid = kwargs.get('run_id', uuid4()).hex
         if chain_uuid not in self.chain_uuid_to_span:
             return
         chain_span = self.chain_uuid_to_span.pop(chain_uuid)
         if chain_span is None:
             return
-        chain_span.update(output=str(outputs))
-        chain_span.end()
+
+        try:
+            end_time = datetime.datetime.now()
+            chain_name = chain_span.metadata.get('chain_name', 'unknown')
+            start_timestamp = chain_span.metadata.get('start_timestamp')
+
+            if start_timestamp and chain_name in self.chain_metrics:
+                duration = end_time.timestamp() - start_timestamp
+                self.chain_metrics[chain_name]['total_time'] += duration
+
+            metadata = {
+                'finished': end_time.isoformat(),
+                'duration_seconds': duration if start_timestamp else None,
+                'output_keys': list(outputs.keys()) if isinstance(outputs, dict) else None,
+                'output_size': len(outputs) if isinstance(outputs, dict) else len(str(outputs))
+            }
+            chain_span.update(output=json.dumps(outputs, indent=2), metadata=metadata)
+            chain_span.end()
+        except Exception as e:
+            logger.warning(f"Error updating Langfuse span: {str(e)}")
 
     def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
         """Run when chain errors."""
-        # Do nothing for now.
-        pass
+        chain_uuid = kwargs.get('run_id', uuid4()).hex
+        if chain_uuid not in self.chain_uuid_to_span:
+            return
+        chain_span = self.chain_uuid_to_span.get(chain_uuid)
+        if chain_span is None:
+            return
+
+        try:
+            error_str = str(error)
+        except Exception:
+            error_str = "Couldn't get error string."
+
+        chain_name = chain_span.metadata.get('chain_name', 'unknown')
+        if chain_name in self.chain_metrics:
+            self.chain_metrics[chain_name]['errors'] += 1
+            self.chain_metrics[chain_name]['last_error'] = error_str
+
+        metadata = {
+            'error_description': error_str,
+            'error_type': error.__class__.__name__,
+            'error_time': datetime.datetime.now().isoformat()
+        }
+        chain_span.update(metadata=metadata)
 
     def on_agent_action(self, action, **kwargs: Any) -> Any:
         """Run on agent action."""
-        # Do nothing for now.
+        if self.langfuse is None:
+            return
+
         run_uuid = kwargs.get('run_id', uuid4()).hex
-        action_span = self.langfuse.span(
-            name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
-            trace_id=self.trace_id,
-            parent_observation_id=self.observation_id,
-            input=str(action)
-        )
-        self.action_uuid_to_span[run_uuid] = action_span
+        try:
+            action_span = self.langfuse.span(
+                name=f'{getattr(action, "type", "action")}-{getattr(action, "tool", "")}-{run_uuid}',
+                trace_id=self.trace_id,
+                parent_observation_id=self.observation_id,
+                input=str(action)
+            )
+            self.action_uuid_to_span[run_uuid] = action_span
+        except Exception as e:
+            logger.warning(f"Error creating Langfuse span for agent action: {str(e)}")
 
     def on_agent_finish(self, finish, **kwargs: Any) -> Any:
         """Run on agent end."""
-        # Do nothing for now.
+        if self.langfuse is None:
+            return
+
         run_uuid = kwargs.get('run_id', uuid4()).hex
         if run_uuid not in self.action_uuid_to_span:
             return
         action_span = self.action_uuid_to_span.pop(run_uuid)
         if action_span is None:
             return
-        if finish is not None:
-            action_span.update(output=finish)  # supersedes tool output
-        action_span.end()
+
+        try:
+            if finish is not None:
+                action_span.update(output=finish)  # supersedes tool output
+            action_span.end()
+        except Exception as e:
+            logger.warning(f"Error updating Langfuse span: {str(e)}")
 
     def auth_check(self):
         if self.langfuse is not None:
             return self.langfuse.auth_check()
         return False
 
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get collected metrics about tools and chains.
+
+        Returns:
+            Dict containing:
+                - tool_metrics: Statistics about tool usage, errors, and timing
+                - chain_metrics: Statistics about chain execution, errors, and timing
+            For each tool/chain, includes:
+                - count: Number of times used
+                - total_time: Total execution time
+                - errors: Number of errors
+                - last_error: Most recent error message
+                - avg_duration: Average execution time
+        """
+        metrics = {
+            'tool_metrics': {},
+            'chain_metrics': {}
+        }
+
+        # Process tool metrics
+        for tool_name, data in self.tool_metrics.items():
+            metrics['tool_metrics'][tool_name] = {
+                'count': data['count'],
+                'total_time': data['total_time'],
+                'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
+                'errors': data['errors'],
+                'last_error': data['last_error'],
+                'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
+            }
+
+        # Process chain metrics
+        for chain_name, data in self.chain_metrics.items():
+            metrics['chain_metrics'][chain_name] = {
+                'count': data['count'],
+                'total_time': data['total_time'],
+                'avg_duration': data['total_time'] / data['count'] if data['count'] > 0 else 0,
+                'errors': data['errors'],
+                'last_error': data['last_error'],
+                'error_rate': data['errors'] / data['count'] if data['count'] > 0 else 0
+            }
+
+        return metrics
+
 
 def get_skills(agent: db.Agents) -> List:
     """ Retrieve skills from agent `skills` attribute. Specific to agent endpoints. """
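
The new get_metrics() method exposes these counters to callers. A minimal sketch of one way a caller might flatten the result into log lines; summarize_metrics is a hypothetical helper, and `handler` is assumed to be an already-configured LangfuseCallbackHandler instance:

def summarize_metrics(metrics: dict) -> list:
    """Flatten the tool/chain metrics returned by get_metrics() into readable lines."""
    lines = []
    for kind in ("tool_metrics", "chain_metrics"):
        for name, data in metrics.get(kind, {}).items():
            lines.append(
                f"{kind[:-8]} {name}: {data['count']} runs, "
                f"avg {data['avg_duration']:.2f}s, error rate {data['error_rate']:.0%}"
            )
    return lines

# metrics = handler.get_metrics()
# print("\n".join(summarize_metrics(metrics)))
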

mindsdb/interfaces/file/file_controller.py
@@ -82,7 +82,7 @@ class FileController:
 
         file_dir = None
         try:
-            df, _col_map = FileHandler._handle_source(file_path)
+            df = FileHandler.handle_source(file_path)
 
             ds_meta = {"row_count": len(df), "column_names": list(df.columns)}
 
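
The FileController call site now receives a single DataFrame from FileHandler.handle_source instead of unpacking a (df, col_map) tuple from the old private _handle_source. A minimal sketch of updating another caller to the new contract; load_file_preview is a hypothetical helper, and the import path is taken from the files-changed list above:

import pandas as pd
from mindsdb.integrations.handlers.file_handler.file_handler import FileHandler

def load_file_preview(file_path: str, limit: int = 5) -> pd.DataFrame:
    # Before 25.2.2.0: df, _col_map = FileHandler._handle_source(file_path)
    df = FileHandler.handle_source(file_path)
    return df.head(limit)
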

mindsdb/utilities/config.py
@@ -142,7 +142,8 @@ class Config:
             },
             'auth': {
                 'http_auth_enabled': False,
-                "http_permanent_session_lifetime": datetime.timedelta(days=31)
+                "http_permanent_session_lifetime": datetime.timedelta(days=31),
+                "username": "mindsdb"
             },
             "logging": {
                 "handlers": {
@@ -183,7 +184,6 @@ class Config:
             },
             "mysql": {
                 "host": api_host,
-                "password": "",
                 "port": "47335",
                 "database": "mindsdb",
                 "ssl": True,
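
The default config now ships "username": "mindsdb" under auth and drops the empty default MySQL password. A minimal sketch of overlaying user overrides onto these defaults; the merge helper is illustrative only and is not MindsDB's actual config loader:

import datetime

AUTH_DEFAULTS = {
    "http_auth_enabled": False,
    "http_permanent_session_lifetime": datetime.timedelta(days=31),
    "username": "mindsdb",
}

def merge(defaults: dict, overrides: dict) -> dict:
    """Recursively overlay overrides onto defaults (illustrative helper)."""
    out = dict(defaults)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(out.get(key), dict):
            out[key] = merge(out[key], value)
        else:
            out[key] = value
    return out

auth = merge(AUTH_DEFAULTS, {"http_auth_enabled": True, "username": "admin"})
print(auth["username"])  # admin
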

mindsdb/utilities/render/sqlalchemy_render.py
@@ -54,6 +54,23 @@ def _compile_interval(element, compiler, **kw):
     return "INTERVAL " + args
 
 
+class AttributedStr(str):
+    """
+    Custom str-like object to pass it to `_requires_quotes` method with `is_quoted` flag
+    """
+    def __new__(cls, string, is_quoted: bool):
+        obj = str.__new__(cls, string)
+        obj.is_quoted = is_quoted
+        return obj
+
+
+def get_is_quoted(identifier: ast.Identifier):
+    quoted = getattr(identifier, 'is_quoted', [])
+    # len can be different
+    quoted = quoted + [None] * (len(identifier.parts) - len(quoted))
+    return quoted
+
+
 class SqlalchemyRender:
 
     def __init__(self, dialect_name):
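
Because AttributedStr is a plain str subclass that only carries an is_quoted flag, it flows through string handling unchanged while letting the overridden preparer below see the flag. A quick standalone check of that behaviour, re-declaring the class with the same shape as the one added above so the snippet runs on its own:

class AttributedStr(str):
    """str subclass carrying an is_quoted flag (same shape as the class added in this diff)."""
    def __new__(cls, string, is_quoted: bool):
        obj = str.__new__(cls, string)
        obj.is_quoted = is_quoted
        return obj

name = AttributedStr("MyTable", True)
assert isinstance(name, str) and name == "MyTable"  # behaves like a normal string
assert name.is_quoted                                # but carries the quoting hint
assert not isinstance(name.lower(), AttributedStr)   # note: str methods return plain str again
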
@@ -72,6 +89,29 @@ class SqlalchemyRender:
         else:
             dialect = dialect_name
 
+        # override dialect's preparer
+        if hasattr(dialect, 'preparer'):
+            class Preparer(dialect.preparer):
+
+                def __init__(self, *args, **kwargs):
+                    super().__init__(*args, **kwargs)
+
+                def _requires_quotes(self, value: str) -> bool:
+                    # check force-quote flag
+                    if isinstance(value, AttributedStr):
+                        if value.is_quoted:
+                            return True
+
+                    lc_value = value.lower()
+                    return (
+                        lc_value in self.reserved_words
+                        or value[0] in self.illegal_initial_characters
+                        or not self.legal_characters.match(str(value))
+                        # Override sqlalchemy behavior: don't require to quote mixed- or upper-case
+                        # or (lc_value != value)
+                    )
+            dialect.preparer = Preparer
+
         # remove double percent signs
         # https://docs.sqlalchemy.org/en/14/faq/sqlexpressions.html#why-are-percent-signs-being-doubled-up-when-stringifying-sql-statements
         self.dialect = dialect(paramstyle="named")
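
The overridden _requires_quotes keeps SQLAlchemy's reserved-word and illegal-character checks, force-quotes anything flagged through AttributedStr, and deliberately drops the mixed-/upper-case check. A self-contained sketch of that decision logic, with toy reserved words and character rules standing in for the dialect's real self.reserved_words, self.illegal_initial_characters and self.legal_characters:

import re

RESERVED = {"select", "table", "from"}             # stand-in for the dialect's reserved words
ILLEGAL_INITIAL = set("0123456789$")               # stand-in for illegal initial characters
LEGAL = re.compile(r"^[A-Za-z_][A-Za-z0-9_$]*$")   # stand-in for legal identifier characters

def requires_quotes(value) -> bool:
    # force-quote flag set via AttributedStr
    if getattr(value, "is_quoted", False):
        return True
    lc_value = value.lower()
    return (
        lc_value in RESERVED
        or value[0] in ILLEGAL_INITIAL
        or not LEGAL.match(str(value))
        # mixed- or upper-case alone no longer forces quoting (the lc_value != value check is gone)
    )

assert requires_quotes("select")        # reserved word
assert requires_quotes("1st_column")    # illegal first character
assert not requires_quotes("MyColumn")  # mixed case alone is no longer quoted
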
@@ -90,26 +130,16 @@ class SqlalchemyRender:
 
         parts2 = []
 
-        quoted = getattr(identifier, 'is_quoted', [])
-        # len can be different
-        quoted = quoted + [None] * (len(identifier.parts) - len(quoted))
-
+        quoted = get_is_quoted(identifier)
         for i, is_quoted in zip(identifier.parts, quoted):
             if isinstance(i, ast.Star):
                 part = '*'
-            elif is_quoted:
-                part = self.dialect.identifier_preparer.quote(i)
+            elif is_quoted or i.lower() in RESERVED_WORDS:
+                # quote anyway
+                part = self.dialect.identifier_preparer.quote_identifier(i)
             else:
-                part = str(sa.column(i).compile(dialect=self.dialect))
-
-                if not i.islower():
-                    # if lower value is not quoted
-                    # then it is quoted only because of mixed case
-                    # in that case use origin string
-
-                    part_lower = str(sa.column(i.lower()).compile(dialect=self.dialect))
-                    if part.lower() != part_lower and i.lower() not in RESERVED_WORDS:
-                        part = i
+                # quote if required
+                part = self.dialect.identifier_preparer.quote(i)
 
             parts2.append(part)
 
@@ -120,7 +150,9 @@ class SqlalchemyRender:
             return None
         if len(alias.parts) > 1:
             raise NotImplementedError(f'Multiple alias {alias.parts}')
-        return alias.parts[0]
+
+        is_quoted = get_is_quoted(alias)[0]
+        return AttributedStr(alias.parts[0], is_quoted)
 
     def to_expression(self, t):
 
@@ -435,15 +467,16 @@ class SqlalchemyRender:
         schema = None
         if isinstance(table_name, ast.Identifier):
             parts = table_name.parts
+            quoted = get_is_quoted(table_name)
 
             if len(parts) > 2:
                 # TODO tests is failing
                 raise NotImplementedError(f'Path to long: {table_name.parts}')
 
             if len(parts) == 2:
-                schema = parts[-2]
+                schema = AttributedStr(parts[-2], quoted[-2])
 
-            table_name = parts[-1]
+            table_name = AttributedStr(parts[-1], quoted[-1])
 
         return schema, table_name
 
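
With this last hunk, the schema/table split keeps each part's quoting flag by wrapping the parts in AttributedStr, so a part that was quoted in the original query stays quoted in the rendered SQL. A small sketch of the split using a stand-in identifier object in place of mindsdb_sql's ast.Identifier; everything here except the AttributedStr and get_is_quoted shapes copied from the diff is hypothetical:

from dataclasses import dataclass, field

class AttributedStr(str):                 # same shape as the class added in this diff
    def __new__(cls, string, is_quoted):
        obj = str.__new__(cls, string)
        obj.is_quoted = is_quoted
        return obj

def get_is_quoted(identifier):            # same padding logic as the helper added in this diff
    quoted = getattr(identifier, 'is_quoted', [])
    return quoted + [None] * (len(identifier.parts) - len(quoted))

@dataclass
class FakeIdentifier:                     # stand-in for ast.Identifier
    parts: list
    is_quoted: list = field(default_factory=list)

def split_schema_table(identifier):
    parts, quoted = identifier.parts, get_is_quoted(identifier)
    schema = AttributedStr(parts[-2], quoted[-2]) if len(parts) == 2 else None
    return schema, AttributedStr(parts[-1], quoted[-1])

schema, table = split_schema_table(FakeIdentifier(parts=["my_schema", "Order"], is_quoted=[False, True]))
print(schema, schema.is_quoted)  # my_schema False
print(table, table.is_quoted)    # Order True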