MindsDB 25.7.1.0__py3-none-any.whl → 25.7.3.0__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release.
Files changed (38)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +54 -95
  3. mindsdb/api/a2a/agent.py +30 -206
  4. mindsdb/api/a2a/common/server/server.py +26 -27
  5. mindsdb/api/a2a/task_manager.py +93 -227
  6. mindsdb/api/a2a/utils.py +21 -0
  7. mindsdb/api/executor/command_executor.py +7 -2
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
  9. mindsdb/api/executor/utilities/sql.py +97 -21
  10. mindsdb/api/http/namespaces/agents.py +127 -202
  11. mindsdb/api/http/namespaces/config.py +12 -1
  12. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +11 -1
  13. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  14. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +94 -1
  15. mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
  16. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +4 -3
  17. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -3
  18. mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
  19. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
  20. mindsdb/integrations/libs/keyword_search_base.py +41 -0
  21. mindsdb/integrations/libs/vectordatabase_handler.py +35 -14
  22. mindsdb/integrations/utilities/sql_utils.py +11 -0
  23. mindsdb/interfaces/agents/agents_controller.py +2 -2
  24. mindsdb/interfaces/data_catalog/data_catalog_loader.py +18 -4
  25. mindsdb/interfaces/database/projects.py +1 -3
  26. mindsdb/interfaces/functions/controller.py +54 -64
  27. mindsdb/interfaces/functions/to_markdown.py +47 -14
  28. mindsdb/interfaces/knowledge_base/controller.py +134 -35
  29. mindsdb/interfaces/knowledge_base/evaluate.py +53 -10
  30. mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
  31. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
  32. mindsdb/utilities/config.py +46 -39
  33. mindsdb/utilities/exception.py +11 -0
  34. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/METADATA +236 -236
  35. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/RECORD +38 -36
  36. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/WHEEL +0 -0
  37. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/licenses/LICENSE +0 -0
  38. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/top_level.txt +0 -0
mindsdb/api/a2a/common/server/server.py
@@ -135,36 +135,35 @@ class A2AServer:
 
     def _create_response(self, result: Any) -> JSONResponse | EventSourceResponse:
         if isinstance(result, AsyncIterable):
-
-            async def event_generator(result) -> AsyncIterable[dict[str, str]]:
+            # Step 2: Yield actual serialized event as JSON, with timing logs
+            async def event_generator(result):
                 async for item in result:
-                    # Send the data event with immediate flush directive
-                    yield {
-                        "data": item.model_dump_json(exclude_none=True),
-                        "event": "message",
-                        "id": str(id(item)),  # Add a unique ID for each event
-                    }
-                    # Add an empty comment event to force flush
-                    yield {
-                        "comment": " ",  # Empty comment event to force flush
-                    }
-
-            # Create EventSourceResponse with complete headers for browser compatibility
-            return EventSourceResponse(
-                event_generator(result),
-                # Complete set of headers needed for browser streaming
-                headers={
-                    "Cache-Control": "no-cache, no-transform",
-                    "X-Accel-Buffering": "no",
-                    "Connection": "keep-alive",
-                    "Content-Type": "text/event-stream",
-                    "Transfer-Encoding": "chunked",
-                },
-                # Explicitly set media_type
-                media_type="text/event-stream",
-            )
+                    t0 = time.time()
+                    logger.debug(f"[A2AServer] STEP2 serializing item at {t0}: {str(item)[:120]}")
+                    try:
+                        if hasattr(item, "model_dump_json"):
+                            data = item.model_dump_json(exclude_none=True)
+                        else:
+                            data = json.dumps(item)
+                    except Exception as e:
+                        logger.error(f"Serialization error in SSE stream: {e}")
+                        data = json.dumps({"error": f"Serialization error: {str(e)}"})
+                    yield {"data": data}
+
+            # Add robust SSE headers for compatibility
+            sse_headers = {
+                "Content-Type": "text/event-stream",
+                "Cache-Control": "no-cache, no-transform",
+                "X-Accel-Buffering": "no",
+                "Connection": "keep-alive",
+                "Transfer-Encoding": "chunked",
+            }
+            return EventSourceResponse(event_generator(result), headers=sse_headers)
         elif isinstance(result, JSONRPCResponse):
             return JSONResponse(result.model_dump(exclude_none=True))
+        elif isinstance(result, dict):
+            logger.warning("Falling back to JSONResponse for result type: dict")
+            return JSONResponse(result)
         else:
             logger.error(f"Unexpected result type: {type(result)}")
             raise ValueError(f"Unexpected result type: {type(result)}")
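
Note on the hunk above: the per-event "message" envelope and forced-flush comment events are gone, so each chunk now arrives as a bare SSE data field. A minimal consumption sketch with httpx; the port, path, and JSON-RPC method name below are illustrative assumptions, not taken from this diff:

import json
import httpx

payload = {
    "jsonrpc": "2.0",
    "id": "1",
    "method": "tasks/sendSubscribe",  # hypothetical method name
    "params": {"id": "task-1", "message": {"role": "user", "parts": [{"type": "text", "text": "hi"}]}},
}

# Stream the response and decode each SSE "data:" line as JSON.
with httpx.stream("POST", "http://127.0.0.1:47338/", json=payload) as response:
    for line in response.iter_lines():
        if line.startswith("data:"):
            print(json.loads(line[len("data:"):].strip()))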
mindsdb/api/a2a/task_manager.py
@@ -18,14 +18,30 @@ from mindsdb.api.a2a.common.types import (
 )
 from mindsdb.api.a2a.common.server.task_manager import InMemoryTaskManager
 from mindsdb.api.a2a.agent import MindsDBAgent
+from mindsdb.api.a2a.utils import to_serializable
 
 from typing import Union
 import logging
 import asyncio
+import time
 
 logger = logging.getLogger(__name__)
 
 
+def to_question_format(messages):
+    """Convert A2A messages to a list of {"question": ...} dicts for agent compatibility."""
+    out = []
+    for msg in messages:
+        if "question" in msg:
+            out.append(msg)
+        elif "parts" in msg and isinstance(msg["parts"], list):
+            for part in msg["parts"]:
+                part_dict = to_serializable(part)
+                if part_dict.get("type") == "text" and "text" in part_dict:
+                    out.append({"question": part_dict["text"]})
+    return out
+
+
 class AgentTaskManager(InMemoryTaskManager):
     def __init__(
         self,
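
The new to_question_format helper flattens A2A messages into the question dicts the agent consumes; non-text parts are dropped. A quick usage sketch (the payload shapes are illustrative):

from mindsdb.api.a2a.task_manager import to_question_format

messages = [
    {
        "role": "user",
        "parts": [
            {"type": "text", "text": "How many rows are in my_table?"},
            {"type": "data", "data": {"rows": 3}},  # non-text parts are skipped
        ],
    }
]

print(to_question_format(messages))
# [{'question': 'How many rows are in my_table?'}]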
@@ -67,10 +83,13 @@ class AgentTaskManager(InMemoryTaskManager):
             logger.info(f"Task created/updated with history length: {len(task.history) if task.history else 0}")
         except Exception as e:
             logger.error(f"Error creating task: {str(e)}")
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                error=InternalError(message=f"Error creating task: {str(e)}"),
+            error_result = to_serializable(
+                {
+                    "id": request.id,
+                    "error": to_serializable(InternalError(message=f"Error creating task: {str(e)}")),
+                }
             )
+            yield error_result
             return  # Early return from generator
 
         agent = self._create_agent(agent_name)
@@ -123,239 +142,81 @@ class AgentTaskManager(InMemoryTaskManager):
             await self._update_store(task_send_params.id, task_status, [artifact])
 
             # Yield the artifact update
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact),
+            yield to_serializable(
+                SendTaskStreamingResponse(
+                    id=request.id,
+                    result=to_serializable(TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact)),
+                )
             )
 
             # Yield the final status update
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskStatusUpdateEvent(
-                    id=task_send_params.id,
-                    status=TaskStatus(state=task_status.state),
-                    final=True,
-                ),
+            yield to_serializable(
+                SendTaskStreamingResponse(
+                    id=request.id,
+                    result=to_serializable(
+                        TaskStatusUpdateEvent(
+                            id=task_send_params.id,
+                            status=to_serializable(TaskStatus(state=task_status.state)),
+                            final=True,
+                        )
+                    ),
+                )
             )
             return
 
         except Exception as e:
             logger.error(f"Error invoking agent: {e}")
-            yield JSONRPCResponse(
-                id=request.id,
-                error=InternalError(message=f"Error invoking agent: {str(e)}"),
+            error_result = to_serializable(
+                {
+                    "id": request.id,
+                    "error": to_serializable(
+                        JSONRPCResponse(
+                            id=request.id,
+                            error=to_serializable(InternalError(message=f"Error invoking agent: {str(e)}")),
+                        )
+                    ),
+                }
             )
+            yield error_result
             return
 
         # If streaming is enabled (default), use the streaming implementation
         try:
-            # Track the chunks we've seen to avoid duplicates
-            seen_chunks = set()
-
-            async for item in agent.stream(query, task_send_params.sessionId, history=history):
-                # Ensure item has the required fields or provide defaults
-                is_task_complete = item.get("is_task_complete", False)
-
-                # Create a structured thought dictionary to encapsulate the agent's thought process
-                thought_dict = {}
-                parts = []
-
-                # Handle different chunk formats to extract text content
-                if "actions" in item:
-                    # Extract thought process from actions
-                    thought_dict["type"] = "thought"
-                    thought_dict["actions"] = item["actions"]
-
-                    for action in item.get("actions", []):
-                        if "log" in action:
-                            # Use "text" type for all parts, but add a thought_type in metadata
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": action["log"],
-                                    "metadata": {"thought_type": "thought"},
-                                }
-                            )
-                        if "tool_input" in action:
-                            # Include SQL queries
-                            tool_input = action.get("tool_input", "")
-                            if "$START$" in tool_input and "$STOP$" in tool_input:
-                                sql = tool_input.replace("$START$", "").replace("$STOP$", "")
-                                parts.append(
-                                    {
-                                        "type": "text",
-                                        "text": sql,
-                                        "metadata": {"thought_type": "sql"},
-                                    }
-                                )
-
-                elif "steps" in item:
-                    # Extract observations from steps
-                    thought_dict["type"] = "observation"
-                    thought_dict["steps"] = item["steps"]
-
-                    for step in item.get("steps", []):
-                        if "observation" in step:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": step["observation"],
-                                    "metadata": {"thought_type": "observation"},
-                                }
-                            )
-                        if "action" in step and "log" in step["action"]:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": step["action"]["log"],
-                                    "metadata": {"thought_type": "thought"},
-                                }
-                            )
-
-                elif "output" in item:
-                    # Final answer
-                    thought_dict["type"] = "answer"
-                    thought_dict["output"] = item["output"]
-                    parts.append({"type": "text", "text": item["output"]})
-
-                elif "parts" in item and item["parts"]:
-                    # Use existing parts, but ensure they have valid types
-                    for part in item["parts"]:
-                        if part.get("type") in ["text", "file", "data"]:
-                            # Valid type, use as is
-                            parts.append(part)
-                        else:
-                            # Invalid type, convert to text
-                            text_content = part.get("text", "")
-                            if not text_content and "content" in part:
-                                text_content = part["content"]
-
-                            new_part = {"type": "text", "text": text_content}
-
-                            # Preserve metadata if it exists
-                            if "metadata" in part:
-                                new_part["metadata"] = part["metadata"]
-                            else:
-                                new_part["metadata"] = {"thought_type": part.get("type", "text")}
-
-                            parts.append(new_part)
-
-                    # Try to determine the type from parts for the thought dictionary
-                    for part in item["parts"]:
-                        if part.get("type") == "text" and part.get("text", "").startswith("$START$"):
-                            thought_dict["type"] = "sql"
-                            thought_dict["query"] = part.get("text")
-                        else:
-                            thought_dict["type"] = "text"
-
-                elif "content" in item:
-                    # Simple content
-                    thought_dict["type"] = "text"
-                    thought_dict["content"] = item["content"]
-                    parts.append({"type": "text", "text": item["content"]})
-
-                elif "messages" in item:
-                    # Extract content from messages
-                    thought_dict["type"] = "message"
-                    thought_dict["messages"] = item["messages"]
-
-                    for message in item.get("messages", []):
-                        if "content" in message:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": message["content"],
-                                    "metadata": {"thought_type": "message"},
-                                }
-                            )
-
-                # Skip if we have no parts to send
-                if not parts:
-                    continue
-
-                # Process each part individually to ensure true streaming
-                for part in parts:
-                    # Generate a unique key for this part to avoid duplicates
-                    part_key = str(part)
-                    if part_key in seen_chunks:
-                        continue
-                    seen_chunks.add(part_key)
-
-                    # Ensure metadata exists
-                    metadata = item.get("metadata", {})
-
-                    # Add the thought dictionary to metadata for frontend parsing
-                    if thought_dict:
-                        metadata["thought_process"] = thought_dict
-
-                    # Handle error field if present
-                    if "error" in item and not is_task_complete:
-                        logger.warning(f"Error in streaming response: {item['error']}")
-                        # Mark as complete if there's an error
-                        is_task_complete = True
-
-                    if not is_task_complete:
-                        # Create a message with just this part and send it immediately
-                        task_state = TaskState.WORKING
-                        message = Message(role="agent", parts=[part], metadata=metadata)
-                        task_status = TaskStatus(state=task_state, message=message)
-                        await self._update_store(task_send_params.id, task_status, [])
-                        task_update_event = TaskStatusUpdateEvent(
-                            id=task_send_params.id,
-                            status=task_status,
-                            final=False,
-                        )
-                        yield SendTaskStreamingResponse(id=request.id, result=task_update_event)
-
-                # If this is the final chunk, send a completion message
-                if is_task_complete:
-                    task_state = TaskState.COMPLETED
-                    artifact = Artifact(parts=parts, index=0, append=False)
-                    task_status = TaskStatus(state=task_state)
-                    yield SendTaskStreamingResponse(
-                        id=request.id,
-                        result=TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact),
-                    )
-                    await self._update_store(task_send_params.id, task_status, [artifact])
-                    yield SendTaskStreamingResponse(
-                        id=request.id,
-                        result=TaskStatusUpdateEvent(
-                            id=task_send_params.id,
-                            status=TaskStatus(
-                                state=task_status.state,
-                            ),
-                            final=True,
-                        ),
-                    )
-
+            logger.debug(f"[TaskManager] Entering agent.stream() at {time.time()}")
+            # Transform to agent-compatible format
+            agent_messages = to_question_format(
+                [
+                    {
+                        "role": task_send_params.message.role,
+                        "parts": task_send_params.message.parts,
+                        "metadata": task_send_params.message.metadata,
+                    }
+                ]
+            )
+            async for item in agent.streaming_invoke(agent_messages, timeout=60):
+                # Clean up: Remove verbose debug logs, keep only errors and essential info
+                if isinstance(item, dict) and "artifact" in item and "parts" in item["artifact"]:
+                    item["artifact"]["parts"] = [to_serializable(p) for p in item["artifact"]["parts"]]
+                yield to_serializable(item)
         except Exception as e:
             logger.error(f"An error occurred while streaming the response: {e}")
             error_text = f"An error occurred while streaming the response: {str(e)}"
+            # Ensure all parts are plain dicts
             parts = [{"type": "text", "text": error_text}]
-
-            # First send the error as an artifact
-            artifact = Artifact(parts=parts, index=0, append=False)
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact),
-            )
-
-            # Then mark the task as completed with an error
-            task_state = TaskState.FAILED
-            task_status = TaskStatus(state=task_state)
-            await self._update_store(task_send_params.id, task_status, [artifact])
-
-            # Send the final status update
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskStatusUpdateEvent(
-                    id=task_send_params.id,
-                    status=TaskStatus(
-                        state=task_status.state,
-                    ),
-                    final=True,
-                ),
-            )
+            parts = [to_serializable(part) for part in parts]
+            artifact = {
+                "parts": parts,
+                "index": 0,
+                "append": False,
+            }
+            error_result = {
+                "id": request.id,
+                "error": {
+                    "id": task_send_params.id,
+                    "artifact": artifact,
+                },
+            }
+            yield error_result
 
     async def upsert_task(self, task_send_params: TaskSendParams) -> Task:
         """Create or update a task in the task store.
@@ -472,21 +333,26 @@ class AgentTaskManager(InMemoryTaskManager):
     ) -> AsyncIterable[SendTaskStreamingResponse]:
         error = self._validate_request(request)
         if error:
-            # Convert JSONRPCResponse to SendTaskStreamingResponse
-            yield SendTaskStreamingResponse(id=request.id, error=error.error)
+            logger.info(f"[TaskManager] Yielding error at {time.time()} for invalid request: {error}")
+            yield to_serializable(SendTaskStreamingResponse(id=request.id, error=to_serializable(error.error)))
             return
 
         # We can't await an async generator directly, so we need to use it as is
         try:
+            logger.debug(f"[TaskManager] Entering streaming path at {time.time()}")
             async for response in self._stream_generator(request):
+                logger.debug(f"[TaskManager] Yielding streaming response at {time.time()} with: {str(response)[:120]}")
                 yield response
         except Exception as e:
             # If an error occurs, yield an error response
             logger.error(f"Error in on_send_task_subscribe: {str(e)}")
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                error=InternalError(message=f"Error processing streaming request: {str(e)}"),
+            error_result = to_serializable(
+                {
+                    "id": request.id,
+                    "error": to_serializable(InternalError(message=f"Error processing streaming request: {str(e)}")),
+                }
             )
+            yield error_result
 
     async def _update_store(self, task_id: str, status: TaskStatus, artifacts: list[Artifact]) -> Task:
         async with self.lock:
@@ -579,7 +445,7 @@ class AgentTaskManager(InMemoryTaskManager):
             # Just create a minimal response to acknowledge the request
             task_state = TaskState.WORKING
             task = await self._update_store(task_send_params.id, TaskStatus(state=task_state), [])
-            return SendTaskResponse(id=request.id, result=task)
+            return to_serializable(SendTaskResponse(id=request.id, result=task))
         else:
             # For non-streaming mode, collect all chunks into a single response
             async for chunk in stream_gen:
@@ -607,7 +473,7 @@ class AgentTaskManager(InMemoryTaskManager):
                 ),
                 [Artifact(parts=all_parts)],
             )
-            return SendTaskResponse(id=request.id, result=task)
+            return to_serializable(SendTaskResponse(id=request.id, result=task))
         except Exception as e:
             logger.error(f"Error invoking agent: {e}")
             result_text = f"Error invoking agent: {e}"
@@ -619,4 +485,4 @@ class AgentTaskManager(InMemoryTaskManager):
             TaskStatus(state=task_state, message=Message(role="agent", parts=parts)),
             [Artifact(parts=parts)],
         )
-        return SendTaskResponse(id=request.id, result=task)
+        return to_serializable(SendTaskResponse(id=request.id, result=task))
mindsdb/api/a2a/utils.py (new file)
@@ -0,0 +1,21 @@
+def to_serializable(obj):
+    # Primitives
+    if isinstance(obj, (str, int, float, bool, type(None))):
+        return obj
+    # Pydantic v2
+    if hasattr(obj, "model_dump"):
+        return to_serializable(obj.model_dump(exclude_none=True))
+    # Pydantic v1
+    if hasattr(obj, "dict"):
+        return to_serializable(obj.dict(exclude_none=True))
+    # Custom classes with __dict__
+    if hasattr(obj, "__dict__"):
+        return {k: to_serializable(v) for k, v in vars(obj).items() if not k.startswith("_")}
+    # Dicts
+    if isinstance(obj, dict):
+        return {k: to_serializable(v) for k, v in obj.items()}
+    # Lists, Tuples, Sets
+    if isinstance(obj, (list, tuple, set)):
+        return [to_serializable(v) for v in obj]
+    # Fallback: string
+    return str(obj)
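
to_serializable walks nested objects and containers recursively and falls back to str() for anything it cannot decompose. A small usage sketch with plain classes standing in for Pydantic models:

from mindsdb.api.a2a.utils import to_serializable

class Status:
    def __init__(self):
        self.state = "working"
        self._internal = object()  # underscore attributes are dropped

class Event:
    def __init__(self):
        self.id = "task-1"
        self.status = Status()
        self.parts = [{"type": "text", "text": "hi"}]

print(to_serializable(Event()))
# {'id': 'task-1', 'status': {'state': 'working'}, 'parts': [{'type': 'text', 'text': 'hi'}]}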
mindsdb/api/executor/command_executor.py
@@ -84,7 +84,7 @@ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
     TYPES,
 )
 
-from .exceptions import (
+from mindsdb.api.executor.exceptions import (
     ExecutorException,
     BadDbError,
     NotSupportedYet,
@@ -1221,9 +1221,11 @@ class ExecuteCommands:
             db_name = database_name
 
         dn = self.session.datahub[db_name]
+        if dn is None:
+            raise ExecutorException(f"Cannot delete a table from database '{db_name}': the database does not exist")
+
         if db_name is not None:
             dn.drop_table(table, if_exists=statement.if_exists)
-
         elif db_name in self.session.database_controller.get_dict(filter_type="project"):
             # TODO do we need feature: delete object from project via drop table?
@@ -1428,6 +1430,9 @@ class ExecuteCommands:
                 provider=provider,
                 params=statement.params,
             )
+        except EntityExistsError as e:
+            if statement.if_not_exists is not True:
+                raise ExecutorException(str(e))
         except ValueError as e:
             # Project does not exist or agent already exists.
             raise ExecutorException(str(e))
mindsdb/api/executor/datahub/datanodes/integration_datanode.py
@@ -164,7 +164,11 @@ class IntegrationDataNode(DataNode):
             df = result_set.to_df()
 
             result: HandlerResponse = self.integration_handler.insert(table_name.parts[-1], df)
-            return DataHubResponse(affected_rows=result.affected_rows)
+            if result is not None:
+                affected_rows = result.affected_rows
+            else:
+                affected_rows = None
+            return DataHubResponse(affected_rows=affected_rows)
 
         insert_columns = [Identifier(parts=[x.alias]) for x in result_set.columns]