MindsDB 25.7.1.0__py3-none-any.whl → 25.7.3.0__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release.
Files changed (38)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +54 -95
  3. mindsdb/api/a2a/agent.py +30 -206
  4. mindsdb/api/a2a/common/server/server.py +26 -27
  5. mindsdb/api/a2a/task_manager.py +93 -227
  6. mindsdb/api/a2a/utils.py +21 -0
  7. mindsdb/api/executor/command_executor.py +7 -2
  8. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -1
  9. mindsdb/api/executor/utilities/sql.py +97 -21
  10. mindsdb/api/http/namespaces/agents.py +127 -202
  11. mindsdb/api/http/namespaces/config.py +12 -1
  12. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +11 -1
  13. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  14. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +94 -1
  15. mindsdb/integrations/handlers/s3_handler/s3_handler.py +72 -70
  16. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +4 -3
  17. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +12 -3
  18. mindsdb/integrations/handlers/slack_handler/slack_tables.py +141 -161
  19. mindsdb/integrations/handlers/youtube_handler/youtube_tables.py +183 -55
  20. mindsdb/integrations/libs/keyword_search_base.py +41 -0
  21. mindsdb/integrations/libs/vectordatabase_handler.py +35 -14
  22. mindsdb/integrations/utilities/sql_utils.py +11 -0
  23. mindsdb/interfaces/agents/agents_controller.py +2 -2
  24. mindsdb/interfaces/data_catalog/data_catalog_loader.py +18 -4
  25. mindsdb/interfaces/database/projects.py +1 -3
  26. mindsdb/interfaces/functions/controller.py +54 -64
  27. mindsdb/interfaces/functions/to_markdown.py +47 -14
  28. mindsdb/interfaces/knowledge_base/controller.py +134 -35
  29. mindsdb/interfaces/knowledge_base/evaluate.py +53 -10
  30. mindsdb/interfaces/knowledge_base/llm_client.py +3 -3
  31. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +21 -13
  32. mindsdb/utilities/config.py +46 -39
  33. mindsdb/utilities/exception.py +11 -0
  34. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/METADATA +236 -236
  35. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/RECORD +38 -36
  36. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/WHEEL +0 -0
  37. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/licenses/LICENSE +0 -0
  38. {mindsdb-25.7.1.0.dist-info → mindsdb-25.7.3.0.dist-info}/top_level.txt +0 -0
mindsdb/api/a2a/common/server/server.py
@@ -135,36 +135,35 @@ class A2AServer:
 
     def _create_response(self, result: Any) -> JSONResponse | EventSourceResponse:
         if isinstance(result, AsyncIterable):
-
-            async def event_generator(result) -> AsyncIterable[dict[str, str]]:
+            # Step 2: Yield actual serialized event as JSON, with timing logs
+            async def event_generator(result):
                 async for item in result:
-                    # Send the data event with immediate flush directive
-                    yield {
-                        "data": item.model_dump_json(exclude_none=True),
-                        "event": "message",
-                        "id": str(id(item)),  # Add a unique ID for each event
-                    }
-                    # Add an empty comment event to force flush
-                    yield {
-                        "comment": " ",  # Empty comment event to force flush
-                    }
-
-            # Create EventSourceResponse with complete headers for browser compatibility
-            return EventSourceResponse(
-                event_generator(result),
-                # Complete set of headers needed for browser streaming
-                headers={
-                    "Cache-Control": "no-cache, no-transform",
-                    "X-Accel-Buffering": "no",
-                    "Connection": "keep-alive",
-                    "Content-Type": "text/event-stream",
-                    "Transfer-Encoding": "chunked",
-                },
-                # Explicitly set media_type
-                media_type="text/event-stream",
-            )
+                    t0 = time.time()
+                    logger.debug(f"[A2AServer] STEP2 serializing item at {t0}: {str(item)[:120]}")
+                    try:
+                        if hasattr(item, "model_dump_json"):
+                            data = item.model_dump_json(exclude_none=True)
+                        else:
+                            data = json.dumps(item)
+                    except Exception as e:
+                        logger.error(f"Serialization error in SSE stream: {e}")
+                        data = json.dumps({"error": f"Serialization error: {str(e)}"})
+                    yield {"data": data}
+
+            # Add robust SSE headers for compatibility
+            sse_headers = {
+                "Content-Type": "text/event-stream",
+                "Cache-Control": "no-cache, no-transform",
+                "X-Accel-Buffering": "no",
+                "Connection": "keep-alive",
+                "Transfer-Encoding": "chunked",
+            }
+            return EventSourceResponse(event_generator(result), headers=sse_headers)
         elif isinstance(result, JSONRPCResponse):
             return JSONResponse(result.model_dump(exclude_none=True))
+        elif isinstance(result, dict):
+            logger.warning("Falling back to JSONResponse for result type: dict")
+            return JSONResponse(result)
         else:
             logger.error(f"Unexpected result type: {type(result)}")
             raise ValueError(f"Unexpected result type: {type(result)}")
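
Note on the hunk above: the per-event "message" envelope and forced-flush comment events are gone, so each chunk now arrives as a bare SSE data field. A minimal consumption sketch with httpx; the port, path, and JSON-RPC method name below are illustrative assumptions, not taken from this diff:

import json
import httpx

payload = {
    "jsonrpc": "2.0",
    "id": "1",
    "method": "tasks/sendSubscribe",  # hypothetical method name
    "params": {"id": "task-1", "message": {"role": "user", "parts": [{"type": "text", "text": "hi"}]}},
}

# Stream the response and decode each SSE "data:" line as JSON.
with httpx.stream("POST", "http://127.0.0.1:47338/", json=payload) as response:
    for line in response.iter_lines():
        if line.startswith("data:"):
            print(json.loads(line[len("data:"):].strip()))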
mindsdb/api/a2a/task_manager.py
@@ -18,14 +18,30 @@ from mindsdb.api.a2a.common.types import (
 )
 from mindsdb.api.a2a.common.server.task_manager import InMemoryTaskManager
 from mindsdb.api.a2a.agent import MindsDBAgent
+from mindsdb.api.a2a.utils import to_serializable
 
 from typing import Union
 import logging
 import asyncio
+import time
 
 logger = logging.getLogger(__name__)
 
 
+def to_question_format(messages):
+    """Convert A2A messages to a list of {"question": ...} dicts for agent compatibility."""
+    out = []
+    for msg in messages:
+        if "question" in msg:
+            out.append(msg)
+        elif "parts" in msg and isinstance(msg["parts"], list):
+            for part in msg["parts"]:
+                part_dict = to_serializable(part)
+                if part_dict.get("type") == "text" and "text" in part_dict:
+                    out.append({"question": part_dict["text"]})
+    return out
+
+
 class AgentTaskManager(InMemoryTaskManager):
     def __init__(
         self,
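
The new to_question_format helper flattens A2A messages into the question dicts the agent consumes; non-text parts are dropped. A quick usage sketch (the payload shapes are illustrative):

from mindsdb.api.a2a.task_manager import to_question_format

messages = [
    {
        "role": "user",
        "parts": [
            {"type": "text", "text": "How many rows are in my_table?"},
            {"type": "data", "data": {"rows": 3}},  # non-text parts are skipped
        ],
    }
]

print(to_question_format(messages))
# [{'question': 'How many rows are in my_table?'}]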
@@ -67,10 +83,13 @@ class AgentTaskManager(InMemoryTaskManager):
             logger.info(f"Task created/updated with history length: {len(task.history) if task.history else 0}")
         except Exception as e:
             logger.error(f"Error creating task: {str(e)}")
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                error=InternalError(message=f"Error creating task: {str(e)}"),
+            error_result = to_serializable(
+                {
+                    "id": request.id,
+                    "error": to_serializable(InternalError(message=f"Error creating task: {str(e)}")),
+                }
             )
+            yield error_result
             return  # Early return from generator
 
         agent = self._create_agent(agent_name)
@@ -123,239 +142,81 @@ class AgentTaskManager(InMemoryTaskManager):
             await self._update_store(task_send_params.id, task_status, [artifact])
 
             # Yield the artifact update
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact),
+            yield to_serializable(
+                SendTaskStreamingResponse(
+                    id=request.id,
+                    result=to_serializable(TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact)),
+                )
             )
 
             # Yield the final status update
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskStatusUpdateEvent(
-                    id=task_send_params.id,
-                    status=TaskStatus(state=task_status.state),
-                    final=True,
-                ),
+            yield to_serializable(
+                SendTaskStreamingResponse(
+                    id=request.id,
+                    result=to_serializable(
+                        TaskStatusUpdateEvent(
+                            id=task_send_params.id,
+                            status=to_serializable(TaskStatus(state=task_status.state)),
+                            final=True,
+                        )
+                    ),
+                )
             )
             return
 
         except Exception as e:
             logger.error(f"Error invoking agent: {e}")
-            yield JSONRPCResponse(
-                id=request.id,
-                error=InternalError(message=f"Error invoking agent: {str(e)}"),
+            error_result = to_serializable(
+                {
+                    "id": request.id,
+                    "error": to_serializable(
+                        JSONRPCResponse(
+                            id=request.id,
+                            error=to_serializable(InternalError(message=f"Error invoking agent: {str(e)}")),
+                        )
+                    ),
+                }
             )
+            yield error_result
             return
 
         # If streaming is enabled (default), use the streaming implementation
         try:
-            # Track the chunks we've seen to avoid duplicates
-            seen_chunks = set()
-
-            async for item in agent.stream(query, task_send_params.sessionId, history=history):
-                # Ensure item has the required fields or provide defaults
-                is_task_complete = item.get("is_task_complete", False)
-
-                # Create a structured thought dictionary to encapsulate the agent's thought process
-                thought_dict = {}
-                parts = []
-
-                # Handle different chunk formats to extract text content
-                if "actions" in item:
-                    # Extract thought process from actions
-                    thought_dict["type"] = "thought"
-                    thought_dict["actions"] = item["actions"]
-
-                    for action in item.get("actions", []):
-                        if "log" in action:
-                            # Use "text" type for all parts, but add a thought_type in metadata
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": action["log"],
-                                    "metadata": {"thought_type": "thought"},
-                                }
-                            )
-                        if "tool_input" in action:
-                            # Include SQL queries
-                            tool_input = action.get("tool_input", "")
-                            if "$START$" in tool_input and "$STOP$" in tool_input:
-                                sql = tool_input.replace("$START$", "").replace("$STOP$", "")
-                                parts.append(
-                                    {
-                                        "type": "text",
-                                        "text": sql,
-                                        "metadata": {"thought_type": "sql"},
-                                    }
-                                )
-
-                elif "steps" in item:
-                    # Extract observations from steps
-                    thought_dict["type"] = "observation"
-                    thought_dict["steps"] = item["steps"]
-
-                    for step in item.get("steps", []):
-                        if "observation" in step:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": step["observation"],
-                                    "metadata": {"thought_type": "observation"},
-                                }
-                            )
-                        if "action" in step and "log" in step["action"]:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": step["action"]["log"],
-                                    "metadata": {"thought_type": "thought"},
-                                }
-                            )
-
-                elif "output" in item:
-                    # Final answer
-                    thought_dict["type"] = "answer"
-                    thought_dict["output"] = item["output"]
-                    parts.append({"type": "text", "text": item["output"]})
-
-                elif "parts" in item and item["parts"]:
-                    # Use existing parts, but ensure they have valid types
-                    for part in item["parts"]:
-                        if part.get("type") in ["text", "file", "data"]:
-                            # Valid type, use as is
-                            parts.append(part)
-                        else:
-                            # Invalid type, convert to text
-                            text_content = part.get("text", "")
-                            if not text_content and "content" in part:
-                                text_content = part["content"]
-
-                            new_part = {"type": "text", "text": text_content}
-
-                            # Preserve metadata if it exists
-                            if "metadata" in part:
-                                new_part["metadata"] = part["metadata"]
-                            else:
-                                new_part["metadata"] = {"thought_type": part.get("type", "text")}
-
-                            parts.append(new_part)
-
-                    # Try to determine the type from parts for the thought dictionary
-                    for part in item["parts"]:
-                        if part.get("type") == "text" and part.get("text", "").startswith("$START$"):
-                            thought_dict["type"] = "sql"
-                            thought_dict["query"] = part.get("text")
-                        else:
-                            thought_dict["type"] = "text"
-
-                elif "content" in item:
-                    # Simple content
-                    thought_dict["type"] = "text"
-                    thought_dict["content"] = item["content"]
-                    parts.append({"type": "text", "text": item["content"]})
-
-                elif "messages" in item:
-                    # Extract content from messages
-                    thought_dict["type"] = "message"
-                    thought_dict["messages"] = item["messages"]
-
-                    for message in item.get("messages", []):
-                        if "content" in message:
-                            parts.append(
-                                {
-                                    "type": "text",
-                                    "text": message["content"],
-                                    "metadata": {"thought_type": "message"},
-                                }
-                            )
-
-                # Skip if we have no parts to send
-                if not parts:
-                    continue
-
-                # Process each part individually to ensure true streaming
-                for part in parts:
-                    # Generate a unique key for this part to avoid duplicates
-                    part_key = str(part)
-                    if part_key in seen_chunks:
-                        continue
-                    seen_chunks.add(part_key)
-
-                    # Ensure metadata exists
-                    metadata = item.get("metadata", {})
-
-                    # Add the thought dictionary to metadata for frontend parsing
-                    if thought_dict:
-                        metadata["thought_process"] = thought_dict
-
-                    # Handle error field if present
-                    if "error" in item and not is_task_complete:
-                        logger.warning(f"Error in streaming response: {item['error']}")
-                        # Mark as complete if there's an error
-                        is_task_complete = True
-
-                    if not is_task_complete:
-                        # Create a message with just this part and send it immediately
-                        task_state = TaskState.WORKING
-                        message = Message(role="agent", parts=[part], metadata=metadata)
-                        task_status = TaskStatus(state=task_state, message=message)
-                        await self._update_store(task_send_params.id, task_status, [])
-                        task_update_event = TaskStatusUpdateEvent(
-                            id=task_send_params.id,
-                            status=task_status,
-                            final=False,
-                        )
-                        yield SendTaskStreamingResponse(id=request.id, result=task_update_event)
-
-                # If this is the final chunk, send a completion message
-                if is_task_complete:
-                    task_state = TaskState.COMPLETED
-                    artifact = Artifact(parts=parts, index=0, append=False)
-                    task_status = TaskStatus(state=task_state)
-                    yield SendTaskStreamingResponse(
-                        id=request.id,
-                        result=TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact),
-                    )
-                    await self._update_store(task_send_params.id, task_status, [artifact])
-                    yield SendTaskStreamingResponse(
-                        id=request.id,
-                        result=TaskStatusUpdateEvent(
-                            id=task_send_params.id,
-                            status=TaskStatus(
-                                state=task_status.state,
-                            ),
-                            final=True,
-                        ),
-                    )
-
+            logger.debug(f"[TaskManager] Entering agent.stream() at {time.time()}")
+            # Transform to agent-compatible format
+            agent_messages = to_question_format(
+                [
+                    {
+                        "role": task_send_params.message.role,
+                        "parts": task_send_params.message.parts,
+                        "metadata": task_send_params.message.metadata,
+                    }
+                ]
+            )
+            async for item in agent.streaming_invoke(agent_messages, timeout=60):
+                # Clean up: Remove verbose debug logs, keep only errors and essential info
+                if isinstance(item, dict) and "artifact" in item and "parts" in item["artifact"]:
+                    item["artifact"]["parts"] = [to_serializable(p) for p in item["artifact"]["parts"]]
+                yield to_serializable(item)
         except Exception as e:
             logger.error(f"An error occurred while streaming the response: {e}")
             error_text = f"An error occurred while streaming the response: {str(e)}"
+            # Ensure all parts are plain dicts
             parts = [{"type": "text", "text": error_text}]
-
-            # First send the error as an artifact
-            artifact = Artifact(parts=parts, index=0, append=False)
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskArtifactUpdateEvent(id=task_send_params.id, artifact=artifact),
-            )
-
-            # Then mark the task as completed with an error
-            task_state = TaskState.FAILED
-            task_status = TaskStatus(state=task_state)
-            await self._update_store(task_send_params.id, task_status, [artifact])
-
-            # Send the final status update
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                result=TaskStatusUpdateEvent(
-                    id=task_send_params.id,
-                    status=TaskStatus(
-                        state=task_status.state,
-                    ),
-                    final=True,
-                ),
-            )
+            parts = [to_serializable(part) for part in parts]
+            artifact = {
+                "parts": parts,
+                "index": 0,
+                "append": False,
+            }
+            error_result = {
+                "id": request.id,
+                "error": {
+                    "id": task_send_params.id,
+                    "artifact": artifact,
+                },
+            }
+            yield error_result
 
     async def upsert_task(self, task_send_params: TaskSendParams) -> Task:
         """Create or update a task in the task store.
@@ -472,21 +333,26 @@ class AgentTaskManager(InMemoryTaskManager):
     ) -> AsyncIterable[SendTaskStreamingResponse]:
         error = self._validate_request(request)
         if error:
-            # Convert JSONRPCResponse to SendTaskStreamingResponse
-            yield SendTaskStreamingResponse(id=request.id, error=error.error)
+            logger.info(f"[TaskManager] Yielding error at {time.time()} for invalid request: {error}")
+            yield to_serializable(SendTaskStreamingResponse(id=request.id, error=to_serializable(error.error)))
             return
 
         # We can't await an async generator directly, so we need to use it as is
         try:
+            logger.debug(f"[TaskManager] Entering streaming path at {time.time()}")
             async for response in self._stream_generator(request):
+                logger.debug(f"[TaskManager] Yielding streaming response at {time.time()} with: {str(response)[:120]}")
                 yield response
         except Exception as e:
             # If an error occurs, yield an error response
             logger.error(f"Error in on_send_task_subscribe: {str(e)}")
-            yield SendTaskStreamingResponse(
-                id=request.id,
-                error=InternalError(message=f"Error processing streaming request: {str(e)}"),
+            error_result = to_serializable(
+                {
+                    "id": request.id,
+                    "error": to_serializable(InternalError(message=f"Error processing streaming request: {str(e)}")),
+                }
             )
+            yield error_result
 
     async def _update_store(self, task_id: str, status: TaskStatus, artifacts: list[Artifact]) -> Task:
         async with self.lock:
@@ -579,7 +445,7 @@ class AgentTaskManager(InMemoryTaskManager):
             # Just create a minimal response to acknowledge the request
             task_state = TaskState.WORKING
             task = await self._update_store(task_send_params.id, TaskStatus(state=task_state), [])
-            return SendTaskResponse(id=request.id, result=task)
+            return to_serializable(SendTaskResponse(id=request.id, result=task))
         else:
             # For non-streaming mode, collect all chunks into a single response
             async for chunk in stream_gen:
@@ -607,7 +473,7 @@ class AgentTaskManager(InMemoryTaskManager):
                 ),
                 [Artifact(parts=all_parts)],
             )
-            return SendTaskResponse(id=request.id, result=task)
+            return to_serializable(SendTaskResponse(id=request.id, result=task))
         except Exception as e:
             logger.error(f"Error invoking agent: {e}")
             result_text = f"Error invoking agent: {e}"
@@ -619,4 +485,4 @@ class AgentTaskManager(InMemoryTaskManager):
             TaskStatus(state=task_state, message=Message(role="agent", parts=parts)),
             [Artifact(parts=parts)],
         )
-        return SendTaskResponse(id=request.id, result=task)
+        return to_serializable(SendTaskResponse(id=request.id, result=task))
mindsdb/api/a2a/utils.py (new file)
@@ -0,0 +1,21 @@
+def to_serializable(obj):
+    # Primitives
+    if isinstance(obj, (str, int, float, bool, type(None))):
+        return obj
+    # Pydantic v2
+    if hasattr(obj, "model_dump"):
+        return to_serializable(obj.model_dump(exclude_none=True))
+    # Pydantic v1
+    if hasattr(obj, "dict"):
+        return to_serializable(obj.dict(exclude_none=True))
+    # Custom classes with __dict__
+    if hasattr(obj, "__dict__"):
+        return {k: to_serializable(v) for k, v in vars(obj).items() if not k.startswith("_")}
+    # Dicts
+    if isinstance(obj, dict):
+        return {k: to_serializable(v) for k, v in obj.items()}
+    # Lists, Tuples, Sets
+    if isinstance(obj, (list, tuple, set)):
+        return [to_serializable(v) for v in obj]
+    # Fallback: string
+    return str(obj)
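
to_serializable walks nested objects and containers recursively and falls back to str() for anything it cannot decompose. A small usage sketch with plain classes standing in for Pydantic models:

from mindsdb.api.a2a.utils import to_serializable

class Status:
    def __init__(self):
        self.state = "working"
        self._internal = object()  # underscore attributes are dropped

class Event:
    def __init__(self):
        self.id = "task-1"
        self.status = Status()
        self.parts = [{"type": "text", "text": "hi"}]

print(to_serializable(Event()))
# {'id': 'task-1', 'status': {'state': 'working'}, 'parts': [{'type': 'text', 'text': 'hi'}]}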
mindsdb/api/executor/command_executor.py
@@ -84,7 +84,7 @@ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
     TYPES,
 )
 
-from .exceptions import (
+from mindsdb.api.executor.exceptions import (
     ExecutorException,
     BadDbError,
     NotSupportedYet,
@@ -1221,9 +1221,11 @@ class ExecuteCommands:
             db_name = database_name
 
         dn = self.session.datahub[db_name]
+        if dn is None:
+            raise ExecutorException(f"Cannot delete a table from database '{db_name}': the database does not exist")
+
         if db_name is not None:
             dn.drop_table(table, if_exists=statement.if_exists)
-
         elif db_name in self.session.database_controller.get_dict(filter_type="project"):
             # TODO do we need feature: delete object from project via drop table?
@@ -1428,6 +1430,9 @@ class ExecuteCommands:
                 provider=provider,
                 params=statement.params,
             )
+        except EntityExistsError as e:
+            if statement.if_not_exists is not True:
+                raise ExecutorException(str(e))
         except ValueError as e:
             # Project does not exist or agent already exists.
             raise ExecutorException(str(e))
mindsdb/api/executor/datahub/datanodes/integration_datanode.py
@@ -164,7 +164,11 @@ class IntegrationDataNode(DataNode):
             df = result_set.to_df()
 
             result: HandlerResponse = self.integration_handler.insert(table_name.parts[-1], df)
-            return DataHubResponse(affected_rows=result.affected_rows)
+            if result is not None:
+                affected_rows = result.affected_rows
+            else:
+                affected_rows = None
+            return DataHubResponse(affected_rows=affected_rows)
 
         insert_columns = [Identifier(parts=[x.alias]) for x in result_set.columns]