MindsDB 25.4.3.1__py3-none-any.whl → 25.4.4.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
Note: this release of MindsDB has been flagged as potentially problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +18 -4
- mindsdb/api/executor/data_types/response_type.py +1 -0
- mindsdb/api/executor/datahub/classes/tables_row.py +3 -10
- mindsdb/api/executor/datahub/datanodes/datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +44 -10
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +57 -38
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +39 -7
- mindsdb/api/executor/datahub/datanodes/system_tables.py +116 -109
- mindsdb/api/executor/planner/query_planner.py +10 -1
- mindsdb/api/executor/planner/steps.py +8 -2
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +5 -5
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +1 -1
- mindsdb/api/executor/sql_query/steps/insert_step.py +2 -1
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -3
- mindsdb/api/litellm/start.py +82 -0
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +133 -0
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +7 -2
- mindsdb/integrations/handlers/chromadb_handler/settings.py +1 -0
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +13 -4
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +14 -5
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +14 -4
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +34 -19
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +21 -18
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +14 -4
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +1 -1
- mindsdb/integrations/libs/response.py +80 -32
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +208 -13
- mindsdb/interfaces/agents/litellm_server.py +345 -0
- mindsdb/interfaces/agents/mcp_client_agent.py +252 -0
- mindsdb/interfaces/agents/run_mcp_agent.py +205 -0
- mindsdb/interfaces/knowledge_base/controller.py +17 -7
- mindsdb/interfaces/skills/skill_tool.py +7 -1
- mindsdb/interfaces/skills/sql_agent.py +8 -3
- mindsdb/utilities/config.py +8 -1
- mindsdb/utilities/starters.py +7 -0
- {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/METADATA +232 -230
- {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/RECORD +42 -39
- {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py +0 -230
- /mindsdb/{integrations/handlers/snowflake_handler/tests → api/litellm}/__init__.py +0 -0
- {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py

@@ -10,7 +10,8 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
 from langchain_core.callbacks import Callbacks, dispatch_custom_event
 from langchain_core.documents import Document
-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, AsyncAzureOpenAI
+from pydantic import field_validator

 from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKING_MODEL, DEFAULT_LLM_ENDPOINT

@@ -19,12 +20,15 @@ log = logging.getLogger(__name__)

 class LLMReranker(BaseDocumentCompressor):
     filtering_threshold: float = 0.0  # Default threshold for filtering
+    provider: str = 'openai'
     model: str = DEFAULT_RERANKING_MODEL  # Model to use for reranking
     temperature: float = 0.0  # Temperature for the model
-
+    api_key: Optional[str] = None
     remove_irrelevant: bool = True  # New flag to control removal of irrelevant documents
-    base_url: str =
+    base_url: Optional[str] = None
+    api_version: Optional[str] = None
     num_docs_to_keep: Optional[int] = None  # How many of the top documents to keep after reranking & compressing.
+    method: str = "multi-class"  # Scoring method: 'multi-class' or 'binary'
     _api_key_var: str = "OPENAI_API_KEY"
     client: Optional[AsyncOpenAI] = None
     _semaphore: Optional[asyncio.Semaphore] = None
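
For orientation, a minimal sketch of constructing the reranker with the fields this release adds. The field names come from the diff above; the values are illustrative assumptions, not defaults shipped in the release:

```python
# Sketch only: placeholder values, assuming the remaining fields keep their defaults.
from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker

reranker = LLMReranker(
    provider="azure_openai",                          # new field; validated against {'openai', 'azure_openai'}
    api_key=None,                                     # new field; None falls back to environment variables
    base_url="https://example.openai.azure.com",      # now Optional; doubles as the Azure endpoint
    api_version="2024-02-01",                         # new field; only used for azure_openai
    method="multi-class",                             # new field; 'multi-class' or 'binary' scoring
)
```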
@@ -38,21 +42,40 @@ class LLMReranker(BaseDocumentCompressor):
     class Config:
         arbitrary_types_allowed = True

+    @field_validator('provider')
+    @classmethod
+    def validate_provider(cls, v: str) -> str:
+        allowed = {'openai', 'azure_openai'}
+        v_lower = v.lower()
+        if v_lower not in allowed:
+            raise ValueError(f"Unsupported provider: {v}.")
+        return v_lower
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self._semaphore = asyncio.Semaphore(self.max_concurrent_requests)

     async def _init_client(self):
         if self.client is None:
-
-            if
-
-
-
-
-
-
-
+
+            if self.provider == "azure_openai":
+
+                azure_api_key = self.api_key or os.getenv("AZURE_OPENAI_API_KEY")
+                azure_api_endpoint = self.base_url or os.environ.get("AZURE_OPENAI_ENDPOINT")
+                azure_api_version = self.api_version or os.environ.get("AZURE_OPENAI_API_VERSION")
+                self.client = AsyncAzureOpenAI(api_key=azure_api_key,
+                                               azure_endpoint=azure_api_endpoint,
+                                               api_version=azure_api_version,
+                                               timeout=self.request_timeout,
+                                               max_retries=2)
+            elif self.provider == "openai":
+                api_key_var: str = "OPENAI_API_KEY"
+                openai_api_key = self.api_key or os.getenv(api_key_var)
+                if not openai_api_key:
+                    raise ValueError(f"OpenAI API key not found in environment variable {api_key_var}")
+
+                base_url = self.base_url or DEFAULT_LLM_ENDPOINT
+                self.client = AsyncOpenAI(api_key=openai_api_key, base_url=base_url, timeout=self.request_timeout, max_retries=2)

     async def search_relevancy(self, query: str, document: str, custom_event: bool = True) -> Any:
         await self._init_client()
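
The client initialization resolves credentials in a fixed order: explicit fields first, then environment variables. A hedged sketch of driving the Azure path purely from the environment (the endpoint, key, and version strings are placeholders):

```python
import os

from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker

# Placeholders; set real values in your environment instead.
os.environ["AZURE_OPENAI_API_KEY"] = "<key>"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://example.openai.azure.com"
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-01"

# With no explicit api_key / base_url / api_version, _init_client falls back to the
# three AZURE_OPENAI_* variables above; the 'openai' path instead uses OPENAI_API_KEY
# and DEFAULT_LLM_ENDPOINT.
reranker = LLMReranker(provider="azure_openai")
```

Note that the validator lower-cases its input, so `provider="Azure_OpenAI"` normalizes to `azure_openai`, while anything outside the allowed set raises a ValueError at construction time.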
@@ -147,6 +170,173 @@ class LLMReranker(BaseDocumentCompressor):
             except Exception as e:
                 log.error(f"Batch processing error: {str(e)}")
                 continue
+        return ranked_results
+
+    async def search_relevancy_score(self, query: str, document: str) -> Any:
+        await self._init_client()
+
+        async with self._semaphore:
+            for attempt in range(self.max_retries):
+                try:
+                    response = await self.client.chat.completions.create(
+                        model=self.model,
+                        messages=[
+                            {"role": "system", "content": """
+                            You are an intelligent assistant that evaluates how relevant a given document chunk is to a user's search query.
+                            Your task is to analyze the similarity between the search query and the document chunk, and return **only the class label** that best represents the relevance:
+
+                            - "class_1": Not relevant (score between 0.0 and 0.25)
+                            - "class_2": Slightly relevant (score between 0.25 and 0.5)
+                            - "class_3": Moderately relevant (score between 0.5 and 0.75)
+                            - "class_4": Highly relevant (score between 0.75 and 1.0)
+
+                            Respond with only one of: "class_1", "class_2", "class_3", or "class_4".
+
+                            Examples:
+
+                            Search query: "How to reset a router to factory settings?"
+                            Document chunk: "Computers often come with customizable parental control settings."
+                            Score: class_1
+
+                            Search query: "Symptoms of vitamin D deficiency"
+                            Document chunk: "Vitamin D deficiency has been linked to fatigue, bone pain, and muscle weakness."
+                            Score: class_4
+
+                            Search query: "Best practices for onboarding remote employees"
+                            Document chunk: "An employee handbook can be useful for new hires, outlining company policies and benefits."
+                            Score: class_2
+
+                            Search query: "Benefits of mindfulness meditation"
+                            Document chunk: "Practicing mindfulness has shown to reduce stress and improve focus in multiple studies."
+                            Score: class_3
+
+                            Search query: "What is Kubernetes used for?"
+                            Document chunk: "Kubernetes is an open-source system for automating deployment, scaling, and management of containerized applications."
+                            Score: class_4
+
+                            Search query: "How to bake sourdough bread at home"
+                            Document chunk: "The French Revolution began in 1789 and radically transformed society."
+                            Score: class_1
+
+                            Search query: "Machine learning algorithms for image classification"
+                            Document chunk: "Convolutional Neural Networks (CNNs) are particularly effective in image classification tasks."
+                            Score: class_4
+
+                            Search query: "How to improve focus while working remotely"
+                            Document chunk: "Creating a dedicated workspace and setting a consistent schedule can significantly improve focus during remote work."
+                            Score: class_4
+
+                            Search query: "Carbon emissions from electric vehicles vs gas cars"
+                            Document chunk: "Electric vehicles produce zero emissions while driving, but battery production has environmental impacts."
+                            Score: class_3
+
+                            Search query: "Time zones in the United States"
+                            Document chunk: "The U.S. is divided into six primary time zones: Eastern, Central, Mountain, Pacific, Alaska, and Hawaii-Aleutian."
+                            Score: class_4
+                            """},
+
+                            {"role": "user", "content": f"""
+                            Now evaluate the following pair:
+
+                            Search query: {query}
+                            Document chunk: {document}
+
+                            Which class best represents the relevance?
+                            """}
+                        ],
+                        temperature=self.temperature,
+                        n=1,
+                        logprobs=True,
+                        top_logprobs=4,
+                        max_tokens=3
+                    )
+
+                    # Extract response and logprobs
+                    class_label = response.choices[0].message.content.strip()
+                    token_logprobs = response.choices[0].logprobs.content
+                    # Reconstruct the prediction and extract the top logprobs from the final token (e.g., "1")
+                    final_token_logprob = token_logprobs[-1]
+                    top_logprobs = final_token_logprob.top_logprobs
+                    # Create a map of 'class_1' -> probability, using token combinations
+                    class_probs = {}
+                    for top_token in top_logprobs:
+                        full_label = f"class_{top_token.token}"
+                        prob = math.exp(top_token.logprob)
+                        class_probs[full_label] = prob
+                    # Optional: normalize in case some are missing
+                    total_prob = sum(class_probs.values())
+                    class_probs = {k: v / total_prob for k, v in class_probs.items()}
+                    # Assign weights to classes
+                    class_weights = {
+                        "class_1": 0.25,
+                        "class_2": 0.5,
+                        "class_3": 0.75,
+                        "class_4": 1.0
+                    }
+                    # Compute the final smooth score
+                    relevance_score = sum(class_weights.get(class_label, 0) * prob for class_label, prob in class_probs.items())
+                    rerank_data = {
+                        "document": document,
+                        "answer": class_label,
+                        "relevance_score": relevance_score
+                    }
+                    return rerank_data
+
+                except Exception as e:
+                    if attempt == self.max_retries - 1:
+                        log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
+                        raise
+                    # Exponential backoff with jitter
+                    retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
+                    await asyncio.sleep(retry_delay)
+
+    async def _rank_score(self, query_document_pairs: List[Tuple[str, str]]) -> List[Tuple[str, float]]:
+        ranked_results = []
+
+        # Process in larger batches for better throughput
+        batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
+        for i in range(0, len(query_document_pairs), batch_size):
+            batch = query_document_pairs[i:i + batch_size]
+            try:
+                results = await asyncio.gather(
+                    *[self.search_relevancy_score(query=query, document=document) for (query, document) in batch],
+                    return_exceptions=True
+                )
+
+                for idx, result in enumerate(results):
+                    if isinstance(result, Exception):
+                        log.error(f"Error processing document {i+idx}: {str(result)}")
+                        ranked_results.append((batch[idx][1], 0.0))
+                        continue
+
+                    score = result["relevance_score"]
+                    if score is not None:
+                        if score > 1.0:
+                            score = 1.0
+                        elif score < 0.0:
+                            score = 0.0
+
+                    ranked_results.append((batch[idx][1], score))
+                    # Check if we should stop early
+                    try:
+                        high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
+                        can_stop_early = (
+                            self.early_stop  # Early stopping is enabled
+                            and self.num_docs_to_keep  # We have a target number of docs
+                            and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
+                            and score >= self.early_stop_threshold  # Current doc is good enough
+                        )
+
+                        if can_stop_early:
+                            log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
+                            return ranked_results
+                    except Exception as e:
+                        # Don't let early stopping errors stop the whole process
+                        log.warning(f"Error in early stopping check: {str(e)}")
+
+            except Exception as e:
+                log.error(f"Batch processing error: {str(e)}")
+                continue

         return ranked_results

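The "smooth score" computed above is an expectation over the class distribution recovered from the final token's top_logprobs. A self-contained sketch of that arithmetic with made-up probabilities:

```python
import math

# Hypothetical top_logprobs for the final token of the label ("1".."4").
top_logprobs = {"1": math.log(0.05), "2": math.log(0.15),
                "3": math.log(0.30), "4": math.log(0.50)}

class_weights = {"class_1": 0.25, "class_2": 0.5, "class_3": 0.75, "class_4": 1.0}

# Convert logprobs to probabilities and normalize, as the diff does.
class_probs = {f"class_{tok}": math.exp(lp) for tok, lp in top_logprobs.items()}
total = sum(class_probs.values())
class_probs = {k: v / total for k, v in class_probs.items()}

# Expected relevance: sum of class weight times class probability.
score = sum(class_weights.get(label, 0) * p for label, p in class_probs.items())
print(f"{score:.3f}")  # 0.25*0.05 + 0.5*0.15 + 0.75*0.30 + 1.0*0.50 ≈ 0.812
```

This gives a graded score even when the model commits to a single label, since the residual probability mass of the other classes still contributes.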
@@ -226,6 +416,7 @@ class LLMReranker(BaseDocumentCompressor):
             "model": self.model,
             "temperature": self.temperature,
             "remove_irrelevant": self.remove_irrelevant,
+            "method": self.method,
         }

     def get_scores(self, query: str, documents: list[str], custom_event: bool = False):
@@ -239,6 +430,10 @@ class LLMReranker(BaseDocumentCompressor):
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)

-
+        if self.method == "multi-class":  # default 'multi-class' method
+            documents_and_scores = loop.run_until_complete(self._rank_score(query_document_pairs))
+        else:
+            documents_and_scores = loop.run_until_complete(self._rank(query_document_pairs, custom_event=custom_event))
+
         scores = [score for _, score in documents_and_scores]
         return scores
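
With this dispatch in place, callers pick the scoring path through the new `method` field. A sketch of the call site (the documents are illustrative, and an OpenAI-compatible key must be configured as described earlier):

```python
from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker

reranker = LLMReranker(method="multi-class")  # logprob-weighted scoring, the default

scores = reranker.get_scores(
    query="Symptoms of vitamin D deficiency",
    documents=[
        "Vitamin D deficiency has been linked to fatigue and bone pain.",
        "The French Revolution began in 1789.",
    ],
)
# One float per document, in input order; _rank_score clamps values to [0.0, 1.0].

binary = LLMReranker(method="binary")  # any other value routes to the pre-existing _rank path
```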
mindsdb/interfaces/agents/litellm_server.py (new file)

@@ -0,0 +1,345 @@
+import asyncio
+import argparse
+import json
+
+from typing import List, Dict, Optional
+from contextlib import AsyncExitStack
+
+import uvicorn
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.responses import StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+from mindsdb.utilities import log
+from mindsdb.interfaces.agents.mcp_client_agent import create_mcp_agent
+
+logger = log.getLogger(__name__)
+
+app = FastAPI(title="MindsDB MCP Agent LiteLLM API")
+
+# Configure CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Store agent wrapper as a global variable
+agent_wrapper = None
+# MCP session for direct SQL queries
+mcp_session = None
+exit_stack = AsyncExitStack()
+
+
+class ChatMessage(BaseModel):
+    role: str
+    content: str
+
+
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    stream: bool = False
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+
+
+class ChatCompletionChoice(BaseModel):
+    index: int = 0
+    message: Optional[Dict[str, str]] = None
+    delta: Optional[Dict[str, str]] = None
+    finish_reason: Optional[str] = "stop"
+
+
+class ChatCompletionResponse(BaseModel):
+    id: str = "mcp-agent-response"
+    object: str = "chat.completion"
+    created: int = 0
+    model: str
+    choices: List[ChatCompletionChoice]
+    usage: Dict[str, int] = Field(default_factory=lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
+
+
+class DirectSQLRequest(BaseModel):
+    query: str
+
+
+@app.post("/v1/chat/completions")
+async def chat_completions(request: ChatCompletionRequest):
+    global agent_wrapper
+
+    if agent_wrapper is None:
+        raise HTTPException(status_code=500, detail="Agent not initialized. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
+
+    try:
+        # Convert request to messages format
+        messages = [
+            {"role": msg.role, "content": msg.content}
+            for msg in request.messages
+        ]
+
+        if request.stream:
+            # Return a streaming response
+            async def generate():
+                try:
+                    async for chunk in agent_wrapper.acompletion_stream(messages, model=request.model):
+                        yield f"data: {json.dumps(chunk)}\n\n"
+                    yield "data: [DONE]\n\n"
+                except Exception as e:
+                    logger.error(f"Streaming error: {str(e)}")
+                    yield "data: {{'error': 'Streaming failed due to an internal error.'}}\n\n"
+            return StreamingResponse(generate(), media_type="text/event-stream")
+        else:
+            # Return a regular response
+            response = await agent_wrapper.acompletion(messages)
+
+            # Ensure the content is a string
+            content = response["choices"][0]["message"].get("content", "")
+            if not isinstance(content, str):
+                content = str(content)
+
+            # Transform to proper OpenAI format
+            return ChatCompletionResponse(
+                model=request.model,
+                choices=[
+                    ChatCompletionChoice(
+                        message={"role": "assistant", "content": content}
+                    )
+                ]
+            )
+
+    except Exception as e:
+        logger.error(f"Error in chat completion: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/direct-sql")
+async def direct_sql(request: DirectSQLRequest, background_tasks: BackgroundTasks):
+    """Execute a direct SQL query via MCP (for testing)"""
+    global agent_wrapper, mcp_session
+
+    if agent_wrapper is None and mcp_session is None:
+        raise HTTPException(status_code=500, detail="No MCP session available. Make sure MindsDB server is running with MCP enabled.")
+
+    try:
+        # First try to use the agent's session if available
+        if hasattr(agent_wrapper.agent, "session") and agent_wrapper.agent.session:
+            session = agent_wrapper.agent.session
+            result = await session.call_tool("query", {"query": request.query})
+            return {"result": result.content}
+        # If agent session not available, use the direct session
+        elif mcp_session:
+            result = await mcp_session.call_tool("query", {"query": request.query})
+            return {"result": result.content}
+        else:
+            raise HTTPException(status_code=500, detail="No MCP session available")
+
+    except Exception as e:
+        logger.error(f"Error executing direct SQL: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/v1/models")
+async def list_models():
+    """List available models - always returns the single model we're using"""
+    global agent_wrapper
+
+    if agent_wrapper is None:
+        return {
+            "object": "list",
+            "data": [
+                {
+                    "id": "mcp-agent",
+                    "object": "model",
+                    "created": 0,
+                    "owned_by": "mindsdb"
+                }
+            ]
+        }
+
+    # Return the actual model name if available
+    model_name = agent_wrapper.agent.args.get("model_name", "mcp-agent")
+
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": model_name,
+                "object": "model",
+                "created": 0,
+                "owned_by": "mindsdb"
+            }
+        ]
+    }
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    global agent_wrapper
+
+    health_status = {
+        "status": "ok",
+        "agent_initialized": agent_wrapper is not None,
+    }
+
+    if agent_wrapper is not None:
+        health_status["mcp_connected"] = hasattr(agent_wrapper.agent, "session") and agent_wrapper.agent.session is not None
+        health_status["agent_name"] = agent_wrapper.agent.agent.name
+        health_status["model_name"] = agent_wrapper.agent.args.get("model_name", "unknown")
+
+    return health_status
+
+
+@app.get("/test-mcp-connection")
+async def test_mcp_connection():
+    """Test the connection to the MCP server"""
+    global mcp_session, exit_stack
+
+    try:
+        # If we already have a session, test it
+        if mcp_session:
+            try:
+                tools_response = await mcp_session.list_tools()
+                return {
+                    "status": "ok",
+                    "message": "Successfully connected to MCP server",
+                    "tools": [tool.name for tool in tools_response.tools]
+                }
+            except Exception:
+                # If error, close existing session and create a new one
+                await exit_stack.aclose()
+                mcp_session = None
+
+        # Create a new MCP session - connect to running server
+        server_params = StdioServerParameters(
+            command="python",
+            args=["-m", "mindsdb", "--api=mcp"],
+            env=None
+        )
+
+        stdio_transport = await exit_stack.enter_async_context(stdio_client(server_params))
+        stdio, write = stdio_transport
+        session = await exit_stack.enter_async_context(ClientSession(stdio, write))
+
+        await session.initialize()
+
+        # Save the session for future use
+        mcp_session = session
+
+        # Get available tools
+        tools_response = await session.list_tools()
+
+        return {
+            "status": "ok",
+            "message": "Successfully connected to MCP server",
+            "tools": [tool.name for tool in tools_response.tools]
+        }
+    except Exception as e:
+        logger.error(f"Error connecting to MCP server: {str(e)}")
+        error_detail = f"Error connecting to MCP server: {str(e)}. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http"
+        raise HTTPException(status_code=500, detail=error_detail)
+
+
+async def init_agent(agent_name: str, project_name: str, mcp_host: str, mcp_port: int):
+    """Initialize the agent"""
+    global agent_wrapper
+
+    try:
+        logger.info(f"Initializing MCP agent '{agent_name}' in project '{project_name}'")
+        logger.info(f"Connecting to MCP server at {mcp_host}:{mcp_port}")
+        logger.info("Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
+
+        agent_wrapper = create_mcp_agent(
+            agent_name=agent_name,
+            project_name=project_name,
+            mcp_host=mcp_host,
+            mcp_port=mcp_port
+        )
+
+        logger.info("Agent initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize agent: {str(e)}")
+        return False
+
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Clean up resources on server shutdown"""
+    global agent_wrapper, exit_stack
+
+    if agent_wrapper:
+        await agent_wrapper.cleanup()
+
+    await exit_stack.aclose()
+
+
+async def run_server_async(
+    agent_name: str,
+    project_name: str = "mindsdb",
+    mcp_host: str = "127.0.0.1",
+    mcp_port: int = 47337,
+    host: str = "0.0.0.0",
+    port: int = 8000
+):
+    """Run the FastAPI server"""
+    # Initialize the agent
+    success = await init_agent(agent_name, project_name, mcp_host, mcp_port)
+    if not success:
+        logger.error("Failed to initialize agent. Make sure MindsDB server is running with MCP enabled.")
+        return 1
+
+    return 0
+
+
+def run_server(
+    agent_name: str,
+    project_name: str = "mindsdb",
+    mcp_host: str = "127.0.0.1",
+    mcp_port: int = 47337,
+    host: str = "0.0.0.0",
+    port: int = 8000
+):
+    """Run the FastAPI server"""
+    logger.info("Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
+    # Initialize database
+    from mindsdb.interfaces.storage import db
+    db.init()
+
+    # Run initialization in the event loop
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    result = loop.run_until_complete(run_server_async(agent_name, project_name, mcp_host, mcp_port))
+    if result != 0:
+        return result
+    # Run the server
+    logger.info(f"Starting server on {host}:{port}")
+    uvicorn.run(app, host=host, port=port)
+    return 0
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run a LiteLLM-compatible API server for MCP agent")
+    parser.add_argument("--agent", type=str, required=True, help="Name of the agent to use")
+    parser.add_argument("--project", type=str, default="mindsdb", help="Project containing the agent")
+    parser.add_argument("--mcp-host", type=str, default="127.0.0.1", help="MCP server host")
+    parser.add_argument("--mcp-port", type=int, default=47337, help="MCP server port")
+    parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to bind the server to")
+    parser.add_argument("--port", type=int, default=8000, help="Port to run the server on")
+
+    args = parser.parse_args()
+
+    run_server(
+        agent_name=args.agent,
+        project_name=args.project,
+        mcp_host=args.mcp_host,
+        mcp_port=args.mcp_port,
+        host=args.host,
+        port=args.port
+    )
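
Because this server mirrors OpenAI's chat-completions surface, any OpenAI-style client can call it once it is running alongside a MindsDB instance with the MCP API enabled. A sketch; the agent name, port, and query are assumptions, while the route and payload shape come from the FastAPI models above:

```python
# Sketch: calling the LiteLLM-compatible endpoint with the official OpenAI client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")  # no auth is enforced by the server above

resp = client.chat.completions.create(
    model="my_agent",  # hypothetical agent name; /v1/models reports what the server exposes
    messages=[{"role": "user", "content": "How many rows are in my_db.sales?"}],
)
print(resp.choices[0].message.content)
```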