PyPI - loopllm - Versions diffs - 0.7.0__py3-none-any.whl - Mend

loopllm 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

loopllm/__init__.py +69 -0
loopllm/__main__.py +5 -0
loopllm/adaptive_exit.py +78 -0
loopllm/agent_loop.py +299 -0
loopllm/cli.py +521 -0
loopllm/elicitation.py +519 -0
loopllm/engine.py +376 -0
loopllm/evaluator_factory.py +72 -0
loopllm/evaluators.py +419 -0
loopllm/guards.py +254 -0
loopllm/local_loop.py +273 -0
loopllm/mcp_server.py +2657 -0
loopllm/plan_registry.py +412 -0
loopllm/priors.py +604 -0
loopllm/provider.py +51 -0
loopllm/providers/__init__.py +15 -0
loopllm/providers/agent.py +64 -0
loopllm/providers/mock.py +64 -0
loopllm/providers/ollama.py +95 -0
loopllm/providers/openrouter.py +101 -0
loopllm/serve.py +297 -0
loopllm/step_scorer.py +190 -0
loopllm/store.py +1126 -0
loopllm/tasks.py +599 -0
loopllm-0.7.0.dist-info/METADATA +454 -0
loopllm-0.7.0.dist-info/RECORD +29 -0
loopllm-0.7.0.dist-info/WHEEL +4 -0
loopllm-0.7.0.dist-info/entry_points.txt +3 -0
loopllm-0.7.0.dist-info/licenses/LICENSE +21 -0

loopllm/providers/agent.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""Agent passthrough provider.
+Instead of calling an external LLM, this provider signals that the
+calling agent (VS Code Copilot, Cursor, Claude, etc.) should perform
+the generation itself.  The MCP tools catch :class:`AgentExecutionRequired`
+and return a structured ``agent_prompt`` payload—the connected IDE agent
+then executes it directly.
+This eliminates the Ollama / OpenRouter dependency entirely and lets the
+tool use whatever frontier model the user already has active.
+"""
+from __future__ import annotations
+from typing import Any
+from loopllm.provider import LLMProvider, LLMResponse
+class AgentExecutionRequired(Exception):
+    """Signal that the calling agent must execute the prompt itself.
+    :class:`AgentPassthroughProvider` raises this in place of returning a
+    completion; the instance carries the arguments of the original call.
+    Attributes:
+        prompt: Prompt text that the calling agent should execute.
+        model: Model hint given at the call site (only echoed in the message).
+        kwargs: Extra keyword arguments captured from the call site.
+    """
+    def __init__(self, prompt: str, model: str, **kwargs: Any) -> None:
+        # The exception message names only the model hint; the prompt and
+        # extras are exposed as attributes.
+        message = f"Agent execution required for model={model!r}"
+        super().__init__(message)
+        self.prompt, self.model, self.kwargs = prompt, model, kwargs
+class AgentPassthroughProvider(LLMProvider):
+    """Provider that hands generation back to the connected IDE agent.
+    No external service is contacted: :meth:`complete` always raises
+    :class:`AgentExecutionRequired`.  MCP tool implementations are expected
+    to catch that exception and answer with a structured ``agent_prompt``
+    response telling the connected agent (Copilot / Claude / Cursor) to
+    perform the generation itself.
+    Usage::
+        loopllm mcp-server --provider agent
+    """
+    @property
+    def name(self) -> str:
+        """Provider name."""
+        return "agent"
+    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
+        """Hand the request back to the caller by raising; never returns.
+        Args:
+            prompt: The prompt the calling agent should execute.
+            model: Model hint, carried on the raised exception.
+            **kwargs: Extra keyword arguments, carried on the raised exception.
+        Raises:
+            AgentExecutionRequired: Always — callers must handle this.
+        """
+        handoff = AgentExecutionRequired(prompt, model, **kwargs)
+        raise handoff

loopllm/providers/mock.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""Mock LLM provider for testing."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+from loopllm.provider import LLMProvider, LLMResponse, LLMUsage
+@dataclass
+class MockLLMProvider(LLMProvider):
+    """Canned-response LLM provider intended for tests.
+    Args:
+        responses: Replies handed out in order, wrapping around at the end.
+            When empty or ``None`` a numbered placeholder is generated.
+        default_score: Unused; kept for compatibility.
+        latency_ms: Latency value reported on every response (no delay is
+            actually performed).
+    """
+    responses: list[str] | None = None
+    default_score: float = 0.9
+    latency_ms: float = 10.0
+    # Arguments of every call made so far, in call order.
+    calls: list[dict[str, Any]] = field(default_factory=list, repr=False)
+    # Number of completed calls; selects the next canned response.
+    _index: int = field(default=0, repr=False)
+    @property
+    def name(self) -> str:
+        """Provider name."""
+        return "mock"
+    @property
+    def call_count(self) -> int:
+        """How many times :meth:`complete` has been invoked."""
+        return len(self.calls)
+    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
+        """Record the call and hand back the next canned reply.
+        Replies rotate through *responses* when any are configured;
+        otherwise the content is ``"Mock response {n}"``.
+        Args:
+            prompt: The prompt (recorded but not used).
+            model: The model name (recorded and echoed on the response).
+            **kwargs: Extra keyword arguments (recorded).
+        Returns:
+            :class:`LLMResponse` with fake content and fixed usage numbers.
+        """
+        record: dict[str, Any] = {"prompt": prompt, "model": model}
+        record.update(kwargs)
+        self.calls.append(record)
+        turn = self._index
+        canned = self.responses
+        content = canned[turn % len(canned)] if canned else f"Mock response {turn}"
+        self._index = turn + 1
+        fake_usage = LLMUsage(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+        return LLMResponse(
+            content=content,
+            model=model,
+            usage=fake_usage,
+            latency_ms=self.latency_ms,
+        )

loopllm/providers/ollama.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""Ollama LLM provider."""
+from __future__ import annotations
+import time
+from dataclasses import dataclass
+import structlog
+from typing import Any
+from loopllm.provider import LLMProvider, LLMResponse, LLMUsage
+logger = structlog.get_logger(__name__)
+@dataclass
+class OllamaProvider(LLMProvider):
+    """LLM provider backed by a local Ollama instance.
+    Args:
+        base_url: Base URL for the Ollama API.
+    """
+    base_url: str = "http://localhost:11434"
+    @property
+    def name(self) -> str:
+        """Provider name."""
+        return "ollama"
+    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
+        """Call the Ollama chat endpoint.
+        Args:
+            prompt: The user prompt to complete.
+            model: Ollama model name (e.g. ``llama3``).
+            **kwargs: Extra fields merged into the request body.
+        Returns:
+            Parsed :class:`LLMResponse` with content, usage, and latency.
+        Raises:
+            RuntimeError: If the API returns a non-200 status code.
+            ImportError: If ``httpx`` is not installed.
+        """
+        try:
+            import httpx
+        except ImportError as exc:  # pragma: no cover
+            raise ImportError(
+                "httpx is required for OllamaProvider. "
+                "Install it with: pip install loopllm[ollama]"
+            ) from exc
+        payload = {
+            "model": model,
+            "messages": [{"role": "user", "content": prompt}],
+            "stream": False,
+            **kwargs,
+        }
+        t0 = time.perf_counter()
+        response = httpx.post(
+            f"{self.base_url}/api/chat",
+            json=payload,
+            timeout=120.0,
+        )
+        latency_ms = (time.perf_counter() - t0) * 1000.0
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Ollama returned {response.status_code}: {response.text}"
+            )
+        data = response.json()
+        content = data["message"]["content"]
+        # Ollama's native /api/chat response reports token counts as the
+        # top-level fields ``prompt_eval_count`` / ``eval_count`` rather than
+        # an OpenAI-style ``usage`` object, so read those first.  The ``usage``
+        # object is kept as a fallback for OpenAI-compatible front ends.
+        usage_data = data.get("usage") or {}
+        prompt_tokens = data.get("prompt_eval_count")
+        if prompt_tokens is None:
+            prompt_tokens = usage_data.get("prompt_tokens", 0)
+        completion_tokens = data.get("eval_count")
+        if completion_tokens is None:
+            completion_tokens = usage_data.get("completion_tokens", 0)
+        total_tokens = usage_data.get("total_tokens") or (
+            prompt_tokens + completion_tokens
+        )
+        usage = LLMUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        )
+        logger.debug(
+            "ollama_complete",
+            model=model,
+            latency_ms=round(latency_ms, 1),
+            total_tokens=usage.total_tokens,
+        )
+        return LLMResponse(
+            content=content,
+            model=model,
+            usage=usage,
+            latency_ms=latency_ms,
+        )

loopllm/providers/openrouter.py ADDED Viewed

@@ -0,0 +1,101 @@
+"""OpenRouter LLM provider."""
+from __future__ import annotations
+import time
+from dataclasses import dataclass
+import structlog
+from typing import Any
+from loopllm.provider import LLMProvider, LLMResponse, LLMUsage
+logger = structlog.get_logger(__name__)
+@dataclass
+class OpenRouterProvider(LLMProvider):
+    """LLM provider backed by the OpenRouter API.
+    Args:
+        api_key: OpenRouter API key.
+        base_url: Base URL for the OpenRouter API.
+    """
+    api_key: str
+    base_url: str = "https://openrouter.ai/api/v1"
+    @property
+    def name(self) -> str:
+        """Provider name."""
+        return "openrouter"
+    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
+        """Call the OpenRouter chat completions endpoint.
+        Args:
+            prompt: The user prompt to complete.
+            model: OpenRouter model identifier (e.g. ``openai/gpt-4o-mini``).
+            **kwargs: Extra fields merged into the request body.
+        Returns:
+            Parsed :class:`LLMResponse` with content, usage, and latency.
+        Raises:
+            RuntimeError: If the API returns a non-200 status code.
+            ImportError: If ``httpx`` is not installed.
+        """
+        try:
+            import httpx
+        except ImportError as exc:  # pragma: no cover
+            raise ImportError(
+                "httpx is required for OpenRouterProvider. "
+                "Install it with: pip install loopllm[openrouter]"
+            ) from exc
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "https://github.com/azank1/loop-llm",
+        }
+        payload = {
+            "model": model,
+            "messages": [{"role": "user", "content": prompt}],
+            **kwargs,
+        }
+        t0 = time.perf_counter()
+        # httpx applies a 5-second default timeout when none is given, which a
+        # non-streamed completion can easily exceed; use the same 120 s budget
+        # as the Ollama provider.
+        response = httpx.post(
+            f"{self.base_url}/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=120.0,
+        )
+        latency_ms = (time.perf_counter() - t0) * 1000.0
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"OpenRouter returned {response.status_code}: {response.text}"
+            )
+        data = response.json()
+        content = data["choices"][0]["message"]["content"]
+        # ``or {}`` also covers an explicit ``"usage": null`` in the body.
+        usage_data = data.get("usage") or {}
+        usage = LLMUsage(
+            prompt_tokens=usage_data.get("prompt_tokens", 0),
+            completion_tokens=usage_data.get("completion_tokens", 0),
+            total_tokens=usage_data.get("total_tokens", 0),
+        )
+        logger.debug(
+            "openrouter_complete",
+            model=model,
+            latency_ms=round(latency_ms, 1),
+            total_tokens=usage.total_tokens,
+        )
+        return LLMResponse(
+            content=content,
+            model=model,
+            usage=usage,
+            latency_ms=latency_ms,
+        )

loopllm/serve.py ADDED Viewed

@@ -0,0 +1,297 @@
+"""REST API server exposing loopllm scoring to local models.
+Starts a lightweight HTTP server (FastAPI + uvicorn) that exposes:
+  POST /score          — score a prompt+output pair, return quality metrics
+  POST /rewrite        — score + return a rewritten prompt if below threshold
+  POST /intercept      — run loopllm_intercept on a prompt
+  POST /plan/register  — create a new plan in the PlanRegistry
+  POST /plan/update    — update task scores and get confidence status
+  POST /plan/next      — get and activate the next pending task in a plan
+  GET  /plan           — list all active plans
+  GET  /plan/{plan_id} — get full plan status
+  GET  /health         — health check
+This is the bridge that lets local models (Ollama, llama.cpp, LM Studio)
+use loopllm as a scoring middleware without needing MCP tool-calling support.
+Usage::
+    loopllm serve --host 0.0.0.0 --port 8765
+"""
+from __future__ import annotations
+import json
+from typing import Any
+from loopllm.mcp_server import (
+    _init_state,
+    _score_prompt_quality,
+    _build_evaluator,
+    _tool_intercept,
+)
+from loopllm.plan_registry import get_registry
+# ---------------------------------------------------------------------------
+# Request / response models (Pydantic, only imported when FastAPI available)
+# ---------------------------------------------------------------------------
+def _get_app() -> Any:
+    """Build and return the FastAPI application.
+    Deferred import so the rest of the package doesn't require FastAPI.
+    Calls ``_init_state()`` and then declares the request models and
+    registers every route handler on a fresh ``FastAPI`` instance.
+    Returns:
+        The configured ``FastAPI`` application object.
+    Raises:
+        ImportError: If FastAPI or Pydantic cannot be imported.
+    """
+    try:
+        from fastapi import FastAPI, HTTPException
+        from fastapi.responses import JSONResponse
+        from pydantic import BaseModel
+    except ImportError as exc:
+        raise ImportError(
+            "FastAPI and uvicorn are required for `loopllm serve`.\n"
+            "Install with: pip install loopllm[serve]"
+        ) from exc
+    # Initialise the shared state from loopllm.mcp_server before any handler
+    # can run.
+    _init_state()
+    # NOTE(review): the API version below is hard-coded to "0.5.0" — confirm
+    # whether it is meant to track the package version.
+    app = FastAPI(
+        title="loopllm scoring API",
+        description=(
+            "Quality scoring and prompt optimization middleware for local LLMs. "
+            "POST your prompt+output to /score to get quality metrics and a "
+            "rewritten prompt if needed."
+        ),
+        version="0.5.0",
+    )
+    # -- Pydantic models -------------------------------------------------------
+    # NOTE(review): the model classes and endpoint docstrings below are read
+    # by FastAPI/Pydantic when generating the OpenAPI schema, so they are left
+    # untouched; explanations are given as comments instead.
+    # Body of POST /score.
+    class ScoreRequest(BaseModel):
+        prompt: str
+        output: str
+        evaluator_type: str = "length"
+        min_words: int = 5
+        max_words: int = 10_000
+        required_fields: list[str] = []
+        required_patterns: list[str] = []
+        quality_threshold: float = 0.80
+    # Body of POST /rewrite. Unlike ScoreRequest it has no required_fields /
+    # required_patterns, but adds the retry counters.
+    class RewriteRequest(BaseModel):
+        prompt: str
+        output: str
+        iteration: int = 0
+        max_retries: int = 3
+        evaluator_type: str = "length"
+        min_words: int = 5
+        max_words: int = 10_000
+        quality_threshold: float = 0.80
+    # Body of POST /intercept.
+    class InterceptRequest(BaseModel):
+        prompt: str
+    # Body of POST /plan/register.
+    class PlanRegisterRequest(BaseModel):
+        goal: str
+        tasks: list[dict[str, Any]]
+        confidence_threshold: float = 0.72
+    # Body of POST /plan/update; either score may be omitted.
+    class PlanUpdateRequest(BaseModel):
+        plan_id: str
+        task_id: str
+        prompt_score: float | None = None
+        output_score: float | None = None
+        mark_done: bool = True
+    # Body of POST /plan/next.
+    class PlanNextRequest(BaseModel):
+        plan_id: str
+    # -- Endpoints -------------------------------------------------------------
+    @app.get("/health")
+    def health() -> dict[str, str]:
+        return {"status": "ok", "service": "loopllm"}
+    @app.post("/score")
+    def score(req: ScoreRequest) -> JSONResponse:
+        """Score a prompt+output pair.
+        Returns prompt_score, output_score, combined_score, passed,
+        deficiencies, and grade.
+        """
+        # Prompt quality (heuristic)
+        prompt_quality = _score_prompt_quality(req.prompt)
+        prompt_score = prompt_quality["quality_score"]
+        # Output quality (evaluator)
+        evaluator = _build_evaluator(
+            req.evaluator_type,
+            min_words=req.min_words,
+            max_words=req.max_words,
+            required_fields=req.required_fields,
+            required_patterns=req.required_patterns,
+        )
+        eval_result = evaluator.evaluate(req.output)
+        output_score = eval_result.score
+        # Combined (prompt has lower weight — see PlanRegistry)
+        combined = prompt_score * 0.35 + output_score * 0.65
+        passed = combined >= req.quality_threshold
+        # Scores are rounded to 4 decimals for the response only; `passed` was
+        # decided on the unrounded combined value above.
+        return JSONResponse({
+            "prompt_score": round(prompt_score, 4),
+            "output_score": round(output_score, 4),
+            "combined_score": round(combined, 4),
+            "passed": passed,
+            "quality_threshold": req.quality_threshold,
+            "deficiencies": eval_result.deficiencies,
+            "prompt_grade": prompt_quality["grade"],
+            "prompt_gauge": prompt_quality["gauge"],
+            "prompt_issues": prompt_quality["issues"],
+            "prompt_suggestions": prompt_quality["suggestions"],
+        })
+    @app.post("/rewrite")
+    def rewrite(req: RewriteRequest) -> JSONResponse:
+        """Score output and return a rewritten prompt if quality is below threshold.
+        If passed=True the response also contains rewritten_prompt=null —
+        meaning no retry is needed.
+        """
+        prompt_quality = _score_prompt_quality(req.prompt)
+        prompt_score = prompt_quality["quality_score"]
+        # Only the word bounds are forwarded here; RewriteRequest carries no
+        # required_fields / required_patterns.
+        evaluator = _build_evaluator(
+            req.evaluator_type,
+            min_words=req.min_words,
+            max_words=req.max_words,
+        )
+        eval_result = evaluator.evaluate(req.output)
+        output_score = eval_result.score
+        # Same 0.35 / 0.65 prompt/output weighting as /score.
+        combined = prompt_score * 0.35 + output_score * 0.65
+        passed = combined >= req.quality_threshold
+        rewritten: str | None = None
+        # A rewrite is produced only while the retry budget is not exhausted.
+        if not passed and req.iteration < req.max_retries:
+            # One bullet per deficiency, or a generic bullet when the evaluator
+            # reported none.
+            deficiency_str = (
+                "\n".join(f"  - {d}" for d in eval_result.deficiencies)
+                if eval_result.deficiencies
+                else "  - Output did not meet quality threshold"
+            )
+            # Only the first 500 characters of the previous output are echoed
+            # back into the retry prompt.
+            rewritten = (
+                f"[LOOPLLM | score={combined:.2f} | "
+                f"retry={req.iteration + 1}/{req.max_retries} | "
+                f"threshold={req.quality_threshold:.2f}]\n"
+                f"Your previous response scored {combined:.2f}/1.0.\n"
+                f"Issues to fix:\n{deficiency_str}\n\n"
+                f"Original task:\n{req.prompt}\n\n"
+                f"Previous response (do not repeat):\n{req.output[:500]}\n\n"
+                f"Please produce an improved response that addresses all issues."
+            )
+        # `should_retry` repeats the condition under which `rewritten` is set.
+        return JSONResponse({
+            "prompt_score": round(prompt_score, 4),
+            "output_score": round(output_score, 4),
+            "combined_score": round(combined, 4),
+            "passed": passed,
+            "quality_threshold": req.quality_threshold,
+            "deficiencies": eval_result.deficiencies,
+            "rewritten_prompt": rewritten,
+            "should_retry": not passed and req.iteration < req.max_retries,
+            "iteration": req.iteration,
+        })
+    @app.post("/intercept")
+    def intercept(req: InterceptRequest) -> JSONResponse:
+        """Run loopllm_intercept on a prompt (same as the MCP tool)."""
+        # The tool result is decoded with json.loads, so it is presumably a
+        # JSON string — verify against _tool_intercept.
+        result = _tool_intercept(req.prompt)
+        return JSONResponse(json.loads(result))
+    # -- Plan endpoints --------------------------------------------------------
+    @app.post("/plan/register")
+    def plan_register(req: PlanRegisterRequest) -> JSONResponse:
+        """Create a new plan in the PlanRegistry."""
+        registry = get_registry()
+        plan = registry.create(
+            goal=req.goal,
+            tasks=req.tasks,
+            confidence_threshold=req.confidence_threshold,
+        )
+        return JSONResponse(plan.to_dict())
+    @app.post("/plan/update")
+    def plan_update(req: PlanUpdateRequest) -> JSONResponse:
+        """Score a task's prompt and/or output and get updated plan confidence."""
+        registry = get_registry()
+        result: dict[str, Any] = {}
+        # Any registry result containing an "error" key is reported as HTTP 404.
+        if req.prompt_score is not None:
+            result = registry.score_prompt(req.plan_id, req.task_id, req.prompt_score)
+            if "error" in result:
+                raise HTTPException(status_code=404, detail=result["error"])
+        # When both scores are supplied, the output-score result replaces the
+        # prompt-score result in the response.
+        if req.output_score is not None:
+            result = registry.score_output(
+                req.plan_id, req.task_id, req.output_score, mark_done=req.mark_done
+            )
+            if "error" in result:
+                raise HTTPException(status_code=404, detail=result["error"])
+        # Nothing was scored (or the scoring call returned an empty result):
+        # fall back to the plain plan status.
+        if not result:
+            result = registry.get_status(req.plan_id)
+            if "error" in result:
+                raise HTTPException(status_code=404, detail=result["error"])
+        return JSONResponse(result)
+    @app.get("/plan/{plan_id}")
+    def plan_status(plan_id: str) -> JSONResponse:
+        """Get the current status and rolling confidence of a plan."""
+        registry = get_registry()
+        result = registry.get_status(plan_id)
+        if "error" in result:
+            raise HTTPException(status_code=404, detail=result["error"])
+        return JSONResponse(result)
+    @app.post("/plan/next")
+    def plan_next(req: PlanNextRequest) -> JSONResponse:
+        """Get and activate the next pending task in a plan."""
+        registry = get_registry()
+        task = registry.next_task(req.plan_id)
+        # A None task is reported as "done"; no 404 is raised on this route.
+        if task is None:
+            return JSONResponse({"done": True, "plan_id": req.plan_id})
+        return JSONResponse({**task, "done": False})
+    @app.get("/plan")
+    def list_plans() -> JSONResponse:
+        """List all active plans."""
+        registry = get_registry()
+        return JSONResponse({"plans": registry.list_plans()})
+    return app
+def run_server(host: str = "127.0.0.1", port: int = 8765, reload: bool = False) -> None:
+    """Launch the loopllm scoring REST server under uvicorn.
+    Args:
+        host: Address to bind to.
+        port: TCP port to listen on.
+        reload: Turn on uvicorn auto-reload (development only).
+    Raises:
+        ImportError: If uvicorn is missing, or if the eager app build fails
+            to import its own dependencies.
+    """
+    try:
+        import uvicorn
+    except ImportError as exc:
+        message = (
+            "uvicorn is required for `loopllm serve`.\n"
+            "Install with: pip install loopllm[serve]"
+        )
+        raise ImportError(message) from exc
+    # Eagerly build the app once so import errors show up before uvicorn
+    # starts; uvicorn then builds its own instance through the factory path.
+    _get_app()
+    server_options: dict[str, Any] = {
+        "factory": True,
+        "host": host,
+        "port": port,
+        "reload": reload,
+        "log_level": "info",
+    }
+    uvicorn.run("loopllm.serve:_get_app", **server_options)