PyPI - loopllm - Versions diffs - 0.7.0__py3-none-any.whl - Mend

loopllm 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

loopllm/__init__.py +69 -0
loopllm/__main__.py +5 -0
loopllm/adaptive_exit.py +78 -0
loopllm/agent_loop.py +299 -0
loopllm/cli.py +521 -0
loopllm/elicitation.py +519 -0
loopllm/engine.py +376 -0
loopllm/evaluator_factory.py +72 -0
loopllm/evaluators.py +419 -0
loopllm/guards.py +254 -0
loopllm/local_loop.py +273 -0
loopllm/mcp_server.py +2657 -0
loopllm/plan_registry.py +412 -0
loopllm/priors.py +604 -0
loopllm/provider.py +51 -0
loopllm/providers/__init__.py +15 -0
loopllm/providers/agent.py +64 -0
loopllm/providers/mock.py +64 -0
loopllm/providers/ollama.py +95 -0
loopllm/providers/openrouter.py +101 -0
loopllm/serve.py +297 -0
loopllm/step_scorer.py +190 -0
loopllm/store.py +1126 -0
loopllm/tasks.py +599 -0
loopllm-0.7.0.dist-info/METADATA +454 -0
loopllm-0.7.0.dist-info/RECORD +29 -0
loopllm-0.7.0.dist-info/WHEEL +4 -0
loopllm-0.7.0.dist-info/entry_points.txt +3 -0
loopllm-0.7.0.dist-info/licenses/LICENSE +21 -0

loopllm/__init__.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""loop-llm: iterative refinement engine for LLM applications."""
+from __future__ import annotations
+from loopllm.adaptive_exit import BayesianExitCondition
+from loopllm.agent_loop import AgentLoopController, AgentLoopSession
+from loopllm.elicitation import (
+    ClarifyingQuestion,
+    ElicitationSession,
+    IntentRefiner,
+    IntentSpec,
+)
+from loopllm.engine import (
+    CompositeEvaluator,
+    Evaluator,
+    EvaluationResult,
+    ExitConditionProtocol,
+    ExitReason,
+    IterationRecord,
+    LoopConfig,
+    LoopedLLM,
+    LoopMetrics,
+    RefinementResult,
+)
+from loopllm.guards import AgentLoopGuard, GuardContext, GuardStack
+from loopllm.priors import AdaptivePriors, CallObservation
+from loopllm.step_scorer import DualVerifyScore, conservative_dual_verify
+from loopllm.store import LoopStore, SQLiteBackedPriors
+from loopllm.tasks import Task, TaskOrchestrator, TaskPlan, TaskState
+# Package version string; matches the 0.7.0 wheel this module ships in.
+__version__ = "0.7.0"
+# Public API of the package: every name below is imported at the top of this
+# module, grouped here by the area it belongs to.
+__all__ = [
+    # Engine
+    "LoopedLLM",
+    "LoopConfig",
+    "EvaluationResult",
+    "ExitReason",
+    "IterationRecord",
+    "LoopMetrics",
+    "RefinementResult",
+    "CompositeEvaluator",
+    "Evaluator",
+    "ExitConditionProtocol",
+    # Priors
+    "AdaptivePriors",
+    "CallObservation",
+    "BayesianExitCondition",
+    # Agent loops
+    "AgentLoopController",
+    "AgentLoopSession",
+    "AgentLoopGuard",
+    "GuardContext",
+    "GuardStack",
+    "DualVerifyScore",
+    "conservative_dual_verify",
+    # Elicitation
+    "IntentRefiner",
+    "IntentSpec",
+    "ClarifyingQuestion",
+    "ElicitationSession",
+    # Store
+    "LoopStore",
+    "SQLiteBackedPriors",
+    # Tasks
+    "Task",
+    "TaskPlan",
+    "TaskState",
+    "TaskOrchestrator",
+]

loopllm/__main__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Allow running loop-llm as ``python -m loopllm``."""
+from loopllm.cli import main
+# Invoke the CLI entry point only when this module is executed (for example
+# via ``python -m loopllm``), never as a side effect of importing it.
+if __name__ == "__main__":
+    main()

loopllm/adaptive_exit.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Bayesian adaptive exit condition for the refinement loop."""
+from __future__ import annotations
+from dataclasses import dataclass
+from loopllm.engine import ExitReason
+from loopllm.priors import AdaptivePriors
+@dataclass
+class BayesianExitCondition:
+    """Exit condition that uses learned priors to decide when to stop looping.
+    Integrates with :class:`AdaptivePriors` to make statistically-informed
+    stopping decisions based on historical observations.
+    Attributes:
+        priors: The adaptive priors manager holding learned beliefs.
+        task_type: Identifier for the task class.
+        model_id: Identifier for the LLM model.
+        quality_threshold: Target quality level.
+        continue_probability_threshold: Minimum probability of improvement to continue.
+        min_iterations: Minimum iterations before this condition can fire.
+    """
+    priors: AdaptivePriors
+    task_type: str = "unknown"
+    model_id: str = "unknown"
+    quality_threshold: float = 0.8
+    continue_probability_threshold: float = 0.3
+    min_iterations: int = 1
+    def should_exit(
+        self,
+        iteration: int,
+        current_score: float,
+        scores_so_far: list[float],
+    ) -> ExitReason | None:
+        """Determine whether the loop should exit based on Bayesian analysis.
+        Args:
+            iteration: Current iteration number (1-based).
+            current_score: Score from the most recent evaluation.
+            scores_so_far: All scores observed so far in this run.
+        Returns:
+            An :class:`ExitReason` if the loop should stop, or ``None`` to continue.
+        """
+        if iteration < self.min_iterations:
+            return None
+        if not scores_so_far:
+            return None
+        should_go = self.priors.should_continue(
+            self.task_type,
+            self.model_id,
+            iteration,
+            current_score,
+            scores_so_far,
+            quality_threshold=self.quality_threshold,
+        )
+        if not should_go:
+            expected_delta, uncertainty = self.priors.expected_improvement(
+                self.task_type, self.model_id, iteration
+            )
+            return ExitReason(
+                condition="adaptive_bayesian",
+                message=(
+                    f"Bayesian exit at iteration {iteration}: "
+                    f"score={current_score:.3f}, "
+                    f"E[delta]={expected_delta:.3f}±{uncertainty:.3f}, "
+                    f"threshold={self.quality_threshold:.2f}"
+                ),
+            )
+        return None

loopllm/agent_loop.py ADDED Viewed

@@ -0,0 +1,299 @@
+"""Adaptive agent-loop control built on the Bayesian priors layer.
+Agent loops use Conservative Dual-Verify (CDV) at the MCP boundary: step
+artifacts are scored externally before entering this controller. The controller
+applies a composable guard stack and learns optimal depth from verified score
+trajectories.
+"""
+from __future__ import annotations
+import time
+import uuid
+from dataclasses import dataclass, field
+from typing import Any
+import structlog
+from loopllm.guards import (
+    CONVERGENCE_DELTA,
+    MAX_STEPS_DEFAULT,
+    AgentLoopGuard,
+    GuardContext,
+    GuardStack,
+    default_guard_stack,
+)
+from loopllm.priors import AdaptivePriors, CallObservation
+# Module-level structured logger, named after this module.
+logger = structlog.get_logger(__name__)
+# Alias of guards.MAX_STEPS_DEFAULT; used as the controller's default
+# ``max_steps`` and as the upper clamp for a session's suggested budget.
+MAX_STEPS = MAX_STEPS_DEFAULT
+@dataclass
+class AgentLoopSession:
+    """Mutable state for a single adaptive agent-loop run.
+    Instances are created by ``AgentLoopController.start`` and then updated in
+    place by its ``step`` and ``end`` methods.
+    """
+    # Identity and configuration. The controller assigns the first 12 hex
+    # characters of a uuid4 as session_id.
+    session_id: str
+    goal: str
+    task_type: str
+    model_id: str
+    quality_threshold: float
+    # Step budget suggested by the priors; the controller clamps it to
+    # 1..MAX_STEPS before storing it.
+    suggested_budget: int
+    cost_weight: float = 0.5
+    # Copied from the "metadata" entry of the priors' suggestion at start.
+    confidence: float = 0.0
+    total_observations: int = 0
+    # One entry per step: the score after clamping to [0, 1].
+    scores: list[float] = field(default_factory=list)
+    # One entry per step: milliseconds since the previous step (or since the
+    # session object was created, for the first step).
+    latencies_ms: list[float] = field(default_factory=list)
+    # Non-empty notes passed to step(), in order.
+    notes: list[str] = field(default_factory=list)
+    # time.perf_counter() readings, so only differences are meaningful.
+    started_at: float = field(default_factory=time.perf_counter)
+    last_step_at: float = field(default_factory=time.perf_counter)
+    # Decision returned by the most recent step(): "continue" or "stop".
+    last_decision: str = "continue"
+    # None until end() records the outcome.
+    converged: bool | None = None
+    # Set by end(); step() refuses to advance a closed session.
+    closed: bool = False
+    # CDV verifier recipe (configured at start)
+    evaluator_type: str = "composite"
+    evaluator_kwargs: dict[str, Any] = field(default_factory=dict)
+    quality_criteria: list[str] = field(default_factory=list)
+    # Limits stored at start; nothing in this module reads them.
+    # NOTE(review): presumably enforced by the guard stack - confirm in guards.py.
+    max_wall_ms: float = 300_000.0
+    max_tokens: int = 0
+    # Non-empty step outputs passed to step(), in order.
+    step_outputs: list[str] = field(default_factory=list)
+    # Not written anywhere in this module.
+    # NOTE(review): presumably maintained by a guard - confirm in guards.py.
+    step_fingerprints: list[str] = field(default_factory=list)
+    # Running token totals accumulated by step().
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+class AgentLoopController:
+    """Advises an agent's multi-step loop on when to stop, and learns.
+    Lifecycle: ``start`` → repeated ``step`` → ``end``.
+    """
+    def __init__(
+        self,
+        priors: AdaptivePriors,
+        guards: AgentLoopGuard | GuardStack | None = None,
+        max_steps: int = MAX_STEPS,
+    ) -> None:
+        self._priors = priors
+        self._sessions: dict[str, AgentLoopSession] = {}
+        if guards is None:
+            self._guards = default_guard_stack(priors, max_steps)
+        elif isinstance(guards, GuardStack):
+            self._guards = guards
+        else:
+            self._guards = GuardStack([guards])
+    def start(
+        self,
+        goal: str,
+        task_type: str = "general",
+        model_id: str = "unknown",
+        quality_threshold: float | None = None,
+        cost_weight: float = 0.5,
+        evaluator_type: str = "composite",
+        quality_criteria: list[str] | None = None,
+        max_wall_ms: float = 300_000.0,
+        max_tokens: int = 0,
+        **evaluator_kwargs: Any,
+    ) -> AgentLoopSession:
+        """Begin a new adaptive agent-loop session."""
+        suggestion = self._priors.suggest_config(task_type, model_id, cost_weight)
+        budget = int(suggestion["max_iterations"])
+        threshold = (
+            float(quality_threshold)
+            if quality_threshold is not None
+            else float(suggestion["quality_threshold"])
+        )
+        meta = suggestion.get("metadata", {})
+        criteria = list(quality_criteria or [])
+        if not criteria and goal:
+            criteria = [goal]
+        session = AgentLoopSession(
+            session_id=uuid.uuid4().hex[:12],
+            goal=goal,
+            task_type=task_type,
+            model_id=model_id,
+            quality_threshold=threshold,
+            suggested_budget=max(1, min(budget, MAX_STEPS)),
+            cost_weight=cost_weight,
+            confidence=float(meta.get("confidence", 0.0)),
+            total_observations=int(meta.get("total_observations", 0)),
+            evaluator_type=evaluator_type,
+            evaluator_kwargs=dict(evaluator_kwargs),
+            quality_criteria=criteria,
+            max_wall_ms=max_wall_ms,
+            max_tokens=max_tokens,
+        )
+        self._sessions[session.session_id] = session
+        logger.info(
+            "agent_loop_start",
+            session_id=session.session_id,
+            task_type=task_type,
+            suggested_budget=session.suggested_budget,
+            confidence=session.confidence,
+            evaluator_type=evaluator_type,
+        )
+        return session
+    def step(
+        self,
+        session_id: str,
+        score: float,
+        note: str = "",
+        step_output: str = "",
+        step_tokens: int = 0,
+    ) -> dict[str, Any]:
+        """Advance a session with a verified (or legacy) progress score."""
+        session = self._require(session_id)
+        if session.closed:
+            raise ValueError(f"Session already closed: {session_id}")
+        score = max(0.0, min(1.0, float(score)))
+        now = time.perf_counter()
+        session.latencies_ms.append((now - session.last_step_at) * 1000.0)
+        session.last_step_at = now
+        session.scores.append(score)
+        if note:
+            session.notes.append(note)
+        if step_output:
+            session.step_outputs.append(step_output)
+        if step_tokens > 0:
+            session.completion_tokens += step_tokens
+            session.prompt_tokens += max(step_tokens // 4, 1)
+        steps_used = len(session.scores)
+        expected_delta, uncertainty = self._priors.expected_improvement(
+            session.task_type, session.model_id, steps_used
+        )
+        decision, reason = self._decide(session, score, steps_used, step_output)
+        session.last_decision = decision
+        verdict: dict[str, Any] = {
+            "session_id": session.session_id,
+            "decision": decision,
+            "reason": reason,
+            "score": round(score, 4),
+            "steps_used": steps_used,
+            "suggested_budget": session.suggested_budget,
+            "quality_threshold": round(session.quality_threshold, 3),
+            "expected_delta": round(expected_delta, 4),
+            "uncertainty": round(uncertainty, 4),
+            "score_trajectory": [round(s, 4) for s in session.scores],
+        }
+        logger.debug(
+            "agent_loop_step",
+            session_id=session.session_id,
+            decision=decision,
+            steps_used=steps_used,
+        )
+        return verdict
+    def end(self, session_id: str, converged: bool | None = None) -> dict[str, Any]:
+        """Finalise a loop and learn from verified score trajectories."""
+        session = self._require(session_id)
+        if not session.closed:
+            if converged is None:
+                converged = bool(
+                    session.scores and session.scores[-1] >= session.quality_threshold
+                )
+            observation = CallObservation(
+                task_type=session.task_type,
+                model_id=session.model_id,
+                scores=list(session.scores),
+                latencies_ms=list(session.latencies_ms),
+                converged=converged,
+                total_iterations=len(session.scores),
+                max_iterations=session.suggested_budget,
+                quality_threshold=session.quality_threshold,
+                prompt_tokens=session.prompt_tokens,
+                completion_tokens=session.completion_tokens,
+            )
+            self._priors.observe(observation)
+            session.converged = converged
+            session.closed = True
+            logger.info(
+                "agent_loop_end",
+                session_id=session.session_id,
+                steps_run=len(session.scores),
+                converged=converged,
+            )
+        report = self._priors.report(session.task_type, session.model_id)
+        return {
+            "session_id": session.session_id,
+            "goal": session.goal,
+            "task_type": session.task_type,
+            "model_id": session.model_id,
+            "steps_run": len(session.scores),
+            "converged": session.converged,
+            "final_score": round(session.scores[-1], 4) if session.scores else 0.0,
+            "learned": {
+                "optimal_depth": report["optimal_depth"],
+                "converge_rate": report["converge_rate"],
+                "confidence": report["confidence"],
+                "total_observations": report["total_calls"],
+            },
+        }
+    def status(self, session_id: str) -> dict[str, Any]:
+        """Return the current state of an active session."""
+        session = self._require(session_id)
+        return {
+            "session_id": session.session_id,
+            "goal": session.goal,
+            "task_type": session.task_type,
+            "model_id": session.model_id,
+            "steps_used": len(session.scores),
+            "suggested_budget": session.suggested_budget,
+            "quality_threshold": round(session.quality_threshold, 3),
+            "score_trajectory": [round(s, 4) for s in session.scores],
+            "last_decision": session.last_decision,
+            "closed": session.closed,
+            "converged": session.converged,
+            "evaluator_type": session.evaluator_type,
+            "quality_criteria": session.quality_criteria,
+        }
+    def get_session(self, session_id: str) -> AgentLoopSession:
+        """Return the raw session object (for MCP CDV wiring)."""
+        return self._require(session_id)
+    def _decide(
+        self,
+        session: AgentLoopSession,
+        score: float,
+        steps_used: int,
+        step_output: str = "",
+    ) -> tuple[str, str]:
+        """Run guard stack; continue if no guard fires."""
+        ctx = GuardContext(
+            session=session,
+            iteration=steps_used,
+            current_score=score,
+            scores_so_far=list(session.scores),
+            step_output=step_output,
+        )
+        reason = self._guards.evaluate(ctx)
+        if reason is not None:
+            return "stop", reason.message
+        return "continue", (
+            f"Keep going: step {steps_used}/{session.suggested_budget}, "
+            f"score={score:.3f} below threshold {session.quality_threshold:.2f}"
+        )
+    def _require(self, session_id: str) -> AgentLoopSession:
+        if session_id not in self._sessions:
+            raise KeyError(f"Unknown agent-loop session: {session_id}")
+        return self._sessions[session_id]
+# Re-export for backward compatibility: CONVERGENCE_DELTA is imported from
+# loopllm.guards and MAX_STEPS aliases guards.MAX_STEPS_DEFAULT; listing them
+# here keeps both importable from this module.
+__all__ = [
+    "AgentLoopController",
+    "AgentLoopSession",
+    "CONVERGENCE_DELTA",
+    "MAX_STEPS",
+]