loopllm 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loopllm/__init__.py ADDED
@@ -0,0 +1,69 @@
1
+ """loop-llm: iterative refinement engine for LLM applications."""
2
+ from __future__ import annotations
3
+
4
+ from loopllm.adaptive_exit import BayesianExitCondition
5
+ from loopllm.agent_loop import AgentLoopController, AgentLoopSession
6
+ from loopllm.elicitation import (
7
+ ClarifyingQuestion,
8
+ ElicitationSession,
9
+ IntentRefiner,
10
+ IntentSpec,
11
+ )
12
+ from loopllm.engine import (
13
+ CompositeEvaluator,
14
+ Evaluator,
15
+ EvaluationResult,
16
+ ExitConditionProtocol,
17
+ ExitReason,
18
+ IterationRecord,
19
+ LoopConfig,
20
+ LoopedLLM,
21
+ LoopMetrics,
22
+ RefinementResult,
23
+ )
24
+ from loopllm.guards import AgentLoopGuard, GuardContext, GuardStack
25
+ from loopllm.priors import AdaptivePriors, CallObservation
26
+ from loopllm.step_scorer import DualVerifyScore, conservative_dual_verify
27
+ from loopllm.store import LoopStore, SQLiteBackedPriors
28
+ from loopllm.tasks import Task, TaskOrchestrator, TaskPlan, TaskState
29
+
30
# Package version string.
__version__ = "0.7.0"

# Names re-exported from the submodules imported above, grouped by subsystem.
__all__ = [
    # Engine
    "LoopedLLM",
    "LoopConfig",
    "EvaluationResult",
    "ExitReason",
    "IterationRecord",
    "LoopMetrics",
    "RefinementResult",
    "CompositeEvaluator",
    "Evaluator",
    "ExitConditionProtocol",
    # Priors
    "AdaptivePriors",
    "CallObservation",
    "BayesianExitCondition",
    # Agent loops
    "AgentLoopController",
    "AgentLoopSession",
    "AgentLoopGuard",
    "GuardContext",
    "GuardStack",
    "DualVerifyScore",
    "conservative_dual_verify",
    # Elicitation
    "IntentRefiner",
    "IntentSpec",
    "ClarifyingQuestion",
    "ElicitationSession",
    # Store
    "LoopStore",
    "SQLiteBackedPriors",
    # Tasks
    "Task",
    "TaskPlan",
    "TaskState",
    "TaskOrchestrator",
]
loopllm/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running loop-llm as ``python -m loopllm``."""
2
+ from loopllm.cli import main
3
+
4
# Run the CLI only when this module is executed (``python -m loopllm``);
# a plain import of the module must not start it.
if __name__ == "__main__":
    main()
loopllm/adaptive_exit.py ADDED
@@ -0,0 +1,78 @@
1
+ """Bayesian adaptive exit condition for the refinement loop."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+ from loopllm.engine import ExitReason
7
+ from loopllm.priors import AdaptivePriors
8
+
9
+
10
@dataclass
class BayesianExitCondition:
    """Stop the refinement loop when the learned priors advise against continuing.

    The decision itself is delegated to :meth:`AdaptivePriors.should_continue`;
    this class only supplies the task/model identity, enforces a minimum number
    of iterations, and packages a negative answer as an :class:`ExitReason`.

    Attributes:
        priors: The adaptive priors manager holding learned beliefs.
        task_type: Identifier for the task class.
        model_id: Identifier for the LLM model.
        quality_threshold: Target quality level, forwarded to the priors.
        continue_probability_threshold: Minimum probability of improvement to
            continue. NOTE(review): ``should_exit`` does not read this field;
            confirm whether the priors are expected to receive it.
        min_iterations: Minimum iterations before this condition can fire.
    """

    priors: AdaptivePriors
    task_type: str = "unknown"
    model_id: str = "unknown"
    quality_threshold: float = 0.8
    continue_probability_threshold: float = 0.3
    min_iterations: int = 1

    def should_exit(
        self,
        iteration: int,
        current_score: float,
        scores_so_far: list[float],
    ) -> ExitReason | None:
        """Ask the priors whether to stop after the current iteration.

        Args:
            iteration: Current iteration number (1-based).
            current_score: Score from the most recent evaluation.
            scores_so_far: All scores observed so far in this run.

        Returns:
            ``None`` while ``iteration`` is below ``min_iterations``, while no
            scores have been recorded, or when the priors say to continue;
            otherwise an :class:`ExitReason` with condition
            ``"adaptive_bayesian"`` describing the decision.
        """
        # Too early, or nothing to judge yet: never fire.
        if iteration < self.min_iterations or not scores_so_far:
            return None

        keep_looping = self.priors.should_continue(
            self.task_type,
            self.model_id,
            iteration,
            current_score,
            scores_so_far,
            quality_threshold=self.quality_threshold,
        )
        if keep_looping:
            return None

        # The expected improvement is fetched only to explain the exit in the
        # message; it plays no part in the decision above.
        delta, spread = self.priors.expected_improvement(
            self.task_type, self.model_id, iteration
        )
        detail = (
            f"Bayesian exit at iteration {iteration}: score={current_score:.3f}, "
            f"E[delta]={delta:.3f}±{spread:.3f}, "
            f"threshold={self.quality_threshold:.2f}"
        )
        return ExitReason(condition="adaptive_bayesian", message=detail)
loopllm/agent_loop.py ADDED
@@ -0,0 +1,299 @@
1
+ """Adaptive agent-loop control built on the Bayesian priors layer.
2
+
3
+ Agent loops use Conservative Dual-Verify (CDV) at the MCP boundary: step
4
+ artifacts are scored externally before entering this controller. The controller
5
+ applies a composable guard stack and learns optimal depth from verified score
6
+ trajectories.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import time
11
+ import uuid
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+ import structlog
16
+
17
+ from loopllm.guards import (
18
+ CONVERGENCE_DELTA,
19
+ MAX_STEPS_DEFAULT,
20
+ AgentLoopGuard,
21
+ GuardContext,
22
+ GuardStack,
23
+ default_guard_stack,
24
+ )
25
+ from loopllm.priors import AdaptivePriors, CallObservation
26
+
27
# Structured logger named after this module.
logger = structlog.get_logger(__name__)

# Default step ceiling for agent loops; an alias of the guards module default.
MAX_STEPS = MAX_STEPS_DEFAULT
30
+
31
+
32
@dataclass
class AgentLoopSession:
    """Mutable record of one adaptive agent-loop run.

    Attributes:
        session_id: Identifier used to look the session up.
        goal: Free-text description of what the loop is trying to achieve.
        task_type: Identifier for the task class.
        model_id: Identifier for the LLM model.
        quality_threshold: Score the run is aiming for.
        suggested_budget: Number of steps suggested for this run.
        cost_weight: Cost/quality trade-off weight given at start.
        confidence: Confidence reported alongside the suggestion.
        total_observations: Number of prior observations behind the suggestion.
        scores: Progress scores, one per recorded step.
        latencies_ms: Milliseconds elapsed before each recorded step.
        notes: Non-empty notes supplied with steps.
        started_at: ``time.perf_counter()`` reading taken at construction.
        last_step_at: ``time.perf_counter()`` reading of the latest step
            (initially the construction time).
        last_decision: Most recent decision string; starts as ``"continue"``.
        converged: Outcome of the run, ``None`` until it is known.
        closed: Whether the session has been finalised.
        evaluator_type: Name of the CDV verifier configured at start.
        evaluator_kwargs: Extra keyword arguments for that verifier.
        quality_criteria: Criteria strings for the verifier.
        max_wall_ms: Wall-clock limit in milliseconds (presumably enforced by
            a guard; not read in this class).
        max_tokens: Token limit (presumably enforced by a guard; not read in
            this class).
        step_outputs: Non-empty step artifacts, in order.
        step_fingerprints: Fingerprints of step artifacts (presumably filled
            in by a guard; confirm against the guards module).
        prompt_tokens: Running prompt-token count.
        completion_tokens: Running completion-token count.
    """

    # --- identity and configuration (required) ---
    session_id: str
    goal: str
    task_type: str
    model_id: str
    quality_threshold: float
    suggested_budget: int

    # --- what the priors suggested ---
    cost_weight: float = 0.5
    confidence: float = 0.0
    total_observations: int = 0

    # --- per-step trajectory ---
    scores: list[float] = field(default_factory=list)
    latencies_ms: list[float] = field(default_factory=list)
    notes: list[str] = field(default_factory=list)
    started_at: float = field(default_factory=time.perf_counter)
    last_step_at: float = field(default_factory=time.perf_counter)

    # --- lifecycle ---
    last_decision: str = "continue"
    converged: bool | None = None
    closed: bool = False

    # --- CDV verifier recipe (configured at start) ---
    evaluator_type: str = "composite"
    evaluator_kwargs: dict[str, Any] = field(default_factory=dict)
    quality_criteria: list[str] = field(default_factory=list)

    # --- resource limits and accounting ---
    max_wall_ms: float = 300_000.0
    max_tokens: int = 0
    step_outputs: list[str] = field(default_factory=list)
    step_fingerprints: list[str] = field(default_factory=list)
    prompt_tokens: int = 0
    completion_tokens: int = 0
63
+
64
+
65
class AgentLoopController:
    """Advises an agent's multi-step loop on when to stop, and learns.

    Lifecycle: ``start`` → repeated ``step`` → ``end``.

    Sessions are kept in an in-memory dict keyed by session id and are not
    removed when they end, so ``status``/``get_session`` keep working on
    closed sessions.
    """

    def __init__(
        self,
        priors: AdaptivePriors,
        guards: AgentLoopGuard | GuardStack | None = None,
        max_steps: int = MAX_STEPS,
    ) -> None:
        """Create a controller.

        Args:
            priors: Priors layer used for suggestions, expected improvement,
                and learning from finished runs.
            guards: A ready :class:`GuardStack`, a single guard (wrapped in a
                one-element stack), or ``None`` to build the default stack
                from ``priors`` and ``max_steps``.
            max_steps: Step ceiling. It is handed to the default guard stack
                and also caps every session's ``suggested_budget``.
        """
        self._priors = priors
        self._sessions: dict[str, AgentLoopSession] = {}
        # Kept so ``start`` caps budgets with the same ceiling the default
        # guard stack was built with, rather than the module-wide constant.
        self._max_steps = max_steps
        if guards is None:
            self._guards = default_guard_stack(priors, max_steps)
        elif isinstance(guards, GuardStack):
            self._guards = guards
        else:
            self._guards = GuardStack([guards])

    def start(
        self,
        goal: str,
        task_type: str = "general",
        model_id: str = "unknown",
        quality_threshold: float | None = None,
        cost_weight: float = 0.5,
        evaluator_type: str = "composite",
        quality_criteria: list[str] | None = None,
        max_wall_ms: float = 300_000.0,
        max_tokens: int = 0,
        **evaluator_kwargs: Any,
    ) -> AgentLoopSession:
        """Begin a new adaptive agent-loop session.

        Args:
            goal: Description of what the loop should achieve. Used as the
                sole quality criterion when ``quality_criteria`` is empty.
            task_type: Identifier for the task class.
            model_id: Identifier for the LLM model.
            quality_threshold: Target score; when ``None`` the value suggested
                by the priors is used.
            cost_weight: Cost/quality trade-off passed to the priors.
            evaluator_type: Name of the verifier recorded on the session.
            quality_criteria: Criteria strings recorded on the session.
            max_wall_ms: Wall-clock limit recorded on the session.
            max_tokens: Token limit recorded on the session.
            **evaluator_kwargs: Extra verifier options recorded on the session.

        Returns:
            The newly registered :class:`AgentLoopSession`.
        """
        suggestion = self._priors.suggest_config(task_type, model_id, cost_weight)
        budget = int(suggestion["max_iterations"])
        threshold = float(
            suggestion["quality_threshold"]
            if quality_threshold is None
            else quality_threshold
        )
        # Tolerate both a missing and an explicit-None metadata entry.
        meta = suggestion.get("metadata") or {}

        criteria = list(quality_criteria or [])
        if not criteria and goal:
            criteria = [goal]

        session = AgentLoopSession(
            session_id=uuid.uuid4().hex[:12],
            goal=goal,
            task_type=task_type,
            model_id=model_id,
            quality_threshold=threshold,
            # At least one step, at most this controller's configured ceiling.
            suggested_budget=max(1, min(budget, self._max_steps)),
            cost_weight=cost_weight,
            confidence=float(meta.get("confidence", 0.0)),
            total_observations=int(meta.get("total_observations", 0)),
            evaluator_type=evaluator_type,
            evaluator_kwargs=dict(evaluator_kwargs),
            quality_criteria=criteria,
            max_wall_ms=max_wall_ms,
            max_tokens=max_tokens,
        )
        self._sessions[session.session_id] = session
        logger.info(
            "agent_loop_start",
            session_id=session.session_id,
            task_type=task_type,
            suggested_budget=session.suggested_budget,
            confidence=session.confidence,
            evaluator_type=evaluator_type,
        )
        return session

    def step(
        self,
        session_id: str,
        score: float,
        note: str = "",
        step_output: str = "",
        step_tokens: int = 0,
    ) -> dict[str, Any]:
        """Advance a session with a verified (or legacy) progress score.

        Args:
            session_id: Id returned by :meth:`start`.
            score: Progress score; clamped into ``[0.0, 1.0]``. A NaN score is
                recorded as ``0.0``.
            note: Optional note; stored only when non-empty.
            step_output: Optional step artifact; stored only when non-empty
                and also handed to the guards.
            step_tokens: Tokens produced by the step. When positive it is
                added to ``completion_tokens`` and a quarter of it (at least
                one) is added to ``prompt_tokens``.

        Returns:
            A verdict dict with the keys ``session_id``, ``decision``
            (``"continue"`` or ``"stop"``), ``reason``, ``score``,
            ``steps_used``, ``suggested_budget``, ``quality_threshold``,
            ``expected_delta``, ``uncertainty`` and ``score_trajectory``.

        Raises:
            KeyError: If ``session_id`` is unknown.
            ValueError: If the session is already closed.
        """
        import math

        session = self._require(session_id)
        if session.closed:
            raise ValueError(f"Session already closed: {session_id}")

        score = float(score)
        if math.isnan(score):
            # ``max(0.0, min(1.0, nan))`` evaluates to 1.0, which would record
            # an invalid score as a perfect one. Count it as no progress.
            logger.warning("agent_loop_nan_score", session_id=session.session_id)
            score = 0.0
        score = max(0.0, min(1.0, score))

        now = time.perf_counter()
        session.latencies_ms.append((now - session.last_step_at) * 1000.0)
        session.last_step_at = now
        session.scores.append(score)
        if note:
            session.notes.append(note)
        if step_output:
            session.step_outputs.append(step_output)
        if step_tokens > 0:
            session.completion_tokens += step_tokens
            session.prompt_tokens += max(step_tokens // 4, 1)

        steps_used = len(session.scores)
        expected_delta, uncertainty = self._priors.expected_improvement(
            session.task_type, session.model_id, steps_used
        )

        decision, reason = self._decide(session, score, steps_used, step_output)
        session.last_decision = decision

        verdict: dict[str, Any] = {
            "session_id": session.session_id,
            "decision": decision,
            "reason": reason,
            "score": round(score, 4),
            "steps_used": steps_used,
            "suggested_budget": session.suggested_budget,
            "quality_threshold": round(session.quality_threshold, 3),
            "expected_delta": round(expected_delta, 4),
            "uncertainty": round(uncertainty, 4),
            "score_trajectory": [round(s, 4) for s in session.scores],
        }
        logger.debug(
            "agent_loop_step",
            session_id=session.session_id,
            decision=decision,
            steps_used=steps_used,
        )
        return verdict

    def end(self, session_id: str, converged: bool | None = None) -> dict[str, Any]:
        """Finalise a loop and learn from verified score trajectories.

        On the first call the run is reported to the priors as a
        :class:`CallObservation` and the session is marked closed. Later calls
        for the same session skip that part (``converged`` is then ignored)
        and only rebuild the summary.

        Args:
            session_id: Id returned by :meth:`start`.
            converged: Explicit outcome. When ``None`` it is inferred as
                "the last recorded score reached the quality threshold"
                (``False`` if no score was recorded).

        Returns:
            A summary dict with ``session_id``, ``goal``, ``task_type``,
            ``model_id``, ``steps_run``, ``converged``, ``final_score`` and a
            ``learned`` sub-dict taken from the priors' report.

        Raises:
            KeyError: If ``session_id`` is unknown.
        """
        session = self._require(session_id)
        if not session.closed:
            if converged is None:
                converged = bool(
                    session.scores and session.scores[-1] >= session.quality_threshold
                )
            observation = CallObservation(
                task_type=session.task_type,
                model_id=session.model_id,
                scores=list(session.scores),
                latencies_ms=list(session.latencies_ms),
                converged=converged,
                total_iterations=len(session.scores),
                max_iterations=session.suggested_budget,
                quality_threshold=session.quality_threshold,
                prompt_tokens=session.prompt_tokens,
                completion_tokens=session.completion_tokens,
            )
            self._priors.observe(observation)
            session.converged = converged
            session.closed = True
            logger.info(
                "agent_loop_end",
                session_id=session.session_id,
                steps_run=len(session.scores),
                converged=converged,
            )

        report = self._priors.report(session.task_type, session.model_id)
        return {
            "session_id": session.session_id,
            "goal": session.goal,
            "task_type": session.task_type,
            "model_id": session.model_id,
            "steps_run": len(session.scores),
            "converged": session.converged,
            "final_score": round(session.scores[-1], 4) if session.scores else 0.0,
            "learned": {
                "optimal_depth": report["optimal_depth"],
                "converge_rate": report["converge_rate"],
                "confidence": report["confidence"],
                "total_observations": report["total_calls"],
            },
        }

    def status(self, session_id: str) -> dict[str, Any]:
        """Return a snapshot of a session's state (open or closed).

        Raises:
            KeyError: If ``session_id`` is unknown.
        """
        session = self._require(session_id)
        return {
            "session_id": session.session_id,
            "goal": session.goal,
            "task_type": session.task_type,
            "model_id": session.model_id,
            "steps_used": len(session.scores),
            "suggested_budget": session.suggested_budget,
            "quality_threshold": round(session.quality_threshold, 3),
            "score_trajectory": [round(s, 4) for s in session.scores],
            "last_decision": session.last_decision,
            "closed": session.closed,
            "converged": session.converged,
            "evaluator_type": session.evaluator_type,
            "quality_criteria": session.quality_criteria,
        }

    def get_session(self, session_id: str) -> AgentLoopSession:
        """Return the raw session object (for MCP CDV wiring).

        Raises:
            KeyError: If ``session_id`` is unknown.
        """
        return self._require(session_id)

    def _decide(
        self,
        session: AgentLoopSession,
        score: float,
        steps_used: int,
        step_output: str = "",
    ) -> tuple[str, str]:
        """Run the guard stack; continue if no guard fires.

        Returns:
            ``("stop", <guard message>)`` when the stack returns a reason,
            otherwise ``("continue", <progress message>)``.
        """
        ctx = GuardContext(
            session=session,
            iteration=steps_used,
            current_score=score,
            scores_so_far=list(session.scores),
            step_output=step_output,
        )
        reason = self._guards.evaluate(ctx)
        if reason is not None:
            return "stop", reason.message

        # A custom guard stack may let the loop continue although the score
        # already meets the threshold; do not claim "below" in that case.
        if score < session.quality_threshold:
            standing = (
                f"score={score:.3f} below threshold {session.quality_threshold:.2f}"
            )
        else:
            standing = (
                f"score={score:.3f} meets threshold "
                f"{session.quality_threshold:.2f} but no guard fired"
            )
        return "continue", (
            f"Keep going: step {steps_used}/{session.suggested_budget}, {standing}"
        )

    def _require(self, session_id: str) -> AgentLoopSession:
        """Look up a session, raising ``KeyError`` with a clear message if absent."""
        try:
            return self._sessions[session_id]
        except KeyError:
            raise KeyError(f"Unknown agent-loop session: {session_id}") from None
291
+
292
+
293
# Public names of this module. CONVERGENCE_DELTA and MAX_STEPS are re-exported
# for backward compatibility.
__all__ = [
    "AgentLoopController",
    "AgentLoopSession",
    "CONVERGENCE_DELTA",
    "MAX_STEPS",
]