loopllm 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loopllm/__init__.py +69 -0
- loopllm/__main__.py +5 -0
- loopllm/adaptive_exit.py +78 -0
- loopllm/agent_loop.py +299 -0
- loopllm/cli.py +521 -0
- loopllm/elicitation.py +519 -0
- loopllm/engine.py +376 -0
- loopllm/evaluator_factory.py +72 -0
- loopllm/evaluators.py +419 -0
- loopllm/guards.py +254 -0
- loopllm/local_loop.py +273 -0
- loopllm/mcp_server.py +2657 -0
- loopllm/plan_registry.py +412 -0
- loopllm/priors.py +604 -0
- loopllm/provider.py +51 -0
- loopllm/providers/__init__.py +15 -0
- loopllm/providers/agent.py +64 -0
- loopllm/providers/mock.py +64 -0
- loopllm/providers/ollama.py +95 -0
- loopllm/providers/openrouter.py +101 -0
- loopllm/serve.py +297 -0
- loopllm/step_scorer.py +190 -0
- loopllm/store.py +1126 -0
- loopllm/tasks.py +599 -0
- loopllm-0.7.0.dist-info/METADATA +454 -0
- loopllm-0.7.0.dist-info/RECORD +29 -0
- loopllm-0.7.0.dist-info/WHEEL +4 -0
- loopllm-0.7.0.dist-info/entry_points.txt +3 -0
- loopllm-0.7.0.dist-info/licenses/LICENSE +21 -0
loopllm/local_loop.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""LocalModelLoop — closes the loop for local models (Ollama, llama.cpp, etc.).
|
|
2
|
+
|
|
3
|
+
Instead of the local model calling MCP tools itself, this module wraps any
|
|
4
|
+
local LLM call with a scoring middleware layer:
|
|
5
|
+
|
|
6
|
+
1. Send prompt → local model → get output
|
|
7
|
+
2. POST output to loopllm /score endpoint
|
|
8
|
+
3. Receive score + weighted prompt rewrite
|
|
9
|
+
4. If score < threshold, re-submit rewritten prompt to local model
|
|
10
|
+
5. Repeat until score >= threshold or max_retries exhausted
|
|
11
|
+
|
|
12
|
+
The local model never needs to support tool-calling or MCP. loopllm acts
|
|
13
|
+
purely as a prompt optimizer and quality gate that sits between the caller
|
|
14
|
+
and the model.
|
|
15
|
+
|
|
16
|
+
Usage::
|
|
17
|
+
|
|
18
|
+
loop = LocalModelLoop(
|
|
19
|
+
base_url="http://localhost:11434",
|
|
20
|
+
model="llama3.2",
|
|
21
|
+
score_url="http://localhost:8765/score",
|
|
22
|
+
quality_threshold=0.80,
|
|
23
|
+
max_retries=3,
|
|
24
|
+
)
|
|
25
|
+
result = loop.run("Write a Python function to parse JSON safely.")
|
|
26
|
+
print(result.output)
|
|
27
|
+
print(f"Final score: {result.final_score}")
|
|
28
|
+
"""
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import time
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
from typing import Any, cast
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class LoopIteration:
    """Snapshot of one pass through the local-model loop.

    One instance is appended per model call made by ``LocalModelLoop.run``.
    """

    # Zero-based index of the pass within the run.
    iteration: int
    # Prompt text that was sent to the model on this pass.
    prompt: str
    # Text the model returned for that prompt.
    output: str
    # Score assigned to ``output`` on this pass.
    score: float
    # Whether ``score`` reached the loop's quality threshold.
    passed: bool
    # Issues reported alongside the score (may be empty).
    deficiencies: list[str]
    # Wall-clock duration of the model call, in milliseconds.
    latency_ms: float
    # True when the prompt for this pass was a rewrite rather than the original.
    rewrite_used: bool = field(default=False)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class LocalLoopResult:
    """Outcome of a complete ``LocalModelLoop`` run."""

    # Model output selected as the result of the run.
    output: str
    # Score of the last iteration that was executed.
    final_score: float
    # Highest score observed across all iterations.
    best_score: float
    # Number of iterations that were executed.
    total_iterations: int
    # Whether the best score reached the quality threshold.
    converged: bool
    # Per-iteration records, in execution order; a fresh list per instance.
    iterations: list[LoopIteration] = field(default_factory=list)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class LocalModelLoop:
    """Wraps any local HTTP LLM (Ollama-compatible) with loopllm scoring.

    Args:
        base_url: Base URL of the local model API (Ollama default:
            http://localhost:11434). A trailing slash is stripped.
        model: Model name (e.g. "llama3.2", "qwen2.5:0.5b").
        score_url: URL of the loopllm score endpoint (loopllm serve default:
            http://localhost:8765/score).
        quality_threshold: Minimum score to accept a response without retrying.
        max_retries: Maximum number of model attempts per run; the initial
            call counts as one attempt. Values below 1 are treated as 1 so
            that a run always produces an output.
        timeout: HTTP timeout in seconds for model calls.
        prompt_weight: Stored on the instance; not read by any method of this
            class.
        output_weight: Stored on the instance; not read by any method of this
            class.
        score_timeout: HTTP timeout in seconds for calls to the score endpoint.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "llama3.2",
        score_url: str = "http://localhost:8765/score",
        quality_threshold: float = 0.80,
        max_retries: int = 3,
        timeout: float = 60.0,
        prompt_weight: float = 0.35,
        output_weight: float = 0.65,
        score_timeout: float = 10.0,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.score_url = score_url
        self.quality_threshold = quality_threshold
        self.max_retries = max_retries
        self.timeout = timeout
        self.prompt_weight = prompt_weight
        self.output_weight = output_weight
        self.score_timeout = score_timeout

    # -- public API ----------------------------------------------------------

    def run(
        self,
        prompt: str,
        system: str | None = None,
        evaluator_type: str = "length",
        min_words: int = 5,
        **kwargs: Any,
    ) -> LocalLoopResult:
        """Run prompt → score → rewrite → retry loop.

        Args:
            prompt: The initial user prompt.
            system: Optional system message.
            evaluator_type: Scoring evaluator type passed to loopllm ('length', 'json', 'regex').
            min_words: Minimum word count evaluator argument.
            **kwargs: Extra keyword args forwarded to the model API.

        Returns:
            :class:`LocalLoopResult` with the best output and scores.

        Raises:
            ImportError: If ``httpx`` is not installed.
        """
        # Always make at least one attempt, even when max_retries is 0 or
        # negative; otherwise the result would carry no output at all.
        attempts = max(1, self.max_retries)
        current_prompt = prompt
        iterations: list[LoopIteration] = []

        for i in range(attempts):
            # 1. Call local model (latency covers the model call only).
            iter_start = time.perf_counter()
            output = self._call_model(current_prompt, system=system, **kwargs)
            latency_ms = (time.perf_counter() - iter_start) * 1000.0

            # 2. Score via loopllm
            score_result = self._score(
                prompt=current_prompt,
                output=output,
                evaluator_type=evaluator_type,
                min_words=min_words,
            )
            score = self._coerce_score(score_result.get("output_score"))
            deficiencies = self._coerce_deficiencies(score_result.get("deficiencies"))
            passed = score >= self.quality_threshold

            iterations.append(
                LoopIteration(
                    iteration=i,
                    prompt=current_prompt,
                    output=output,
                    score=score,
                    passed=passed,
                    deficiencies=deficiencies,
                    latency_ms=latency_ms,
                    rewrite_used=i > 0,
                )
            )

            # 3. Accept if good enough; also stop after the last attempt so we
            #    do not build a rewrite that would never be sent.
            if passed or i == attempts - 1:
                break

            # 4. Rewrite prompt with score feedback for the next attempt
            current_prompt = self._rewrite_prompt(
                original_prompt=prompt,
                previous_output=output,
                score=score,
                deficiencies=deficiencies,
                iteration=i + 1,
            )

        # max() returns the first maximal element, so ties keep the earliest
        # attempt.
        best = max(iterations, key=lambda record: record.score)
        return LocalLoopResult(
            output=best.output,
            final_score=iterations[-1].score,
            best_score=best.score,
            total_iterations=len(iterations),
            converged=best.score >= self.quality_threshold,
            iterations=iterations,
        )

    # -- private helpers -----------------------------------------------------

    @staticmethod
    def _require_httpx() -> Any:
        """Import and return the ``httpx`` module, or raise a helpful ImportError."""
        try:
            import httpx
        except ImportError as e:
            raise ImportError(
                "httpx is required for LocalModelLoop. "
                "Install with: pip install httpx"
            ) from e
        return httpx

    @staticmethod
    def _extract_text(data: Any) -> str:
        """Pull the response text out of a decoded model API payload.

        Prefers ``data["message"]["content"]`` and falls back to
        ``data["response"]``. Returns an empty string when neither is
        available (including when the payload is not a JSON object or the
        value is null).
        """
        if not isinstance(data, dict):
            return ""
        message = data.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if content is None:
            content = data.get("response")
        return "" if content is None else str(content)

    @staticmethod
    def _coerce_score(raw: Any, default: float = 0.5) -> float:
        """Convert a score value from the score payload into a float.

        Returns ``default`` when the value is missing, not numeric, or NaN, so
        that the threshold comparison in ``run`` cannot raise.
        """
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return default
        # NaN is the only float that is not equal to itself.
        return default if value != value else value

    @staticmethod
    def _coerce_deficiencies(raw: Any) -> list[str]:
        """Normalise the ``deficiencies`` payload field to a list of strings."""
        if isinstance(raw, str):
            # A bare string would otherwise be iterated character by character.
            return [raw]
        if isinstance(raw, (list, tuple)):
            return [str(item) for item in raw]
        return []

    def _fallback_score(self, output: str, min_words: int) -> dict[str, Any]:
        """Score ``output`` by word count alone.

        The score is the ratio of words to ``min_words`` (treated as at least
        1), capped at 1.0 and rounded to three decimals.
        """
        words = len(output.split())
        score = min(1.0, words / max(min_words, 1))
        return {
            "output_score": round(score, 3),
            "deficiencies": [] if score >= self.quality_threshold else ["output too short"],
        }

    def _call_model(
        self,
        prompt: str,
        system: str | None = None,
        **kwargs: Any,
    ) -> str:
        """Send prompt to the local model and return the response text.

        Posts to ``{base_url}/api/chat``. ``kwargs`` are merged into the JSON
        body after the fixed keys, so they can override ``model``,
        ``messages`` and ``stream``.

        Raises:
            ImportError: If ``httpx`` is not installed.
        """
        httpx = self._require_httpx()

        messages: list[dict[str, str]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        resp = httpx.post(
            f"{self.base_url}/api/chat",
            json={"model": self.model, "messages": messages, "stream": False, **kwargs},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        # Ollama /api/chat response
        return self._extract_text(resp.json())

    def _score(
        self,
        prompt: str,
        output: str,
        evaluator_type: str = "length",
        min_words: int = 5,
    ) -> dict[str, Any]:
        """POST to loopllm /score and return the score dict.

        If the request fails, returns a non-success status, or does not yield
        a JSON object, a warning is logged and a word-count score is returned
        instead. The fallback ignores ``evaluator_type``.

        Raises:
            ImportError: If ``httpx`` is not installed.
        """
        httpx = self._require_httpx()

        try:
            resp = httpx.post(
                self.score_url,
                json={
                    "prompt": prompt,
                    "output": output,
                    "evaluator_type": evaluator_type,
                    "min_words": min_words,
                },
                timeout=self.score_timeout,
            )
            resp.raise_for_status()
            payload = resp.json()
            if not isinstance(payload, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(payload).__name__}"
                )
            return cast(dict[str, Any], payload)
        except Exception as exc:
            # Deliberately broad: scoring is best-effort and must not abort the
            # run. Log it so the degraded scoring is not silent.
            import logging

            logging.getLogger(__name__).warning(
                "loopllm score endpoint %s unavailable (%s); "
                "using word-count fallback",
                self.score_url,
                exc,
            )
            return self._fallback_score(output, min_words)

    def _rewrite_prompt(
        self,
        original_prompt: str,
        previous_output: str,
        score: float,
        deficiencies: list[str],
        iteration: int,
    ) -> str:
        """Build the feedback prompt for the next iteration.

        The prompt carries a status header, the listed deficiencies (or a
        generic one when none were reported), the original task, and the first
        500 characters of the previous output.
        """
        deficiency_str = (
            "\n".join(f" - {d}" for d in deficiencies)
            if deficiencies
            else " - Output did not meet quality threshold"
        )
        return (
            f"[LOOPLLM | score={score:.2f} | retry={iteration}/{self.max_retries} | "
            f"threshold={self.quality_threshold:.2f}]\n"
            f"Your previous response scored {score:.2f}/1.0 and did not meet the quality bar.\n"
            f"Issues to fix:\n{deficiency_str}\n\n"
            f"Original task:\n{original_prompt}\n\n"
            f"Previous response (do not repeat this):\n{previous_output[:500]}\n\n"
            "Please produce an improved response that addresses all issues listed above."
        )
|