loopllm 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loopllm/local_loop.py ADDED
@@ -0,0 +1,273 @@
1
+ """LocalModelLoop — closes the loop for local models (Ollama, llama.cpp, etc.).
2
+
3
+ Instead of the local model calling MCP tools itself, this module wraps any
4
+ local LLM call with a scoring middleware layer:
5
+
6
+ 1. Send prompt → local model → get output
7
+ 2. POST output to loopllm /score endpoint
8
+ 3. Receive score + weighted prompt rewrite
9
+ 4. If score < threshold, re-submit rewritten prompt to local model
10
+ 5. Repeat until score >= threshold or max_retries exhausted
11
+
12
+ The local model never needs to support tool-calling or MCP. loopllm acts
13
+ purely as a prompt optimizer and quality gate that sits between the caller
14
+ and the model.
15
+
16
+ Usage::
17
+
18
+ loop = LocalModelLoop(
19
+ base_url="http://localhost:11434",
20
+ model="llama3.2",
21
+ score_url="http://localhost:8765/score",
22
+ quality_threshold=0.80,
23
+ max_retries=3,
24
+ )
25
+ result = loop.run("Write a Python function to parse JSON safely.")
26
+ print(result.output)
27
+ print(f"Final score: {result.final_score}")
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import time
32
+ from dataclasses import dataclass, field
33
+ from typing import Any, cast
34
+
35
+
36
@dataclass
class LoopIteration:
    """Snapshot of one pass through the local-model loop.

    One instance is recorded per model call, capturing what was sent,
    what came back, and how the scorer judged it.
    """

    # Zero-based index of this pass within the run.
    iteration: int
    # Prompt text actually sent to the model on this pass.
    prompt: str
    # Raw text returned by the model.
    output: str
    # Quality score assigned to ``output``.
    score: float
    # True when ``score`` met the configured quality threshold.
    passed: bool
    # Human-readable problems reported for ``output``.
    deficiencies: list[str]
    # Wall-clock duration of the model call, in milliseconds.
    latency_ms: float
    # True when ``prompt`` was a feedback rewrite rather than the original.
    rewrite_used: bool = field(default=False)
48
+
49
+
50
@dataclass
class LocalLoopResult:
    """Outcome of a complete ``LocalModelLoop`` run."""

    # Model output that achieved the highest score during the run.
    output: str
    # Score of the last iteration that was executed.
    final_score: float
    # Highest score seen across all iterations.
    best_score: float
    # Number of model calls that were made.
    total_iterations: int
    # True when the best score reached the quality threshold.
    converged: bool
    # Per-iteration records, in execution order; a fresh list per instance.
    iterations: list[LoopIteration] = field(default_factory=lambda: [])
60
+
61
+
62
class LocalModelLoop:
    """Wraps any local HTTP LLM (Ollama-compatible) with loopllm scoring.

    Each attempt sends a prompt to the model, scores the reply, and, when the
    score is below ``quality_threshold``, builds a feedback prompt that is
    used for the next attempt.

    Args:
        base_url: Base URL of the local model API (Ollama default: http://localhost:11434).
        model: Model name (e.g. "llama3.2", "qwen2.5:0.5b").
        score_url: URL of the loopllm score endpoint (loopllm serve default: http://localhost:8765/score).
        quality_threshold: Minimum score to accept a response without retrying.
        max_retries: Maximum number of attempts. The initial attempt is
            always made, even when this is zero or negative.
        timeout: HTTP timeout in seconds for model calls.
        prompt_weight: Weight of prompt score in weighted rewrite (0–1).
            Stored on the instance; not read by the rewrite built here.
        output_weight: Weight of output score in weighted rewrite (0–1).
            Stored on the instance; not read by the rewrite built here.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "llama3.2",
        score_url: str = "http://localhost:8765/score",
        quality_threshold: float = 0.80,
        max_retries: int = 3,
        timeout: float = 60.0,
        prompt_weight: float = 0.35,
        output_weight: float = 0.65,
    ) -> None:
        # Strip trailing slashes so endpoint paths can be appended verbatim.
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.score_url = score_url
        self.quality_threshold = quality_threshold
        self.max_retries = max_retries
        self.timeout = timeout
        self.prompt_weight = prompt_weight
        self.output_weight = output_weight

    # -- public API ----------------------------------------------------------

    def run(
        self,
        prompt: str,
        system: str | None = None,
        evaluator_type: str = "length",
        min_words: int = 5,
        **kwargs: Any,
    ) -> LocalLoopResult:
        """Run prompt → score → rewrite → retry loop.

        Args:
            prompt: The initial user prompt.
            system: Optional system message.
            evaluator_type: Scoring evaluator type passed to loopllm ('length', 'json', 'regex').
            min_words: Minimum word count evaluator argument.
            **kwargs: Extra keyword args forwarded to the model API.

        Returns:
            :class:`LocalLoopResult` with the best output and scores.

        Raises:
            ImportError: If ``httpx`` is not installed.
        """
        # The first attempt is not a retry: always make it, so a zero or
        # negative max_retries cannot yield an empty result with a sentinel
        # score.
        attempts = max(1, self.max_retries)
        current_prompt = prompt
        iterations: list[LoopIteration] = []
        best_output = ""
        best_score = 0.0

        for i in range(attempts):
            # 1. Call local model (latency covers the model call only).
            iter_start = time.perf_counter()
            output = self._call_model(current_prompt, system=system, **kwargs)
            latency_ms = (time.perf_counter() - iter_start) * 1000.0

            # 2. Score via loopllm; tolerate malformed score payloads.
            score_result = self._score(
                prompt=current_prompt,
                output=output,
                evaluator_type=evaluator_type,
                min_words=min_words,
            )
            score = self._coerce_score(score_result.get("output_score"))
            deficiencies = self._normalize_deficiencies(
                score_result.get("deficiencies")
            )
            passed = score >= self.quality_threshold

            iterations.append(
                LoopIteration(
                    iteration=i,
                    prompt=current_prompt,
                    output=output,
                    score=score,
                    passed=passed,
                    deficiencies=deficiencies,
                    latency_ms=latency_ms,
                    # Every attempt after the first uses a rewritten prompt.
                    rewrite_used=i > 0,
                )
            )

            # The first attempt is always the best so far, whatever it scored.
            if i == 0 or score > best_score:
                best_score = score
                best_output = output

            # 3. Accept if good enough
            if passed:
                break

            # 4. Rewrite prompt with score-weighted feedback, but only when
            #    another attempt will actually consume it.
            if i + 1 < attempts:
                current_prompt = self._rewrite_prompt(
                    original_prompt=prompt,
                    previous_output=output,
                    score=score,
                    deficiencies=deficiencies,
                    iteration=i + 1,
                )

        return LocalLoopResult(
            output=best_output,
            final_score=iterations[-1].score,
            best_score=best_score,
            total_iterations=len(iterations),
            converged=best_score >= self.quality_threshold,
            iterations=iterations,
        )

    # -- private helpers -----------------------------------------------------

    @staticmethod
    def _coerce_score(value: Any, default: float = 0.5) -> float:
        """Return *value* as a float, or *default* when it is missing or non-numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _normalize_deficiencies(value: Any) -> list[str]:
        """Return the scorer's deficiencies as a list of strings."""
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        # A bare string (or other scalar) is one deficiency, not a sequence
        # of characters.
        return [str(value)]

    @staticmethod
    def _extract_text(data: dict[str, Any]) -> str:
        """Pull the reply text out of a decoded model response.

        Prefers ``message.content`` and falls back to a top-level
        ``response`` field; returns an empty string when neither is present.
        """
        message = data.get("message")
        if isinstance(message, dict):
            content = message.get("content")
            if content is not None:
                return str(content)
        response = data.get("response")
        return "" if response is None else str(response)

    def _fallback_score(self, output: str, min_words: int) -> dict[str, Any]:
        """Score *output* locally by word count relative to *min_words*."""
        words = len(output.split())
        # Guard against a zero or negative min_words in the division.
        score = min(1.0, words / max(min_words, 1))
        return {
            "output_score": round(score, 3),
            "deficiencies": [] if score >= self.quality_threshold else ["output too short"],
        }

    def _call_model(
        self,
        prompt: str,
        system: str | None = None,
        **kwargs: Any,
    ) -> str:
        """Send prompt to the local model and return the response text.

        Args:
            prompt: User message content.
            system: Optional system message placed before the user message.
            **kwargs: Extra fields merged into the request JSON body.

        Returns:
            The reply text, or an empty string if the response carries none.

        Raises:
            ImportError: If ``httpx`` is not installed.
            httpx.HTTPStatusError: If the model API answers with an error status.
        """
        try:
            import httpx
        except ImportError as e:
            raise ImportError(
                "httpx is required for LocalModelLoop. "
                "Install with: pip install httpx"
            ) from e

        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        # "stream" is placed after **kwargs so it cannot be overridden: the
        # response is decoded below as a single JSON document.
        payload = {
            "model": self.model,
            "messages": messages,
            **kwargs,
            "stream": False,
        }
        resp = httpx.post(
            f"{self.base_url}/api/chat",
            json=payload,
            timeout=self.timeout,
        )
        resp.raise_for_status()
        # Ollama /api/chat response
        return self._extract_text(resp.json())

    def _score(
        self,
        prompt: str,
        output: str,
        evaluator_type: str = "length",
        min_words: int = 5,
    ) -> dict[str, Any]:
        """POST to loopllm /score and return the score dict.

        If the request fails, or the endpoint does not return a JSON object,
        a local word-count score is returned instead.

        Raises:
            ImportError: If ``httpx`` is not installed.
        """
        try:
            import httpx
        except ImportError as e:
            raise ImportError(
                "httpx is required for LocalModelLoop. "
                "Install with: pip install httpx"
            ) from e

        try:
            resp = httpx.post(
                self.score_url,
                json={
                    "prompt": prompt,
                    "output": output,
                    "evaluator_type": evaluator_type,
                    "min_words": min_words,
                },
                timeout=10.0,
            )
            resp.raise_for_status()
            payload = resp.json()
        except Exception:
            # Deliberately broad: scoring is best-effort, so any transport,
            # status, or decoding failure falls through to the local scorer.
            payload = None

        if isinstance(payload, dict):
            return cast(dict[str, Any], payload)
        # If loopllm serve is unreachable (or replied with something that is
        # not a JSON object), use a simple word-count fallback.
        return self._fallback_score(output, min_words)

    def _rewrite_prompt(
        self,
        original_prompt: str,
        previous_output: str,
        score: float,
        deficiencies: list[str],
        iteration: int,
    ) -> str:
        """Build a score-weighted prompt rewrite for the next iteration.

        Args:
            original_prompt: The caller's original task text.
            previous_output: The model's last reply; only its first 500
                characters are quoted back.
            score: Score the previous reply received.
            deficiencies: Issues to list; a generic line is used when empty.
            iteration: One-based retry number shown in the header.

        Returns:
            The feedback prompt to send on the next attempt.
        """
        deficiency_str = (
            "\n".join(f" - {d}" for d in deficiencies)
            if deficiencies
            else " - Output did not meet quality threshold"
        )
        return (
            f"[LOOPLLM | score={score:.2f} | retry={iteration}/{self.max_retries} | "
            f"threshold={self.quality_threshold:.2f}]\n"
            f"Your previous response scored {score:.2f}/1.0 and did not meet the quality bar.\n"
            f"Issues to fix:\n{deficiency_str}\n\n"
            f"Original task:\n{original_prompt}\n\n"
            f"Previous response (do not repeat this):\n{previous_output[:500]}\n\n"
            "Please produce an improved response that addresses all issues listed above."
        )