loopllm 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ """Agent passthrough provider.
2
+
3
+ Instead of calling an external LLM, this provider signals that the
4
+ calling agent (VS Code Copilot, Cursor, Claude, etc.) should perform
5
+ the generation itself. The MCP tools catch :class:`AgentExecutionRequired`
6
+ and return a structured ``agent_prompt`` payload—the connected IDE agent
7
+ then executes it directly.
8
+
9
+ This eliminates the Ollama / OpenRouter dependency entirely and lets the
10
+ tool use whatever frontier model the user already has active.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from loopllm.provider import LLMProvider, LLMResponse
17
+
18
+
19
class AgentExecutionRequired(Exception):
    """Signal that the calling agent must run the prompt itself.

    :class:`AgentPassthroughProvider` raises this in place of contacting an
    LLM backend; the request details travel on the exception instance.

    Attributes:
        prompt: Prompt text the calling agent should execute.
        model: Model hint supplied by the caller. It is echoed in the
            exception message and stored, nothing more.
        kwargs: Dict of any additional keyword arguments given at the
            call site.
    """

    def __init__(self, prompt: str, model: str, **kwargs: Any) -> None:
        # Only the model hint goes into the human-readable message; the
        # prompt and extras are exposed as attributes instead.
        message = f"Agent execution required for model={model!r}"
        super().__init__(message)
        self.prompt, self.model, self.kwargs = prompt, model, kwargs
33
+
34
+
35
class AgentPassthroughProvider(LLMProvider):
    """Provider that hands generation back to the calling IDE agent.

    :meth:`complete` never contacts an external service; it always raises
    :class:`AgentExecutionRequired` carrying the prompt, model hint and
    extra keyword arguments. The code that invoked the provider is expected
    to catch that exception and turn it into an ``agent_prompt`` style
    response for the connected agent (Copilot / Claude / Cursor).

    Usage::

        loopllm mcp-server --provider agent
    """

    @property
    def name(self) -> str:
        """Provider name."""
        return "agent"

    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
        """Always raise :class:`AgentExecutionRequired`; nothing is returned.

        Args:
            prompt: The prompt the calling agent should execute.
            model: Model hint, stored on the raised exception.
            **kwargs: Passed unchanged to the exception.

        Raises:
            AgentExecutionRequired: On every call — callers must handle it.
        """
        handoff = AgentExecutionRequired(prompt, model, **kwargs)
        raise handoff
@@ -0,0 +1,64 @@
1
+ """Mock LLM provider for testing."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from loopllm.provider import LLMProvider, LLMResponse, LLMUsage
8
+
9
+
10
@dataclass
class MockLLMProvider(LLMProvider):
    """Canned-response LLM provider intended for tests.

    Args:
        responses: Replies handed out in order, wrapping around at the end.
            When ``None`` or empty, numbered placeholder text is returned.
        default_score: Not read anywhere in this class; kept for
            compatibility.
        latency_ms: Latency value reported on each response, in
            milliseconds (no actual delay is introduced).
    """

    responses: list[str] | None = None
    default_score: float = 0.9
    latency_ms: float = 10.0
    # Log of every ``complete`` call: prompt, model, plus any extra kwargs.
    calls: list[dict[str, Any]] = field(default_factory=list, repr=False)
    # Number of responses handed out so far; drives the cycling.
    _index: int = field(default=0, repr=False)

    @property
    def name(self) -> str:
        """Provider name."""
        return "mock"

    @property
    def call_count(self) -> int:
        """Number of calls made so far."""
        return len(self.calls)

    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
        """Record the call and hand back the next canned reply.

        With a non-empty *responses* list the replies repeat in order;
        otherwise the content is ``"Mock response {n}"`` where ``n`` is the
        zero-based call index.

        Args:
            prompt: The prompt (recorded but not used).
            model: The model name (recorded and echoed on the response).
            **kwargs: Extra keyword arguments (recorded).

        Returns:
            :class:`LLMResponse` with the chosen content, fixed token counts
            (10 / 20 / 30) and the configured ``latency_ms``.
        """
        entry: dict[str, Any] = {"prompt": prompt, "model": model}
        entry.update(kwargs)
        self.calls.append(entry)

        turn = self._index
        canned = self.responses
        content = canned[turn % len(canned)] if canned else f"Mock response {turn}"
        self._index = turn + 1

        fake_usage = LLMUsage(prompt_tokens=10, completion_tokens=20, total_tokens=30)
        return LLMResponse(
            content=content,
            model=model,
            usage=fake_usage,
            latency_ms=self.latency_ms,
        )
@@ -0,0 +1,95 @@
1
+ """Ollama LLM provider."""
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ from dataclasses import dataclass
6
+
7
+ import structlog
8
+
9
+ from typing import Any
10
+
11
+ from loopllm.provider import LLMProvider, LLMResponse, LLMUsage
12
+
13
+ logger = structlog.get_logger(__name__)
14
+
15
+
16
@dataclass
class OllamaProvider(LLMProvider):
    """LLM provider backed by a local Ollama instance.

    Args:
        base_url: Base URL for the Ollama API. A trailing slash is tolerated.
        timeout: Request timeout in seconds for the chat call.
    """

    base_url: str = "http://localhost:11434"
    timeout: float = 120.0

    @property
    def name(self) -> str:
        """Provider name."""
        return "ollama"

    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
        """Call the Ollama chat endpoint.

        Args:
            prompt: The user prompt to complete.
            model: Ollama model name (e.g. ``llama3``).
            **kwargs: Extra fields merged into the top level of the request
                body (they may override ``model``/``messages``/``stream``).

        Returns:
            Parsed :class:`LLMResponse` with content, usage, and latency.

        Raises:
            RuntimeError: If the API returns a non-200 status code.
            ImportError: If ``httpx`` is not installed.
        """
        try:
            import httpx
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                "httpx is required for OllamaProvider. "
                "Install it with: pip install loopllm[ollama]"
            ) from exc

        payload = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
            **kwargs,
        }
        # Avoid "//api/chat" when the configured base URL ends with a slash.
        endpoint = self.base_url.rstrip("/") + "/api/chat"

        t0 = time.perf_counter()
        response = httpx.post(
            endpoint,
            json=payload,
            timeout=self.timeout,
        )
        latency_ms = (time.perf_counter() - t0) * 1000.0

        if response.status_code != 200:
            raise RuntimeError(
                f"Ollama returned {response.status_code}: {response.text}"
            )

        data = response.json()
        content = data["message"]["content"]

        # Ollama's native chat response reports token counts as top-level
        # ``prompt_eval_count`` / ``eval_count``. An OpenAI-style ``usage``
        # object is still honoured as a fallback (e.g. behind a proxy).
        legacy_usage = data.get("usage") or {}
        prompt_tokens = data.get(
            "prompt_eval_count", legacy_usage.get("prompt_tokens", 0)
        )
        completion_tokens = data.get(
            "eval_count", legacy_usage.get("completion_tokens", 0)
        )
        usage = LLMUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=legacy_usage.get(
                "total_tokens", prompt_tokens + completion_tokens
            ),
        )

        logger.debug(
            "ollama_complete",
            model=model,
            latency_ms=round(latency_ms, 1),
            total_tokens=usage.total_tokens,
        )

        return LLMResponse(
            content=content,
            model=model,
            usage=usage,
            latency_ms=latency_ms,
        )
@@ -0,0 +1,101 @@
1
+ """OpenRouter LLM provider."""
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ from dataclasses import dataclass
6
+
7
+ import structlog
8
+
9
+ from typing import Any
10
+
11
+ from loopllm.provider import LLMProvider, LLMResponse, LLMUsage
12
+
13
+ logger = structlog.get_logger(__name__)
14
+
15
+
16
@dataclass
class OpenRouterProvider(LLMProvider):
    """LLM provider backed by the OpenRouter API.

    Args:
        api_key: OpenRouter API key.
        base_url: Base URL for the OpenRouter API. A trailing slash is
            tolerated.
        timeout: Request timeout in seconds for the completion call.
    """

    api_key: str
    base_url: str = "https://openrouter.ai/api/v1"
    timeout: float = 120.0

    @property
    def name(self) -> str:
        """Provider name."""
        return "openrouter"

    def complete(self, prompt: str, model: str, **kwargs: Any) -> LLMResponse:
        """Call the OpenRouter chat completions endpoint.

        Args:
            prompt: The user prompt to complete.
            model: OpenRouter model identifier (e.g. ``openai/gpt-4o-mini``).
            **kwargs: Extra fields merged into the request body (they may
                override ``model``/``messages``).

        Returns:
            Parsed :class:`LLMResponse` with content, usage, and latency.

        Raises:
            RuntimeError: If the API returns a non-200 status code, or a
                200 response that carries no ``choices``.
            ImportError: If ``httpx`` is not installed.
        """
        try:
            import httpx
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                "httpx is required for OpenRouterProvider. "
                "Install it with: pip install loopllm[openrouter]"
            ) from exc

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/azank1/loop-llm",
        }
        payload = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            **kwargs,
        }
        # Avoid "//chat/completions" when the base URL ends with a slash.
        endpoint = self.base_url.rstrip("/") + "/chat/completions"

        t0 = time.perf_counter()
        # An explicit timeout is required: httpx otherwise applies its short
        # default, which completions routinely exceed.
        response = httpx.post(
            endpoint,
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )
        latency_ms = (time.perf_counter() - t0) * 1000.0

        if response.status_code != 200:
            raise RuntimeError(
                f"OpenRouter returned {response.status_code}: {response.text}"
            )

        data = response.json()
        choices = data.get("choices")
        if not choices:
            # Surface a missing/empty ``choices`` as the documented error
            # type, with the body attached, instead of a bare KeyError.
            raise RuntimeError(
                f"OpenRouter response contained no choices: {response.text}"
            )
        content = choices[0]["message"]["content"]

        # ``usage`` may be absent or explicitly null.
        usage_data = data.get("usage") or {}
        usage = LLMUsage(
            prompt_tokens=usage_data.get("prompt_tokens", 0),
            completion_tokens=usage_data.get("completion_tokens", 0),
            total_tokens=usage_data.get("total_tokens", 0),
        )

        logger.debug(
            "openrouter_complete",
            model=model,
            latency_ms=round(latency_ms, 1),
            total_tokens=usage.total_tokens,
        )

        return LLMResponse(
            content=content,
            model=model,
            usage=usage,
            latency_ms=latency_ms,
        )
loopllm/serve.py ADDED
@@ -0,0 +1,297 @@
1
+ """REST API server exposing loopllm scoring to local models.
2
+
3
+ Starts a lightweight HTTP server (FastAPI + uvicorn) that exposes:
4
+
5
+ POST /score — score a prompt+output pair, return quality metrics
6
+ POST /rewrite — score + return a rewritten prompt if below threshold
7
+ POST /intercept — run loopllm_intercept on a prompt
8
+ POST /plan/register — create a new plan in the PlanRegistry
9
+ POST /plan/update — update task scores and get confidence status
10
+ GET /plan/{plan_id} — get full plan status
11
+ GET /health — health check
12
+
13
+ This is the bridge that lets local models (Ollama, llama.cpp, LM Studio)
14
+ use loopllm as a scoring middleware without needing MCP tool-calling support.
15
+
16
+ Usage::
17
+
18
+ loopllm serve --host 0.0.0.0 --port 8765
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ from typing import Any
24
+
25
+ from loopllm.mcp_server import (
26
+ _init_state,
27
+ _score_prompt_quality,
28
+ _build_evaluator,
29
+ _tool_intercept,
30
+ )
31
+ from loopllm.plan_registry import get_registry
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Request / response models (Pydantic, only imported when FastAPI available)
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
def _get_app() -> Any:
    """Construct the FastAPI application with every route registered.

    FastAPI and Pydantic are imported here, inside the function, so that
    importing this module does not itself require them. ``_init_state()``
    is called on every invocation before the app is created.

    Returns:
        The configured ``FastAPI`` instance.

    Raises:
        ImportError: If FastAPI / Pydantic cannot be imported.
    """
    try:
        from fastapi import FastAPI, HTTPException
        from fastapi.responses import JSONResponse
        from pydantic import BaseModel
    except ImportError as exc:
        raise ImportError(
            "FastAPI and uvicorn are required for `loopllm serve`.\n"
            "Install with: pip install loopllm[serve]"
        ) from exc

    _init_state()

    # NOTE(review): this version string is hard-coded and may lag behind the
    # released package version — confirm whether it should track it.
    app = FastAPI(
        title="loopllm scoring API",
        description=(
            "Quality scoring and prompt optimization middleware for local LLMs. "
            "POST your prompt+output to /score to get quality metrics and a "
            "rewritten prompt if needed."
        ),
        version="0.5.0",
    )

    # -- Local helpers ---------------------------------------------------------

    def _blend(prompt_part: float, output_part: float) -> float:
        # Weighted mix used by /score and /rewrite: prompt 35 %, output 65 %.
        return prompt_part * 0.35 + output_part * 0.65

    def _or_404(payload: dict[str, Any]) -> dict[str, Any]:
        # Registry calls report failure via an "error" key; map it to HTTP 404.
        if "error" in payload:
            raise HTTPException(status_code=404, detail=payload["error"])
        return payload

    # -- Pydantic models -------------------------------------------------------
    # NOTE(review): the module uses postponed annotations while these models
    # are function-local — confirm FastAPI resolves ``req: <Model>`` as a
    # request body in the supported FastAPI/Pydantic versions.

    class ScoreRequest(BaseModel):
        prompt: str
        output: str
        evaluator_type: str = "length"
        min_words: int = 5
        max_words: int = 10_000
        required_fields: list[str] = []
        required_patterns: list[str] = []
        quality_threshold: float = 0.80

    class RewriteRequest(BaseModel):
        prompt: str
        output: str
        iteration: int = 0
        max_retries: int = 3
        evaluator_type: str = "length"
        min_words: int = 5
        max_words: int = 10_000
        quality_threshold: float = 0.80

    class InterceptRequest(BaseModel):
        prompt: str

    class PlanRegisterRequest(BaseModel):
        goal: str
        tasks: list[dict[str, Any]]
        confidence_threshold: float = 0.72

    class PlanUpdateRequest(BaseModel):
        plan_id: str
        task_id: str
        prompt_score: float | None = None
        output_score: float | None = None
        mark_done: bool = True

    class PlanNextRequest(BaseModel):
        plan_id: str

    # -- Endpoints -------------------------------------------------------------

    @app.get("/health")
    def health() -> dict[str, str]:
        return {"status": "ok", "service": "loopllm"}

    @app.post("/score")
    def score(req: ScoreRequest) -> JSONResponse:
        """Score a prompt+output pair.

        Returns prompt_score, output_score, combined_score, passed,
        deficiencies, and grade.
        """
        # Heuristic prompt quality first, then evaluator-based output quality.
        prompt_quality = _score_prompt_quality(req.prompt)
        prompt_score = prompt_quality["quality_score"]

        evaluator = _build_evaluator(
            req.evaluator_type,
            min_words=req.min_words,
            max_words=req.max_words,
            required_fields=req.required_fields,
            required_patterns=req.required_patterns,
        )
        eval_result = evaluator.evaluate(req.output)
        output_score = eval_result.score

        combined = _blend(prompt_score, output_score)
        body = {
            "prompt_score": round(prompt_score, 4),
            "output_score": round(output_score, 4),
            "combined_score": round(combined, 4),
            "passed": combined >= req.quality_threshold,
            "quality_threshold": req.quality_threshold,
            "deficiencies": eval_result.deficiencies,
            "prompt_grade": prompt_quality["grade"],
            "prompt_gauge": prompt_quality["gauge"],
            "prompt_issues": prompt_quality["issues"],
            "prompt_suggestions": prompt_quality["suggestions"],
        }
        return JSONResponse(body)

    @app.post("/rewrite")
    def rewrite(req: RewriteRequest) -> JSONResponse:
        """Score output and return a rewritten prompt if quality is below threshold.

        If passed=True the response also contains rewritten_prompt=null —
        meaning no retry is needed.
        """
        prompt_quality = _score_prompt_quality(req.prompt)
        prompt_score = prompt_quality["quality_score"]

        evaluator = _build_evaluator(
            req.evaluator_type,
            min_words=req.min_words,
            max_words=req.max_words,
        )
        eval_result = evaluator.evaluate(req.output)
        output_score = eval_result.score
        combined = _blend(prompt_score, output_score)
        passed = combined >= req.quality_threshold

        # A retry is offered only while failing and under the retry budget.
        should_retry = not passed and req.iteration < req.max_retries

        rewritten: str | None = None
        if should_retry:
            if eval_result.deficiencies:
                deficiency_str = "\n".join(
                    f"  - {d}" for d in eval_result.deficiencies
                )
            else:
                deficiency_str = "  - Output did not meet quality threshold"
            rewritten = (
                f"[LOOPLLM | score={combined:.2f} | "
                f"retry={req.iteration + 1}/{req.max_retries} | "
                f"threshold={req.quality_threshold:.2f}]\n"
                f"Your previous response scored {combined:.2f}/1.0.\n"
                f"Issues to fix:\n{deficiency_str}\n\n"
                f"Original task:\n{req.prompt}\n\n"
                f"Previous response (do not repeat):\n{req.output[:500]}\n\n"
                f"Please produce an improved response that addresses all issues."
            )

        body = {
            "prompt_score": round(prompt_score, 4),
            "output_score": round(output_score, 4),
            "combined_score": round(combined, 4),
            "passed": passed,
            "quality_threshold": req.quality_threshold,
            "deficiencies": eval_result.deficiencies,
            "rewritten_prompt": rewritten,
            "should_retry": should_retry,
            "iteration": req.iteration,
        }
        return JSONResponse(body)

    @app.post("/intercept")
    def intercept(req: InterceptRequest) -> JSONResponse:
        """Run loopllm_intercept on a prompt (same as the MCP tool)."""
        # The tool's result is passed through json.loads before re-serialising.
        raw = _tool_intercept(req.prompt)
        return JSONResponse(json.loads(raw))

    # -- Plan endpoints --------------------------------------------------------

    @app.post("/plan/register")
    def plan_register(req: PlanRegisterRequest) -> JSONResponse:
        """Create a new plan in the PlanRegistry."""
        plan = get_registry().create(
            goal=req.goal,
            tasks=req.tasks,
            confidence_threshold=req.confidence_threshold,
        )
        return JSONResponse(plan.to_dict())

    @app.post("/plan/update")
    def plan_update(req: PlanUpdateRequest) -> JSONResponse:
        """Score a task's prompt and/or output and get updated plan confidence."""
        registry = get_registry()
        result: dict[str, Any] = {}

        if req.prompt_score is not None:
            result = _or_404(
                registry.score_prompt(req.plan_id, req.task_id, req.prompt_score)
            )

        if req.output_score is not None:
            result = _or_404(
                registry.score_output(
                    req.plan_id, req.task_id, req.output_score, mark_done=req.mark_done
                )
            )

        # Nothing scored (or an empty result): fall back to the plan status.
        if not result:
            result = _or_404(registry.get_status(req.plan_id))

        return JSONResponse(result)

    @app.get("/plan/{plan_id}")
    def plan_status(plan_id: str) -> JSONResponse:
        """Get the current status and rolling confidence of a plan."""
        status = _or_404(get_registry().get_status(plan_id))
        return JSONResponse(status)

    @app.post("/plan/next")
    def plan_next(req: PlanNextRequest) -> JSONResponse:
        """Get and activate the next pending task in a plan."""
        task = get_registry().next_task(req.plan_id)
        if task is not None:
            return JSONResponse({**task, "done": False})
        return JSONResponse({"done": True, "plan_id": req.plan_id})

    @app.get("/plan")
    def list_plans() -> JSONResponse:
        """List all active plans."""
        return JSONResponse({"plans": get_registry().list_plans()})

    return app
269
+
270
+
271
def run_server(host: str = "127.0.0.1", port: int = 8765, reload: bool = False) -> None:
    """Launch the loopllm scoring REST server under uvicorn.

    Args:
        host: Bind address.
        port: Port to listen on.
        reload: Enable auto-reload (development only).

    Raises:
        ImportError: If uvicorn (or, via ``_get_app``, FastAPI) is missing.
    """
    try:
        import uvicorn
    except ImportError as exc:
        raise ImportError(
            "uvicorn is required for `loopllm serve`.\n"
            "Install with: pip install loopllm[serve]"
        ) from exc

    # Throwaway build: makes missing-dependency errors appear here, before
    # uvicorn takes over. uvicorn constructs its own instance via the factory.
    _get_app()

    server_options: dict[str, Any] = {
        "factory": True,
        "host": host,
        "port": port,
        "reload": reload,
        "log_level": "info",
    }
    uvicorn.run("loopllm.serve:_get_app", **server_options)