litesquad 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
litesquad/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .cli import app
2
+
3
+
4
def main() -> None:
    """Invoke the CLI application object imported from ``.cli``."""
    app()
litesquad/check_keys.py ADDED
@@ -0,0 +1,57 @@
1
+ """Dead-simple check that each provider API key is valid and active.
2
+
3
+ Run: uv run litesquad-keys (or: uv run python -m litesquad.check_keys)
4
+
5
+ Pings one cheap model per provider with a 1-token request and prints a
6
+ checkmark per key. Independent of the squad config, so it tests the key
7
+ itself, not whichever model your squad happens to use.
8
+ """
9
+
10
+ import os
11
+
12
+ import litellm
13
+ from rich.console import Console
14
+
15
+ from .llm import load_env # loads .env with override=True; importing also quiets litellm
16
+
17
# Module-level Rich console used for every status line this script prints.
console = Console()

# One cheap, broadly-available probe model per provider key.
# Keys are the environment-variable names that main() looks up; values are the
# model strings handed to check(), which passes them to litellm.completion().
PROBES = {
    "ANTHROPIC_API_KEY": "anthropic/claude-haiku-4-5",
    "OPENAI_API_KEY": "openai/gpt-4o-mini",
    "GEMINI_API_KEY": "gemini/gemini-2.5-flash",
    "OPENROUTER_API_KEY": "openrouter/openai/gpt-4o-mini",
}
26
+
27
+
28
def check(model: str) -> tuple[bool, str]:
    """Probe *model* with a minimal completion request.

    Sends a single ``"ping"`` user message capped at one output token
    (``drop_params=True`` is passed through to litellm).

    Returns:
        ``(True, "ok")`` when the call completes without raising; otherwise
        ``(False, detail)`` where *detail* is the first non-blank line of the
        exception message, or the exception's class name when the message has
        no visible text.
    """
    try:
        litellm.completion(
            model=model,
            messages=[{"role": "user", "content": "ping"}],
            max_tokens=1,
            drop_params=True,
        )
    except Exception as exc:  # noqa: BLE001 - any failure means the key isn't usable
        # An exception can stringify to "" (or start with a blank line).
        # Indexing splitlines()[0] would then raise IndexError from inside this
        # handler, so scan for the first line that has content instead.
        stripped_lines = (line.strip() for line in str(exc).splitlines())
        detail = next((line for line in stripped_lines if line), "")
        return False, detail or type(exc).__name__
    return True, "ok"
39
+
40
+
41
def main() -> None:
    """Print one status line per provider key listed in ``PROBES``.

    Calls ``load_env()`` first (per the import comment, this loads ``.env``
    with ``override=True``), then for each key prints "not set" when the
    environment variable is missing or empty, "active" when the probe
    succeeds, and "invalid: <detail>" when it fails.
    """
    # Local import: only this function needs it, and rich is already a
    # dependency of this module.
    from rich.markup import escape

    load_env()
    for key, model in PROBES.items():
        # An unset key is a distinct state from an invalid one: say "not set"
        # instead of probing and printing a provider auth exception.
        if not os.environ.get(key):
            console.print(f"[red]✗[/] {key} — not set")
            continue
        ok, detail = check(model)
        if ok:
            console.print(f"[green]✓[/] {key} — active")
        else:
            # Provider error text is arbitrary. Escape it so square brackets in
            # the message are shown literally instead of being parsed as Rich
            # markup (an unmatched closing tag would raise MarkupError).
            console.print(f"[red]✗[/] {key} — invalid: {escape(detail)}")
54
+
55
+
56
# Allow running this module directly (python -m litesquad.check_keys).
if __name__ == "__main__":
    main()
litesquad/cli.py ADDED
@@ -0,0 +1,276 @@
1
+ """Command-line interface: one fixed-flow run, then interactive follow-ups."""
2
+
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ import typer
7
+ from rich.console import Console
8
+ from rich.markdown import Markdown
9
+ from rich.panel import Panel
10
+ from rich.prompt import Prompt
11
+ from rich.status import Status
12
+
13
+ from . import paths
14
+ from .config import RunConfig, SquadConfig, ensure_starter, load_config
15
+ from .llm import LLMError, MissingKeysError, call_model, load_env, mock_call_model, preflight
16
+ from .models import Conversation, Stage, TranscriptEvent
17
+ from .squad import run_quick, run_turn
18
+
19
# The Typer application; the ``run`` command below registers itself on it.
app = typer.Typer(add_completion=False, help="Ask a small ensemble of LLMs anything.")
# Module-level Rich console shared by the reporter and the command body.
console = Console()

# Spinner wording for each pipeline stage; ConsoleReporter.stage_start looks
# the stage up here, so every Stage value it receives needs an entry.
STAGE_LABEL: dict[Stage, str] = {
    "propose": "responding",
    "critique": "critiquing",
    "revise": "revising",
    "extract": "de-stylizing",
    "cluster": "clustering",
    "judge": "judging",
    "reply": "answering",
}
# Follow-up inputs (compared lower-cased) that end the interactive loop.
QUIT_WORDS = {":quit", ":q", "quit", "exit"}
# Fixed prompt used for the single turn that --smoke runs.
SMOKE_PROMPT = "What is 1 + 1? Answer in one short sentence."
33
+
34
+
35
class ConsoleReporter:
    """Show each stage as a Rich panel and append every event to a JSONL file.

    Worker chains run in parallel, so their stages finish interleaved and would
    be unreadable if printed on arrival. Chain stages (propose/critique/revise)
    are therefore held back per worker and printed together as soon as that
    worker's chain finishes or fails; blocks appear in chain-completion order.
    The remaining stages (extract, cluster, judge, reply) print immediately,
    and the spinner lists everything currently in flight. The transcript file
    is still appended per event, in completion order.

    Per the Reporter contract, calls arrive serialized, so no lock is needed
    here; printing while the Status runs is safe (rich renders it above the
    spinner).
    """

    def __init__(self, transcript_path: Path | None) -> None:
        self.transcript_path = transcript_path
        # Live spinner, created lazily by the first stage_start.
        self._status: Status | None = None
        # (role, stage) -> spinner text for every call currently running.
        self._in_flight: dict[tuple[str, Stage], str] = {}
        # worker key -> panels buffered until that worker's chain ends.
        self._pending_chains: dict[str, list[Panel]] = {}

    @staticmethod
    def _worker_key(role: str) -> str:
        """propose/revise carry ``worker_N``; critique carries ``critic->worker_N``."""
        return role.rpartition("->")[2]

    @staticmethod
    def _panel(event: TranscriptEvent) -> Panel:
        """Build the panel for one finished event (red when it carries an error)."""
        heading = f"{event.role} | {event.model}"
        if not event.error:
            colour = "cyan" if event.stage not in ("judge", "reply") else "green"
            return Panel(Markdown(event.output), title=heading, border_style=colour)
        return Panel(event.error, title=f"{heading} (error)", border_style="red")

    def _spinner_text(self) -> str:
        """Join the text of every in-flight call into one spinner line."""
        return " | ".join(self._in_flight.values())

    def _flush_chain(self, worker: str) -> None:
        """Print and forget everything buffered for ``worker``."""
        buffered = self._pending_chains.pop(worker, [])
        for panel in buffered:
            console.print(panel)

    def stage_start(self, stage: Stage, role: str, model: str) -> None:
        """Register a call as in flight and start or refresh the spinner."""
        label = STAGE_LABEL[stage]
        self._in_flight[(role, stage)] = f"[bold]{role}[/] ({model}) - {label}..."
        text = self._spinner_text()
        if self._status is not None:
            self._status.update(text)
            return
        self._status = console.status(text, spinner="dots")
        self._status.start()

    def stage_done(self, event: TranscriptEvent) -> None:
        """Record a finished event: transcript line, panel, spinner update."""
        self._in_flight.pop((event.role, event.stage), None)

        if self.transcript_path is not None:
            with self.transcript_path.open("a", encoding="utf-8") as fh:
                fh.write(event.to_jsonl() + "\n")

        if event.stage not in ("propose", "critique", "revise"):
            console.print(self._panel(event))
        else:
            worker = self._worker_key(event.role)
            self._pending_chains.setdefault(worker, []).append(self._panel(event))
            # The chain is over once it revised, or wherever it errored.
            if event.stage == "revise" or event.error:
                self._flush_chain(worker)

        status = self._status
        if status is None:
            return
        if self._in_flight:
            status.update(self._spinner_text())
            return
        status.stop()
        self._status = None

    def close(self) -> None:
        """Flush any unfinished chains and stop the spinner. Call in a ``finally``: an
        aborted turn must still show what its surviving chains produced, and must not
        leave a live display running (hidden cursor, endless spinner)."""
        while self._pending_chains:
            # _flush_chain pops the key, so the dict shrinks on every pass.
            self._flush_chain(next(iter(self._pending_chains)))
        if self._status is not None:
            self._status.stop()
            self._status = None
        self._in_flight.clear()
113
+
114
+
115
+ def _transcript_path(save: bool) -> Path | None:
116
+ if not save:
117
+ return None
118
+ stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
119
+ return paths.transcripts_dir() / f"{stamp}.jsonl"
120
+
121
+
122
+ def _model_roles(config: SquadConfig) -> list[tuple[str, str]]:
123
+ """Distinct configured models in pipeline order (workers first, then critic ->
124
+ extractor -> clusterer -> judge), each with the role(s) it holds -- so --check
125
+ doubles as a who-does-what roster card."""
126
+ assignments = [
127
+ *(("worker", worker.model) for worker in config.workers),
128
+ ("critic", config.critic.model),
129
+ ("extractor", config.extractor.model),
130
+ ("clusterer", config.clusterer.model),
131
+ ("judge", config.judge.model),
132
+ ]
133
+ roles_by_model: dict[str, list[str]] = {}
134
+ for role, model in assignments:
135
+ roles = roles_by_model.setdefault(model, [])
136
+ if role not in roles: # two workers on one model still read as one "worker"
137
+ roles.append(role)
138
+ return [(model, ", ".join(roles)) for model, roles in roles_by_model.items()]
139
+
140
+
141
def _check_models(config: SquadConfig) -> bool:
    """Ping each distinct configured model with a tiny request. Returns all-ok.

    One line is printed per model: a green check with the first 40 characters
    of the reply, or a red cross with the error text when ``call_model``
    raises ``LLMError``.

    ``max_tokens`` is generous (not 5) because reasoning models (GPT-5,
    Gemini 2.5 Pro) spend output budget on hidden reasoning before any visible
    text — too small a cap returns empty content. The cap only prevents
    truncation; actual usage stays tiny since the model stops after "ok".
    """
    # Local import: rich is already a dependency of this module.
    from rich.markup import escape

    run_cfg = RunConfig(max_tokens=1024, save_transcript=False)
    messages = [{"role": "user", "content": "Reply with the single word: ok"}]
    all_ok = True
    for model, roles in _model_roles(config):
        try:
            reply = call_model(model, messages, run_cfg, role="check").strip()
        except LLMError as exc:
            all_ok = False
            # Error text is arbitrary: escape it so square brackets are shown
            # literally instead of being parsed as Rich markup.
            console.print(f"[red]✗[/] {model} [dim]({roles})[/] — {escape(str(exc))}")
        else:
            # Same for the model's own reply, which is untrusted text too.
            console.print(f"[green]✓[/] {model} [dim]({roles})[/] — {escape(reply[:40])}")
    return all_ok
160
+
161
+
162
@app.command()
def run(
    task: str = typer.Argument(None, help="Your question or task for the ensemble."),
    quick: bool = typer.Option(
        False, "--quick", help="Talk to just the judge (Opus), skipping the ensemble."
    ),
    mock: bool = typer.Option(
        False, "--mock", help="Use canned offline responses; no API keys needed."
    ),
    check: bool = typer.Option(
        False, "--check", help="Ping each configured model with a tiny request and exit."
    ),
    smoke: bool = typer.Option(
        False, "--smoke", help="Run one real turn on a fixed tiny prompt and exit (cheap end-to-end)."
    ),
    web: bool = typer.Option(
        False, "--web", help="Serve the web UI instead of the terminal (needs the 'web' extra)."
    ),
    port: int = typer.Option(8050, help="Port for the web UI (only used with --web)."),
) -> None:
    """Ask the ensemble (or, with --quick, just the judge), then take follow-ups."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # walkthrough lives in comments instead.
    #
    # Modes, checked in this order:
    #   --web    serve the web UI and return (a positional task is rejected, exit 2)
    #   --check  preflight, ping every configured model, exit 0 if all ok else 1
    #   --smoke  run one turn on SMOKE_PROMPT, exit 0 on success / 1 on any failure
    #   default  run the task, then loop on follow-ups until a quit word, an
    #            empty line, EOF or Ctrl-C
    #
    # Exception text is arbitrary (provider messages), so it is escaped before
    # being placed inside Rich markup: an unmatched "[/...]" in the message
    # would otherwise raise MarkupError while reporting the real error.
    from rich.markup import escape

    if web:
        if task is not None:
            console.print("[red]With --web, submit tasks in the browser, not on the command line.[/]")
            raise typer.Exit(2)
        from .web import serve  # lazy: dash is an optional extra

        serve(port=port, mock=mock)
        return

    load_env()

    cfg_path = paths.config_path()
    if ensure_starter(cfg_path):
        console.print(
            f"[dim]Wrote a starter config (all defaults, commented) you can edit at {cfg_path}[/]"
        )
    config = load_config(cfg_path)

    if check:
        try:
            preflight(config)
        except MissingKeysError as exc:
            console.print(f"[red]{escape(str(exc))}[/]")
            raise typer.Exit(1) from exc
        console.print("Pinging configured models…")
        raise typer.Exit(0 if _check_models(config) else 1)

    if smoke:
        caller = mock_call_model if mock else call_model
        if not mock:
            try:
                preflight(config)
            except MissingKeysError as exc:
                console.print(f"[red]{escape(str(exc))}[/]")
                raise typer.Exit(1) from exc
        console.print(f'Smoke test: one turn on [dim]"{SMOKE_PROMPT}"[/]\n')
        transcript_path = _transcript_path(config.run.save_transcript)
        reporter = ConsoleReporter(transcript_path)
        try:
            run_turn(Conversation(), SMOKE_PROMPT, config, reporter, caller=caller)
        except Exception as exc:  # noqa: BLE001 - smoke surfaces ANY failure (call or save)
            console.print(f"[red]Smoke test FAILED: {escape(str(exc))}[/]")
            raise typer.Exit(1) from exc
        finally:
            # Always stop the spinner and flush buffered panels, pass or fail.
            reporter.close()
        if transcript_path is not None:
            console.print(f"[dim]Transcript: {transcript_path}[/]")
        console.print("[green]Smoke test passed - all stages produced output.[/]")
        raise typer.Exit(0)

    if task is None:
        console.print('[red]Provide a task, e.g. litesquad "Plan my week", or use --check.[/]')
        raise typer.Exit(2)

    caller = call_model
    if mock:
        caller = mock_call_model
        console.print("[yellow]Running in --mock mode: canned responses, no API calls.[/]")
    else:
        try:
            preflight(config)
        except MissingKeysError as exc:
            console.print(f"[red]{escape(str(exc))}[/]")
            raise typer.Exit(1) from exc

    # One conversation, transcript file and reporter are shared by the first
    # turn and every follow-up.
    conversation = Conversation()
    transcript_path = _transcript_path(config.run.save_transcript)
    reporter = ConsoleReporter(transcript_path)
    run_one = run_quick if quick else run_turn
    if quick:
        console.print("[dim]Quick mode: just the judge, no ensemble.[/]")

    current_task = task
    while True:
        try:
            run_one(conversation, current_task, config, reporter, caller=caller)
        except LLMError as exc:
            # A failed turn is reported but does not end the session.
            console.print(f"[red]Turn aborted: {escape(str(exc))}[/]")
        finally:
            reporter.close()

        if transcript_path is not None:
            console.print(f"[dim]Transcript: {transcript_path}[/]")

        try:
            reply = Prompt.ask(
                "\n[bold magenta]Follow-up[/] ([bold yellow]:quit to exit[/])"
            ).strip()
        except (EOFError, KeyboardInterrupt):
            console.print()
            break
        if not reply or reply.lower() in QUIT_WORDS:
            break
        current_task = reply
litesquad/config.py ADDED
@@ -0,0 +1,192 @@
1
+ """Typed configuration: good defaults in code, user overrides from ~/.litesquad.
2
+
3
+ The defaults below are authoritative and versioned. On load, an optional user
4
+ config at :func:`litesquad.paths.config_path` is shallow-merged on top, so the
5
+ user's file only needs the handful of things they want to change.
6
+ """
7
+
8
+ import tomllib
9
+ from pathlib import Path
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+ from . import paths
14
+
15
# Built-in default squad, kept as TOML text. It is parsed by default_config()
# and load_config(), and rendered line by line into the all-commented starter
# file by _commented_starter(), so its comments are user-facing content.
DEFAULT_CONFIG_TOML = """\
# litesquad default squad. Models are LiteLLM model strings.

[run]
# Caps output per stage. Reasoning models (GPT-5, Gemini 2.5 Pro) spend part of
# this on hidden reasoning before the visible answer, so keep it roomy.
max_tokens = 8000
save_transcript = true
# Randomize the order the judge sees the responses so no worker is permanently
# first (LLM judges have a primacy bias). Turn off for deterministic-order tests.
shuffle = true
# How many worker chains run at once (threads; model calls are I/O-bound).
# 1 runs everything serially, which is useful when debugging. Higher values
# speed up wide rosters but hit providers harder: the critic model receives
# roughly this many concurrent requests.
max_parallel = 4
# temperature is omitted: frontier models (Opus 4.7+, GPT-5) reject it with a 400.
# Set it only if every model in your squad supports it (Sonnet, Gemini, Opus 4.6-).
# temperature = 0.4

# The judge writes the final answer from the clustered content map (extract -> cluster -> judge).
[agents.judge]
model = "anthropic/claude-opus-4-8"

[agents.critic]
model = "openai/gpt-5"

# The extractor de-stylizes each revised response into content units (JSON). A mechanical step,
# so a cheap model is fine -- swap to openai/gpt-4.1-mini to cut cost.
[agents.extractor]
model = "openai/gpt-5"

# The clusterer groups equivalent units across responses and flags conflicts, building the
# content map the judge writes from. It makes no quality judgment (that is the judge's job).
# Its load scales with roster width (every worker's units land in one pool), and a reasoning
# model in this seat spends hidden budget before emitting the JSON, so it gets extra headroom.
[agents.clusterer]
model = "anthropic/claude-opus-4-8"
max_tokens = 24000

# Each worker responds independently (blind to the others), the critic gives each one feedback,
# and the worker revises. Then the revised answers are extracted into units, clustered into a
# content map, and the judge writes the final answer from it. Any agent may add an
# `instructions` string that is appended to its system prompt, and a `max_tokens` that
# overrides [run] max_tokens for its own calls -- e.g. a reasoning model clustering a wide
# roster's units can need max_tokens = 24000 while the workers stay at the run default.
[[agents.workers]]
model = "anthropic/claude-sonnet-4-6"

[[agents.workers]]
model = "gemini/gemini-2.5-pro"

# openrouter/* workers: one OPENROUTER_API_KEY reaches every provider on
# openrouter.ai (deepseek, mistral, llama, grok, qwen, ...), which is how the
# roster grows wide without needing a key per provider. Prefer non-reasoning
# models as workers here: through OpenRouter a reasoning model can spend the
# whole max_tokens budget on hidden reasoning and return empty content. If you
# do add one, tell it to answer tersely -- instructions cap the visible answer
# reliably, the hidden reasoning only partly.
[[agents.workers]]
model = "openrouter/deepseek/deepseek-chat"

[[agents.workers]]
model = "openrouter/mistralai/mistral-large"

[[agents.workers]]
model = "openrouter/meta-llama/llama-3.3-70b-instruct"
"""
83
+
84
# Preamble that _commented_starter() places in front of the commented-out
# defaults when building the starter override file.
_STARTER_HEADER = """\
# litesquad overrides. This file mirrors the built-in defaults, fully commented.
# Uncomment and edit a line to override that default; anything left commented
# keeps following the library default, so you only carry the deltas you care about.

"""
90
+
91
+
92
class AgentConfig(BaseModel):
    """Settings for one agent seat (judge, critic, extractor, clusterer, or a worker)."""

    # Model string for this agent (the default TOML describes these as LiteLLM model strings).
    model: str
    instructions: str | None = None  # optional, appended to this agent's system prompt
    max_tokens: int | None = None  # optional, overrides run.max_tokens for this agent's calls
96
+
97
+
98
class RunConfig(BaseModel):
    """Run-wide settings from the ``[run]`` table; every field has a default."""

    # None means no temperature is configured (the default TOML leaves it out
    # because some models reject the parameter).
    temperature: float | None = None
    # Output cap per stage unless an agent sets its own max_tokens.
    max_tokens: int = 8000
    # When false, the CLI passes no transcript path to its reporter.
    save_transcript: bool = True
    shuffle: bool = True  # randomize the order the judge sees responses (kills primacy bias)
    max_parallel: int = 4  # concurrent worker chains (1 = serial); keep equal to the TOML default
104
+
105
+
106
class SquadConfig(BaseModel):
    """The whole squad: run settings, four single-seat agents, and the workers."""

    run: RunConfig = Field(default_factory=RunConfig)
    judge: AgentConfig
    critic: AgentConfig
    extractor: AgentConfig
    clusterer: AgentConfig
    workers: list[AgentConfig] = Field(min_length=1)

    def models(self) -> list[str]:
        """Every distinct model string referenced by the squad.

        Order is first appearance: judge, critic, extractor, clusterer, then
        the workers in their configured order.
        """
        single_seats = (self.judge, self.critic, self.extractor, self.clusterer)
        in_order = [agent.model for agent in (*single_seats, *self.workers)]
        # dict.fromkeys keeps the first occurrence of each key, in order.
        return list(dict.fromkeys(in_order))
122
+
123
+
124
def _build(raw: dict) -> SquadConfig:
    """Build a SquadConfig from a raw TOML dict (flattening [agents.*]).

    A missing ``run`` table or agent table is passed on as an empty dict, and
    a missing workers list as an empty list, leaving it to the model's own
    validation to accept or reject them.
    """
    agents = raw.get("agents", {})
    single_seats = {
        seat: agents.get(seat, {})
        for seat in ("judge", "critic", "extractor", "clusterer")
    }
    return SquadConfig(
        run=raw.get("run", {}),
        workers=agents.get("workers", []),
        **single_seats,
    )
135
+
136
+
137
def default_config() -> SquadConfig:
    """Parse the built-in TOML defaults into the authoritative default squad."""
    raw_defaults = tomllib.loads(DEFAULT_CONFIG_TOML)
    return _build(raw_defaults)
140
+
141
+
142
+ def _merge(defaults: dict, overrides: dict) -> dict:
143
+ """Shallow-merge user overrides onto the default raw config.
144
+
145
+ ``run`` keys merge individually; a provided single agent (judge/critic/extractor/clusterer)
146
+ replaces that agent; a provided (non-empty) workers list replaces the default workers.
147
+ Nothing deeper.
148
+ """
149
+ d_agents = defaults.get("agents", {})
150
+ o_agents = overrides.get("agents", {})
151
+ return {
152
+ "run": {**defaults.get("run", {}), **overrides.get("run", {})},
153
+ "agents": {
154
+ "judge": o_agents.get("judge") or d_agents.get("judge", {}),
155
+ "critic": o_agents.get("critic") or d_agents.get("critic", {}),
156
+ "extractor": o_agents.get("extractor") or d_agents.get("extractor", {}),
157
+ "clusterer": o_agents.get("clusterer") or d_agents.get("clusterer", {}),
158
+ "workers": o_agents.get("workers") or d_agents.get("workers", []),
159
+ },
160
+ }
161
+
162
+
163
def load_config(path: Path | None = None) -> SquadConfig:
    """Load the default squad, with any user overrides merged on top.

    ``path`` defaults to ``paths.config_path()``. When no file exists there,
    the built-in defaults are returned unchanged.
    """
    raw_defaults = tomllib.loads(DEFAULT_CONFIG_TOML)
    cfg_path = path or paths.config_path()
    if not cfg_path.exists():
        return _build(raw_defaults)
    user_raw = tomllib.loads(cfg_path.read_text(encoding="utf-8"))
    return _build(_merge(raw_defaults, user_raw))
171
+
172
+
173
def _commented_starter() -> str:
    """Render the defaults as an all-commented override file.

    Blank lines and lines that are already comments are kept as they are;
    every other line of the default TOML gets a ``"# "`` prefix. The result is
    the starter header followed by those lines, ending with a newline.
    """

    def render(line: str) -> str:
        body = line.lstrip()
        already_inert = not body or body.startswith("#")
        return line if already_inert else f"# {line}"

    rendered = "\n".join(render(line) for line in DEFAULT_CONFIG_TOML.splitlines())
    return _STARTER_HEADER + rendered + "\n"
183
+
184
+
185
def ensure_starter(path: Path | None = None) -> bool:
    """Write the commented starter override if none exists. True if it was written.

    ``path`` defaults to ``paths.config_path()``. Missing parent directories
    are created before the file is written as UTF-8.
    """
    target = path or paths.config_path()
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(_commented_starter(), encoding="utf-8")
        return True
    return False