loki-mode 6.79.0 → 6.80.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/dashboard/__init__.py +1 -1
- package/docs/INSTALLATION.md +1 -1
- package/magic/__init__.py +7 -0
- package/magic/core/__init__.py +0 -0
- package/magic/core/debate.py +781 -0
- package/magic/core/design_tokens.py +469 -0
- package/magic/core/freshness.py +86 -0
- package/magic/core/generator.py +755 -0
- package/magic/core/memory_bridge.py +220 -0
- package/magic/core/prd_scanner.py +265 -0
- package/magic/core/registry.py +340 -0
- package/magic/core/spec.py +337 -0
- package/magic/debate/personas/a11y.md +95 -0
- package/magic/debate/personas/conservative.md +83 -0
- package/magic/debate/personas/creative.md +73 -0
- package/magic/debate/personas/performance.md +93 -0
- package/magic/registry/schema.json +38 -0
- package/magic/testing/__init__.py +0 -0
- package/magic/testing/snapshot.py +224 -0
- package/magic/testing/test_generator.py +453 -0
- package/magic/tokens/README.md +83 -0
- package/magic/tokens/defaults.json +59 -0
- package/mcp/__init__.py +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,781 @@
|
|
|
1
|
+
"""Multi-persona debate for component quality review.
|
|
2
|
+
|
|
3
|
+
Inspired by MoMoA (retomeier/MoMoA). Conflicting personas argue perspectives
|
|
4
|
+
on a generated component; their critique is fed into a final refinement pass.
|
|
5
|
+
|
|
6
|
+
Design:
|
|
7
|
+
- Round 1: Each persona reviews independently (parallel if provider supports)
|
|
8
|
+
- Round 2: Personas see each other's critiques and respond
|
|
9
|
+
- Round 3: Synthesizer produces final refined code incorporating valid critiques
|
|
10
|
+
- If any persona BLOCKS (severe issue), escalate to human-in-the-loop
|
|
11
|
+
|
|
12
|
+
Stdlib only: dataclasses, json, pathlib, subprocess, shutil, os, sys,
|
|
13
|
+
concurrent.futures, textwrap.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import shutil
|
|
21
|
+
import subprocess
|
|
22
|
+
import sys
|
|
23
|
+
import textwrap
|
|
24
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Iterable, Optional
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Constants
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
# Personas consulted when the caller does not name any.
DEFAULT_PERSONAS = [
    "creative",
    "conservative",
    "a11y",
    "performance",
]

# Severity labels a critique may carry.
VALID_SEVERITIES = (
    "info",
    "suggestion",
    "warning",
    "block",
)

# Component flavours the debate prompts know how to describe.
VALID_TARGETS = (
    "react",
    "webcomponent",
)

# Provider invocation timeout, in seconds. Debate prompts are review-sized
# rather than generation-sized, so this is kept shorter than the generator's.
DEFAULT_PROVIDER_TIMEOUT = 180

# Upper bounds (in characters) on the code and spec text injected into one
# prompt; this keeps subprocess argument lists and model context bounded.
MAX_PROMPT_CODE_CHARS = 40000
MAX_PROMPT_SPEC_CHARS = 8000
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Data classes
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
@dataclass
class Critique:
    """One persona's verdict on a component.

    ``raw_response`` keeps the provider output the critique was built from
    and ``parse_error`` records why a verdict could not be obtained; it is
    ``None`` when nothing went wrong.
    """

    persona: str
    severity: str  # one of VALID_SEVERITIES
    issues: list = field(default_factory=list)
    suggestions: list = field(default_factory=list)
    approves: bool = False
    raw_response: str = ""
    parse_error: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain-dict view of the critique.

        ``raw_response`` is left out; the list fields are copied so the
        caller cannot mutate this instance through the result.
        """
        payload = {"persona": self.persona, "severity": self.severity}
        payload["issues"] = [*self.issues]
        payload["suggestions"] = [*self.suggestions]
        payload["approves"] = self.approves
        payload["parse_error"] = self.parse_error
        return payload
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class DebateResult:
    """Outcome of a complete multi-round debate."""

    rounds: int
    critiques: list  # List[Critique]
    refined_code: str
    consensus: bool
    blocks: list  # List[Critique] where severity == 'block'
    human_needed: bool

    def to_dict(self) -> dict:
        """Return a plain-dict view, converting each critique via ``to_dict()``."""
        critique_dicts = [critique.to_dict() for critique in self.critiques]
        block_dicts = [critique.to_dict() for critique in self.blocks]
        return dict(
            rounds=self.rounds,
            critiques=critique_dicts,
            refined_code=self.refined_code,
            consensus=self.consensus,
            blocks=block_dicts,
            human_needed=self.human_needed,
        )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
# Provider invocation errors
|
|
99
|
+
# ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
class DebateProviderError(RuntimeError):
    """Signals that a provider subprocess failed in a non-recoverable way."""
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# DebateRunner
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
class DebateRunner:
    """Runs a multi-persona debate against generated component code.

    Personas are markdown prompts loaded from ``personas_dir``. Every review
    and the final synthesis are produced by invoking the configured provider
    CLI as a subprocess and reading what it prints on stdout.
    """

    def __init__(
        self,
        provider: str = "claude",
        project_dir: str = ".",
        timeout: int = DEFAULT_PROVIDER_TIMEOUT,
        parallel: bool = True,
    ):
        """Configure the runner.

        Args:
            provider: Provider key understood by ``_build_command``.
            project_dir: Working directory for provider subprocesses; also
                one of the roots searched for persona prompts.
            timeout: Per-invocation provider timeout, in seconds.
            parallel: When True, personas are reviewed on one thread each.
        """
        self.provider = provider
        self.project_dir = Path(project_dir).resolve()
        self.timeout = timeout
        self.parallel = parallel
        self.personas_dir = self._find_personas_dir()

    # ---- Persona discovery / loading ----------------------------------

    def _find_personas_dir(self) -> Path:
        """Locate magic/debate/personas/ relative to script or installed path.

        Search order:
          1. Environment override: ``LOKI_DEBATE_PERSONAS_DIR``
          2. Sibling to this module: ``../debate/personas``
          3. Under the project directory: ``<project>/magic/debate/personas``
          4. Current working directory: ``./magic/debate/personas``

        Returns:
            The first candidate that is an existing directory. When none
            exists, the module-relative location is returned anyway so that
            persona loading fails later with a clear error.
        """
        module_dir = Path(__file__).resolve().parent  # magic/core
        conventional = module_dir.parent / "debate" / "personas"

        candidates: list = []
        env_override = os.environ.get("LOKI_DEBATE_PERSONAS_DIR")
        if env_override:
            candidates.append(Path(env_override))
        candidates.append(conventional)
        candidates.append(self.project_dir / "magic" / "debate" / "personas")
        candidates.append(Path.cwd() / "magic" / "debate" / "personas")

        for candidate in candidates:
            if candidate.is_dir():
                return candidate
        return conventional

    def load_persona(self, name: str) -> str:
        """Load a persona system prompt from a markdown file.

        Args:
            name: Persona name; only its final path component is used.

        Returns:
            The UTF-8 decoded contents of ``<personas_dir>/<name>.md``.

        Raises:
            FileNotFoundError: If the file does not exist. The message lists
                the personas that are available.
        """
        safe_name = Path(name).name  # prevent path traversal
        prompt_path = self.personas_dir / f"{safe_name}.md"
        if not prompt_path.is_file():
            raise FileNotFoundError(
                f"Persona '{name}' not found at {prompt_path}. "
                f"Available: {sorted(p.stem for p in self.personas_dir.glob('*.md'))}"
            )
        return prompt_path.read_text(encoding="utf-8")

    # ---- Public API ---------------------------------------------------

    def run_debate(
        self,
        component_code: str,
        spec: str,
        personas: Optional[list] = None,
        rounds: int = 3,
        target: str = "react",
    ) -> DebateResult:
        """Run the full debate and return a DebateResult.

        Args:
            component_code: The generated component source code.
            spec: The original spec the component was generated against.
            personas: List of persona names (default: all four).
            rounds: 1 = independent review only; 2 = add cross-critique;
                3 = add synthesis into refined code.
            target: 'react' or 'webcomponent' (affects synthesis prompt).

        Returns:
            A DebateResult. ``refined_code`` equals ``component_code`` when
            no synthesis round runs; ``human_needed`` is True when at least
            one critique has severity ``block``.

        Raises:
            ValueError: If ``target`` is not in VALID_TARGETS or ``rounds``
                is below 1.
        """
        if target not in VALID_TARGETS:
            raise ValueError(
                f"target must be one of {VALID_TARGETS}, got {target!r}"
            )
        if rounds < 1:
            raise ValueError("rounds must be >= 1")

        personas = list(personas) if personas else list(DEFAULT_PERSONAS)

        # Round 1: independent review (parallel when supported).
        critiques = self._round_one(personas, component_code, spec, target)

        # Round 2: cross-critique.
        if rounds >= 2:
            critiques = self._round_two(
                personas, critiques, component_code, spec, target
            )

        # Round 3: synthesis.
        refined = component_code
        if rounds >= 3:
            refined = self._synthesize(component_code, critiques, spec, target)

        blocks = [c for c in critiques if c.severity == "block"]
        consensus = bool(critiques) and all(c.approves for c in critiques)

        return DebateResult(
            rounds=rounds,
            critiques=critiques,
            refined_code=refined,
            consensus=consensus,
            blocks=blocks,
            human_needed=bool(blocks),
        )

    # ---- Rounds -------------------------------------------------------

    def _fan_out(self, personas: list, review_one) -> list:
        """Call ``review_one(index, name)`` for each persona, keeping order.

        Shared by both review rounds. Uses one worker thread per persona
        when ``self.parallel`` is set and there is more than one persona,
        otherwise runs sequentially. Results are keyed by position rather
        than by name, so a persona listed twice gets two separate critiques.
        Any exception from a review becomes a fail-safe critique so one
        broken persona cannot stop the debate.
        """
        if self.parallel and len(personas) > 1:
            results: dict = {}
            with ThreadPoolExecutor(max_workers=len(personas)) as pool:
                pending = {
                    pool.submit(review_one, idx, name): idx
                    for idx, name in enumerate(personas)
                }
                for fut in as_completed(pending):
                    idx = pending[fut]
                    try:
                        results[idx] = fut.result()
                    except Exception as exc:  # keep the debate moving
                        results[idx] = self._critique_from_error(
                            personas[idx], exc
                        )
            # Preserve original persona ordering.
            return [results[idx] for idx in range(len(personas))]

        # Sequential path (degraded providers, or parallel disabled).
        critiques = []
        for idx, name in enumerate(personas):
            try:
                critiques.append(review_one(idx, name))
            except Exception as exc:
                critiques.append(self._critique_from_error(name, exc))
        return critiques

    def _round_one(
        self,
        personas: list,
        code: str,
        spec: str,
        target: str,
    ) -> list:
        """Round 1: each persona reviews independently.

        Returns one Critique per entry of ``personas``, in the same order.
        """

        def _first_pass(_idx: int, name: str) -> Critique:
            return self._review(name, code, spec, target)

        return self._fan_out(personas, _first_pass)

    def _round_two(
        self,
        personas: list,
        initial: list,
        code: str,
        spec: str,
        target: str,
    ) -> list:
        """Round 2: each persona responds after reading the others.

        Args:
            personas: Persona names, aligned by index with ``initial``.
            initial: Round-1 critiques.
            code: Component source under review.
            spec: Spec the component was generated against.
            target: Component flavour, forwarded to ``_review``.

        Returns:
            One revised Critique per persona, in the original order.
        """

        def _revise(idx: int, name: str) -> Critique:
            # A persona sees everyone else's critique plus its own earlier one.
            others = [c for j, c in enumerate(initial) if j != idx]
            others_text = self._summarize_critiques(others) or "(no other critiques)"
            return self._review(
                persona_name=name,
                code=code,
                spec=spec,
                target=target,
                prior_critiques_text=others_text,
                prior_self_critique=initial[idx],
                round_label="round-2",
            )

        return self._fan_out(personas, _revise)

    def _synthesize(
        self,
        code: str,
        critiques: list,
        spec: str,
        target: str,
    ) -> str:
        """Round 3: synthesize critiques into a refined component.

        Returns:
            The provider's output with surrounding code fences removed. The
            original ``code`` is returned when the provider fails with a
            DebateProviderError or produces only whitespace.
        """
        summary = self._summarize_critiques(critiques)
        target_note = (
            "React component (functional, hooks, TypeScript if already used)."
            if target == "react"
            else "Web Component (custom element extending HTMLElement, Shadow DOM if already used)."
        )
        # The template is dedented before formatting, so multi-line values
        # substituted below cannot disturb the dedent.
        prompt = textwrap.dedent(
            """
            You are a synthesizing editor. You have a generated component and
            four expert critiques from a creative developer, a conservative
            senior engineer, an accessibility advocate, and a performance
            engineer. Produce a refined version of the component that
            incorporates the VALID critiques.

            Rules:
            - Preserve the component's public API (props, exports) unless a
            critique calls out a breaking-change bug.
            - When critiques conflict, prefer: accessibility > correctness >
            performance > delight. Cite your reasoning in a short comment
            at the top of the file only if a meaningful trade-off was made.
            - Do not invent new dependencies. Only import from packages the
            original code already imports from.
            - Do not add TODO comments. Fix issues inline or leave them.
            - Output ONLY the final source code. No prose, no markdown fence,
            no explanation.

            TARGET: {target_note}

            SPEC:
            {spec}

            CRITIQUES:
            {summary}

            ORIGINAL CODE:
            {code}

            Return the refined code now.
            """
        ).strip().format(
            target_note=target_note,
            spec=self._truncate(spec, MAX_PROMPT_SPEC_CHARS),
            summary=summary or "(no critiques)",
            code=self._truncate(code, MAX_PROMPT_CODE_CHARS),
        )

        try:
            response = self._invoke_provider(prompt)
        except DebateProviderError:
            # Synthesis failure is non-fatal; fall back to original code.
            return code

        return self._strip_code_fences(response).strip() or code

    # ---- Single-persona review ----------------------------------------

    def _review(
        self,
        persona_name: str,
        code: str,
        spec: str,
        target: str,
        prior_critiques_text: str = "",
        prior_self_critique: Optional[Critique] = None,
        round_label: str = "round-1",
    ) -> Critique:
        """Invoke one persona and parse its critique.

        Args:
            persona_name: Persona whose prompt file is loaded.
            code: Component source to review (truncated for the prompt).
            spec: Spec text (truncated for the prompt).
            target: 'react' selects the React wording, anything else the
                Web Component wording.
            prior_critiques_text: Formatted critiques from other personas;
                omitted from the prompt when empty.
            prior_self_critique: This persona's earlier critique, if any.
            round_label: Label written into the prompt.

        Returns:
            The parsed Critique.

        Raises:
            FileNotFoundError: If the persona prompt is missing.
            DebateProviderError: If the provider invocation fails.
        """
        persona_prompt = self.load_persona(persona_name)
        target_note = (
            "The component is a React functional component."
            if target == "react"
            else "The component is a Web Component (custom element)."
        )

        prior_block = ""
        if prior_critiques_text:
            prior_block = (
                "\n\nOTHER REVIEWERS SAID (use this to refine or defend your "
                "position; do not repeat their points unless you agree):\n"
                f"{prior_critiques_text}"
            )
        if prior_self_critique is not None:
            prior_block += (
                "\n\nYOUR EARLIER CRITIQUE (you may escalate, soften, or "
                "retract items in light of the discussion):\n"
                f"{self._format_single_critique(prior_self_critique)}"
            )

        full_prompt = textwrap.dedent(
            """
            {persona_prompt}

            ---

            REVIEW ROUND: {round_label}
            {target_note}

            SPEC:
            {spec}

            CODE TO REVIEW:
            {code}
            {prior_block}

            Respond with a SINGLE JSON object and nothing else. The JSON must
            have exactly these keys: "severity", "issues", "suggestions",
            "approves". "severity" must be one of: "info", "suggestion",
            "warning", "block". "issues" and "suggestions" must be arrays of
            short strings (each a concrete, specific item). "approves" must be
            a boolean. Do not wrap the JSON in markdown fences.
            """
        ).strip().format(
            persona_prompt=persona_prompt,
            round_label=round_label,
            target_note=target_note,
            spec=self._truncate(spec, MAX_PROMPT_SPEC_CHARS),
            code=self._truncate(code, MAX_PROMPT_CODE_CHARS),
            prior_block=prior_block,
        )

        response = self._invoke_provider(full_prompt)
        return self._parse_critique(persona_name, response)

    # ---- Provider invocation ------------------------------------------

    def _invoke_provider(self, prompt: str) -> str:
        """Invoke the configured provider CLI and return stdout.

        Mirrors the subprocess pattern used by ComponentGenerator: each
        provider has its own flag layout for autonomous + prompt input.

        Raises:
            DebateProviderError: If the provider is unknown, its CLI is not
                on PATH, the process cannot be spawned, it times out, or it
                exits with a non-zero status.
        """
        cmd = self._build_command(prompt)
        cli = cmd[0]
        if shutil.which(cli) is None:
            raise DebateProviderError(
                f"Provider CLI '{cli}' not found on PATH. "
                f"Install it or pick a different provider."
            )

        try:
            completed = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=self.timeout,
                check=False,
                cwd=str(self.project_dir),
            )
        except subprocess.TimeoutExpired as exc:
            raise DebateProviderError(
                f"Provider '{self.provider}' timed out after {self.timeout}s"
            ) from exc
        except OSError as exc:
            raise DebateProviderError(
                f"Failed to spawn provider '{self.provider}': {exc}"
            ) from exc

        if completed.returncode != 0:
            stderr = (completed.stderr or "").strip()
            # Only the first 400 characters of stderr go into the message.
            raise DebateProviderError(
                f"Provider '{self.provider}' exited with code "
                f"{completed.returncode}: {stderr[:400]}"
            )

        return completed.stdout or ""

    def _build_command(self, prompt: str) -> list:
        """Map self.provider to an argv list.

        The provider name is matched case-insensitively and the prompt is
        always the final argv element.

        Raises:
            DebateProviderError: If the provider is not recognised.
        """
        argv_prefixes = {
            "claude": ["claude", "--dangerously-skip-permissions", "-p"],
            # Codex uses `exec --full-auto` with the prompt as positional.
            "codex": ["codex", "exec", "--full-auto"],
            "gemini": ["gemini", "--approval-mode=yolo"],
            "cline": ["cline", "--auto", "-p"],
            "aider": ["aider", "--yes", "--message"],
        }
        prefix = argv_prefixes.get(self.provider.lower())
        if prefix is None:
            raise DebateProviderError(f"Unknown provider: {self.provider!r}")
        return [*prefix, prompt]

    # ---- Parsing ------------------------------------------------------

    def _parse_critique(self, persona: str, response: str) -> Critique:
        """Parse a provider response into a Critique.

        Handles: fenced JSON, stray prose around JSON, and malformed JSON.
        Always returns a Critique -- parse errors are recorded but never
        raised so the debate can proceed.

        Normalisation applied to a parsed payload:
          - an unrecognised ``severity`` becomes ``"info"``;
          - ``approves`` accepts a bool or a "true"/"yes"/"1" string and is
            otherwise inferred from the severity;
          - a ``block`` severity always forces ``approves`` to False.
        """
        raw = response or ""
        payload = self._extract_json(raw)

        if payload is None:
            return Critique(
                persona=persona,
                severity="info",
                issues=[],
                suggestions=[],
                approves=True,
                raw_response=raw,
                parse_error="no JSON object found in response",
            )

        severity = str(payload.get("severity", "info")).strip().lower()
        if severity not in VALID_SEVERITIES:
            severity = "info"

        issues = self._coerce_string_list(payload.get("issues"))
        suggestions = self._coerce_string_list(payload.get("suggestions"))

        approves_raw = payload.get("approves")
        if isinstance(approves_raw, bool):
            approves = approves_raw
        elif isinstance(approves_raw, str):
            approves = approves_raw.strip().lower() in ("true", "yes", "1")
        else:
            # If the model omitted approves, infer from severity.
            approves = severity in ("info", "suggestion")

        # Safety: a blocking severity can never be approved.
        if severity == "block":
            approves = False

        return Critique(
            persona=persona,
            severity=severity,
            issues=issues,
            suggestions=suggestions,
            approves=approves,
            raw_response=raw,
        )

    @staticmethod
    def _extract_json(text: str) -> Optional[dict]:
        """Extract the first top-level JSON object from text.

        Tries, in order: parsing the whole text, parsing it with markdown
        fences stripped, then decoding an object at each ``{`` from left to
        right. A whole-text parse that yields a non-object (for example a
        JSON array wrapping the object) does not stop the search.

        Returns:
            The first dict found, or ``None`` when there is none.
        """
        stripped = text.strip()
        if not stripped:
            return None

        whole_candidates = [stripped]
        unfenced = DebateRunner._strip_code_fences(stripped)
        if unfenced != stripped:
            whole_candidates.append(unfenced)
        for candidate in whole_candidates:
            try:
                value = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(value, dict):
                return value

        # raw_decode parses one JSON value starting at the given index and
        # ignores whatever follows it, which is what an embedded object needs.
        decoder = json.JSONDecoder()
        start = stripped.find("{")
        while start != -1:
            try:
                value, _end = decoder.raw_decode(stripped, start)
            except json.JSONDecodeError:
                pass  # not valid JSON from here; try the next opening brace
            else:
                if isinstance(value, dict):
                    return value
            start = stripped.find("{", start + 1)

        return None

    @staticmethod
    def _coerce_string_list(value) -> list:
        """Normalise a JSON value into a list of non-empty, stripped strings.

        - ``None`` gives ``[]``.
        - A string gives a one-element list, or ``[]`` when blank.
        - A list keeps its string items; dict items contribute the first of
          their "issue"/"suggestion"/"text"/"message" keys that holds a
          string, or their JSON dump when none does. Other item types are
          skipped.
        - Any other value is wrapped as ``[str(value)]``.
        """
        if value is None:
            return []
        if isinstance(value, str):
            cleaned = value.strip()
            return [cleaned] if cleaned else []
        if not isinstance(value, list):
            return [str(value)]

        out = []
        for item in value:
            if isinstance(item, str):
                text = item.strip()
                if text:
                    out.append(text)
            elif isinstance(item, dict):
                # Gracefully flatten {"issue": "..."} style entries.
                for key in ("issue", "suggestion", "text", "message"):
                    if key in item and isinstance(item[key], str):
                        text = item[key].strip()
                        if text:
                            out.append(text)
                        break
                else:
                    out.append(json.dumps(item, ensure_ascii=False))
        return out

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Strip leading/trailing markdown code fences, if any.

        Text that does not start with a fence, or that is a single line, is
        returned stripped but otherwise unchanged.
        """
        s = text.strip()
        if not s.startswith("```"):
            return s
        # Drop the first fence line (optionally with language tag).
        first_newline = s.find("\n")
        if first_newline == -1:
            return s
        body = s[first_newline + 1 :].rstrip()
        if body.endswith("```"):
            body = body[:-3]
        return body.rstrip()

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Cap ``text`` at ``limit`` characters, appending a truncation note.

        ``None`` is treated as the empty string.
        """
        if text is None:
            return ""
        if len(text) <= limit:
            return text
        return text[:limit] + f"\n... [truncated at {limit} chars]"

    # ---- Formatting helpers -------------------------------------------

    @staticmethod
    def _format_single_critique(c: Critique) -> str:
        """Render one critique as the plain-text block used inside prompts."""
        issues = "\n".join(f" - {i}" for i in c.issues) or " (none)"
        suggestions = (
            "\n".join(f" - {s}" for s in c.suggestions) or " (none)"
        )
        approves = "yes" if c.approves else "no"
        return (
            f"[{c.persona}] severity={c.severity} approves={approves}\n"
            f" issues:\n{issues}\n"
            f" suggestions:\n{suggestions}"
        )

    def _summarize_critiques(self, critiques: Iterable[Critique]) -> str:
        """Join the rendered critiques with blank lines ('' when there are none)."""
        return "\n\n".join(self._format_single_critique(c) for c in critiques)

    @staticmethod
    def _critique_from_error(persona: str, exc: Exception) -> Critique:
        """Build a fail-safe Critique when a persona invocation errors.

        The debate must continue even if one persona goes down. We mark
        these as neutral ('info', approves=True) so they do not spuriously
        block consensus; the underlying error is preserved in parse_error.
        """
        return Critique(
            persona=persona,
            severity="info",
            issues=[],
            suggestions=[],
            approves=True,
            raw_response="",
            parse_error=f"{type(exc).__name__}: {exc}",
        )
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
# ---------------------------------------------------------------------------
|
|
699
|
+
# CLI entry point (optional convenience)
|
|
700
|
+
# ---------------------------------------------------------------------------
|
|
701
|
+
|
|
702
|
+
def _main(argv: list) -> int:
    """Minimal CLI for manual smoke-testing: ``python -m magic.core.debate``.

    Loads the component from --code-file and the spec from --spec-file,
    runs the debate and writes the DebateResult to stdout as indented JSON.
    The Python API is the supported contract; this is only a convenience.

    Returns:
        2 when the result needs a human (a persona blocked), otherwise 0.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Run a multi-persona debate.")
    for required_flag in ("--code-file", "--spec-file"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--provider", default="claude")
    cli.add_argument("--rounds", type=int, default=3)
    cli.add_argument("--target", default="react", choices=list(VALID_TARGETS))
    cli.add_argument("--personas", nargs="*", default=None)
    cli.add_argument("--no-parallel", action="store_true")
    cli.add_argument("--project-dir", default=".")
    opts = cli.parse_args(argv)

    # Both inputs are read before the runner is constructed.
    component_source = Path(opts.code_file).read_text(encoding="utf-8")
    spec_text = Path(opts.spec_file).read_text(encoding="utf-8")

    debate = DebateRunner(
        provider=opts.provider,
        project_dir=opts.project_dir,
        parallel=not opts.no_parallel,
    )
    outcome = debate.run_debate(
        component_code=component_source,
        spec=spec_text,
        personas=opts.personas,
        rounds=opts.rounds,
        target=opts.target,
    )

    rendered = json.dumps(outcome.to_dict(), indent=2)
    sys.stdout.write(rendered + "\n")
    if outcome.human_needed:
        return 2
    return 0


if __name__ == "__main__":
    raise SystemExit(_main(sys.argv[1:]))
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
# ---------------------------------------------------------------------------
|
|
746
|
+
# Module-level convenience API (called by autonomy/loki cmd_magic)
|
|
747
|
+
# ---------------------------------------------------------------------------
|
|
748
|
+
|
|
749
|
+
def run_debate(
    name: str,
    spec_path: str = "",
    component_path: str = "",
    rounds: int = 3,
    personas: Optional[list] = None,
    target: str = "react",
    project_dir: str = ".",
) -> dict:
    """Run a multi-persona debate and return the result as a dict.

    Args:
        name: Accepted for the caller's convenience; not used here.
        spec_path: Path to the spec file. An empty or non-file path means
            the debate runs with an empty spec.
        component_path: Path to the component source. An empty or non-file
            path means the debate runs on empty code.
        rounds: Number of debate rounds, forwarded to the runner.
        personas: Persona names, or None for the runner's defaults.
        target: 'react' or 'webcomponent'.
        project_dir: Project directory handed to the runner.

    Returns:
        A dict with keys rounds, consensus, human_needed, critiques,
        refined_code and blocks. Critiques are serialised with
        ``dataclasses.asdict`` and therefore include ``raw_response``.
        ``{"error": <message>}`` is returned if serialisation fails.

    Raises:
        ValueError: Propagated from the runner for an invalid ``target`` or
            ``rounds``.
    """
    from dataclasses import asdict

    def _read_if_file(raw_path: str) -> str:
        # is_file() (not exists()) so a directory path falls back to ""
        # instead of raising on read; UTF-8 matches the module's other reads.
        if raw_path and Path(raw_path).is_file():
            return Path(raw_path).read_text(encoding="utf-8")
        return ""

    spec = _read_if_file(spec_path)
    code = _read_if_file(component_path)

    runner = DebateRunner(project_dir=project_dir)
    result = runner.run_debate(
        component_code=code,
        spec=spec,
        personas=personas,
        rounds=rounds,
        target=target,
    )
    try:
        return {
            "rounds": result.rounds,
            "consensus": result.consensus,
            "human_needed": result.human_needed,
            "critiques": [asdict(c) for c in result.critiques],
            "refined_code": result.refined_code,
            "blocks": [asdict(b) for b in result.blocks],
        }
    except Exception as exc:
        return {"error": str(exc)}
|