moa-cli 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: moa-cli
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Ask one question to multiple local AI coding CLIs in parallel and collect their answers.
|
|
5
5
|
Keywords: llm,agents,cli,claude,codex,agy,opencode,peer-review
|
|
6
6
|
Author: Paul-Louis Pröve
|
|
@@ -180,7 +180,7 @@ The synthesizer default is persistable too (e.g. `moa config set synthesizer cod
|
|
|
180
180
|
|
|
181
181
|
### Output
|
|
182
182
|
|
|
183
|
-
- **stdout** carries only content: each agent's answer
|
|
183
|
+
- **stdout** carries only content: each agent's answer is fronted by a centered separator rule naming it (`──── claude (opus) · OK · 3.5s ────`) with blank lines around it for clear separation, flushed the instant that agent finishes. `moa distill` then appends the merged block (`──── synthesis · via claude · OK · ... ────`) once the aggregator finishes.
|
|
184
184
|
- **stderr** carries progress and selection notes (`Asking claude, codex ...`), so piping stdout stays clean.
|
|
185
185
|
- `--json` emits one JSON object per line (JSONL): a `{"type": "response", ...}` record per agent as it completes; `distill` then adds a `{"type": "synthesis", ...}` record. `debate` instead emits a `{"type": "debate_turn", "round": N, ...}` record per turn plus a final `{"type": "verdict", ...}` record. Ideal when another agent calls MOA and parses the result.
|
|
186
186
|
|
|
@@ -204,9 +204,9 @@ The aggregator prompt is adapted from the Mixture-of-Agents "Aggregate-and-Synth
|
|
|
204
204
|
|
|
205
205
|
**The loop.** Round 1: debater A answers cold; debater B sees A's answer with an adversarial-stance instruction ("identify errors/weaknesses before giving your own answer; do not agree merely to reach consensus"). Each later round, every debater sees the other's latest answer and responds in the same spirit. If every debater signals it has *no substantive change* (it may open its reply with `NO SUBSTANTIVE CHANGE`), the debate stops early before the cap.
|
|
206
206
|
|
|
207
|
-
**The judge.** A model that is **not** a debater reads the full transcript - presented **anonymized and order-shuffled** (a model is judging, so brand/position bias is killed, per item 002) - and writes the final answer. Its prompt instructs it to weigh correctness and evidence **above** confidence and fluency. The judge's verdict is the final block (
|
|
207
|
+
**The judge.** A model that is **not** a debater reads the full transcript - presented **anonymized and order-shuffled** (a model is judging, so brand/position bias is killed, per item 002) - and writes the final answer. Its prompt instructs it to weigh correctness and evidence **above** confidence and fluency. The judge's verdict is the final block (`──── verdict · judge <name> · ... ────`).
|
|
208
208
|
|
|
209
|
-
**Streaming/output.** Each debater's turn streams as it completes (
|
|
209
|
+
**Streaming/output.** Each debater's turn streams as it completes (`──── round N · <provider> · ... ────`), then the judge's verdict last. `--json` emits a `{"type": "debate_turn", "round": N, ...}` record per turn plus a final `{"type": "verdict", ...}` record.
|
|
210
210
|
|
|
211
211
|
**Safety.** Debaters and the judge run in the same read-only (or `--yolo`) mode as the other verbs - there is no permission bypass. agy's partial-sandbox caveat (shell only; it can still edit files) applies here too.
|
|
212
212
|
|
|
@@ -169,7 +169,7 @@ The synthesizer default is persistable too (e.g. `moa config set synthesizer cod
|
|
|
169
169
|
|
|
170
170
|
### Output
|
|
171
171
|
|
|
172
|
-
- **stdout** carries only content: each agent's answer
|
|
172
|
+
- **stdout** carries only content: each agent's answer is fronted by a centered separator rule naming it (`──── claude (opus) · OK · 3.5s ────`) with blank lines around it for clear separation, flushed the instant that agent finishes. `moa distill` then appends the merged block (`──── synthesis · via claude · OK · ... ────`) once the aggregator finishes.
|
|
173
173
|
- **stderr** carries progress and selection notes (`Asking claude, codex ...`), so piping stdout stays clean.
|
|
174
174
|
- `--json` emits one JSON object per line (JSONL): a `{"type": "response", ...}` record per agent as it completes; `distill` then adds a `{"type": "synthesis", ...}` record. `debate` instead emits a `{"type": "debate_turn", "round": N, ...}` record per turn plus a final `{"type": "verdict", ...}` record. Ideal when another agent calls MOA and parses the result.
|
|
175
175
|
|
|
@@ -193,9 +193,9 @@ The aggregator prompt is adapted from the Mixture-of-Agents "Aggregate-and-Synth
|
|
|
193
193
|
|
|
194
194
|
**The loop.** Round 1: debater A answers cold; debater B sees A's answer with an adversarial-stance instruction ("identify errors/weaknesses before giving your own answer; do not agree merely to reach consensus"). Each later round, every debater sees the other's latest answer and responds in the same spirit. If every debater signals it has *no substantive change* (it may open its reply with `NO SUBSTANTIVE CHANGE`), the debate stops early before the cap.
|
|
195
195
|
|
|
196
|
-
**The judge.** A model that is **not** a debater reads the full transcript - presented **anonymized and order-shuffled** (a model is judging, so brand/position bias is killed, per item 002) - and writes the final answer. Its prompt instructs it to weigh correctness and evidence **above** confidence and fluency. The judge's verdict is the final block (
|
|
196
|
+
**The judge.** A model that is **not** a debater reads the full transcript - presented **anonymized and order-shuffled** (a model is judging, so brand/position bias is killed, per item 002) - and writes the final answer. Its prompt instructs it to weigh correctness and evidence **above** confidence and fluency. The judge's verdict is the final block (`──── verdict · judge <name> · ... ────`).
|
|
197
197
|
|
|
198
|
-
**Streaming/output.** Each debater's turn streams as it completes (
|
|
198
|
+
**Streaming/output.** Each debater's turn streams as it completes (`──── round N · <provider> · ... ────`), then the judge's verdict last. `--json` emits a `{"type": "debate_turn", "round": N, ...}` record per turn plus a final `{"type": "verdict", ...}` record.
|
|
199
199
|
|
|
200
200
|
**Safety.** Debaters and the judge run in the same read-only (or `--yolo`) mode as the other verbs - there is no permission bypass. agy's partial-sandbox caveat (shell only; it can still edit files) applies here too.
|
|
201
201
|
|
|
@@ -532,11 +532,27 @@ def build_judge_prompt(
|
|
|
532
532
|
|
|
533
533
|
_STATUS_LABELS = {"ok": "OK", "failed": "FAILED", "timeout": "TIMEOUT", "missing": "MISSING"}
|
|
534
534
|
|
|
535
|
+
# Width of the separator rule that fronts each answer block. Fixed (not terminal-
|
|
536
|
+
# derived) so output is identical whether shown live or piped to a file.
|
|
537
|
+
_RULE_WIDTH = 60
|
|
538
|
+
|
|
535
539
|
|
|
536
540
|
def _status_label(status: str) -> str:
|
|
537
541
|
return _STATUS_LABELS.get(status, status.upper())
|
|
538
542
|
|
|
539
543
|
|
|
544
|
+
def _rule(label: str) -> str:
|
|
545
|
+
"""A centered, box-drawing separator that names the block, e.g.
|
|
546
|
+
`──────── claude (opus) · OK · 2.3s ────────`. Falls back to the bare label
|
|
547
|
+
when it's wider than the rule."""
|
|
548
|
+
text = f" {label} "
|
|
549
|
+
if len(text) >= _RULE_WIDTH:
|
|
550
|
+
return text.strip()
|
|
551
|
+
pad = _RULE_WIDTH - len(text)
|
|
552
|
+
left = pad // 2
|
|
553
|
+
return "─" * left + text + "─" * (pad - left)
|
|
554
|
+
|
|
555
|
+
|
|
540
556
|
def _body(result: RunResult) -> list[str]:
|
|
541
557
|
if result.status == "ok":
|
|
542
558
|
return [result.stdout.strip(), ""]
|
|
@@ -544,15 +560,21 @@ def _body(result: RunResult) -> list[str]:
|
|
|
544
560
|
return ["```text", detail[-1200:], "```", ""]
|
|
545
561
|
|
|
546
562
|
|
|
563
|
+
def _render(label: str, result: RunResult) -> str:
|
|
564
|
+
"""A block: two leading blank lines, the named rule, a blank line, the body.
|
|
565
|
+
The leading blanks give each answer clear breathing room as blocks stream."""
|
|
566
|
+
return "\n".join(["", "", _rule(label), "", *_body(result)])
|
|
567
|
+
|
|
568
|
+
|
|
547
569
|
def render_block(result: RunResult) -> str:
|
|
548
570
|
model = f" ({result.model})" if result.model else ""
|
|
549
|
-
|
|
550
|
-
return
|
|
571
|
+
label = f"{result.provider}{model} · {_status_label(result.status)} · {result.elapsed:.1f}s"
|
|
572
|
+
return _render(label, result)
|
|
551
573
|
|
|
552
574
|
|
|
553
575
|
def render_synthesis_block(result: RunResult, synthesizer: str) -> str:
|
|
554
|
-
|
|
555
|
-
return
|
|
576
|
+
label = f"synthesis · via {synthesizer} · {_status_label(result.status)} · {result.elapsed:.1f}s"
|
|
577
|
+
return _render(label, result)
|
|
556
578
|
|
|
557
579
|
|
|
558
580
|
def result_record(result: RunResult) -> dict:
|
|
@@ -581,16 +603,16 @@ def synthesis_record(result: RunResult, synthesizer: str) -> dict:
|
|
|
581
603
|
|
|
582
604
|
def render_debate_turn_block(result: RunResult, round_num: int) -> str:
|
|
583
605
|
model = f" ({result.model})" if result.model else ""
|
|
584
|
-
|
|
585
|
-
f"
|
|
586
|
-
f"{_status_label(result.status)}
|
|
606
|
+
label = (
|
|
607
|
+
f"round {round_num} · {result.provider}{model} · "
|
|
608
|
+
f"{_status_label(result.status)} · {result.elapsed:.1f}s"
|
|
587
609
|
)
|
|
588
|
-
return
|
|
610
|
+
return _render(label, result)
|
|
589
611
|
|
|
590
612
|
|
|
591
613
|
def render_judge_block(result: RunResult, judge: str) -> str:
|
|
592
|
-
|
|
593
|
-
return
|
|
614
|
+
label = f"verdict · judge {judge} · {_status_label(result.status)} · {result.elapsed:.1f}s"
|
|
615
|
+
return _render(label, result)
|
|
594
616
|
|
|
595
617
|
|
|
596
618
|
def debate_turn_record(result: RunResult, round_num: int) -> dict:
|