@lythos/skill-arena 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.9.1",
3
+ "version": "0.9.2",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
@@ -35,5 +35,9 @@
35
35
  "homepage": "https://github.com/lythos-labs/lythoskill/tree/main/packages/lythoskill-arena#readme",
36
36
  "engines": {
37
37
  "bun": ">=1.0.0"
38
+ },
39
+ "dependencies": {
40
+ "@lythos/test-utils": "^0.9.1",
41
+ "zod-to-json-schema": "^3.25.2"
38
42
  }
39
43
  }
package/src/cli.ts CHANGED
@@ -29,23 +29,31 @@ function printHelp(): void {
29
29
  console.log(`šŸŽ­ lythoskill-arena — Skill comparison runner
30
30
 
31
31
  Usage:
32
- lythoskill-arena --task "<task description>" --skills <skill1,skill2,...>
33
- lythoskill-arena --task "<task description>" --decks <deck1,deck2,...>
32
+ lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
33
+ lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
34
+ lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
34
35
  lythoskill-arena viz <arena-dir>
35
36
 
37
+ Commands:
38
+ run Run arena programmatically (cartesian player Ɨ deck → judge → report)
39
+ scaffold Create arena directory structure (legacy, manual subagent execution)
40
+ viz Visualize arena report (ASCII charts)
41
+
36
42
  Options:
37
- -t, --task <desc> Task description (required)
38
- -s, --skills <list> Comma-separated skill names
43
+ -t, --task <path|desc> Task description or path to TASK-arena.md
44
+ -s, --skills <list> Comma-separated skill names (scaffold only)
39
45
  --decks <list> Comma-separated deck paths
40
46
  -c, --criteria <list> Evaluation criteria (default: syntax,context,logic,token)
41
- --control <skill> Control skill for comparison (default: lythoskill-project-scribe)
42
- -d, --dir <dir> Output directory (default: tmp)
47
+ --players <list> Comma-separated player.toml paths (run only)
48
+ --control <skill> Control skill for comparison (scaffold only)
49
+ --out <dir> Output directory (run: defaults to runs/arena-<id>)
50
+ -d, --dir <dir> Output directory (scaffold: defaults to tmp)
43
51
  -p, --project <dir> Project directory (default: .)
44
52
 
45
53
  Examples:
46
- lythoskill-arena --task "Refactor auth module" --skills skill-a,skill-b
47
- lythoskill-arena --task "Write tests" --decks ./decks/minimal.toml,./decks/full.toml
48
- lythoskill-arena viz tmp/arena-20260430
54
+ lythoskill-arena run --task ./TASK-arena.md --players ./players/claude.toml,./players/kimi.toml --decks ./decks/run-01.toml,./decks/run-02.toml --criteria coverage,relevance
55
+ lythoskill-arena scaffold --task "Refactor auth module" --skills skill-a,skill-b
56
+ lythoskill-arena viz runs/arena-20260504
49
57
  `)
50
58
  }
51
59
 
@@ -551,6 +559,32 @@ function runViz(argv: string[]) {
551
559
  console.log(renderRadarChart(report))
552
560
  }
553
561
 
562
+ // ── Run: programmatic arena execution ───────────────────────
563
+
564
/**
 * `run` subcommand: executes an arena programmatically and prints where the
 * artifacts were written.
 *
 * Reads `--task`, `--decks`, `--players`, `--criteria`, `--out` and `--project`
 * from `argv`. `--task` and `--decks` are mandatory; `--players` and
 * `--criteria` fall back to built-in defaults. Exits the process with code 1
 * when a mandatory flag is missing or when a list flag contains no usable
 * entries (for example `--decks ","`), so an arena with zero participants is
 * never started.
 *
 * @param argv Arguments following the `run` subcommand.
 */
async function runProgrammaticArena(argv: string[]) {
  const { options } = parseArgs(argv)

  if (!options.task || !options.decks) {
    console.error('❌ --task <path> and --decks <list> are required for "run"')
    process.exit(1)
  }

  // Comma-separated flag value -> trimmed, non-empty entries.
  const splitList = (value: string): string[] =>
    value.split(',').map(s => s.trim()).filter(Boolean)

  const playerPaths = splitList(options.players ?? 'players/claude-code.toml')
  const deckPaths = splitList(options.decks)
  const criteria = splitList(options.criteria ?? 'syntax,context,logic,token')

  // A value such as "," or " , " is truthy but yields no entries; reject it
  // here instead of launching a run that has nothing to compare.
  const emptyFlags = [
    playerPaths.length === 0 ? '--players' : '',
    deckPaths.length === 0 ? '--decks' : '',
    criteria.length === 0 ? '--criteria' : '',
  ].filter(Boolean)
  if (emptyFlags.length > 0) {
    console.error(`❌ ${emptyFlags.join(', ')} must contain at least one non-empty entry`)
    process.exit(1)
  }

  // Loaded lazily so the other subcommands do not pay for the runner's imports.
  const { runArena: runArenaProgrammatic } = await import('./runner')

  const result = await runArenaProgrammatic({
    taskPath: options.task,
    playerPaths,
    deckPaths,
    criteria,
    outDir: options.out ?? `runs/arena-${timestamp()}`,
    projectDir: options.project,
  })

  console.log(`\n🎮 Arena complete: ${result.manifest.id}`)
  console.log(`📁 Artifacts: ${result.artifactsDir}`)
  console.log(`📊 Report: ${result.artifactsDir}/report.md`)
}
587
+
554
588
  // ── Main Entry ───────────────────────────────────────────────
555
589
 
556
590
  if (import.meta.main) {
@@ -559,7 +593,14 @@ if (import.meta.main) {
559
593
 
560
594
  if (cmd === 'viz') {
561
595
  runViz(args.slice(1))
596
+ } else if (cmd === 'run') {
597
+ runProgrammaticArena(args.slice(1))
598
+ } else if (cmd === 'scaffold' || !cmd || args[0]?.startsWith('-')) {
599
+ // Legacy behavior: if no subcommand or starts with flags, treat as scaffold
600
+ runArena(cmd === 'scaffold' ? args.slice(1) : args)
562
601
  } else {
563
- runArena(args)
602
+ console.error(`āŒ Unknown command: ${cmd}`)
603
+ printHelp()
604
+ process.exit(1)
564
605
  }
565
606
  }
@@ -0,0 +1,92 @@
1
+ import { describe, test, expect } from 'bun:test'
2
+ import { computePareto } from './comparative-judge'
3
+
4
/**
 * Specification for computePareto: a participant is dominated when some other
 * participant scores at least as high on every criterion and strictly higher
 * on at least one.
 */
describe('computePareto', () => {
  // Shorthand for one input row.
  const entry = (participant_id: string, scores: Record<string, number>) => ({ participant_id, scores })

  test('single participant is always non-dominated', () => {
    const outcome = computePareto([entry('run-01', { a: 5, b: 3 })])

    expect(outcome).toHaveLength(1)
    expect(outcome[0].dominated).toBe(false)
    expect(outcome[0].dominated_by).toEqual([])
  })

  test('clear dominance: run-01 dominates run-02 on all criteria', () => {
    const [winner, loser] = computePareto([
      entry('run-01', { coverage: 5, relevance: 5 }),
      entry('run-02', { coverage: 3, relevance: 2 }),
    ])

    expect(winner.dominated).toBe(false)
    expect(loser.dominated).toBe(true)
    expect(loser.dominated_by).toEqual(['run-01'])
  })

  test('equal scores: no one dominates', () => {
    const [first, second] = computePareto([
      entry('run-01', { a: 4, b: 4 }),
      entry('run-02', { a: 4, b: 4 }),
    ])

    expect(first.dominated).toBe(false)
    expect(second.dominated).toBe(false)
  })

  test('cross dominance: each wins on different criteria', () => {
    const [fast, accurate] = computePareto([
      entry('run-01', { speed: 5, accuracy: 2 }),
      entry('run-02', { speed: 2, accuracy: 5 }),
    ])

    // A trade-off: faster but less accurate versus the reverse, so both stay on the frontier.
    expect(fast.dominated).toBe(false)
    expect(accurate.dominated).toBe(false)
  })

  test('multi-participant: transitive dominance chain', () => {
    const [best, mid, worst] = computePareto([
      entry('best', { a: 5, b: 5, c: 5 }),
      entry('mid', { a: 4, b: 4, c: 4 }),
      entry('worst', { a: 2, b: 2, c: 2 }),
    ])

    expect(best.dominated).toBe(false)
    // mid loses only to best
    expect(mid.dominated).toBe(true)
    expect(mid.dominated_by).toEqual(['best'])
    // worst loses to both of the others; order of the list is not part of the contract
    expect(worst.dominated).toBe(true)
    expect(worst.dominated_by.sort()).toEqual(['best', 'mid'].sort())
  })

  test('Pareto frontier from playground BDD-research: run-01 dominates run-02', () => {
    // Scores taken from playground/arena-bdd-research/report.md:
    //   Run-01: coverage=5, relevance=5, actionability=5, depth=5
    //   Run-02: coverage=3, relevance=2, actionability=2, depth=1
    const [run01, run02] = computePareto([
      entry('run-01', { coverage: 5, relevance: 5, actionability: 5, depth: 5 }),
      entry('run-02', { coverage: 3, relevance: 2, actionability: 2, depth: 1 }),
    ])

    expect(run01.dominated).toBe(false) // Pareto-optimal
    expect(run02.dominated).toBe(true)
    expect(run02.dominated_by).toEqual(['run-01'])
  })

  test('empty scores object', () => {
    const outcome = computePareto([entry('a', {}), entry('b', {})])

    expect(outcome).toHaveLength(2)
    expect(outcome[0].dominated).toBe(false)
    expect(outcome[1].dominated).toBe(false)
  })

  test('partial criteria overlap', () => {
    const [run01, run02] = computePareto([
      entry('run-01', { a: 5, b: 3 }),
      entry('run-02', { a: 3, c: 5 }),
    ])

    // Criteria are unioned across participants and a missing score counts as 0:
    //   a: 5 vs 3 -> run-01 ahead
    //   b: 3 vs 0 -> run-01 ahead
    //   c: 0 vs 5 -> run-02 ahead
    // Each side wins somewhere, so neither dominates the other.
    expect(run01.dominated).toBe(false)
    expect(run02.dominated).toBe(false)
  })
})
@@ -0,0 +1,166 @@
1
+ import { zodToJsonSchema } from 'zod-to-json-schema'
2
+ import { ComparativeReport, ScoreCell, ParetoEntry } from '@lythos/test-utils/schema'
3
+ import type { AgentAdapter } from '@lythos/test-utils/agents'
4
+ import type { ArenaManifest } from '@lythos/test-utils/schema'
5
+
6
+ // ── Pareto Frontier (deterministic algorithm) ──────────────────────────────
7
+
8
/**
 * One participant's scores together with its Pareto status.
 */
export interface ScoreVector {
  /** Identifier of the participant the scores belong to. */
  participant_id: string
  /** Score per criterion name. */
  scores: Record<string, number>
  /** True when at least one other participant dominates this one. */
  dominated: boolean
  /** Identifiers of the participants that dominate this one. */
  dominated_by: string[]
}
14
+
15
/**
 * Marks every participant as dominated or non-dominated (Pareto frontier).
 *
 * `A` dominates `B` when `A`'s score is >= `B`'s on every criterion and > on at
 * least one. The criteria considered are the union of all keys seen in any
 * participant's `scores`; a criterion a participant has no score for counts
 * as 0. The computation is purely arithmetic and is never handed to an LLM.
 *
 * @param vectors Participants with their per-criterion scores.
 * @returns One entry per input, in input order, with a copy of the scores,
 *   the `dominated` flag and the ids of all dominating participants.
 */
export function computePareto(vectors: { participant_id: string; scores: Record<string, number> }[]): ParetoEntry[] {
  const criteria = Array.from(new Set(vectors.flatMap(v => Object.keys(v.scores))))

  // True when `left` is at least as good everywhere and strictly better somewhere.
  // With no criteria at all this is always false, so nobody is dominated.
  const dominates = (left: Record<string, number>, right: Record<string, number>): boolean => {
    let strictlyBetter = false
    for (const key of criteria) {
      const l = left[key] ?? 0
      const r = right[key] ?? 0
      if (!(l >= r)) return false
      if (l > r) strictlyBetter = true
    }
    return strictlyBetter
  }

  return vectors.map((candidate, candidateIdx) => {
    const dominatedBy: string[] = []

    vectors.forEach((rival, rivalIdx) => {
      if (rivalIdx === candidateIdx) return
      if (!dominates(rival.scores, candidate.scores)) return
      // Two inputs may share an id; list each dominating id only once.
      if (!dominatedBy.includes(rival.participant_id)) {
        dominatedBy.push(rival.participant_id)
      }
    })

    return {
      participant_id: candidate.participant_id,
      scores: { ...candidate.scores },
      dominated: dominatedBy.length > 0,
      dominated_by: dominatedBy,
    }
  })
}
54
+
55
+ // ── Comparative Judge Prompt ──────────────────────────────────────────────
56
+
57
/**
 * Builds the prompt handed to the comparative judge.
 *
 * The prompt lists the task, the participants, the criteria and the verdict
 * recorded for each participant, so the judge has evidence to score against.
 * Verdicts are serialized as JSON; a participant without a verdict is shown
 * as such, and verdicts whose id is not in the manifest are still included
 * rather than silently dropped.
 *
 * @param opts.manifest Arena manifest; `task`, `criteria` and `participants` are read.
 * @param opts.verdicts Per-participant verdicts produced by the runner.
 * @returns The full prompt text.
 */
function buildComparativePrompt(opts: {
  manifest: ArenaManifest
  verdicts: { participantId: string; verdict: unknown }[]
}): string {
  const { manifest, verdicts } = opts

  const criteriaDesc = manifest.criteria.join(', ')
  const participants = manifest.participants
    .map(p => `- ${p.id}: ${p.name} (${p.description || 'no description'})`)
    .join('\n')

  // JSON.stringify throws on cycles/BigInt and yields undefined for undefined;
  // fall back to String() so prompt construction itself never fails.
  const renderVerdict = (verdict: unknown): string => {
    try {
      return JSON.stringify(verdict, null, 2) ?? String(verdict)
    } catch {
      return String(verdict)
    }
  }

  const knownIds = new Set<string>(manifest.participants.map(p => p.id))
  const verdictBlocks: string[] = [
    ...manifest.participants.map(p => {
      const recorded = verdicts.find(v => v.participantId === p.id)
      return `### ${p.id}\n${recorded ? renderVerdict(recorded.verdict) : '(no verdict recorded)'}`
    }),
    ...verdicts
      .filter(v => !knownIds.has(v.participantId))
      .map(v => `### ${v.participantId}\n${renderVerdict(v.verdict)}`),
  ]
  const verdictSection = verdictBlocks.length > 0 ? verdictBlocks.join('\n\n') : '(no verdicts recorded)'

  return `You are a comparative judge evaluating ${manifest.participants.length} participants against shared criteria.

## Task
${manifest.task}

## Participants
${participants}

## Criteria
${criteriaDesc}

## Individual Verdicts
The outcome recorded for each participant's run (JSON). Base your scores on this evidence.

${verdictSection}

## Your Job
For each participant, score them 1-5 on each criterion. Provide a brief rationale.
Score meanings: 1=poor, 3=acceptable, 5=excellent.

Use the submit_scores tool to return your structured evaluation.`
}
83
+
84
/**
 * Tool definition offered to the judge. Its input schema is the JSON Schema
 * of the three ComparativeReport fields the judge is asked to fill in:
 * `score_matrix`, `key_findings` and `recommendations`.
 */
const SCORE_TOOL = {
  name: 'submit_scores',
  description: 'Submit per-participant scores for each criterion with rationales',
  input_schema: zodToJsonSchema(
    ComparativeReport.pick({
      score_matrix: true,
      key_findings: true,
      recommendations: true,
    }),
  ) as Record<string, unknown>,
}
89
+
90
/**
 * Validates each raw score row against the ScoreCell schema.
 *
 * @param manifest Arena manifest; accepted for signature symmetry and not read here.
 * @param scores Raw rows as returned by the judge.
 * @returns The parsed cells, in input order. ScoreCell.parse throws on the first invalid row.
 */
function toScoreMatrix(
  manifest: ArenaManifest,
  scores: { participant_id: string; criterion: string; weight: number; score: number; rationale: string }[]
): typeof ScoreCell._output[] {
  const cells: typeof ScoreCell._output[] = []
  for (const row of scores) {
    cells.push(ScoreCell.parse(row))
  }
  return cells
}
96
+
97
+ // ── Comparative Judge ─────────────────────────────────────────────────────
98
+
99
/**
 * Asks the judge agent to score every participant, then assembles the
 * comparative report.
 *
 * When the adapter exposes `invokeTool`, the judge answers through the
 * `submit_scores` tool. Otherwise the judge is spawned with a brief that
 * explicitly asks for a fenced JSON object matching the tool's schema, and
 * that JSON is extracted from stdout. Either way the answer is validated
 * against the ComparativeReport schema before use.
 *
 * The Pareto frontier and the weighted totals are computed locally from the
 * validated score matrix; they are never taken from the LLM answer.
 *
 * @param opts.manifest Arena manifest (participants, criteria, id).
 * @param opts.verdicts Per-participant verdicts forwarded into the prompt.
 * @param opts.judge Agent adapter acting as judge.
 * @param opts.workdir Working directory passed to the judge.
 * @returns The validated comparative report.
 * @throws SyntaxError when the spawned judge's output is not parseable JSON.
 * @throws When the judge answer or the assembled report fails schema validation.
 */
export async function runComparativeJudge(opts: {
  manifest: ArenaManifest
  verdicts: { participantId: string; verdict: unknown }[]
  judge: AgentAdapter
  workdir: string
}): Promise<typeof ComparativeReport._output> {
  const { manifest, verdicts, judge, workdir } = opts
  const judgeTimeoutMs = 120000

  const prompt = buildComparativePrompt({ manifest, verdicts })

  let parsed: unknown

  if (judge.invokeTool) {
    parsed = await judge.invokeTool({
      tool: SCORE_TOOL,
      prompt,
      cwd: workdir,
      timeoutMs: judgeTimeoutMs,
    })
  } else {
    // The shared prompt ends by asking for a tool call. A spawned agent has no
    // such tool, so spell out the JSON contract it must print instead.
    const brief = [
      prompt,
      '',
      'The submit_scores tool is not available in this session.',
      'Instead, reply with exactly one JSON object inside a ```json fenced block.',
      'The object must conform to this JSON Schema:',
      JSON.stringify(SCORE_TOOL.input_schema, null, 2),
    ].join('\n')

    const result = await judge.spawn({ cwd: workdir, brief, timeoutMs: judgeTimeoutMs })
    const raw = result.stdout
    // Prefer the first fenced block; fall back to treating all of stdout as JSON.
    const fenceMatch = raw.match(/```(?:json)?\s*([\s\S]*?)\s*```/)
    const jsonStr = fenceMatch ? fenceMatch[1].trim() : raw.trim()
    try {
      parsed = JSON.parse(jsonStr)
    } catch (e) {
      const detail = e instanceof Error ? e.message : String(e)
      throw new SyntaxError(
        `Comparative judge did not return valid JSON (${detail}). Output started with: ${raw.slice(0, 300)}`
      )
    }
  }

  // Validate LLM output
  const llmResult = ComparativeReport.pick({
    score_matrix: true,
    key_findings: true,
    recommendations: true,
  }).parse(parsed)

  const scoreMatrix = toScoreMatrix(manifest, llmResult.score_matrix)

  // Pareto: deterministic, never delegated to LLM
  const participantScores = manifest.participants.map(p => {
    const pScores: Record<string, number> = {}
    for (const cell of scoreMatrix) {
      if (cell.participant_id === p.id) {
        pScores[cell.criterion] = cell.score
      }
    }
    return { participant_id: p.id, scores: pScores }
  })

  const pareto = computePareto(participantScores)

  // Weighted mean per participant: sum(score * weight) / sum(weight).
  // Dividing by the total weight (not the number of cells) keeps the result on
  // the score scale whatever scale the weights use; with equal weights this is
  // the plain average. A participant with no cells or zero total weight gets 0.
  const weightedTotals: Record<string, number> = {}
  for (const p of manifest.participants) {
    const pCells = scoreMatrix.filter(c => c.participant_id === p.id)
    const weightedSum = pCells.reduce((sum, c) => sum + c.score * c.weight, 0)
    const totalWeight = pCells.reduce((sum, c) => sum + c.weight, 0)
    weightedTotals[p.id] = totalWeight > 0 ? weightedSum / totalWeight : 0
  }

  return ComparativeReport.parse({
    arena_id: manifest.id,
    generated_at: new Date().toISOString(),
    score_matrix: scoreMatrix,
    weighted_totals: weightedTotals,
    pareto,
    key_findings: llmResult.key_findings ?? [],
    recommendations: llmResult.recommendations ?? [],
  })
}
package/src/runner.ts ADDED
@@ -0,0 +1,187 @@
1
+ import { mkdirSync, writeFileSync, existsSync, readFileSync } from 'node:fs'
2
+ import { join, resolve } from 'node:path'
3
+ import { runAgentScenario, type AgentScenario } from '@lythos/test-utils/agent-bdd'
4
+ import { useAgent } from '@lythos/test-utils/agents'
5
+ import { ArenaManifest, Player, type ArenaManifest as ArenaManifestType } from '@lythos/test-utils/schema'
6
+ import { runComparativeJudge } from './comparative-judge'
7
+
8
+ // ── Helpers ───────────────────────────────────────────────────────────────
9
+
10
/**
 * Current local time as `YYYYMMDD-HHMMSS`; month, day, hour, minute and
 * second are zero-padded to two digits.
 */
function stamp(): string {
  const now = new Date()
  const two = (value: number): string => String(value).padStart(2, '0')

  const datePart = `${now.getFullYear()}${two(now.getMonth() + 1)}${two(now.getDate())}`
  const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()].map(v => two(v)).join('')

  return `${datePart}-${timePart}`
}
14
+
15
/**
 * Cartesian product of the given arrays.
 *
 * Combinations are ordered with the first array varying slowest. An empty
 * list of arrays yields a single empty combination; any empty input array
 * yields no combinations.
 */
function cartesian<T>(arrays: T[][]): T[][] {
  // Grow every partial combination by one element per input array.
  return arrays.reduce<T[][]>(
    (partials, options) => partials.flatMap(prefix => options.map(item => [...prefix, item])),
    [[]],
  )
}
21
+
22
/**
 * Turns arbitrary text into a lowercase, hyphen-separated slug of at most
 * 40 characters containing only `a-z`, `0-9` and `-`.
 *
 * Runs of other characters collapse to a single hyphen, and the result never
 * starts or ends with a hyphen, including when truncation cuts right after a
 * separator.
 */
function slugify(input: string): string {
  return input
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 40)
    // Truncation can land directly after a separator; drop the dangling hyphen.
    .replace(/-+$/, '')
}
25
+
26
+ // ── Runner ────────────────────────────────────────────────────────────────
27
+
28
/**
 * Runs a full arena: every player is paired with every deck, each pairing is
 * executed as an agent scenario, the outcomes are handed to the comparative
 * judge, and `arena.json` plus `report.md` are written to the artifacts
 * directory.
 *
 * Player files are parsed as JSON when their content is valid JSON and as
 * TOML otherwise, so both `.toml` and JSON player definitions load. A
 * scenario that throws is recorded as an `ERROR` verdict for its participant
 * and does not abort the remaining runs. An exception from the judge does
 * propagate; `arena.json` then keeps the status `running` written earlier.
 *
 * @param opts.taskPath Path to the task/scenario file.
 * @param opts.playerPaths Paths to player definition files.
 * @param opts.deckPaths Paths to deck files; each is copied into the run's
 *   workdir as `skill-deck.toml`.
 * @param opts.criteria Criterion names the judge scores against.
 * @param opts.outDir Artifacts directory; when empty, `runs/<arena-id>` under
 *   the current working directory is used.
 * @returns The completed manifest, the judge's report and the artifacts directory.
 */
export async function runArena(opts: {
  taskPath: string
  playerPaths: string[]
  deckPaths: string[]
  criteria: string[]
  outDir: string
  projectDir?: string
}): Promise<{ manifest: ArenaManifestType; report: unknown; artifactsDir: string }> {
  const { taskPath, playerPaths, deckPaths, criteria, outDir } = opts

  // Last path segment without its ".toml" suffix. Splits on both separators
  // because resolve() returns backslash paths on Windows.
  const fileLabel = (p: string): string | undefined => p.split(/[\\/]/).pop()?.replace('.toml', '')

  // JSON first keeps every previously working input working; anything else is
  // treated as TOML, which is what the CLI documents for player files.
  const parsePlayerFile = (path: string, content: string): unknown => {
    try {
      return JSON.parse(content)
    } catch {
      // Not JSON; fall through to TOML.
    }
    try {
      return Bun.TOML.parse(content)
    } catch (e) {
      throw new Error(`Cannot parse player file ${path} as TOML or JSON: ${e instanceof Error ? e.message : String(e)}`)
    }
  }

  // Load players
  const players = playerPaths.map(p => {
    const content = readFileSync(resolve(p), 'utf-8')
    const parsed = Player.parse(parsePlayerFile(p, content))
    return { path: p, ...parsed }
  })

  // Load deck labels from deck paths
  const decks = deckPaths.map(p => ({ path: resolve(p) }))

  // Build (player × deck) variant matrix. Players vary slowest. Nested
  // iteration keeps `player` and `deck` separately typed, which a generic
  // product over one mixed array cannot.
  const variants = players
    .flatMap(player => decks.map(deck => ({ player, deck_path: deck.path })))
    .map((pair, i) => ({
      participant_id: `run-${String(i + 1).padStart(2, '0')}`,
      player: pair.player,
      deck_path: pair.deck_path,
    }))

  // Build arena manifest
  const arenaId = `arena-${stamp()}`
  const artifactsDir = outDir || join(process.cwd(), 'runs', arenaId)

  const manifest = ArenaManifest.parse({
    id: arenaId,
    created_at: new Date().toISOString(),
    // Only the first 200 characters of the task file are stored in the manifest.
    task: readFileSync(resolve(taskPath), 'utf-8').slice(0, 200),
    mode: 'decks',
    participants: variants.map(v => ({
      id: v.participant_id,
      name: fileLabel(v.player.path) ?? v.player.platform,
      player: v.player.platform,
      deck: v.deck_path,
      description: `${v.player.platform} × ${fileLabel(v.deck_path)}`,
    })),
    criteria,
    status: 'running',
  })

  mkdirSync(artifactsDir, { recursive: true })
  writeFileSync(join(artifactsDir, 'arena.json'), JSON.stringify(manifest, null, 2) + '\n')

  // Run each variant sequentially
  const verdicts: { participantId: string; verdict: unknown }[] = []

  for (const variant of variants) {
    const cellDir = join(artifactsDir, 'runs', variant.participant_id)
    mkdirSync(cellDir, { recursive: true })

    try {
      const result = await runAgentScenario({
        scenarioPath: resolve(taskPath),
        agent: useAgent(variant.player.platform),
        setupWorkdir(_scenario: AgentScenario, workdir: string) {
          mkdirSync(workdir, { recursive: true })
          // Write deck.toml as skill-deck.toml
          const deckContent = readFileSync(variant.deck_path, 'utf-8')
          writeFileSync(join(workdir, 'skill-deck.toml'), deckContent)
        },
        baseDir: artifactsDir,
      })

      verdicts.push({
        participantId: variant.participant_id,
        verdict: result.verdict,
      })
    } catch (e) {
      // One failing run must not abort the comparison; record it for the judge.
      verdicts.push({
        participantId: variant.participant_id,
        verdict: {
          verdict: 'ERROR' as const,
          reason: `Runner exception: ${e instanceof Error ? e.message : String(e)}`,
        },
      })
    }
  }

  // Run comparative judge (uses the first player's platform, or 'claude' when there is none)
  const judge = useAgent(players[0]?.platform ?? 'claude')
  const report = await runComparativeJudge({
    manifest,
    verdicts,
    judge,
    workdir: artifactsDir,
  })

  // Write report
  writeFileSync(join(artifactsDir, 'report.md'), `# Arena Report: ${manifest.id}

**Task**: ${manifest.task}
**Criteria**: ${manifest.criteria.join(', ')}
**Date**: ${new Date().toISOString()}

## Score Matrix
${renderScoreMatrix(report)}

## Pareto Frontier
${renderPareto(report)}

## Key Findings
${(report.key_findings ?? []).map((f: string) => `- ${f}`).join('\n')}

## Recommendations
${(report.recommendations ?? []).map((r: { audience: string; recommendation: string }) => `- **${r.audience}**: ${r.recommendation}`).join('\n')}
`)

  // Update manifest status
  const finalManifest = ArenaManifest.parse({ ...manifest, status: 'completed' })
  writeFileSync(join(artifactsDir, 'arena.json'), JSON.stringify(finalManifest, null, 2) + '\n')

  return { manifest: finalManifest, report, artifactsDir }
}
148
+
149
+ // ── Markdown Renderers ────────────────────────────────────────────────────
150
+
151
/**
 * Renders the score matrix as a Markdown table: one row per criterion, one
 * column per participant, followed by a weighted-total row.
 *
 * The weight column shows each criterion's share of the total weight, taken
 * from the first cell found for that criterion (four equally weighted
 * criteria show 25% each). The total row is the weighted mean of each
 * participant's scores. A non-finite or negative weight counts as 1, and if
 * the weights sum to zero all criteria are treated as equal.
 *
 * @param report Object carrying an optional `score_matrix`.
 * @returns The Markdown table, or a placeholder line when there are no scores.
 */
function renderScoreMatrix(report: unknown & { score_matrix?: { participant_id: string; criterion: string; weight: number; score: number; rationale: string }[] }): string {
  const cells = report.score_matrix
  if (!cells?.length) return 'No scores available.\n'

  // Build participant × criterion matrix
  const participants = [...new Set(cells.map(s => s.participant_id))]
  const criteria = [...new Set(cells.map(s => s.criterion))]

  const usableWeight = (w: number): number => (Number.isFinite(w) && w >= 0 ? w : 1)

  // Weight per criterion, from the first cell that mentions it.
  const rawWeights = new Map<string, number>()
  for (const cell of cells) {
    if (!rawWeights.has(cell.criterion)) rawWeights.set(cell.criterion, usableWeight(cell.weight))
  }
  const rawTotal = criteria.reduce((sum, c) => sum + (rawWeights.get(c) ?? 1), 0)
  // All-zero weights carry no information; fall back to equal weighting.
  const weightOf = (criterion: string): number => (rawTotal > 0 ? rawWeights.get(criterion) ?? 1 : 1)
  const totalWeight = criteria.reduce((sum, c) => sum + weightOf(c), 0)

  let table = `| Criterion | Weight | ${participants.map(p => `${p}`).join(' | ')} |\n`
  table += `|${'---|'.repeat(2 + participants.length)}\n`

  for (const c of criteria) {
    const share = Math.round((weightOf(c) / totalWeight) * 100)
    table += `| ${c} | ${share}% | ${participants.map(p => {
      const cell = cells.find(s => s.participant_id === p && s.criterion === c)
      return `**${cell?.score ?? '?'}**`
    }).join(' | ')} |\n`
  }

  // Weighted totals
  table += `| **Weighted Total** | 100% | ${participants.map(p => {
    const own = cells.filter(s => s.participant_id === p)
    const ownWeight = own.reduce((sum, s) => sum + weightOf(s.criterion), 0)
    const weighted = own.reduce((sum, s) => sum + s.score * weightOf(s.criterion), 0)
    const mean = ownWeight > 0 ? weighted / ownWeight : 0
    return `**${mean.toFixed(1)}**`
  }).join(' | ')} |\n`

  return table
}
177
+
178
/**
 * Renders the Pareto analysis as a Markdown bullet list, one line per
 * participant: either the ids that dominate it, or a note that it is
 * Pareto-optimal.
 *
 * @param report Object carrying an optional `pareto` list.
 * @returns The bullet list without a trailing newline, or a placeholder line
 *   when there is no Pareto data.
 */
function renderPareto(report: unknown & { pareto?: { participant_id: string; dominated: boolean; dominated_by: string[] }[] }): string {
  const entries = report.pareto
  if (!entries || entries.length === 0) {
    return 'No Pareto analysis.\n'
  }

  const bullets: string[] = []
  for (const entry of entries) {
    const status = entry.dominated
      ? `dominated by ${entry.dominated_by.join(', ')}`
      : 'Pareto-optimal (non-dominated)'
    bullets.push(`- **${entry.participant_id}**: ${status}`)
  }
  return bullets.join('\n')
}