@nforma.ai/nforma 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +1024 -0
  3. package/agents/qgsd-codebase-mapper.md +764 -0
  4. package/agents/qgsd-debugger.md +1201 -0
  5. package/agents/qgsd-executor.md +472 -0
  6. package/agents/qgsd-integration-checker.md +443 -0
  7. package/agents/qgsd-phase-researcher.md +502 -0
  8. package/agents/qgsd-plan-checker.md +643 -0
  9. package/agents/qgsd-planner.md +1182 -0
  10. package/agents/qgsd-project-researcher.md +621 -0
  11. package/agents/qgsd-quorum-orchestrator.md +628 -0
  12. package/agents/qgsd-quorum-slot-worker.md +41 -0
  13. package/agents/qgsd-quorum-synthesizer.md +133 -0
  14. package/agents/qgsd-quorum-test-worker.md +37 -0
  15. package/agents/qgsd-quorum-worker.md +161 -0
  16. package/agents/qgsd-research-synthesizer.md +239 -0
  17. package/agents/qgsd-roadmapper.md +660 -0
  18. package/agents/qgsd-verifier.md +628 -0
  19. package/bin/accept-debug-invariant.cjs +165 -0
  20. package/bin/account-manager.cjs +719 -0
  21. package/bin/aggregate-requirements.cjs +466 -0
  22. package/bin/analyze-assumptions.cjs +757 -0
  23. package/bin/analyze-state-space.cjs +921 -0
  24. package/bin/attribute-trace-divergence.cjs +150 -0
  25. package/bin/auth-drivers/gh-cli.cjs +93 -0
  26. package/bin/auth-drivers/index.cjs +46 -0
  27. package/bin/auth-drivers/pool.cjs +67 -0
  28. package/bin/auth-drivers/simple.cjs +95 -0
  29. package/bin/autoClosePtoF.cjs +110 -0
  30. package/bin/blessed-terminal.cjs +350 -0
  31. package/bin/build-phase-index.cjs +472 -0
  32. package/bin/call-quorum-slot.cjs +541 -0
  33. package/bin/ccr-secure-config.cjs +99 -0
  34. package/bin/ccr-secure-start.cjs +83 -0
  35. package/bin/check-bundled-sdks.cjs +177 -0
  36. package/bin/check-coverage-guard.cjs +112 -0
  37. package/bin/check-liveness-fairness.cjs +95 -0
  38. package/bin/check-mcp-health.cjs +123 -0
  39. package/bin/check-provider-health.cjs +395 -0
  40. package/bin/check-results-exit.cjs +24 -0
  41. package/bin/check-spec-sync.cjs +360 -0
  42. package/bin/check-trace-redaction.cjs +271 -0
  43. package/bin/check-trace-schema-drift.cjs +99 -0
  44. package/bin/compareDrift.cjs +21 -0
  45. package/bin/conformance-schema.cjs +12 -0
  46. package/bin/count-scenarios.cjs +420 -0
  47. package/bin/debt-dedup.cjs +144 -0
  48. package/bin/debt-ledger.cjs +61 -0
  49. package/bin/debt-retention.cjs +76 -0
  50. package/bin/debt-state-machine.cjs +80 -0
  51. package/bin/detect-coverage-gaps.cjs +204 -0
  52. package/bin/detect-project-intent.cjs +362 -0
  53. package/bin/export-prism-constants.cjs +164 -0
  54. package/bin/extract-annotations.cjs +633 -0
  55. package/bin/extractFormalExpected.cjs +104 -0
  56. package/bin/fingerprint-drift.cjs +24 -0
  57. package/bin/fingerprint-issue.cjs +46 -0
  58. package/bin/formal-core.cjs +519 -0
  59. package/bin/formal-ref-linker.cjs +141 -0
  60. package/bin/formal-test-sync.cjs +788 -0
  61. package/bin/generate-formal-specs.cjs +588 -0
  62. package/bin/generate-petri-net.cjs +397 -0
  63. package/bin/generate-phase-spec.cjs +249 -0
  64. package/bin/generate-proposed-changes.cjs +194 -0
  65. package/bin/generate-tla-cfg.cjs +122 -0
  66. package/bin/generate-traceability-matrix.cjs +701 -0
  67. package/bin/generate-triage-bundle.cjs +300 -0
  68. package/bin/gh-account-rotate.cjs +34 -0
  69. package/bin/initialize-model-registry.cjs +105 -0
  70. package/bin/install-formal-tools.cjs +382 -0
  71. package/bin/install.js +2424 -0
  72. package/bin/isNumericThreshold.cjs +34 -0
  73. package/bin/issue-classifier.cjs +151 -0
  74. package/bin/levenshtein.cjs +74 -0
  75. package/bin/lint-formal-models.cjs +580 -0
  76. package/bin/load-baseline-requirements.cjs +275 -0
  77. package/bin/manage-agents-core.cjs +815 -0
  78. package/bin/migrate-formal-dir.cjs +172 -0
  79. package/bin/migrate-planning.cjs +206 -0
  80. package/bin/migrate-to-slots.cjs +255 -0
  81. package/bin/nForma.cjs +2726 -0
  82. package/bin/observe-config.cjs +353 -0
  83. package/bin/observe-debt-writer.cjs +140 -0
  84. package/bin/observe-handler-grafana.cjs +128 -0
  85. package/bin/observe-handler-internal.cjs +301 -0
  86. package/bin/observe-handler-logstash.cjs +153 -0
  87. package/bin/observe-handler-prometheus.cjs +185 -0
  88. package/bin/observe-handlers.cjs +436 -0
  89. package/bin/observe-registry.cjs +131 -0
  90. package/bin/observe-render.cjs +168 -0
  91. package/bin/planning-paths.cjs +167 -0
  92. package/bin/polyrepo.cjs +560 -0
  93. package/bin/prism-priority.cjs +153 -0
  94. package/bin/probe-quorum-slots.cjs +167 -0
  95. package/bin/promote-model.cjs +225 -0
  96. package/bin/propose-debug-invariants.cjs +165 -0
  97. package/bin/providers.json +392 -0
  98. package/bin/pty-proxy.py +129 -0
  99. package/bin/qgsd-solve.cjs +2477 -0
  100. package/bin/quorum-consensus-gate.cjs +238 -0
  101. package/bin/quorum-formal-context.cjs +183 -0
  102. package/bin/quorum-slot-dispatch.cjs +934 -0
  103. package/bin/read-policy.cjs +60 -0
  104. package/bin/requirement-map.cjs +63 -0
  105. package/bin/requirements-core.cjs +247 -0
  106. package/bin/resolve-cli.cjs +101 -0
  107. package/bin/review-mcp-logs.cjs +294 -0
  108. package/bin/run-account-manager-tlc.cjs +188 -0
  109. package/bin/run-account-pool-alloy.cjs +158 -0
  110. package/bin/run-alloy.cjs +153 -0
  111. package/bin/run-audit-alloy.cjs +187 -0
  112. package/bin/run-breaker-tlc.cjs +181 -0
  113. package/bin/run-formal-check.cjs +395 -0
  114. package/bin/run-formal-verify.cjs +701 -0
  115. package/bin/run-installer-alloy.cjs +188 -0
  116. package/bin/run-oauth-rotation-prism.cjs +132 -0
  117. package/bin/run-oscillation-tlc.cjs +202 -0
  118. package/bin/run-phase-tlc.cjs +228 -0
  119. package/bin/run-prism.cjs +446 -0
  120. package/bin/run-protocol-tlc.cjs +201 -0
  121. package/bin/run-quorum-composition-alloy.cjs +155 -0
  122. package/bin/run-sensitivity-sweep.cjs +231 -0
  123. package/bin/run-stop-hook-tlc.cjs +188 -0
  124. package/bin/run-tlc.cjs +467 -0
  125. package/bin/run-transcript-alloy.cjs +173 -0
  126. package/bin/run-uppaal.cjs +264 -0
  127. package/bin/secrets.cjs +134 -0
  128. package/bin/sensitivity-report.cjs +219 -0
  129. package/bin/sensitivity-sweep-feedback.cjs +194 -0
  130. package/bin/set-secret.cjs +29 -0
  131. package/bin/setup-telemetry-cron.sh +36 -0
  132. package/bin/sweepPtoF.cjs +63 -0
  133. package/bin/sync-baseline-requirements.cjs +290 -0
  134. package/bin/task-envelope.cjs +360 -0
  135. package/bin/telemetry-collector.cjs +229 -0
  136. package/bin/unified-mcp-server.mjs +735 -0
  137. package/bin/update-agents.cjs +369 -0
  138. package/bin/update-scoreboard.cjs +1134 -0
  139. package/bin/validate-debt-entry.cjs +207 -0
  140. package/bin/validate-invariant.cjs +419 -0
  141. package/bin/validate-memory.cjs +389 -0
  142. package/bin/validate-requirements-haiku.cjs +435 -0
  143. package/bin/validate-traces.cjs +438 -0
  144. package/bin/verify-formal-results.cjs +124 -0
  145. package/bin/verify-quorum-health.cjs +273 -0
  146. package/bin/write-check-result.cjs +106 -0
  147. package/bin/xstate-to-tla.cjs +483 -0
  148. package/bin/xstate-trace-walker.cjs +205 -0
  149. package/commands/qgsd/add-phase.md +43 -0
  150. package/commands/qgsd/add-requirement.md +24 -0
  151. package/commands/qgsd/add-todo.md +47 -0
  152. package/commands/qgsd/audit-milestone.md +37 -0
  153. package/commands/qgsd/check-todos.md +45 -0
  154. package/commands/qgsd/cleanup.md +18 -0
  155. package/commands/qgsd/close-formal-gaps.md +33 -0
  156. package/commands/qgsd/complete-milestone.md +136 -0
  157. package/commands/qgsd/debug.md +166 -0
  158. package/commands/qgsd/discuss-phase.md +83 -0
  159. package/commands/qgsd/execute-phase.md +117 -0
  160. package/commands/qgsd/fix-tests.md +27 -0
  161. package/commands/qgsd/formal-test-sync.md +32 -0
  162. package/commands/qgsd/health.md +22 -0
  163. package/commands/qgsd/help.md +22 -0
  164. package/commands/qgsd/insert-phase.md +32 -0
  165. package/commands/qgsd/join-discord.md +18 -0
  166. package/commands/qgsd/list-phase-assumptions.md +46 -0
  167. package/commands/qgsd/map-codebase.md +71 -0
  168. package/commands/qgsd/map-requirements.md +20 -0
  169. package/commands/qgsd/mcp-restart.md +176 -0
  170. package/commands/qgsd/mcp-set-model.md +134 -0
  171. package/commands/qgsd/mcp-setup.md +1371 -0
  172. package/commands/qgsd/mcp-status.md +274 -0
  173. package/commands/qgsd/mcp-update.md +238 -0
  174. package/commands/qgsd/new-milestone.md +44 -0
  175. package/commands/qgsd/new-project.md +42 -0
  176. package/commands/qgsd/observe.md +260 -0
  177. package/commands/qgsd/pause-work.md +38 -0
  178. package/commands/qgsd/plan-milestone-gaps.md +34 -0
  179. package/commands/qgsd/plan-phase.md +44 -0
  180. package/commands/qgsd/polyrepo.md +50 -0
  181. package/commands/qgsd/progress.md +24 -0
  182. package/commands/qgsd/queue.md +54 -0
  183. package/commands/qgsd/quick.md +133 -0
  184. package/commands/qgsd/quorum-test.md +275 -0
  185. package/commands/qgsd/quorum.md +707 -0
  186. package/commands/qgsd/reapply-patches.md +110 -0
  187. package/commands/qgsd/remove-phase.md +31 -0
  188. package/commands/qgsd/research-phase.md +189 -0
  189. package/commands/qgsd/resume-work.md +40 -0
  190. package/commands/qgsd/set-profile.md +34 -0
  191. package/commands/qgsd/settings.md +39 -0
  192. package/commands/qgsd/solve.md +565 -0
  193. package/commands/qgsd/sync-baselines.md +119 -0
  194. package/commands/qgsd/triage.md +233 -0
  195. package/commands/qgsd/update.md +37 -0
  196. package/commands/qgsd/verify-work.md +38 -0
  197. package/hooks/dist/config-loader.js +297 -0
  198. package/hooks/dist/conformance-schema.cjs +12 -0
  199. package/hooks/dist/gsd-context-monitor.js +64 -0
  200. package/hooks/dist/qgsd-check-update.js +62 -0
  201. package/hooks/dist/qgsd-circuit-breaker.js +682 -0
  202. package/hooks/dist/qgsd-precompact.js +156 -0
  203. package/hooks/dist/qgsd-prompt.js +653 -0
  204. package/hooks/dist/qgsd-session-start.js +122 -0
  205. package/hooks/dist/qgsd-slot-correlator.js +58 -0
  206. package/hooks/dist/qgsd-spec-regen.js +86 -0
  207. package/hooks/dist/qgsd-statusline.js +91 -0
  208. package/hooks/dist/qgsd-stop.js +553 -0
  209. package/hooks/dist/qgsd-token-collector.js +133 -0
  210. package/hooks/dist/unified-mcp-server.mjs +669 -0
  211. package/package.json +95 -0
  212. package/scripts/build-hooks.js +46 -0
  213. package/scripts/postinstall.js +48 -0
  214. package/scripts/secret-audit.sh +45 -0
  215. package/templates/qgsd.json +49 -0
@@ -0,0 +1,1134 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * update-scoreboard.cjs
6
+ *
7
+ * CLI script to update .planning/quorum-scoreboard.json atomically.
8
+ * Reads current JSON, applies score delta for one model/round, recalculates
9
+ * all cumulative stats from scratch, writes back.
10
+ *
11
+ * Usage (round vote):
12
+ * node bin/update-scoreboard.cjs \
13
+ * --model <name> --result <code> --task <label> --round <n> --verdict <v> \
14
+ * [--scoreboard <path>] [--category <cat>] [--subcategory <subcat>] \
15
+ * [--task-description <text>]
16
+ *
17
+ * Usage (team identity — once per session):
18
+ * node bin/update-scoreboard.cjs init-team \
19
+ * --claude-model <model-id> \
20
+ * --team '<json-object-of-agent-identities>' \
21
+ * [--scoreboard <path>]
22
+ */
23
+
24
+ const fs = require('fs');
25
+ const path = require('path');
26
+ const crypto = require('crypto');
27
+ const os = require('os');
28
+ const https = require('https');
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Score delta lookup
32
+ // ---------------------------------------------------------------------------
33
+
34
// Points awarded (or deducted) for each vote result code.
// 'UNAVAIL' and the empty code (not scored) are worth nothing.
const SCORE_DELTAS = {
  TP: 1,
  TN: 5,
  FP: -3,
  FN: -1,
  'TP+': 3, // +1 TP effectiveness + +2 improvement bonus
  'TN+': 7, // +5 TN effectiveness + +2 constructive alternative adopted
  UNAVAIL: 0,
  '': 0,
};

// Model names accepted by --model and tracked under data.models.
const VALID_MODELS = [
  'claude',
  'gemini',
  'opencode',
  'copilot',
  'codex',
  'deepseek',
  'minimax',
  'qwen-coder',
  'kimi',
  'llama4',
];

// Result codes accepted by --result: exactly the codes that have a score delta,
// in declaration order.
const VALID_RESULTS = Object.keys(SCORE_DELTAS);

// Known round verdicts.
const VALID_VERDICTS = ['APPROVE', 'BLOCK', 'DELIBERATE', 'CONSENSUS', 'GAPS_FOUND', '—'];
48
+
49
+ // ---------------------------------------------------------------------------
50
+ // Argument parsing
51
+ // ---------------------------------------------------------------------------
52
+
53
/**
 * Turn a flat argv list into a { name: value } map.
 *
 * Only tokens beginning with `--` create entries. A flag's value is the token
 * that follows it, unless that token is missing or itself begins with `--`,
 * in which case the value is the empty string. Tokens that are neither a flag
 * nor a consumed value are ignored.
 *
 * @param {string[]} argv - command-line tokens
 * @returns {Object<string, string>} parsed flags (names without the leading `--`)
 */
function parseArgs(argv) {
  const parsed = {};
  let idx = 0;
  while (idx < argv.length) {
    const token = argv[idx];
    idx += 1;
    if (!token.startsWith('--')) continue;

    const candidate = argv[idx];
    const hasValue = candidate !== undefined && !candidate.startsWith('--');
    parsed[token.slice(2)] = hasValue ? candidate : '';
    if (hasValue) idx += 1; // the value token has been consumed
  }
  return parsed;
}
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // Default scoreboard path helper
70
+ // ---------------------------------------------------------------------------
71
/**
 * Resolve the scoreboard location used when --scoreboard is not given.
 *
 * Asks ./planning-paths.cjs to resolve 'quorum-scoreboard' for the current
 * working directory; if loading or calling that helper throws, falls back to
 * the relative path '.planning/quorum-scoreboard.json'.
 *
 * @returns {string} scoreboard path
 */
function defaultScoreboardPath() {
  let resolved;
  try {
    const planningPaths = require('./planning-paths.cjs');
    resolved = planningPaths.resolveWithFallback(process.cwd(), 'quorum-scoreboard');
  } catch (_) {
    resolved = '.planning/quorum-scoreboard.json';
  }
  return resolved;
}
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // Usage / validation
81
+ // ---------------------------------------------------------------------------
82
+
83
// Help text printed by validate() on bad input. The --model and --result lines
// are generated from VALID_MODELS / VALID_RESULTS so the help cannot drift from
// what validation actually accepts.
const USAGE = `Usage: node bin/update-scoreboard.cjs --model <name> --result <code> --task <label> --round <n> --verdict <v> [--scoreboard <path>] [--category <cat>] [--subcategory <subcat>] [--task-description <text>]
  --model            ${VALID_MODELS.join(' | ')}
  --result           ${VALID_RESULTS.filter(Boolean).join(' | ')} | (empty for not scored)
  --task             task label, e.g. "quick-25"
  --round            round number (integer)
  --verdict          APPROVE | BLOCK | DELIBERATE | CONSENSUS | GAPS_FOUND | —
  --category         (optional) explicit parent category name
  --subcategory      (optional) explicit subcategory name
  --task-description (optional) debate question/topic text; used by Haiku auto-classification when --category/--subcategory omitted
  --slot             slot name (e.g. claude-1) — use instead of --model for MCP server instances
  --model-id         full model id from health_check (e.g. "deepseek-ai/DeepSeek-V3") — required with --slot`;
94
+
95
/**
 * Validate parsed CLI arguments for a round-vote update and normalize them.
 *
 * On any validation error, prints USAGE plus the error list to stderr and
 * exits the process with status 1.
 *
 * @param {Object<string, string>} args - output of parseArgs()
 * @returns {{model: (string|undefined), slot: (string|null), modelId: (string|null),
 *            result: string, task: string, round: number, verdict: string,
 *            scoreboard: string, category: (string|null), subcategory: (string|null),
 *            taskDescription: (string|null)}} normalized options
 */
function validate(args) {
  const errors = [];

  // --slot and --model are mutually exclusive
  if (args.slot && args.model) {
    errors.push('--slot and --model are mutually exclusive');
  } else if (args.slot) {
    // Slot mode: require --model-id
    if (!args['model-id']) errors.push('--model-id is required when using --slot');
  } else if (!args.model) {
    // Model mode: require --model
    errors.push('--model is required');
  }

  if (!args.task) errors.push('--task is required');
  if (!args.round) errors.push('--round is required');
  if (!args.verdict) errors.push('--verdict is required');
  // --result can be empty string (not scored), but must be present as key
  if (!('result' in args)) errors.push('--result is required (use empty string for not scored)');

  if (args.model && !VALID_MODELS.includes(args.model)) {
    errors.push(`--model must be one of: ${VALID_MODELS.join(', ')}`);
  }

  const result = args.result || '';
  if (!VALID_RESULTS.includes(result)) {
    // Build the list from VALID_RESULTS so every accepted code (incl. UNAVAIL) is shown.
    const allowed = VALID_RESULTS.map((code) => (code === '' ? '(empty)' : code)).join(', ');
    errors.push(`--result must be one of: ${allowed}`);
  }

  // Only range-check --round when it was supplied; a missing value is already
  // reported above. The digits-only test rejects inputs such as "3abc" or "2.5"
  // that parseInt alone would silently truncate.
  let roundNum = NaN;
  if (args.round) {
    const roundText = String(args.round).trim();
    if (/^\+?\d+$/.test(roundText)) roundNum = parseInt(roundText, 10);
    if (Number.isNaN(roundNum) || roundNum < 1) {
      errors.push('--round must be a positive integer');
    }
  }

  if (errors.length > 0) {
    process.stderr.write(USAGE + '\n\nErrors:\n' + errors.map(e => '  ' + e).join('\n') + '\n');
    process.exit(1);
  }

  return {
    model: args.model,
    slot: args.slot || null,
    modelId: args['model-id'] || null,
    result: result,
    task: args.task,
    round: roundNum,
    verdict: args.verdict,
    scoreboard: args.scoreboard || defaultScoreboardPath(),
    category: args.category || null,
    subcategory: args.subcategory || null,
    taskDescription: args['task-description'] || null,
  };
}
147
+
148
+ // ---------------------------------------------------------------------------
149
+ // JSON schema helpers
150
+ // ---------------------------------------------------------------------------
151
+
152
/**
 * Create a fresh per-model stats record with every counter at zero.
 *
 * @returns {{score: number, tp: number, tn: number, fp: number, fn: number, impr: number, invocations: number}}
 */
function emptyModelStats() {
  const zeroed = {};
  for (const field of ['score', 'tp', 'tn', 'fp', 'fn', 'impr', 'invocations']) {
    zeroed[field] = 0;
  }
  return zeroed;
}
155
+
156
/**
 * Build the scoreboard skeleton used when no scoreboard file exists (or it
 * cannot be parsed): zeroed stats for every model in VALID_MODELS plus empty
 * slots, categories, rounds, availability and delivery stats.
 *
 * @returns {Object} a new, unshared scoreboard object
 */
function emptyData() {
  const models = {};
  for (const name of VALID_MODELS) {
    models[name] = emptyModelStats();
  }

  const deliveryStats = {
    total_rounds: 0,
    target_vote_count: 3,
    achieved_by_outcome: {},
  };

  return {
    models,
    slots: {}, // slot-keyed map; key = '<slot-name>:<model-id>'
    categories: {},
    rounds: [],
    availability: {}, // per-slot availability windows: { slotOrModel: { available_at_iso, ... } }
    delivery_stats: deliveryStats,
  };
}
181
+
182
/**
 * Load the scoreboard JSON from disk.
 *
 * Returns emptyData() when the file does not exist. When the file exists but
 * cannot be read/parsed, or its top-level value is not a JSON object, a
 * warning is written to stderr and emptyData() is returned.
 *
 * Top-level sections missing from older files (models, rounds, categories,
 * slots, availability) are filled in with their empty defaults so later
 * recompute steps can dereference them safely.
 *
 * @param {string} scoreboard - scoreboard path, resolved against process.cwd()
 * @returns {Object} scoreboard data
 */
function loadData(scoreboard) {
  const absPath = path.resolve(process.cwd(), scoreboard);
  if (!fs.existsSync(absPath)) {
    return emptyData();
  }
  try {
    const raw = fs.readFileSync(absPath, 'utf8');
    const data = JSON.parse(raw);
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      throw new Error('top-level JSON value is not an object');
    }
    // Backward compat: ensure every section the rest of the script reads exists.
    const defaults = emptyData();
    for (const section of ['models', 'rounds', 'categories', 'slots', 'availability']) {
      if (!data[section]) {
        data[section] = defaults[section];
      }
    }
    return data;
  } catch (e) {
    process.stderr.write(`[update-scoreboard] WARNING: could not parse ${absPath}: ${e.message}\n`);
    return emptyData();
  }
}
208
+
209
+ // ---------------------------------------------------------------------------
210
+ // Cumulative stats recompute (from-scratch to avoid drift)
211
+ // ---------------------------------------------------------------------------
212
+
213
/**
 * Rebuild every model's cumulative stats in data.models by replaying
 * data.rounds from the beginning (mutates data in place).
 *
 * For each round and each model in VALID_MODELS: a falsy or 'UNAVAIL' vote is
 * ignored; any other vote counts as an invocation; votes with a known score
 * delta additionally update score and the tp/tn/fp/fn/impr counters.
 *
 * @param {Object} data - scoreboard with `models` and `rounds`
 */
function recomputeStats(data) {
  // Start every known model from zero, creating missing entries.
  for (const name of VALID_MODELS) {
    if (!data.models[name]) data.models[name] = emptyModelStats();
    Object.assign(data.models[name], {
      score: 0, tp: 0, tn: 0, fp: 0, fn: 0, impr: 0, invocations: 0,
    });
  }

  for (const round of data.rounds) {
    const ballot = round.votes || {};
    for (const name of VALID_MODELS) {
      const code = ballot[name];
      if (!code || code === 'UNAVAIL') continue;

      const stats = data.models[name];
      stats.invocations += 1;

      const delta = SCORE_DELTAS[code];
      if (delta === undefined) continue; // unknown vote code — skip

      stats.score += delta;
      switch (code) {
        case 'TP':
          stats.tp += 1;
          break;
        case 'TP+':
          stats.tp += 1;
          stats.impr += 1;
          break;
        case 'TN':
          stats.tn += 1;
          break;
        case 'TN+':
          stats.tn += 1;
          stats.impr += 1;
          break;
        case 'FP':
          stats.fp += 1;
          break;
        case 'FN':
          stats.fn += 1;
          break;
        default:
          break;
      }
    }
  }
}
249
+
250
+ // ---------------------------------------------------------------------------
251
+ // Slot-keyed stats helpers
252
+ // ---------------------------------------------------------------------------
253
+
254
/**
 * Create a fresh stats record for one slot/model pairing, all counters zero.
 *
 * @param {string} slot - slot name
 * @param {string} modelId - model id, stored under the `model` key
 * @returns {Object} new slot stats record
 */
function emptySlotStats(slot, modelId) {
  const counters = Object.fromEntries(
    ['score', 'tp', 'tn', 'fp', 'fn', 'impr', 'invocations'].map((field) => [field, 0])
  );
  return { slot, model: modelId, ...counters };
}
257
+
258
/**
 * Rebuild the cumulative stats of every entry in data.slots by replaying
 * data.rounds (mutates data in place).
 *
 * Only votes whose key contains ':' (composite '<slot>:<model-id>' keys) and
 * that already have an entry in data.slots are counted. Falsy and 'UNAVAIL'
 * votes are ignored; other votes count as an invocation, and votes with a
 * known score delta also update score and the tp/tn/fp/fn/impr counters.
 *
 * @param {Object} data - scoreboard with `slots` and `rounds`
 */
function recomputeSlots(data) {
  // Zero the counters of every registered slot entry.
  for (const slotKey of Object.keys(data.slots)) {
    Object.assign(data.slots[slotKey], {
      score: 0, tp: 0, tn: 0, fp: 0, fn: 0, impr: 0, invocations: 0,
    });
  }

  // Replay all rounds, crediting votes to their composite slot key.
  for (const round of data.rounds) {
    const ballot = round.votes || {};
    for (const [slotKey, code] of Object.entries(ballot)) {
      const isSlotKey = slotKey.includes(':');
      if (!isSlotKey) continue;
      if (!code || code === 'UNAVAIL') continue;

      const stats = data.slots[slotKey];
      if (!stats) continue; // key not in slots map — skip

      stats.invocations += 1;
      const delta = SCORE_DELTAS[code];
      if (delta === undefined) continue;

      stats.score += delta;
      switch (code) {
        case 'TP':
          stats.tp += 1;
          break;
        case 'TP+':
          stats.tp += 1;
          stats.impr += 1;
          break;
        case 'TN':
          stats.tn += 1;
          break;
        case 'TN+':
          stats.tn += 1;
          stats.impr += 1;
          break;
        case 'FP':
          stats.fp += 1;
          break;
        case 'FN':
          stats.fn += 1;
          break;
        default:
          break;
      }
    }
  }
}
284
+
285
+ // ---------------------------------------------------------------------------
286
+ // Delivery stats computation
287
+ // ---------------------------------------------------------------------------
288
+
289
/**
 * Summarize how many valid votes each round received.
 *
 * A vote is valid when it is truthy and is neither 'UNAVAIL' nor 'TIMEOUT'.
 * Rounds are bucketed by their valid-vote count ('<n>_votes'); each bucket
 * records how many rounds fell into it and that share of all rounds as a
 * percentage rounded to one decimal. The summary is stored in
 * data.delivery_stats. Errors are reported on stderr and never thrown.
 *
 * @param {Object} data - scoreboard data (mutated in place)
 * @returns {Object} the same `data` object
 */
function computeDeliveryStats(data) {
  try {
    const hasRounds = Boolean(data.rounds) && data.rounds.length !== 0;
    if (!hasRounds) {
      data.delivery_stats = {
        total_rounds: 0,
        target_vote_count: 3,
        achieved_by_outcome: {},
      };
      return data;
    }

    const isDelivered = (v) => Boolean(v) && v !== 'UNAVAIL' && v !== 'TIMEOUT';

    // bucket name -> number of rounds with that many delivered votes
    const histogram = new Map();
    for (const round of data.rounds) {
      const ballot = round.votes || {};
      const delivered = Object.keys(ballot).filter((k) => isDelivered(ballot[k])).length;
      const bucket = delivered + '_votes';
      histogram.set(bucket, (histogram.get(bucket) || 0) + 1);
    }

    const totalRounds = data.rounds.length;
    const achievedByOutcome = {};
    histogram.forEach((count, bucket) => {
      const share = (count / totalRounds) * 100;
      achievedByOutcome[bucket] = { count, pct: parseFloat(share.toFixed(1)) };
    });

    data.delivery_stats = {
      total_rounds: totalRounds,
      target_vote_count: 3,
      achieved_by_outcome: achievedByOutcome,
    };
    return data;
  } catch (e) {
    process.stderr.write(`[computeDeliveryStats] ERROR: ${e.message}\n`);
    return data;
  }
}
343
+
344
+ // ---------------------------------------------------------------------------
345
+ // Flakiness scoring
346
+ // ---------------------------------------------------------------------------
347
+
348
/**
 * Compute a flakiness score per slot from its most recent votes.
 *
 * Vote keys are reduced to a slot name (the part before ':' for composite
 * keys, otherwise the key itself). For each round a slot took part in, its
 * first vote in that round is classified FAILED (falsy, 'UNAVAIL' or
 * 'TIMEOUT') or SUCCESS (anything else). The flakiness score is the FAILED
 * fraction of the trailing `windowSize` entries, rounded to two decimals.
 * Score and window are written to `flakiness_score` / `recent_verdicts` on
 * every data.slots entry whose key starts with that slot name.
 * Errors are reported on stderr and never thrown.
 *
 * @param {Object} data - scoreboard data (mutated in place)
 * @param {number} [windowSize=10] - number of trailing verdicts considered
 * @returns {Object} the same `data` object
 */
function computeFlakiness(data, windowSize = 10) {
  try {
    if (!data.rounds || data.rounds.length === 0) {
      // No rounds yet — nothing to score
      return data;
    }

    const slotNameOf = (voteKey) => (voteKey.includes(':') ? voteKey.split(':')[0] : voteKey);
    const isFailure = (voteValue) => !voteValue || voteValue === 'UNAVAIL' || voteValue === 'TIMEOUT';

    // slot name -> chronological verdict list, built in one pass over the rounds.
    // Map insertion order = order in which slots first appear.
    const history = new Map();
    for (const round of data.rounds) {
      const ballot = round.votes || {};
      const seenThisRound = new Set();
      for (const [voteKey, voteValue] of Object.entries(ballot)) {
        const slotName = slotNameOf(voteKey);
        // Only the first vote of a slot within a round is considered.
        if (seenThisRound.has(slotName)) continue;
        seenThisRound.add(slotName);

        if (!history.has(slotName)) history.set(slotName, []);
        const verdicts = history.get(slotName);
        verdicts.push({
          round_num: round.round || verdicts.length + 1,
          verdict: isFailure(voteValue) ? 'FAILED' : 'SUCCESS',
        });
      }
    }

    for (const [slotName, verdicts] of history) {
      const window = verdicts.slice(-windowSize);
      const matchingEntries = Object.entries(data.slots)
        .filter(([slotKey]) => slotKey.split(':')[0] === slotName)
        .map(([, slotEntry]) => slotEntry);

      if (window.length === 0) {
        // Empty window — default to 0.0 (reliable) where no score is set yet
        for (const slotEntry of matchingEntries) {
          if (!slotEntry.flakiness_score) {
            slotEntry.flakiness_score = 0.0;
            slotEntry.recent_verdicts = [];
          }
        }
        continue;
      }

      const failedCount = window.reduce((n, v) => (v.verdict === 'FAILED' ? n + 1 : n), 0);
      const score = parseFloat((failedCount / window.length).toFixed(2)); // Store as number

      for (const slotEntry of matchingEntries) {
        slotEntry.flakiness_score = score;
        slotEntry.recent_verdicts = window;
      }
    }

    return data;
  } catch (e) {
    process.stderr.write(`[computeFlakiness] ERROR: ${e.message}\n`);
    return data;
  }
}
442
+
443
+ // ---------------------------------------------------------------------------
444
+ // Today's date in MM-DD format
445
+ // ---------------------------------------------------------------------------
446
+
447
/**
 * Format the current local date as zero-padded "MM-DD".
 *
 * @returns {string} e.g. "02-24"
 */
function todayMMDD() {
  const now = new Date();
  const twoDigits = (n) => String(n).padStart(2, '0');
  // getMonth() is zero-based, hence the +1.
  return `${twoDigits(now.getMonth() + 1)}-${twoDigits(now.getDate())}`;
}
453
+
454
+ // ---------------------------------------------------------------------------
455
+ // Haiku auto-classification
456
+ // ---------------------------------------------------------------------------
457
+
458
/**
 * Ask claude-haiku-4-5-20251001 (Anthropic Messages API) to place a task
 * description into the category taxonomy.
 *
 * Fail-open: returns null when ANTHROPIC_API_KEY is unset, on request error or
 * timeout (15 s), when the response is not parseable, or when the model's
 * answer is not JSON with string `category` and `subcategory` fields.
 *
 * @param {string} taskDescription - debate question/topic text
 * @param {Object<string, string[]>} categories - parent category -> subcategory names
 * @returns {Promise<Object|null>} the parsed model answer (prompted to be
 *          { category, subcategory, is_new }) or null
 */
async function classifyWithHaiku(taskDescription, categories) {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) return null; // No API key — skip silently (fail-open)

  try {
    // Render the taxonomy as an indented list for the prompt
    const taxonomyLines = Object.entries(categories)
      .map(([cat, subs]) => ` ${cat}:\n${subs.map(s => ` - ${s}`).join('\n')}`)
      .join('\n');

    const prompt = `You are classifying a quorum debate topic into a category taxonomy.

Debate topic: ${taskDescription}

Taxonomy:
${taxonomyLines}

Return ONLY valid JSON (no markdown, no explanation):
{"category": "<parent category name>", "subcategory": "<subcategory name>", "is_new": false}

If the topic does not match any existing category or subcategory well, propose new names:
{"category": "<new parent name>", "subcategory": "<new subcategory name>", "is_new": true}

Choose the single best match. Return nothing except the JSON object.`;

    const body = JSON.stringify({
      model: 'claude-haiku-4-5-20251001',
      max_tokens: 128,
      messages: [{ role: 'user', content: prompt }],
    });

    const requestOptions = {
      hostname: 'api.anthropic.com',
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
        'Content-Length': Buffer.byteLength(body),
      },
      timeout: 15000,
    };

    // Resolves with the trimmed text of the first content item, or null on any
    // transport/parse problem; this promise never rejects.
    const text = await new Promise((resolve) => {
      const onResponse = (res) => {
        let payload = '';
        res.on('data', (chunk) => { payload += chunk; });
        res.on('end', () => {
          try {
            const parsed = JSON.parse(payload);
            const first = (parsed.content || [])[0] || {};
            resolve((first.text || '').trim());
          } catch {
            resolve(null);
          }
        });
      };

      const req = https.request(requestOptions, onResponse);
      req.on('error', () => resolve(null));
      req.on('timeout', () => {
        req.destroy();
        resolve(null);
      });
      req.write(body);
      req.end();
    });

    if (!text) return null;

    const result = JSON.parse(text);
    const wellFormed = typeof result.category === 'string' && typeof result.subcategory === 'string';
    return wellFormed ? result : null;
  } catch (_) {
    return null; // any error — fail-open
  }
}
534
+
535
+ // ---------------------------------------------------------------------------
536
+ // init-team: capture team fingerprint (idempotent — skips if unchanged)
537
+ // ---------------------------------------------------------------------------
538
+
539
/**
 * `init-team` subcommand: record the team composition in the scoreboard.
 *
 * Builds a team record from --claude-model (falling back to the CLAUDE_MODEL /
 * ANTHROPIC_MODEL environment variables, then 'unknown'), the --team JSON
 * object, and the MCP server names / plugins read from the Claude JSON file
 * (QGSD_CLAUDE_JSON or ~/.claude.json). A 16-hex-character SHA-256 fingerprint
 * of that composition is compared to the stored one; if equal nothing is
 * written, otherwise data.team is replaced and the scoreboard is rewritten via
 * a temp file + rename.
 *
 * Malformed optional inputs (unparseable or non-object --team, unreadable
 * Claude JSON, non-array `plugins`) produce a stderr warning and are treated
 * as empty rather than aborting.
 *
 * @param {string[]} argv - command-line tokens following `init-team`
 * @returns {Promise<void>}
 * @throws {Error} if the scoreboard directory or file cannot be written
 */
async function initTeam(argv) {
  const args = parseArgs(argv);
  const scoreboardPath = args.scoreboard || defaultScoreboardPath();
  const claudeModel = args['claude-model'] || process.env.CLAUDE_MODEL || process.env.ANTHROPIC_MODEL || 'unknown';

  // Parse agent identities from --team JSON
  let agents = {};
  if (args.team) {
    try {
      const parsedTeam = JSON.parse(args.team);
      // JSON such as `null` or `42` parses fine but is not a usable identity
      // map (Object.entries(null) would throw below), so reject it here.
      if (parsedTeam !== null && typeof parsedTeam === 'object') {
        agents = parsedTeam;
      } else {
        process.stderr.write('[init-team] WARNING: --team must be a JSON object; ignoring\n');
      }
    } catch (e) {
      process.stderr.write(`[init-team] WARNING: could not parse --team JSON: ${e.message}\n`);
    }
  }

  // Auto-detect MCPs and plugins from ~/.claude.json
  let mcps = [];
  let plugins = [];
  try {
    const claudeJsonPath = process.env.QGSD_CLAUDE_JSON || path.join(os.homedir(), '.claude.json');
    const claudeJson = JSON.parse(fs.readFileSync(claudeJsonPath, 'utf8'));
    mcps = Object.keys(claudeJson.mcpServers || {});
    if (Array.isArray(claudeJson.plugins)) {
      plugins = claudeJson.plugins;
    } else if (claudeJson.plugins) {
      // A non-array value cannot be spread/sorted for the fingerprint below.
      process.stderr.write('[init-team] WARNING: "plugins" in ~/.claude.json is not an array; ignoring\n');
    }
  } catch (e) {
    process.stderr.write(`[init-team] WARNING: could not read ~/.claude.json: ${e.message}\n`);
  }

  // Compute fingerprint from canonical team composition (sorted so that key
  // or list ordering does not change the fingerprint)
  const canonical = JSON.stringify({
    claude_model: claudeModel,
    agents: Object.fromEntries(Object.entries(agents).sort()),
    mcps: [...mcps].sort(),
    plugins: [...plugins].sort(),
  });
  const fingerprint = crypto.createHash('sha256').update(canonical).digest('hex').slice(0, 16);

  const data = loadData(scoreboardPath);

  // Skip if fingerprint unchanged
  if (data.team && data.team.fingerprint === fingerprint) {
    process.stdout.write(`[init-team] fingerprint: ${fingerprint} | no change\n`);
    return;
  }

  const prevFingerprint = data.team ? data.team.fingerprint : null;

  data.team = {
    fingerprint,
    captured_at: new Date().toISOString(),
    claude_model: claudeModel,
    agents,
    mcps,
    plugins,
  };

  // Write to a pid-suffixed temp file, then rename over the target.
  const absPath = path.resolve(process.cwd(), scoreboardPath);
  fs.mkdirSync(path.dirname(absPath), { recursive: true });
  const tmpPath0 = absPath + '.' + process.pid + '.tmp';
  try {
    fs.writeFileSync(tmpPath0, JSON.stringify(data, null, 2) + '\n', 'utf8');
    fs.renameSync(tmpPath0, absPath);
  } catch (err) {
    // Do not leave a stray temp file behind when the write or rename fails.
    try {
      fs.unlinkSync(tmpPath0);
    } catch (_) {
      // temp file may never have been created — nothing to clean up
    }
    throw err;
  }

  const agentCount = Object.keys(agents).length;
  if (prevFingerprint) {
    process.stdout.write(`[init-team] fingerprint: ${fingerprint} (updated from ${prevFingerprint}) | ${agentCount} agents, ${mcps.length} MCPs, ${plugins.length} plugins\n`);
  } else {
    process.stdout.write(`[init-team] fingerprint: ${fingerprint} | ${agentCount} agents, ${mcps.length} MCPs, ${plugins.length} plugins\n`);
  }
}
607
+
608
+ // ---------------------------------------------------------------------------
609
+ // Availability tracking helpers
610
+ // ---------------------------------------------------------------------------
611
+
612
// Lower-cased three-letter month abbreviation -> zero-based month index.
const MONTH_MAP = {
  jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
  jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
};

/**
 * Parse a year-less local date/time string such as "Feb 24 8:37 PM",
 * "February 24 20:37" or "Feb 24 8:37:00 PM" into a Date in local time.
 *
 * The current year is used unless that moment is already in the past, in
 * which case the following year is used.
 *
 * @param {string} str - Text to parse; surrounding whitespace is ignored.
 * @returns {Date|null} The parsed Date, or null when the text does not match,
 *   the month is unknown, or a field is out of range (e.g. "Feb 31", "25:00").
 */
function parseLocalDateTime(str) {
  if (typeof str !== 'string') return null;

  const m = str.trim().match(/^(\w{3,9})\s+(\d{1,2})\s+(\d{1,2}):(\d{2})(?::(\d{2}))?\s*(AM|PM)?$/i);
  if (!m) return null;

  // Only the first three letters decide the month, so full names work too.
  const month = MONTH_MAP[m[1].slice(0, 3).toLowerCase()];
  if (month === undefined) return null;

  const day = parseInt(m[2], 10);
  let hour = parseInt(m[3], 10);
  const minute = parseInt(m[4], 10);
  const second = m[5] === undefined ? 0 : parseInt(m[5], 10);
  const ampm = (m[6] || '').toUpperCase();

  if (ampm === 'PM' && hour < 12) hour += 12;
  if (ampm === 'AM' && hour === 12) hour = 0;

  // The Date constructor silently rolls out-of-range fields over into the
  // next unit, so reject them up front instead of returning a shifted date.
  if (day < 1 || day > 31 || hour > 23 || minute > 59 || second > 59) return null;

  // Returns null when the day does not exist in that month/year (e.g. Feb 30).
  const buildDate = (year) => {
    const candidate = new Date(year, month, day, hour, minute, second, 0);
    return candidate.getMonth() === month && candidate.getDate() === day ? candidate : null;
  };

  const now = new Date();
  const thisYear = buildDate(now.getFullYear());
  if (thisYear && thisYear >= now) return thisYear;

  // Already in the past (or not a real date this year): assume next year.
  return buildDate(now.getFullYear() + 1);
}
645
+
646
/**
 * Parse an availability hint from a raw UNAVAIL message.
 *
 * Supported forms (first match wins, checked in this order):
 *   "... until Feb 24 8:37 PM"          — absolute local time
 *   "in 5 hours" / "in 1 hour 30 minutes" / "restart in 1.5 hours"
 *   "in 30 minutes"
 *
 * The reason is classified the same way for every form: "usage limit",
 * "quota exceeded", "rate limit", or "unavailable" when none is mentioned.
 *
 * @param {string} message - Raw output text from the agent.
 * @returns {{ available_at: Date, reason: string } | null} null when the
 *   message is not a string or contains no recognisable hint.
 */
function parseAvailabilityHint(message) {
  if (typeof message !== 'string') return null;

  // Classify once so the relative branches no longer lose "usage limit" /
  // "rate limit" information that the absolute branch already reported.
  let reason = 'unavailable';
  if (/usage.?limit/i.test(message)) reason = 'usage limit';
  else if (/quota/i.test(message)) reason = 'quota exceeded';
  else if (/rate.?limit/i.test(message)) reason = 'rate limit';

  // "until <Month Day HH:MM AM/PM>" — absolute local time
  const untilMatch = message.match(/until\s+([A-Za-z]{3,9}\s+\d{1,2}\s+\d{1,2}:\d{2}(?::\d{2})?\s*(?:AM|PM)?)/i);
  if (untilMatch) {
    const parsed = parseLocalDateTime(untilMatch[1]);
    // An unparseable date falls through to the relative forms below.
    if (parsed) return { available_at: parsed, reason };
  }

  // "in N hours [M minutes]" — relative
  const hoursMatch = message.match(/in\s+(\d+(?:\.\d+)?)\s*hours?(?:\s*(?:and\s+)?(\d+)\s*minutes?)?/i);
  if (hoursMatch) {
    const hours = parseFloat(hoursMatch[1]);
    const extraMinutes = hoursMatch[2] === undefined ? 0 : parseInt(hoursMatch[2], 10);
    const offsetMs = hours * 3_600_000 + extraMinutes * 60_000;
    return { available_at: new Date(Date.now() + offsetMs), reason };
  }

  // "in N minutes" — relative
  const minsMatch = message.match(/in\s+(\d+(?:\.\d+)?)\s*minutes?/i);
  if (minsMatch) {
    const mins = parseFloat(minsMatch[1]);
    return { available_at: new Date(Date.now() + mins * 60_000), reason };
  }

  return null;
}
687
+
688
/**
 * Render a remaining-time value for display.
 *
 * @param {number} ms - Remaining milliseconds.
 * @returns {string} "now" for zero or negative input; otherwise the duration
 *   rounded up to whole minutes as "Zm", "Xh", or "Xh Ym".
 */
function formatDuration(ms) {
  if (ms <= 0) {
    return 'now';
  }

  // Round up so that any partial minute still shows as one minute.
  const roundedMinutes = Math.ceil(ms / 60_000);
  if (roundedMinutes < 60) {
    return `${roundedMinutes}m`;
  }

  const wholeHours = Math.floor(roundedMinutes / 60);
  const leftoverMinutes = roundedMinutes % 60;

  const parts = [`${wholeHours}h`];
  if (leftoverMinutes > 0) {
    parts.push(`${leftoverMinutes}m`);
  }
  return parts.join(' ');
}
697
+
698
+ // ---------------------------------------------------------------------------
699
+ // set-availability subcommand
700
+ //
701
+ // Usage:
702
+ // node update-scoreboard.cjs set-availability \
703
+ // --slot codex-1 --message "usage limit until Feb 24 8:37 PM" [--scoreboard <path>]
704
+ // node update-scoreboard.cjs set-availability \
705
+ // --model codex --message "restart in 5 hours" [--scoreboard <path>]
706
+ //
707
+ // --slot or --model is the key in data.availability (both accepted; no functional difference).
708
+ // --message is the raw UNAVAIL output text from the agent.
709
+ // ---------------------------------------------------------------------------
710
+
711
/**
 * `set-availability` subcommand: derive a "back at" time from an agent's raw
 * UNAVAIL text and store it under data.availability[<slot-or-model>].
 *
 * Exits with status 1 when neither --slot/--model nor --message is supplied.
 * When the message carries no recognisable hint, the scoreboard is left
 * untouched and a note is printed instead.
 *
 * @param {string[]} argv - CLI arguments following the subcommand name.
 * @returns {Promise<void>}
 */
async function setAvailability(argv) {
  const opts = parseArgs(argv);
  const scoreboardPath = opts.scoreboard || defaultScoreboardPath();
  const key = opts.slot || opts.model;
  const message = opts.message || '';

  if (!key) {
    process.stderr.write('[set-availability] --slot or --model is required\n');
    process.exit(1);
  }
  if (!message) {
    process.stderr.write('[set-availability] --message "<raw output text>" is required\n');
    process.exit(1);
  }

  const hint = parseAvailabilityHint(message);
  if (!hint) {
    process.stdout.write(`[set-availability] ${key}: no availability hint found in message — skipping\n`);
    return;
  }

  const data = loadData(scoreboardPath);
  if (!data.availability) {
    data.availability = {};
  }

  const setAt = new Date();
  const msUntilAvailable = Math.max(0, hint.available_at.getTime() - setAt.getTime());

  data.availability[key] = {
    available_at_iso: hint.available_at.toISOString(),
    available_at_local: hint.available_at.toLocaleString(),
    reason: hint.reason,
    set_at: setAt.toISOString(),
  };

  // Write to a pid-suffixed temp file first, then rename it over the target.
  const targetPath = path.resolve(process.cwd(), scoreboardPath);
  fs.mkdirSync(path.dirname(targetPath), { recursive: true });
  const tempPath = `${targetPath}.${process.pid}.tmp`;
  fs.writeFileSync(tempPath, JSON.stringify(data, null, 2) + '\n', 'utf8');
  fs.renameSync(tempPath, targetPath);

  let eta = 'available now';
  if (msUntilAvailable > 0) {
    eta = `available in ${formatDuration(msUntilAvailable)}`;
  }
  process.stdout.write(
    `[set-availability] ${key}: ${hint.reason} | ${eta} | local: ${hint.available_at.toLocaleString()}\n`
  );
}
756
+
757
+ // ---------------------------------------------------------------------------
758
+ // get-availability subcommand
759
+ //
760
+ // Usage:
761
+ // node update-scoreboard.cjs get-availability [--scoreboard <path>]
762
+ //
763
+ // Outputs JSON: { "<slot-or-model>": { available_at_iso, available_at_local,
764
+ // reason, set_at, is_available,
765
+ // remaining_ms, remaining_display } }
766
+ //
767
+ // Use this to check dormant slots before invoking them in a quorum run.
768
+ // ---------------------------------------------------------------------------
769
+
770
/**
 * `get-availability` subcommand: print, as JSON on stdout, the availability
 * state of every key stored in data.availability.
 *
 * Each output entry echoes the stored fields and adds `is_available`,
 * `remaining_ms` and `remaining_display`, all computed against the current time.
 *
 * Entries whose `available_at_iso` is missing or unparseable are reported as
 * available with zero remaining time: without a usable timestamp the entry
 * could otherwise never become available again and would serialise NaN.
 * Entries that are not objects are skipped with a warning on stderr.
 *
 * @param {string[]} argv - CLI arguments following the subcommand name.
 * @returns {Promise<void>}
 */
async function getAvailability(argv) {
  const args = parseArgs(argv);
  const scoreboardPath = args.scoreboard || defaultScoreboardPath();
  const data = loadData(scoreboardPath);
  const now = Date.now();

  const result = {};
  for (const [key, avail] of Object.entries(data.availability || {})) {
    if (avail === null || typeof avail !== 'object') {
      // stdout must stay valid JSON, so the warning goes to stderr.
      process.stderr.write(`[get-availability] WARNING: malformed availability entry for "${key}" — skipping\n`);
      continue;
    }

    // getTime() yields NaN for an invalid Date; every comparison with NaN is false.
    const available_at_ms = new Date(avail.available_at_iso).getTime();
    const hasTimestamp = Number.isFinite(available_at_ms);
    const is_available = !hasTimestamp || available_at_ms <= now;
    const remaining_ms = hasTimestamp ? Math.max(0, available_at_ms - now) : 0;

    result[key] = {
      available_at_iso: avail.available_at_iso,
      available_at_local: avail.available_at_local,
      reason: avail.reason,
      set_at: avail.set_at,
      is_available,
      remaining_ms,
      remaining_display: formatDuration(remaining_ms),
    };
  }

  process.stdout.write(JSON.stringify(result, null, 2) + '\n');
}
794
+
795
+ // ---------------------------------------------------------------------------
796
+ // merge-wave subcommand
797
+ //
798
+ // Usage:
799
+ // node update-scoreboard.cjs merge-wave \
800
+ // --dir .planning/scoreboard-tmp \
801
+ // --task "quick-97" --round 1 \
802
+ // [--scoreboard <path>]
803
+ //
804
+ // Reads all vote files matching vote-*-<task>-<round>-*.json in --dir,
805
+ // applies them in one atomic transaction to the scoreboard.
806
+ //
807
+ // Vote file schema (JSON):
808
+ // {
809
+ // "slot": "<slotName>", // for --slot path
810
+ // "model": "<modelFamily>", // for --model path (alternative)
811
+ // "modelId": "<fullModelId>", // required when slot is set
812
+ // "result": "TP|TN|FP|FN|TP+|TN+|UNAVAIL|",
813
+ // "verdict": "APPROVE|REJECT|FLAG|CONSENSUS|DELIBERATE|GAPS_FOUND",
814
+ // "taskDescription": "<optional>"
815
+ // }
816
+ // ---------------------------------------------------------------------------
817
+
818
/**
 * `merge-wave` subcommand: apply every vote file for one task/round to the
 * scoreboard and write the result once.
 *
 * Vote files are looked up in --dir (default ".planning/scoreboard-tmp") and
 * must be named vote-*-<task>-<round>-*.json. Each file is applied in memory;
 * afterwards all statistics are recomputed and the scoreboard is written via
 * a temp file + rename.
 *
 * Votes are skipped with a warning on stderr when the file is not valid JSON,
 * is not a JSON object, carries an unknown result code or model, or names
 * neither slot+modelId nor model. A vote with an empty result (Mode A) only
 * records the round verdict.
 *
 * Exits with status 1 when --task is missing or --round is not a positive integer.
 *
 * @param {string[]} argv - CLI arguments following the subcommand name.
 * @returns {Promise<void>}
 */
async function mergeWave(argv) {
  const args = parseArgs(argv);
  const scoreboardPath = args.scoreboard || defaultScoreboardPath();
  const dir = args.dir || '.planning/scoreboard-tmp';
  const task = args.task;
  const round = parseInt(args.round, 10);

  if (!task) {
    process.stderr.write('[merge-wave] --task is required\n');
    process.exit(1);
  }
  if (isNaN(round) || round < 1) {
    process.stderr.write('[merge-wave] --round must be a positive integer\n');
    process.exit(1);
  }

  const absDir = path.resolve(process.cwd(), dir);
  if (!fs.existsSync(absDir)) {
    process.stdout.write(`[merge-wave] dir ${absDir} does not exist — no votes to merge\n`);
    return;
  }

  // Find matching vote files: vote-*-<task>-<round>-*.json
  // Task and round are matched as one adjacent "<task>-<round>" unit. Checking
  // them independently let a file from another round match whenever its
  // trailing part happened to contain "-<round>-". The task is escaped because
  // it is user input placed inside a regular expression.
  const escapedTask = String(task).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`^vote-(?:.*-)?${escapedTask}-${round}-[^/]+\\.json$`);
  const files = fs.readdirSync(absDir).filter(f => pattern.test(f));

  if (files.length === 0) {
    process.stdout.write(`[merge-wave] no vote files found for task=${task} round=${round} in ${absDir}\n`);
    return;
  }

  // Parse all vote files
  const votes = [];
  for (const file of files) {
    try {
      const filePath = path.join(absDir, file);
      const vote = JSON.parse(fs.readFileSync(filePath, 'utf8'));
      // `null` or a bare number is valid JSON but would crash on property access below.
      if (vote === null || typeof vote !== 'object') {
        process.stderr.write(`[merge-wave] WARNING: ${file} does not contain a JSON object — skipping\n`);
        continue;
      }
      votes.push({ file, vote });
    } catch (e) {
      process.stderr.write(`[merge-wave] WARNING: could not parse ${file}: ${e.message}\n`);
    }
  }

  if (votes.length === 0) {
    process.stdout.write(`[merge-wave] all vote files unparseable — nothing to merge\n`);
    return;
  }

  // Load scoreboard once
  const data = loadData(scoreboardPath);

  // Ensure all model keys exist
  for (const model of VALID_MODELS) {
    if (!data.models[model]) data.models[model] = emptyModelStats();
  }

  // 'TN+' is included because the vote-file schema documented for this
  // subcommand lists it alongside the other codes.
  const VALID_RESULTS = new Set(['TP', 'TP+', 'TN', 'TN+', 'FP', 'FN', 'UNAVAIL']);

  // Build a fresh round entry, stamped with the team fingerprint when one is recorded.
  const createRoundEntry = (votesMap, verdict) => {
    const entry = { date: todayMMDD(), task, round, votes: votesMap, verdict };
    if (data.team && data.team.fingerprint) entry.team_fingerprint = data.team.fingerprint;
    return entry;
  };

  // Set one vote on the round entry for this task/round, creating the entry if needed.
  const recordVote = (voteKey, result, verdict) => {
    const existing = data.rounds.find(r => r.task === task && r.round === round);
    if (existing) {
      existing.votes = existing.votes || {};
      existing.votes[voteKey] = result;
      existing.verdict = verdict;
    } else {
      data.rounds.push(createRoundEntry({ [voteKey]: result }, verdict));
    }
  };

  // Apply all votes to data in memory
  let mergedCount = 0;
  for (const { file, vote } of votes) {
    // Normalise UNAVAILABLE → UNAVAIL (typo variant from early rounds)
    const result = vote.result === 'UNAVAILABLE' ? 'UNAVAIL' : (vote.result || '');
    const verdict = vote.verdict || '';

    // Mode A rounds intentionally have no binary result. Skip writing an empty
    // string — it would corrupt empirical rate calculations by appearing as an
    // "available but unclassified" vote. The availability signal for Mode A is
    // tracked separately via set-availability calls.
    if (result === '') {
      process.stderr.write(`[merge-wave] NOTE: ${file} has no result code (Mode A round) — availability recorded via verdict only\n`);
      // Still record the verdict/round for audit trail, but omit from votes map
      const existing = data.rounds.find(r => r.task === task && r.round === round);
      if (!existing) {
        data.rounds.push(createRoundEntry({}, verdict));
      } else if (!existing.verdict) {
        existing.verdict = verdict;
      }
      mergedCount += 1;
      continue;
    }

    if (!VALID_RESULTS.has(result)) {
      process.stderr.write(`[merge-wave] WARNING: unknown result "${result}" in ${file} — skipping\n`);
      continue;
    }

    if (vote.slot && vote.modelId) {
      // Slot mode
      const compositeKey = `${vote.slot}:${vote.modelId}`;
      if (!data.slots[compositeKey]) {
        data.slots[compositeKey] = emptySlotStats(vote.slot, vote.modelId);
      }
      recordVote(compositeKey, result, verdict);
      mergedCount += 1;
    } else if (vote.model) {
      // Model mode
      if (!VALID_MODELS.includes(vote.model)) {
        process.stderr.write(`[merge-wave] WARNING: unknown model "${vote.model}" in ${file} — skipping\n`);
        continue;
      }
      recordVote(vote.model, result, verdict);
      mergedCount += 1;
    } else {
      process.stderr.write(`[merge-wave] WARNING: vote file ${file} missing slot+modelId or model — skipping\n`);
    }
  }

  // Recompute stats from scratch
  recomputeStats(data);
  recomputeSlots(data);
  computeDeliveryStats(data);
  computeFlakiness(data);

  // Single atomic write
  const absPath = path.resolve(process.cwd(), scoreboardPath);
  fs.mkdirSync(path.dirname(absPath), { recursive: true });
  const tmpPath = absPath + '.' + process.pid + '.tmp';
  fs.writeFileSync(tmpPath, JSON.stringify(data, null, 2) + '\n', 'utf8');
  fs.renameSync(tmpPath, absPath);

  // Report only the votes that were actually applied, not every parsed file.
  process.stdout.write(`[merge-wave] merged ${mergedCount} vote(s) for task=${task} round=${round} into ${scoreboardPath}\n`);
}
968
+
969
+ // ---------------------------------------------------------------------------
970
+ // Main
971
+ // ---------------------------------------------------------------------------
972
+
973
/**
 * CLI entry point.
 *
 * Dispatches to a subcommand when the first argument names one; otherwise
 * records a single vote, either keyed by "<slot>:<modelId>" (slot mode) or by
 * model family (model mode), then rewrites the scoreboard and prints a
 * one-line confirmation.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cliArgs = process.argv.slice(2);
  const [subcommand, ...subcommandArgs] = cliArgs;

  // Subcommand routing
  switch (subcommand) {
    case 'init-team':
      return initTeam(subcommandArgs);
    case 'set-availability':
      return setAvailability(subcommandArgs);
    case 'get-availability':
      return getAvailability(subcommandArgs);
    case 'merge-wave':
      return mergeWave(subcommandArgs);
    default:
      break;
  }

  const cfg = validate(parseArgs(cliArgs));
  const data = loadData(cfg.scoreboard);

  // Serialise `data` to a pid-suffixed temp file, then rename it over the scoreboard.
  const writeScoreboard = () => {
    const targetPath = path.resolve(process.cwd(), cfg.scoreboard);
    fs.mkdirSync(path.dirname(targetPath), { recursive: true });
    const tempPath = `${targetPath}.${process.pid}.tmp`;
    fs.writeFileSync(tempPath, JSON.stringify(data, null, 2) + '\n', 'utf8');
    fs.renameSync(tempPath, targetPath);
  };

  // ---------------------------------------------------------------------------
  // Slot mode: --slot + --model-id path (SCBD-01, SCBD-02, SCBD-03)
  // ---------------------------------------------------------------------------
  if (cfg.slot) {
    const slotKey = `${cfg.slot}:${cfg.modelId}`;

    // Make sure the slot has a stats record before recomputing.
    if (!data.slots[slotKey]) {
      data.slots[slotKey] = emptySlotStats(cfg.slot, cfg.modelId);
    }

    // Slot mode always appends a new round entry keyed by the composite key.
    const slotRound = {
      date: todayMMDD(),
      task: cfg.task,
      round: cfg.round,
      votes: { [slotKey]: cfg.result },
      verdict: cfg.verdict,
    };
    if (data.team && data.team.fingerprint) {
      slotRound.team_fingerprint = data.team.fingerprint;
    }
    data.rounds.push(slotRound);

    // Only slot stats are recomputed here; recomputeStats belongs to the --model path.
    recomputeSlots(data);
    writeScoreboard();

    process.stdout.write(`[update-scoreboard] slot ${cfg.slot} (${cfg.modelId}): ${cfg.result} | score=${data.slots[slotKey].score}\n`);
    return;
  }

  // ---------------------------------------------------------------------------
  // Model mode: --model path
  // ---------------------------------------------------------------------------

  // Every known model gets a stats record, even if it has never voted.
  for (const knownModel of VALID_MODELS) {
    if (!data.models[knownModel]) {
      data.models[knownModel] = emptyModelStats();
    }
  }

  // Category resolution: explicit flags win and leave the categories map
  // untouched; with neither flag but a task description, ask Haiku.
  let category = cfg.category;
  let subcategory = cfg.subcategory;

  if (!category && !subcategory && cfg.taskDescription) {
    const classification = await classifyWithHaiku(cfg.taskDescription, data.categories);
    if (classification) {
      category = classification.category;
      subcategory = classification.subcategory;

      // A brand-new category is created on demand; an existing one only gains
      // the subcategory when it is not listed yet.
      if (classification.is_new && !data.categories[category]) {
        data.categories[category] = [];
      }
      const knownSubcategories = data.categories[category];
      if (knownSubcategories && !knownSubcategories.includes(subcategory)) {
        knownSubcategories.push(subcategory);
      }
    }
  }

  const hasCategory = Boolean(category && subcategory);

  // Upsert the round entry matching task + round number.
  const roundIdx = data.rounds.findIndex(
    (entry) => entry.task === cfg.task && entry.round === cfg.round
  );

  if (roundIdx === -1) {
    // No entry yet for this task/round: append one.
    const modelRound = {
      date: todayMMDD(),
      task: cfg.task,
      round: cfg.round,
      votes: { [cfg.model]: cfg.result },
      verdict: cfg.verdict,
    };
    if (hasCategory) {
      modelRound.category = category;
      modelRound.subcategory = subcategory;
    }
    if (data.team && data.team.fingerprint) {
      modelRound.team_fingerprint = data.team.fingerprint;
    }
    data.rounds.push(modelRound);
  } else {
    // Existing entry: overwrite this model's vote and the verdict.
    const current = data.rounds[roundIdx];
    current.votes = current.votes || {};
    current.votes[cfg.model] = cfg.result;
    current.verdict = cfg.verdict;
    if (hasCategory) {
      current.category = category;
      current.subcategory = subcategory;
    }
  }

  // Recompute all cumulative stats from scratch, then persist.
  recomputeStats(data);
  writeScoreboard();

  // Print confirmation
  const delta = SCORE_DELTAS[cfg.result] || 0;
  const sign = delta >= 0 ? '+' : '';
  const newScore = data.models[cfg.model].score;
  const deltaStr = cfg.result === '' ? '(not scored)' : `${cfg.result} (${sign}${delta})`;
  let confirmation = `[update-scoreboard] ${cfg.model}: ${deltaStr} → score: ${newScore} | ${cfg.task} R${cfg.round} ${cfg.verdict}`;
  if (hasCategory) {
    confirmation += ` | category: ${category} > ${subcategory}`;
  }
  process.stdout.write(confirmation + '\n');
}
1122
+
1123
// Expose the pure helpers so tests can require() this file without running the CLI.
if (typeof module !== 'undefined') {
  module.exports = { computeDeliveryStats, computeFlakiness, emptyData };
}

// Run the CLI only when this file is the process entry point; any rejection
// from main() is reported on stderr and turned into exit status 1.
if (require.main === module) {
  main().catch((fatal) => {
    process.stderr.write(`[update-scoreboard] FATAL: ${fatal.message}\n`);
    process.exit(1);
  });
}