@nforma.ai/nforma 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +1024 -0
- package/agents/qgsd-codebase-mapper.md +764 -0
- package/agents/qgsd-debugger.md +1201 -0
- package/agents/qgsd-executor.md +472 -0
- package/agents/qgsd-integration-checker.md +443 -0
- package/agents/qgsd-phase-researcher.md +502 -0
- package/agents/qgsd-plan-checker.md +643 -0
- package/agents/qgsd-planner.md +1182 -0
- package/agents/qgsd-project-researcher.md +621 -0
- package/agents/qgsd-quorum-orchestrator.md +628 -0
- package/agents/qgsd-quorum-slot-worker.md +41 -0
- package/agents/qgsd-quorum-synthesizer.md +133 -0
- package/agents/qgsd-quorum-test-worker.md +37 -0
- package/agents/qgsd-quorum-worker.md +161 -0
- package/agents/qgsd-research-synthesizer.md +239 -0
- package/agents/qgsd-roadmapper.md +660 -0
- package/agents/qgsd-verifier.md +628 -0
- package/bin/accept-debug-invariant.cjs +165 -0
- package/bin/account-manager.cjs +719 -0
- package/bin/aggregate-requirements.cjs +466 -0
- package/bin/analyze-assumptions.cjs +757 -0
- package/bin/analyze-state-space.cjs +921 -0
- package/bin/attribute-trace-divergence.cjs +150 -0
- package/bin/auth-drivers/gh-cli.cjs +93 -0
- package/bin/auth-drivers/index.cjs +46 -0
- package/bin/auth-drivers/pool.cjs +67 -0
- package/bin/auth-drivers/simple.cjs +95 -0
- package/bin/autoClosePtoF.cjs +110 -0
- package/bin/blessed-terminal.cjs +350 -0
- package/bin/build-phase-index.cjs +472 -0
- package/bin/call-quorum-slot.cjs +541 -0
- package/bin/ccr-secure-config.cjs +99 -0
- package/bin/ccr-secure-start.cjs +83 -0
- package/bin/check-bundled-sdks.cjs +177 -0
- package/bin/check-coverage-guard.cjs +112 -0
- package/bin/check-liveness-fairness.cjs +95 -0
- package/bin/check-mcp-health.cjs +123 -0
- package/bin/check-provider-health.cjs +395 -0
- package/bin/check-results-exit.cjs +24 -0
- package/bin/check-spec-sync.cjs +360 -0
- package/bin/check-trace-redaction.cjs +271 -0
- package/bin/check-trace-schema-drift.cjs +99 -0
- package/bin/compareDrift.cjs +21 -0
- package/bin/conformance-schema.cjs +12 -0
- package/bin/count-scenarios.cjs +420 -0
- package/bin/debt-dedup.cjs +144 -0
- package/bin/debt-ledger.cjs +61 -0
- package/bin/debt-retention.cjs +76 -0
- package/bin/debt-state-machine.cjs +80 -0
- package/bin/detect-coverage-gaps.cjs +204 -0
- package/bin/detect-project-intent.cjs +362 -0
- package/bin/export-prism-constants.cjs +164 -0
- package/bin/extract-annotations.cjs +633 -0
- package/bin/extractFormalExpected.cjs +104 -0
- package/bin/fingerprint-drift.cjs +24 -0
- package/bin/fingerprint-issue.cjs +46 -0
- package/bin/formal-core.cjs +519 -0
- package/bin/formal-ref-linker.cjs +141 -0
- package/bin/formal-test-sync.cjs +788 -0
- package/bin/generate-formal-specs.cjs +588 -0
- package/bin/generate-petri-net.cjs +397 -0
- package/bin/generate-phase-spec.cjs +249 -0
- package/bin/generate-proposed-changes.cjs +194 -0
- package/bin/generate-tla-cfg.cjs +122 -0
- package/bin/generate-traceability-matrix.cjs +701 -0
- package/bin/generate-triage-bundle.cjs +300 -0
- package/bin/gh-account-rotate.cjs +34 -0
- package/bin/initialize-model-registry.cjs +105 -0
- package/bin/install-formal-tools.cjs +382 -0
- package/bin/install.js +2424 -0
- package/bin/isNumericThreshold.cjs +34 -0
- package/bin/issue-classifier.cjs +151 -0
- package/bin/levenshtein.cjs +74 -0
- package/bin/lint-formal-models.cjs +580 -0
- package/bin/load-baseline-requirements.cjs +275 -0
- package/bin/manage-agents-core.cjs +815 -0
- package/bin/migrate-formal-dir.cjs +172 -0
- package/bin/migrate-planning.cjs +206 -0
- package/bin/migrate-to-slots.cjs +255 -0
- package/bin/nForma.cjs +2726 -0
- package/bin/observe-config.cjs +353 -0
- package/bin/observe-debt-writer.cjs +140 -0
- package/bin/observe-handler-grafana.cjs +128 -0
- package/bin/observe-handler-internal.cjs +301 -0
- package/bin/observe-handler-logstash.cjs +153 -0
- package/bin/observe-handler-prometheus.cjs +185 -0
- package/bin/observe-handlers.cjs +436 -0
- package/bin/observe-registry.cjs +131 -0
- package/bin/observe-render.cjs +168 -0
- package/bin/planning-paths.cjs +167 -0
- package/bin/polyrepo.cjs +560 -0
- package/bin/prism-priority.cjs +153 -0
- package/bin/probe-quorum-slots.cjs +167 -0
- package/bin/promote-model.cjs +225 -0
- package/bin/propose-debug-invariants.cjs +165 -0
- package/bin/providers.json +392 -0
- package/bin/pty-proxy.py +129 -0
- package/bin/qgsd-solve.cjs +2477 -0
- package/bin/quorum-consensus-gate.cjs +238 -0
- package/bin/quorum-formal-context.cjs +183 -0
- package/bin/quorum-slot-dispatch.cjs +934 -0
- package/bin/read-policy.cjs +60 -0
- package/bin/requirement-map.cjs +63 -0
- package/bin/requirements-core.cjs +247 -0
- package/bin/resolve-cli.cjs +101 -0
- package/bin/review-mcp-logs.cjs +294 -0
- package/bin/run-account-manager-tlc.cjs +188 -0
- package/bin/run-account-pool-alloy.cjs +158 -0
- package/bin/run-alloy.cjs +153 -0
- package/bin/run-audit-alloy.cjs +187 -0
- package/bin/run-breaker-tlc.cjs +181 -0
- package/bin/run-formal-check.cjs +395 -0
- package/bin/run-formal-verify.cjs +701 -0
- package/bin/run-installer-alloy.cjs +188 -0
- package/bin/run-oauth-rotation-prism.cjs +132 -0
- package/bin/run-oscillation-tlc.cjs +202 -0
- package/bin/run-phase-tlc.cjs +228 -0
- package/bin/run-prism.cjs +446 -0
- package/bin/run-protocol-tlc.cjs +201 -0
- package/bin/run-quorum-composition-alloy.cjs +155 -0
- package/bin/run-sensitivity-sweep.cjs +231 -0
- package/bin/run-stop-hook-tlc.cjs +188 -0
- package/bin/run-tlc.cjs +467 -0
- package/bin/run-transcript-alloy.cjs +173 -0
- package/bin/run-uppaal.cjs +264 -0
- package/bin/secrets.cjs +134 -0
- package/bin/sensitivity-report.cjs +219 -0
- package/bin/sensitivity-sweep-feedback.cjs +194 -0
- package/bin/set-secret.cjs +29 -0
- package/bin/setup-telemetry-cron.sh +36 -0
- package/bin/sweepPtoF.cjs +63 -0
- package/bin/sync-baseline-requirements.cjs +290 -0
- package/bin/task-envelope.cjs +360 -0
- package/bin/telemetry-collector.cjs +229 -0
- package/bin/unified-mcp-server.mjs +735 -0
- package/bin/update-agents.cjs +369 -0
- package/bin/update-scoreboard.cjs +1134 -0
- package/bin/validate-debt-entry.cjs +207 -0
- package/bin/validate-invariant.cjs +419 -0
- package/bin/validate-memory.cjs +389 -0
- package/bin/validate-requirements-haiku.cjs +435 -0
- package/bin/validate-traces.cjs +438 -0
- package/bin/verify-formal-results.cjs +124 -0
- package/bin/verify-quorum-health.cjs +273 -0
- package/bin/write-check-result.cjs +106 -0
- package/bin/xstate-to-tla.cjs +483 -0
- package/bin/xstate-trace-walker.cjs +205 -0
- package/commands/qgsd/add-phase.md +43 -0
- package/commands/qgsd/add-requirement.md +24 -0
- package/commands/qgsd/add-todo.md +47 -0
- package/commands/qgsd/audit-milestone.md +37 -0
- package/commands/qgsd/check-todos.md +45 -0
- package/commands/qgsd/cleanup.md +18 -0
- package/commands/qgsd/close-formal-gaps.md +33 -0
- package/commands/qgsd/complete-milestone.md +136 -0
- package/commands/qgsd/debug.md +166 -0
- package/commands/qgsd/discuss-phase.md +83 -0
- package/commands/qgsd/execute-phase.md +117 -0
- package/commands/qgsd/fix-tests.md +27 -0
- package/commands/qgsd/formal-test-sync.md +32 -0
- package/commands/qgsd/health.md +22 -0
- package/commands/qgsd/help.md +22 -0
- package/commands/qgsd/insert-phase.md +32 -0
- package/commands/qgsd/join-discord.md +18 -0
- package/commands/qgsd/list-phase-assumptions.md +46 -0
- package/commands/qgsd/map-codebase.md +71 -0
- package/commands/qgsd/map-requirements.md +20 -0
- package/commands/qgsd/mcp-restart.md +176 -0
- package/commands/qgsd/mcp-set-model.md +134 -0
- package/commands/qgsd/mcp-setup.md +1371 -0
- package/commands/qgsd/mcp-status.md +274 -0
- package/commands/qgsd/mcp-update.md +238 -0
- package/commands/qgsd/new-milestone.md +44 -0
- package/commands/qgsd/new-project.md +42 -0
- package/commands/qgsd/observe.md +260 -0
- package/commands/qgsd/pause-work.md +38 -0
- package/commands/qgsd/plan-milestone-gaps.md +34 -0
- package/commands/qgsd/plan-phase.md +44 -0
- package/commands/qgsd/polyrepo.md +50 -0
- package/commands/qgsd/progress.md +24 -0
- package/commands/qgsd/queue.md +54 -0
- package/commands/qgsd/quick.md +133 -0
- package/commands/qgsd/quorum-test.md +275 -0
- package/commands/qgsd/quorum.md +707 -0
- package/commands/qgsd/reapply-patches.md +110 -0
- package/commands/qgsd/remove-phase.md +31 -0
- package/commands/qgsd/research-phase.md +189 -0
- package/commands/qgsd/resume-work.md +40 -0
- package/commands/qgsd/set-profile.md +34 -0
- package/commands/qgsd/settings.md +39 -0
- package/commands/qgsd/solve.md +565 -0
- package/commands/qgsd/sync-baselines.md +119 -0
- package/commands/qgsd/triage.md +233 -0
- package/commands/qgsd/update.md +37 -0
- package/commands/qgsd/verify-work.md +38 -0
- package/hooks/dist/config-loader.js +297 -0
- package/hooks/dist/conformance-schema.cjs +12 -0
- package/hooks/dist/gsd-context-monitor.js +64 -0
- package/hooks/dist/qgsd-check-update.js +62 -0
- package/hooks/dist/qgsd-circuit-breaker.js +682 -0
- package/hooks/dist/qgsd-precompact.js +156 -0
- package/hooks/dist/qgsd-prompt.js +653 -0
- package/hooks/dist/qgsd-session-start.js +122 -0
- package/hooks/dist/qgsd-slot-correlator.js +58 -0
- package/hooks/dist/qgsd-spec-regen.js +86 -0
- package/hooks/dist/qgsd-statusline.js +91 -0
- package/hooks/dist/qgsd-stop.js +553 -0
- package/hooks/dist/qgsd-token-collector.js +133 -0
- package/hooks/dist/unified-mcp-server.mjs +669 -0
- package/package.json +95 -0
- package/scripts/build-hooks.js +46 -0
- package/scripts/postinstall.js +48 -0
- package/scripts/secret-audit.sh +45 -0
- package/templates/qgsd.json +49 -0
|
@@ -0,0 +1,1134 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* update-scoreboard.cjs
|
|
6
|
+
*
|
|
7
|
+
* CLI script to update .planning/quorum-scoreboard.json atomically.
|
|
8
|
+
* Reads current JSON, applies score delta for one model/round, recalculates
|
|
9
|
+
* all cumulative stats from scratch, writes back.
|
|
10
|
+
*
|
|
11
|
+
* Usage (round vote):
|
|
12
|
+
* node bin/update-scoreboard.cjs \
|
|
13
|
+
* --model <name> --result <code> --task <label> --round <n> --verdict <v> \
|
|
14
|
+
* [--scoreboard <path>] [--category <cat>] [--subcategory <subcat>] \
|
|
15
|
+
* [--task-description <text>]
|
|
16
|
+
*
|
|
17
|
+
* Usage (team identity — once per session):
|
|
18
|
+
* node bin/update-scoreboard.cjs init-team \
|
|
19
|
+
* --claude-model <model-id> \
|
|
20
|
+
* --team '<json-object-of-agent-identities>' \
|
|
21
|
+
* [--scoreboard <path>]
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
const fs = require('fs');
|
|
25
|
+
const path = require('path');
|
|
26
|
+
const crypto = require('crypto');
|
|
27
|
+
const os = require('os');
|
|
28
|
+
const https = require('https');
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Score delta lookup
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
// Score contribution of each vote result code. Looked up by recomputeStats()
// and recomputeSlots() when replaying rounds; a code missing from this table
// contributes no score.
const SCORE_DELTAS = {
  'TP': 1,
  'TN': 5,
  'FP': -3,
  'FN': -1,
  // "+" variants = base effectiveness score plus a +2 bonus:
  'TP+': 3, // +1 TP effectiveness + +2 improvement bonus
  'TN+': 7, // +5 TN effectiveness + +2 constructive alternative adopted
  // Not scored:
  'UNAVAIL': 0,
  '': 0,
};
|
|
44
|
+
|
|
45
|
+
// Model names accepted by --model; also the set of per-model stat buckets
// that recomputeStats() resets and replays.
const VALID_MODELS = [
  'claude',
  'gemini',
  'opencode',
  'copilot',
  'codex',
  'deepseek',
  'minimax',
  'qwen-coder',
  'kimi',
  'llama4',
];

// Result codes accepted by --result ('' means "not scored").
const VALID_RESULTS = [
  'TP',
  'TN',
  'FP',
  'FN',
  'TP+',
  'TN+',
  'UNAVAIL',
  '',
];

// Verdict labels documented for --verdict.
// NOTE(review): validate() does not check --verdict against this list.
const VALID_VERDICTS = [
  'APPROVE',
  'BLOCK',
  'DELIBERATE',
  'CONSENSUS',
  'GAPS_FOUND',
  '—',
];
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Argument parsing
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
/**
 * Turn a flat argv list into a { flagName: value } map.
 *
 * Only tokens beginning with "--" are treated as flags. A flag takes the
 * following token as its value unless that token is missing or is itself a
 * flag, in which case the value is the empty string. Tokens that are neither
 * a flag nor consumed as a value are ignored.
 *
 * @param {string[]} argv - command-line tokens (without node/script path)
 * @returns {Object<string, string>} map of flag name (without "--") to value
 */
function parseArgs(argv) {
  const parsed = {};
  let cursor = 0;

  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;

    if (!token.startsWith('--')) {
      continue; // stray positional token — not a flag
    }

    const flag = token.slice(2);
    const candidate = argv[cursor];
    const hasValue = candidate !== undefined && !candidate.startsWith('--');

    if (hasValue) {
      parsed[flag] = candidate;
      cursor += 1; // the value token has been consumed
    } else {
      parsed[flag] = '';
    }
  }

  return parsed;
}
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Default scoreboard path helper
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
/**
 * Resolve the scoreboard location used when --scoreboard is not supplied.
 *
 * Delegates to resolveWithFallback() from ./planning-paths.cjs; if that
 * module cannot be loaded or the call throws, falls back to the relative
 * path '.planning/quorum-scoreboard.json'.
 *
 * @returns {string} scoreboard path
 */
function defaultScoreboardPath() {
  let resolved;
  try {
    const planningPaths = require('./planning-paths.cjs');
    resolved = planningPaths.resolveWithFallback(process.cwd(), 'quorum-scoreboard');
  } catch (_) {
    resolved = '.planning/quorum-scoreboard.json';
  }
  return resolved;
}
|
|
78
|
+
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Usage / validation
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
// Help text written to stderr by validate() ahead of the error list.
// The --model choices are generated from VALID_MODELS so the help cannot drift
// from what validate() actually accepts, and --result lists UNAVAIL, which
// VALID_RESULTS accepts.
const USAGE = `Usage: node bin/update-scoreboard.cjs --model <name> --result <code> --task <label> --round <n> --verdict <v> [--scoreboard <path>] [--category <cat>] [--subcategory <subcat>] [--task-description <text>]
--model ${VALID_MODELS.join(' | ')}
--result TP | TN | FP | FN | TP+ | TN+ | UNAVAIL | (empty for not scored)
--task task label, e.g. "quick-25"
--round round number (integer)
--verdict APPROVE | BLOCK | DELIBERATE | CONSENSUS | GAPS_FOUND | —
--category (optional) explicit parent category name
--subcategory (optional) explicit subcategory name
--task-description (optional) debate question/topic text; used by Haiku auto-classification when --category/--subcategory omitted
--slot slot name (e.g. claude-1) — use instead of --model for MCP server instances
--model-id full model id from health_check (e.g. "deepseek-ai/DeepSeek-V3") — required with --slot`;
|
|
94
|
+
|
|
95
|
+
/**
 * Validate parsed CLI arguments for a round-vote update and normalize them.
 *
 * Rules enforced:
 *   - exactly one of --slot / --model; --slot additionally requires --model-id
 *   - --task, --round and --verdict must be non-empty
 *   - --result must be present as a key (its value may be the empty string)
 *   - --model, when given, must be in VALID_MODELS
 *   - --result must be in VALID_RESULTS
 *   - --round must parse (base 10) to an integer >= 1
 *
 * On any violation, writes USAGE plus the collected errors to stderr and
 * terminates the process with exit code 1.
 *
 * @param {Object<string, string>} args - output of parseArgs()
 * @returns {{model: (string|undefined), slot: (string|null), modelId: (string|null),
 *            result: string, task: string, round: number, verdict: string,
 *            scoreboard: string, category: (string|null),
 *            subcategory: (string|null), taskDescription: (string|null)}}
 */
function validate(args) {
  const errors = [];

  // --slot and --model are mutually exclusive
  if (args.slot && args.model) {
    errors.push('--slot and --model are mutually exclusive');
  } else if (args.slot) {
    // Slot mode: require --model-id
    if (!args['model-id']) errors.push('--model-id is required when using --slot');
  } else {
    // Model mode: require --model
    if (!args.model) errors.push('--model is required');
  }
  if (!args.task) errors.push('--task is required');
  if (!args.round) errors.push('--round is required');
  if (!args.verdict) errors.push('--verdict is required');
  // --result can be empty string (not scored), but must be present as key
  if (!('result' in args)) errors.push('--result is required (use empty string for not scored)');

  if (args.model && !VALID_MODELS.includes(args.model)) {
    errors.push(`--model must be one of: ${VALID_MODELS.join(', ')}`);
  }

  const result = args.result || '';
  if (!VALID_RESULTS.includes(result)) {
    // Build the list from VALID_RESULTS so every accepted code (including
    // UNAVAIL) is reported, mirroring the --model message above.
    const codes = VALID_RESULTS.filter(code => code !== '').join(', ');
    errors.push(`--result must be one of: ${codes}, (empty)`);
  }

  const roundNum = parseInt(args.round, 10);
  if (Number.isNaN(roundNum) || roundNum < 1) {
    errors.push('--round must be a positive integer');
  }

  if (errors.length > 0) {
    process.stderr.write(USAGE + '\n\nErrors:\n' + errors.map(e => ' ' + e).join('\n') + '\n');
    process.exit(1);
  }

  return {
    model: args.model, // undefined in slot mode
    slot: args.slot || null,
    modelId: args['model-id'] || null,
    result: result,
    task: args.task,
    round: roundNum,
    verdict: args.verdict,
    scoreboard: args.scoreboard || defaultScoreboardPath(),
    category: args.category || null,
    subcategory: args.subcategory || null,
    taskDescription: args['task-description'] || null,
  };
}
|
|
147
|
+
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
// JSON schema helpers
|
|
150
|
+
// ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
/**
 * Build a fresh, zeroed stats record for one model.
 *
 * @returns {{score: number, tp: number, tn: number, fp: number, fn: number,
 *            impr: number, invocations: number}} a new object on every call
 */
function emptyModelStats() {
  const stats = {};
  for (const field of ['score', 'tp', 'tn', 'fp', 'fn', 'impr', 'invocations']) {
    stats[field] = 0;
  }
  return stats;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Build the initial scoreboard document used when no usable file exists.
 *
 * @returns {Object} scoreboard with a zeroed stats record per model, empty
 *   slots/categories/rounds/availability, and default delivery_stats
 */
function emptyData() {
  const modelNames = [
    'claude', 'gemini', 'opencode', 'copilot', 'codex',
    'deepseek', 'minimax', 'qwen-coder', 'kimi', 'llama4',
  ];

  const models = {};
  for (const name of modelNames) {
    models[name] = emptyModelStats();
  }

  const deliveryStats = {
    total_rounds: 0,
    target_vote_count: 3,
    achieved_by_outcome: {},
  };

  return {
    models,
    slots: {}, // slot-keyed map; key = '<slot-name>:<model-id>'
    categories: {},
    rounds: [],
    availability: {}, // per-slot availability windows: { slotOrModel: { available_at_iso, ... } }
    delivery_stats: deliveryStats,
  };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Load the scoreboard JSON from disk, filling in any missing sections.
 *
 * Returns emptyData() when the file does not exist. When the file cannot be
 * read or parsed, or its top-level value is not a JSON object, a warning is
 * written to stderr and emptyData() is returned.
 *
 * Missing `models`, `rounds`, `categories`, `slots` and `availability`
 * sections are defaulted so that the recompute helpers can index and iterate
 * them without crashing on older or hand-edited files.
 *
 * @param {string} scoreboard - path to the scoreboard file, resolved against process.cwd()
 * @returns {Object} scoreboard document
 */
function loadData(scoreboard) {
  const absPath = path.resolve(process.cwd(), scoreboard);
  if (!fs.existsSync(absPath)) {
    return emptyData();
  }
  try {
    const raw = fs.readFileSync(absPath, 'utf8');
    const data = JSON.parse(raw);

    // Valid JSON is not necessarily a scoreboard: reject null, arrays and primitives.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      throw new Error('top-level JSON value is not an object');
    }

    // recomputeStats() indexes data.models[...]; it creates missing per-model
    // entries itself, so an empty object is a sufficient default.
    if (!data.models || typeof data.models !== 'object') {
      data.models = {};
    }
    // The recompute helpers iterate data.rounds with for...of.
    if (!Array.isArray(data.rounds)) {
      if (data.rounds !== undefined) {
        process.stderr.write(`[update-scoreboard] WARNING: "rounds" in ${absPath} is not an array; treating as empty\n`);
      }
      data.rounds = [];
    }
    // Backward compat: ensure categories exists
    if (!data.categories) {
      data.categories = {};
    }
    // Backward compat: ensure slots exists
    if (!data.slots) {
      data.slots = {};
    }
    // Backward compat: ensure availability exists
    if (!data.availability) {
      data.availability = {};
    }
    return data;
  } catch (e) {
    process.stderr.write(`[update-scoreboard] WARNING: could not parse ${absPath}: ${e.message}\n`);
    return emptyData();
  }
}
|
|
208
|
+
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
// Cumulative stats recompute (from-scratch to avoid drift)
|
|
211
|
+
// ---------------------------------------------------------------------------
|
|
212
|
+
|
|
213
|
+
/**
 * Rebuild every per-model cumulative stat from data.rounds.
 *
 * All entries for VALID_MODELS are first zeroed (and created if absent), then
 * each round's model-keyed votes are replayed. Empty and 'UNAVAIL' votes are
 * ignored entirely. Any other vote counts as an invocation; a vote whose code
 * is absent from SCORE_DELTAS counts as an invocation only.
 *
 * @param {Object} data - scoreboard document; data.models is mutated in place
 * @returns {void}
 */
function recomputeStats(data) {
  // Phase 1: zero out (or create) the record of every known model.
  for (const name of VALID_MODELS) {
    const stats = data.models[name] || (data.models[name] = emptyModelStats());
    Object.assign(stats, {
      score: 0,
      tp: 0,
      tn: 0,
      fp: 0,
      fn: 0,
      impr: 0,
      invocations: 0,
    });
  }

  // Phase 2: replay the full round history.
  for (const round of data.rounds) {
    const ballots = round.votes || {};

    for (const name of VALID_MODELS) {
      const code = ballots[name];
      if (!code || code === 'UNAVAIL') continue;

      const stats = data.models[name];
      stats.invocations += 1;

      const points = SCORE_DELTAS[code];
      if (points === undefined) continue; // unrecognized code — invocation only

      stats.score += points;

      switch (code) {
        case 'TP':
          stats.tp += 1;
          break;
        case 'TP+':
          stats.tp += 1;
          stats.impr += 1;
          break;
        case 'TN':
          stats.tn += 1;
          break;
        case 'TN+':
          stats.tn += 1;
          stats.impr += 1;
          break;
        case 'FP':
          stats.fp += 1;
          break;
        case 'FN':
          stats.fn += 1;
          break;
        default:
          break;
      }
    }
  }
}
|
|
249
|
+
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
// Slot-keyed stats helpers
|
|
252
|
+
// ---------------------------------------------------------------------------
|
|
253
|
+
|
|
254
|
+
/**
 * Build a fresh, zeroed stats record for one slot/model-id pairing.
 *
 * @param {string} slot - slot name, stored under `slot`
 * @param {string} modelId - model id, stored under `model`
 * @returns {Object} new record with identity fields followed by zeroed counters
 */
function emptySlotStats(slot, modelId) {
  const counters = { score: 0, tp: 0, tn: 0, fp: 0, fn: 0, impr: 0, invocations: 0 };
  return { slot, model: modelId, ...counters };
}
|
|
257
|
+
|
|
258
|
+
/**
 * Rebuild every per-slot cumulative stat from data.rounds.
 *
 * Existing entries in data.slots are zeroed, then each round's votes are
 * replayed. Only vote keys containing ':' (composite '<slot>:<model-id>'
 * keys) that already have an entry in data.slots are counted; empty and
 * 'UNAVAIL' votes are ignored. A vote whose code is absent from SCORE_DELTAS
 * counts as an invocation only.
 *
 * @param {Object} data - scoreboard document; data.slots entries are mutated in place
 * @returns {void}
 */
function recomputeSlots(data) {
  // Phase 1: zero the counters of every registered slot entry.
  for (const entry of Object.values(data.slots)) {
    Object.assign(entry, {
      score: 0,
      tp: 0,
      tn: 0,
      fp: 0,
      fn: 0,
      impr: 0,
      invocations: 0,
    });
  }

  // Phase 2: replay all rounds, picking out composite slot:model-id keys.
  for (const round of data.rounds) {
    const ballots = Object.entries(round.votes || {});

    for (const [slotKey, code] of ballots) {
      const entry = data.slots[slotKey];
      const countable =
        slotKey.includes(':') && // slot keys contain ':'
        Boolean(code) &&
        code !== 'UNAVAIL' &&
        Boolean(entry); // unregistered keys are skipped
      if (!countable) continue;

      entry.invocations += 1;

      const points = SCORE_DELTAS[code];
      if (points === undefined) continue;

      entry.score += points;

      switch (code) {
        case 'TP':
          entry.tp += 1;
          break;
        case 'TP+':
          entry.tp += 1;
          entry.impr += 1;
          break;
        case 'TN':
          entry.tn += 1;
          break;
        case 'TN+':
          entry.tn += 1;
          entry.impr += 1;
          break;
        case 'FP':
          entry.fp += 1;
          break;
        case 'FN':
          entry.fn += 1;
          break;
        default:
          break;
      }
    }
  }
}
|
|
284
|
+
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
// Delivery stats computation
|
|
287
|
+
// ---------------------------------------------------------------------------
|
|
288
|
+
|
|
289
|
+
/**
 * Summarize how many usable votes each round received.
 *
 * A vote is usable when it is truthy and is neither 'UNAVAIL' nor 'TIMEOUT'.
 * Rounds are bucketed by usable-vote count under keys of the form
 * '<n>_votes'; each bucket records its round count and its share of all
 * rounds as a percentage rounded to one decimal place. The summary replaces
 * data.delivery_stats.
 *
 * Any exception is reported on stderr and the data is returned as it stood.
 *
 * @param {Object} data - scoreboard document (mutated in place)
 * @returns {Object} the same `data` object
 */
function computeDeliveryStats(data) {
  const isUsable = (v) => Boolean(v) && v !== 'UNAVAIL' && v !== 'TIMEOUT';

  try {
    const rounds = data.rounds;

    if (!rounds || rounds.length === 0) {
      data.delivery_stats = {
        total_rounds: 0,
        target_vote_count: 3,
        achieved_by_outcome: {},
      };
      return data;
    }

    // Histogram: '<n>_votes' -> number of rounds with n usable votes
    const histogram = {};
    for (const round of rounds) {
      const usable = Object.values(round.votes || {}).filter(isUsable).length;
      const bucket = usable + '_votes';
      histogram[bucket] = (histogram[bucket] || 0) + 1;
    }

    // Attach each bucket's share of the total
    const total = rounds.length;
    const achieved = Object.fromEntries(
      Object.entries(histogram).map(([bucket, count]) => {
        const pct = parseFloat(((count / total) * 100).toFixed(1));
        return [bucket, { count, pct }];
      })
    );

    data.delivery_stats = {
      total_rounds: total,
      target_vote_count: 3,
      achieved_by_outcome: achieved,
    };

    return data;
  } catch (e) {
    process.stderr.write(`[computeDeliveryStats] ERROR: ${e.message}\n`);
    return data;
  }
}
|
|
343
|
+
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
// Flakiness scoring
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
|
|
348
|
+
/**
 * Compute per-slot flakiness scores from round history.
 *
 * For every slot name seen in any round's votes, the slot's verdict history
 * is built in round order: a vote that is falsy, 'UNAVAIL' or 'TIMEOUT' is
 * 'FAILED', anything else is 'SUCCESS'. Vote keys of the form
 * '<slot>:<model-id>' are grouped by the part before the first ':';
 * model-keyed votes use the whole key. If a round holds several keys for the
 * same slot name, only the first one counts.
 *
 * Flakiness is the failure rate over the trailing `windowSize` verdicts,
 * rounded to two decimals. It is stored, together with that window, as
 * `flakiness_score` and `recent_verdicts` on every data.slots entry whose
 * key's slot name matches.
 *
 * The history is gathered in a single pass over the rounds rather than
 * rescanning every round once per slot.
 *
 * Any exception is reported on stderr and the data is returned as it stood.
 *
 * @param {Object} data - scoreboard document (data.slots entries mutated in place)
 * @param {number} [windowSize=10] - number of most recent verdicts considered
 * @returns {Object} the same `data` object
 */
function computeFlakiness(data, windowSize = 10) {
  try {
    if (!data.rounds || data.rounds.length === 0) {
      // No rounds yet — nothing to score
      return data;
    }

    // Group verdicts by slot name. Map preserves first-seen order of slots.
    const historyBySlot = new Map();
    for (const round of data.rounds) {
      const votes = round.votes || {};
      const seenThisRound = new Set();

      for (const [key, voteValue] of Object.entries(votes)) {
        // 'gemini-1:model-id' -> 'gemini-1'; 'claude' -> 'claude'
        const slotName = key.split(':')[0];

        // Only the first entry for a slot within one round contributes.
        if (seenThisRound.has(slotName)) continue;
        seenThisRound.add(slotName);

        let history = historyBySlot.get(slotName);
        if (!history) {
          history = [];
          historyBySlot.set(slotName, history);
        }

        const failed = !voteValue || voteValue === 'UNAVAIL' || voteValue === 'TIMEOUT';
        history.push({
          // Fall back to the verdict's 1-based position when the round has no number
          round_num: round.round || history.length + 1,
          verdict: failed ? 'FAILED' : 'SUCCESS',
        });
      }
    }

    for (const [slotName, history] of historyBySlot) {
      const recent = history.slice(-windowSize);

      if (recent.length === 0) {
        // Only reachable when windowSize slices everything away; give
        // unscored matching entries the default 0.0 (reliable).
        for (const [key, slotEntry] of Object.entries(data.slots)) {
          if (key.split(':')[0] === slotName && !slotEntry.flakiness_score) {
            slotEntry.flakiness_score = 0.0;
            slotEntry.recent_verdicts = [];
          }
        }
        continue;
      }

      const failures = recent.filter(v => v.verdict === 'FAILED').length;
      const score = parseFloat((failures / recent.length).toFixed(2)); // stored as a number

      // Store on every slot entry that belongs to this slot name
      for (const [key, slotEntry] of Object.entries(data.slots)) {
        if (key.split(':')[0] === slotName) {
          slotEntry.flakiness_score = score;
          slotEntry.recent_verdicts = recent;
        }
      }
    }

    return data;
  } catch (e) {
    process.stderr.write(`[computeFlakiness] ERROR: ${e.message}\n`);
    return data;
  }
}
|
|
442
|
+
|
|
443
|
+
// ---------------------------------------------------------------------------
|
|
444
|
+
// Today's date in MM-DD format
|
|
445
|
+
// ---------------------------------------------------------------------------
|
|
446
|
+
|
|
447
|
+
/**
 * Format the current local date as zero-padded "MM-DD".
 *
 * @returns {string} e.g. "03-09"
 */
function todayMMDD() {
  const now = new Date();
  // getMonth() is zero-based, hence the + 1
  return [now.getMonth() + 1, now.getDate()]
    .map((part) => String(part).padStart(2, '0'))
    .join('-');
}
|
|
453
|
+
|
|
454
|
+
// ---------------------------------------------------------------------------
|
|
455
|
+
// Haiku auto-classification
|
|
456
|
+
// ---------------------------------------------------------------------------
|
|
457
|
+
|
|
458
|
+
/**
 * Attempt to classify a task description using claude-haiku-4-5-20251001.
 *
 * Sends one POST to api.anthropic.com/v1/messages (15 s socket timeout) with
 * a prompt containing the task description and the current taxonomy, and
 * parses the JSON object found in the first content block of the reply.
 *
 * Fail-open: returns null — never throws — when ANTHROPIC_API_KEY is unset,
 * on any network error, timeout, aborted response, non-2xx status,
 * unparseable body, or a reply lacking string `category` / `subcategory`.
 *
 * @param {string} taskDescription - debate question/topic text
 * @param {Object<string, string[]>} categories - parent category -> subcategory names
 * @returns {Promise<?Object>} the parsed reply ({ category, subcategory, is_new }) or null
 */
async function classifyWithHaiku(taskDescription, categories) {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) return null; // No API key — skip silently (fail-open)

  try {
    // Build formatted taxonomy list for prompt
    const taxonomyLines = Object.entries(categories).map(([cat, subs]) => {
      const subsStr = subs.map(s => ` - ${s}`).join('\n');
      return ` ${cat}:\n${subsStr}`;
    }).join('\n');

    const prompt = `You are classifying a quorum debate topic into a category taxonomy.

Debate topic: ${taskDescription}

Taxonomy:
${taxonomyLines}

Return ONLY valid JSON (no markdown, no explanation):
{"category": "<parent category name>", "subcategory": "<subcategory name>", "is_new": false}

If the topic does not match any existing category or subcategory well, propose new names:
{"category": "<new parent name>", "subcategory": "<new subcategory name>", "is_new": true}

Choose the single best match. Return nothing except the JSON object.`;

    const body = JSON.stringify({
      model: 'claude-haiku-4-5-20251001',
      max_tokens: 128,
      messages: [{ role: 'user', content: prompt }],
    });

    // The promise only ever resolves (never rejects); the first resolve wins,
    // so the extra fallbacks below are harmless after a successful 'end'.
    const text = await new Promise((resolve) => {
      const req = https.request({
        hostname: 'api.anthropic.com',
        path: '/v1/messages',
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': apiKey,
          'anthropic-version': '2023-06-01',
          'Content-Length': Buffer.byteLength(body),
        },
        timeout: 15000,
      }, (res) => {
        // Decode as one UTF-8 stream so multi-byte characters split across
        // chunks are not corrupted by per-chunk Buffer-to-string conversion.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', chunk => { data += chunk; });
        res.on('end', () => {
          if (res.statusCode < 200 || res.statusCode >= 300) {
            resolve(null); // error responses carry no classification
            return;
          }
          try {
            const parsed = JSON.parse(data);
            const content = ((parsed.content || [])[0] || {}).text || '';
            resolve(content.trim());
          } catch { resolve(null); }
        });
        // A response aborted mid-body never emits 'end'; without these the
        // promise would stay pending forever.
        res.on('error', () => resolve(null));
        res.on('close', () => resolve(null));
      });
      req.on('error', () => resolve(null));
      req.on('timeout', () => { req.destroy(); resolve(null); });
      req.write(body);
      req.end();
    });

    if (!text) return null;

    // Tolerate replies wrapped in Markdown fences or stray prose by parsing
    // only the outermost {...} span.
    const start = text.indexOf('{');
    const end = text.lastIndexOf('}');
    if (start === -1 || end <= start) return null;

    const result = JSON.parse(text.slice(start, end + 1));
    if (typeof result.category !== 'string' || typeof result.subcategory !== 'string') {
      return null;
    }
    return result;
  } catch (_) {
    return null; // any error — fail-open
  }
}
|
|
534
|
+
|
|
535
|
+
// ---------------------------------------------------------------------------
|
|
536
|
+
// init-team: capture team fingerprint (idempotent — skips if unchanged)
|
|
537
|
+
// ---------------------------------------------------------------------------
|
|
538
|
+
|
|
539
|
+
/**
 * init-team subcommand: record the current team composition on the scoreboard.
 *
 * The fingerprint is the first 16 hex characters of a SHA-256 over a canonical
 * JSON document built from the Claude model name, the agents given via --team,
 * and the MCP servers / plugins found in the Claude JSON config. When the stored
 * fingerprint already matches, nothing is written.
 *
 * @param {string[]} argv - Arguments following the `init-team` subcommand.
 * @returns {Promise<void>}
 */
async function initTeam(argv) {
  const args = parseArgs(argv);
  const scoreboardPath = args.scoreboard || defaultScoreboardPath();
  const claudeModel = args['claude-model'] || process.env.CLAUDE_MODEL || process.env.ANTHROPIC_MODEL || 'unknown';

  // Parse agent identities from --team JSON
  let agents = {};
  if (args.team) {
    try {
      const parsedTeam = JSON.parse(args.team);
      // Only a plain object is usable below: null would crash Object.entries(),
      // and arrays / scalars would be stored as a meaningless "agents" value.
      if (parsedTeam !== null && typeof parsedTeam === 'object' && !Array.isArray(parsedTeam)) {
        agents = parsedTeam;
      } else {
        process.stderr.write('[init-team] WARNING: --team JSON must be an object — ignoring\n');
      }
    } catch (e) {
      process.stderr.write(`[init-team] WARNING: could not parse --team JSON: ${e.message}\n`);
    }
  }

  // Auto-detect MCPs and plugins from ~/.claude.json (path overridable via QGSD_CLAUDE_JSON)
  let mcps = [];
  let plugins = [];
  try {
    const claudeJsonPath = process.env.QGSD_CLAUDE_JSON || path.join(os.homedir(), '.claude.json');
    const claudeJson = JSON.parse(fs.readFileSync(claudeJsonPath, 'utf8'));
    mcps = Object.keys(claudeJson.mcpServers || {});
    const rawPlugins = claudeJson.plugins;
    if (Array.isArray(rawPlugins)) {
      plugins = rawPlugins;
    } else if (rawPlugins !== null && typeof rawPlugins === 'object') {
      // A name -> config map is treated like mcpServers: keep the names.
      // Spreading a non-iterable object further down would otherwise throw.
      plugins = Object.keys(rawPlugins);
    }
  } catch (e) {
    process.stderr.write(`[init-team] WARNING: could not read ~/.claude.json: ${e.message}\n`);
  }

  // Compute fingerprint from canonical team composition (sorted so that
  // ordering differences in the inputs do not change the hash).
  const canonical = JSON.stringify({
    claude_model: claudeModel,
    agents: Object.fromEntries(Object.entries(agents).sort()),
    mcps: [...mcps].sort(),
    plugins: [...plugins].sort(),
  });
  const fingerprint = crypto.createHash('sha256').update(canonical).digest('hex').slice(0, 16);

  const data = loadData(scoreboardPath);

  // Skip if fingerprint unchanged
  if (data.team && data.team.fingerprint === fingerprint) {
    process.stdout.write(`[init-team] fingerprint: ${fingerprint} | no change\n`);
    return;
  }

  const prevFingerprint = data.team ? data.team.fingerprint : null;

  data.team = {
    fingerprint,
    captured_at: new Date().toISOString(),
    claude_model: claudeModel,
    agents,
    mcps,
    plugins,
  };

  // Write to a per-process temp file, then rename over the scoreboard.
  const absPath = path.resolve(process.cwd(), scoreboardPath);
  fs.mkdirSync(path.dirname(absPath), { recursive: true });
  const tmpPath0 = absPath + '.' + process.pid + '.tmp';
  fs.writeFileSync(tmpPath0, JSON.stringify(data, null, 2) + '\n', 'utf8');
  fs.renameSync(tmpPath0, absPath);

  const agentCount = Object.keys(agents).length;
  if (prevFingerprint) {
    process.stdout.write(`[init-team] fingerprint: ${fingerprint} (updated from ${prevFingerprint}) | ${agentCount} agents, ${mcps.length} MCPs, ${plugins.length} plugins\n`);
  } else {
    process.stdout.write(`[init-team] fingerprint: ${fingerprint} | ${agentCount} agents, ${mcps.length} MCPs, ${plugins.length} plugins\n`);
  }
}
|
|
607
|
+
|
|
608
|
+
// ---------------------------------------------------------------------------
|
|
609
|
+
// Availability tracking helpers
|
|
610
|
+
// ---------------------------------------------------------------------------
|
|
611
|
+
|
|
612
|
+
const MONTH_MAP = {
  jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
  jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
};

/**
 * Parse a year-less local date/time string like "Feb 24 8:37 PM" into a Date.
 *
 * Accepted shapes: "Feb 24 8:37 PM", "February 24 20:37", "Feb 24 8:37:00 PM".
 * Only the first three letters of the month name are looked up.
 *
 * Because the input carries no year, the year is chosen so that the result is
 * as close as possible to `now` (previous, current, or next calendar year).
 * A time that passed a minute ago therefore stays in the past instead of being
 * pushed a full year ahead, while "Jan 1" seen on Dec 31 resolves to next year.
 *
 * @param {string} str - Text to parse.
 * @param {Date} [now=new Date()] - Reference instant used to pick the year.
 * @returns {Date|null} The parsed local time, or null if the text is not a
 *   string, does not match, or names an impossible date/time (e.g. "Feb 31",
 *   "25:00").
 */
function parseLocalDateTime(str, now = new Date()) {
  if (typeof str !== 'string') return null;

  // Matches: "Feb 24 8:37 PM", "February 24 20:37", "Feb 24 8:37:00 PM"
  const m = str.trim().match(/^(\w{3,9})\s+(\d{1,2})\s+(\d{1,2}):(\d{2})(?::(\d{2}))?\s*(AM|PM)?$/i);
  if (!m) return null;

  const monthKey = m[1].slice(0, 3).toLowerCase();
  // Own-property check so keys inherited from Object.prototype never match.
  if (!Object.prototype.hasOwnProperty.call(MONTH_MAP, monthKey)) return null;
  const month = MONTH_MAP[monthKey];

  const day = parseInt(m[2], 10);
  let hour = parseInt(m[3], 10);
  const minute = parseInt(m[4], 10);
  const second = m[5] ? parseInt(m[5], 10) : 0;
  const ampm = (m[6] || '').toUpperCase();

  // Reject out-of-range fields; the Date constructor would silently roll them over.
  if (day < 1 || day > 31 || hour > 23 || minute > 59 || second > 59) return null;

  if (ampm === 'PM' && hour < 12) hour += 12;
  if (ampm === 'AM' && hour === 12) hour = 0;

  const nowMs = now.getTime();
  const baseYear = now.getFullYear();
  let best = null;
  for (const year of [baseYear - 1, baseYear, baseYear + 1]) {
    const candidate = new Date(year, month, day, hour, minute, second, 0);
    // A changed month/day means the date does not exist in that year (e.g. Feb 30).
    if (candidate.getMonth() !== month || candidate.getDate() !== day) continue;
    if (best === null || Math.abs(candidate.getTime() - nowMs) < Math.abs(best.getTime() - nowMs)) {
      best = candidate;
    }
  }
  return best;
}
|
|
645
|
+
|
|
646
|
+
/**
 * Parse an availability hint from a raw UNAVAIL message.
 *
 * Supported phrasings:
 *   "usage limit until Feb 24 8:37 PM"        (absolute local time)
 *   "in 5 hours" / "restart in 5 hours"       (relative)
 *   "in 1 hour 30 minutes"                    (relative, combined)
 *   "in 30 minutes"                           (relative)
 *
 * The reason is classified the same way for every phrasing, checked in this
 * order: usage limit, quota exceeded, rate limit, otherwise "unavailable".
 *
 * @param {string} message - Raw agent output.
 * @returns {{available_at: Date, reason: string}|null} The parsed hint, or null
 *   if the message is not a non-empty string or contains no recognised hint.
 */
function parseAvailabilityHint(message) {
  if (typeof message !== 'string' || message === '') return null;

  const reason = /usage.?limit/i.test(message) ? 'usage limit'
    : /quota/i.test(message) ? 'quota exceeded'
    : /rate.?limit/i.test(message) ? 'rate limit'
    : 'unavailable';

  // "until <Month Day HH:MM AM/PM>" — absolute local time
  const untilMatch = message.match(/until\s+([A-Za-z]{3,9}\s+\d{1,2}\s+\d{1,2}:\d{2}(?::\d{2})?\s*(?:AM|PM)?)/i);
  if (untilMatch) {
    const parsed = parseLocalDateTime(untilMatch[1]);
    // An unparseable date falls through to the relative patterns below.
    if (parsed) return { available_at: parsed, reason };
  }

  // "in N hours" (optionally followed by "M minutes") — relative
  const hoursMatch = message.match(/in\s+(\d+(?:\.\d+)?)\s*hours?(?:\s*(?:,|and)?\s*(\d+(?:\.\d+)?)\s*minutes?)?/i);
  if (hoursMatch) {
    const hours = parseFloat(hoursMatch[1]);
    const extraMins = hoursMatch[2] ? parseFloat(hoursMatch[2]) : 0;
    return { available_at: new Date(Date.now() + hours * 3_600_000 + extraMins * 60_000), reason };
  }

  // "in N minutes" — relative
  const minsMatch = message.match(/in\s+(\d+(?:\.\d+)?)\s*minutes?/i);
  if (minsMatch) {
    const mins = parseFloat(minsMatch[1]);
    return { available_at: new Date(Date.now() + mins * 60_000), reason };
  }

  return null;
}
|
|
687
|
+
|
|
688
|
+
/**
 * Format remaining milliseconds as "Xh Ym", "Xh", "Zm", or "now".
 *
 * Partial minutes are rounded up, so any positive duration shows at least "1m".
 *
 * @param {number} ms - Remaining time in milliseconds.
 * @returns {string} "now" for zero, negative, or NaN input; otherwise the
 *   formatted duration.
 */
function formatDuration(ms) {
  // `!(ms > 0)` also catches NaN, which would otherwise render as "NaNh NaNm".
  if (!(ms > 0)) return 'now';
  const totalMins = Math.ceil(ms / 60_000);
  if (totalMins < 60) return `${totalMins}m`;
  const hours = Math.floor(totalMins / 60);
  const mins = totalMins % 60;
  return mins > 0 ? `${hours}h ${mins}m` : `${hours}h`;
}
|
|
697
|
+
|
|
698
|
+
// ---------------------------------------------------------------------------
|
|
699
|
+
// set-availability subcommand
|
|
700
|
+
//
|
|
701
|
+
// Usage:
|
|
702
|
+
// node update-scoreboard.cjs set-availability \
|
|
703
|
+
// --slot codex-1 --message "usage limit until Feb 24 8:37 PM" [--scoreboard <path>]
|
|
704
|
+
// node update-scoreboard.cjs set-availability \
|
|
705
|
+
// --model codex --message "restart in 5 hours" [--scoreboard <path>]
|
|
706
|
+
//
|
|
707
|
+
// --slot or --model is the key in data.availability (both accepted; no functional difference).
|
|
708
|
+
// --message is the raw UNAVAIL output text from the agent.
|
|
709
|
+
// ---------------------------------------------------------------------------
|
|
710
|
+
|
|
711
|
+
/**
 * set-availability subcommand: store when a slot/model is expected to be
 * usable again, derived from the raw UNAVAIL text passed via --message.
 *
 * Exits with status 1 when --slot/--model or --message is missing. When the
 * message contains no recognisable hint, prints a note and leaves the
 * scoreboard untouched.
 *
 * @param {string[]} argv - Arguments following the `set-availability` subcommand.
 * @returns {Promise<void>}
 */
async function setAvailability(argv) {
  const opts = parseArgs(argv);
  const boardPath = opts.scoreboard || defaultScoreboardPath();
  const slotKey = opts.slot || opts.model;
  const rawMessage = opts.message || '';

  if (!slotKey) {
    process.stderr.write('[set-availability] --slot or --model is required\n');
    process.exit(1);
  }
  if (!rawMessage) {
    process.stderr.write('[set-availability] --message "<raw output text>" is required\n');
    process.exit(1);
  }

  const parsedHint = parseAvailabilityHint(rawMessage);
  if (!parsedHint) {
    process.stdout.write(`[set-availability] ${slotKey}: no availability hint found in message — skipping\n`);
    return;
  }

  const board = loadData(boardPath);
  if (!board.availability) {
    board.availability = {};
  }

  const { available_at: availableAt, reason } = parsedHint;
  const stamp = new Date();
  const msLeft = Math.max(0, availableAt.getTime() - stamp.getTime());

  board.availability[slotKey] = {
    available_at_iso: availableAt.toISOString(),
    available_at_local: availableAt.toLocaleString(),
    reason,
    set_at: stamp.toISOString(),
  };

  // Write to a per-process temp file first, then rename it over the scoreboard.
  const target = path.resolve(process.cwd(), boardPath);
  fs.mkdirSync(path.dirname(target), { recursive: true });
  const scratch = `${target}.${process.pid}.tmp`;
  fs.writeFileSync(scratch, `${JSON.stringify(board, null, 2)}\n`, 'utf8');
  fs.renameSync(scratch, target);

  let eta = 'available now';
  if (msLeft > 0) {
    eta = `available in ${formatDuration(msLeft)}`;
  }
  process.stdout.write(
    `[set-availability] ${slotKey}: ${reason} | ${eta} | local: ${availableAt.toLocaleString()}\n`
  );
}
|
|
756
|
+
|
|
757
|
+
// ---------------------------------------------------------------------------
|
|
758
|
+
// get-availability subcommand
|
|
759
|
+
//
|
|
760
|
+
// Usage:
|
|
761
|
+
// node update-scoreboard.cjs get-availability [--scoreboard <path>]
|
|
762
|
+
//
|
|
763
|
+
// Outputs JSON: { "<slot-or-model>": { available_at_iso, available_at_local,
|
|
764
|
+
// reason, set_at, is_available,
|
|
765
|
+
// remaining_ms, remaining_display } }
|
|
766
|
+
//
|
|
767
|
+
// Use this to check dormant slots before invoking them in a quorum run.
|
|
768
|
+
// ---------------------------------------------------------------------------
|
|
769
|
+
|
|
770
|
+
/**
 * get-availability subcommand: print, as JSON on stdout, the stored
 * availability record of every slot/model together with values derived from
 * the current time (`is_available`, `remaining_ms`, `remaining_display`).
 *
 * Records that are not objects are skipped with a warning. A record whose
 * `available_at_iso` cannot be parsed is reported as available (fail-open) so
 * that a corrupt entry cannot keep a slot dormant indefinitely.
 *
 * @param {string[]} argv - Arguments following the `get-availability` subcommand.
 * @returns {Promise<void>}
 */
async function getAvailability(argv) {
  const args = parseArgs(argv);
  const scoreboardPath = args.scoreboard || defaultScoreboardPath();
  const data = loadData(scoreboardPath);
  const now = Date.now();

  const result = {};
  for (const [key, avail] of Object.entries(data.availability || {})) {
    if (avail === null || typeof avail !== 'object') {
      process.stderr.write(`[get-availability] WARNING: malformed availability record for ${key} — skipping\n`);
      continue;
    }

    const available_at_ms = new Date(avail.available_at_iso).getTime();
    const hasValidTime = !Number.isNaN(available_at_ms);
    if (!hasValidTime) {
      process.stderr.write(`[get-availability] WARNING: unparseable available_at_iso for ${key} — treating as available\n`);
    }

    // With NaN, `<=` would be false forever and remaining_ms would serialise as null.
    const remaining_ms = hasValidTime ? Math.max(0, available_at_ms - now) : 0;
    const is_available = !hasValidTime || available_at_ms <= now;

    result[key] = {
      available_at_iso: avail.available_at_iso,
      available_at_local: avail.available_at_local,
      reason: avail.reason,
      set_at: avail.set_at,
      is_available,
      remaining_ms,
      remaining_display: formatDuration(remaining_ms),
    };
  }

  process.stdout.write(JSON.stringify(result, null, 2) + '\n');
}
|
|
794
|
+
|
|
795
|
+
// ---------------------------------------------------------------------------
|
|
796
|
+
// merge-wave subcommand
|
|
797
|
+
//
|
|
798
|
+
// Usage:
|
|
799
|
+
// node update-scoreboard.cjs merge-wave \
|
|
800
|
+
// --dir .planning/scoreboard-tmp \
|
|
801
|
+
// --task "quick-97" --round 1 \
|
|
802
|
+
// [--scoreboard <path>]
|
|
803
|
+
//
|
|
804
|
+
// Reads all vote files matching vote-*-<task>-<round>-*.json in --dir,
|
|
805
|
+
// applies them in one atomic transaction to the scoreboard.
|
|
806
|
+
//
|
|
807
|
+
// Vote file schema (JSON):
|
|
808
|
+
// {
|
|
809
|
+
// "slot": "<slotName>", // for --slot path
|
|
810
|
+
// "model": "<modelFamily>", // for --model path (alternative)
|
|
811
|
+
// "modelId": "<fullModelId>", // required when slot is set
|
|
812
|
+
// "result": "TP|TN|FP|FN|TP+|TN+|UNAVAIL|",
|
|
813
|
+
// "verdict": "APPROVE|REJECT|FLAG|CONSENSUS|DELIBERATE|GAPS_FOUND",
|
|
814
|
+
// "taskDescription": "<optional>"
|
|
815
|
+
// }
|
|
816
|
+
// ---------------------------------------------------------------------------
|
|
817
|
+
|
|
818
|
+
/**
 * merge-wave subcommand: apply every vote file of one task/round to the
 * scoreboard and write the scoreboard once.
 *
 * Vote files are looked up in --dir (default `.planning/scoreboard-tmp`) by the
 * name layout `vote-*-<task>-<round>-*.json`. Files that cannot be parsed, do
 * not hold a JSON object, carry an unrecognised result code, name an unknown
 * model, or lack both `slot`+`modelId` and `model` are skipped with a warning
 * on stderr. Votes with an empty result (Mode A rounds) only contribute their
 * verdict.
 *
 * Exits with status 1 when --task is missing or --round is not a positive integer.
 *
 * @param {string[]} argv - Arguments following the `merge-wave` subcommand.
 * @returns {Promise<void>}
 */
async function mergeWave(argv) {
  const args = parseArgs(argv);
  const scoreboardPath = args.scoreboard || defaultScoreboardPath();
  const dir = args.dir || '.planning/scoreboard-tmp';
  const task = args.task;
  const round = parseInt(args.round, 10);

  if (!task) {
    process.stderr.write('[merge-wave] --task is required\n');
    process.exit(1);
  }
  if (isNaN(round) || round < 1) {
    process.stderr.write('[merge-wave] --round must be a positive integer\n');
    process.exit(1);
  }

  const absDir = path.resolve(process.cwd(), dir);
  if (!fs.existsSync(absDir)) {
    process.stdout.write(`[merge-wave] dir ${absDir} does not exist — no votes to merge\n`);
    return;
  }

  // Find matching vote files: vote-*-<task>-<round>-*.json
  // Task and round must be adjacent in the name. Checking them independently
  // lets a slot suffix pose as the round: "vote-codex-1-quick-97-2-x.json"
  // would otherwise be picked up for round 1.
  const escapedTask = String(task).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`^vote-.*-${escapedTask}-${round}-[^/]+\\.json$`);
  const files = fs.readdirSync(absDir).filter(f => pattern.test(f));

  if (files.length === 0) {
    process.stdout.write(`[merge-wave] no vote files found for task=${task} round=${round} in ${absDir}\n`);
    return;
  }

  // Parse all vote files
  const votes = [];
  for (const file of files) {
    try {
      const filePath = path.join(absDir, file);
      const vote = JSON.parse(fs.readFileSync(filePath, 'utf8'));
      if (vote === null || typeof vote !== 'object' || Array.isArray(vote)) {
        // e.g. a file containing `null` would crash the property reads below.
        throw new Error('vote payload is not a JSON object');
      }
      votes.push({ file, vote });
    } catch (e) {
      process.stderr.write(`[merge-wave] WARNING: could not parse ${file}: ${e.message}\n`);
    }
  }

  if (votes.length === 0) {
    process.stdout.write(`[merge-wave] all vote files unparseable — nothing to merge\n`);
    return;
  }

  // Load scoreboard once
  const data = loadData(scoreboardPath);

  // Ensure all model keys exist
  for (const model of VALID_MODELS) {
    if (!data.models[model]) data.models[model] = emptyModelStats();
  }

  // Result codes from the documented vote-file schema (empty string is handled separately).
  const VALID_RESULTS = new Set(['TP', 'TP+', 'TN', 'TN+', 'FP', 'FN', 'UNAVAIL']);

  const findRoundIndex = () => data.rounds.findIndex(r => r.task === task && r.round === round);

  // Insert or update this task/round entry with one vote keyed by `voteKey`.
  const recordVote = (voteKey, result, verdict) => {
    const idx = findRoundIndex();
    if (idx !== -1) {
      const entry = data.rounds[idx];
      entry.votes = entry.votes || {};
      entry.votes[voteKey] = result;
      // A vote without a verdict must not erase one recorded earlier.
      entry.verdict = verdict || entry.verdict || '';
      return;
    }
    const newEntry = {
      date: todayMMDD(),
      task,
      round,
      votes: { [voteKey]: result },
      verdict,
    };
    if (data.team && data.team.fingerprint) newEntry.team_fingerprint = data.team.fingerprint;
    data.rounds.push(newEntry);
  };

  // Apply all votes to data in memory
  let applied = 0;
  for (const { file, vote } of votes) {
    // Normalise UNAVAILABLE → UNAVAIL (typo variant from early rounds)
    const result = vote.result === 'UNAVAILABLE' ? 'UNAVAIL' : (vote.result || '');
    const verdict = vote.verdict || '';

    // Mode A rounds intentionally have no binary result. Skip writing an empty
    // string — it would corrupt empirical rate calculations by appearing as an
    // "available but unclassified" vote. The availability signal for Mode A is
    // tracked separately via set-availability calls.
    if (result === '') {
      process.stderr.write(`[merge-wave] NOTE: ${file} has no result code (Mode A round) — availability recorded via verdict only\n`);
      // Still record the verdict/round for audit trail, but omit from votes map
      const taskRound = findRoundIndex();
      if (taskRound === -1) {
        data.rounds.push({ date: todayMMDD(), task, round, votes: {}, verdict });
      } else if (!data.rounds[taskRound].verdict) {
        data.rounds[taskRound].verdict = verdict;
      }
      applied += 1;
      continue;
    }

    if (!VALID_RESULTS.has(result)) {
      process.stderr.write(`[merge-wave] WARNING: unknown result "${result}" in ${file} — skipping\n`);
      continue;
    }

    if (vote.slot && vote.modelId) {
      // Slot mode
      const compositeKey = `${vote.slot}:${vote.modelId}`;
      if (!data.slots[compositeKey]) {
        data.slots[compositeKey] = emptySlotStats(vote.slot, vote.modelId);
      }
      recordVote(compositeKey, result, verdict);
      applied += 1;
    } else if (vote.model) {
      // Model mode
      const model = vote.model;
      if (!VALID_MODELS.includes(model)) {
        process.stderr.write(`[merge-wave] WARNING: unknown model "${model}" in ${file} — skipping\n`);
        continue;
      }
      recordVote(model, result, verdict);
      applied += 1;
    } else {
      process.stderr.write(`[merge-wave] WARNING: vote file ${file} missing slot+modelId or model — skipping\n`);
    }
  }

  // Recompute stats from scratch
  recomputeStats(data);
  recomputeSlots(data);
  computeDeliveryStats(data);
  computeFlakiness(data);

  // Single write: per-process temp file, then rename over the scoreboard
  const absPath = path.resolve(process.cwd(), scoreboardPath);
  fs.mkdirSync(path.dirname(absPath), { recursive: true });
  const tmpPath = absPath + '.' + process.pid + '.tmp';
  fs.writeFileSync(tmpPath, JSON.stringify(data, null, 2) + '\n', 'utf8');
  fs.renameSync(tmpPath, absPath);

  // Report only the votes that were actually applied; skipped ones were warned about above.
  process.stdout.write(`[merge-wave] merged ${applied} vote(s) for task=${task} round=${round} into ${scoreboardPath}\n`);
}
|
|
968
|
+
|
|
969
|
+
// ---------------------------------------------------------------------------
|
|
970
|
+
// Main
|
|
971
|
+
// ---------------------------------------------------------------------------
|
|
972
|
+
|
|
973
|
+
/**
 * CLI entry point.
 *
 * Routes the `init-team`, `set-availability`, `get-availability` and
 * `merge-wave` subcommands to their handlers. Any other invocation records a
 * single vote:
 *   - slot mode (`cfg.slot` set): appends a round entry keyed by
 *     "<slot>:<modelId>" and recomputes slot stats only;
 *   - model mode: inserts or updates the task/round entry for `cfg.model`,
 *     optionally resolving a category (explicit flags, or Haiku
 *     classification of the task description), then recomputes model stats.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rawArgs = process.argv.slice(2);

  // Subcommand routing
  if (rawArgs[0] === 'init-team') return initTeam(rawArgs.slice(1));
  if (rawArgs[0] === 'set-availability') return setAvailability(rawArgs.slice(1));
  if (rawArgs[0] === 'get-availability') return getAvailability(rawArgs.slice(1));
  if (rawArgs[0] === 'merge-wave') return mergeWave(rawArgs.slice(1));

  const parsed = parseArgs(rawArgs);
  const cfg = validate(parsed);

  const data = loadData(cfg.scoreboard);

  // ---------------------------------------------------------------------------
  // Slot mode: --slot + --model-id path (SCBD-01, SCBD-02, SCBD-03)
  // ---------------------------------------------------------------------------
  if (cfg.slot) {
    const compositeKey = `${cfg.slot}:${cfg.modelId}`;

    // Ensure slot entry exists in data.slots
    if (!data.slots[compositeKey]) {
      data.slots[compositeKey] = emptySlotStats(cfg.slot, cfg.modelId);
    }

    // Append to rounds with vote keyed by compositeKey
    const roundEntry = {
      date: todayMMDD(),
      task: cfg.task,
      round: cfg.round,
      votes: { [compositeKey]: cfg.result },
      verdict: cfg.verdict,
    };
    if (data.team && data.team.fingerprint) {
      roundEntry.team_fingerprint = data.team.fingerprint;
    }
    data.rounds.push(roundEntry);

    // Recompute slot stats only (do NOT call recomputeStats — that is for --model path)
    recomputeSlots(data);

    // Write back
    const absPath = path.resolve(process.cwd(), cfg.scoreboard);
    fs.mkdirSync(path.dirname(absPath), { recursive: true });
    const tmpPath2 = absPath + '.' + process.pid + '.tmp';
    fs.writeFileSync(tmpPath2, JSON.stringify(data, null, 2) + '\n', 'utf8');
    fs.renameSync(tmpPath2, absPath);

    // Print confirmation
    process.stdout.write(`[update-scoreboard] slot ${cfg.slot} (${cfg.modelId}): ${cfg.result} | score=${data.slots[compositeKey].score}\n`);
    return;
  }

  // ---------------------------------------------------------------------------
  // Model mode: --model path (existing behavior, unchanged)
  // ---------------------------------------------------------------------------

  // Ensure all model keys exist
  for (const model of VALID_MODELS) {
    if (!data.models[model]) data.models[model] = emptyModelStats();
  }

  // Resolve category/subcategory.
  // When both explicit flags are given they are used as-is and the categories
  // map is not modified; Haiku is consulted only when neither flag is present.
  let resolvedCategory = cfg.category;
  let resolvedSubcategory = cfg.subcategory;

  if (!resolvedCategory && !resolvedSubcategory && cfg.taskDescription) {
    // Auto-classify via Haiku
    const classification = await classifyWithHaiku(cfg.taskDescription, data.categories);
    if (classification) {
      resolvedCategory = classification.category;
      resolvedSubcategory = classification.subcategory;

      // The names come from model output, so treat them as untrusted keys:
      // only own properties count as existing categories. A plain
      // `data.categories[name]` lookup would hit Object.prototype members
      // ("constructor", "toString", ...) and crash on `.includes`, and
      // assigning to "__proto__" would replace the map's prototype.
      const hasCategory = Object.prototype.hasOwnProperty.call(data.categories, resolvedCategory);
      if (!hasCategory && classification.is_new) {
        // Add new category dynamically
        Object.defineProperty(data.categories, resolvedCategory, {
          value: [],
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
      // New or existing category — if the subcategory is not yet listed, append it
      if (Object.prototype.hasOwnProperty.call(data.categories, resolvedCategory)) {
        const subcategories = data.categories[resolvedCategory];
        if (Array.isArray(subcategories) && !subcategories.includes(resolvedSubcategory)) {
          subcategories.push(resolvedSubcategory);
        }
      }
    }
  }

  // Find existing round entry matching task + round number
  const existingIdx = data.rounds.findIndex(
    r => r.task === cfg.task && r.round === cfg.round
  );

  if (existingIdx !== -1) {
    // Update existing entry: set/overwrite the model's vote
    data.rounds[existingIdx].votes = data.rounds[existingIdx].votes || {};
    data.rounds[existingIdx].votes[cfg.model] = cfg.result;
    // Allow verdict update too
    data.rounds[existingIdx].verdict = cfg.verdict;
    // Set category if resolved
    if (resolvedCategory && resolvedSubcategory) {
      data.rounds[existingIdx].category = resolvedCategory;
      data.rounds[existingIdx].subcategory = resolvedSubcategory;
    }
  } else {
    // Append new round entry
    const newEntry = {
      date: todayMMDD(),
      task: cfg.task,
      round: cfg.round,
      votes: { [cfg.model]: cfg.result },
      verdict: cfg.verdict,
    };
    if (resolvedCategory && resolvedSubcategory) {
      newEntry.category = resolvedCategory;
      newEntry.subcategory = resolvedSubcategory;
    }
    if (data.team && data.team.fingerprint) {
      newEntry.team_fingerprint = data.team.fingerprint;
    }
    data.rounds.push(newEntry);
  }

  // Recompute all cumulative stats from scratch
  recomputeStats(data);

  // Write back
  const absPath = path.resolve(process.cwd(), cfg.scoreboard);
  fs.mkdirSync(path.dirname(absPath), { recursive: true });
  const tmpPath3 = absPath + '.' + process.pid + '.tmp';
  fs.writeFileSync(tmpPath3, JSON.stringify(data, null, 2) + '\n', 'utf8');
  fs.renameSync(tmpPath3, absPath);

  // Print confirmation
  const delta = SCORE_DELTAS[cfg.result] || 0;
  const sign = delta >= 0 ? '+' : '';
  const newScore = data.models[cfg.model].score;
  const deltaStr = cfg.result === '' ? '(not scored)' : `${cfg.result} (${sign}${delta})`;
  let confirmation = `[update-scoreboard] ${cfg.model}: ${deltaStr} → score: ${newScore} | ${cfg.task} R${cfg.round} ${cfg.verdict}`;
  if (resolvedCategory && resolvedSubcategory) {
    confirmation += ` | category: ${resolvedCategory} > ${resolvedSubcategory}`;
  }
  process.stdout.write(confirmation + '\n');
}
|
|
1122
|
+
|
|
1123
|
+
// Expose the stats helpers and emptyData to anything that require()s this file.
// The check only guards against environments where `module` is not defined.
const scoreboardExports = { computeDeliveryStats, computeFlakiness, emptyData };
if (typeof module !== 'undefined') {
  module.exports = scoreboardExports;
}

// Run the CLI only when this file is the process entry point; a plain
// require() must not trigger main().
const invokedAsScript = require.main === module;
if (invokedAsScript) {
  const reportFatal = (err) => {
    process.stderr.write(`[update-scoreboard] FATAL: ${err.message}\n`);
    process.exit(1);
  };
  main().catch(reportFatal);
}
|