mindforge-cc 11.0.0 → 11.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.agent/hooks/mindforge-statusline.js +2 -2
  2. package/.mindforge/config.json +13 -4
  3. package/CHANGELOG.md +101 -0
  4. package/MINDFORGE.md +3 -3
  5. package/RELEASENOTES.md +1 -1
  6. package/bin/autonomous/audit-writer.js +108 -86
  7. package/bin/autonomous/auto-runner.js +304 -19
  8. package/bin/autonomous/dependency-dag.js +59 -0
  9. package/bin/autonomous/wave-executor.js +20 -1
  10. package/bin/council-cli.js +161 -0
  11. package/bin/dashboard/approval-handler.js +3 -1
  12. package/bin/dashboard/server.js +1 -1
  13. package/bin/dashboard/sse-bridge.js +9 -12
  14. package/bin/engine/council-runtime.js +124 -0
  15. package/bin/engine/otel-exporter.js +123 -0
  16. package/bin/engine/remediation-engine.js +1 -1
  17. package/bin/engine/self-corrective-synthesizer.js +1 -1
  18. package/bin/engine/temporal-cli.js +4 -2
  19. package/bin/engine/verification-runner.js +131 -0
  20. package/bin/engine/verify-cli.js +34 -0
  21. package/bin/eval/eval-harness.js +82 -0
  22. package/bin/eval/golden-set-retrieval.json +46 -0
  23. package/bin/governance/audit-hash.js +12 -0
  24. package/bin/governance/audit-verifier.js +60 -0
  25. package/bin/governance/quantum-crypto.js +63 -9
  26. package/bin/governance/ztai-manager.js +30 -2
  27. package/bin/hindsight-injector.js +5 -6
  28. package/bin/hooks/instinct-capture-hook.js +186 -0
  29. package/bin/memory/auto-shadow.js +32 -3
  30. package/bin/memory/identity-synthesizer.js +2 -2
  31. package/bin/memory/knowledge-store.js +30 -6
  32. package/bin/memory/retrieval-fusion.js +58 -0
  33. package/bin/memory/semantic-hub.js +2 -2
  34. package/bin/memory/vector-hub.js +111 -6
  35. package/bin/mindforge-cli.js +4 -5
  36. package/bin/models/anthropic-provider.js +13 -4
  37. package/bin/models/cost-tracker.js +3 -1
  38. package/bin/models/difficulty-scorer.js +54 -0
  39. package/bin/models/gemini-provider.js +6 -2
  40. package/bin/models/model-router.js +31 -18
  41. package/bin/models/openai-provider.js +6 -3
  42. package/bin/models/pricing-registry.js +128 -0
  43. package/bin/review/ads-engine.js +1 -1
  44. package/bin/security/trust-boundaries.js +102 -0
  45. package/bin/security/trust-gate-hook.js +39 -0
  46. package/bin/skill-registry.js +3 -2
  47. package/bin/skills-builder/marketplace-cli.js +5 -3
  48. package/bin/skills-builder/skill-registrar.js +4 -6
  49. package/bin/sre/sentinel.js +7 -5
  50. package/bin/utils/append-queue.js +55 -0
  51. package/bin/utils/file-io.js +27 -37
  52. package/bin/utils/version-check.js +59 -0
  53. package/bin/verify-audit.js +12 -0
  54. package/bin/wizard/theme.js +1 -2
  55. package/package.json +1 -1
  56. package/bin/dashboard/team-tracker.js +0 -0
@@ -161,7 +161,7 @@ app.use((req, res, next) => {
161
161
  res.setHeader('X-Content-Type-Options', 'nosniff');
162
162
  res.setHeader('X-Frame-Options', 'DENY');
163
163
  res.setHeader('Cache-Control', 'no-store'); // Never cache dashboard responses
164
- res.setHeader('Content-Security-Policy', "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; connect-src 'self'");
164
+ res.setHeader('Content-Security-Policy', 'default-src \'self\'; script-src \'self\'; style-src \'self\' \'unsafe-inline\'; connect-src \'self\'');
165
165
  res.setHeader('X-XSS-Protection', '1; mode=block');
166
166
  res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin');
167
167
  next();
@@ -21,7 +21,6 @@ const APPROVAL_DIR = path.join(process.cwd(), '.planning', 'approvals');
21
21
  const clients = new Set(); // Connected SSE response objects
22
22
 
23
23
  let _lastAuditSize = 0;
24
- let _auditInode = 0; // Track file inode for rotation detection
25
24
  let _lastAutoState = '';
26
25
  let _lastApprovals = '';
27
26
 
@@ -78,14 +77,16 @@ function pollAuditLog() {
78
77
  try {
79
78
  const stat = fs.statSync(AUDIT_PATH);
80
79
  const newSize = stat.size;
81
- const newIno = stat.ino;
82
80
 
83
- // File rotation detected: inode changed or file shrunk (truncated after archival)
84
- if ((newIno !== _auditInode && _auditInode !== 0) || (newSize < _lastAuditSize)) {
85
- process.stderr.write(`[sse-bridge] AUDIT.jsonl rotation detected (size: ${_lastAuditSize} -> ${newSize}, ino: ${_auditInode} -> ${newIno})\n`);
81
+ // Truncation / recreation recovery (NOT rotation — audit rotation was retired in
82
+ // UC-04b because it broke the hash chain; AUDIT.jsonl grows unbounded). If the
83
+ // file ever SHRINKS (manually truncated, .planning wiped, or replaced), reset the
84
+ // read offset to 0 so the live tail keeps working instead of stalling forever on
85
+ // the `newSize <= _lastAuditSize` early-return below or reading at a stale offset.
86
+ if (newSize < _lastAuditSize) {
87
+ process.stderr.write(`[sse-bridge] AUDIT.jsonl shrank (size: ${_lastAuditSize} -> ${newSize}) — re-tailing from start\n`);
86
88
  _lastAuditSize = 0;
87
89
  }
88
- _auditInode = newIno;
89
90
 
90
91
  if (newSize <= _lastAuditSize) return;
91
92
 
@@ -169,9 +170,7 @@ function startPolling() {
169
170
 
170
171
  // Initialize AUDIT position on first start
171
172
  if (!_initialized && fs.existsSync(AUDIT_PATH)) {
172
- const stat = fs.statSync(AUDIT_PATH);
173
- _lastAuditSize = stat.size;
174
- _auditInode = stat.ino;
173
+ _lastAuditSize = fs.statSync(AUDIT_PATH).size;
175
174
  _initialized = true;
176
175
  }
177
176
 
@@ -207,9 +206,7 @@ function stopPolling() {
207
206
  function start() {
208
207
  // Pre-initialize AUDIT position so first client gets instant data
209
208
  if (!_initialized && fs.existsSync(AUDIT_PATH)) {
210
- const stat = fs.statSync(AUDIT_PATH);
211
- _lastAuditSize = stat.size;
212
- _auditInode = stat.ino;
209
+ _lastAuditSize = fs.statSync(AUDIT_PATH).size;
213
210
  _initialized = true;
214
211
  }
215
212
  // Polling starts lazily when addClient() is called
@@ -0,0 +1,124 @@
1
+ 'use strict';
2
+ /**
3
+ * MindForge — Council Runtime (UC-10). Thin multi-voice decision harness (ADS).
4
+ *
5
+ * Activates the Adversarial Decision Loop (ADS) mandated by CLAUDE.md: instead of
6
+ * a multi-round debate simulator, this is a THIN runtime — parallel position
7
+ * collection (one per voice) + consensus scoring + dissent capture.
8
+ *
9
+ * The model is INJECTABLE (no hard LLM dependency) so callers/tests can supply a
10
+ * mock. The Semaphore from wave-executor is reused to bound concurrent voice calls.
11
+ *
12
+ * Design decision — NO challenge round (kept thin per adversarial review + YAGNI):
13
+ * A second model call per dissenter that folds a revised confidence back into the
14
+ * consensus adds ordering/weighting complexity and extra failure modes without
15
+ * changing the activation goal. Position-collection + consensus fully satisfies the
16
+ * ADS loop and the verdict contract. Add a challenge round only if a concrete need
17
+ * arises (an explicit `opts.challengeRound` flag would be the clean extension point).
18
+ *
19
+ * Filenames use opts.decisionId (NOT Date.now(), which is unavailable in some
20
+ * MindForge execution contexts); falls back to "council-latest.json".
21
+ */
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const { Semaphore } = require('../autonomous/wave-executor');
25
+
26
+ const DEFAULT_VOICES = ['architect', 'skeptic', 'pragmatist', 'critic'];
27
+ const VALID_RECOMMENDATIONS = ['PROCEED', 'REVISE'];
28
+
29
/**
 * Checks one voice's position payload and throws on the first problem found.
 *
 * Checks run in this order: the payload must be a non-null object, its
 * `recommendation` must be one of VALID_RECOMMENDATIONS, and its `confidence`
 * must be a finite number within [0, 1]. Every error message names the voice.
 *
 * @param {string} voice — The voice that produced the position (used in the error message).
 * @param {object} position — The raw position returned by the model.
 * @throws {Error} When any of the checks above fails.
 */
function validatePosition(voice, position) {
  // Every failure shares the same voice-named prefix; only the detail differs.
  const reject = (detail) => {
    throw new Error(`Council voice "${voice}" returned an invalid position: ${detail}`);
  };

  if (!position || typeof position !== 'object') {
    reject(`expected an object, got ${position === null ? 'null' : typeof position}`);
  }

  if (!VALID_RECOMMENDATIONS.includes(position.recommendation)) {
    reject(`recommendation must be one of ${VALID_RECOMMENDATIONS.join('/')}, got ${JSON.stringify(position.recommendation)}`);
  }

  const confidence = position.confidence;
  const isUnitInterval = typeof confidence === 'number'
    && Number.isFinite(confidence)
    && confidence >= 0
    && confidence <= 1;
  if (!isUnitInterval) {
    reject(`confidence must be a number in [0,1], got ${position.confidence}`);
  }
}
46
+
47
/**
 * Runs a thin adversarial council over a question.
 *
 * Each voice is asked once (in parallel, bounded by a Semaphore), every returned
 * position is validated, and the positions are reduced to a single consensus
 * score, a verdict and a dissent list. Optionally the result is written to disk.
 *
 * @param {string} question — The decision/question put to the council.
 * @param {object} [opts]
 * @param {string[]} [opts.voices] — Voice personas to consult (default: DEFAULT_VOICES).
 * @param {number} [opts.consensusThreshold=0.75] — Threshold for PROCEED/REVISE.
 * @param {function} opts.model — REQUIRED. async ({voice, question}) =>
 *   { recommendation: 'PROCEED'|'REVISE', confidence: number(0..1), rationale: string }
 * @param {number} [opts.maxConcurrency] — Bound on parallel voice calls. Anything that
 *   is not a finite number >= 1 falls back to the number of voices.
 * @param {boolean} [opts.writeDecision=true] — Persist a decision record to disk.
 * @param {string} [opts.outputPath] — Directory for the record (default: .planning/decisions).
 * @param {string} [opts.decisionId] — Stable id used in the filename (no Date.now()).
 *   Only its final path segment is used, so the record always lands inside the
 *   output directory.
 * @returns {Promise<{question,positions,consensus,verdict,dissent}>}
 * @throws {Error} When opts.model is not a function, or (as a rejection) when the
 *   model call or the validation of any voice's position fails.
 */
async function runCouncil(question, opts = {}) {
  const voices = Array.isArray(opts.voices) && opts.voices.length > 0
    ? opts.voices
    : DEFAULT_VOICES;
  const consensusThreshold = opts.consensusThreshold ?? 0.75;
  const model = opts.model;
  if (typeof model !== 'function') {
    throw new Error('runCouncil requires an injectable model function (opts.model)');
  }

  // A zero, negative or NaN bound can never admit a voice, so treat it like
  // "not specified" instead of handing it to the semaphore.
  // NOTE(review): how Semaphore reacts to a non-positive limit is not visible
  // here — confirm against wave-executor.
  const requestedConcurrency = Number(opts.maxConcurrency);
  const maxConcurrency = Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1
    ? Math.floor(requestedConcurrency)
    : voices.length;

  // Parallel position collection — bounded by the reused Semaphore.
  const sem = new Semaphore(maxConcurrency);
  const positions = await Promise.all(voices.map(async (voice) => {
    await sem.acquire();
    try {
      const position = await model({ voice, question });
      // Validate the payload immediately — never silently swallow a malformed
      // position into NaN consensus (which would collapse to NO_CONSENSUS and
      // write NaN to the decision record).
      validatePosition(voice, position);
      // `voice` goes last so a `voice` field inside the model payload cannot
      // relabel the position as coming from a different persona.
      return { ...position, voice };
    } finally {
      sem.release();
    }
  }));

  // Consensus = mean approval signal across voices.
  // A PROCEED contributes its confidence; a REVISE contributes its inverse
  // (so a high-confidence REVISE pulls consensus down hard).
  const consensus = positions.reduce((sum, p) => {
    const approval = p.recommendation === 'PROCEED' ? p.confidence : (1 - p.confidence);
    return sum + approval;
  }, 0) / positions.length;

  const verdict = consensus >= consensusThreshold ? 'PROCEED'
    : consensus <= (1 - consensusThreshold) ? 'REVISE'
      : 'NO_CONSENSUS';

  // Dissent capture:
  //  - For a decisive verdict (PROCEED/REVISE): the voices opposing that direction.
  //  - For NO_CONSENSUS: the FULL split — every voice's
  //    {voice, recommendation, rationale} — so the decision record preserves
  //    both camps rather than recording an empty dissent list.
  const dissent = verdict === 'NO_CONSENSUS'
    ? positions.map((p) => ({ voice: p.voice, recommendation: p.recommendation, rationale: p.rationale }))
    : positions.filter((p) =>
      (verdict === 'PROCEED' && p.recommendation !== 'PROCEED') ||
      (verdict === 'REVISE' && p.recommendation === 'PROCEED'))
      .map((d) => ({ voice: d.voice, rationale: d.rationale }));

  const result = { question, positions, consensus, verdict, dissent };

  if (opts.writeDecision !== false) {
    const dir = opts.outputPath || path.join(process.cwd(), '.planning', 'decisions');
    fs.mkdirSync(dir, { recursive: true });
    // path.join() normalises "..", so an id such as "x/../../y" would otherwise
    // escape `dir`. Keeping only the last path segment pins the file inside it.
    const safeId = opts.decisionId ? path.basename(String(opts.decisionId)) : '';
    const name = safeId ? `council-${safeId}.json` : 'council-latest.json';
    fs.writeFileSync(path.join(dir, name), JSON.stringify(result, null, 2));
  }

  return result;
}
123
+
124
+ module.exports = { runCouncil };
@@ -0,0 +1,123 @@
1
+ 'use strict';
2
+ /**
3
+ * MindForge — OTel GenAI Exporter (UC-18).
4
+ * Translates NexusTracer spans to OpenTelemetry GenAI semantic conventions.
5
+ * Active only when OTEL_EXPORTER_OTLP_ENDPOINT is set.
6
+ *
7
+ * NexusTracer span shape (from nexus-tracer.js startSpan/endSpan):
8
+ * {
9
+ * id: 'sp_<hex>',
10
+ * trace_id: 'tr_<hex>',
11
+ * parent_id: string|null,
12
+ * name: string,
13
+ * status: 'active'|'success'|'error',
14
+ * start_time: ISO-8601,
15
+ * end_time: ISO-8601,
16
+ * attributes: {
17
+ * service: string,
18
+ * host: string,
19
+ * pid: number,
20
+ * model_id?: string,
21
+ * skill?: string,
22
+ * input_tokens?: number,
23
+ * output_tokens?: number,
24
+ * ...
25
+ * }
26
+ * }
27
+ *
28
+ * Mapping to OTel GenAI semantic conventions:
29
+ * span.name → name
30
+ * span.attributes.model_id → gen_ai.request.model, gen_ai.response.model
31
+ * span.attributes.input_tokens → gen_ai.usage.input_tokens
32
+ * span.attributes.output_tokens → gen_ai.usage.output_tokens
33
+ * span.name → gen_ai.operation.name
34
+ * 'mindforge' → gen_ai.system (or span.attributes.provider if present)
35
+ */
36
+
37
+ const crypto = require('crypto');
38
+
39
/**
 * Reports whether span export is switched on.
 * The only gate is the OTEL_EXPORTER_OTLP_ENDPOINT environment variable: any
 * non-empty value enables export, an unset or empty value disables it.
 * @returns {boolean}
 */
function isEnabled() {
  const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
  return Boolean(endpoint);
}
46
+
47
/**
 * Translate a NexusTracer span to OTel GenAI-compatible format.
 *
 * Ids: `traceId` (16 bytes hex), `spanId` (8 bytes hex) and `parentSpanId` are
 * derived deterministically (SHA-256, truncated) from the Nexus `trace_id`, `id`
 * and `parent_id`. Spans of the same Nexus trace therefore share one traceId and
 * a child's parentSpanId equals its parent's spanId. A random id is generated
 * only when the Nexus id is missing.
 *
 * Times: ISO-8601 strings become nanoseconds since the epoch as BigInt; a missing
 * or unparseable timestamp becomes 0n instead of throwing.
 *
 * Status: 'success' → 1 (OK), 'error' → 2 (ERROR), anything else (including
 * 'active') → 0 (UNSET).
 *
 * @param {object} nexusSpan - A span object from NexusTracer
 * @returns {object} OTel-compatible span object
 */
function toOtelSpan(nexusSpan) {
  const span = nexusSpan || {};
  const attrs = span.attributes || {};

  // Stable hex id of `bytes` bytes for a Nexus id; random when there is none.
  const hexId = (nexusId, bytes) => (nexusId
    ? crypto.createHash('sha256').update(String(nexusId)).digest('hex').slice(0, bytes * 2)
    : crypto.randomBytes(bytes).toString('hex'));

  // BigInt(NaN) throws a RangeError, so unparseable dates must be caught first.
  const toUnixNano = (iso) => {
    const ms = iso ? new Date(iso).getTime() : NaN;
    return Number.isFinite(ms) ? BigInt(ms) * 1_000_000n : 0n;
  };

  let statusCode = 0; // UNSET — e.g. a span that is still 'active'
  if (span.status === 'success') statusCode = 1; // OK
  else if (span.status === 'error') statusCode = 2; // ERROR

  return {
    traceId: hexId(span.trace_id, 16),
    spanId: hexId(span.id, 8),
    parentSpanId: span.parent_id ? hexId(span.parent_id, 8) : '',
    name: span.name || 'unknown',
    kind: 1, // SPAN_KIND_INTERNAL
    startTimeUnixNano: toUnixNano(span.start_time),
    endTimeUnixNano: toUnixNano(span.end_time),
    status: { code: statusCode },
    attributes: {
      'gen_ai.system': attrs.provider || 'mindforge',
      'gen_ai.request.model': attrs.model_id || '',
      'gen_ai.response.model': attrs.model_id || '',
      'gen_ai.usage.input_tokens': attrs.input_tokens || 0,
      'gen_ai.usage.output_tokens': attrs.output_tokens || 0,
      'gen_ai.operation.name': span.name || '',
      'service.name': attrs.service || 'mindforge-nexus',
    },
  };
}
81
+
82
/**
 * Returns a shallow copy of an OTel span in which the two timestamp fields are
 * strings. JSON.stringify cannot serialise BigInt, so the nanosecond timestamps
 * are converted with String(); every other property is copied unchanged.
 * @param {object} otelSpan
 * @returns {object}
 */
function toJsonSafe(otelSpan) {
  const startTimeUnixNano = String(otelSpan.startTimeUnixNano);
  const endTimeUnixNano = String(otelSpan.endTimeUnixNano);
  return { ...otelSpan, startTimeUnixNano, endTimeUnixNano };
}
94
+
95
/**
 * Export a NexusTracer span to the OTel-compatible local file.
 * In production, this would POST to OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces.
 * For now, appends one JSON line per span to
 * .mindforge/metrics/otel-spans.jsonl (relative to process.cwd()).
 *
 * Does nothing when the exporter is disabled. Never throws or rejects: span
 * translation, serialisation and file I/O are all best-effort.
 *
 * @param {object} nexusSpan - A span from NexusTracer
 * @returns {Promise<void>}
 */
async function exportSpan(nexusSpan) {
  if (!isEnabled()) return;

  try {
    const fs = require('fs');
    const path = require('path');
    const outPath = path.join(process.cwd(), '.mindforge', 'metrics', 'otel-spans.jsonl');

    // Translation happens inside the try as well: a malformed span must not
    // surface as a rejected promise in the caller.
    const line = JSON.stringify(toJsonSafe(toOtelSpan(nexusSpan)));

    // `recursive: true` is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.appendFileSync(outPath, line + '\n');
  } catch {
    // Non-fatal: observability export should never break the main flow
  }
}
122
+
123
+ module.exports = { isEnabled, toOtelSpan, toJsonSafe, exportSpan };
@@ -1,5 +1,5 @@
1
1
  /**
2
- * MindForge v11.0.0 — Neural Drift Remediation (NDR)
2
+ * MindForge v11.1.0 — Neural Drift Remediation (NDR)
3
3
  * Component: Remediation Engine (Pillar X)
4
4
  *
5
5
  * Triggers corrective actions when logic drift or reasoning
@@ -1,5 +1,5 @@
1
1
  /**
2
- * MindForge v11.0.0 — Self-Corrective Synthesis (SCS)
2
+ * MindForge v11.1.0 — Self-Corrective Synthesis (SCS)
3
3
  * Component: Self-Corrective Synthesizer (Pillar XII)
4
4
  *
5
5
  * Analyzes mission drift and logic stagnation to synthesize
@@ -12,7 +12,7 @@ const SUBCOMMAND = ARGS[0];
12
12
 
13
13
  async function main() {
14
14
  switch (SUBCOMMAND) {
15
- case 'status':
15
+ case 'status': {
16
16
  const history = TemporalHub.getHistory();
17
17
  console.log('\n⏳ MindForge Temporal Status');
18
18
  console.log(` Snapshots: ${history.length}`);
@@ -20,6 +20,7 @@ async function main() {
20
20
  console.log(` Latest: ${history[0].id} (${history[0].timestamp})`);
21
21
  }
22
22
  break;
23
+ }
23
24
 
24
25
  case 'cleanup':
25
26
  console.log('🧹 Cleaning up old temporal snapshots...');
@@ -27,7 +28,7 @@ async function main() {
27
28
  console.log('✅ Cleanup complete.');
28
29
  break;
29
30
 
30
- case 'inject':
31
+ case 'inject': {
31
32
  const auditId = ARGS[1];
32
33
  const fix = ARGS.slice(2).join(' ');
33
34
  if (!auditId || !fix) {
@@ -42,6 +43,7 @@ async function main() {
42
43
  process.exit(1);
43
44
  }
44
45
  break;
46
+ }
45
47
 
46
48
  default:
47
49
  console.log('Usage: /mindforge:temporal <status|cleanup|inject>');
@@ -0,0 +1,131 @@
1
+ 'use strict';
2
+
3
+ const { execSync } = require('child_process');
4
+ const path = require('path');
5
+ const fs = require('fs');
6
+
7
+ const MAX_OUTPUT_LENGTH = 2000;
8
+
9
/**
 * Skip predicate for the tests stage. True when MINDFORGE_VERIFICATION_ACTIVE is
 * '1' (the value this runner puts into the environment of the commands it
 * spawns) or NODE_ENV is 'test', so the test suite does not re-enter itself.
 */
const isNestedVerificationRun = () =>
  process.env.MINDFORGE_VERIFICATION_ACTIVE === '1' ||
  process.env.NODE_ENV === 'test';

/** Skip predicate for the typecheck stage: true when `cwd` has no tsconfig.json. */
const hasNoTsconfig = (cwd) => !fs.existsSync(path.join(cwd, 'tsconfig.json'));

/**
 * Stage table: stage name → shell command plus an optional skip predicate.
 * `skipIf` is either null (always run) or a function of the working directory
 * that returns true when the stage should be skipped.
 */
const STAGE_DEFS = {
  tests: { command: 'node tests/run-all.js', skipIf: isNestedVerificationRun },
  lint: { command: 'npx eslint . --max-warnings=0', skipIf: null },
  audit: { command: 'node bin/verify-audit.js', skipIf: null },
  typecheck: { command: 'npx tsc --noEmit', skipIf: hasNoTsconfig },
};
34
+
35
/**
 * Run a single stage, returning a structured result object.
 *
 * @param {string} name - Key into STAGE_DEFS.
 * @param {string} cwd - Working directory the stage command runs in.
 * @returns {{name: string, status: 'pass'|'fail'|'skip', durationMs: number, output: string}}
 *   - unknown stage name → status 'skip' with an explanatory output
 *   - skip predicate returned true → status 'skip', empty output
 *   - command exited 0 → status 'pass', stdout truncated to MAX_OUTPUT_LENGTH
 *   - command failed, timed out (120 s) or could not be spawned → status 'fail';
 *     output is the captured stdout+stderr, or the error message when nothing
 *     was captured, truncated to MAX_OUTPUT_LENGTH
 */
function executeStage(name, cwd) {
  // Own-property lookup so names like "constructor" are reported as unknown
  // instead of resolving to something inherited from Object.prototype.
  const def = Object.prototype.hasOwnProperty.call(STAGE_DEFS, name)
    ? STAGE_DEFS[name]
    : undefined;
  if (!def) {
    return { name, status: 'skip', durationMs: 0, output: `Unknown stage: ${name}` };
  }

  // Check skip condition
  if (def.skipIf && def.skipIf(cwd)) {
    return { name, status: 'skip', durationMs: 0, output: '' };
  }

  const start = Date.now();
  let output = '';
  let status = 'pass';

  try {
    // Spawned commands see MINDFORGE_VERIFICATION_ACTIVE=1 in their environment.
    const env = Object.assign({}, process.env, {
      MINDFORGE_VERIFICATION_ACTIVE: '1',
    });
    const result = execSync(def.command, {
      cwd,
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: 120000,
      // The default maxBuffer (1 MiB) kills a verbose but otherwise successful
      // command and reports it as failed; only the first MAX_OUTPUT_LENGTH
      // characters are kept anyway, so allow generous capture.
      maxBuffer: 64 * 1024 * 1024,
      env,
    });
    output = (result || '').slice(0, MAX_OUTPUT_LENGTH);
  } catch (err) {
    status = 'fail';
    const stdout = err.stdout || '';
    const stderr = err.stderr || '';
    const captured = (stdout + '\n' + stderr).trim();
    // Timeouts, spawn errors and silent non-zero exits capture nothing; fall
    // back to the error message so the failure is not reported blank.
    output = (captured || err.message || '').slice(0, MAX_OUTPUT_LENGTH);
  }

  const durationMs = Date.now() - start;
  return { name, status, durationMs, output };
}
75
+
76
/**
 * Executes the requested stages one after another, in the given order, against
 * the resolved working directory and tallies the outcomes.
 * @param {{ cwd: string, stages: string[] }} opts
 * @returns {Promise<object>} `{ stages, summary, timestamp }` where `stages` holds
 *   the per-stage results, `summary` is `{ passed, failed, skipped, totalDurationMs }`
 *   and `timestamp` is the ISO-8601 time at which the result was assembled.
 */
async function runVerification({ cwd, stages }) {
  const root = path.resolve(cwd);

  const outcomes = [];
  for (const stageName of stages) {
    outcomes.push(executeStage(stageName, root));
  }

  // Single pass over the outcomes instead of one filter per status.
  let passed = 0;
  let failed = 0;
  let skipped = 0;
  let totalDurationMs = 0;
  for (const outcome of outcomes) {
    if (outcome.status === 'pass') passed += 1;
    if (outcome.status === 'fail') failed += 1;
    if (outcome.status === 'skip') skipped += 1;
    totalDurationMs += outcome.durationMs;
  }

  return {
    stages: outcomes,
    summary: { passed, failed, skipped, totalDurationMs },
    timestamp: new Date().toISOString(),
  };
}
101
+
102
/**
 * Renders a verification result as markdown: a title, the timestamp, one table
 * row per stage (name, status with icon, duration) and a one-line summary.
 * A duration of 0 ms is shown as "-"; an unrecognised status gets a "?" icon.
 * @param {object} result — output from runVerification
 * @returns {string} Markdown report (ends with a newline)
 */
function formatReport(result) {
  const icons = { pass: '✅', fail: '❌', skip: '⏭️' };

  const heading = [
    '# Verification Report',
    '',
    `**Timestamp:** ${result.timestamp}`,
    '',
    '| Stage | Status | Duration |',
    '|-------|--------|----------|',
  ];

  const rows = [...result.stages].map((stage) => {
    const icon = icons[stage.status] || '?';
    const took = stage.durationMs > 0 ? `${stage.durationMs}ms` : '-';
    return `| ${stage.name} | ${icon} ${stage.status} | ${took} |`;
  });

  const footer = [
    '',
    `**Summary:** ${result.summary.passed} passed, ${result.summary.failed} failed, ${result.summary.skipped} skipped (${result.summary.totalDurationMs}ms total)`,
    '',
  ];

  return [...heading, ...rows, ...footer].join('\n');
}
130
+
131
+ module.exports = { runVerification, formatReport };
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * verify-cli.js — Entrypoint for the `verify` CLI command.
6
+ * Calls the unified verification runner across all stages and writes
7
+ * the formatted report to .planning/VERIFICATION.md.
8
+ */
9
+
10
+ const path = require('path');
11
+ const fs = require('fs');
12
+ const { runVerification, formatReport } = require('./verification-runner');
13
+
14
+ const STAGES = ['tests', 'lint', 'audit', 'typecheck'];
15
+ const CWD = process.env.MINDFORGE_ROOT || path.resolve(__dirname, '../..');
16
+
17
/**
 * Runs every stage in STAGES against CWD, writes the markdown report to
 * <CWD>/.planning/VERIFICATION.md and echoes it to stdout.
 *
 * The exit status is communicated through process.exitCode (1 when any stage
 * failed, 0 otherwise) rather than process.exit(): process.exit() terminates
 * immediately and can drop stdout data that has not been flushed yet (e.g. when
 * the report is piped), which would truncate the report.
 */
async function main() {
  const planningDir = path.join(CWD, '.planning');
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(planningDir, { recursive: true });

  const result = await runVerification({ cwd: CWD, stages: STAGES });
  const report = formatReport(result);

  fs.writeFileSync(path.join(planningDir, 'VERIFICATION.md'), report);
  process.stdout.write(report + '\n');
  process.exitCode = result.summary.failed > 0 ? 1 : 0;
}

// Any unexpected error (e.g. the report could not be written) is reported on
// stderr and turns into a non-zero exit status.
main().catch((err) => {
  console.error('Verification runner failed:', err.message);
  process.exitCode = 1;
});
@@ -0,0 +1,82 @@
1
+ 'use strict';
2
+
3
/**
 * Recall@K — fraction of the distinct relevant items that appear in the top-k
 * retrieved results.
 *
 * Duplicates are ignored on both sides: an id repeated in `retrieved` counts as
 * one hit, and an id repeated in `relevant` counts as one relevant item, so the
 * result always stays within [0, 1].
 *
 * @param {string[]} retrieved - IDs in ranked order
 * @param {string[]} relevant - ground-truth relevant IDs
 * @param {number} k - cutoff
 * @returns {number} recall in [0, 1]; 0 when there are no relevant IDs
 */
function recallAtK(retrieved, relevant, k) {
  const relevantSet = new Set(relevant);
  if (relevantSet.size === 0) return 0;

  // Collect hits in a Set so a repeated id cannot be counted twice.
  const hits = new Set();
  for (const id of retrieved.slice(0, k)) {
    if (relevantSet.has(id)) hits.add(id);
  }
  return hits.size / relevantSet.size;
}
17
+
18
/**
 * nDCG (Normalized Discounted Cumulative Gain) with graded relevance.
 *
 * DCG  = Σ (2^rel_i - 1) / log2(i + 2) over the top-k retrieved ids (i = 0..k-1).
 * IDCG = the same sum over the positive grades of `relevanceMap`, sorted
 *        descending and cut at k.
 *
 * An id that occurs more than once in the ranking earns its gain only at its
 * first position, which keeps the score within [0, 1]. Grades are read from own
 * properties of `relevanceMap` only, so ids such as "toString" or "constructor"
 * are not mistaken for graded documents.
 *
 * @param {string[]} retrieved - IDs in ranked order
 * @param {Object.<string, number>} relevanceMap - {id: grade} where grade is 0-3
 * @param {number} k - cutoff
 * @returns {number} nDCG in [0, 1]; 0 when no id has a positive grade
 */
function ndcg(retrieved, relevanceMap, k) {
  const gain = (grade, rank) => (Math.pow(2, grade) - 1) / Math.log2(rank + 2);
  const gradeOf = (id) => (Object.prototype.hasOwnProperty.call(relevanceMap, id)
    ? relevanceMap[id] || 0
    : 0);

  // DCG over the top-k; repeated ids keep their rank slot but contribute nothing.
  const credited = new Set();
  const dcg = retrieved.slice(0, k).reduce((sum, id, rank) => {
    if (credited.has(id)) return sum;
    credited.add(id);
    return sum + gain(gradeOf(id), rank);
  }, 0);

  // IDCG — ideal ordering: sort all relevance grades descending, take top-k.
  const idcg = Object.values(relevanceMap)
    .filter((grade) => grade > 0)
    .sort((a, b) => b - a)
    .slice(0, k)
    .reduce((sum, grade, rank) => sum + gain(grade, rank), 0);

  if (idcg === 0) return 0;
  return dcg / idcg;
}
47
+
48
/**
 * Evaluates a retriever against a golden set, one query at a time and in order.
 * For each entry the retriever is called with the query (its result may be a
 * value or a promise), then recall@k and nDCG@k are computed with every relevant
 * id graded 1. The per-query scores are averaged at the end.
 * @param {Object} opts
 * @param {Array<{query: string, relevant: string[]}>} opts.goldenSet
 * @param {function(string): string[]} opts.retriever
 * @param {number} opts.k
 * @returns {Promise<{meanRecallAtK: number, meanNDCG: number, perQuery: Array}>}
 *   Both means are 0 and `perQuery` is empty when the golden set is empty.
 */
async function runEval({ goldenSet, retriever, k }) {
  const perQuery = [];

  for (const entry of goldenSet) {
    const { query, relevant } = entry;
    const retrieved = await Promise.resolve(retriever(query));

    // Binary grading: each relevant id maps to 1; absent ids are treated as 0.
    const relevanceMap = {};
    for (const id of relevant) {
      relevanceMap[id] = 1;
    }

    perQuery.push({
      query,
      recall: recallAtK(retrieved, relevant, k),
      ndcg: ndcg(retrieved, relevanceMap, k),
      retrieved,
    });
  }

  if (perQuery.length === 0) {
    return { meanRecallAtK: 0, meanNDCG: 0, perQuery: [] };
  }

  const meanOf = (field) =>
    perQuery.reduce((total, row) => total + row[field], 0) / perQuery.length;

  return { meanRecallAtK: meanOf('recall'), meanNDCG: meanOf('ndcg'), perQuery };
}
81
+
82
+ module.exports = { recallAtK, ndcg, runEval };
@@ -0,0 +1,46 @@
1
+ {
2
+ "description": "Golden set for retrieval quality evaluation. Each entry has a natural-language query and the IDs of documents that SHOULD be retrieved.",
3
+ "version": "1.0.0",
4
+ "queries": [
5
+ {
6
+ "query": "how does the audit hash chain work",
7
+ "relevant": ["audit-hash", "audit-verifier", "verify-audit"]
8
+ },
9
+ {
10
+ "query": "what model should I use for a security-sensitive task",
11
+ "relevant": ["difficulty-scorer", "model-router", "pricing-registry", "trust-boundaries"]
12
+ },
13
+ {
14
+ "query": "how does wave execution and parallel orchestration work",
15
+ "relevant": ["wave-executor", "swarm-controller", "auto-executor"]
16
+ },
17
+ {
18
+ "query": "how do I track token costs and budget enforcement",
19
+ "relevant": ["cost-tracker", "token-ledger", "budget-enforcer", "finops-hub"]
20
+ },
21
+ {
22
+ "query": "how does the knowledge store persist and retrieve entries",
23
+ "relevant": ["knowledge-store", "knowledge-graph-protocol", "shard-controller"]
24
+ },
25
+ {
26
+ "query": "what happens during council consensus and synthesis",
27
+ "relevant": ["council-protocol", "synthesis-engine", "council-templates"]
28
+ },
29
+ {
30
+ "query": "how do instincts get captured and promoted to skills",
31
+ "relevant": ["capture-engine", "promotion-engine", "instinct-schema", "skill-registry"]
32
+ },
33
+ {
34
+ "query": "what verification checks run before marking a task complete",
35
+ "relevant": ["verification-pipeline", "trust-verifier", "policy-engine"]
36
+ },
37
+ {
38
+ "query": "how does the autonomous stuck detector recover failed tasks",
39
+ "relevant": ["stuck-detector", "node-repair", "steering-manager", "progress-reporter"]
40
+ },
41
+ {
42
+ "query": "how are hooks triggered and what security gates exist pre-commit",
43
+ "relevant": ["trust-gate-hook", "instinct-capture-hook", "policy-gate-hardened", "impact-analyzer"]
44
+ }
45
+ ]
46
+ }
@@ -0,0 +1,12 @@
1
+ 'use strict';
2
+ const crypto = require('crypto');
3
/**
 * Canonical audit hash material. MUST be the single source of truth for both
 * the writer (pre-_hash entry) and the verifier (entry with _hash stripped).
 *
 * The digest is the hex SHA-256 of JSON.stringify({...entry, previous_hash}),
 * so it depends on the entry's property order. The entry must NOT contain
 * _hash — this function does not strip it.
 *
 * @param {object} entryWithoutHash - Audit entry without its `_hash` field.
 * @param {string} previousHash - Hash of the preceding entry in the chain.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function hashAuditEntry(entryWithoutHash, previousHash) {
  const linkedEntry = { ...entryWithoutHash, previous_hash: previousHash };
  const hasher = crypto.createHash('sha256');
  hasher.update(JSON.stringify(linkedEntry));
  return hasher.digest('hex');
}
12
+ module.exports = { hashAuditEntry };