@nusoft/nuos-build-catalogue 0.33.3 → 0.35.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,7 +14,7 @@ The embedder is selected via `NUOS_CATALOGUE_EMBEDDER`:
14
14
 
15
15
  | Value | Provider | Default model | Dimensions | Notes |
16
16
  |---|---|---|---|---|
17
- | `ollama` (default) | Local Ollama | `qwen3-embedding:8b` | 4096 | **Sovereignty by default.** No network egress. Override the model with `NUOS_CATALOGUE_OLLAMA_MODEL=qwen3-embedding:4b` (2560 dims) or `qwen3-embedding:0.6b` (1024 dims) for smaller boxes. Needs `ollama serve` running and the model pulled (`ollama pull qwen3-embedding:8b`). |
17
+ | `ollama` (default) | Local Ollama | `qwen3-embedding:0.6b` | 1024 | **Sovereignty by default.** No network egress. The 0.6b default (~600 MB) runs on any modern laptop, including CPU-only. For better recall on a machine with headroom, raise fidelity with `NUOS_CATALOGUE_OLLAMA_MODEL=qwen3-embedding:4b` (2560 dims, ~2.5 GB) or `qwen3-embedding:8b` (4096 dims, ~4.7 GB). Needs `ollama serve` running and the model pulled (`ollama pull qwen3-embedding:0.6b`). |
18
18
  | `vertex` | Google Vertex | `text-embedding-005` | 768 | Cloud Google. Needs `GOOGLE_CLOUD_PROJECT` plus a Vertex access token (set `GOOGLE_VERTEX_ACCESS_TOKEN`, or have `gcloud` on PATH and run `gcloud auth application-default login`). |
19
19
  | `openai` | OpenAI | `text-embedding-3-small` | 1536 | Cloud OpenAI. Needs `OPENAI_API_KEY`. |
20
20
  | `stub` | Hash-based, no API | — | 384 | Tests + dev only. Results are noisy. |
@@ -26,9 +26,9 @@ Switching embedder (or model variant) requires a full reindex (`rm -rf .nuos-cat
26
26
  ```bash
27
27
  # Pre-flight (one time):
28
28
  ollama serve # in another shell
29
- ollama pull qwen3-embedding:8b # ~4.7 GB download
29
+ ollama pull qwen3-embedding:0.6b # ~600 MB download
30
30
 
31
- # Index the catalogue (first time takes ~20 min on 8b)
31
+ # Index the catalogue (the first run embeds everything; later runs only re-embed changed files)
32
32
  npm run index
33
33
 
34
34
  # Search
package/dist/cli.js CHANGED
@@ -436,6 +436,15 @@ Usage:
436
436
  nuos-catalogue memory store --value="..." [--wu=wu-007] [--agent=architect] [--key="label"]
437
437
  nuos-catalogue memory search --query="..." [--limit=N] [--wu=wu-007] [--agent=architect]
438
438
 
439
+ nuos-catalogue state compile [--dry-run] [--state-md=<path>]
440
+ (WU 113b — recompile STATE.md generated regions from canonical store;
441
+ splices metadata / what-is-next / open-questions / decisions / risks /
442
+ health-check regions; preserves authored prose byte-for-byte)
443
+ nuos-catalogue state drift-check [--state-md=<path>]
444
+ (WU 113b Stage B — check whether STATE.md generated regions match
445
+ canonical state; exit 0 on clean / no-regions / can't-run;
446
+ exit 1 ONLY on confirmed generated-region drift; called by pre-commit hook)
447
+
439
448
  nuos-catalogue end-of-session
440
449
  (WU 112 — verify-and-gate: checks the nine end-of-session protocol steps
441
450
  against disk facts; prints a per-check report; exits non-zero on a blocked
@@ -657,6 +666,45 @@ async function main() {
657
666
  process.exit(result.exitCode);
658
667
  break;
659
668
  }
669
+ case 'state': {
670
+ // `state compile` — regenerate the generated regions of STATE.md (WU 113b / D132).
671
+ // `state drift-check` — check for generated-region drift (Stage B; called by pre-commit hook).
672
+ const sub = args.positional[0];
673
+ const buildRoot = resolveBuildRoot(args.flags['build-root']);
674
+ const workflowsPath = resolveWorkflowsPath(buildRoot, args.flags['workflows']);
675
+ if (sub === 'compile') {
676
+ const { cmdStateCompile } = await import('./commands/state-compile.js');
677
+ const store = await openWorkflowStore(workflowsPath);
678
+ const result = await cmdStateCompile(store, {
679
+ buildRoot,
680
+ stateMdPath: args.flags['state-md'] ? String(args.flags['state-md']) : undefined,
681
+ dryRun: Boolean(args.flags['dry-run']),
682
+ });
683
+ if (result.output)
684
+ console.log(result.output);
685
+ process.exit(result.exitCode);
686
+ }
687
+ else if (sub === 'drift-check') {
688
+ const { cmdStateDriftCheck } = await import('./commands/state-compile.js');
689
+ const store = await openWorkflowStore(workflowsPath);
690
+ const result = await cmdStateDriftCheck(store, {
691
+ buildRoot,
692
+ stateMdPath: args.flags['state-md'] ? String(args.flags['state-md']) : undefined,
693
+ });
694
+ // Drift-check output always goes to stderr, whether the verdict is clean, skipped (informational), or drifted.
695
+ if (result.output)
696
+ process.stderr.write(result.output + '\n');
697
+ process.exit(result.exitCode);
698
+ }
699
+ else {
700
+ console.error(`unknown state subcommand: ${sub ?? '(none)'}`);
701
+ console.error('available:');
702
+ console.error(' state compile [--dry-run] [--state-md=<path>] [--build-root=<dir>] [--workflows=<file>]');
703
+ console.error(' state drift-check [--state-md=<path>] [--build-root=<dir>] [--workflows=<file>]');
704
+ process.exit(1);
705
+ }
706
+ break;
707
+ }
660
708
  case 'start-of-session': {
661
709
  // Reserved handle — body in a follow-up WU.
662
710
  console.error('start-of-session: not yet implemented (WU 112 reserves the handle; body in a follow-up WU).');
@@ -22,6 +22,7 @@
22
22
  */
23
23
  import { stat, readdir, readFile } from 'node:fs/promises';
24
24
  import path from 'node:path';
25
+ import { cmdStateCompile } from './state-compile.js';
25
26
  const BUILD_MAINTAINER = {
26
27
  kind: 'staff',
27
28
  id: 'build-maintainer',
@@ -46,7 +47,7 @@ export async function cmdEndOfSession(store, runtime, args) {
46
47
  }
47
48
  // Gather disk facts — this is the only place filesystem access happens
48
49
  // (the workflow itself is pure).
49
- const catalogueFacts = await gatherFacts(args.buildRoot, activeWuHandle, sessionStartIso, today);
50
+ const catalogueFacts = await gatherFacts(args.buildRoot, activeWuHandle, sessionStartIso, today, store);
50
51
  // Check for an existing (incomplete) session.end:<date> record.
51
52
  const existingHandle = `session.end:${today}`;
52
53
  const existingRecord = store.get(existingHandle);
@@ -71,6 +72,7 @@ export async function cmdEndOfSession(store, runtime, args) {
71
72
  'capture_open_questions',
72
73
  'capture_risks',
73
74
  'update_work_units_index',
75
+ 'recompile_state_md',
74
76
  'update_state_md',
75
77
  'write_session_log',
76
78
  'confirm_no_loss',
@@ -127,7 +129,7 @@ export async function cmdEndOfSession(store, runtime, args) {
127
129
  // ---------------------------------------------------------------------------
128
130
  // Disk fact gathering — the only place fs access happens
129
131
  // ---------------------------------------------------------------------------
130
- async function gatherFacts(buildRoot, activeWuHandle, sessionStartIso, sessionDate) {
132
+ async function gatherFacts(buildRoot, activeWuHandle, sessionStartIso, sessionDate, store) {
131
133
  const sessionStartMs = new Date(sessionStartIso).getTime();
132
134
  // Step 1: WU notes
133
135
  const { wuNotesTouched, wuNotesHasTodayHeading } = await checkWuNotes(buildRoot, activeWuHandle, sessionStartMs, sessionDate);
@@ -137,6 +139,10 @@ async function gatherFacts(buildRoot, activeWuHandle, sessionStartIso, sessionDa
137
139
  const risksParity = await checkRisksParity(buildRoot);
138
140
  // Step 5: work-units index
139
141
  const doneMoveOk = await checkWorkUnitsIndex(buildRoot);
142
+ // Step 5.5 (D132): recompile the generated regions of STATE.md.
143
+ // This is the orchestrate-and-write step sanctioned by D132 for generated regions.
144
+ // It must not fail the session if STATE.md has no sentinel regions yet (pre-cutover).
145
+ const { stateMdRecompileResult, stateMdRecompileDetail } = await recompileStateMd(buildRoot, store);
140
146
  // Step 6: STATE.md
141
147
  const { stateMdTouched, stateMdLastUpdated, stateMdLastSessionResolves } = await checkStateMd(buildRoot, sessionStartMs, sessionDate);
142
148
  // Step 7: session log
@@ -148,6 +154,8 @@ async function gatherFacts(buildRoot, activeWuHandle, sessionStartIso, sessionDa
148
154
  questionsParity,
149
155
  risksParity,
150
156
  doneMoveOk,
157
+ stateMdRecompileResult,
158
+ stateMdRecompileDetail,
151
159
  stateMdTouched,
152
160
  stateMdLastUpdated,
153
161
  stateMdLastSessionResolves,
@@ -294,6 +302,44 @@ async function checkRisksParity(buildRoot) {
294
302
  // This check is present for forward-compat when risks get individual files.
295
303
  return { filesWithoutRow: [], rowsWithoutFile: [] };
296
304
  }
305
+ /**
306
+ * Recompile the generated regions of STATE.md (D132 / D130: orchestrate-and-write
307
+ * for the generated regions is sanctioned by D132; authored prose is never touched).
308
+ *
309
+ * Fail-open contract (same as `cmdStateDriftCheck`):
310
+ * - 'skipped' when STATE.md has no sentinel regions yet (pre-cutover) — ok
311
+ * - 'ok' when the recompile succeeded (or was already current)
312
+ * - 'error' when the compile command returned non-zero (adapter error, splice error)
313
+ *
314
+ * A 'skipped' result is treated as passing by the pack workflow so that
315
+ * end-of-session is not broken for catalogues that haven't completed Stage B cutover.
316
+ */
317
+ async function recompileStateMd(buildRoot, store) {
318
+ try {
319
+ const result = await cmdStateCompile(store, { buildRoot });
320
+ if (result.exitCode === 0) {
321
+ return { stateMdRecompileResult: 'ok', stateMdRecompileDetail: result.output?.trim() };
322
+ }
323
+ // Non-zero exit from cmdStateCompile — check if it's the missing-sentinel case (pre-cutover).
324
+ // The missing-sentinel output contains the specific wording from the command.
325
+ if (result.output?.includes('sentinel regions are absent')) {
326
+ return {
327
+ stateMdRecompileResult: 'skipped',
328
+ stateMdRecompileDetail: 'sentinel regions absent — pre-cutover',
329
+ };
330
+ }
331
+ return {
332
+ stateMdRecompileResult: 'error',
333
+ stateMdRecompileDetail: result.output?.trim(),
334
+ };
335
+ }
336
+ catch (err) {
337
+ return {
338
+ stateMdRecompileResult: 'error',
339
+ stateMdRecompileDetail: err instanceof Error ? err.message : String(err),
340
+ };
341
+ }
342
+ }
297
343
  async function checkWorkUnitsIndex(buildRoot) {
298
344
  const indexPath = path.join(buildRoot, 'work-units', '_index.md');
299
345
  const content = await fileContent(indexPath);
@@ -349,7 +395,9 @@ async function checkStateMd(buildRoot, sessionStartMs, sessionDate) {
349
395
  const stateMdTouched = mtime ? mtime.getTime() > sessionStartMs : false;
350
396
  const content = await fileContent(stateMdPath);
351
397
  let stateMdLastUpdated = '';
352
- let stateMdLastSessionResolves = false;
398
+ // Renamed from stateMdLastSessionResolves to stateMdLastSessionPresent (WU 113b).
399
+ // The field checks presence of a non-empty "Last session" row, not link resolution.
400
+ let stateMdLastSessionPresent = false;
353
401
  if (content) {
354
402
  // Fix 1 (WU 112 fix-pass): accept all three "Last updated" shapes:
355
403
  // table-row: | Last updated | 2026-05-31 (**Session 115 — ...**) ... |
@@ -370,10 +418,14 @@ async function checkStateMd(buildRoot, sessionStartMs, sessionDate) {
370
418
  if (sessionLineMatch) {
371
419
  // The row is non-empty if it contains more than just the label itself.
372
420
  const rowText = sessionLineMatch[0].replace(/Last session/i, '').replace(/[|:\s]/g, '');
373
- stateMdLastSessionResolves = rowText.length > 0;
421
+ stateMdLastSessionPresent = rowText.length > 0;
374
422
  }
375
423
  }
376
- return { stateMdTouched, stateMdLastUpdated, stateMdLastSessionResolves };
424
+ // Return under the pack's EndOfSessionFacts field name (stateMdLastSessionResolves)
425
+ // — the internal variable was renamed to stateMdLastSessionPresent above to clarify
426
+ // the semantics (presence check, not link-resolution). The published interface is
427
+ // unchanged so the pack type is not broken.
428
+ return { stateMdTouched, stateMdLastUpdated, stateMdLastSessionResolves: stateMdLastSessionPresent };
377
429
  }
378
430
  async function checkSessionLog(buildRoot, sessionDate) {
379
431
  const sessionsDir = path.join(buildRoot, 'sessions');
@@ -406,15 +458,16 @@ function formatReport(payload, today, resumedFrom, dryRun) {
406
458
  lines.push('══════════════════════════════════════════════════════════════════════');
407
459
  lines.push('');
408
460
  const STEP_LABELS = {
409
- update_active_wu_notes: 'Step 1 — WU notes updated',
410
- capture_decisions: 'Step 2 — decisions captured',
411
- capture_open_questions: 'Step 3 — open questions captured',
412
- capture_risks: 'Step 4 — risks captured',
413
- update_work_units_index: 'Step 5 — work-units index updated',
414
- update_state_md: 'Step 6 — STATE.md updated',
415
- write_session_log: 'Step 7 — session log written',
416
- confirm_no_loss: 'Step 8 — confirm-no-loss gate',
417
- report: 'Step 9 — report',
461
+ update_active_wu_notes: 'Step 1 — WU notes updated',
462
+ capture_decisions: 'Step 2 — decisions captured',
463
+ capture_open_questions: 'Step 3 — open questions captured',
464
+ capture_risks: 'Step 4 — risks captured',
465
+ update_work_units_index: 'Step 5 — work-units index updated',
466
+ recompile_state_md: 'Step 5b — STATE.md generated regions recompiled (D132)',
467
+ update_state_md: 'Step 6 — STATE.md updated',
468
+ write_session_log: 'Step 7 — session log written',
469
+ confirm_no_loss: 'Step 8 — confirm-no-loss gate',
470
+ report: 'Step 9 — report',
418
471
  };
419
472
  for (const [stepId, state] of Object.entries(payload.steps)) {
420
473
  const label = STEP_LABELS[stepId] ?? stepId;
@@ -0,0 +1,108 @@
1
+ /**
2
+ * `nuos-catalogue state compile` — STATE.md hybrid-document recompile (WU 113b / D132).
3
+ *
4
+ * Reads canonical state from the **live markdown registers** (not the workflow
5
+ * store, which is stale under Mode 1) and splices the generated sections into
6
+ * the sentinel-delimited regions of STATE.md, leaving all authored prose
7
+ * byte-for-byte identical.
8
+ *
9
+ * **Source-of-truth for each generated region (D129 / Mode 1):**
10
+ * - Active WU: `.nuos-catalogue/active-wu` marker file (WU 136 pointer)
11
+ * + title/status resolved from `work-units/_index.md`
12
+ * - WUs in progress: 🟡 row count in `work-units/_index.md`
13
+ * - WUs completed: file count in `work-units/done/`
14
+ * - Blocked WUs: 🔴 rows in `work-units/_index.md`
15
+ * - Decisions: `decisions/_index.md` active section
16
+ * - Open questions: `open-questions/_index.md` active section
17
+ * - Risks: `risks/_index.md` active section
18
+ *
19
+ * The workflow store (`workflows.json`) is accepted as a parameter for API
20
+ * compatibility (the CLI always opens it), but is NOT consulted for any of
21
+ * the above — it is frozen at migration time and would produce stale counts.
22
+ *
23
+ * **No LLM in this path.** The adapter builds an `LLMCompilationOutput`
24
+ * directly from disk state. `renderArticleMarkdown` is called per section,
25
+ * then `spliceGeneratedRegions` writes only inside the sentinel pairs.
26
+ *
27
+ * **First-cutover boundary.** If a sentinel region is absent from the target
28
+ * STATE.md, this command reports the missing regions clearly and exits
29
+ * non-zero without guessing where to insert them. The one-time insertion of
30
+ * sentinels into the live file is a manual operator step (Stage B walkthrough).
31
+ *
32
+ * D132 / D129 boundary:
33
+ * - Generated regions: live markdown registers are source of truth; disk is
34
+ * rendered projection for these regions only.
35
+ * - Authored regions: disk remains the edit base (untouched by this command).
36
+ */
37
+ import type { LLMCompilationOutput, SentinelConfig } from '@nusoft/nuwiki';
38
+ import { checkArticleDrift } from '@nusoft/nuwiki';
39
+ import type { WorkflowStore } from '../migrate/store.js';
40
+ export declare const STATE_SENTINEL_CONFIG: SentinelConfig;
41
+ export declare const STATE_REGION_KEYS: {
42
+ readonly METADATA: "metadata";
43
+ readonly WHAT_IS_NEXT: "what_is_next";
44
+ readonly OPEN_QUESTIONS: "open_questions";
45
+ readonly RECENT_DECISIONS: "recent_decisions";
46
+ readonly RISKS: "risks";
47
+ readonly HEALTH_CHECK: "health_check";
48
+ };
49
+ export type StateRegionKey = (typeof STATE_REGION_KEYS)[keyof typeof STATE_REGION_KEYS];
50
+ export interface StateSourceAdapterInput {
51
+ store: WorkflowStore;
52
+ buildRoot: string;
53
+ now?: string;
54
+ }
55
+ export interface StateCompiledOutput {
56
+ /** The structured body — one section per generated region. */
57
+ compilationOutput: LLMCompilationOutput;
58
+ /** The generated region contents keyed by region key (ready for splice). */
59
+ regions: Record<StateRegionKey, string>;
60
+ }
61
+ /**
62
+ * Reads canonical state from the live markdown registers and the active-WU
63
+ * marker file, and produces the generated content for each STATE.md region.
64
+ *
65
+ * No LLM call is made. The adapter derives all content deterministically.
66
+ * The workflow store parameter is accepted for API compatibility but is not
67
+ * consulted — see module-level comment for the source-of-truth map.
68
+ */
69
+ export declare function buildStateCompilationOutput(input: StateSourceAdapterInput): Promise<StateCompiledOutput>;
70
+ export interface StateCompileResult {
71
+ output: string;
72
+ exitCode: number;
73
+ updatedRegions?: string[];
74
+ unchangedRegions?: string[];
75
+ }
76
+ export declare function cmdStateCompile(store: WorkflowStore, args: {
77
+ buildRoot: string;
78
+ stateMdPath?: string;
79
+ dryRun?: boolean;
80
+ now?: string;
81
+ }): Promise<StateCompileResult>;
82
+ /**
83
+ * Expose `checkArticleDrift` with STATE.md's sentinel config pre-applied.
84
+ * Used by the pre-commit hook (Stage B) and tests.
85
+ */
86
+ export declare function checkStateMdDrift(fileContent: string, expectedRegions: Record<string, string>): ReturnType<typeof checkArticleDrift>;
87
+ export interface StateDriftCheckResult {
88
+ output: string;
89
+ exitCode: number;
90
+ /** 'clean' | 'drifted' | 'skipped' — used by tests */
91
+ verdict: 'clean' | 'drifted' | 'skipped';
92
+ driftedRegions?: string[];
93
+ }
94
+ /**
95
+ * Check whether the generated regions of STATE.md match what the canonical
96
+ * state currently produces. Designed to be called by the pre-commit hook.
97
+ *
98
+ * Exit-code contract (fail-open):
99
+ * - exit 0 when generated regions are clean
100
+ * - exit 0 when STATE.md has no sentinel regions yet (pre-cutover)
101
+ * - exit 0 when the check cannot run (STATE.md unreadable, store missing)
102
+ * - exit 1 ONLY on confirmed generated-region drift
103
+ */
104
+ export declare function cmdStateDriftCheck(store: WorkflowStore, args: {
105
+ buildRoot: string;
106
+ stateMdPath?: string;
107
+ now?: string;
108
+ }): Promise<StateDriftCheckResult>;
@@ -0,0 +1,793 @@
1
+ /**
2
+ * `nuos-catalogue state compile` — STATE.md hybrid-document recompile (WU 113b / D132).
3
+ *
4
+ * Reads canonical state from the **live markdown registers** (not the workflow
5
+ * store, which is stale under Mode 1) and splices the generated sections into
6
+ * the sentinel-delimited regions of STATE.md, leaving all authored prose
7
+ * byte-for-byte identical.
8
+ *
9
+ * **Source-of-truth for each generated region (D129 / Mode 1):**
10
+ * - Active WU: `.nuos-catalogue/active-wu` marker file (WU 136 pointer)
11
+ * + title/status resolved from `work-units/_index.md`
12
+ * - WUs in progress: 🟡 row count in `work-units/_index.md`
13
+ * - WUs completed: file count in `work-units/done/`
14
+ * - Blocked WUs: 🔴 rows in `work-units/_index.md`
15
+ * - Decisions: `decisions/_index.md` active section
16
+ * - Open questions: `open-questions/_index.md` active section
17
+ * - Risks: `risks/_index.md` active section
18
+ *
19
+ * The workflow store (`workflows.json`) is accepted as a parameter for API
20
+ * compatibility (the CLI always opens it), but is NOT consulted for any of
21
+ * the above — it is frozen at migration time and would produce stale counts.
22
+ *
23
+ * **No LLM in this path.** The adapter builds an `LLMCompilationOutput`
24
+ * directly from disk state. `renderArticleMarkdown` is called per section,
25
+ * then `spliceGeneratedRegions` writes only inside the sentinel pairs.
26
+ *
27
+ * **First-cutover boundary.** If a sentinel region is absent from the target
28
+ * STATE.md, this command reports the missing regions clearly and exits
29
+ * non-zero without guessing where to insert them. The one-time insertion of
30
+ * sentinels into the live file is a manual operator step (Stage B walkthrough).
31
+ *
32
+ * D132 / D129 boundary:
33
+ * - Generated regions: live markdown registers are source of truth; disk is
34
+ * rendered projection for these regions only.
35
+ * - Authored regions: disk remains the edit base (untouched by this command).
36
+ */
37
+ import { readFile, writeFile, readdir } from 'node:fs/promises';
38
+ import path from 'node:path';
39
+ import { renderArticleMarkdown, spliceGeneratedRegions, checkArticleDrift, } from '@nusoft/nuwiki';
40
+ import { resolveIndexDir } from '../path-resolution.js';
41
+ // ---------------------------------------------------------------------------
42
+ // Sentinel configuration — the marker scheme for STATE.md generated regions.
43
+ // HTML-comment markers, compatible with STATE.md's existing nuos:sentinel scheme.
44
+ // The `{{key}}` placeholder is replaced by the region key; `{{marker}}` is
45
+ // replaced by the expanded marker.
46
+ // ---------------------------------------------------------------------------
47
+ export const STATE_SENTINEL_CONFIG = {
48
+ markerPattern: 'nuos:generated:{{key}}',
49
+ openTemplate: '<!-- {{marker}}:start -->',
50
+ closeTemplate: '<!-- {{marker}}:end -->',
51
+ };
52
+ // ---------------------------------------------------------------------------
53
+ // Region keys — one per generated section (per WU 113b section map).
54
+ // ---------------------------------------------------------------------------
55
+ export const STATE_REGION_KEYS = {
56
+ METADATA: 'metadata',
57
+ WHAT_IS_NEXT: 'what_is_next',
58
+ OPEN_QUESTIONS: 'open_questions',
59
+ RECENT_DECISIONS: 'recent_decisions',
60
+ RISKS: 'risks',
61
+ HEALTH_CHECK: 'health_check',
62
+ };
63
+ /**
64
+ * Reads canonical state from the live markdown registers and the active-WU
65
+ * marker file, and produces the generated content for each STATE.md region.
66
+ *
67
+ * No LLM call is made. The adapter derives all content deterministically.
68
+ * The workflow store parameter is accepted for API compatibility but is not
69
+ * consulted — see module-level comment for the source-of-truth map.
70
+ */
71
+ export async function buildStateCompilationOutput(input) {
72
+ const { buildRoot } = input;
73
+ const now = input.now ?? new Date().toISOString();
74
+ const today = now.slice(0, 10);
75
+ // 1. Active WU — from the .nuos-catalogue/active-wu marker file (WU 136).
76
+ // Title + status resolved from work-units/_index.md (live source).
77
+ const activeWu = await readActiveWuFromMarker(buildRoot);
78
+ // 2. Blocked WUs — from 🔴 rows in work-units/_index.md.
79
+ const blockedWorkflows = await readBlockedWorkflowsFromIndex(buildRoot);
80
+ // 3. Register indexes (all parsed from live disk files).
81
+ const unresolvedQuestions = await readUnresolvedQuestions(buildRoot);
82
+ const recentDecisions = await readRecentDecisions(buildRoot);
83
+ const activeRisks = await readActiveRisks(buildRoot);
84
+ const healthStats = await readHealthStatsFromDisk(buildRoot);
85
+ // 4. Build each section's text content.
86
+ const metadataText = renderMetadataSection(activeWu, today, healthStats);
87
+ const whatIsNextText = renderWhatIsNextSection(activeWu, blockedWorkflows);
88
+ const openQuestionsText = renderOpenQuestionsSection(unresolvedQuestions);
89
+ const recentDecisionsText = renderRecentDecisionsSection(recentDecisions);
90
+ const risksText = renderRisksSection(activeRisks);
91
+ const healthCheckText = renderHealthCheckSection(healthStats);
92
+ // 5. Assemble LLMCompilationOutput (one section per region, positionally ordered)
93
+ const sections = [
94
+ { key: STATE_REGION_KEYS.METADATA, heading: 'Metadata', text: metadataText, citationIds: [], position: 1 },
95
+ { key: STATE_REGION_KEYS.WHAT_IS_NEXT, heading: 'What is next', text: whatIsNextText, citationIds: [], position: 2 },
96
+ { key: STATE_REGION_KEYS.OPEN_QUESTIONS, heading: 'Open questions blocking active work', text: openQuestionsText, citationIds: [], position: 3 },
97
+ { key: STATE_REGION_KEYS.RECENT_DECISIONS, heading: 'Recent decisions', text: recentDecisionsText, citationIds: [], position: 4 },
98
+ { key: STATE_REGION_KEYS.RISKS, heading: 'Risks currently being watched', text: risksText, citationIds: [], position: 5 },
99
+ { key: STATE_REGION_KEYS.HEALTH_CHECK, heading: 'Health check', text: healthCheckText, citationIds: [], position: 6 },
100
+ ];
101
+ const compilationOutput = {
102
+ summary: `STATE.md compiled ${today} from live markdown registers. Active: ${activeWu?.handle ?? 'none'}.`,
103
+ sections,
104
+ citations: [],
105
+ outboundLinks: [],
106
+ };
107
+ // 6. Render each section to markdown (heading included, so each sentinel region is self-contained)
108
+ const regions = {};
109
+ for (const section of sections) {
110
+ const md = renderArticleMarkdown(compilationOutput, { sections: [section.key] });
111
+ // renderArticleMarkdown produces "## Heading\n\ntext\n" — we keep the full
112
+ // rendering including the heading so the sentinel region is self-contained.
113
+ regions[section.key] = md;
114
+ }
115
+ return { compilationOutput, regions };
116
+ }
117
+ export async function cmdStateCompile(store, args) {
118
+ const stateMdPath = args.stateMdPath ?? path.join(args.buildRoot, 'STATE.md');
119
+ // Read the current on-disk STATE.md — this is the edit base for authored prose.
120
+ let existingFile;
121
+ try {
122
+ existingFile = await readFile(stateMdPath, 'utf8');
123
+ }
124
+ catch (err) {
125
+ return {
126
+ output: `state compile: cannot read STATE.md at ${stateMdPath}\n ${err instanceof Error ? err.message : String(err)}`,
127
+ exitCode: 1,
128
+ };
129
+ }
130
+ // Build the compiled output from canonical state.
131
+ let compiled;
132
+ try {
133
+ compiled = await buildStateCompilationOutput({
134
+ store,
135
+ buildRoot: args.buildRoot,
136
+ now: args.now,
137
+ });
138
+ }
139
+ catch (err) {
140
+ return {
141
+ output: `state compile: adapter error — ${err instanceof Error ? err.message : String(err)}`,
142
+ exitCode: 1,
143
+ };
144
+ }
145
+ // First-cutover guard: check that every region's sentinel pair is present.
146
+ // If any are missing, report them clearly and exit without modifying anything.
147
+ const missingRegions = [];
148
+ for (const key of Object.keys(compiled.regions)) {
149
+ const open = STATE_SENTINEL_CONFIG.openTemplate.replace('{{marker}}', STATE_SENTINEL_CONFIG.markerPattern.replace('{{key}}', key));
150
+ if (!existingFile.includes(open)) {
151
+ missingRegions.push(key);
152
+ }
153
+ }
154
+ if (missingRegions.length > 0) {
155
+ const lines = [
156
+ 'state compile: the following sentinel regions are absent from STATE.md:',
157
+ '',
158
+ ];
159
+ for (const key of missingRegions) {
160
+ const marker = STATE_SENTINEL_CONFIG.markerPattern.replace('{{key}}', key);
161
+ lines.push(` missing: <!-- ${marker}:start --> / <!-- ${marker}:end -->`);
162
+ }
163
+ lines.push('');
164
+ lines.push('This is expected on first cutover. The sentinel pairs must be inserted');
165
+ lines.push('manually into STATE.md by the operator (Stage B walkthrough) before');
166
+ lines.push('`state compile` can manage those regions.');
167
+ lines.push('');
168
+ lines.push('For each missing region, add a sentinel pair at the appropriate location:');
169
+ lines.push(' <!-- nuos:generated:<key>:start -->');
170
+ lines.push(' (generated content will appear here)');
171
+ lines.push(' <!-- nuos:generated:<key>:end -->');
172
+ return {
173
+ output: lines.join('\n'),
174
+ exitCode: 1,
175
+ };
176
+ }
177
+ // Splice the generated regions into the existing file.
178
+ let spliceResult;
179
+ try {
180
+ spliceResult = spliceGeneratedRegions({
181
+ existingFile,
182
+ regions: compiled.regions,
183
+ sentinelConfig: STATE_SENTINEL_CONFIG,
184
+ });
185
+ }
186
+ catch (err) {
187
+ return {
188
+ output: `state compile: splice error — ${err instanceof Error ? err.message : String(err)}`,
189
+ exitCode: 1,
190
+ };
191
+ }
192
+ if (args.dryRun) {
193
+ const lines = [
194
+ '',
195
+ '── state compile (dry run) ──────────────────────────────────────────',
196
+ ` target: ${stateMdPath}`,
197
+ ` updated regions: ${spliceResult.updatedRegions.length > 0 ? spliceResult.updatedRegions.join(', ') : '(none — already current)'}`,
198
+ ` unchanged regions: ${spliceResult.unchangedRegions.join(', ')}`,
199
+ ' (dry run — STATE.md was not written)',
200
+ '─────────────────────────────────────────────────────────────────────',
201
+ '',
202
+ ];
203
+ return {
204
+ output: lines.join('\n'),
205
+ exitCode: 0,
206
+ updatedRegions: spliceResult.updatedRegions,
207
+ unchangedRegions: spliceResult.unchangedRegions,
208
+ };
209
+ }
210
+ // Write the spliced content back to disk.
211
+ try {
212
+ await writeFile(stateMdPath, spliceResult.merged, 'utf8');
213
+ }
214
+ catch (err) {
215
+ return {
216
+ output: `state compile: cannot write STATE.md at ${stateMdPath}\n ${err instanceof Error ? err.message : String(err)}`,
217
+ exitCode: 1,
218
+ };
219
+ }
220
+ const lines = [
221
+ '',
222
+ '── state compile ────────────────────────────────────────────────────',
223
+ ` target: ${stateMdPath}`,
224
+ ` updated regions: ${spliceResult.updatedRegions.length > 0 ? spliceResult.updatedRegions.join(', ') : '(none — already current)'}`,
225
+ ` unchanged regions: ${spliceResult.unchangedRegions.join(', ')}`,
226
+ '─────────────────────────────────────────────────────────────────────',
227
+ '',
228
+ ];
229
+ return {
230
+ output: lines.join('\n'),
231
+ exitCode: 0,
232
+ updatedRegions: spliceResult.updatedRegions,
233
+ unchangedRegions: spliceResult.unchangedRegions,
234
+ };
235
+ }
236
+ /**
237
+ * Expose `checkArticleDrift` with STATE.md's sentinel config pre-applied.
238
+ * Used by the pre-commit hook (Stage B) and tests.
239
+ */
240
+ export function checkStateMdDrift(fileContent, expectedRegions) {
241
+ return checkArticleDrift({
242
+ file: fileContent,
243
+ sentinelConfig: STATE_SENTINEL_CONFIG,
244
+ expectedRegions,
245
+ });
246
+ }
247
/**
 * Compare the generated regions of the on-disk STATE.md with what canonical
 * state compiles to right now. Designed to be called by the pre-commit hook.
 *
 * Exit-code contract (fail-open):
 *   - exit 0 when generated regions are clean
 *   - exit 0 when STATE.md has no sentinel regions yet (pre-cutover)
 *   - exit 0 when the check cannot run (STATE.md unreadable, compilation
 *     throws, or the drift check itself throws)
 *   - exit 1 ONLY on confirmed generated-region drift
 *
 * @param store - Forwarded to `buildStateCompilationOutput`.
 * @param args - Reads `buildRoot`, an optional `stateMdPath` override
 *   (defaults to `<buildRoot>/STATE.md`), and `now` (forwarded).
 * @returns `{ output, exitCode, verdict }`, plus `driftedRegions` when the
 *   verdict is `'drifted'`.
 */
export async function cmdStateDriftCheck(store, args) {
    // Every fail-open outcome has the same shape; only the message differs.
    const skipped = (output) => ({ output, exitCode: 0, verdict: 'skipped' });
    const stateMdPath = args.stateMdPath ?? path.join(args.buildRoot, 'STATE.md');

    // Step 1: load the file as it currently sits on disk.
    let existingFile;
    try {
        existingFile = await readFile(stateMdPath, 'utf8');
    }
    catch {
        return skipped(`state drift-check: STATE.md unreadable at ${stateMdPath} — skipping (fail open)`);
    }

    // Step 2: pre-cutover guard. Without a single sentinel open-marker there
    // is nothing generated to compare against.
    let hasAnySentinel = false;
    for (const key of Object.values(STATE_REGION_KEYS)) {
        const marker = STATE_SENTINEL_CONFIG.markerPattern.replace('{{key}}', key);
        const openTag = STATE_SENTINEL_CONFIG.openTemplate.replace('{{marker}}', marker);
        if (existingFile.includes(openTag)) {
            hasAnySentinel = true;
            break;
        }
    }
    if (!hasAnySentinel) {
        return skipped('state drift-check: no sentinel regions found in STATE.md — skipping (pre-cutover)');
    }

    // Step 3: compile the expected regions from canonical state.
    let compiled;
    try {
        compiled = await buildStateCompilationOutput({
            store,
            buildRoot: args.buildRoot,
            now: args.now,
        });
    }
    catch {
        return skipped(`state drift-check: adapter error — skipping (fail open)`);
    }

    // Step 4: compare on-disk regions against the compiled ones.
    let driftReport;
    try {
        driftReport = checkStateMdDrift(existingFile, compiled.regions);
    }
    catch {
        return skipped(`state drift-check: drift-check error — skipping (fail open)`);
    }
    if (driftReport.clean) {
        return {
            output: 'state drift-check: generated regions are current — clean',
            exitCode: 0,
            verdict: 'clean',
        };
    }

    // Step 5: confirmed drift — the only path that exits non-zero.
    const driftedRegions = [];
    for (const region of driftReport.regions) {
        if (region.status !== 'clean') {
            driftedRegions.push(region.key);
        }
    }
    const output = [
        '✖ state drift-check: generated regions in STATE.md have drifted from canonical state.',
        '',
        ` Drifted region(s): ${driftedRegions.join(', ')}`,
        '',
        ' These regions are compiled deterministically from the workflow store and',
        ' register indexes. Hand-editing them will be overwritten on next recompile.',
        '',
        ' To fix: recompile the generated regions and re-stage STATE.md:',
        ' nuos-catalogue state compile',
        ' git add docs/build/STATE.md',
        '',
        ' Then re-commit.',
    ].join('\n');
    return {
        output,
        exitCode: 1,
        verdict: 'drifted',
        driftedRegions,
    };
}
344
/**
 * Resolve the active work unit from the `active-wu` marker file inside the
 * catalogue directory (WU 136), then look its row up in
 * `work-units/_index.md` to obtain a title and status.
 *
 * Graceful degradation:
 *   - marker missing, unreadable, or blank → `null` (no active WU declared)
 *   - index unreadable → the handle with a placeholder title
 *   - no matching index row → the handle with a placeholder title
 * In both placeholder cases the status is reported as `in_progress`.
 *
 * @param buildRoot - Build docs root; the index is read from
 *   `<buildRoot>/work-units/_index.md`.
 * @returns `{ handle, title, status, slug }` or `null`.
 */
async function readActiveWuFromMarker(buildRoot) {
    const markerPath = path.join(resolveIndexDir(buildRoot), 'active-wu');
    let markerText;
    try {
        markerText = await readFile(markerPath, 'utf8');
    }
    catch {
        // Marker absent — no active WU declared.
        return null;
    }
    const handle = markerText.trim();
    if (!handle) {
        return null;
    }

    // "wu-113b" in the marker corresponds to "113b" in the index ID column.
    const slug = handle.replace(/^wu-/i, '');
    const placeholder = (title) => ({ handle, title, status: 'in_progress', slug });

    const indexContent = await readIndexFile(path.join(buildRoot, 'work-units', '_index.md'));
    if (!indexContent) {
        return placeholder('(title unknown — index unreadable)');
    }

    // Row shape: `| 113b | [Title](file.md) | 🟡 in_progress — ... | ... |`
    // After splitting on `|`: [1] = ID, [2] = title, [3] = status.
    const row = indexContent
        .split('\n')
        .filter((line) => /^\s*\|/.test(line))
        .map((line) => line.split('|').map((cell) => cell.trim()))
        .find((cells) => cells.length >= 4 && cells[1] === slug);
    if (!row) {
        // Handle declared but the index has no row for it.
        return placeholder('(title not found in work-units/_index.md)');
    }

    // Prefer the link text of `[Title](file.md)`; otherwise take the cell as is.
    const titleCell = row[2] ?? '';
    const titleMatch = titleCell.match(/^\[([^\]]+)\]/) ?? titleCell.match(/^(.+)$/);
    const title = titleMatch ? titleMatch[1].trim() : titleCell.trim();

    // Status keyword is the first word after the emoji; without an emoji,
    // fall back to the text before the em dash.
    const statusCell = row[3] ?? '';
    const statusMatch = statusCell.match(/(?:🟡|🔴|🟢|🔵|🟣|✅|⚫)\s+(\S+)/);
    let status;
    if (statusMatch) {
        status = statusMatch[1];
    }
    else {
        status = statusCell.split('—')[0].trim() || 'in_progress';
    }
    return { handle, title, status, slug };
}
399
/**
 * List blocked work units: rows of `work-units/_index.md` whose status cell
 * carries the 🔴 marker. The workflow store is stale and must not be
 * consulted for this.
 *
 * Only the status column (third column, `cells[3]` after splitting on `|`)
 * is inspected. A 🔴 that merely appears in a title or notes column does not
 * make a row blocked, which keeps this list consistent with the blocked count
 * that `readHealthStatsFromDisk` derives from the same column.
 *
 * @param buildRoot - Build docs root; the index is read from
 *   `<buildRoot>/work-units/_index.md`.
 * @returns Array of `{ handle, title }`; empty when the index is unreadable
 *   or has no blocked rows.
 */
async function readBlockedWorkflowsFromIndex(buildRoot) {
    const indexContent = await readIndexFile(path.join(buildRoot, 'work-units', '_index.md'));
    if (!indexContent) {
        return [];
    }
    const blocked = [];
    for (const line of indexContent.split('\n')) {
        if (!/^\s*\|/.test(line)) {
            continue;
        }
        // cells[0] is the empty text before the leading pipe.
        const cells = line.split('|').map((c) => c.trim());
        if (cells.length < 4) {
            continue;
        }
        const [, idCell, titleCell, statusCell] = cells;
        if (!statusCell.includes('🔴')) {
            continue;
        }
        // Skip header and separator rows.
        if (!idCell || /^[-\s]*$/.test(idCell) || idCell === 'ID') {
            continue;
        }
        // Strip markdown link syntax if present: [Title](file.md) → Title
        const titleMatch = titleCell.match(/^\[([^\]]+)\]/) ?? titleCell.match(/^(.+)$/);
        const title = titleMatch ? titleMatch[1].trim() : titleCell.trim();
        blocked.push({ handle: `wu-${idCell}`, title });
    }
    return blocked;
}
427
/**
 * Load `decisions/_index.md` under the build root and parse it with
 * `parseDecisionsIndex`. Yields an empty list when the index cannot be read
 * or is empty.
 */
async function readRecentDecisions(buildRoot) {
    const decisionsIndexPath = path.join(buildRoot, 'decisions', '_index.md');
    const text = await readIndexFile(decisionsIndexPath);
    return text ? parseDecisionsIndex(text) : [];
}
433
/**
 * Load `open-questions/_index.md` under the build root and parse it with
 * `parseQuestionsIndex`. Yields an empty list when the index cannot be read
 * or is empty.
 */
async function readUnresolvedQuestions(buildRoot) {
    const questionsIndexPath = path.join(buildRoot, 'open-questions', '_index.md');
    const text = await readIndexFile(questionsIndexPath);
    return text ? parseQuestionsIndex(text) : [];
}
439
/**
 * Load `risks/_index.md` under the build root and parse it with
 * `parseRisksIndex`. Yields an empty list when the index cannot be read or
 * is empty.
 */
async function readActiveRisks(buildRoot) {
    const risksIndexPath = path.join(buildRoot, 'risks', '_index.md');
    const text = await readIndexFile(risksIndexPath);
    return text ? parseRisksIndex(text) : [];
}
445
/**
 * Count the table rows in the active part of a register index whose ID cell
 * matches `idPattern`.
 *
 * @param indexContent - Index file text, or null/empty when unreadable.
 * @param inactiveHeading - Regex for the heading that ends the active
 *   section; everything from the first match onward is ignored.
 * @param idPattern - Non-global regex tested against the trimmed ID cell.
 * @returns Number of matching rows (0 when there is no content).
 */
function countActiveRegisterRows(indexContent, inactiveHeading, idPattern) {
    if (!indexContent) {
        return 0;
    }
    const activeSection = indexContent.split(inactiveHeading)[0];
    let count = 0;
    for (const line of activeSection.split('\n')) {
        if (!/^\s*\|/.test(line)) {
            continue;
        }
        const cells = line.split('|').map((c) => c.trim());
        if (cells.length < 3) {
            continue;
        }
        // Header (`ID`) and separator (`---`) rows can never match an ID
        // pattern, so no separate guard is needed for them.
        if (idPattern.test(cells[1])) {
            count++;
        }
    }
    return count;
}
/**
 * Derive health stats entirely from live disk sources:
 *   - in_progress / blocked counts: 🟡 / 🔴 in the status cell of rows in
 *     work-units/_index.md
 *   - completed count: `.md` files (not starting with `_`) in work-units/done/
 *   - decisions count: `D<n>` rows above the Superseded/Withdrawn headings
 *   - open questions: `Q<n>` rows above the "Resolved questions" heading
 *   - active risks: `R<n>` rows above the "Resolved risks" heading
 *
 * The workflow store is NOT consulted (it is stale under Mode 1 — D129).
 * Any source that cannot be read contributes zero.
 *
 * @param buildRoot - Build docs root containing the register directories.
 * @returns `{ inProgressWus, doneWus, blockedWus, totalDecisions,
 *   openQuestions, activeRisks, maxInProgressWuNum }`, where
 *   `maxInProgressWuNum` is the largest leading number among in-progress WU
 *   IDs (0 when there is none).
 */
async function readHealthStatsFromDisk(buildRoot) {
    const indexPath = (register) => path.join(buildRoot, register, '_index.md');
    // The four index reads are independent, so issue them together.
    const [wuIndex, decisionsIndex, questionsIndex, risksIndex] = await Promise.all([
        readIndexFile(indexPath('work-units')),
        readIndexFile(indexPath('decisions')),
        readIndexFile(indexPath('open-questions')),
        readIndexFile(indexPath('risks')),
    ]);

    let inProgressWus = 0;
    let blockedWus = 0;
    let maxInProgressWuNum = 0;
    if (wuIndex) {
        for (const line of wuIndex.split('\n')) {
            if (!/^\s*\|/.test(line)) {
                continue;
            }
            const cells = line.split('|').map((c) => c.trim());
            if (cells.length < 4) {
                continue;
            }
            const idCell = cells[1];
            if (!idCell || /^[-\s]*$/.test(idCell) || idCell === 'ID') {
                continue;
            }
            const statusCell = cells[3] ?? '';
            if (statusCell.includes('🟡')) {
                inProgressWus++;
                // The leading digits of the ID drive phase derivation.
                const numMatch = idCell.match(/^(\d+)/);
                if (numMatch) {
                    maxInProgressWuNum = Math.max(maxInProgressWuNum, Number.parseInt(numMatch[1], 10));
                }
            }
            if (statusCell.includes('🔴')) {
                blockedWus++;
            }
        }
    }

    // Completed count: files in work-units/done/
    let doneWus = 0;
    try {
        const doneEntries = await readdir(path.join(buildRoot, 'work-units', 'done'));
        doneWus = doneEntries.filter((f) => f.endsWith('.md') && !f.startsWith('_')).length;
    }
    catch {
        // done/ may not exist yet
    }

    // Decision and question IDs may be wrapped in link markup (`[D001](…)`),
    // so a single leading `[` is tolerated; risk IDs are matched bare.
    const totalDecisions = countActiveRegisterRows(decisionsIndex, /^## (?:Superseded|Withdrawn) decisions/im, /^\[?D\d+/i);
    const openQuestions = countActiveRegisterRows(questionsIndex, /^## Resolved questions/im, /^\[?Q\d+/i);
    const activeRisks = countActiveRegisterRows(risksIndex, /^## Resolved risks/im, /^R\d+/i);

    return { inProgressWus, doneWus, blockedWus, totalDecisions, openQuestions, activeRisks, maxInProgressWuNum };
}
550
+ // ---------------------------------------------------------------------------
551
+ // Text renderers for each section
552
+ // ---------------------------------------------------------------------------
553
/**
 * Render the metadata region as a two-column markdown table: compile date,
 * current phase (derived from `stats.maxInProgressWuNum`), the active WU, and
 * the in-progress WU count.
 *
 * @param activeWu - `{ handle, title, status }` or a falsy value when no
 *   active WU is declared.
 * @param today - Value shown in the "Last compiled" row.
 * @param stats - Reads `maxInProgressWuNum` and `inProgressWus`.
 * @returns The table as newline-joined text.
 */
function renderMetadataSection(activeWu, today, stats) {
    const phase = deriveCurrentPhase(stats.maxInProgressWuNum);
    let activeWuCell;
    if (activeWu) {
        activeWuCell = `**${activeWu.handle}** — ${activeWu.title} (${activeWu.status ?? 'unknown'})`;
    }
    else {
        activeWuCell = '(no active WU declared — run `nuos-catalogue wu start <handle>`)';
    }
    const rows = [
        ['Field', 'Value'],
        ['---', '---'],
        ['Last compiled', today],
        ['Current phase', phase],
        ['Active WU', activeWuCell],
        ['WUs in progress', stats.inProgressWus],
    ];
    return rows.map(([field, value]) => `| ${field} | ${value} |`).join('\n');
}
565
/**
 * Map the highest in-progress WU number (read from the live
 * `work-units/_index.md`, not the store) to a phase label.
 *
 * Zero means nothing is in progress. Otherwise the label of the highest
 * threshold that the number reaches is used, and anything below 20 falls
 * through to Phase 1.
 *
 * @param maxInProgressWuNum - Highest in-progress WU number, 0 when none.
 * @returns The phase label.
 */
function deriveCurrentPhase(maxInProgressWuNum) {
    if (maxInProgressWuNum === 0) {
        return 'No active phase detected';
    }
    // Ordered from the highest floor down; the first floor reached wins.
    const phaseFloors = [
        [100, 'Continuous Track 1 — NuOS leads the build'],
        [80, 'Phase 5 — Consumer shell + productisation'],
        [60, 'Phase 4 — Trifecta integration test'],
        [40, 'Phase 3 — NuWiki + trifecta'],
        [20, 'Phase 2 — NuFlow'],
    ];
    const reached = phaseFloors.find(([floor]) => maxInProgressWuNum >= floor);
    return reached ? reached[1] : 'Phase 1 — NuVector';
}
584
/**
 * Render the "what is next" region.
 *
 * With no active WU, the text tells the reader how to declare one. With an
 * active WU, it names the WU and its status, lists any blocked work units,
 * and ends with a reminder to recompile STATE.md at end of session.
 *
 * @param activeWu - `{ handle, title, status }` or a falsy value.
 * @param blockedWorkflows - Array of `{ handle, title }`; only read when an
 *   active WU is present.
 * @returns Newline-joined markdown text.
 */
function renderWhatIsNextSection(activeWu, blockedWorkflows) {
    if (!activeWu) {
        const howToDeclare = [
            'No active WU marker found. Declare the active WU with:',
            ' nuos-catalogue wu start <handle>',
            '',
            'Then recompile STATE.md with `nuos-catalogue state compile`.',
        ];
        return howToDeclare.join('\n');
    }
    const blockedBlock = blockedWorkflows.length > 0
        ? [
            '',
            '**Blocked work units requiring attention:**',
            ...blockedWorkflows.map((b) => `- ${b.handle} — ${b.title}`),
        ]
        : [];
    return [
        `**Active WU: ${activeWu.handle}** — ${activeWu.title}`,
        `Status: \`${activeWu.status ?? 'in_progress'}\``,
        ...blockedBlock,
        '',
        'Continue the active WU. Recompile STATE.md at end-of-session via `nuos-catalogue state compile`.',
    ].join('\n');
}
608
/**
 * Render the open-questions region as a bullet list capped at ten entries,
 * with a trailing "N more" bullet when the list is longer. An empty list
 * renders a pointer to the full register instead.
 *
 * @param questions - Array of `{ id, title, blocks }`.
 * @returns Newline-joined markdown text.
 */
function renderOpenQuestionsSection(questions) {
    const LIMIT = 10;
    if (questions.length === 0) {
        return 'No unresolved open questions. See `docs/build/open-questions/_index.md` for the full register.';
    }
    const bullets = questions.slice(0, LIMIT).map((q) => {
        // Only mention what the question blocks when that cell is non-empty.
        const suffix = q.blocks ? ` — blocks: ${q.blocks}` : '';
        return `- **${q.id}** — ${q.title}${suffix}`;
    });
    const overflow = questions.length - LIMIT;
    if (overflow > 0) {
        bullets.push(`- *(${overflow} more — see open-questions/_index.md)*`);
    }
    return bullets.join('\n');
}
622
/**
 * Render the recent-decisions region as a bullet list of the first eight
 * entries in the order given, with a trailing "N more" bullet when the list
 * is longer. An empty list renders a pointer to the full register instead.
 *
 * @param decisions - Array of `{ handle, title, status }`; a falsy status is
 *   simply omitted from the bullet.
 * @returns Newline-joined markdown text.
 */
function renderRecentDecisionsSection(decisions) {
    const LIMIT = 8;
    if (decisions.length === 0) {
        return 'No decisions found. See `docs/build/decisions/_index.md` for the full register.';
    }
    const bullets = decisions.slice(0, LIMIT).map((d) => {
        const statusNote = d.status ? ` *(${d.status})*` : '';
        return `- **${d.handle}** — ${d.title}${statusNote}`;
    });
    const overflow = decisions.length - LIMIT;
    if (overflow > 0) {
        bullets.push(`- *(${overflow} more — see decisions/_index.md)*`);
    }
    return bullets.join('\n');
}
636
/**
 * Render the risks region as a bullet list capped at five entries, with a
 * trailing "N more" bullet when the list is longer. An empty list renders a
 * pointer to the full register instead.
 *
 * @param risks - Array of `{ id, severity, title, status }`.
 * @returns Newline-joined markdown text.
 */
function renderRisksSection(risks) {
    const LIMIT = 5;
    if (risks.length === 0) {
        return 'No active risks found. See `docs/build/risks/_index.md` for the full register.';
    }
    const bullets = risks
        .slice(0, LIMIT)
        .map((r) => `- **${r.id}** (${r.severity}) — ${r.title} *(${r.status})*`);
    const overflow = risks.length - LIMIT;
    if (overflow > 0) {
        bullets.push(`- *(${overflow} more — see risks/_index.md)*`);
    }
    return bullets.join('\n');
}
649
/**
 * Render the health-check region as a two-column markdown table of counts.
 * The "Blocked WUs" row is appended only when at least one WU is blocked.
 *
 * @param stats - Reads `inProgressWus`, `doneWus`, `totalDecisions`,
 *   `openQuestions`, `activeRisks` and `blockedWus`.
 * @returns The table as newline-joined text.
 */
function renderHealthCheckSection(stats) {
    const rows = [
        ['Check', 'Count'],
        ['---', '---'],
        ['WUs in progress', `${stats.inProgressWus}`],
        ['WUs completed', `${stats.doneWus} (files in work-units/done/)`],
        ['Decisions recorded', `${stats.totalDecisions} (active section)`],
        ['Open questions', `${stats.openQuestions}`],
        ['Active risks', `${stats.activeRisks}`],
    ];
    if (stats.blockedWus > 0) {
        rows.push(['Blocked WUs', `${stats.blockedWus} — attention needed`]);
    }
    return rows.map(([check, count]) => `| ${check} | ${count} |`).join('\n');
}
664
+ // ---------------------------------------------------------------------------
665
+ // Index file parsers
666
+ // ---------------------------------------------------------------------------
667
/**
 * Read a register index file as UTF-8 text.
 *
 * Uses the module's existing `readFile` binding from `node:fs/promises`
 * rather than re-importing the module dynamically on every call.
 *
 * @param filePath - Path of the index file to read.
 * @returns The file text, or `null` when the read fails for any reason.
 *   Callers treat a missing index as "no rows", so the failure is
 *   deliberately not propagated.
 */
async function readIndexFile(filePath) {
    try {
        return await readFile(filePath, 'utf8');
    }
    catch {
        return null;
    }
}
676
/**
 * Extract the active decisions from a decisions `_index.md` table.
 *
 * Accepted row shapes:
 *   `| [D001](file.md) | Title | Date | Status |`
 *   `| D001 | Title | Date | Status |`
 *
 * Only text above the first `## Superseded decisions` or
 * `## Withdrawn decisions` heading is considered, so a high-numbered
 * decision that was later superseded never leaks into the generated region.
 *
 * @param content - Full text of the decisions index.
 * @returns Array of `{ handle, title, status, fileModifiedAt }` sorted by
 *   decision number, highest first. `status` is `null` when the cell is
 *   empty; `fileModifiedAt` carries the date cell as written.
 */
function parseDecisionsIndex(content) {
    const [activeSection] = content.split(/^## (?:Superseded|Withdrawn) decisions/im);
    const decisions = activeSection.split('\n').flatMap((line) => {
        if (!/^\s*\|/.test(line)) {
            return [];
        }
        // Expected cells: [empty, id, title, date, status, empty]
        const cells = line.split('|').map((cell) => cell.trim());
        if (cells.length < 5) {
            return [];
        }
        const [, idCell, title, dateCell, statusCell] = cells;
        // The ID must lead the cell, optionally wrapped in link markup.
        const idMatch = /^\[?(D\d+)/i.exec(idCell);
        if (!idMatch) {
            return [];
        }
        const isHeaderish = title === 'Title' || title === '---'
            || statusCell === 'Status' || statusCell === '---';
        if (!title || isHeaderish) {
            return [];
        }
        return [{
            handle: idMatch[1],
            title,
            status: statusCell || null,
            fileModifiedAt: dateCell ?? '',
        }];
    });
    // Highest decision number first, i.e. most recent first.
    const numberOf = (decision) => parseInt(decision.handle.slice(1), 10);
    return decisions.sort((a, b) => numberOf(b) - numberOf(a));
}
728
/**
 * Extract the active questions from an open-questions `_index.md` table.
 *
 * Accepted row shapes:
 *   `| [Q003](file.md) | Title | Blocks | Raised |`
 *   `| Q003 | Title | Blocks | Raised |`
 *
 * Only text above the `## Resolved questions` heading is considered.
 *
 * @param content - Full text of the open-questions index.
 * @returns Array of `{ id, title, blocks }` in file order.
 */
function parseQuestionsIndex(content) {
    const [activeSection] = content.split(/^## Resolved questions/im);
    return activeSection.split('\n').flatMap((line) => {
        if (!/^\s*\|/.test(line)) {
            return [];
        }
        const cells = line.split('|').map((cell) => cell.trim());
        if (cells.length < 4) {
            return [];
        }
        const [, idCell, title, blocks] = cells;
        // The ID must lead the cell, optionally wrapped in link markup.
        const idMatch = /^\[?(Q\d+)/i.exec(idCell);
        if (!idMatch) {
            return [];
        }
        const isHeaderish = title === 'Title' || title === '---'
            || blocks === 'Blocks' || blocks === '---';
        if (!title || isHeaderish) {
            return [];
        }
        return [{ id: idMatch[1], title, blocks }];
    });
}
761
/**
 * Extract the active risks from a risks `_index.md` table.
 *
 * Row shape: `| R001 | Title | Severity | Likelihood | Status |`
 *
 * Only text above the `## Resolved risks` heading is considered. The ID cell
 * must begin with `R<digits>`, and the whole cell is kept as the id.
 *
 * @param content - Full text of the risks index.
 * @returns Array of `{ id, title, severity, likelihood, status }` in file
 *   order.
 */
function parseRisksIndex(content) {
    const [activeSection] = content.split(/^## Resolved risks/im);
    return activeSection.split('\n').flatMap((line) => {
        if (!/^\s*\|/.test(line)) {
            return [];
        }
        const cells = line.split('|').map((cell) => cell.trim());
        if (cells.length < 6) {
            return [];
        }
        const [, id, title, severity, likelihood, status] = cells;
        // Header and separator rows fail this test too, since neither
        // `ID` nor `---` begins with R<digits>.
        if (!/^R\d+/i.test(id)) {
            return [];
        }
        const isHeaderish = title === 'Title' || title === '---'
            || status === 'Status' || status === '---';
        if (!title || isHeaderish) {
            return [];
        }
        return [{ id, title, severity, likelihood, status }];
    });
}
@@ -32,6 +32,13 @@
32
32
  * idle-timeout (the keep_alive: "1m" we sent) cleans up within a
33
33
  * minute.
34
34
  *
35
+ * **Bounded footprint while loaded.** Beyond unloading promptly, each call
36
+ * also pins `options.num_ctx` (see EMBED_NUM_CTX) so the model loads with an
37
+ * embedding-sized context window instead of inheriting the daemon's
38
+ * chat-sized OLLAMA_CONTEXT_LENGTH. Without this the 639MB model loads at
39
+ * ~5.7GB resident; with it, ~1.1GB. This is what keeps a reindex from pushing
40
+ * a developer's machine into swap.
41
+ *
35
42
  * Sizing note — the new 0.6b default is ~600MB on disk and runs
36
43
  * comfortably on any modern laptop, including CPU-only. The 4b variant
37
44
  * (~2.5GB) and 8b variant (~4.7GB, benefits from ~16GB RAM + Metal)
@@ -32,6 +32,13 @@
32
32
  * idle-timeout (the keep_alive: "1m" we sent) cleans up within a
33
33
  * minute.
34
34
  *
35
+ * **Bounded footprint while loaded.** Beyond unloading promptly, each call
36
+ * also pins `options.num_ctx` (see EMBED_NUM_CTX) so the model loads with an
37
+ * embedding-sized context window instead of inheriting the daemon's
38
+ * chat-sized OLLAMA_CONTEXT_LENGTH. Without this the 639MB model loads at
39
+ * ~5.7GB resident; with it, ~1.1GB. This is what keeps a reindex from pushing
40
+ * a developer's machine into swap.
41
+ *
35
42
  * Sizing note — the new 0.6b default is ~600MB on disk and runs
36
43
  * comfortably on any modern laptop, including CPU-only. The 4b variant
37
44
  * (~2.5GB) and 8b variant (~4.7GB, benefits from ~16GB RAM + Metal)
@@ -47,6 +54,16 @@ const KNOWN_DIMENSIONS = {
47
54
  'qwen3-embedding:4b': 2560,
48
55
  'qwen3-embedding:0.6b': 1024,
49
56
  };
57
+ // Context window for embedding loads. The Ollama daemon's global
58
+ // OLLAMA_CONTEXT_LENGTH — set high for chat models (commonly 32K–64K) — is
59
+ // inherited by every model that doesn't override it. Inherited unchanged, it
60
+ // inflates the 639MB qwen3-embedding:0.6b model to ~5.7GB resident, which is
61
+ // enough to push a 16–18GB developer machine into swap during a reindex.
62
+ // Embedding inputs are capped at ~600 tokens (MAX_CHUNK_CHARS in
63
+ // indexer/chunk.ts), so a 2048-token window leaves ~3x headroom and never
64
+ // truncates a chunk. Measured 2026-06-01 (qwen3-embedding:0.6b, Apple Silicon):
65
+ // inherited 32K ctx → 5.7GB resident; num_ctx 2048 → 1.1GB resident.
66
+ const EMBED_NUM_CTX = 2048;
50
67
  export class OllamaEmbedder {
51
68
  dimensions;
52
69
  modelId;
@@ -68,7 +85,13 @@ export class OllamaEmbedder {
68
85
  const probe = await fetch(`${host}/api/embed`, {
69
86
  method: 'POST',
70
87
  headers: { 'content-type': 'application/json' },
71
- body: JSON.stringify({ model: modelId, input: 'probe' }),
88
+ body: JSON.stringify({
89
+ model: modelId,
90
+ input: 'probe',
91
+ // Pin the context window here too — the probe is what first loads the
92
+ // model, so without it the probe alone would pull in the full ~5.7GB.
93
+ options: { num_ctx: EMBED_NUM_CTX },
94
+ }),
72
95
  });
73
96
  if (!probe.ok) {
74
97
  const body = await probe.text().catch(() => '<unreadable>');
@@ -121,6 +144,9 @@ export class OllamaEmbedder {
121
144
  // Keep the model warm only for the duration of one operation.
122
145
  // dispose() at the end of the run sends keep_alive: 0 to unload.
123
146
  keep_alive: '1m',
147
+ // Cap the context window so the model loads at ~1.1GB rather than
148
+ // inheriting the daemon's chat-sized window and ballooning to ~5.7GB.
149
+ options: { num_ctx: EMBED_NUM_CTX },
124
150
  }),
125
151
  });
126
152
  if (!res.ok) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nusoft/nuos-build-catalogue",
3
- "version": "0.33.3",
3
+ "version": "0.35.1",
4
4
  "description": "NuOS build-catalogue tooling: semantic search (WU 110) + migration runner that lifts markdown artefacts into JSON-backed workflow records (WU 111, Phase G).",
5
5
  "type": "module",
6
6
  "bin": {
@@ -19,15 +19,16 @@
19
19
  "build": "rm -rf dist && tsc && chmod +x dist/cli.js",
20
20
  "prepublishOnly": "npm run build",
21
21
  "verify-storage": "tsx scripts/verify-persistence.ts",
22
- "test": "tsx --test tests/chunk.test.ts tests/metadata.test.ts tests/crawl.test.ts tests/migrate.test.ts tests/commands-read.test.ts tests/regenerate.test.ts tests/commands-write.test.ts tests/ac-parse.test.ts tests/create.test.ts tests/init.test.ts tests/wu-111-soak-findings.test.ts tests/plan.test.ts tests/mode.test.ts tests/render.test.ts tests/swarm.test.ts tests/setup-progress-bar.test.ts tests/setup-ollama-pull.test.ts tests/setup-run-llm-setup.test.ts tests/wu-active.test.ts tests/install-claude-hooks.test.ts tests/protocols-in-sync.test.ts tests/end-of-session.test.ts tests/hooks-in-sync.test.ts tests/memory-store-separation.test.ts",
22
+ "test": "tsx --test tests/chunk.test.ts tests/metadata.test.ts tests/crawl.test.ts tests/migrate.test.ts tests/commands-read.test.ts tests/regenerate.test.ts tests/commands-write.test.ts tests/ac-parse.test.ts tests/create.test.ts tests/init.test.ts tests/wu-111-soak-findings.test.ts tests/plan.test.ts tests/mode.test.ts tests/render.test.ts tests/swarm.test.ts tests/setup-progress-bar.test.ts tests/setup-ollama-pull.test.ts tests/setup-run-llm-setup.test.ts tests/wu-active.test.ts tests/install-claude-hooks.test.ts tests/protocols-in-sync.test.ts tests/end-of-session.test.ts tests/hooks-in-sync.test.ts tests/memory-store-separation.test.ts tests/state-compile.test.ts tests/state-drift-check.test.ts tests/hook-isolation.test.ts",
23
23
  "typecheck": "tsc --noEmit",
24
24
  "index": "tsx src/cli.ts index",
25
25
  "search": "tsx src/cli.ts search"
26
26
  },
27
27
  "dependencies": {
28
- "@nusoft/nuvector": "^0.1.5",
29
28
  "@nusoft/nuflow": "^0.4.1",
30
- "@nusoft/nuflow-pack-nuos-build-catalogue": "^0.1.0"
29
+ "@nusoft/nuflow-pack-nuos-build-catalogue": "^0.3.0",
30
+ "@nusoft/nuvector": "^0.1.5",
31
+ "@nusoft/nuwiki": "^0.3.0"
31
32
  },
32
33
  "devDependencies": {
33
34
  "@nusoft/nuflow": "file:../nuflow",
@@ -177,6 +177,49 @@ if [[ -n "$locked_decisions" ]]; then
177
177
  EXIT_CODE=1
178
178
  fi
179
179
 
180
+ # ---------- Rule 3: STATE.md generated-region drift block (WU 113b Stage B) ---
181
+
182
+ # Only run when docs/build/STATE.md is in the staged changes.
183
+ # Guard on nuos-catalogue being present and supporting `state drift-check`.
184
+ # Fail-open: if the binary is absent, old (doesn't know drift-check), or
185
+ # errors for any infra reason, skip this check silently — a missing binary
186
+ # must never block all commits.
187
+ #
188
+ # Old-binary detection: an old binary (< 0.35.0) exits non-zero with
189
+ # "unknown state subcommand: drift-check" on stderr. We distinguish this
190
+ # from a genuine drift finding by checking whether the output contains the
191
+ # drift-specific marker phrase. If the output does NOT contain "generated regions"
192
+ # (the phrase only the new drift-check command emits), we skip.
193
+ staged_state_md=$(git diff --cached --name-only | grep -F 'docs/build/STATE.md' || true)
194
+
195
+ if [[ -n "$staged_state_md" ]]; then
196
+ dim "[nuos:pre-commit] STATE.md generated-region drift check (WU 113b)"
197
+
198
+ if ! command -v nuos-catalogue > /dev/null 2>&1; then
199
+ dim "[nuos:pre-commit] nuos-catalogue not found — skipping STATE.md drift check"
200
+ else
201
+ # Run drift-check; capture output + exit code.
202
+ drift_output=$(nuos-catalogue state drift-check 2>&1) || drift_exit=$?
203
+ drift_exit=${drift_exit:-0}
204
+
205
+ if [[ $drift_exit -ne 0 ]]; then
206
+ # Non-zero exit — check whether this is a genuine drift finding or an
207
+ # infra/version problem (old binary, missing store, etc.).
208
+ if echo "$drift_output" | grep -qF 'generated regions'; then
209
+ # Confirmed generated-region drift — block the commit.
210
+ red "✖ STATE.md generated-region drift — BLOCKED (WU 113b enforcement):"
211
+ echo "$drift_output" | while IFS= read -r line; do echo " $line"; done
212
+ log_event "state-drift-block" "generated-region drift detected"
213
+ EXIT_CODE=1
214
+ else
215
+ # Not a drift finding (unknown subcommand from old binary, infra error, etc.)
216
+ # — skip silently (fail open).
217
+ dim "[nuos:pre-commit] STATE.md drift check returned non-zero (not a drift finding) — skipping"
218
+ fi
219
+ fi
220
+ fi
221
+ fi
222
+
180
223
  # ---------- Result ------------------------------------------------------
181
224
 
182
225
  if [[ $EXIT_CODE -eq 0 ]]; then
@@ -177,6 +177,49 @@ if [[ -n "$locked_decisions" ]]; then
177
177
  EXIT_CODE=1
178
178
  fi
179
179
 
180
+ # ---------- Rule 3: STATE.md generated-region drift block (WU 113b Stage B) ---
181
+
182
+ # Only run when docs/build/STATE.md is in the staged changes.
183
+ # Guard on nuos-catalogue being present and supporting `state drift-check`.
184
+ # Fail-open: if the binary is absent, old (doesn't know drift-check), or
185
+ # errors for any infra reason, skip this check silently — a missing binary
186
+ # must never block all commits.
187
+ #
188
+ # Old-binary detection: an old binary (< 0.35.0) exits non-zero with
189
+ # "unknown state subcommand: drift-check" on stderr. We distinguish this
190
+ # from a genuine drift finding by checking whether the output contains the
191
+ # drift-specific marker phrase. If the output does NOT contain "generated regions"
192
+ # (the phrase only the new drift-check command emits), we skip.
193
+ staged_state_md=$(git diff --cached --name-only | grep -F 'docs/build/STATE.md' || true)
194
+
195
+ if [[ -n "$staged_state_md" ]]; then
196
+ dim "[nuos:pre-commit] STATE.md generated-region drift check (WU 113b)"
197
+
198
+ if ! command -v nuos-catalogue > /dev/null 2>&1; then
199
+ dim "[nuos:pre-commit] nuos-catalogue not found — skipping STATE.md drift check"
200
+ else
201
+ # Run drift-check; capture output + exit code.
202
+ drift_output=$(nuos-catalogue state drift-check 2>&1) || drift_exit=$?
203
+ drift_exit=${drift_exit:-0}
204
+
205
+ if [[ $drift_exit -ne 0 ]]; then
206
+ # Non-zero exit — check whether this is a genuine drift finding or an
207
+ # infra/version problem (old binary, missing store, etc.).
208
+ if echo "$drift_output" | grep -qF 'generated regions'; then
209
+ # Confirmed generated-region drift — block the commit.
210
+ red "✖ STATE.md generated-region drift — BLOCKED (WU 113b enforcement):"
211
+ echo "$drift_output" | while IFS= read -r line; do echo " $line"; done
212
+ log_event "state-drift-block" "generated-region drift detected"
213
+ EXIT_CODE=1
214
+ else
215
+ # Not a drift finding (unknown subcommand from old binary, infra error, etc.)
216
+ # — skip silently (fail open).
217
+ dim "[nuos:pre-commit] STATE.md drift check returned non-zero (not a drift finding) — skipping"
218
+ fi
219
+ fi
220
+ fi
221
+ fi
222
+
180
223
  # ---------- Result ------------------------------------------------------
181
224
 
182
225
  if [[ $EXIT_CODE -eq 0 ]]; then