@nusoft/nuos-build-catalogue 0.33.1 → 0.33.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,9 @@
4
4
  *
5
5
  * Cross-agent memory: every agent in a swarm can write findings here and
6
6
  * any future agent (in this run or a later one) can retrieve them by
7
- * semantic query. Uses the same NuVector store as the catalogue index,
8
- * distinguished by kind: 'agent_memory'.
7
+ * semantic query. Uses its own NuVector store file (`memory.nv`), separate
8
+ * from the doc-search index (`index.nv`), so that the ~40s background
9
+ * reindex never locks out memory writes. See D131.
9
10
  *
10
11
  * CLI:
11
12
  * memory store --value="..." [--wu=wu-007] [--agent=architect] [--key="label"]
@@ -18,6 +19,9 @@ export interface MemoryStoreOptions {
18
19
  key?: string;
19
20
  cwd?: string;
20
21
  buildRoot?: string | boolean;
22
+ /** Override for the memory store path (defaults to `<index-dir>/memory.nv`). */
23
+ memory?: string | boolean;
24
+ /** @deprecated Kept for callers that pass `index` — resolved as `memory` for memory commands. */
21
25
  index?: string | boolean;
22
26
  }
23
27
  export interface MemorySearchOptions {
@@ -27,6 +31,9 @@ export interface MemorySearchOptions {
27
31
  agent?: string;
28
32
  cwd?: string;
29
33
  buildRoot?: string | boolean;
34
+ /** Override for the memory store path (defaults to `<index-dir>/memory.nv`). */
35
+ memory?: string | boolean;
36
+ /** @deprecated Kept for callers that pass `index` — resolved as `memory` for memory commands. */
30
37
  index?: string | boolean;
31
38
  }
32
39
  export interface MemoryHit {
@@ -4,21 +4,167 @@
4
4
  *
5
5
  * Cross-agent memory: every agent in a swarm can write findings here and
6
6
  * any future agent (in this run or a later one) can retrieve them by
7
- * semantic query. Uses the same NuVector store as the catalogue index,
8
- * distinguished by kind: 'agent_memory'.
7
+ * semantic query. Uses its own NuVector store file (`memory.nv`), separate
8
+ * from the doc-search index (`index.nv`), so that the ~40s background
9
+ * reindex never locks out memory writes. See D131.
9
10
  *
10
11
  * CLI:
11
12
  * memory store --value="..." [--wu=wu-007] [--agent=architect] [--key="label"]
12
13
  * memory search --query="..." [--limit=N] [--wu=wu-007] [--agent=architect]
13
14
  */
14
15
  import { randomUUID } from 'node:crypto';
15
- import { resolveBuildRoot, resolveIndexPath } from '../path-resolution.js';
16
+ import { existsSync, unlinkSync, writeFileSync } from 'node:fs';
17
+ import { resolveBuildRoot, resolveIndexPath, resolveMemoryPath } from '../path-resolution.js';
18
+ // resolveIndexPath is used only as the migration *source* (legacy index.nv),
19
+ // not as the live memory path (which is resolved via resolveMemoryPath).
16
20
  // NuVector's MemoryRecordKind union doesn't include a swarm-specific kind yet.
17
21
  // 'workflow_provenance' is the closest semantic match — agent memories are
18
22
  // provenance of the swarm workflow. NuFlow isn't wired (harness.runtime.nuflow
19
23
  // is null) so there's no collision today; records are further distinguished by
20
24
  // the presence of an `agent_role` metadata field (absent on NuFlow provenance).
21
25
  const MEMORY_KIND = 'workflow_provenance';
26
/**
 * One-time, idempotent migration: copy existing agent-memory records
 * (kind `workflow_provenance` carrying an `agent_role` metadata field) from
 * the legacy `index.nv` into the dedicated `memory.nv`. Triggered lazily the
 * first time a memory command opens the store, i.e. while `memory.nv` does
 * not exist yet. Once `memory.nv` exists without a sentinel this function is
 * a no-op.
 *
 * Delete-vs-leave: migrated records are left in `index.nv`. `memory search`
 * reads only `memory.nv`, so the leftovers are dead weight rather than a
 * correctness problem, and deleting them would need the id list for the
 * store's deletion API — extra complexity for a handful of records.
 *
 * Embeddings are copied verbatim via `fetch(refs)` — no re-embedding.
 * If `index.nv` does not exist (fresh project), migration is skipped.
 *
 * Atomicity: a sentinel file (`memory.nv.migrating`) is written before the
 * migration touches `memory.nv` and removed only after a successful close.
 * `memory.nv` + sentinel therefore means "partial store, retry"; `memory.nv`
 * alone means "complete". On failure the sentinel is removed ONLY when
 * `memory.nv` is absent from disk. If `memory.nv` exists (partial store from
 * this or an earlier run) the sentinel is kept, otherwise the completion
 * gate would treat the partial store as finished and the remaining legacy
 * records would never be migrated.
 *
 * INVARIANT — never `unlinkSync(memoryPath)` then `openStore(memoryPath)` in
 * the same process. Per the store's documented behaviour, a same-process
 * unlink+reopen materialises the store in memory only (the file never
 * appears on disk), silently losing all data on process exit. The only
 * permitted `unlinkSync(memoryPath)` is the corrupt-open guard below, which
 * always re-throws immediately, so the store is never reopened in this
 * process after that unlink.
 *
 * In the interrupted-migration path (memory.nv + sentinel both present) the
 * existing partial `memory.nv` is opened directly. `upsertBatch` is
 * idempotent by id, so replaying the same records just completes the store.
 *
 * @param {string} indexPath  Path of the legacy doc-index store (`index.nv`), the migration source.
 * @param {string} memoryPath Path of the memory store (`memory.nv`), the migration target.
 * @param {number} dimensions Embedding dimensionality, passed to `openStore` and used for the zero query vector.
 * @returns {Promise<void>} Resolves when the migration completed or was not needed.
 * @throws Re-throws any error from loading or opening the stores, reading the
 *   source, or writing the target; the sentinel state is left so that a later
 *   call retries.
 */
async function migrateMemoryRecordsIfNeeded(indexPath, memoryPath, dimensions) {
    const sentinelPath = `${memoryPath}.migrating`;
    // Best-effort sentinel removal: a failure here must never mask the real
    // outcome of the migration.
    const removeSentinel = () => {
        try {
            unlinkSync(sentinelPath);
        }
        catch { /* ignore — best-effort */ }
    };
    // Complete gate: memory.nv exists with no sentinel → done (either a clean
    // migration or a store created by a normal memory write).
    if (existsSync(memoryPath) && !existsSync(sentinelPath)) {
        return;
    }
    // Fresh project: no legacy index to migrate from. Clear any stray sentinel
    // and return; the caller's own openStore creates memory.nv.
    if (!existsSync(indexPath)) {
        if (existsSync(sentinelPath)) {
            removeSentinel();
        }
        return;
    }
    // Write the sentinel before memory.nv can come into existence. If the
    // process dies after this point, the next run sees the sentinel and falls
    // through to the (re)migration path instead of the completion gate.
    try {
        writeFileSync(sentinelPath, '');
    }
    catch { /* non-fatal; migration proceeds without crash protection */ }
    try {
        // Loaded inside the guarded region so that a load failure follows the
        // same sentinel clean-up rule as every other failure.
        const { openStore, TENANT } = await import('../store/open.js');
        // Read from index.nv. The store stays open across retrieveContext and
        // fetch — a single open avoids a close→reopen timing window.
        const srcStore = await openStore({ storagePath: indexPath, dimensions });
        let fullRecords;
        try {
            // A zero vector with scoreThreshold 0 is used to enumerate records
            // of the memory kind rather than to rank them. topK caps the
            // migration at 10,000 candidate records.
            const zeroEmbedding = new Float32Array(dimensions);
            const result = await srcStore.retrieveContext({
                embedding: zeroEmbedding,
                tenant: TENANT,
                topK: 10_000,
                filters: { kind: MEMORY_KIND },
                scoreThreshold: 0,
            });
            const items = result?.items ?? [];
            // Keep only agent-memory records (presence of `agent_role`
            // metadata). Null or non-object metadata is treated as "not an
            // agent memory" instead of throwing on the `in` operator.
            const agentMemoryRefs = items
                .filter((item) => {
                    const meta = item.metadata;
                    return meta != null && typeof meta === 'object' && 'agent_role' in meta;
                })
                .map((item) => item.ref);
            fullRecords = agentMemoryRefs.length > 0
                ? await srcStore.fetch(agentMemoryRefs)
                : [];
        }
        finally {
            await srcStore.close();
        }
        // Open memory.nv — created fresh (first run) or the existing partial
        // file (interrupted run). Do NOT unlink first (see invariant above).
        let dstStore;
        try {
            dstStore = await openStore({ storagePath: memoryPath, dimensions });
        }
        catch (openErr) {
            // openStore itself threw — the partial file is unusable. Unlink it
            // so a future process gets a clean create, then rethrow.
            // NEVER reopen memoryPath in this process after this unlink.
            if (existsSync(memoryPath)) {
                try {
                    unlinkSync(memoryPath);
                }
                catch { /* ignore */ }
            }
            throw openErr;
        }
        try {
            if (fullRecords.length > 0) {
                await dstStore.upsertBatch(fullRecords);
            }
            // With no agent-memory records the store is opened and closed
            // empty, which is intended to materialise memory.nv so the
            // completion gate is stable on subsequent calls.
        }
        finally {
            await dstStore.close();
        }
        // Migration complete. Remove the sentinel so the gate sees memory.nv alone.
        removeSentinel();
    }
    catch (err) {
        // Failure (e.g. index.nv is locked by a reindex). If memory.nv is not
        // on disk, drop the sentinel: the next call finds no memory.nv and
        // restarts the migration from scratch. If memory.nv IS on disk it may
        // be a partial store, so the sentinel must stay — removing it would
        // make the completion gate accept the partial store and the migration
        // would never be retried. memoryPath itself is never unlinked here
        // (see invariant above).
        if (!existsSync(memoryPath)) {
            removeSentinel();
        }
        throw err;
    }
}
22
168
  export async function cmdMemoryStore(opts) {
23
169
  const { value, wu, agent, key } = opts;
24
170
  if (!value || value.trim().length === 0) {
@@ -28,9 +174,16 @@ export async function cmdMemoryStore(opts) {
28
174
  const { selectEmbedderFromEnv } = await import('../embedder/select.js');
29
175
  const { openStore, TENANT } = await import('../store/open.js');
30
176
  const buildRoot = resolveBuildRoot(opts.buildRoot, { cwd: opts.cwd ?? process.cwd() });
31
- const indexPath = resolveIndexPath(buildRoot, opts.index);
177
+ // Resolve the memory-specific path (memory.nv), falling back to the
178
+ // legacy `index` flag for callers that pass it, then the default.
179
+ const memoryFlag = opts.memory ?? opts.index;
180
+ const memoryPath = resolveMemoryPath(buildRoot, memoryFlag);
181
+ const indexPath = resolveIndexPath(buildRoot, undefined);
32
182
  const embedder = await selectEmbedderFromEnv();
33
- const store = await openStore({ storagePath: indexPath, dimensions: embedder.dimensions });
183
+ // Lazy one-time migration: copy existing agent-memory records from
184
+ // index.nv into memory.nv on the first memory command run.
185
+ await migrateMemoryRecordsIfNeeded(indexPath, memoryPath, embedder.dimensions);
186
+ const store = await openStore({ storagePath: memoryPath, dimensions: embedder.dimensions });
34
187
  const [embedding] = await embedder.embed([value]);
35
188
  await store.upsert({
36
189
  id: randomUUID(),
@@ -59,9 +212,16 @@ export async function cmdMemorySearch(opts) {
59
212
  const { selectEmbedderFromEnv } = await import('../embedder/select.js');
60
213
  const { openStore, TENANT } = await import('../store/open.js');
61
214
  const buildRoot = resolveBuildRoot(opts.buildRoot, { cwd: opts.cwd ?? process.cwd() });
62
- const indexPath = resolveIndexPath(buildRoot, opts.index);
215
+ // Resolve the memory-specific path (memory.nv), falling back to the
216
+ // legacy `index` flag for callers that pass it, then the default.
217
+ const memoryFlag = opts.memory ?? opts.index;
218
+ const memoryPath = resolveMemoryPath(buildRoot, memoryFlag);
219
+ const indexPath = resolveIndexPath(buildRoot, undefined);
63
220
  const embedder = await selectEmbedderFromEnv();
64
- const store = await openStore({ storagePath: indexPath, dimensions: embedder.dimensions });
221
+ // Lazy one-time migration: copy existing agent-memory records from
222
+ // index.nv into memory.nv on the first memory command run.
223
+ await migrateMemoryRecordsIfNeeded(indexPath, memoryPath, embedder.dimensions);
224
+ const store = await openStore({ storagePath: memoryPath, dimensions: embedder.dimensions });
65
225
  const [queryEmbedding] = await embedder.embed([query]);
66
226
  const result = await store.retrieveContext({
67
227
  embedding: queryEmbedding,
@@ -52,6 +52,15 @@ export declare function resolveCatalogueRoot(flag: string | boolean | undefined,
52
52
  export declare function resolveIndexDir(buildRoot: string, ctx?: ResolutionContext): string;
53
53
  export declare function resolveWorkflowsPath(buildRoot: string, flag: string | boolean | undefined, ctx?: ResolutionContext): string;
54
54
  export declare function resolveIndexPath(buildRoot: string, flag: string | boolean | undefined, ctx?: ResolutionContext): string;
55
+ /**
56
+ * Resolve the cross-agent memory store path. Always co-located with the
57
+ * doc-index in the same `.nuos-catalogue/` directory, but in a separate
58
+ * file (`memory.nv`) so that the doc-index reindex (which holds an
59
+ * exclusive lock on `index.nv`) never contends with memory writes.
60
+ * Resolves `NUOS_CATALOGUE_MEMORY_PATH` env var when set; otherwise
61
+ * derives from `resolveIndexDir`. See D131.
62
+ */
63
+ export declare function resolveMemoryPath(buildRoot: string, flag: string | boolean | undefined, ctx?: ResolutionContext): string;
55
64
  export declare function resolveHashPath(buildRoot: string, flag: string | boolean | undefined, ctx?: ResolutionContext): string;
56
65
  /**
57
66
  * Soft warning surfaced after a `migrate` or `regenerate` run: if the
@@ -108,6 +108,22 @@ export function resolveIndexPath(buildRoot, flag, ctx) {
108
108
  return path.resolve(flag);
109
109
  return path.join(resolveIndexDir(buildRoot, ctx), 'index.nv');
110
110
  }
111
/**
 * Compute the location of the cross-agent memory store.
 *
 * Resolution order:
 *   1. `flag`, when it is a non-empty string — resolved to an absolute path.
 *   2. `NUOS_CATALOGUE_MEMORY_PATH` from the environment obtained via
 *      `ctxEnv(ctx)`, when set to a non-empty value — resolved to an
 *      absolute path.
 *   3. Otherwise `memory.nv` inside the directory returned by
 *      `resolveIndexDir(buildRoot, ctx)`, i.e. next to the doc-index but in
 *      its own file, so the doc-index reindex and memory writes do not share
 *      a store file. See D131.
 *
 * @param {string} buildRoot Build root used to derive the default location.
 * @param {string | boolean | undefined} flag Explicit path override; non-string or empty values are ignored.
 * @param {object} [ctx] Optional resolution context forwarded to `ctxEnv` and `resolveIndexDir`.
 * @returns {string} Path of the memory store file.
 */
export function resolveMemoryPath(buildRoot, flag, ctx) {
    const hasExplicitFlag = typeof flag === 'string' && flag.length > 0;
    if (hasExplicitFlag) {
        return path.resolve(flag);
    }
    const { NUOS_CATALOGUE_MEMORY_PATH: envOverride } = ctxEnv(ctx);
    return envOverride
        ? path.resolve(envOverride)
        : path.join(resolveIndexDir(buildRoot, ctx), 'memory.nv');
}
111
127
  export function resolveHashPath(buildRoot, flag, ctx) {
112
128
  if (typeof flag === 'string' && flag.length > 0)
113
129
  return path.resolve(flag);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nusoft/nuos-build-catalogue",
3
- "version": "0.33.1",
3
+ "version": "0.33.3",
4
4
  "description": "NuOS build-catalogue tooling: semantic search (WU 110) + migration runner that lifts markdown artefacts into JSON-backed workflow records (WU 111, Phase G).",
5
5
  "type": "module",
6
6
  "bin": {
@@ -19,7 +19,7 @@
19
19
  "build": "rm -rf dist && tsc && chmod +x dist/cli.js",
20
20
  "prepublishOnly": "npm run build",
21
21
  "verify-storage": "tsx scripts/verify-persistence.ts",
22
- "test": "tsx --test tests/chunk.test.ts tests/metadata.test.ts tests/crawl.test.ts tests/migrate.test.ts tests/commands-read.test.ts tests/regenerate.test.ts tests/commands-write.test.ts tests/ac-parse.test.ts tests/create.test.ts tests/init.test.ts tests/wu-111-soak-findings.test.ts tests/plan.test.ts tests/mode.test.ts tests/render.test.ts tests/swarm.test.ts tests/setup-progress-bar.test.ts tests/setup-ollama-pull.test.ts tests/setup-run-llm-setup.test.ts tests/wu-active.test.ts tests/install-claude-hooks.test.ts tests/protocols-in-sync.test.ts tests/end-of-session.test.ts",
22
+ "test": "tsx --test tests/chunk.test.ts tests/metadata.test.ts tests/crawl.test.ts tests/migrate.test.ts tests/commands-read.test.ts tests/regenerate.test.ts tests/commands-write.test.ts tests/ac-parse.test.ts tests/create.test.ts tests/init.test.ts tests/wu-111-soak-findings.test.ts tests/plan.test.ts tests/mode.test.ts tests/render.test.ts tests/swarm.test.ts tests/setup-progress-bar.test.ts tests/setup-ollama-pull.test.ts tests/setup-run-llm-setup.test.ts tests/wu-active.test.ts tests/install-claude-hooks.test.ts tests/protocols-in-sync.test.ts tests/end-of-session.test.ts tests/hooks-in-sync.test.ts tests/memory-store-separation.test.ts",
23
23
  "typecheck": "tsc --noEmit",
24
24
  "index": "tsx src/cli.ts index",
25
25
  "search": "tsx src/cli.ts search"
@@ -134,14 +134,38 @@ fi
134
134
  # ---------- Rule 2: active-decision modification block (WU 111 ship) ---
135
135
 
136
136
  dim "[nuos:pre-commit] active-decision modification check"
137
- modified_decisions=$(git diff --cached --name-only --diff-filter=M \
137
+ #
138
+ # "Immutable once accepted" — this blocks edits only to a decision whose
139
+ # status *in HEAD* is already a locked state (`accepted` or `active`).
140
+ # Editing a still-`proposed` decision is allowed: promoting it to
141
+ # accepted/active is the sanctioned lifecycle step, not a violation, and
142
+ # proposed decisions are in-flight by design. New decision files are
143
+ # additions (excluded by --diff-filter=M), so a decision born `accepted`
144
+ # is never blocked on creation. The locked-status check uses the HEAD
145
+ # pre-image, so flipping an accepted decision back to `proposed` to sneak
146
+ # a substantive edit is still caught.
147
+ candidate_decisions=$(git diff --cached --name-only --diff-filter=M \
138
148
  | grep -E '^docs/build/decisions/D[0-9]+.*\.md$' \
139
149
  | grep -v '/superseded/' \
140
150
  || true)
141
151
 
142
- if [[ -n "$modified_decisions" ]]; then
152
+ locked_decisions=""
153
+ if [[ -n "$candidate_decisions" ]]; then
154
+ while IFS= read -r f; do
155
+ [[ -z "$f" ]] && continue
156
+ head_status=$(git show "HEAD:$f" 2>/dev/null \
157
+ | grep -m1 -E '^\*\*Status:\*\*' \
158
+ | sed -E 's/^\*\*Status:\*\*[[:space:]]*//' \
159
+ | awk '{print tolower($1)}')
160
+ case "$head_status" in
161
+ accepted|active) locked_decisions+="${f}"$'\n' ;;
162
+ esac
163
+ done <<< "$candidate_decisions"
164
+ fi
165
+
166
+ if [[ -n "$locked_decisions" ]]; then
143
167
  red "✖ active-decision modification — BLOCKED (WU 111 enforcement):"
144
- while IFS= read -r f; do echo " — $f"; done <<< "$modified_decisions"
168
+ while IFS= read -r f; do [[ -n "$f" ]] && echo " — $f"; done <<< "$locked_decisions"
145
169
  red " Decisions are immutable once accepted. The discipline is to write a"
146
170
  red " superseding D-NNN+1 and link forward. Use:"
147
171
  red " nuos-catalogue decision supersede <target> --by=<new-D> --reason=\"...\""
@@ -149,7 +173,7 @@ if [[ -n "$modified_decisions" ]]; then
149
173
  red " If this edit is a non-substantive typo fix or link cleanup that does"
150
174
  red " not change the decision's meaning, you may bypass this block with"
151
175
  red " --no-verify. CLAUDE.md prohibits --no-verify for substantive changes."
152
- log_event "active-decision-block" "$(echo "$modified_decisions" | tr '\n' ',')"
176
+ log_event "active-decision-block" "$(echo "$locked_decisions" | tr '\n' ',')"
153
177
  EXIT_CODE=1
154
178
  fi
155
179
 
@@ -134,14 +134,38 @@ fi
134
134
  # ---------- Rule 2: active-decision modification block (WU 111 ship) ---
135
135
 
136
136
  dim "[nuos:pre-commit] active-decision modification check"
137
- modified_decisions=$(git diff --cached --name-only --diff-filter=M \
137
+ #
138
+ # "Immutable once accepted" — this blocks edits only to a decision whose
139
+ # status *in HEAD* is already a locked state (`accepted` or `active`).
140
+ # Editing a still-`proposed` decision is allowed: promoting it to
141
+ # accepted/active is the sanctioned lifecycle step, not a violation, and
142
+ # proposed decisions are in-flight by design. New decision files are
143
+ # additions (excluded by --diff-filter=M), so a decision born `accepted`
144
+ # is never blocked on creation. The locked-status check uses the HEAD
145
+ # pre-image, so flipping an accepted decision back to `proposed` to sneak
146
+ # a substantive edit is still caught.
147
+ candidate_decisions=$(git diff --cached --name-only --diff-filter=M \
138
148
  | grep -E '^docs/build/decisions/D[0-9]+.*\.md$' \
139
149
  | grep -v '/superseded/' \
140
150
  || true)
141
151
 
142
- if [[ -n "$modified_decisions" ]]; then
152
+ locked_decisions=""
153
+ if [[ -n "$candidate_decisions" ]]; then
154
+ while IFS= read -r f; do
155
+ [[ -z "$f" ]] && continue
156
+ head_status=$(git show "HEAD:$f" 2>/dev/null \
157
+ | grep -m1 -E '^\*\*Status:\*\*' \
158
+ | sed -E 's/^\*\*Status:\*\*[[:space:]]*//' \
159
+ | awk '{print tolower($1)}')
160
+ case "$head_status" in
161
+ accepted|active) locked_decisions+="${f}"$'\n' ;;
162
+ esac
163
+ done <<< "$candidate_decisions"
164
+ fi
165
+
166
+ if [[ -n "$locked_decisions" ]]; then
143
167
  red "✖ active-decision modification — BLOCKED (WU 111 enforcement):"
144
- while IFS= read -r f; do echo " — $f"; done <<< "$modified_decisions"
168
+ while IFS= read -r f; do [[ -n "$f" ]] && echo " — $f"; done <<< "$locked_decisions"
145
169
  red " Decisions are immutable once accepted. The discipline is to write a"
146
170
  red " superseding D-NNN+1 and link forward. Use:"
147
171
  red " nuos-catalogue decision supersede <target> --by=<new-D> --reason=\"...\""
@@ -149,7 +173,7 @@ if [[ -n "$modified_decisions" ]]; then
149
173
  red " If this edit is a non-substantive typo fix or link cleanup that does"
150
174
  red " not change the decision's meaning, you may bypass this block with"
151
175
  red " --no-verify. CLAUDE.md prohibits --no-verify for substantive changes."
152
- log_event "active-decision-block" "$(echo "$modified_decisions" | tr '\n' ',')"
176
+ log_event "active-decision-block" "$(echo "$locked_decisions" | tr '\n' ',')"
153
177
  EXIT_CODE=1
154
178
  fi
155
179