@nusoft/nuos-build-catalogue 0.10.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/cli.d.ts +13 -0
  2. package/dist/cli.js +491 -0
  3. package/dist/commands/create.d.ts +70 -0
  4. package/dist/commands/create.js +341 -0
  5. package/dist/commands/format.d.ts +19 -0
  6. package/dist/commands/format.js +89 -0
  7. package/dist/commands/handlers.d.ts +35 -0
  8. package/dist/commands/handlers.js +132 -0
  9. package/dist/commands/init.d.ts +41 -0
  10. package/dist/commands/init.js +289 -0
  11. package/dist/commands/prompt.d.ts +44 -0
  12. package/dist/commands/prompt.js +100 -0
  13. package/dist/commands/write.d.ts +39 -0
  14. package/dist/commands/write.js +247 -0
  15. package/dist/embedder/ollama.d.ts +54 -0
  16. package/dist/embedder/ollama.js +164 -0
  17. package/dist/embedder/openai.d.ts +21 -0
  18. package/dist/embedder/openai.js +56 -0
  19. package/dist/embedder/select.d.ts +9 -0
  20. package/dist/embedder/select.js +27 -0
  21. package/dist/embedder/stub.d.ts +15 -0
  22. package/dist/embedder/stub.js +40 -0
  23. package/dist/embedder/types.d.ts +21 -0
  24. package/dist/embedder/types.js +6 -0
  25. package/dist/embedder/vertex.d.ts +41 -0
  26. package/dist/embedder/vertex.js +94 -0
  27. package/dist/indexer/chunk.d.ts +20 -0
  28. package/dist/indexer/chunk.js +196 -0
  29. package/dist/indexer/crawl.d.ts +20 -0
  30. package/dist/indexer/crawl.js +66 -0
  31. package/dist/indexer/metadata.d.ts +21 -0
  32. package/dist/indexer/metadata.js +126 -0
  33. package/dist/indexer/upsert.d.ts +26 -0
  34. package/dist/indexer/upsert.js +152 -0
  35. package/dist/migrate/parsers.d.ts +17 -0
  36. package/dist/migrate/parsers.js +123 -0
  37. package/dist/migrate/run.d.ts +22 -0
  38. package/dist/migrate/run.js +142 -0
  39. package/dist/migrate/store.d.ts +20 -0
  40. package/dist/migrate/store.js +52 -0
  41. package/dist/migrate/types.d.ts +57 -0
  42. package/dist/migrate/types.js +13 -0
  43. package/dist/regenerate/check.d.ts +11 -0
  44. package/dist/regenerate/check.js +97 -0
  45. package/dist/regenerate/diff.d.ts +18 -0
  46. package/dist/regenerate/diff.js +38 -0
  47. package/dist/regenerate/types.d.ts +52 -0
  48. package/dist/regenerate/types.js +14 -0
  49. package/dist/runtime/ac-parse.d.ts +63 -0
  50. package/dist/runtime/ac-parse.js +196 -0
  51. package/dist/runtime/markdown-edit.d.ts +53 -0
  52. package/dist/runtime/markdown-edit.js +101 -0
  53. package/dist/runtime/markdown-render.d.ts +27 -0
  54. package/dist/runtime/markdown-render.js +209 -0
  55. package/dist/runtime/mis-adapter.d.ts +35 -0
  56. package/dist/runtime/mis-adapter.js +364 -0
  57. package/dist/runtime/runtime.d.ts +20 -0
  58. package/dist/runtime/runtime.js +39 -0
  59. package/dist/search/format.d.ts +6 -0
  60. package/dist/search/format.js +23 -0
  61. package/dist/search/query.d.ts +29 -0
  62. package/dist/search/query.js +71 -0
  63. package/dist/store/open.d.ts +14 -0
  64. package/dist/store/open.js +16 -0
  65. package/package.json +3 -2
@@ -0,0 +1,247 @@
1
+ /**
2
+ * Phase H part 2 — flag-driven write commands.
3
+ *
4
+ * Each handler:
5
+ * 1. Validates the flags
6
+ * 2. Looks up the target record from the store
7
+ * 3. Builds a typed `CaptureInput` for the relevant workflow
8
+ * 4. Drives the NuFlow lifecycle through the runtime
9
+ * 5. Reports the result
10
+ *
11
+ * No interactive prompts; flag-driven only. Interactive `create`
12
+ * commands are deferred (Phase H part 3).
13
+ */
14
+ import { normaliseHandle } from './handlers.js';
15
+ import { extractForCompletion } from '../runtime/ac-parse.js';
16
/**
 * Actor identity under which every write command in this module runs.
 *
 * The whole object is handed to `runtime.startWorkflow`; only its `id` is
 * passed to the confirm / approve / commit steps of the lifecycle driver.
 */
const BUILD_MAINTAINER = {
    kind: 'staff',
    id: 'build-maintainer',
    role: 'build-maintainer',
};
21
+ // ---------------------------------------------------------------------------
22
+ // wu advance <handle> --to=<status> [--reason="..."]
23
+ // ---------------------------------------------------------------------------
24
/**
 * `wu advance <handle> --to=<status> [--reason="..."]`
 *
 * Moves a work unit to a new status by driving the
 * `work_unit.advance_status` workflow.
 *
 * @param store   Record store; `store.get(handle)` must return the record or a falsy value.
 * @param runtime Workflow runtime forwarded to the lifecycle driver.
 * @param args    Parsed flags: `handle`, `to`, and optional `reason`.
 * @returns `{ output, exitCode }` — exit code 2 for missing flags, 1 when the
 *          record is absent or the workflow does not complete, 0 on success.
 */
export async function cmdWuAdvance(store, runtime, args) {
    const usageError = (output) => ({ output, exitCode: 2 });
    if (!args.handle) {
        return usageError('Usage: nuos-catalogue wu advance <handle> --to=<status> [--reason="..."]');
    }
    if (!args.to) {
        return usageError('--to=<status> is required (e.g. --to=in_progress)');
    }

    const handle = normaliseHandle('work_unit', args.handle);
    const record = store.get(handle);
    const isWorkUnit = Boolean(record) && record.register === 'work_unit';
    if (!isWorkUnit) {
        return { output: `no work_unit record for handle "${handle}"`, exitCode: 1 };
    }

    const fromStatus = inferWorkflowStatus(record);

    // Only the → completed transition needs the acceptance-criteria list: the
    // pack's completion gate checks that every AC is ticked with evidence.
    // It is read from the record's markdown; other transitions send undefined.
    let acceptanceCriteria;
    if (args.to === 'completed') {
        acceptanceCriteria = extractForCompletion(record.rawMarkdown);
    }

    const reasonSuffix = args.reason ? `: ${args.reason}` : '';
    const capture = {
        channel: 'typed_note',
        content: `advance ${handle} → ${args.to}${reasonSuffix}`,
        subjects: [{ kind: 'work_unit', id: handle }],
        metadata: {
            targetHandle: handle,
            fromStatus,
            toStatus: args.to,
            reason: args.reason,
            acceptanceCriteria,
        },
    };
    return await driveLifecycle(runtime, 'work_unit.advance_status', capture, handle, args.to);
}
55
/**
 * Map the free-text status stored on a record onto a workflow status.
 *
 * The stored text may carry emoji, punctuation and mixed case, and may spell
 * multi-word statuses with spaces, hyphens or underscores ("In Progress",
 * "in-review", "blocked on question"). Matching is therefore done on a
 * canonical form in which every run of non-alphanumeric characters becomes a
 * single `_`, and a candidate only matches as a whole word — so "already"
 * does not count as `ready`.
 *
 * @param record Record whose optional `status` string is inspected.
 * @returns The first candidate (in the priority order below) found in the
 *          status text, mapped to its workflow status; `'proposed'` when
 *          nothing matches or the status is missing.
 */
function inferWorkflowStatus(record) {
    // [spelling to look for, workflow status to report], in priority order.
    // More specific spellings must precede their prefixes
    // ('blocked-on-question' before 'blocked').
    const KNOWN = [
        ['proposed', 'proposed'],
        ['ready', 'ready'],
        ['in_progress', 'in_progress'],
        ['in_review', 'in_review'],
        ['completed', 'completed'],
        ['superseded', 'superseded'],
        ['cancelled', 'cancelled'],
        ['deferred-with-trigger', 'deferred-with-trigger'],
        ['blocked-on-question', 'blocked-on-question'],
        // Variants accepted on input that the pack's state machine lacks.
        ['in_flight', 'in_progress'],
        ['blocked', 'blocked-on-question'],
    ];
    // Lower-case, collapse every non-alphanumeric run to '_', and pad with '_'
    // so a whole-word test becomes a plain substring test on `_word_`.
    const canon = (text) => `_${text.toLowerCase().replace(/[^a-z0-9]+/g, '_')}_`;
    const haystack = canon(record.status ?? '');
    for (const [spelling, workflowStatus] of KNOWN) {
        if (haystack.includes(canon(spelling))) {
            return workflowStatus;
        }
    }
    return 'proposed';
}
87
+ // ---------------------------------------------------------------------------
88
+ // wu tick <handle> --index=N --evidence="..."
89
+ // ---------------------------------------------------------------------------
90
/**
 * `wu tick <handle> --index=N --evidence="..."`
 *
 * Ticks one acceptance criterion on a work unit by driving the
 * `work_unit.tick_acceptance_criterion` workflow.
 *
 * @param store   Record store; `store.get(handle)` must return the record or a falsy value.
 * @param runtime Workflow runtime forwarded to the lifecycle driver.
 * @param args    Parsed flags: `handle`, `index` (non-negative integer) and
 *                `evidence` (non-blank string).
 * @returns `{ output, exitCode }` — exit code 2 for invalid flags, 1 when no
 *          work_unit record exists or the workflow does not complete, 0 on success.
 */
export async function cmdWuTick(store, runtime, args) {
    if (!args.handle) {
        return {
            output: 'Usage: nuos-catalogue wu tick <handle> --index=N --evidence="..."',
            exitCode: 2,
        };
    }
    if (typeof args.index !== 'number' || !Number.isInteger(args.index) || args.index < 0) {
        return { output: '--index=<non-negative integer> is required', exitCode: 2 };
    }
    if (!args.evidence || args.evidence.trim().length === 0) {
        return { output: '--evidence="..." is required (non-empty)', exitCode: 2 };
    }
    const handle = normaliseHandle('work_unit', args.handle);
    // Check the register as well as existence, as the other write commands do:
    // a record of another register under this handle is not a work unit, and
    // the error message below promises exactly that.
    const record = store.get(handle);
    if (!record || record.register !== 'work_unit') {
        return { output: `no work_unit record for handle "${handle}"`, exitCode: 1 };
    }
    const capture = {
        channel: 'typed_note',
        content: `tick AC #${args.index} on ${handle}`,
        subjects: [{ kind: 'work_unit', id: handle }],
        metadata: {
            targetHandle: handle,
            criterionIndex: args.index,
            evidence: args.evidence,
        },
    };
    return await driveLifecycle(runtime, 'work_unit.tick_acceptance_criterion', capture, handle, `index ${args.index}`);
}
119
+ // ---------------------------------------------------------------------------
120
+ // decision supersede <target> --by=<superseding> [--reason="..."]
121
+ // ---------------------------------------------------------------------------
122
/**
 * `decision supersede <target> --by=<superseding> [--reason="..."]`
 *
 * Marks one decision as superseded by another by driving the
 * `decision.supersede` workflow.
 *
 * @param store   Record store; `store.get(handle)` must return the record or a falsy value.
 * @param runtime Workflow runtime forwarded to the lifecycle driver.
 * @param args    Parsed flags: `target`, `by`, and optional `reason`.
 * @returns `{ output, exitCode }` — exit code 2 for missing flags, 1 when
 *          either decision is absent or the workflow does not complete, 0 on success.
 */
export async function cmdDecisionSupersede(store, runtime, args) {
    const usageError = (output) => ({ output, exitCode: 2 });
    const notFound = (output) => ({ output, exitCode: 1 });
    const isDecision = (rec) => Boolean(rec) && rec.register === 'decision';

    if (!args.target) {
        return usageError('Usage: nuos-catalogue decision supersede <target> --by=<superseding> [--reason="..."]');
    }
    if (!args.by) {
        return usageError('--by=<superseding D-handle> is required');
    }

    const target = normaliseHandle('decision', args.target);
    const superseding = normaliseHandle('decision', args.by);
    // Both lookups happen before either result is checked.
    const targetRecord = store.get(target);
    const supersedingRecord = store.get(superseding);
    if (!isDecision(targetRecord)) {
        return notFound(`no decision record for target "${target}"`);
    }
    if (!isDecision(supersedingRecord)) {
        return notFound(`no decision record for superseding "${superseding}"`);
    }

    const metadata = {
        targetHandle: target,
        supersedingHandle: superseding,
        // Hard-coded: the target is assumed to be 'accepted' and its stored
        // status is not consulted here. The workflow is relied on to reject
        // the request when that assumption is wrong.
        targetCurrentStatus: 'accepted',
        reason: args.reason,
    };
    const capture = {
        channel: 'typed_note',
        content: `supersede ${target} by ${superseding}`,
        subjects: [
            { kind: 'decision', id: target },
            { kind: 'decision', id: superseding },
        ],
        metadata,
    };
    return await driveLifecycle(runtime, 'decision.supersede', capture, target, superseding);
}
161
+ // ---------------------------------------------------------------------------
162
+ // question resolve <q-handle> --by=<d-handle> [--reason="..."]
163
+ // ---------------------------------------------------------------------------
164
/**
 * `question resolve <q-handle> --by=<d-handle> [--reason="..."]`
 *
 * Resolves an open question with a decision by driving the
 * `open_question.resolve` workflow.
 *
 * @param store   Record store; `store.get(handle)` must return the record or a falsy value.
 * @param runtime Workflow runtime forwarded to the lifecycle driver.
 * @param args    Parsed flags: `qHandle`, `by`, and optional `reason`.
 * @returns `{ output, exitCode }` — exit code 2 for missing flags, 1 when the
 *          question or decision is absent or the workflow does not complete,
 *          0 on success.
 */
export async function cmdQuestionResolve(store, runtime, args) {
    const usageError = (output) => ({ output, exitCode: 2 });
    const notFound = (output) => ({ output, exitCode: 1 });
    const hasRegister = (rec, register) => Boolean(rec) && rec.register === register;

    if (!args.qHandle) {
        return usageError('Usage: nuos-catalogue question resolve <q-handle> --by=<d-handle> [--reason="..."]');
    }
    if (!args.by) {
        return usageError('--by=<resolving D-handle> is required');
    }

    const qHandle = normaliseHandle('open_question', args.qHandle);
    const dHandle = normaliseHandle('decision', args.by);
    // Both lookups happen before either result is checked.
    const qRecord = store.get(qHandle);
    const dRecord = store.get(dHandle);
    if (!hasRegister(qRecord, 'open_question')) {
        return notFound(`no open_question record for handle "${qHandle}"`);
    }
    if (!hasRegister(dRecord, 'decision')) {
        return notFound(`no decision record for resolving handle "${dHandle}"`);
    }

    const metadata = {
        targetHandle: qHandle,
        // Hard-coded: the question's stored status is not consulted here.
        targetCurrentStatus: 'active',
        resolvingDecisionHandle: dHandle,
        reason: args.reason,
    };
    const capture = {
        channel: 'typed_note',
        content: `resolve ${qHandle} by ${dHandle}`,
        subjects: [
            { kind: 'open_question', id: qHandle },
            { kind: 'decision', id: dHandle },
        ],
        metadata,
    };
    return await driveLifecycle(runtime, 'open_question.resolve', capture, qHandle, dHandle);
}
200
+ // ---------------------------------------------------------------------------
201
+ // Lifecycle driver — single path that handles all four workflows
202
+ // ---------------------------------------------------------------------------
203
/**
 * Drive one workflow through start → confirm → (approve) → commit and turn
 * the outcome into a CLI result. Shared by all four write commands.
 *
 * @param runtime        Runtime exposing `startWorkflow`, `confirmIntent`,
 *                       `approveIntent` and `commitIntent`.
 * @param workflowType   Workflow identifier, also used as the message prefix.
 * @param capture        Capture input handed to `startWorkflow`.
 * @param primarySubject Handle shown on the left of the success message.
 * @param detail         Text shown on the right of the success message.
 * @returns `{ output, exitCode }` — exit code 0 only when the workflow ends in
 *          `completed`; 1 for a rejected start or any unexpected status.
 */
async function driveLifecycle(runtime, workflowType, capture, primarySubject, detail) {
    const failure = (output) => ({ output, exitCode: 1 });
    let workflow;
    try {
        workflow = await runtime.startWorkflow(workflowType, BUILD_MAINTAINER, capture);
    }
    catch (err) {
        // A rejection is not guaranteed to be an Error; narrow before reading
        // `.message` so a thrown string or object is not reported as "undefined".
        const message = err instanceof Error ? err.message : String(err);
        return failure(`${workflowType} rejected at start: ${message}`);
    }
    if (workflow.status === 'waiting_for_clarification') {
        return failure(`${workflowType} produced a clarification request: ${workflow.clarification?.reason ?? 'unspecified'}`);
    }
    if (workflow.status !== 'waiting_for_confirmation') {
        return failure(`${workflowType} unexpected post-start status: ${workflow.status}`);
    }
    // NOTE(review): rejections from the confirm / approve / commit calls below
    // are not caught here and propagate to the caller — confirm that the CLI
    // entry point handles them.
    workflow = await runtime.confirmIntent(workflow.id, BUILD_MAINTAINER.id);
    // Approval is an optional stage: it is only performed when the workflow
    // asks for it after confirmation.
    if (workflow.status === 'waiting_for_approval') {
        workflow = await runtime.approveIntent(workflow.id, BUILD_MAINTAINER.id);
    }
    if (workflow.status !== 'committing') {
        return failure(`${workflowType} unexpected pre-commit status: ${workflow.status}`);
    }
    workflow = await runtime.commitIntent(workflow.id, BUILD_MAINTAINER.id);
    if (workflow.status !== 'completed') {
        return failure(`${workflowType} commit failed: status=${workflow.status}`);
    }
    return {
        output: `${workflowType} ✅ ${primarySubject} → ${detail} (commit ${workflow.commitRef?.commitRef ?? '?'})`,
        exitCode: 0,
    };
}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Ollama embedder — local inference, no network egress.
3
+ *
4
+ * Default model: qwen3-embedding:8b (4096 dims, 32k context). Config via
5
+ * NUOS_CATALOGUE_OLLAMA_MODEL. Smaller variants (qwen3-embedding:4b,
6
+ * qwen3-embedding:0.6b) work the same way; switching variants requires
7
+ * a full reindex if the dimension changes.
8
+ *
9
+ * Why local: keeps the catalogue's content (and any future workload that
10
+ * uses the same Embedder interface) inside whatever boundary Ollama is
11
+ * running in — typically the developer's machine, or a school-local
12
+ * server in a deployment context. Closes one of the two remaining
13
+ * third-party calls in the NuOS stack (the other is LLM completion;
14
+ * WU 058 covers that).
15
+ *
16
+ * **Unload-after-use commitment.** A school server (or developer
17
+ * machine) must not be left holding ~5GB of model in RAM idle. Per the
18
+ * NuOS-wide local-inference principle, models are loaded for the
19
+ * duration of work and unloaded as soon as the work is done.
20
+ *
21
+ * Implementation: each call passes `keep_alive: "1m"` so sequential
22
+ * batches within one operation stay warm; the embedder exposes
23
+ * `dispose()` which explicitly unloads via `keep_alive: 0`. The CLI
24
+ * calls `dispose()` after every `index` and `search` command. If the
25
+ * process exits without `dispose()` (crash, kill -9), Ollama's own
26
+ * idle-timeout (the keep_alive: "1m" we sent) cleans up within a
27
+ * minute.
28
+ *
29
+ * Sizing note — the 8b model at Q4_K_M is ~4.7GB on disk and benefits
30
+ * from ~16GB of RAM. Apple Silicon Metal acceleration helps a lot. On
31
+ * smaller boxes drop to qwen3-embedding:4b (better accuracy/RAM ratio)
32
+ * or qwen3-embedding:0.6b (CPU-only friendly).
33
+ */
34
+ import type { Embedder } from './types.js';
35
/**
 * Embedder backed by a local Ollama server. Instances are obtained through
 * {@link OllamaEmbedder.fromEnv}; the constructor is private.
 */
export declare class OllamaEmbedder implements Embedder {
    /** Length of each embedding vector produced by the configured model. */
    readonly dimensions: number;
    /** Ollama model name sent with every request. */
    readonly modelId: string;
    /** Base URL of the Ollama server. */
    private readonly host;
    /** Number of texts sent per embed request. */
    private readonly batchSize;
    private constructor();
    /**
     * Build an embedder from environment variables
     * (NUOS_CATALOGUE_OLLAMA_MODEL, OLLAMA_HOST, NUOS_CATALOGUE_OLLAMA_BATCH),
     * probing the server once. Rejects when the server cannot be reached, the
     * probe fails, or the embedding dimension cannot be determined.
     */
    static fromEnv(): Promise<OllamaEmbedder>;
    /**
     * Embed `texts` in sequential batches and return one vector per input,
     * in input order. Rejects when a request fails or the server returns a
     * different number of embeddings than inputs.
     */
    embed(texts: string[]): Promise<Float32Array[]>;
    private embedBatch;
    /**
     * Explicitly unload the model from Ollama's RAM. Safe to call multiple
     * times; safe to call before any embed() — it's a no-op if the model
     * isn't currently loaded.
     *
     * Implements the NuOS-wide unload-after-use commitment: at the end of
     * any operation that uses local inference, the model is freed so the
     * host machine isn't left carrying idle weights.
     */
    dispose(): Promise<void>;
}
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Ollama embedder — local inference, no network egress.
3
+ *
4
+ * Default model: qwen3-embedding:8b (4096 dims, 32k context). Config via
5
+ * NUOS_CATALOGUE_OLLAMA_MODEL. Smaller variants (qwen3-embedding:4b,
6
+ * qwen3-embedding:0.6b) work the same way; switching variants requires
7
+ * a full reindex if the dimension changes.
8
+ *
9
+ * Why local: keeps the catalogue's content (and any future workload that
10
+ * uses the same Embedder interface) inside whatever boundary Ollama is
11
+ * running in — typically the developer's machine, or a school-local
12
+ * server in a deployment context. Closes one of the two remaining
13
+ * third-party calls in the NuOS stack (the other is LLM completion;
14
+ * WU 058 covers that).
15
+ *
16
+ * **Unload-after-use commitment.** A school server (or developer
17
+ * machine) must not be left holding ~5GB of model in RAM idle. Per the
18
+ * NuOS-wide local-inference principle, models are loaded for the
19
+ * duration of work and unloaded as soon as the work is done.
20
+ *
21
+ * Implementation: each call passes `keep_alive: "1m"` so sequential
22
+ * batches within one operation stay warm; the embedder exposes
23
+ * `dispose()` which explicitly unloads via `keep_alive: 0`. The CLI
24
+ * calls `dispose()` after every `index` and `search` command. If the
25
+ * process exits without `dispose()` (crash, kill -9), Ollama's own
26
+ * idle-timeout (the keep_alive: "1m" we sent) cleans up within a
27
+ * minute.
28
+ *
29
+ * Sizing note — the 8b model at Q4_K_M is ~4.7GB on disk and benefits
30
+ * from ~16GB of RAM. Apple Silicon Metal acceleration helps a lot. On
31
+ * smaller boxes drop to qwen3-embedding:4b (better accuracy/RAM ratio)
32
+ * or qwen3-embedding:0.6b (CPU-only friendly).
33
+ */
34
const DEFAULT_MODEL = 'qwen3-embedding:8b';
const DEFAULT_HOST = 'http://localhost:11434';
// Texts per /api/embed request when NUOS_CATALOGUE_OLLAMA_BATCH is unset.
const DEFAULT_BATCH_SIZE = 8;
// keep_alive sent with every request that can load the model (probe and
// embed), so an abandoned process never leaves the model resident longer
// than this.
const KEEP_ALIVE = '1m';
// Qwen3-Embedding produces Matryoshka representations 32–4096 dims.
// We use the model default. A future tweak could truncate to e.g. 1024
// to shrink the index by 4x at minor accuracy cost.
const KNOWN_DIMENSIONS = {
    'qwen3-embedding:8b': 4096,
    'qwen3-embedding:4b': 2560,
    'qwen3-embedding:0.6b': 1024,
};
/**
 * Embedder backed by a local Ollama server. Create instances with
 * `OllamaEmbedder.fromEnv()`.
 */
export class OllamaEmbedder {
    dimensions;
    modelId;
    host;
    batchSize;
    /**
     * @param options `{ modelId, dimensions, host, batchSize }`; stored as-is.
     *                `fromEnv()` is responsible for validating them.
     */
    constructor(options) {
        this.modelId = options.modelId;
        this.dimensions = options.dimensions;
        this.host = options.host;
        this.batchSize = options.batchSize;
    }
    /**
     * Build an embedder from the environment and probe the server once.
     *
     * Reads NUOS_CATALOGUE_OLLAMA_MODEL (default `qwen3-embedding:8b`),
     * OLLAMA_HOST (default `http://localhost:11434`, one trailing slash
     * stripped) and NUOS_CATALOGUE_OLLAMA_BATCH (default 8).
     *
     * @returns A ready-to-use embedder whose `dimensions` comes from the live
     *          probe when it returns a vector, else from KNOWN_DIMENSIONS.
     * @throws Error when the batch size is not a number >= 1, the server is
     *         unreachable, the probe returns a non-2xx status, or the
     *         embedding dimension cannot be determined.
     */
    static async fromEnv() {
        const modelId = process.env.NUOS_CATALOGUE_OLLAMA_MODEL ?? DEFAULT_MODEL;
        const host = (process.env.OLLAMA_HOST ?? DEFAULT_HOST).replace(/\/$/, '');
        // A batch size of 0, a negative number or NaN would make embed() loop
        // forever or silently drop inputs, so reject it before any work starts.
        // A blank value is treated the same as an unset one.
        const rawBatch = process.env.NUOS_CATALOGUE_OLLAMA_BATCH;
        const parsedBatch = rawBatch === undefined || rawBatch.trim() === ''
            ? DEFAULT_BATCH_SIZE
            : Number(rawBatch);
        if (!(parsedBatch >= 1)) {
            throw new Error(`NUOS_CATALOGUE_OLLAMA_BATCH must be a number >= 1; got "${rawBatch}".`);
        }
        const batchSize = Math.floor(parsedBatch);
        // Own-property lookup so a model name such as "constructor" cannot pick
        // up something from Object.prototype.
        let dimensions = Object.prototype.hasOwnProperty.call(KNOWN_DIMENSIONS, modelId)
            ? KNOWN_DIMENSIONS[modelId]
            : undefined;
        // Probe the host to give a useful error early
        try {
            const probe = await fetch(`${host}/api/embed`, {
                method: 'POST',
                headers: { 'content-type': 'application/json' },
                // The probe loads the model, so it carries the same short
                // keep_alive as real embed calls.
                body: JSON.stringify({ model: modelId, input: 'probe', keep_alive: KEEP_ALIVE }),
            });
            if (!probe.ok) {
                const body = await probe.text().catch(() => '<unreadable>');
                throw new Error(`Ollama probe failed (${probe.status}): ${body}\n` +
                    `Check that Ollama is running and the model is pulled:\n` +
                    `  ollama serve\n` +
                    `  ollama pull ${modelId}`);
            }
            const json = (await probe.json());
            const probeDim = json.embeddings?.[0]?.length;
            if (probeDim) {
                // Trust the live probe over the lookup table
                dimensions = probeDim;
            }
        }
        catch (err) {
            // Our own probe-failure error already carries guidance; anything
            // else is reported as a connectivity problem.
            if (err instanceof Error && err.message.startsWith('Ollama probe failed'))
                throw err;
            throw new Error(`Could not reach Ollama at ${host}. Is it running? ` +
                `Start it with \`ollama serve\` and pull the model with \`ollama pull ${modelId}\`. ` +
                `Underlying error: ${err instanceof Error ? err.message : String(err)}`);
        }
        if (!dimensions) {
            throw new Error(`Could not determine embedding dimension for model ${modelId}. ` +
                `If this is a new variant, add it to KNOWN_DIMENSIONS in src/embedder/ollama.ts.`);
        }
        return new OllamaEmbedder({ modelId, dimensions, host, batchSize });
    }
    /**
     * Embed `texts` in sequential batches of `batchSize`.
     *
     * @param texts Texts to embed; an empty array returns `[]` without any request.
     * @returns One Float32Array per input, in input order.
     * @throws Error propagated from a failed batch request.
     */
    async embed(texts) {
        if (texts.length === 0)
            return [];
        const out = [];
        for (let i = 0; i < texts.length; i += this.batchSize) {
            const slice = texts.slice(i, i + this.batchSize);
            const embeddings = await this.embedBatch(slice);
            out.push(...embeddings);
        }
        return out;
    }
    /**
     * Send one batch to `/api/embed`.
     *
     * @throws Error on a non-2xx response, or when the number of embeddings
     *         returned differs from the number of inputs.
     */
    async embedBatch(texts) {
        const res = await fetch(`${this.host}/api/embed`, {
            method: 'POST',
            headers: { 'content-type': 'application/json' },
            body: JSON.stringify({
                model: this.modelId,
                input: texts,
                // Keep the model warm only for the duration of one operation.
                // dispose() at the end of the run sends keep_alive: 0 to unload.
                keep_alive: KEEP_ALIVE,
            }),
        });
        if (!res.ok) {
            const body = await res.text().catch(() => '<unreadable>');
            throw new Error(`Ollama embed call failed (${res.status}): ${body}`);
        }
        const json = (await res.json());
        if (!Array.isArray(json.embeddings) || json.embeddings.length !== texts.length) {
            throw new Error(`Ollama returned ${json.embeddings?.length ?? 0} embeddings for ${texts.length} inputs`);
        }
        return json.embeddings.map((e) => new Float32Array(e));
    }
    /**
     * Explicitly unload the model from Ollama's RAM. Safe to call multiple
     * times; safe to call before any embed() — it's a no-op if the model
     * isn't currently loaded.
     *
     * Implements the NuOS-wide unload-after-use commitment: at the end of
     * any operation that uses local inference, the model is freed so the
     * host machine isn't left carrying idle weights.
     *
     * Never throws: failures are written to stderr, because the keep_alive
     * sent on earlier requests still expires on its own.
     */
    async dispose() {
        try {
            const res = await fetch(`${this.host}/api/embed`, {
                method: 'POST',
                headers: { 'content-type': 'application/json' },
                // Empty input + keep_alive: 0 asks Ollama to unload the model.
                body: JSON.stringify({
                    model: this.modelId,
                    input: '',
                    keep_alive: 0,
                }),
            });
            // Non-2xx is non-fatal — the keep_alive on prior calls will still
            // expire within ~1 minute and Ollama will free the model.
            if (!res.ok) {
                const body = await res.text().catch(() => '<unreadable>');
                process.stderr.write(`[ollama] dispose() returned ${res.status}; model will unload via keep_alive timeout. body: ${body}\n`);
            }
        }
        catch (err) {
            // Network error reaching Ollama at dispose time is non-fatal.
            // The keep_alive timeout on prior calls covers cleanup.
            process.stderr.write(`[ollama] dispose() failed: ${err instanceof Error ? err.message : String(err)}\n`);
        }
    }
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * OpenAI embedder — text-embedding-3-small (1536 dims).
3
+ *
4
+ * Auth: OPENAI_API_KEY env var.
5
+ *
6
+ * Chosen as the alternate embedder because it has the lowest setup
7
+ * friction for a contributor without GCP access. Per the WU 110 spec
8
+ * the build catalogue is non-sensitive so cross-region inference is
9
+ * acceptable; per D010 NuVector does not generate embeddings so the
10
+ * consumer (this CLI) decides.
11
+ */
12
+ import type { Embedder } from './types.js';
13
/** Embedder backed by the OpenAI embeddings HTTP API. */
export declare class OpenAIEmbedder implements Embedder {
    /** API key sent as a Bearer token with every request. */
    private readonly apiKey;
    /** Vector length of text-embedding-3-small. */
    readonly dimensions = 1536;
    /** Model name sent with every request. */
    readonly modelId = "text-embedding-3-small";
    /** @param apiKey OpenAI API key; stored as-is. */
    constructor(apiKey: string);
    /** Build an embedder from OPENAI_API_KEY; throws when the variable is unset or empty. */
    static fromEnv(): OpenAIEmbedder;
    /**
     * Embed all `texts` in a single request and return the vectors ordered by
     * the `index` field of the response. Rejects on a non-2xx response.
     */
    embed(texts: string[]): Promise<Float32Array[]>;
    /** No-op: a cloud embedder holds nothing on the local machine. */
    dispose(): Promise<void>;
}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * OpenAI embedder — text-embedding-3-small (1536 dims).
3
+ *
4
+ * Auth: OPENAI_API_KEY env var.
5
+ *
6
+ * Chosen as the alternate embedder because it has the lowest setup
7
+ * friction for a contributor without GCP access. Per the WU 110 spec
8
+ * the build catalogue is non-sensitive so cross-region inference is
9
+ * acceptable; per D010 NuVector does not generate embeddings so the
10
+ * consumer (this CLI) decides.
11
+ */
12
const MODEL_ID = 'text-embedding-3-small';
const DIMENSIONS = 1536;
const API_URL = 'https://api.openai.com/v1/embeddings';
/** Embedder backed by the OpenAI embeddings HTTP API. */
export class OpenAIEmbedder {
    apiKey;
    dimensions = DIMENSIONS;
    modelId = MODEL_ID;
    /** @param apiKey OpenAI API key, sent as a Bearer token with every request. */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }
    /**
     * Build an embedder from the OPENAI_API_KEY environment variable.
     *
     * @throws Error when the variable is unset or empty.
     */
    static fromEnv() {
        const key = process.env.OPENAI_API_KEY;
        if (!key) {
            throw new Error('OPENAI_API_KEY is not set; required for the openai embedder. ' +
                'Set it, or switch to NUOS_CATALOGUE_EMBEDDER=vertex.');
        }
        return new OpenAIEmbedder(key);
    }
    /**
     * Embed all `texts` in a single request.
     *
     * @param texts Texts to embed; an empty array returns `[]` without any request.
     * @returns One Float32Array per input, in input order.
     * @throws Error on a non-2xx response, or when the response does not
     *         contain exactly one embedding per input.
     */
    async embed(texts) {
        if (texts.length === 0)
            return [];
        const res = await fetch(API_URL, {
            method: 'POST',
            headers: {
                'content-type': 'application/json',
                authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL_ID,
                input: texts,
                encoding_format: 'float',
            }),
        });
        if (!res.ok) {
            const body = await res.text().catch(() => '<unreadable body>');
            throw new Error(`OpenAI embeddings call failed (${res.status}): ${body}`);
        }
        const json = (await res.json());
        // A missing or short `data` array would silently misalign vectors with
        // their inputs downstream, so fail loudly instead.
        const data = json?.data;
        if (!Array.isArray(data) || data.length !== texts.length) {
            throw new Error(`OpenAI returned ${Array.isArray(data) ? data.length : 0} embeddings for ${texts.length} inputs`);
        }
        // Sort by index because the API does not guarantee response order
        const sorted = [...data].sort((a, b) => a.index - b.index);
        return sorted.map((d) => new Float32Array(d.embedding));
    }
    // Cloud embedder — nothing to release on the local machine.
    async dispose() { }
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Embedder selector — reads NUOS_CATALOGUE_EMBEDDER from env.
3
+ *
4
+ * Default: ollama (local inference; no network egress; sovereignty by
5
+ * default). Alternatives: vertex (cloud Google), openai (cloud OpenAI),
6
+ * stub (deterministic hash for tests).
7
+ */
8
+ import type { Embedder } from './types.js';
9
/**
 * Build the embedder named by NUOS_CATALOGUE_EMBEDDER (case-insensitive;
 * `ollama` when unset). Rejects with an Error for an unknown name, or when
 * the chosen embedder's own setup fails.
 */
export declare function selectEmbedderFromEnv(): Promise<Embedder>;
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Embedder selector — reads NUOS_CATALOGUE_EMBEDDER from env.
3
+ *
4
+ * Default: ollama (local inference; no network egress; sovereignty by
5
+ * default). Alternatives: vertex (cloud Google), openai (cloud OpenAI),
6
+ * stub (deterministic hash for tests).
7
+ */
8
+ import { OllamaEmbedder } from './ollama.js';
9
+ import { VertexEmbedder } from './vertex.js';
10
+ import { OpenAIEmbedder } from './openai.js';
11
+ import { StubEmbedder } from './stub.js';
12
/**
 * Build the embedder named by NUOS_CATALOGUE_EMBEDDER.
 *
 * The name is matched case-insensitively and defaults to `ollama` when the
 * variable is unset.
 *
 * @returns The constructed embedder.
 * @throws Error for an unknown name; errors from the chosen embedder's own
 *         `fromEnv()` propagate unchanged.
 */
export async function selectEmbedderFromEnv() {
    const name = (process.env.NUOS_CATALOGUE_EMBEDDER ?? 'ollama').toLowerCase();
    // A Map rather than a plain object, so names like "constructor" cannot
    // resolve to something inherited from Object.prototype.
    const factories = new Map([
        ['ollama', () => OllamaEmbedder.fromEnv()],
        ['vertex', () => VertexEmbedder.fromEnv()],
        ['openai', () => OpenAIEmbedder.fromEnv()],
        ['stub', () => new StubEmbedder()],
    ]);
    const create = factories.get(name);
    if (create === undefined) {
        throw new Error(`Unknown embedder "${name}" (NUOS_CATALOGUE_EMBEDDER). ` +
            `Use ollama | vertex | openai | stub.`);
    }
    return create();
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Deterministic hash-based embedder for tests.
3
+ *
4
+ * Not for production retrieval — but lets the indexer + search pipeline
5
+ * be exercised end-to-end without an API key. Enabled via
6
+ * NUOS_CATALOGUE_EMBEDDER=stub.
7
+ */
8
+ import type { Embedder } from './types.js';
9
/** Deterministic, dependency-free embedder selected with NUOS_CATALOGUE_EMBEDDER=stub. */
export declare class StubEmbedder implements Embedder {
    /** Fixed vector length of every stub embedding. */
    readonly dimensions = 384;
    /** Identifier reported for this embedder. */
    readonly modelId = "stub-sha256-bag-of-words";
    /** Embed each text independently; returns one vector per input, in input order. */
    embed(texts: string[]): Promise<Float32Array[]>;
    private embedOne;
    /** No-op: the stub holds no resources. */
    dispose(): Promise<void>;
}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Deterministic hash-based embedder for tests.
3
+ *
4
+ * Not for production retrieval — but lets the indexer + search pipeline
5
+ * be exercised end-to-end without an API key. Enabled via
6
+ * NUOS_CATALOGUE_EMBEDDER=stub.
7
+ */
8
+ import { createHash } from 'node:crypto';
9
const DIMENSIONS = 384;
/**
 * Hash-based bag-of-words embedder. The same text always yields the same
 * vector, and no network or API key is involved.
 */
export class StubEmbedder {
    dimensions = DIMENSIONS;
    modelId = 'stub-sha256-bag-of-words';
    /**
     * Embed each text independently.
     *
     * @param texts Texts to embed.
     * @returns One vector of length `dimensions` per input, in input order.
     */
    async embed(texts) {
        const vectors = [];
        for (const text of texts) {
            vectors.push(this.embedOne(text));
        }
        return vectors;
    }
    /**
     * Build the vector for one text: lower-case it, take every run of
     * `[a-z0-9]` as a token, bump four hash-selected slots per token, then
     * scale the result to unit L2 length (an all-zero vector stays zero).
     */
    embedOne(text) {
        const vector = new Float32Array(DIMENSIONS);
        const words = text.toLowerCase().match(/[a-z0-9]+/g) ?? [];
        for (const word of words) {
            const digest = createHash('sha256').update(word).digest();
            // Four big-endian 16-bit reads cover the first 8 digest bytes;
            // each one picks a slot to increment.
            for (const offset of [0, 2, 4, 6]) {
                vector[digest.readUInt16BE(offset) % DIMENSIONS] += 1;
            }
        }
        const sumOfSquares = vector.reduce((sum, value) => sum + value * value, 0);
        const magnitude = Math.sqrt(sumOfSquares);
        if (magnitude > 0) {
            vector.forEach((value, slot) => {
                vector[slot] = value / magnitude;
            });
        }
        return vector;
    }
    // Nothing to release — the stub owns no resources.
    async dispose() { }
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Embedder interface — per D010, NuVector does not generate embeddings;
3
+ * the consumer supplies them. The catalogue indexer ships its own
4
+ * embedder implementations and routes via this interface.
5
+ */
6
/** Contract implemented by every embedder the catalogue can select. */
export interface Embedder {
    /**
     * Turn texts into embedding vectors. Intended to return one vector per
     * input text, in input order.
     */
    embed(texts: string[]): Promise<Float32Array[]>;
    /**
     * Release any resources the embedder is holding. For local-inference
     * embedders this unloads the model from RAM. For cloud embedders this
     * is a no-op. Always called by the CLI at the end of an operation per
     * the NuOS-wide unload-after-use commitment.
     */
    dispose(): Promise<void>;
    /** Length of every vector returned by `embed`. */
    readonly dimensions: number;
    /** Identifier of the model (or stub) that produces the vectors. */
    readonly modelId: string;
}
18
/** Names accepted by the embedder selector (NUOS_CATALOGUE_EMBEDDER). */
export type EmbedderName = 'ollama' | 'vertex' | 'openai' | 'stub';
/** Configuration naming which embedder to use. */
export interface EmbedderConfig {
    /** Which embedder implementation to select. */
    name: EmbedderName;
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Embedder interface — per D010, NuVector does not generate embeddings;
3
+ * the consumer supplies them. The catalogue indexer ships its own
4
+ * embedder implementations and routes via this interface.
5
+ */
6
+ export {};