@nusoft/nuos-build-catalogue 0.19.1 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -460,7 +460,17 @@ async function main() {
460
460
  // when the user switched machines and needs to pull the model
461
461
  // freshly. Same orchestrator that `init` calls internally.
462
462
  const { runLlmSetup } = await import('./setup/run-llm-setup.js');
463
+ const { ensureIndexBuilt } = await import('./setup/auto-index.js');
463
464
  const result = await runLlmSetup({ nonInteractive: false });
465
+ // After the LLM stack is ready, auto-build the search index when
466
+ // it isn't already present. Same helper init and install-protocols
467
+ // use — keeps the three commands aligned on "after this finishes
468
+ // the project is search-ready".
469
+ if (result.kind === 'already_ready' ||
470
+ result.kind === 'pulled_only' ||
471
+ result.kind === 'installed_and_pulled') {
472
+ await ensureIndexBuilt({});
473
+ }
464
474
  // Most failure paths emit guidance in-band; we exit non-zero only
465
475
  // when a pull actually failed (so CI scripting can branch on it).
466
476
  const exitCode = result.kind === 'pull_failed' || result.kind === 'install_failed' ? 1 : 0;
@@ -211,6 +211,7 @@ export async function cmdInit(prompt, options = {}) {
211
211
  // `nuos-catalogue setup-llm` later.
212
212
  if (!options.noLlm) {
213
213
  const { runLlmSetup } = await import('../setup/run-llm-setup.js');
214
+ const { ensureIndexBuilt } = await import('../setup/auto-index.js');
214
215
  await runLlmSetup({
215
216
  // The setup module writes its own progress directly to stderr; we
216
217
  // don't route through `prompt.print` because the in-place progress
@@ -224,6 +225,17 @@ export async function cmdInit(prompt, options = {}) {
224
225
  // so this is safe in unattended runs too.
225
226
  nonInteractive: false,
226
227
  });
228
+ // After LLM setup finishes, auto-build the first search index. On a
229
+ // fresh project this is ~30s of starter-kit boilerplate; trivial,
230
+ // and finishing here means `search` works out of the box. When the
231
+ // LLM stack isn't ready, `ensureIndexBuilt` skips with a hint
232
+ // pointing back to setup-llm.
233
+ const indexResult = await ensureIndexBuilt({ cwd });
234
+ if (indexResult.kind === 'skipped_llm_not_ready') {
235
+ prompt.print('');
236
+ prompt.print(` · Skipping first-index build: ${indexResult.reason}.`);
237
+ prompt.print(` · ${indexResult.hint}`);
238
+ }
227
239
  }
228
240
  else {
229
241
  prompt.print('');
@@ -284,6 +296,13 @@ export async function cmdInstallProtocols(prompt, options = {}) {
284
296
  prompt.print('');
285
297
  prompt.print('Checking local semantic search (Ollama + qwen3-embedding:0.6b):');
286
298
  await reportLlmStatus((msg) => prompt.print(` ${msg}`));
299
+ // Auto-build/refresh the search index when the LLM is ready. The
300
+ // indexer is incremental via per-file SHA hashes: a no-change project
301
+ // takes ~1s, a project with N changed files takes O(N) embed calls.
302
+ // When the LLM stack isn't ready the helper skips silently — the
303
+ // status was already reported above by reportLlmStatus.
304
+ const { ensureIndexBuilt } = await import('../setup/auto-index.js');
305
+ await ensureIndexBuilt({ cwd });
287
306
  return { output: '', exitCode: 0 };
288
307
  }
289
308
  /**
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Shared helper that runs the first search index build automatically
3
+ * from `init`, `install-protocols`, and `setup-llm`.
4
+ *
5
+ * Gated on the LLM stack being ready (Ollama + the configured embedding
6
+ * model). If the LLM isn't ready, it returns a `skipped_llm_not_ready`
7
+ * result with a hint string the caller prints. The hint references
8
+ * `setup-llm` so the user has a clear path forward.
9
+ *
10
+ * Indexing on a fresh project takes ~30s — small enough that auto-
11
+ * running on first install is friendlier than asking. Subsequent calls
12
+ * are incremental via the per-file SHA hashes, so re-running on an
13
+ * existing index is cheap.
14
+ *
15
+ * @module setup/auto-index
16
+ */
17
+ /** Outcome of an auto-index attempt. */
18
+ export type AutoIndexResult =
19
+ /**
20
+ * The indexer ran. `indexed` includes both freshly-embedded files and
21
+ * re-embedded changed ones. `unchanged` is non-zero on subsequent
22
+ * runs — those files were SHA-matched and skipped without embedding.
23
+ */
24
+ {
25
+ kind: 'ran';
26
+ indexPath: string;
27
+ indexed: number;
28
+ unchanged: number;
29
+ chunks: number;
30
+ durationMs: number;
31
+ } | {
32
+ kind: 'skipped_llm_not_ready';
33
+ reason: string;
34
+ hint: string;
35
+ } | {
36
+ kind: 'skipped_no_catalogue';
37
+ } | {
38
+ kind: 'failed';
39
+ error: string;
40
+ };
41
+ export interface AutoIndexOptions {
42
+ /** Project root for path resolution. Defaults to `process.cwd()`. */
43
+ cwd?: string;
44
+ /** Output sink — defaults to process.stderr. */
45
+ out?: (text: string) => void;
46
+ /** Force a full reindex even if the index file already exists. */
47
+ force?: boolean;
48
+ }
49
+ /**
50
+ * Run the indexer when conditions allow. Always runs (the indexer is
51
+ * incremental — unchanged files are SHA-skipped without embedding work),
52
+ * so this both *creates* the index on first call and *refreshes* it on
53
+ * subsequent calls. Returns `skipped_llm_not_ready` with a hint when
54
+ * Ollama is unreachable or the model isn't pulled — the caller prints the hint and the user runs
55
+ * `setup-llm` to fix things.
56
+ *
57
+ * Never throws on user-facing failures.
58
+ */
59
+ export declare function ensureIndexBuilt(opts?: AutoIndexOptions): Promise<AutoIndexResult>;
@@ -0,0 +1,138 @@
1
+ /**
2
+ * Shared helper that runs the first search index build automatically
3
+ * from `init`, `install-protocols`, and `setup-llm`.
4
+ *
5
+ * Gated on the LLM stack being ready (Ollama + the configured embedding
6
+ * model). If the LLM isn't ready, it returns a `skipped_llm_not_ready`
7
+ * result with a hint string the caller prints. The hint references
8
+ * `setup-llm` so the user has a clear path forward.
9
+ *
10
+ * Indexing on a fresh project takes ~30s — small enough that auto-
11
+ * running on first install is friendlier than asking. Subsequent calls
12
+ * are incremental via the per-file SHA hashes, so re-running on an
13
+ * existing index is cheap.
14
+ *
15
+ * @module setup/auto-index
16
+ */
17
+ import { existsSync } from 'node:fs';
18
+ import { resolveBuildRoot, resolveCatalogueRoot, resolveHashPath, resolveIndexPath, } from '../path-resolution.js';
19
+ import { DEFAULT_OLLAMA_HOST, detectModelPresent, detectOllamaApi } from './ollama-detect.js';
20
+ import { DEFAULT_EMBEDDING_MODEL } from './run-llm-setup.js';
21
+ /**
22
+ * Run the indexer when conditions allow. Always runs (the indexer is
23
+ * incremental — unchanged files are SHA-skipped without embedding work),
24
+ * so this both *creates* the index on first call and *refreshes* it on
25
+ * subsequent calls. Returns `skipped_llm_not_ready` with a hint when
26
+ * Ollama is unreachable or the model isn't pulled — the caller prints the hint and the user runs
27
+ * `setup-llm` to fix things.
28
+ *
29
+ * Never throws on user-facing failures.
30
+ */
31
+ export async function ensureIndexBuilt(opts = {}) {
32
+ const cwd = opts.cwd ?? process.cwd();
33
+ const out = opts.out ?? ((text) => process.stderr.write(text));
34
+ // Resolve where the index file lives without forcing the LLM stack to
35
+ // load — path resolution is cheap and offline. When the project has
36
+ // no `docs/build/` yet (e.g. install-protocols invoked in a non-
37
+ // scaffolded directory), resolveBuildRoot throws — we treat that as a
38
+ // silent no-op, since there is nothing meaningful to index.
39
+ const ctx = { cwd, env: process.env };
40
+ let buildRoot;
41
+ let catalogueRoot;
42
+ let indexPath;
43
+ let hashPath;
44
+ try {
45
+ buildRoot = resolveBuildRoot(undefined, ctx);
46
+ catalogueRoot = resolveCatalogueRoot(undefined, ctx);
47
+ indexPath = resolveIndexPath(buildRoot, undefined, ctx);
48
+ hashPath = resolveHashPath(buildRoot, undefined, ctx);
49
+ }
50
+ catch {
51
+ return { kind: 'skipped_no_catalogue' };
52
+ }
53
+ // We do not short-circuit on `existsSync(indexPath)` — the indexer is
54
+ // already incremental via the per-file SHA hash store, so running it
55
+ // when the index is up-to-date is cheap (~1s on a 270-file catalogue
56
+ // with no changes). Short-circuiting here would leave newer files
57
+ // un-embedded until the user ran `nuos-catalogue index` manually,
58
+ // which is exactly the discoverability gap the auto-index is meant to
59
+ // close.
60
+ // Probe the LLM stack — index requires Ollama + the model. If either
61
+ // is missing, skip with a hint pointing at setup-llm.
62
+ const apiHost = process.env.NUOS_CATALOGUE_OLLAMA_HOST ?? DEFAULT_OLLAMA_HOST;
63
+ const modelId = process.env.NUOS_CATALOGUE_OLLAMA_MODEL ?? DEFAULT_EMBEDDING_MODEL;
64
+ const api = await detectOllamaApi(apiHost);
65
+ if (!api.reachable) {
66
+ return {
67
+ kind: 'skipped_llm_not_ready',
68
+ reason: `Ollama is not running at ${apiHost}`,
69
+ hint: 'Run `nuos-catalogue setup-llm` to set up local semantic search, then re-run `nuos-catalogue index`.',
70
+ };
71
+ }
72
+ const model = await detectModelPresent(apiHost, modelId);
73
+ if (!model.present) {
74
+ return {
75
+ kind: 'skipped_llm_not_ready',
76
+ reason: `${modelId} is not pulled`,
77
+ hint: 'Run `nuos-catalogue setup-llm` to pull the embedding model (~600 MB), then re-run `nuos-catalogue index`.',
78
+ };
79
+ }
80
+ // LLM is ready. Run the indexer. The first run on a fresh project is
81
+ // ~30s of starter-kit content; subsequent runs are fast — the
82
+ // per-file SHA hashes mean unchanged files are skipped without
83
+ // embedding.
84
+ const isFirstRun = !existsSync(indexPath);
85
+ if (isFirstRun) {
86
+ out('Building search index for docs/build/ … (first run may take ~30 seconds)\n');
87
+ }
88
+ else {
89
+ out('Refreshing search index (incremental — only changed files are re-embedded)…\n');
90
+ }
91
+ try {
92
+ const { selectEmbedderFromEnv } = await import('../embedder/select.js');
93
+ const { openStore } = await import('../store/open.js');
94
+ const { runIndex } = await import('../indexer/upsert.js');
95
+ const embedder = await selectEmbedderFromEnv();
96
+ const store = await openStore({ storagePath: indexPath, dimensions: embedder.dimensions });
97
+ try {
98
+ const report = await runIndex({
99
+ catalogueRoot,
100
+ hashFilePath: hashPath,
101
+ store,
102
+ embedder,
103
+ force: Boolean(opts.force),
104
+ dryRun: false,
105
+ });
106
+ const changed = report.indexed + report.updated;
107
+ const secs = (report.durationMs / 1000).toFixed(1);
108
+ if (isFirstRun) {
109
+ out(`✓ Indexed ${report.indexed} file(s), ${report.chunks} chunks embedded in ${secs}s\n`);
110
+ }
111
+ else if (changed === 0) {
112
+ out(`✓ Index up-to-date (${report.unchanged} files checked, none changed) in ${secs}s\n`);
113
+ }
114
+ else {
115
+ out(`✓ Re-indexed ${changed} changed file(s) (${report.unchanged} unchanged), ` +
116
+ `${report.chunks} chunks embedded in ${secs}s\n`);
117
+ }
118
+ return {
119
+ kind: 'ran',
120
+ indexPath,
121
+ indexed: changed,
122
+ unchanged: report.unchanged,
123
+ chunks: report.chunks,
124
+ durationMs: report.durationMs,
125
+ };
126
+ }
127
+ finally {
128
+ // Unload-after-use commitment — embedder releases the model.
129
+ await embedder.dispose();
130
+ }
131
+ }
132
+ catch (err) {
133
+ const message = err instanceof Error ? err.message : String(err);
134
+ out(`\n✗ Index refresh failed: ${message}\n`);
135
+ out('Re-run `nuos-catalogue index` manually to retry.\n');
136
+ return { kind: 'failed', error: message };
137
+ }
138
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nusoft/nuos-build-catalogue",
3
- "version": "0.19.1",
3
+ "version": "0.20.1",
4
4
  "description": "NuOS build-catalogue tooling: semantic search (WU 110) + migration runner that lifts markdown artefacts into JSON-backed workflow records (WU 111, Phase G).",
5
5
  "type": "module",
6
6
  "bin": {