@nusoft/nuos-build-catalogue 0.19.1 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +10 -0
- package/dist/commands/init.js +19 -0
- package/dist/setup/auto-index.d.ts +59 -0
- package/dist/setup/auto-index.js +138 -0
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -460,7 +460,17 @@ async function main() {
|
|
|
460
460
|
// when the user switched machines and needs to pull the model
|
|
461
461
|
// freshly. Same orchestrator that `init` calls internally.
|
|
462
462
|
const { runLlmSetup } = await import('./setup/run-llm-setup.js');
|
|
463
|
+
const { ensureIndexBuilt } = await import('./setup/auto-index.js');
|
|
463
464
|
const result = await runLlmSetup({ nonInteractive: false });
|
|
465
|
+
// After the LLM stack is ready, auto-build the search index when
|
|
466
|
+
// it is missing, or refresh it incrementally when it exists. Same helper init and install-protocols
|
|
467
|
+
// use — keeps the three commands aligned on "after this finishes
|
|
468
|
+
// the project is search-ready".
|
|
469
|
+
if (result.kind === 'already_ready' ||
|
|
470
|
+
result.kind === 'pulled_only' ||
|
|
471
|
+
result.kind === 'installed_and_pulled') {
|
|
472
|
+
await ensureIndexBuilt({});
|
|
473
|
+
}
|
|
464
474
|
// Most failure paths emit guidance in-band; we exit non-zero only
|
|
465
475
|
// when a pull actually failed (so CI scripting can branch on it).
|
|
466
476
|
const exitCode = result.kind === 'pull_failed' || result.kind === 'install_failed' ? 1 : 0;
|
package/dist/commands/init.js
CHANGED
|
@@ -211,6 +211,7 @@ export async function cmdInit(prompt, options = {}) {
|
|
|
211
211
|
// `nuos-catalogue setup-llm` later.
|
|
212
212
|
if (!options.noLlm) {
|
|
213
213
|
const { runLlmSetup } = await import('../setup/run-llm-setup.js');
|
|
214
|
+
const { ensureIndexBuilt } = await import('../setup/auto-index.js');
|
|
214
215
|
await runLlmSetup({
|
|
215
216
|
// The setup module writes its own progress directly to stderr; we
|
|
216
217
|
// don't route through `prompt.print` because the in-place progress
|
|
@@ -224,6 +225,17 @@ export async function cmdInit(prompt, options = {}) {
|
|
|
224
225
|
// so this is safe in unattended runs too.
|
|
225
226
|
nonInteractive: false,
|
|
226
227
|
});
|
|
228
|
+
// After LLM setup finishes, auto-build the first search index. On a
|
|
229
|
+
// fresh project this is ~30s of starter-kit boilerplate; trivial,
|
|
230
|
+
// and finishing here means `search` works out of the box. When the
|
|
231
|
+
// LLM stack isn't ready, `ensureIndexBuilt` skips with a hint
|
|
232
|
+
// pointing back to setup-llm.
|
|
233
|
+
const indexResult = await ensureIndexBuilt({ cwd });
|
|
234
|
+
if (indexResult.kind === 'skipped_llm_not_ready') {
|
|
235
|
+
prompt.print('');
|
|
236
|
+
prompt.print(` · Skipping first-index build: ${indexResult.reason}.`);
|
|
237
|
+
prompt.print(` · ${indexResult.hint}`);
|
|
238
|
+
}
|
|
227
239
|
}
|
|
228
240
|
else {
|
|
229
241
|
prompt.print('');
|
|
@@ -284,6 +296,13 @@ export async function cmdInstallProtocols(prompt, options = {}) {
|
|
|
284
296
|
prompt.print('');
|
|
285
297
|
prompt.print('Checking local semantic search (Ollama + qwen3-embedding:0.6b):');
|
|
286
298
|
await reportLlmStatus((msg) => prompt.print(` ${msg}`));
|
|
299
|
+
// Auto-build/refresh the search index when the LLM is ready. The
|
|
300
|
+
// indexer is incremental via per-file SHA hashes: a no-change project
|
|
301
|
+
// takes ~1s, a project with N changed files takes O(N) embed calls.
|
|
302
|
+
// When the LLM stack isn't ready the helper skips silently — the
|
|
303
|
+
// status was already reported above by reportLlmStatus.
|
|
304
|
+
const { ensureIndexBuilt } = await import('../setup/auto-index.js');
|
|
305
|
+
await ensureIndexBuilt({ cwd });
|
|
287
306
|
return { output: '', exitCode: 0 };
|
|
288
307
|
}
|
|
289
308
|
/**
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helper that runs the first search index build automatically
|
|
3
|
+
* from `init`, `install-protocols`, and `setup-llm`.
|
|
4
|
+
*
|
|
5
|
+
* Gated on the LLM stack being ready (Ollama + the configured embedding
|
|
6
|
+
* model). If the LLM isn't ready, this helper returns a `skipped_llm_not_ready`
|
|
7
|
+
* result with a hint string the caller prints. The hint references
|
|
8
|
+
* `setup-llm` so the user has a clear path forward.
|
|
9
|
+
*
|
|
10
|
+
* Indexing on a fresh project takes ~30s — small enough that auto-
|
|
11
|
+
* running on first install is friendlier than asking. Subsequent calls
|
|
12
|
+
* are incremental via the per-file SHA hashes, so re-running on an
|
|
13
|
+
* existing index is cheap.
|
|
14
|
+
*
|
|
15
|
+
* @module setup/auto-index
|
|
16
|
+
*/
|
|
17
|
+
/** Outcome of an auto-index attempt. */
|
|
18
|
+
export type AutoIndexResult =
    | {
        /** The indexer executed (first build or incremental refresh). */
        kind: 'ran';
        /** Path of the index store that the run used. */
        indexPath: string;
        /**
         * Number of files embedded on this run: newly indexed files plus
         * changed files that were re-embedded.
         */
        indexed: number;
        /**
         * Number of files the indexer reported as unchanged. Non-zero on
         * later runs, where SHA-matched files are skipped without embedding.
         */
        unchanged: number;
        /** Chunk count reported by the indexer for this run. */
        chunks: number;
        /** Duration of the run in milliseconds, as reported by the indexer. */
        durationMs: number;
    }
    | {
        /** The LLM stack was not usable, so no indexing was attempted. */
        kind: 'skipped_llm_not_ready';
        /** Short explanation of what was missing. */
        reason: string;
        /** Next-step guidance for the caller to print. */
        hint: string;
    }
    | {
        /** Project paths could not be resolved; there was nothing to index. */
        kind: 'skipped_no_catalogue';
    }
    | {
        /** The indexer was started but threw. */
        kind: 'failed';
        /** Message extracted from the thrown value. */
        error: string;
    };
|
|
41
|
+
export interface AutoIndexOptions {
    /**
     * When true, request a full reindex even though an index file may
     * already exist.
     */
    force?: boolean;
    /** Sink for progress/status text. Falls back to process.stderr. */
    out?: (text: string) => void;
    /** Project root used for path resolution. Falls back to `process.cwd()`. */
    cwd?: string;
}
|
|
49
|
+
/**
|
|
50
|
+
* Run the indexer when conditions allow. Always runs (the indexer is
|
|
51
|
+
* incremental — unchanged files are SHA-skipped without embedding work),
|
|
52
|
+
* so this both *creates* the index on first call and *refreshes* it on
|
|
53
|
+
* subsequent calls. Returns `skipped_llm_not_ready` with a hint when
|
|
54
|
+
* the Ollama probe fails — the caller prints the hint and the user runs
|
|
55
|
+
* `setup-llm` to fix things.
|
|
56
|
+
*
|
|
57
|
+
* Never throws on user-facing failures.
|
|
58
|
+
*/
|
|
59
|
+
// `opts` may be omitted; resolves to one of the `AutoIndexResult` variants.
export declare function ensureIndexBuilt(opts?: AutoIndexOptions): Promise<AutoIndexResult>;
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helper that runs the first search index build automatically
|
|
3
|
+
* from `init`, `install-protocols`, and `setup-llm`.
|
|
4
|
+
*
|
|
5
|
+
* Gated on the LLM stack being ready (Ollama + the configured embedding
|
|
6
|
+
* model). If the LLM isn't ready, this helper returns a `skipped_llm_not_ready`
|
|
7
|
+
* result with a hint string the caller prints. The hint references
|
|
8
|
+
* `setup-llm` so the user has a clear path forward.
|
|
9
|
+
*
|
|
10
|
+
* Indexing on a fresh project takes ~30s — small enough that auto-
|
|
11
|
+
* running on first install is friendlier than asking. Subsequent calls
|
|
12
|
+
* are incremental via the per-file SHA hashes, so re-running on an
|
|
13
|
+
* existing index is cheap.
|
|
14
|
+
*
|
|
15
|
+
* @module setup/auto-index
|
|
16
|
+
*/
|
|
17
|
+
import { existsSync } from 'node:fs';
|
|
18
|
+
import { resolveBuildRoot, resolveCatalogueRoot, resolveHashPath, resolveIndexPath, } from '../path-resolution.js';
|
|
19
|
+
import { DEFAULT_OLLAMA_HOST, detectModelPresent, detectOllamaApi } from './ollama-detect.js';
|
|
20
|
+
import { DEFAULT_EMBEDDING_MODEL } from './run-llm-setup.js';
|
|
21
|
+
/**
 * Build the search index on first call and refresh it on later calls,
 * provided the project paths resolve and the LLM stack is ready.
 *
 * There is deliberately no short-circuit on an existing index file: the
 * indexer is incremental via the per-file SHA hash store, so a refresh of
 * an up-to-date index is cheap, and skipping it would leave newer files
 * un-embedded until the user ran `nuos-catalogue index` manually.
 *
 * @param opts Optional settings: `cwd` (defaults to `process.cwd()`),
 *   `out` (text sink, defaults to writing to `process.stderr`) and
 *   `force` (coerced to a boolean and forwarded to the indexer).
 * @returns One of:
 *   - `{ kind: 'skipped_no_catalogue' }` when path resolution throws;
 *   - `{ kind: 'skipped_llm_not_ready', reason, hint }` when the Ollama
 *     API is unreachable or the embedding model is not present;
 *   - `{ kind: 'ran', ... }` carrying the indexer's counts on success;
 *   - `{ kind: 'failed', error }` when loading or running the indexer
 *     throws (the failure is also written to `out`).
 */
export async function ensureIndexBuilt(opts = {}) {
    const cwd = opts.cwd ?? process.cwd();
    const out = opts.out ?? ((text) => process.stderr.write(text));
    // Resolve where the index lives before touching the LLM stack. When
    // the project has no `docs/build/` yet (e.g. install-protocols invoked
    // in a non-scaffolded directory) resolution throws; that is treated as
    // a silent no-op because there is nothing meaningful to index.
    const ctx = { cwd, env: process.env };
    let buildRoot;
    let catalogueRoot;
    let indexPath;
    let hashPath;
    try {
        buildRoot = resolveBuildRoot(undefined, ctx);
        catalogueRoot = resolveCatalogueRoot(undefined, ctx);
        indexPath = resolveIndexPath(buildRoot, undefined, ctx);
        hashPath = resolveHashPath(buildRoot, undefined, ctx);
    }
    catch {
        return { kind: 'skipped_no_catalogue' };
    }
    // Probe the LLM stack — indexing needs both the Ollama API and the
    // embedding model. If either is missing, skip with a hint pointing at
    // setup-llm rather than failing.
    // NOTE(review): these probes run outside any try/catch; confirm they
    // report failure via `reachable` / `present` rather than by throwing.
    const apiHost = process.env.NUOS_CATALOGUE_OLLAMA_HOST ?? DEFAULT_OLLAMA_HOST;
    const modelId = process.env.NUOS_CATALOGUE_OLLAMA_MODEL ?? DEFAULT_EMBEDDING_MODEL;
    const api = await detectOllamaApi(apiHost);
    if (!api.reachable) {
        return {
            kind: 'skipped_llm_not_ready',
            reason: `Ollama is not running at ${apiHost}`,
            hint: 'Run `nuos-catalogue setup-llm` to set up local semantic search, then re-run `nuos-catalogue index`.',
        };
    }
    const model = await detectModelPresent(apiHost, modelId);
    if (!model.present) {
        return {
            kind: 'skipped_llm_not_ready',
            reason: `${modelId} is not pulled`,
            hint: 'Run `nuos-catalogue setup-llm` to pull the embedding model (~600 MB), then re-run `nuos-catalogue index`.',
        };
    }
    // LLM is ready. Whether the index file exists only selects the wording
    // of the progress and summary messages; the indexer runs either way.
    const isFirstRun = !existsSync(indexPath);
    if (isFirstRun) {
        out('Building search index for docs/build/ … (first run may take ~30 seconds)\n');
    }
    else {
        out('Refreshing search index (incremental — only changed files are re-embedded)…\n');
    }
    try {
        const { selectEmbedderFromEnv } = await import('../embedder/select.js');
        const { openStore } = await import('../store/open.js');
        const { runIndex } = await import('../indexer/upsert.js');
        const embedder = await selectEmbedderFromEnv();
        try {
            // The store is opened inside this `try` so that a failure to
            // open it still reaches the `finally` below and disposes the
            // embedder that was just created.
            // NOTE(review): the store handle is never explicitly closed —
            // confirm `openStore` results need no teardown.
            const store = await openStore({ storagePath: indexPath, dimensions: embedder.dimensions });
            const report = await runIndex({
                catalogueRoot,
                hashFilePath: hashPath,
                store,
                embedder,
                force: Boolean(opts.force),
                dryRun: false,
            });
            // "Changed" covers both newly indexed and updated files.
            const changed = report.indexed + report.updated;
            const secs = (report.durationMs / 1000).toFixed(1);
            if (isFirstRun) {
                out(`✓ Indexed ${report.indexed} file(s), ${report.chunks} chunks embedded in ${secs}s\n`);
            }
            else if (changed === 0) {
                out(`✓ Index up-to-date (${report.unchanged} files checked, none changed) in ${secs}s\n`);
            }
            else {
                out(`✓ Re-indexed ${changed} changed file(s) (${report.unchanged} unchanged), ` +
                    `${report.chunks} chunks embedded in ${secs}s\n`);
            }
            return {
                kind: 'ran',
                indexPath,
                indexed: changed,
                unchanged: report.unchanged,
                chunks: report.chunks,
                durationMs: report.durationMs,
            };
        }
        finally {
            // Unload-after-use commitment — embedder releases the model.
            await embedder.dispose();
        }
    }
    catch (err) {
        // Report the failure through `out` and as a result value instead
        // of propagating it to the calling command.
        const message = err instanceof Error ? err.message : String(err);
        out(`\n✗ Index refresh failed: ${message}\n`);
        out('Re-run `nuos-catalogue index` manually to retry.\n');
        return { kind: 'failed', error: message };
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nusoft/nuos-build-catalogue",
|
|
3
|
-
"version": "0.19.1",
|
|
3
|
+
"version": "0.20.1",
|
|
4
4
|
"description": "NuOS build-catalogue tooling: semantic search (WU 110) + migration runner that lifts markdown artefacts into JSON-backed workflow records (WU 111, Phase G).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|