ollama-intern-mcp 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -9
- package/SECURITY.md +4 -2
- package/dist/corpus/indexer.d.ts +22 -0
- package/dist/corpus/indexer.d.ts.map +1 -1
- package/dist/corpus/indexer.js +53 -6
- package/dist/corpus/indexer.js.map +1 -1
- package/dist/corpus/manifest.d.ts +6 -0
- package/dist/corpus/manifest.d.ts.map +1 -1
- package/dist/corpus/manifest.js +71 -2
- package/dist/corpus/manifest.js.map +1 -1
- package/dist/corpus/storage.d.ts +35 -0
- package/dist/corpus/storage.d.ts.map +1 -1
- package/dist/corpus/storage.js +148 -3
- package/dist/corpus/storage.js.map +1 -1
- package/dist/guardrails/compileCheck.js +4 -0
- package/dist/guardrails/compileCheck.js.map +1 -1
- package/dist/guardrails/timeouts.d.ts +6 -0
- package/dist/guardrails/timeouts.d.ts.map +1 -1
- package/dist/guardrails/timeouts.js +18 -1
- package/dist/guardrails/timeouts.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +25 -6
- package/dist/index.js.map +1 -1
- package/dist/observability.d.ts +19 -0
- package/dist/observability.d.ts.map +1 -1
- package/dist/observability.js +25 -3
- package/dist/observability.js.map +1 -1
- package/dist/ollama.d.ts +2 -0
- package/dist/ollama.d.ts.map +1 -1
- package/dist/ollama.js +36 -1
- package/dist/ollama.js.map +1 -1
- package/dist/prewarm.d.ts +10 -0
- package/dist/prewarm.d.ts.map +1 -1
- package/dist/prewarm.js +64 -34
- package/dist/prewarm.js.map +1 -1
- package/dist/profiles.d.ts +16 -0
- package/dist/profiles.d.ts.map +1 -1
- package/dist/profiles.js +26 -0
- package/dist/profiles.js.map +1 -1
- package/dist/semaphore.d.ts +21 -0
- package/dist/semaphore.d.ts.map +1 -1
- package/dist/semaphore.js +36 -0
- package/dist/semaphore.js.map +1 -1
- package/dist/tools/artifacts/export.d.ts.map +1 -1
- package/dist/tools/artifacts/export.js +16 -3
- package/dist/tools/artifacts/export.js.map +1 -1
- package/dist/tools/artifacts/scan.d.ts.map +1 -1
- package/dist/tools/artifacts/scan.js +24 -3
- package/dist/tools/artifacts/scan.js.map +1 -1
- package/dist/tools/corpusAnswer.d.ts.map +1 -1
- package/dist/tools/corpusAnswer.js +8 -2
- package/dist/tools/corpusAnswer.js.map +1 -1
- package/dist/tools/summarizeDeep.d.ts.map +1 -1
- package/dist/tools/summarizeDeep.js +2 -1
- package/dist/tools/summarizeDeep.js.map +1 -1
- package/dist/tools/triageLogs.d.ts.map +1 -1
- package/dist/tools/triageLogs.js +20 -0
- package/dist/tools/triageLogs.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
An MCP server that gives Claude Code a **local intern** with rules, tiers, a desk, and a filing cabinet. Claude picks the _tool_; the tool picks the _tier_ (Instant / Workhorse / Deep / Embed); the tier writes a file you can open next week.
|
|
19
19
|
|
|
20
|
-
**Also drives [Hermes Agent](https://github.com/NousResearch/
|
|
20
|
+
**Also drives [Hermes Agent](https://github.com/NousResearch/hermes-agent) on `hermes3:8b`** — validated end-to-end 2026-04-19. The default ladder is `hermes3:8b`; `qwen3:*` is the alternate rail. See [Use with Hermes](#use-with-hermes) below.
|
|
21
21
|
|
|
22
22
|
No cloud. No telemetry. No "autonomous" anything. Every call shows its work.
|
|
23
23
|
|
|
@@ -87,13 +87,11 @@ The full tool reference lives in the [handbook](https://mcp-tool-shop-org.github
|
|
|
87
87
|
|
|
88
88
|
## Install
|
|
89
89
|
|
|
90
|
-
|
|
91
|
-
npm install -g ollama-intern-mcp
|
|
92
|
-
```
|
|
90
|
+
Requires [Ollama](https://ollama.com) running locally and the tier models pulled (see [Model pulls](#model-pulls) below).
|
|
93
91
|
|
|
94
|
-
|
|
92
|
+
### Claude Code (recommended)
|
|
95
93
|
|
|
96
|
-
|
|
94
|
+
Most users install this by adding it to their Claude Code MCP server config — no global install required. Claude Code runs the server on demand via `npx`:
|
|
97
95
|
|
|
98
96
|
```json
|
|
99
97
|
{
|
|
@@ -114,9 +112,17 @@ Requires [Ollama](https://ollama.com) running locally and the tier models pulled
|
|
|
114
112
|
|
|
115
113
|
Same block, written to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows).
|
|
116
114
|
|
|
115
|
+
### Global install (advanced)
|
|
116
|
+
|
|
117
|
+
Only needed if you want the binary on your `PATH` for ad-hoc use outside Claude Code:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
npm install -g ollama-intern-mcp
|
|
121
|
+
```
|
|
122
|
+
|
|
117
123
|
### Use with Hermes
|
|
118
124
|
|
|
119
|
-
This MCP was validated end-to-end with [Hermes Agent](https://github.com/NousResearch/
|
|
125
|
+
This MCP was validated end-to-end with [Hermes Agent](https://github.com/NousResearch/hermes-agent) against `hermes3:8b` on Ollama (2026-04-19). Hermes is an external agent that *calls into* this MCP's frozen primitive surface — it does the planning, we do the work.
|
|
120
126
|
|
|
121
127
|
Reference config ([hermes.config.example.yaml](hermes.config.example.yaml) in this repo):
|
|
122
128
|
|
|
@@ -273,7 +279,7 @@ Built to the [Shipcheck](https://github.com/mcp-tool-shop-org/shipcheck) bar. Ha
|
|
|
273
279
|
- **A. Security** — SECURITY.md, threat model, no telemetry, path-safety, `confirm_write` on protected paths
|
|
274
280
|
- **B. Errors** — structured shape across all tool results; no raw stacks
|
|
275
281
|
- **C. Docs** — README current, CHANGELOG, LICENSE; tool schemas self-document
|
|
276
|
-
- **D. Hygiene** — `npm run verify` (
|
|
282
|
+
- **D. Hygiene** — `npm run verify` (481 tests), CI with dep scanning, Dependabot, lockfile, `engines.node`
|
|
277
283
|
|
|
278
284
|
---
|
|
279
285
|
|
|
@@ -282,7 +288,7 @@ Built to the [Shipcheck](https://github.com/mcp-tool-shop-org/shipcheck) bar. Ha
|
|
|
282
288
|
- **Phase 1 — Delegation Spine** ✓ shipped: atom surface, uniform envelope, tiered routing, guardrails
|
|
283
289
|
- **Phase 2 — Truth Spine** ✓ shipped: schema v2 chunking, BM25 + RRF, living corpora, evidence-backed briefs, retrieval eval pack
|
|
284
290
|
- **Phase 3 — Pack & Artifact Spine** ✓ shipped: fixed-pipeline packs with durable artifacts + continuity tier
|
|
285
|
-
- **Phase 4 — Adoption Spine**
|
|
291
|
+
- **Phase 4 — Adoption Spine** ✓ v2.0.1: three-stage health pass hardened corpus (TOCTOU, 50 MB file cap, symlink rejection, atomic writes, per-file failure capture), tool path traversal, observability (semaphore wait events, timeout error context, profile env-override logging, prewarm cold-start signal), test safety (module-load env snapshot across 10 files, `tools/call` E2E). Troubleshooting handbook + hardware minimums added for operators.
|
|
286
292
|
- **Phase 5 — M5 Max benchmarks** — publishable numbers once the hardware lands (~2026-04-24)
|
|
287
293
|
|
|
288
294
|
Phase by hardening layer. The atom/pack/artifact surface stays frozen.
|
package/SECURITY.md
CHANGED
|
@@ -13,9 +13,11 @@ The primary risks are not network-facing. They are:
|
|
|
13
13
|
|
|
14
14
|
## Reporting
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
Please do **not** file public issues for security bugs.
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
Open a private [security advisory](https://github.com/mcp-tool-shop-org/ollama-intern-mcp/security/advisories/new) via the **Security** tab on this repo. The advisory stays private until a fix is ready. We will acknowledge within 72 hours.
|
|
19
|
+
|
|
20
|
+
The repo is owned by **mcp-tool-shop-org**; advisories route to the org maintainers.
|
|
19
21
|
|
|
20
22
|
## Supported Versions
|
|
21
23
|
|
package/dist/corpus/indexer.d.ts
CHANGED
|
@@ -17,6 +17,21 @@ export interface IndexParams {
|
|
|
17
17
|
chunk_chars?: number;
|
|
18
18
|
chunk_overlap?: number;
|
|
19
19
|
client: OllamaClient;
|
|
20
|
+
/**
|
|
21
|
+
* Optional progress callback invoked after each input file is processed
|
|
22
|
+
* (success OR failure). `done` counts files that have been handled,
|
|
23
|
+
* `total` is params.paths.length, `currentPath` is the path just
|
|
24
|
+
* processed. Safe to ignore — purely observability. A second callback
|
|
25
|
+
* fires after each embed batch with done=total+batchIdx, so callers can
|
|
26
|
+
* see embed progress too; MCP tool layer can filter on `currentPath`
|
|
27
|
+
* starting with "embed:" to distinguish.
|
|
28
|
+
*/
|
|
29
|
+
onProgress?: (done: number, total: number, currentPath: string) => void;
|
|
30
|
+
}
|
|
31
|
+
/** One entry per path that could not be read/hashed during indexing. */
|
|
32
|
+
export interface IndexFailedPath {
|
|
33
|
+
path: string;
|
|
34
|
+
reason: string;
|
|
20
35
|
}
|
|
21
36
|
export interface IndexReport {
|
|
22
37
|
name: string;
|
|
@@ -35,6 +50,13 @@ export interface IndexReport {
|
|
|
35
50
|
* silent :latest drift.
|
|
36
51
|
*/
|
|
37
52
|
embed_model_resolved: string | null;
|
|
53
|
+
/**
|
|
54
|
+
* Paths that could not be read (size cap, symlink, permission denied,
|
|
55
|
+
* TOCTOU, etc.) during this index run. Indexing continues past these so
|
|
56
|
+
* one bad file in a batch of 1000 no longer halts the whole pass. Empty
|
|
57
|
+
* array on the happy path.
|
|
58
|
+
*/
|
|
59
|
+
failed_paths: IndexFailedPath[];
|
|
38
60
|
}
|
|
39
61
|
export declare function indexCorpus(params: IndexParams): Promise<IndexReport>;
|
|
40
62
|
//# sourceMappingURL=indexer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../src/corpus/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../src/corpus/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAUjD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,YAAY,CAAC;IACrB;;;;;;;;OAQG;IACH,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;CACzE;AAED,wEAAwE;AACxE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC;;;;;OAKG;IACH,YAAY,EAAE,eAAe,EAAE,CAAC;CACjC;AAiDD,wBAAsB,WAAW,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,CAoO3E"}
|
package/dist/corpus/indexer.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* This means `index` can be called repeatedly in daily use without
|
|
10
10
|
* burning the embed tier on unchanged content.
|
|
11
11
|
*/
|
|
12
|
-
import { readFile, stat } from "node:fs/promises";
|
|
12
|
+
import { readFile, stat, realpath, lstat } from "node:fs/promises";
|
|
13
13
|
import { resolve } from "node:path";
|
|
14
14
|
import { createHash } from "node:crypto";
|
|
15
15
|
import { chunkDocument, DEFAULT_CHUNK } from "./chunker.js";
|
|
@@ -17,11 +17,38 @@ import { CORPUS_SCHEMA_VERSION, loadCorpus, saveCorpus } from "./storage.js";
|
|
|
17
17
|
import { MANIFEST_SCHEMA_VERSION, loadManifest, saveManifest } from "./manifest.js";
|
|
18
18
|
import { InternError } from "../errors.js";
|
|
19
19
|
const EMBED_BATCH = 64;
|
|
20
|
+
/** Hard cap on input file size. Prevents OOM from a user pointing at a 100GB file. */
|
|
21
|
+
const MAX_FILE_BYTES = 50 * 1024 * 1024;
|
|
22
|
+
/**
|
|
23
|
+
* Read a file and hash it with TOCTOU protection.
|
|
24
|
+
*
|
|
25
|
+
* Invariant: a successful return means "the file was in exactly this state
|
|
26
|
+
* (size + mtime) when we hashed it". We stat BEFORE read (to enforce size
|
|
27
|
+
* cap and symlink rejection without reading bytes first), then stat AGAIN
|
|
28
|
+
* after read and fail if size or mtime drifted — that means the file
|
|
29
|
+
* mutated mid-read and the hash doesn't match the returned content.
|
|
30
|
+
*/
|
|
20
31
|
async function sha256File(path) {
|
|
21
|
-
|
|
32
|
+
// Symlink check FIRST — reject before any size/read work so a symlink to
|
|
33
|
+
// a sensitive file can't leak even partial bytes via error messages.
|
|
34
|
+
const lst = await lstat(path);
|
|
35
|
+
if (lst.isSymbolicLink()) {
|
|
36
|
+
throw new InternError("SOURCE_PATH_NOT_FOUND", `Refusing to index symlink: ${path}`, "Pass the real file path, not a symlink. Symlinks are rejected to avoid traversal into unintended targets.", false);
|
|
37
|
+
}
|
|
38
|
+
// Resolve to a real path as a belt-and-suspenders check against
|
|
39
|
+
// intermediate symlinked directories.
|
|
40
|
+
const realPath = await realpath(path);
|
|
41
|
+
const stBefore = await stat(realPath);
|
|
42
|
+
if (stBefore.size > MAX_FILE_BYTES) {
|
|
43
|
+
throw new InternError("SOURCE_PATH_NOT_FOUND", `File exceeds max size (${stBefore.size} bytes > ${MAX_FILE_BYTES} bytes cap): ${path}`, `Split the file or raise the cap. The 50MB limit exists to prevent OOM from a user pointing at a huge file.`, false);
|
|
44
|
+
}
|
|
45
|
+
const content = await readFile(realPath, "utf8");
|
|
22
46
|
const hash = "sha256:" + createHash("sha256").update(content).digest("hex");
|
|
23
|
-
const
|
|
24
|
-
|
|
47
|
+
const stAfter = await stat(realPath);
|
|
48
|
+
if (stAfter.size !== stBefore.size || stAfter.mtimeMs !== stBefore.mtimeMs) {
|
|
49
|
+
throw new InternError("SOURCE_PATH_NOT_FOUND", `File mutated during read (TOCTOU): ${path}`, "Another process wrote to the file while we were hashing it. Re-run the index.", true);
|
|
50
|
+
}
|
|
51
|
+
return { hash, mtime: stBefore.mtime.toISOString(), content };
|
|
25
52
|
}
|
|
26
53
|
export async function indexCorpus(params) {
|
|
27
54
|
const t0 = Date.now();
|
|
@@ -46,6 +73,11 @@ export async function indexCorpus(params) {
|
|
|
46
73
|
throw err;
|
|
47
74
|
}
|
|
48
75
|
}
|
|
76
|
+
// Refuse silent embed-model mismatch. Mixing vectors from different
|
|
77
|
+
// embed models in one corpus ruins search — the space isn't shared.
|
|
78
|
+
if (existing && existing.model_version !== params.model) {
|
|
79
|
+
throw new InternError("SCHEMA_INVALID", `Corpus "${params.name}" was indexed with embed model "${existing.model_version}"; refusing to re-index with "${params.model}".`, `Re-index with the original model, or pass a different corpus name to keep the new model isolated.`, false);
|
|
80
|
+
}
|
|
49
81
|
const reusable = new Map();
|
|
50
82
|
if (existing && existing.model_version === params.model) {
|
|
51
83
|
for (const c of existing.chunks) {
|
|
@@ -70,6 +102,7 @@ export async function indexCorpus(params) {
|
|
|
70
102
|
// Pass 1: read + hash every input, reuse where possible, collect chunks to embed.
|
|
71
103
|
const toEmbedTexts = [];
|
|
72
104
|
const toEmbedMeta = [];
|
|
105
|
+
const failedPaths = [];
|
|
73
106
|
for (const rawPath of params.paths) {
|
|
74
107
|
const absPath = resolve(rawPath);
|
|
75
108
|
seenPaths.add(absPath);
|
|
@@ -78,7 +111,14 @@ export async function indexCorpus(params) {
|
|
|
78
111
|
fileInfo = await sha256File(absPath);
|
|
79
112
|
}
|
|
80
113
|
catch (err) {
|
|
81
|
-
|
|
114
|
+
// Stage C humanization: capture per-file failure and continue so one
|
|
115
|
+
// bad file in a batch of 1000 does not halt the whole pass. Caller
|
|
116
|
+
// sees failed_paths in the report.
|
|
117
|
+
failedPaths.push({
|
|
118
|
+
path: rawPath,
|
|
119
|
+
reason: err.message ?? String(err),
|
|
120
|
+
});
|
|
121
|
+
continue;
|
|
82
122
|
}
|
|
83
123
|
totalChars += fileInfo.content.length;
|
|
84
124
|
const reuseKey = `${absPath}::${fileInfo.hash}`;
|
|
@@ -123,8 +163,14 @@ export async function indexCorpus(params) {
|
|
|
123
163
|
}
|
|
124
164
|
for (let j = 0; j < batch.length; j++) {
|
|
125
165
|
const meta = toEmbedMeta[i + j];
|
|
166
|
+
// ID is stable per (content-hash, chunk_index): re-indexing the
|
|
167
|
+
// same content produces the same chunk IDs, and a content change
|
|
168
|
+
// flips the hash so IDs can't collide across runs. Width of 6
|
|
169
|
+
// hex on the index is still >16M per file, but it's now scoped
|
|
170
|
+
// to file+content not to global run order.
|
|
171
|
+
const hashShort = meta.file_hash.replace(/^sha256:/, "").slice(0, 8);
|
|
126
172
|
allChunks.push({
|
|
127
|
-
id: `${params.name}-${
|
|
173
|
+
id: `${params.name}-${hashShort}-${meta.chunk_index.toString(16).padStart(6, "0")}`,
|
|
128
174
|
path: meta.path,
|
|
129
175
|
file_hash: meta.file_hash,
|
|
130
176
|
file_mtime: meta.file_mtime,
|
|
@@ -208,6 +254,7 @@ export async function indexCorpus(params) {
|
|
|
208
254
|
dropped_files: droppedFiles,
|
|
209
255
|
elapsed_ms: Date.now() - t0,
|
|
210
256
|
embed_model_resolved: embedModelResolved,
|
|
257
|
+
failed_paths: failedPaths,
|
|
211
258
|
};
|
|
212
259
|
}
|
|
213
260
|
//# sourceMappingURL=indexer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../src/corpus/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../src/corpus/indexer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACnE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,aAAa,EAAqC,MAAM,cAAc,CAAC;AAC/F,OAAO,EAAE,qBAAqB,EAAE,UAAU,EAAE,UAAU,EAAqC,MAAM,cAAc,CAAC;AAChH,OAAO,EAAE,uBAAuB,EAAE,YAAY,EAAE,YAAY,EAAuB,MAAM,eAAe,CAAC;AACzG,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,MAAM,WAAW,GAAG,EAAE,CAAC;AACvB,sFAAsF;AACtF,MAAM,cAAc,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAqDxC;;;;;;;;GAQG;AACH,KAAK,UAAU,UAAU,CAAC,IAAY;IACpC,yEAAyE;IACzE,qEAAqE;IACrE,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,GAAG,CAAC,cAAc,EAAE,EAAE,CAAC;QACzB,MAAM,IAAI,WAAW,CACnB,uBAAuB,EACvB,8BAA8B,IAAI,EAAE,EACpC,2GAA2G,EAC3G,KAAK,CACN,CAAC;IACJ,CAAC;IACD,gEAAgE;IAChE,sCAAsC;IACtC,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;IACtC,IAAI,QAAQ,CAAC,IAAI,GAAG,cAAc,EAAE,CAAC;QACnC,MAAM,IAAI,WAAW,CACnB,uBAAuB,EACvB,0BAA0B,QAAQ,CAAC,IAAI,YAAY,cAAc,gBAAgB,IAAI,EAAE,EACvF,4GAA4G,EAC5G,KAAK,CACN,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,SAAS,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,CAAC,IAAI,IAAI,OAAO,CAAC,OAAO,KAAK,QAAQ,CAAC,OAAO,EAAE,CAAC;QAC3E,MAAM,IAAI,WAAW,CACnB,uBAAuB,EACvB,sCAAsC,IAAI,EAAE,EAC5C,+EAA+E,EAC/E,IAAI,CACL,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,CAAC;AAChE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,MAAmB;IACnD,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACtB,MAAM,IAAI,GAAiB;QACzB,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,aAAa,CAAC,WAAW;QAC5D,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,aAAa,CAAC,aAAa;KACnE,CAAC;IAEF,2DAA2D;IAC3D,wEAAwE;IACxE,uEAAuE;IACvE,mEAAmE;IACnE,6BAA6B;IAC7B,IAAI,QAAQ,GAAsB,IAAI,CAAC;IACvC,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,WAAW,IAAI,GAAG,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;YAChE,QAAQ,GAAG,IAAI,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IACD,oEAAoE;IACpE,oEAAoE;IACpE,IAAI,QAAQ,IAAI,QAAQ,CAAC,aAAa,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC;QACxD,MAAM,IAAI,WAAW,CACnB,gBAAgB,EAChB,WAAW,MAAM,CAAC,IAAI,mCAAmC,QAAQ,CAAC,aAAa,iCAAiC,MAAM,CAAC,KAAK,IAAI,EAChI,mGAAmG,EACnG,KAAK,CACN,CAAC;IACJ,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAyB,CAAC;IAClD,IAAI,QAAQ,IAAI,QAAQ,CAAC,aAAa,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC;QACxD,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,SAAS,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;YACpC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACZ,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAkB,EAAE,CAAC;IACpC,MAAM,MAAM,GAAkC,EAAE,CAAC;IACjD,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,kBAAkB,GAAG,CAAC,CAAC;IAC3B,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IAEpC,8EAA8E;IAC9E,IAAI,QAAQ,EAAE,CAAC;QACb,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC;YAC3D,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAChB,CAAC;IACH,CAAC;IAED,kFAAkF;IAClF,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,WAAW,GAUZ,EAAE,CAAC;IAER,MAAM,WAAW,GAAsB,EAAE,CAAC;IAC1C,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QACjC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACvB,IAAI,QAA0D,CAAC;QAC/D,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,qEAAqE;YACrE,mEAAmE;YACnE,mCAAmC;YACnC,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,OAAO;gBACb,MAAM,EAAG,GAAa,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC;aAC9C,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QACD,UAAU,IAAI,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC;QACtC,MAAM,QAAQ,GAAG,GAAG,OAAO,KAAK,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,MAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YAC1B,WAAW,IAAI,MAAM,CAAC,MAAM,CAAC;YAC7B,SAAS;QACX,CAAC;QACD,gDAAgD;QAChD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAChE,MAAM,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC;QACxB,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;YACxB,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,OAAO;gBACb,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,UAAU,EAAE,QAAQ,CAAC,KAAK;gBAC1B,WAAW,EAAE,EAAE,CAAC,KAAK;gBACrB,UAAU,EAAE,EAAE,CAAC,UAAU;gBACzB,QAAQ,EAAE,EAAE,CAAC,QAAQ;gBACrB,IAAI,EAAE,EAAE,CAAC,IAAI;gBACb,YAAY,EAAE,EAAE,CAAC,YAAY;gBAC7B,UAAU,EAAE,EAAE,CAAC,UAAU;aAC1B,CAAC,CAAC;YACH,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,qEAAqE;IACrE,sEAAsE;IACtE,oDAAoD;IACpD,IAAI,kBAAkB,GAAkB,IAAI,CAAC;IAC7C,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;YAC1D,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;YACrD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YAC9E,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;gBAC5C,MAAM,IAAI,KAAK,CACb,kBAAkB,IAAI,CAAC,UAAU,CAAC,MAAM,gBAAgB,KAAK,CAAC,MAAM,SAAS,CAC9E,CAAC;YACJ,CAAC;YACD,IAAI,kBAAkB,KAAK,IAAI,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAClE,kBAAkB,GAAG,IAAI,CAAC,KAAK,CAAC;YAClC,CAAC;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAChC,gEAAgE;gBAChE,iEAAiE;gBACjE,8DAA8D;gBAC9D,+DAA+D;gBAC/D,2CAA2C;gBAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACrE,SAAS,CAAC,IAAI,CAAC;oBACb,EAAE,EAAE,GAAG,MAAM,CAAC,IAAI,IAAI,SAAS,IAAI,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;oBACnF,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;oBAC1B,YAAY,EAAE,IAAI,CAAC,YAAY;oBAC/B,UAAU,EAAE,IAAI,CAAC,UAAU;iBAC5B,CAAC,CAAC;gBACH,kBAAkB,IAAI,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;YAC9B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,YAAY;QAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;IAE/C,wDAAwD;IACxD,MAAM,YAAY,GAAkC,EAAE,CAAC;IACvD,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,IAAI,CAAC,CAAC,IAAI,IAAI,MAAM;YAAE,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aACvD,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,YAAY,CAAC;YAAE,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAClE,CAAC;IAED,MAAM,MAAM,GAAe;QACzB,cAAc,EAAE,qBAAqB;QACrC,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,aAAa,EAAE,MAAM,CAAC,KAAK;QAC3B,YAAY,EAAE,IAAI;QAClB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,KAAK,EAAE;YACL,SAAS,EAAE,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI;YACrD,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,WAAW,EAAE,UAAU;SACxB;QACD,MAAM,EAAE,YAAY;QACpB,MAAM,EAAE,SAAS;KAClB,CAAC;IAEF,MAAM,UAAU,CAAC,MAAM,CAAC,CAAC;IAEzB,oEAAoE;IACpE,sEAAsE;IACtE,oCAAoC;IACpC,MAAM,aAAa,GAAG,CAAC,GAAG,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5C,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;IACvE,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,sEAAsE;IACtE,8EAA8E;IAC9E,MAAM,mBAAmB,GAAG,kBAAkB,IAAI,YAAY,EAAE,oBAAoB,IAAI,IAAI,CAAC;IAC7F,MAAM,QAAQ,GAAmB;QAC/B,cAAc,EAAE,uBAAuB;QACvC,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,KAAK,EAAE,aAAa;QACpB,WAAW,EAAE,MAAM,CAAC,KAAK;QACzB,oBAAoB,EAAE,mBAAmB;QACzC,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,UAAU,EAAE,YAAY,EAAE,UAAU,IAAI,GAAG;QAC3C,UAAU,EAAE,GAAG;KAChB,CAAC;IACF,MAAM,YAAY,CAAC,QAAQ,CAAC,CAAC;IAE7B,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,aAAa,EAAE,MAAM,CAAC,aAAa;QACnC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,SAAS;QACjC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM;QAC3B,WAAW,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW;QACrC,aAAa,EAAE,WAAW;QAC1B,qBAAqB,EAAE,kBAAkB;QACzC,aAAa,EAAE,YAAY;QAC3B,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE;QAC3B,oBAAoB,EAAE,kBAAkB;QACxC,YAAY,EAAE,WAAW;KAC1B,CAAC;AACJ,CAAC"}
|
|
@@ -13,6 +13,12 @@
|
|
|
13
13
|
export declare const MANIFEST_SCHEMA_VERSION = 2;
|
|
14
14
|
export interface CorpusManifest {
|
|
15
15
|
schema_version: number;
|
|
16
|
+
/**
|
|
17
|
+
* Package version that wrote this manifest. Loader rejects when this is
|
|
18
|
+
* higher than the current build — prevents a newer build writing a
|
|
19
|
+
* manifest that an older build would silently downgrade.
|
|
20
|
+
*/
|
|
21
|
+
schema_version_written_by?: string;
|
|
16
22
|
name: string;
|
|
17
23
|
/** Absolute paths the corpus is declared to contain. */
|
|
18
24
|
paths: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"manifest.d.ts","sourceRoot":"","sources":["../../src/corpus/manifest.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;
|
|
1
|
+
{"version":3,"file":"manifest.d.ts","sourceRoot":"","sources":["../../src/corpus/manifest.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAUH,eAAO,MAAM,uBAAuB,IAAI,CAAC;AAoFzC,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,iFAAiF;IACjF,WAAW,EAAE,MAAM,CAAC;IACpB;;;;;;;;OAQG;IACH,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAMD,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEjD;AAED,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAsB/E;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAK1E"}
|
package/dist/corpus/manifest.js
CHANGED
|
@@ -11,12 +11,81 @@
|
|
|
11
11
|
* manifest as a side effect; ollama_corpus_refresh reads it.
|
|
12
12
|
*/
|
|
13
13
|
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
14
|
-
import { existsSync } from "node:fs";
|
|
14
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
15
15
|
import { homedir } from "node:os";
|
|
16
|
-
import { join, dirname } from "node:path";
|
|
16
|
+
import { join, dirname, isAbsolute, normalize, sep } from "node:path";
|
|
17
|
+
import { fileURLToPath } from "node:url";
|
|
17
18
|
import { InternError } from "../errors.js";
|
|
18
19
|
import { assertValidCorpusName } from "./storage.js";
|
|
19
20
|
export const MANIFEST_SCHEMA_VERSION = 2;
|
|
21
|
+
/**
|
|
22
|
+
* Package version stamped on every manifest write. Loader refuses to read
|
|
23
|
+
* a manifest whose writer version is newer than this build, to prevent
|
|
24
|
+
* silent downgrade even when schema_version matches.
|
|
25
|
+
*/
|
|
26
|
+
const MANIFEST_WRITER_VERSION = (() => {
|
|
27
|
+
try {
|
|
28
|
+
const pkgUrl = new URL("../../package.json", import.meta.url);
|
|
29
|
+
const raw = readFileSync(fileURLToPath(pkgUrl), "utf8");
|
|
30
|
+
return JSON.parse(raw).version ?? "0.0.0";
|
|
31
|
+
}
|
|
32
|
+
catch {
|
|
33
|
+
return "0.0.0";
|
|
34
|
+
}
|
|
35
|
+
})();
|
|
36
|
+
function compareVersions(a, b) {
|
|
37
|
+
const pa = a.split(".").map((n) => parseInt(n, 10));
|
|
38
|
+
const pb = b.split(".").map((n) => parseInt(n, 10));
|
|
39
|
+
for (let i = 0; i < 3; i++) {
|
|
40
|
+
const ai = pa[i] ?? 0;
|
|
41
|
+
const bi = pb[i] ?? 0;
|
|
42
|
+
if (Number.isNaN(ai) || Number.isNaN(bi))
|
|
43
|
+
return 0;
|
|
44
|
+
if (ai < bi)
|
|
45
|
+
return -1;
|
|
46
|
+
if (ai > bi)
|
|
47
|
+
return 1;
|
|
48
|
+
}
|
|
49
|
+
return 0;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Allowlisted roots a manifest's paths may live under. Defaults to the
|
|
53
|
+
* user's home dir; extendable via INTERN_CORPUS_ALLOWED_ROOTS (colon-
|
|
54
|
+
* separated on POSIX, semicolon-separated on Windows). A malicious
|
|
55
|
+
* manifest that points at /etc/shadow or C:/Windows/... is rejected here.
|
|
56
|
+
*/
|
|
57
|
+
function allowedRoots() {
|
|
58
|
+
const extra = process.env.INTERN_CORPUS_ALLOWED_ROOTS;
|
|
59
|
+
const roots = [homedir()];
|
|
60
|
+
if (extra) {
|
|
61
|
+
const sep = process.platform === "win32" ? ";" : ":";
|
|
62
|
+
for (const r of extra.split(sep)) {
|
|
63
|
+
if (r.trim())
|
|
64
|
+
roots.push(r.trim());
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
return roots.map((r) => normalize(r));
|
|
68
|
+
}
|
|
69
|
+
function assertSafePath(p) {
|
|
70
|
+
if (!isAbsolute(p)) {
|
|
71
|
+
throw new InternError("SCHEMA_INVALID", `Manifest path is not absolute: ${p}`, "All manifest paths must be absolute. Re-run ollama_corpus_index to rewrite the manifest with resolved paths.", false);
|
|
72
|
+
}
|
|
73
|
+
const normalized = normalize(p);
|
|
74
|
+
// Reject any `..` segments that survived normalize (shouldn't happen on
|
|
75
|
+
// absolute paths, but be defensive).
|
|
76
|
+
const segments = normalized.split(sep);
|
|
77
|
+
if (segments.includes("..")) {
|
|
78
|
+
throw new InternError("SCHEMA_INVALID", `Manifest path contains traversal segment after normalize: ${p}`, "Reject corpora whose manifest was hand-edited with `..` segments. Re-index with trusted paths.", false);
|
|
79
|
+
}
|
|
80
|
+
const roots = allowedRoots();
|
|
81
|
+
const ok = roots.some((root) => {
|
|
82
|
+
const r = root.endsWith(sep) ? root : root + sep;
|
|
83
|
+
return normalized === root || normalized.startsWith(r);
|
|
84
|
+
});
|
|
85
|
+
if (!ok) {
|
|
86
|
+
throw new InternError("SCHEMA_INVALID", `Manifest path is outside allowed roots: ${p}`, `Path must live under one of: ${roots.join(", ")}. Set INTERN_CORPUS_ALLOWED_ROOTS to add more.`, false);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
20
89
|
function manifestDir() {
|
|
21
90
|
return process.env.INTERN_CORPUS_DIR ?? join(homedir(), ".ollama-intern", "corpora");
|
|
22
91
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"manifest.js","sourceRoot":"","sources":["../../src/corpus/manifest.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"manifest.js","sourceRoot":"","sources":["../../src/corpus/manifest.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AAErD,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAEzC;;;;GAIG;AACH,MAAM,uBAAuB,GAAG,CAAC,GAAG,EAAE;IACpC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,oBAAoB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9D,MAAM,GAAG,GAAG,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;QACxD,OAAQ,IAAI,CAAC,KAAK,CAAC,GAAG,CAA0B,CAAC,OAAO,IAAI,OAAO,CAAC;IACtE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,OAAO,CAAC;IACjB,CAAC;AACH,CAAC,CAAC,EAAE,CAAC;AAEL,SAAS,eAAe,CAAC,CAAS,EAAE,CAAS;IAC3C,MAAM,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACpD,MAAM,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,IAAI,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;YAAE,OAAO,CAAC,CAAC;QACnD,IAAI,EAAE,GAAG,EAAE;YAAE,OAAO,CAAC,CAAC,CAAC;QACvB,IAAI,EAAE,GAAG,EAAE;YAAE,OAAO,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;;;;GAKG;AACH,SAAS,YAAY;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC;IACtD,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IAC1B,IAAI,KAAK,EAAE,CAAC;QACV,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QACrD,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,CAAC,IAAI,EAAE;gBAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACrC,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,cAAc,CAAC,CAAS;IAC/B,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QACnB,MAAM,IAAI,WAAW,CACnB,gBAAgB,EAChB,kCAAkC,CAAC,EAAE,EACrC,8GAA8G,EAC9G,KAAK,CACN,CAAC;IACJ,CAAC;IACD,MAAM,UAAU,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IAChC,wEAAwE;IACxE,qCAAqC;IACrC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACvC,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5B,MAAM,IAAI,WAAW,CACnB,gBAAgB,EAChB,6DAA6D,CAAC,EAAE,EAChE,gGAAgG,EAChG,KAAK,CACN,CAAC;IACJ,CAAC;IACD,MAAM,KAAK,GAAG,YAAY,EAAE,CAAC;IAC7B,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;QAC7B,MAAM,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC;QACjD,OAAO,UAAU,KAAK,IAAI,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IACH,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,MAAM,IAAI,WAAW,CACnB,gBAAgB,EAChB,2CAA2C,CAAC,EAAE,EAC9C,gCAAgC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,gDAAgD,EAChG,KAAK,CACN,CAAC;IACJ,CAAC;AACH,CAAC;AA+BD,SAAS,WAAW;IAClB,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,gBAAgB,EAAE,SAAS,CAAC,CAAC;AACvF,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,IAAI,CAAC,WAAW,EAAE,EAAE,GAAG,IAAI,gBAAgB,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC7C,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAChC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA0D,CAAC;IACxF,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC;IACpC,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;QAChB,kEAAkE;QAClE,qEAAqE;QACrE,yDAAyD;QACzD,OAAO,EAAE,GAAI,MAAyB,EAAE,cAAc,EAAE,uBAAuB,EAAE,oBAAoB,EAAE,IAAI,EAAE,CAAC;IAChH,CAAC;IACD,IAAI,KAAK,KAAK,uBAAuB,EAAE,CAAC;QACtC,MAAM,IAAI,WAAW,CACnB,gBAAgB,EAChB,wBAAwB,IAAI,mBAAmB,KAAK,IAAI,SAAS,yBAAyB,uBAAuB,WAAW,IAAI,EAAE,EAClI,uCAAuC,IAAI,sEAAsE,EACjH,KAAK,CACN,CAAC;IACJ,CAAC;IACD,OAAO,MAAwB,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAwB;IACzD,qBAAqB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACrC,MAAM,IAAI,GAAG,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAChD,MAAM,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;AACnE,CAAC"}
|
package/dist/corpus/storage.d.ts
CHANGED
|
@@ -8,6 +8,13 @@
|
|
|
8
8
|
* Claude — search handlers strip them before returning.
|
|
9
9
|
*/
|
|
10
10
|
export declare const CORPUS_SCHEMA_VERSION = 2;
|
|
11
|
+
/**
|
|
12
|
+
* Hard cap on in-memory chunk count per corpus. A single JSON file that
|
|
13
|
+
* holds 1M+ chunks will blow past Node's string-length ceiling and OOM on
|
|
14
|
+
* write. If you hit this, split the corpus.
|
|
15
|
+
*/
|
|
16
|
+
export declare const MAX_CHUNKS = 100000;
|
|
17
|
+
export declare const SCHEMA_WRITER_VERSION: string;
|
|
11
18
|
export type ChunkType = "heading" | "paragraph" | "code" | "list" | "frontmatter";
|
|
12
19
|
export interface CorpusChunk {
|
|
13
20
|
id: string;
|
|
@@ -24,6 +31,13 @@ export interface CorpusChunk {
|
|
|
24
31
|
}
|
|
25
32
|
export interface CorpusFile {
|
|
26
33
|
schema_version: number;
|
|
34
|
+
/**
|
|
35
|
+
* Package version that wrote this file. Used to reject the case where a
|
|
36
|
+
* newer build wrote the corpus and an older build (still on the same
|
|
37
|
+
* schema number) loads it and silently downgrades. Optional for
|
|
38
|
+
* backward-compat with files written before this field existed.
|
|
39
|
+
*/
|
|
40
|
+
schema_version_written_by?: string;
|
|
27
41
|
name: string;
|
|
28
42
|
model_version: string;
|
|
29
43
|
model_digest: string | null;
|
|
@@ -53,4 +67,25 @@ export interface CorpusSummary {
|
|
|
53
67
|
bytes_on_disk: number;
|
|
54
68
|
}
|
|
55
69
|
export declare function listCorpora(): Promise<CorpusSummary[]>;
|
|
70
|
+
/**
|
|
71
|
+
* Fast stale-detection primitive.
|
|
72
|
+
*
|
|
73
|
+
* Compares each manifest-declared path's current stat.mtime against the
|
|
74
|
+
* mtime stored in the corpus's chunks. Returns as soon as any drift is
|
|
75
|
+
* found — does NOT re-read file contents or re-hash, so it's cheap enough
|
|
76
|
+
* for callers to poll before deciding whether to pay the full refresh
|
|
77
|
+
* cost.
|
|
78
|
+
*
|
|
79
|
+
* Reasons:
|
|
80
|
+
* - "no_corpus" — corpus JSON missing
|
|
81
|
+
* - "no_manifest" — manifest JSON missing
|
|
82
|
+
* - "path_missing:<path>" — manifest declares a path that isn't on disk
|
|
83
|
+
* - "path_added:<path>" — manifest declares a path the corpus has never seen
|
|
84
|
+
* - "path_removed:<path>" — corpus has a path the manifest no longer lists
|
|
85
|
+
* - "mtime_drift:<path>" — file's mtime is later than the corpus record
|
|
86
|
+
*/
|
|
87
|
+
export declare function isCorpusStale(name: string): Promise<{
|
|
88
|
+
stale: boolean;
|
|
89
|
+
reason?: string;
|
|
90
|
+
}>;
|
|
56
91
|
//# sourceMappingURL=storage.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"storage.d.ts","sourceRoot":"","sources":["../../src/corpus/storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;
|
|
1
|
+
{"version":3,"file":"storage.d.ts","sourceRoot":"","sources":["../../src/corpus/storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAUH,eAAO,MAAM,qBAAqB,IAAI,CAAC;AACvC;;;;GAIG;AACH,eAAO,MAAM,UAAU,SAAU,CAAC;AAelC,eAAO,MAAM,qBAAqB,QAAc,CAAC;AAEjD,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,GAAG,aAAa,CAAC;AAElF,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,UAAU,EAAE,SAAS,CAAC;CACvB;AAED,MAAM,WAAW,UAAU;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB;;;;;OAKG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC;IACtC,MAAM,EAAE,WAAW,EAAE,CAAC;CACvB;AAgBD,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE/C;AAID,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CASxD;AAED,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CA4BzE;AAED,wBAAsB,UAAU,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAuClE;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,wBAAsB,WAAW,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC,CA4B5D;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,GACX,OAAO,CAAC;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CA4C9C"}
|
package/dist/corpus/storage.js
CHANGED
|
@@ -7,12 +7,52 @@
|
|
|
7
7
|
* Raw vectors are stored (this file is the database), but they never reach
|
|
8
8
|
* Claude — search handlers strip them before returning.
|
|
9
9
|
*/
|
|
10
|
-
import { readFile,
|
|
11
|
-
import { existsSync } from "node:fs";
|
|
10
|
+
import { readFile, mkdir, readdir, stat, rename, open, unlink } from "node:fs/promises";
|
|
11
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
12
12
|
import { homedir } from "node:os";
|
|
13
13
|
import { join, dirname } from "node:path";
|
|
14
|
+
import { fileURLToPath } from "node:url";
|
|
14
15
|
import { InternError } from "../errors.js";
|
|
16
|
+
import { loadManifest } from "./manifest.js";
|
|
15
17
|
export const CORPUS_SCHEMA_VERSION = 2;
|
|
18
|
+
/**
|
|
19
|
+
* Hard cap on in-memory chunk count per corpus. A single JSON file that
|
|
20
|
+
* holds 1M+ chunks will blow past Node's string-length ceiling and OOM on
|
|
21
|
+
* write. If you hit this, split the corpus.
|
|
22
|
+
*/
|
|
23
|
+
export const MAX_CHUNKS = 100_000;
|
|
24
|
+
/**
|
|
25
|
+
* Stamped into every write so a newer-schema file is never silently
|
|
26
|
+
* downgraded by an older build. Resolved at import time from the running
|
|
27
|
+
* package version.
|
|
28
|
+
*/
|
|
29
|
+
const PKG_VERSION = (() => {
|
|
30
|
+
try {
|
|
31
|
+
const pkgUrl = new URL("../../package.json", import.meta.url);
|
|
32
|
+
const raw = readFileSync(fileURLToPath(pkgUrl), "utf8");
|
|
33
|
+
return JSON.parse(raw).version ?? "0.0.0";
|
|
34
|
+
}
|
|
35
|
+
catch {
|
|
36
|
+
return "0.0.0";
|
|
37
|
+
}
|
|
38
|
+
})();
|
|
39
|
+
export const SCHEMA_WRITER_VERSION = PKG_VERSION;
|
|
40
|
+
/** semver-ish compare: returns -1 if a<b, 0 if equal, 1 if a>b. Falls back to 0 on parse error. */
|
|
41
|
+
function compareVersions(a, b) {
|
|
42
|
+
const pa = a.split(".").map((n) => parseInt(n, 10));
|
|
43
|
+
const pb = b.split(".").map((n) => parseInt(n, 10));
|
|
44
|
+
for (let i = 0; i < 3; i++) {
|
|
45
|
+
const ai = pa[i] ?? 0;
|
|
46
|
+
const bi = pb[i] ?? 0;
|
|
47
|
+
if (Number.isNaN(ai) || Number.isNaN(bi))
|
|
48
|
+
return 0;
|
|
49
|
+
if (ai < bi)
|
|
50
|
+
return -1;
|
|
51
|
+
if (ai > bi)
|
|
52
|
+
return 1;
|
|
53
|
+
}
|
|
54
|
+
return 0;
|
|
55
|
+
}
|
|
16
56
|
export function corpusDir() {
|
|
17
57
|
return process.env.INTERN_CORPUS_DIR ?? join(homedir(), ".ollama-intern", "corpora");
|
|
18
58
|
}
|
|
@@ -36,13 +76,51 @@ export async function loadCorpus(name) {
|
|
|
36
76
|
if (found !== CORPUS_SCHEMA_VERSION) {
|
|
37
77
|
throw new InternError("SCHEMA_INVALID", `Corpus "${name}" is at schema v${found ?? "unknown"}; this build expects v${CORPUS_SCHEMA_VERSION}. File: ${path}`, `Re-index to upgrade in place: ollama_corpus_index({ name: "${name}", paths: [<your source paths>] }). No migration is performed — the re-index rewrites ${path} with the current schema.`, false);
|
|
38
78
|
}
|
|
79
|
+
// Refuse to load a corpus that was written by a newer pkg version than
|
|
80
|
+
// ours. Same schema number, but a newer build may have added fields the
|
|
81
|
+
// current build would lose on the next write.
|
|
82
|
+
const writtenBy = parsed.schema_version_written_by;
|
|
83
|
+
if (typeof writtenBy === "string" && compareVersions(writtenBy, SCHEMA_WRITER_VERSION) > 0) {
|
|
84
|
+
throw new InternError("SCHEMA_INVALID", `Corpus "${name}" was written by v${writtenBy}; this build is v${SCHEMA_WRITER_VERSION} and refuses to downgrade. File: ${path}`, `Upgrade ollama-intern-mcp to v${writtenBy} or newer, or re-index after downgrading the package deliberately.`, false);
|
|
85
|
+
}
|
|
39
86
|
return parsed;
|
|
40
87
|
}
|
|
41
88
|
export async function saveCorpus(corpus) {
|
|
42
89
|
assertValidCorpusName(corpus.name);
|
|
90
|
+
// Refuse to serialize absurdly large corpora — a single JSON.stringify
|
|
91
|
+
// over 100k+ chunks will hit Node's max string length and OOM.
|
|
92
|
+
if (corpus.chunks.length > MAX_CHUNKS) {
|
|
93
|
+
throw new InternError("SCHEMA_INVALID", `Corpus "${corpus.name}" has ${corpus.chunks.length} chunks; cap is ${MAX_CHUNKS}.`, `Split this into multiple smaller corpora (e.g. by directory or topic). A single JSON file this large will blow Node's string-length ceiling on write.`, false);
|
|
94
|
+
}
|
|
43
95
|
const path = corpusPath(corpus.name);
|
|
44
96
|
await mkdir(dirname(path), { recursive: true });
|
|
45
|
-
|
|
97
|
+
// Stamp the writer version so older builds can refuse to downgrade.
|
|
98
|
+
const stamped = { ...corpus, schema_version_written_by: SCHEMA_WRITER_VERSION };
|
|
99
|
+
const payload = JSON.stringify(stamped);
|
|
100
|
+
// Observability: payload size in bytes (UTF-8) so ops can spot runaway growth.
|
|
101
|
+
// Using Buffer.byteLength is accurate for non-ASCII content.
|
|
102
|
+
// eslint-disable-next-line no-console
|
|
103
|
+
console.error(`[corpus:save] name=${corpus.name} chunks=${corpus.chunks.length} bytes=${Buffer.byteLength(payload, "utf8")}`);
|
|
104
|
+
// Atomic write: write to <path>.tmp, fsync, then rename. If Node crashes
|
|
105
|
+
// mid-write the original file is intact — rename on the same filesystem
|
|
106
|
+
// is atomic on both POSIX and NTFS.
|
|
107
|
+
const tmpPath = `${path}.tmp`;
|
|
108
|
+
const fh = await open(tmpPath, "w");
|
|
109
|
+
try {
|
|
110
|
+
await fh.writeFile(payload, "utf8");
|
|
111
|
+
await fh.sync();
|
|
112
|
+
}
|
|
113
|
+
finally {
|
|
114
|
+
await fh.close();
|
|
115
|
+
}
|
|
116
|
+
try {
|
|
117
|
+
await rename(tmpPath, path);
|
|
118
|
+
}
|
|
119
|
+
catch (err) {
|
|
120
|
+
// Best-effort cleanup of the temp file if rename failed.
|
|
121
|
+
await unlink(tmpPath).catch(() => { });
|
|
122
|
+
throw err;
|
|
123
|
+
}
|
|
46
124
|
}
|
|
47
125
|
export async function listCorpora() {
|
|
48
126
|
const dir = corpusDir();
|
|
@@ -78,4 +156,71 @@ export async function listCorpora() {
|
|
|
78
156
|
summaries.sort((a, b) => a.name.localeCompare(b.name));
|
|
79
157
|
return summaries;
|
|
80
158
|
}
|
|
159
|
+
/**
|
|
160
|
+
* Fast stale-detection primitive.
|
|
161
|
+
*
|
|
162
|
+
* Compares each manifest-declared path's current stat.mtime against the
|
|
163
|
+
* mtime stored in the corpus's chunks. Returns as soon as any drift is
|
|
164
|
+
* found — does NOT re-read file contents or re-hash, so it's cheap enough
|
|
165
|
+
* for callers to poll before deciding whether to pay the full refresh
|
|
166
|
+
* cost.
|
|
167
|
+
*
|
|
168
|
+
* Reasons:
|
|
169
|
+
* - "no_corpus" — corpus JSON missing
|
|
170
|
+
* - "no_manifest" — manifest JSON missing
|
|
171
|
+
* - "path_missing:<path>" — manifest declares a path that isn't on disk
|
|
172
|
+
* - "path_added:<path>" — manifest declares a path the corpus has never seen
|
|
173
|
+
* - "path_removed:<path>" — corpus has a path the manifest no longer lists
|
|
174
|
+
* - "mtime_drift:<path>" — file's mtime is later than the corpus record
|
|
175
|
+
*/
|
|
176
|
+
export async function isCorpusStale(name) {
|
|
177
|
+
assertValidCorpusName(name);
|
|
178
|
+
let corpus = null;
|
|
179
|
+
try {
|
|
180
|
+
corpus = await loadCorpus(name);
|
|
181
|
+
}
|
|
182
|
+
catch {
|
|
183
|
+
// A schema-invalid corpus IS stale (re-index needed).
|
|
184
|
+
return { stale: true, reason: "no_corpus" };
|
|
185
|
+
}
|
|
186
|
+
if (!corpus)
|
|
187
|
+
return { stale: true, reason: "no_corpus" };
|
|
188
|
+
const manifest = await loadManifest(name).catch(() => null);
|
|
189
|
+
if (!manifest)
|
|
190
|
+
return { stale: true, reason: "no_manifest" };
|
|
191
|
+
// Build a map path → latest stored mtime from chunks.
|
|
192
|
+
const storedMtimeByPath = new Map();
|
|
193
|
+
for (const c of corpus.chunks) {
|
|
194
|
+
const t = Date.parse(c.file_mtime);
|
|
195
|
+
if (Number.isNaN(t))
|
|
196
|
+
continue;
|
|
197
|
+
const prev = storedMtimeByPath.get(c.path);
|
|
198
|
+
if (prev === undefined || t > prev)
|
|
199
|
+
storedMtimeByPath.set(c.path, t);
|
|
200
|
+
}
|
|
201
|
+
const manifestSet = new Set(manifest.paths);
|
|
202
|
+
// Paths in corpus that are no longer declared by the manifest.
|
|
203
|
+
for (const p of storedMtimeByPath.keys()) {
|
|
204
|
+
if (!manifestSet.has(p))
|
|
205
|
+
return { stale: true, reason: `path_removed:${p}` };
|
|
206
|
+
}
|
|
207
|
+
for (const p of manifest.paths) {
|
|
208
|
+
let currentMtime;
|
|
209
|
+
try {
|
|
210
|
+
const st = await stat(p);
|
|
211
|
+
currentMtime = st.mtimeMs;
|
|
212
|
+
}
|
|
213
|
+
catch {
|
|
214
|
+
return { stale: true, reason: `path_missing:${p}` };
|
|
215
|
+
}
|
|
216
|
+
const stored = storedMtimeByPath.get(p);
|
|
217
|
+
if (stored === undefined)
|
|
218
|
+
return { stale: true, reason: `path_added:${p}` };
|
|
219
|
+
// Only flag when current is strictly later — allow clock skew that
|
|
220
|
+
// happens to produce an earlier mtime (e.g. touch to an older date).
|
|
221
|
+
if (currentMtime > stored + 1)
|
|
222
|
+
return { stale: true, reason: `mtime_drift:${p}` };
|
|
223
|
+
}
|
|
224
|
+
return { stale: false };
|
|
225
|
+
}
|
|
81
226
|
//# sourceMappingURL=storage.js.map
|