ralph-hero-knowledge-index 0.1.25 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.mcp.json +1 -1
- package/dist/embedder.d.ts +26 -1
- package/dist/embedder.js +28 -2
- package/dist/embedder.js.map +1 -1
- package/dist/reindex.js +58 -3
- package/dist/reindex.js.map +1 -1
- package/package.json +1 -1
- package/src/__tests__/embedder.test.ts +125 -0
- package/src/__tests__/reindex.test.ts +216 -11
- package/src/embedder.ts +50 -3
- package/src/reindex.ts +66 -3
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ralph-knowledge",
|
|
3
|
-
"version": "0.1.25",
|
|
3
|
+
"version": "0.1.26",
|
|
4
4
|
"description": "Knowledge graph for ralph-hero: semantic search, relationship traversal, and document indexing across thoughts/ documents. Optional companion to ralph-hero.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Chad Dubiel",
|
package/.mcp.json
CHANGED
package/dist/embedder.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { type FeatureExtractionPipeline } from "@huggingface/transformers";
|
|
2
2
|
import { type Chunk, type ChunkerOptions } from "./chunker.js";
|
|
3
|
+
import type { LlmClient } from "./llm-client.js";
|
|
3
4
|
export declare function getEmbedder(): Promise<FeatureExtractionPipeline>;
|
|
4
5
|
export declare function embed(text: string): Promise<Float32Array>;
|
|
5
6
|
/**
|
|
@@ -11,6 +12,24 @@ export interface DocumentChunk extends Chunk {
|
|
|
11
12
|
embedding: Float32Array;
|
|
12
13
|
contextPrefix?: string;
|
|
13
14
|
}
|
|
15
|
+
/**
|
|
16
|
+
* Options accepted by `embedDocument`. Extends `ChunkerOptions` with optional
|
|
17
|
+
* Contextual Retrieval inputs:
|
|
18
|
+
*
|
|
19
|
+
* - `llm`: when present, each chunk is run through `llm.contextualize(fullDoc, chunkContent)`
|
|
20
|
+
* and the returned string is prepended to the embed text (and persisted on the
|
|
21
|
+
* resulting `DocumentChunk.contextPrefix`). Empty-string returns (fail-open from
|
|
22
|
+
* the LLM client) cause the embed text to fall back to the legacy
|
|
23
|
+
* `${title}\n${tagLine}\n${chunk.content}` shape.
|
|
24
|
+
* - `cachedPrefixes`: optional `Map<chunkIndex, contextPrefix>` from a prior run.
|
|
25
|
+
* When a chunk's index has a cached prefix, the LLM call is skipped and the
|
|
26
|
+
* cached string is reused verbatim. Used by the reindex content-hash cache
|
|
27
|
+
* fast-path (Task 6.4) so unchanged docs don't re-contact the LLM endpoint.
|
|
28
|
+
*/
|
|
29
|
+
export interface EmbedDocumentOptions extends ChunkerOptions {
|
|
30
|
+
llm?: LlmClient;
|
|
31
|
+
cachedPrefixes?: Map<number, string>;
|
|
32
|
+
}
|
|
14
33
|
/**
|
|
15
34
|
* Embed a document by splitting it into chunks and emitting one embedding
|
|
16
35
|
* per chunk. The embedded text for each chunk is
|
|
@@ -18,11 +37,17 @@ export interface DocumentChunk extends Chunk {
|
|
|
18
37
|
* tags) travel with every chunk embedding — matching the shape of the legacy
|
|
19
38
|
* `prepareTextForEmbedding()` but without the 500-char truncation.
|
|
20
39
|
*
|
|
40
|
+
* When `opts.llm` is provided (Phase 6 — Contextual Retrieval), a short
|
|
41
|
+
* context prefix is generated per chunk via `opts.llm.contextualize(content, chunk.content)`
|
|
42
|
+
* and prepended to the embed text as `${contextPrefix}\n${title}\n${tagLine}\n${chunk.content}`.
|
|
43
|
+
* If `contextualize` returns `""` (fail-open path), the embed text reverts to the
|
|
44
|
+
* no-context shape so we never emit a leading blank line.
|
|
45
|
+
*
|
|
21
46
|
* Short documents (<= chunkSize) produce exactly one chunk covering the whole
|
|
22
47
|
* content. Empty content yields a single chunk with empty content (so callers
|
|
23
48
|
* still get a title/tag-only embedding for stub documents).
|
|
24
49
|
*/
|
|
25
|
-
export declare function embedDocument(title: string, tags: string[], content: string, opts?: ChunkerOptions): Promise<DocumentChunk[]>;
|
|
50
|
+
export declare function embedDocument(title: string, tags: string[], content: string, opts?: EmbedDocumentOptions): Promise<DocumentChunk[]>;
|
|
26
51
|
/**
|
|
27
52
|
* Back-compat shim: kept so callers outside the reindex path can still build
|
|
28
53
|
* a title/tags/first-paragraph string. No longer used by `embedDocument` (the
|
package/dist/embedder.js
CHANGED
|
@@ -25,6 +25,12 @@ export async function embed(text) {
|
|
|
25
25
|
* tags) travel with every chunk embedding — matching the shape of the legacy
|
|
26
26
|
* `prepareTextForEmbedding()` but without the 500-char truncation.
|
|
27
27
|
*
|
|
28
|
+
* When `opts.llm` is provided (Phase 6 — Contextual Retrieval), a short
|
|
29
|
+
* context prefix is generated per chunk via `opts.llm.contextualize(content, chunk.content)`
|
|
30
|
+
* and prepended to the embed text as `${contextPrefix}\n${title}\n${tagLine}\n${chunk.content}`.
|
|
31
|
+
* If `contextualize` returns `""` (fail-open path), the embed text reverts to the
|
|
32
|
+
* no-context shape so we never emit a leading blank line.
|
|
33
|
+
*
|
|
28
34
|
* Short documents (<= chunkSize) produce exactly one chunk covering the whole
|
|
29
35
|
* content. Empty content yields a single chunk with empty content (so callers
|
|
30
36
|
* still get a title/tag-only embedding for stub documents).
|
|
@@ -37,10 +43,29 @@ export async function embedDocument(title, tags, content, opts) {
|
|
|
37
43
|
const chunks = content.length === 0
|
|
38
44
|
? [{ index: 0, content: "", charStart: 0, charEnd: 0 }]
|
|
39
45
|
: chunkText(content, opts);
|
|
46
|
+
const llm = opts?.llm;
|
|
47
|
+
const cached = opts?.cachedPrefixes;
|
|
40
48
|
const out = [];
|
|
41
49
|
for (const chunk of chunks) {
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
let contextPrefix = "";
|
|
51
|
+
if (llm) {
|
|
52
|
+
// Cache hit: reuse prior context_prefix when the caller supplied a map
|
|
53
|
+
// keyed by chunk.index. Avoids an LLM round-trip per unchanged chunk.
|
|
54
|
+
if (cached && cached.has(chunk.index)) {
|
|
55
|
+
contextPrefix = cached.get(chunk.index) ?? "";
|
|
56
|
+
}
|
|
57
|
+
else {
|
|
58
|
+
// `contextualize` is fail-open: it returns "" on any network/timeout/
|
|
59
|
+
// malformed-response error. That empty string propagates into the
|
|
60
|
+
// returned `DocumentChunk.contextPrefix` (persisted by the caller) and
|
|
61
|
+
// causes the embed text to skip the leading blank line below.
|
|
62
|
+
contextPrefix = await llm.contextualize(content, chunk.content);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
const parts = contextPrefix.length > 0
|
|
66
|
+
? [contextPrefix, title, tagLine, chunk.content]
|
|
67
|
+
: [title, tagLine, chunk.content];
|
|
68
|
+
const embedText = parts.filter(p => p.length > 0).join("\n");
|
|
44
69
|
const embedding = await embed(embedText);
|
|
45
70
|
out.push({
|
|
46
71
|
index: chunk.index,
|
|
@@ -48,6 +73,7 @@ export async function embedDocument(title, tags, content, opts) {
|
|
|
48
73
|
charStart: chunk.charStart,
|
|
49
74
|
charEnd: chunk.charEnd,
|
|
50
75
|
embedding,
|
|
76
|
+
contextPrefix,
|
|
51
77
|
});
|
|
52
78
|
}
|
|
53
79
|
return out;
|
package/dist/embedder.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GAET,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,SAAS,EAAmC,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GAET,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,SAAS,EAAmC,MAAM,cAAc,CAAC;AAG1E,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAE3C,IAAI,gBAAgB,GAAqC,IAAI,CAAC;AAE9D,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,mEAAmE;QACnE,gBAAgB,GAAG,CAAC,MAAM,QAAQ,CAChC,oBAAoB,EACpB,QAAQ,CACT,CAA8B,CAAC;IAClC,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;IACrC,gFAAgF;IAChF,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE;QAClC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,IAAyB,CAAC,CAAC;AAC5D,CAAC;AA+BD;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,IAAc,EACd,OAAe,EACf,IAA2B;IAE3B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEvD,6EAA6E;IAC7E,oEAAoE;IACpE,sCAAsC;IACtC,MAAM,MAAM,GAAY,OAAO,CAAC,MAAM,KAAK,CAAC;QAC1C,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;QACvD,CAAC,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAE7B,MAAM,GAAG,GAAG,IAAI,EAAE,GAAG,CAAC;IACtB,MAAM,MAAM,GAAG,IAAI,EAAE,cAAc,CAAC;IAEpC,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,aAAa,GAAG,EAAE,CAAC;QACvB,IAAI,GAAG,EAAE,CAAC;YACR,uEAAuE;YACvE,sEAAsE;YACtE,IAAI,MAAM,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtC,aAAa,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAChD,CAAC;iBAAM,CAAC;gBACN,sEAAsE;gBACtE,kEAAkE;gBAClE,uEAAuE;gBACvE,8DAA8D;gBAC9D,aAAa,GAAG,MAAM,GAAG,CAAC,aAAa,CAAC,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,GAAG,CAAC;YACpC,CAAC,CAAC,CAAC,aAAa,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC;YAChD,CAAC,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,
CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7D,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC;YACP,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS;YACT,aAAa;SACd,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAa,EACb,IAAc,EACd,OAAe;IAEf,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,8EAA8E;IAC9E,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/E,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
package/dist/reindex.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFileSync, statSync } from "node:fs";
|
|
2
2
|
import { join, relative, resolve, basename } from "node:path";
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
4
5
|
import { KnowledgeDB } from "./db.js";
|
|
5
6
|
import { FtsSearch } from "./search.js";
|
|
6
7
|
import { VectorSearch } from "./vector-search.js";
|
|
@@ -10,6 +11,7 @@ import { findMarkdownFiles } from "./file-scanner.js";
|
|
|
10
11
|
import { generateIndexes } from "./generate-indexes.js";
|
|
11
12
|
import { loadConfig } from "./config.js";
|
|
12
13
|
import { loadIgnoreForRoot } from "./ignore.js";
|
|
14
|
+
import { createLlmClient } from "./llm-client.js";
|
|
13
15
|
export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
|
|
14
16
|
console.log(`Indexing ${dirs.join(", ")} -> ${dbPath}`);
|
|
15
17
|
const db = new KnowledgeDB(dbPath);
|
|
@@ -17,6 +19,26 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
|
|
|
17
19
|
fts.ensureTable();
|
|
18
20
|
const vec = new VectorSearch(db);
|
|
19
21
|
vec.createIndex();
|
|
22
|
+
// Phase 6 (GH-767): Contextual Retrieval wiring.
|
|
23
|
+
// `RALPH_CONTEXTUAL_RETRIEVAL` gates the whole feature. Default on; treat
|
|
24
|
+
// literal "0" / "false" as disabled. When enabled we probe the endpoint once
|
|
25
|
+
// and fail open on unreachable — all downstream chunks then embed without a
|
|
26
|
+
// context prefix and we log a single warning so the operator knows why.
|
|
27
|
+
const flagRaw = process.env.RALPH_CONTEXTUAL_RETRIEVAL;
|
|
28
|
+
const contextualEnabled = flagRaw !== "0" && flagRaw !== "false";
|
|
29
|
+
let llm;
|
|
30
|
+
if (contextualEnabled) {
|
|
31
|
+
const llmUrl = process.env.RALPH_LLM_URL ?? "http://localhost:8000";
|
|
32
|
+
const candidate = createLlmClient();
|
|
33
|
+
const llmReady = await candidate.available();
|
|
34
|
+
if (llmReady) {
|
|
35
|
+
llm = candidate;
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
console.warn(`LLM endpoint unreachable at ${llmUrl}, contextual retrieval disabled for this run`);
|
|
39
|
+
llm = undefined;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
20
42
|
// Schema version check — force full re-embed when embedding algorithm changes
|
|
21
43
|
const SCHEMA_VERSION = "3";
|
|
22
44
|
const currentVersion = db.getMeta("schema_version");
|
|
@@ -62,6 +84,7 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
|
|
|
62
84
|
const parsedDocs = [];
|
|
63
85
|
let indexed = 0;
|
|
64
86
|
let skipped = 0;
|
|
87
|
+
let totalChunks = 0;
|
|
65
88
|
for (const filePath of filesOnDisk) {
|
|
66
89
|
const absPath = resolve(filePath);
|
|
67
90
|
const mtime = Math.trunc(statSync(absPath).mtimeMs);
|
|
@@ -122,6 +145,29 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
|
|
|
122
145
|
db.upsertStubDocument(edge.targetId);
|
|
123
146
|
db.addRelationship(edge.sourceId, edge.targetId, "untyped", edge.context);
|
|
124
147
|
}
|
|
148
|
+
// Content-hash cache for Contextual Retrieval prefixes. The outer mtime
|
|
149
|
+
// skip at line ~75 already short-circuits the overwhelming majority of
|
|
150
|
+
// unchanged docs (no embedder or LLM calls). This inner hash check is
|
|
151
|
+
// specifically for the rare case where mtime differs but content is
|
|
152
|
+
// byte-identical (e.g., git checkout touching the file). When hash matches
|
|
153
|
+
// AND we have a live LLM AND chunks already exist, we reuse the prior
|
|
154
|
+
// context_prefix map and skip the per-chunk LLM round-trips.
|
|
155
|
+
//
|
|
156
|
+
// Simpler alternative considered: rely entirely on mtime. Rejected because
|
|
157
|
+
// the feature spec (Task 6.4 acceptance) explicitly requires re-running
|
|
158
|
+
// reindex without content changes to reuse existing context_prefix.
|
|
159
|
+
const contentHash = createHash("sha256").update(parsed.content).digest("hex").slice(0, 16);
|
|
160
|
+
const hashKey = `content_hash:${parsed.id}`;
|
|
161
|
+
const priorHash = db.getMeta(hashKey);
|
|
162
|
+
let cachedPrefixes;
|
|
163
|
+
if (llm && priorHash === contentHash) {
|
|
164
|
+
const priorChunks = db.db
|
|
165
|
+
.prepare("SELECT chunk_index, context_prefix FROM chunks WHERE document_id = ? ORDER BY chunk_index")
|
|
166
|
+
.all(parsed.id);
|
|
167
|
+
if (priorChunks.length > 0) {
|
|
168
|
+
cachedPrefixes = new Map(priorChunks.map(r => [r.chunk_index, r.context_prefix ?? ""]));
|
|
169
|
+
}
|
|
170
|
+
}
|
|
125
171
|
// Chunk-aware embedding: emit one embedding per chunk, persist to both
|
|
126
172
|
// the `chunks` table and the `documents_vec` virtual table with chunk ids
|
|
127
173
|
// of the form `${doc.id}#c${index}`.
|
|
@@ -134,13 +180,22 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
|
|
|
134
180
|
// Drop any pre-chunks schema vec row that used the bare doc id.
|
|
135
181
|
vec.deleteEmbedding(parsed.id);
|
|
136
182
|
try {
|
|
137
|
-
const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content);
|
|
138
|
-
|
|
183
|
+
const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content, {
|
|
184
|
+
llm,
|
|
185
|
+
cachedPrefixes,
|
|
186
|
+
});
|
|
187
|
+
const insertChunk = db.db.prepare("INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end, context_prefix) VALUES (?, ?, ?, ?, ?, ?, ?)");
|
|
139
188
|
for (const chunk of chunks) {
|
|
140
189
|
const chunkId = `${parsed.id}#c${chunk.index}`;
|
|
141
|
-
insertChunk.run(chunkId, parsed.id, chunk.index, chunk.content, chunk.charStart, chunk.charEnd);
|
|
190
|
+
insertChunk.run(chunkId, parsed.id, chunk.index, chunk.content, chunk.charStart, chunk.charEnd, chunk.contextPrefix ?? "");
|
|
142
191
|
vec.upsertEmbedding(chunkId, chunk.embedding);
|
|
192
|
+
totalChunks++;
|
|
193
|
+
if (totalChunks % 50 === 0) {
|
|
194
|
+
console.log(` ${totalChunks} chunks embedded`);
|
|
195
|
+
}
|
|
143
196
|
}
|
|
197
|
+
// Record the content hash for the next reindex cache check.
|
|
198
|
+
db.setMeta(hashKey, contentHash);
|
|
144
199
|
}
|
|
145
200
|
catch (e) {
|
|
146
201
|
console.warn(`Failed to embed ${id}: ${e.message}`);
|
package/dist/reindex.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,6DAA6D;IAC7D,iFAAiF;IACjF,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;IAC
hB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC;YAC7B,gEAAgE;YAChE,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,EA
AE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,uEAAuE;QACvE,0EAA0E;QAC1E,qCAAqC;QACrC,EAAE;QACF,oEAAoE;QACpE,0EAA0E;QAC1E,0EAA0E;QAC1E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACzE,GAAG,CAAC,oBAAoB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,gEAAgE;QAChE,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;YAC9E,MAAM,WAAW,GAAG,EAAE,CAAC,EAAE,CAAC,OAAO,CAC/B,4GAA4G,CAC7G,CAAC;YACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC/C,WAAW,CAAC,GAAG,CACb,OAAO,EACP,MAAM,CAAC,EAAE,EACT,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,CACd,CAAC;gBACF,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI,O
AAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IA
CjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
|
|
1
|
+
{"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,eAAe,EAAkB,MAAM,iBAAiB,CAAC;AAClE,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,iDAAiD;IACjD,0EAA0E;IAC1E,6EAA6E;IAC7E,4EAA4E;IAC5E,wEAAwE;IACxE,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC;IACvD,MAAM,iBAAiB,GAAG,OAAO,KAAK,GAAG,IAAI,OAAO,KAAK,OAAO,CAAC;IACjE,IAAI,GAA0B,CAAC;IAC/B,IAAI,iBAAiB,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,uBAAuB,CAAC;QACpE,MAAM,SAAS,GAAG,eAAe,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;QAC7C,IAAI,QAAQ,EAAE,CAAC;YACb,GAAG,GAAG,SAAS,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,IAAI,CACV,+BAA+B,MAAM,8CAA8C,CACpF,CAAC;YACF,GAAG,GAAG,SAAS,CAAC;QAClB,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,
CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,6DAA6D;IAC7D,iFAAiF;IACjF,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC;YAC7B,gEAAgE;YAChE,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC
,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,wEAAwE;QACxE,uEAAuE;QACvE,sEAAsE;QACtE,oEAAoE;QACpE,2EAA2E;QAC3E,sEAAsE;QACtE,6DAA6D;QAC7D,EAAE;QACF,2EAA2E;QAC3E,wEAAwE;QACxE,oEAAoE;QACpE,MAAM,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC3F,MAAM,OAAO,GAAG,gBAAgB,MAAM,CAAC,EAAE,EAAE,CAAC;QAC5
C,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEtC,IAAI,cAA+C,CAAC;QACpD,IAAI,GAAG,IAAI,SAAS,KAAK,WAAW,EAAE,CAAC;YACrC,MAAM,WAAW,GAAG,EAAE,CAAC,EAAE;iBACtB,OAAO,CACN,2FAA2F,CAC5F;iBACA,GAAG,CAAC,MAAM,CAAC,EAAE,CAA2D,CAAC;YAC5E,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,cAAc,GAAG,IAAI,GAAG,CACtB,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,cAAc,IAAI,EAAE,CAAqB,CAAC,CAClF,CAAC;YACJ,CAAC;QACH,CAAC;QAED,uEAAuE;QACvE,0EAA0E;QAC1E,qCAAqC;QACrC,EAAE;QACF,oEAAoE;QACpE,0EAA0E;QAC1E,0EAA0E;QAC1E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACzE,GAAG,CAAC,oBAAoB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,gEAAgE;QAChE,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,EAAE;gBAC5E,GAAG;gBACH,cAAc;aACf,CAAC,CAAC;YACH,MAAM,WAAW,GAAG,EAAE,CAAC,EAAE,CAAC,OAAO,CAC/B,+HAA+H,CAChI,CAAC;YACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC/C,WAAW,CAAC,GAAG,CACb,OAAO,EACP,MAAM,CAAC,EAAE,EACT,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,aAAa,IAAI,EAAE,CAC1B,CAAC;gBACF,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;gBAC9C,WAAW,EAAE,CAAC;gBACd,IAAI,WAAW,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC3B,OAAO,CAAC,GAAG,CAAC,KAAK,WAAW,kBAAkB,CAAC,CAAC;gBAClD,CAAC;YACH,CAAC;YACD,4DAA4D;YAC5D,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QACnC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,
CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IACjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM
,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
|
package/package.json
CHANGED
|
@@ -15,6 +15,14 @@ vi.mock("@huggingface/transformers", () => {
|
|
|
15
15
|
});
|
|
16
16
|
|
|
17
17
|
import { prepareTextForEmbedding, embedDocument } from "../embedder.js";
|
|
18
|
+
import type { LlmClient } from "../llm-client.js";
|
|
19
|
+
|
|
20
|
+
function makeMockLlm(contextualize: LlmClient["contextualize"]): LlmClient {
|
|
21
|
+
return {
|
|
22
|
+
available: vi.fn(async () => true),
|
|
23
|
+
contextualize: vi.fn(contextualize),
|
|
24
|
+
};
|
|
25
|
+
}
|
|
18
26
|
|
|
19
27
|
describe("prepareTextForEmbedding", () => {
|
|
20
28
|
it("includes title, tags, and first paragraph", () => {
|
|
@@ -196,4 +204,121 @@ describe("embedDocument", () => {
|
|
|
196
204
|
// With chunkSize=100 over 500 chars, we expect multiple chunks.
|
|
197
205
|
expect(result.length).toBeGreaterThan(1);
|
|
198
206
|
});
|
|
207
|
+
|
|
208
|
+
// Phase 6 (GH-767): Contextual Retrieval integration.
|
|
209
|
+
describe("contextual retrieval", () => {
|
|
210
|
+
it("calls llm.contextualize once per chunk when llm is provided", async () => {
|
|
211
|
+
const content = "A".repeat(500);
|
|
212
|
+
const mockLlm = makeMockLlm(async () => "CTX");
|
|
213
|
+
const result = await embedDocument("T", [], content, {
|
|
214
|
+
llm: mockLlm,
|
|
215
|
+
chunkSize: 100,
|
|
216
|
+
chunkOverlap: 10,
|
|
217
|
+
});
|
|
218
|
+
expect(result.length).toBeGreaterThan(1);
|
|
219
|
+
expect(mockLlm.contextualize).toHaveBeenCalledTimes(result.length);
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
it("stores non-empty contextPrefix on every returned chunk", async () => {
|
|
223
|
+
const mockLlm = makeMockLlm(async () => "THIS IS CONTEXT");
|
|
224
|
+
const result = await embedDocument("Title", ["tag"], "body text", { llm: mockLlm });
|
|
225
|
+
expect(result).toHaveLength(1);
|
|
226
|
+
expect(result[0]!.contextPrefix).toBe("THIS IS CONTEXT");
|
|
227
|
+
// Embed text prepends contextPrefix ahead of title/tags/content.
|
|
228
|
+
expect(embedCalls[0]).toBe("THIS IS CONTEXT\nTitle\ntag\nbody text");
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
it("passes the full document (not the chunk) as the first contextualize arg", async () => {
|
|
232
|
+
const longContent = "A".repeat(500);
|
|
233
|
+
const mockLlm = makeMockLlm(async () => "CTX");
|
|
234
|
+
await embedDocument("T", [], longContent, {
|
|
235
|
+
llm: mockLlm,
|
|
236
|
+
chunkSize: 100,
|
|
237
|
+
chunkOverlap: 10,
|
|
238
|
+
});
|
|
239
|
+
const calls = (mockLlm.contextualize as ReturnType<typeof vi.fn>).mock.calls;
|
|
240
|
+
expect(calls.length).toBeGreaterThan(1);
|
|
241
|
+
for (const [fullDoc] of calls) {
|
|
242
|
+
expect(fullDoc).toBe(longContent);
|
|
243
|
+
}
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
it("fail-open (empty string) yields contextPrefix: '' and omits leading blank line", async () => {
|
|
247
|
+
const mockLlm = makeMockLlm(async () => "");
|
|
248
|
+
const result = await embedDocument("Title", ["tag"], "body text", { llm: mockLlm });
|
|
249
|
+
expect(result[0]!.contextPrefix).toBe("");
|
|
250
|
+
// No leading blank line from an empty contextPrefix — falls back to the
|
|
251
|
+
// no-context embed shape.
|
|
252
|
+
expect(embedCalls[0]).toBe("Title\ntag\nbody text");
|
|
253
|
+
expect(embedCalls[0]!.startsWith("\n")).toBe(false);
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
it("uses cachedPrefixes entry for a chunk and skips llm.contextualize for that index", async () => {
|
|
257
|
+
const content = "A".repeat(500);
|
|
258
|
+
const mockLlm = makeMockLlm(async () => "LIVE CTX");
|
|
259
|
+
// First run to discover the chunk layout.
|
|
260
|
+
const baseline = await embedDocument("T", [], content, {
|
|
261
|
+
llm: mockLlm,
|
|
262
|
+
chunkSize: 100,
|
|
263
|
+
chunkOverlap: 10,
|
|
264
|
+
});
|
|
265
|
+
const chunkCount = baseline.length;
|
|
266
|
+
expect(chunkCount).toBeGreaterThan(1);
|
|
267
|
+
|
|
268
|
+
(mockLlm.contextualize as ReturnType<typeof vi.fn>).mockClear();
|
|
269
|
+
embedCalls.length = 0;
|
|
270
|
+
|
|
271
|
+
// Cache all but the last chunk index.
|
|
272
|
+
const cached = new Map<number, string>();
|
|
273
|
+
for (let i = 0; i < chunkCount - 1; i++) {
|
|
274
|
+
cached.set(i, `CACHED-${i}`);
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
const result = await embedDocument("T", [], content, {
|
|
278
|
+
llm: mockLlm,
|
|
279
|
+
cachedPrefixes: cached,
|
|
280
|
+
chunkSize: 100,
|
|
281
|
+
chunkOverlap: 10,
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
expect(result).toHaveLength(chunkCount);
|
|
285
|
+
// Only the last (uncached) chunk triggered a live LLM call.
|
|
286
|
+
expect(mockLlm.contextualize).toHaveBeenCalledTimes(1);
|
|
287
|
+
// Cached chunks preserve the cached prefix verbatim.
|
|
288
|
+
for (let i = 0; i < chunkCount - 1; i++) {
|
|
289
|
+
expect(result[i]!.contextPrefix).toBe(`CACHED-${i}`);
|
|
290
|
+
}
|
|
291
|
+
expect(result[chunkCount - 1]!.contextPrefix).toBe("LIVE CTX");
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
it("with no llm, contextPrefix is '' on every chunk and no LLM calls occur", async () => {
|
|
295
|
+
const content = "A".repeat(500);
|
|
296
|
+
const mockLlm = makeMockLlm(async () => "SHOULD NOT CALL");
|
|
297
|
+
// Note: do NOT pass `llm` into opts — this is the flag-off path.
|
|
298
|
+
const result = await embedDocument("T", [], content, {
|
|
299
|
+
chunkSize: 100,
|
|
300
|
+
chunkOverlap: 10,
|
|
301
|
+
});
|
|
302
|
+
expect(result.length).toBeGreaterThan(1);
|
|
303
|
+
expect(mockLlm.contextualize).not.toHaveBeenCalled();
|
|
304
|
+
for (const chunk of result) {
|
|
305
|
+
expect(chunk.contextPrefix).toBe("");
|
|
306
|
+
}
|
|
307
|
+
// Embed text uses the no-context shape.
|
|
308
|
+
expect(embedCalls[0]!.startsWith("T\n")).toBe(true);
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
it("cachedPrefixes without llm has no effect (no LLM, caching is moot)", async () => {
|
|
312
|
+
const content = "short content";
|
|
313
|
+
const mockLlm = makeMockLlm(async () => "LIVE");
|
|
314
|
+
const cached = new Map<number, string>([[0, "CACHED"]]);
|
|
315
|
+
const result = await embedDocument("T", [], content, {
|
|
316
|
+
cachedPrefixes: cached,
|
|
317
|
+
});
|
|
318
|
+
// Without llm, no contextualize calls happen and no cached prefix is applied
|
|
319
|
+
// (since caching is only a fast-path on the LLM branch).
|
|
320
|
+
expect(mockLlm.contextualize).not.toHaveBeenCalled();
|
|
321
|
+
expect(result[0]!.contextPrefix).toBe("");
|
|
322
|
+
});
|
|
323
|
+
});
|
|
199
324
|
});
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
2
2
|
import { mkdtempSync, writeFileSync, mkdirSync, unlinkSync, utimesSync } from "node:fs";
|
|
3
3
|
import { join, resolve } from "node:path";
|
|
4
4
|
import { tmpdir } from "node:os";
|
|
@@ -7,24 +7,46 @@ import { FtsSearch } from "../search.js";
|
|
|
7
7
|
import { VectorSearch } from "../vector-search.js";
|
|
8
8
|
|
|
9
9
|
// Mock embedder so we don't load the real transformer model during tests.
|
|
10
|
-
// embedDocument
|
|
11
|
-
//
|
|
10
|
+
// embedDocument honors the Phase 6 `opts.llm` contract: when present it calls
|
|
11
|
+
// `llm.contextualize(fullDoc, chunk.content)` and surfaces the returned string
|
|
12
|
+
// on `DocumentChunk.contextPrefix` (empty on fail-open). When `cachedPrefixes`
|
|
13
|
+
// is provided and has a value for a chunk's index, the live LLM call is skipped.
|
|
12
14
|
vi.mock("../embedder.js", async () => {
|
|
13
15
|
// Import the real chunker so the mock chunks content the same way as prod.
|
|
14
16
|
const { chunkText } = await import("../chunker.js");
|
|
17
|
+
type LlmLike = { contextualize: (fullDoc: string, chunk: string) => Promise<string> };
|
|
18
|
+
type EmbedOpts = { llm?: LlmLike; cachedPrefixes?: Map<number, string> };
|
|
15
19
|
return {
|
|
16
20
|
embed: vi.fn(async () => new Float32Array(384)),
|
|
17
|
-
embedDocument: vi.fn(async (
|
|
21
|
+
embedDocument: vi.fn(async (
|
|
22
|
+
_title: string,
|
|
23
|
+
_tags: string[],
|
|
24
|
+
content: string,
|
|
25
|
+
opts?: EmbedOpts,
|
|
26
|
+
) => {
|
|
18
27
|
const chunks = content.length === 0
|
|
19
28
|
? [{ index: 0, content: "", charStart: 0, charEnd: 0 }]
|
|
20
29
|
: chunkText(content);
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
30
|
+
const out = [];
|
|
31
|
+
for (const c of chunks) {
|
|
32
|
+
let contextPrefix = "";
|
|
33
|
+
if (opts?.llm) {
|
|
34
|
+
if (opts.cachedPrefixes && opts.cachedPrefixes.has(c.index)) {
|
|
35
|
+
contextPrefix = opts.cachedPrefixes.get(c.index) ?? "";
|
|
36
|
+
} else {
|
|
37
|
+
contextPrefix = await opts.llm.contextualize(content, c.content);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
out.push({
|
|
41
|
+
index: c.index,
|
|
42
|
+
content: c.content,
|
|
43
|
+
charStart: c.charStart,
|
|
44
|
+
charEnd: c.charEnd,
|
|
45
|
+
embedding: new Float32Array(384),
|
|
46
|
+
contextPrefix,
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
return out;
|
|
28
50
|
}),
|
|
29
51
|
prepareTextForEmbedding: vi.fn((title: string, tags: string[], content: string) => {
|
|
30
52
|
const tagLine = tags.length > 0 ? tags.join(", ") : "";
|
|
@@ -34,6 +56,17 @@ vi.mock("../embedder.js", async () => {
|
|
|
34
56
|
};
|
|
35
57
|
});
|
|
36
58
|
|
|
59
|
+
// Mock the LLM client so tests can deterministically control availability and
|
|
60
|
+
// contextualize() return values without touching the network.
|
|
61
|
+
const mockLlmAvailable = vi.fn(async () => true);
|
|
62
|
+
const mockLlmContextualize = vi.fn(async (_fullDoc: string, _chunk: string) => "");
|
|
63
|
+
vi.mock("../llm-client.js", () => ({
|
|
64
|
+
createLlmClient: vi.fn(() => ({
|
|
65
|
+
available: mockLlmAvailable,
|
|
66
|
+
contextualize: mockLlmContextualize,
|
|
67
|
+
})),
|
|
68
|
+
}));
|
|
69
|
+
|
|
37
70
|
import { embedDocument } from "../embedder.js";
|
|
38
71
|
import { reindex } from "../reindex.js";
|
|
39
72
|
import { KnowledgeDB } from "../db.js";
|
|
@@ -77,13 +110,32 @@ describe("findMarkdownFiles", () => {
|
|
|
77
110
|
describe("incremental reindex", () => {
|
|
78
111
|
let dir: string;
|
|
79
112
|
let dbPath: string;
|
|
113
|
+
const originalFlag = process.env.RALPH_CONTEXTUAL_RETRIEVAL;
|
|
80
114
|
|
|
81
115
|
beforeEach(() => {
|
|
82
116
|
mockedEmbed.mockClear();
|
|
117
|
+
// Reset LLM mocks to defaults: available returns true, contextualize returns "".
|
|
118
|
+
// Individual tests override these before calling `reindex(...)`.
|
|
119
|
+
mockLlmAvailable.mockReset();
|
|
120
|
+
mockLlmAvailable.mockResolvedValue(true);
|
|
121
|
+
mockLlmContextualize.mockReset();
|
|
122
|
+
mockLlmContextualize.mockResolvedValue("");
|
|
123
|
+
// Default the flag to disabled for legacy tests so the existing 17 scenarios
|
|
124
|
+
// don't accidentally call the mocked LLM — the Phase 6 tests opt back in
|
|
125
|
+
// explicitly via `process.env.RALPH_CONTEXTUAL_RETRIEVAL = "1"`.
|
|
126
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = "0";
|
|
83
127
|
dir = mkdtempSync(join(tmpdir(), "knowledge-reindex-"));
|
|
84
128
|
dbPath = join(dir, "test.db");
|
|
85
129
|
});
|
|
86
130
|
|
|
131
|
+
afterEach(() => {
|
|
132
|
+
if (originalFlag === undefined) {
|
|
133
|
+
delete process.env.RALPH_CONTEXTUAL_RETRIEVAL;
|
|
134
|
+
} else {
|
|
135
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = originalFlag;
|
|
136
|
+
}
|
|
137
|
+
});
|
|
138
|
+
|
|
87
139
|
it("scenario 1: unchanged files are skipped on second run", async () => {
|
|
88
140
|
writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
|
|
89
141
|
writeFileSync(join(dir, "doc-b.md"), makeDoc("Doc B"));
|
|
@@ -529,4 +581,157 @@ describe("incremental reindex", () => {
|
|
|
529
581
|
expect(vecCount).toBe(secondCount);
|
|
530
582
|
db2.close();
|
|
531
583
|
});
|
|
584
|
+
|
|
585
|
+
// ---- Phase 6 (GH-767): Contextual Retrieval wiring ----
|
|
586
|
+
|
|
587
|
+
it("scenario 18: RALPH_CONTEXTUAL_RETRIEVAL=0 skips LLM entirely", async () => {
|
|
588
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = "0";
|
|
589
|
+
mockLlmContextualize.mockResolvedValue("SHOULD NOT APPEAR");
|
|
590
|
+
|
|
591
|
+
writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
|
|
592
|
+
|
|
593
|
+
await reindex([dir], dbPath);
|
|
594
|
+
|
|
595
|
+
// Zero LLM activity when the flag is off.
|
|
596
|
+
expect(mockLlmAvailable).not.toHaveBeenCalled();
|
|
597
|
+
expect(mockLlmContextualize).not.toHaveBeenCalled();
|
|
598
|
+
|
|
599
|
+
// All chunks should have empty context_prefix.
|
|
600
|
+
const db = new KnowledgeDB(dbPath);
|
|
601
|
+
const rows = db.db
|
|
602
|
+
.prepare("SELECT context_prefix FROM chunks WHERE document_id = ?")
|
|
603
|
+
.all("doc-a") as Array<{ context_prefix: string }>;
|
|
604
|
+
expect(rows.length).toBeGreaterThan(0);
|
|
605
|
+
for (const r of rows) {
|
|
606
|
+
expect(r.context_prefix).toBe("");
|
|
607
|
+
}
|
|
608
|
+
db.close();
|
|
609
|
+
});
|
|
610
|
+
|
|
611
|
+
it("scenario 19: flag on + LLM unreachable -> empty context_prefix + single warning", async () => {
|
|
612
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = "1";
|
|
613
|
+
mockLlmAvailable.mockResolvedValue(false);
|
|
614
|
+
// contextualize should never be called because available() returned false.
|
|
615
|
+
mockLlmContextualize.mockResolvedValue("UNREACHED");
|
|
616
|
+
|
|
617
|
+
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
|
618
|
+
|
|
619
|
+
try {
|
|
620
|
+
writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
|
|
621
|
+
writeFileSync(join(dir, "doc-b.md"), makeDoc("Doc B"));
|
|
622
|
+
|
|
623
|
+
await reindex([dir], dbPath);
|
|
624
|
+
|
|
625
|
+
// available() probed exactly once per reindex call.
|
|
626
|
+
expect(mockLlmAvailable).toHaveBeenCalledTimes(1);
|
|
627
|
+
// contextualize() never invoked on the fail-open path.
|
|
628
|
+
expect(mockLlmContextualize).not.toHaveBeenCalled();
|
|
629
|
+
|
|
630
|
+
// Exactly one "unreachable" warning (other warnings like frontmatter are allowed).
|
|
631
|
+
const unreachableWarnings = warnSpy.mock.calls.filter(args =>
|
|
632
|
+
args.some(a => typeof a === "string" && /LLM endpoint unreachable/.test(a)),
|
|
633
|
+
);
|
|
634
|
+
expect(unreachableWarnings).toHaveLength(1);
|
|
635
|
+
|
|
636
|
+
const db = new KnowledgeDB(dbPath);
|
|
637
|
+
const rows = db.db
|
|
638
|
+
.prepare("SELECT context_prefix FROM chunks")
|
|
639
|
+
.all() as Array<{ context_prefix: string }>;
|
|
640
|
+
expect(rows.length).toBeGreaterThan(0);
|
|
641
|
+
for (const r of rows) {
|
|
642
|
+
expect(r.context_prefix).toBe("");
|
|
643
|
+
}
|
|
644
|
+
db.close();
|
|
645
|
+
} finally {
|
|
646
|
+
warnSpy.mockRestore();
|
|
647
|
+
}
|
|
648
|
+
});
|
|
649
|
+
|
|
650
|
+
it("scenario 20: flag on + reachable LLM -> non-empty context_prefix persisted", async () => {
|
|
651
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = "1";
|
|
652
|
+
mockLlmAvailable.mockResolvedValue(true);
|
|
653
|
+
mockLlmContextualize.mockResolvedValue("GENERATED CONTEXT");
|
|
654
|
+
|
|
655
|
+
writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
|
|
656
|
+
|
|
657
|
+
await reindex([dir], dbPath);
|
|
658
|
+
|
|
659
|
+
expect(mockLlmAvailable).toHaveBeenCalledTimes(1);
|
|
660
|
+
expect(mockLlmContextualize).toHaveBeenCalled();
|
|
661
|
+
|
|
662
|
+
const db = new KnowledgeDB(dbPath);
|
|
663
|
+
const rows = db.db
|
|
664
|
+
.prepare("SELECT context_prefix FROM chunks WHERE document_id = ?")
|
|
665
|
+
.all("doc-a") as Array<{ context_prefix: string }>;
|
|
666
|
+
expect(rows.length).toBeGreaterThan(0);
|
|
667
|
+
for (const r of rows) {
|
|
668
|
+
expect(r.context_prefix).toBe("GENERATED CONTEXT");
|
|
669
|
+
}
|
|
670
|
+
db.close();
|
|
671
|
+
});
|
|
672
|
+
|
|
673
|
+
it("scenario 21: flag defaults on (undefined env) and probes LLM", async () => {
|
|
674
|
+
delete process.env.RALPH_CONTEXTUAL_RETRIEVAL;
|
|
675
|
+
mockLlmAvailable.mockResolvedValue(true);
|
|
676
|
+
mockLlmContextualize.mockResolvedValue("DEFAULT ON");
|
|
677
|
+
|
|
678
|
+
writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
|
|
679
|
+
|
|
680
|
+
await reindex([dir], dbPath);
|
|
681
|
+
|
|
682
|
+
// available() probed because flag was not "0" / "false".
|
|
683
|
+
expect(mockLlmAvailable).toHaveBeenCalledTimes(1);
|
|
684
|
+
expect(mockLlmContextualize).toHaveBeenCalled();
|
|
685
|
+
});
|
|
686
|
+
|
|
687
|
+
it("scenario 22: 'false' also disables contextual retrieval", async () => {
|
|
688
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = "false";
|
|
689
|
+
mockLlmContextualize.mockResolvedValue("SHOULD NOT APPEAR");
|
|
690
|
+
|
|
691
|
+
writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
|
|
692
|
+
|
|
693
|
+
await reindex([dir], dbPath);
|
|
694
|
+
|
|
695
|
+
expect(mockLlmAvailable).not.toHaveBeenCalled();
|
|
696
|
+
expect(mockLlmContextualize).not.toHaveBeenCalled();
|
|
697
|
+
});
|
|
698
|
+
|
|
699
|
+
it("scenario 23: re-running with unchanged content reuses cached context_prefix (no new LLM calls)", async () => {
|
|
700
|
+
process.env.RALPH_CONTEXTUAL_RETRIEVAL = "1";
|
|
701
|
+
mockLlmAvailable.mockResolvedValue(true);
|
|
702
|
+
mockLlmContextualize.mockResolvedValue("INITIAL CTX");
|
|
703
|
+
|
|
704
|
+
const filePath = join(dir, "doc-a.md");
|
|
705
|
+
writeFileSync(filePath, makeDoc("Doc A"));
|
|
706
|
+
|
|
707
|
+
await reindex([dir], dbPath);
|
|
708
|
+
const firstCallCount = mockLlmContextualize.mock.calls.length;
|
|
709
|
+
expect(firstCallCount).toBeGreaterThan(0);
|
|
710
|
+
|
|
711
|
+
// Bump mtime without changing content — this defeats the outer mtime skip
|
|
712
|
+
// and forces the inner content-hash cache check to fire.
|
|
713
|
+
const future = Date.now() / 1000 + 2;
|
|
714
|
+
utimesSync(filePath, future, future);
|
|
715
|
+
|
|
716
|
+
mockLlmContextualize.mockClear();
|
|
717
|
+
// Swap the mock return so we can prove cached prefixes were reused: if the
|
|
718
|
+
// cache missed and a live call happened, the new return value would show up
|
|
719
|
+
// in the DB.
|
|
720
|
+
mockLlmContextualize.mockResolvedValue("LIVE (SHOULD NOT OCCUR)");
|
|
721
|
+
|
|
722
|
+
await reindex([dir], dbPath);
|
|
723
|
+
|
|
724
|
+
// Zero fresh calls because content hash matched the meta cache.
|
|
725
|
+
expect(mockLlmContextualize).not.toHaveBeenCalled();
|
|
726
|
+
|
|
727
|
+
const db = new KnowledgeDB(dbPath);
|
|
728
|
+
const rows = db.db
|
|
729
|
+
.prepare("SELECT context_prefix FROM chunks WHERE document_id = ?")
|
|
730
|
+
.all("doc-a") as Array<{ context_prefix: string }>;
|
|
731
|
+
expect(rows.length).toBeGreaterThan(0);
|
|
732
|
+
for (const r of rows) {
|
|
733
|
+
expect(r.context_prefix).toBe("INITIAL CTX");
|
|
734
|
+
}
|
|
735
|
+
db.close();
|
|
736
|
+
});
|
|
532
737
|
});
|
package/src/embedder.ts
CHANGED
|
@@ -3,6 +3,7 @@ import {
|
|
|
3
3
|
type FeatureExtractionPipeline,
|
|
4
4
|
} from "@huggingface/transformers";
|
|
5
5
|
import { chunkText, type Chunk, type ChunkerOptions } from "./chunker.js";
|
|
6
|
+
import type { LlmClient } from "./llm-client.js";
|
|
6
7
|
|
|
7
8
|
const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
|
|
8
9
|
|
|
@@ -39,6 +40,25 @@ export interface DocumentChunk extends Chunk {
|
|
|
39
40
|
contextPrefix?: string;
|
|
40
41
|
}
|
|
41
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Options accepted by `embedDocument`. Extends `ChunkerOptions` with optional
|
|
45
|
+
* Contextual Retrieval inputs:
|
|
46
|
+
*
|
|
47
|
+
* - `llm`: when present, each chunk is run through `llm.contextualize(fullDoc, chunkContent)`
|
|
48
|
+
* and the returned string is prepended to the embed text (and persisted on the
|
|
49
|
+
* resulting `DocumentChunk.contextPrefix`). Empty-string returns (fail-open from
|
|
50
|
+
* the LLM client) cause the embed text to fall back to the legacy
|
|
51
|
+
* `${title}\n${tagLine}\n${chunk.content}` shape.
|
|
52
|
+
* - `cachedPrefixes`: optional `Map<chunkIndex, contextPrefix>` from a prior run.
|
|
53
|
+
* When a chunk's index has a cached prefix, the LLM call is skipped and the
|
|
54
|
+
* cached string is reused verbatim. Used by the reindex content-hash cache
|
|
55
|
+
* fast-path (Task 6.4) so unchanged docs don't re-contact the LLM endpoint.
|
|
56
|
+
*/
|
|
57
|
+
export interface EmbedDocumentOptions extends ChunkerOptions {
|
|
58
|
+
llm?: LlmClient;
|
|
59
|
+
cachedPrefixes?: Map<number, string>;
|
|
60
|
+
}
|
|
61
|
+
|
|
42
62
|
/**
|
|
43
63
|
* Embed a document by splitting it into chunks and emitting one embedding
|
|
44
64
|
* per chunk. The embedded text for each chunk is
|
|
@@ -46,6 +66,12 @@ export interface DocumentChunk extends Chunk {
|
|
|
46
66
|
* tags) travel with every chunk embedding — matching the shape of the legacy
|
|
47
67
|
* `prepareTextForEmbedding()` but without the 500-char truncation.
|
|
48
68
|
*
|
|
69
|
+
* When `opts.llm` is provided (Phase 6 — Contextual Retrieval), a short
|
|
70
|
+
* context prefix is generated per chunk via `opts.llm.contextualize(content, chunk.content)`
|
|
71
|
+
* and prepended to the embed text as `${contextPrefix}\n${title}\n${tagLine}\n${chunk.content}`.
|
|
72
|
+
* If `contextualize` returns `""` (fail-open path), the embed text reverts to the
|
|
73
|
+
* no-context shape so we never emit a leading blank line.
|
|
74
|
+
*
|
|
49
75
|
* Short documents (<= chunkSize) produce exactly one chunk covering the whole
|
|
50
76
|
* content. Empty content yields a single chunk with empty content (so callers
|
|
51
77
|
* still get a title/tag-only embedding for stub documents).
|
|
@@ -54,7 +80,7 @@ export async function embedDocument(
|
|
|
54
80
|
title: string,
|
|
55
81
|
tags: string[],
|
|
56
82
|
content: string,
|
|
57
|
-
opts?: ChunkerOptions,
|
|
83
|
+
opts?: EmbedDocumentOptions,
|
|
58
84
|
): Promise<DocumentChunk[]> {
|
|
59
85
|
const tagLine = tags.length > 0 ? tags.join(", ") : "";
|
|
60
86
|
|
|
@@ -65,10 +91,30 @@ export async function embedDocument(
|
|
|
65
91
|
? [{ index: 0, content: "", charStart: 0, charEnd: 0 }]
|
|
66
92
|
: chunkText(content, opts);
|
|
67
93
|
|
|
94
|
+
const llm = opts?.llm;
|
|
95
|
+
const cached = opts?.cachedPrefixes;
|
|
96
|
+
|
|
68
97
|
const out: DocumentChunk[] = [];
|
|
69
98
|
for (const chunk of chunks) {
|
|
70
|
-
|
|
71
|
-
|
|
99
|
+
let contextPrefix = "";
|
|
100
|
+
if (llm) {
|
|
101
|
+
// Cache hit: reuse prior context_prefix when the caller supplied a map
|
|
102
|
+
// keyed by chunk.index. Avoids an LLM round-trip per unchanged chunk.
|
|
103
|
+
if (cached && cached.has(chunk.index)) {
|
|
104
|
+
contextPrefix = cached.get(chunk.index) ?? "";
|
|
105
|
+
} else {
|
|
106
|
+
// `contextualize` is fail-open: it returns "" on any network/timeout/
|
|
107
|
+
// malformed-response error. That empty string propagates into the
|
|
108
|
+
// returned `DocumentChunk.contextPrefix` (persisted by the caller) and
|
|
109
|
+
// causes the embed text to skip the leading blank line below.
|
|
110
|
+
contextPrefix = await llm.contextualize(content, chunk.content);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
const parts = contextPrefix.length > 0
|
|
115
|
+
? [contextPrefix, title, tagLine, chunk.content]
|
|
116
|
+
: [title, tagLine, chunk.content];
|
|
117
|
+
const embedText = parts.filter(p => p.length > 0).join("\n");
|
|
72
118
|
const embedding = await embed(embedText);
|
|
73
119
|
out.push({
|
|
74
120
|
index: chunk.index,
|
|
@@ -76,6 +122,7 @@ export async function embedDocument(
|
|
|
76
122
|
charStart: chunk.charStart,
|
|
77
123
|
charEnd: chunk.charEnd,
|
|
78
124
|
embedding,
|
|
125
|
+
contextPrefix,
|
|
79
126
|
});
|
|
80
127
|
}
|
|
81
128
|
return out;
|
package/src/reindex.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFileSync, statSync } from "node:fs";
|
|
2
2
|
import { join, relative, resolve, basename } from "node:path";
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
4
5
|
import { KnowledgeDB } from "./db.js";
|
|
5
6
|
import { FtsSearch } from "./search.js";
|
|
6
7
|
import { VectorSearch } from "./vector-search.js";
|
|
@@ -10,7 +11,7 @@ import { findMarkdownFiles } from "./file-scanner.js";
|
|
|
10
11
|
import { generateIndexes } from "./generate-indexes.js";
|
|
11
12
|
import { loadConfig, type KnowledgeConfig } from "./config.js";
|
|
12
13
|
import { loadIgnoreForRoot } from "./ignore.js";
|
|
13
|
-
|
|
14
|
+
import { createLlmClient, type LlmClient } from "./llm-client.js";
|
|
14
15
|
export async function reindex(
|
|
15
16
|
dirs: string[],
|
|
16
17
|
dbPath: string,
|
|
@@ -25,6 +26,28 @@ export async function reindex(
|
|
|
25
26
|
const vec = new VectorSearch(db);
|
|
26
27
|
vec.createIndex();
|
|
27
28
|
|
|
29
|
+
// Phase 6 (GH-767): Contextual Retrieval wiring.
|
|
30
|
+
// `RALPH_CONTEXTUAL_RETRIEVAL` gates the whole feature. Default on; treat
|
|
31
|
+
// literal "0" / "false" as disabled. When enabled we probe the endpoint once
|
|
32
|
+
// and fail open on unreachable — all downstream chunks then embed without a
|
|
33
|
+
// context prefix and we log a single warning so the operator knows why.
|
|
34
|
+
const flagRaw = process.env.RALPH_CONTEXTUAL_RETRIEVAL;
|
|
35
|
+
const contextualEnabled = flagRaw !== "0" && flagRaw !== "false";
|
|
36
|
+
let llm: LlmClient | undefined;
|
|
37
|
+
if (contextualEnabled) {
|
|
38
|
+
const llmUrl = process.env.RALPH_LLM_URL ?? "http://localhost:8000";
|
|
39
|
+
const candidate = createLlmClient();
|
|
40
|
+
const llmReady = await candidate.available();
|
|
41
|
+
if (llmReady) {
|
|
42
|
+
llm = candidate;
|
|
43
|
+
} else {
|
|
44
|
+
console.warn(
|
|
45
|
+
`LLM endpoint unreachable at ${llmUrl}, contextual retrieval disabled for this run`
|
|
46
|
+
);
|
|
47
|
+
llm = undefined;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
28
51
|
// Schema version check — force full re-embed when embedding algorithm changes
|
|
29
52
|
const SCHEMA_VERSION = "3";
|
|
30
53
|
const currentVersion = db.getMeta("schema_version");
|
|
@@ -74,6 +97,7 @@ export async function reindex(
|
|
|
74
97
|
const parsedDocs: ParsedDocument[] = [];
|
|
75
98
|
let indexed = 0;
|
|
76
99
|
let skipped = 0;
|
|
100
|
+
let totalChunks = 0;
|
|
77
101
|
for (const filePath of filesOnDisk) {
|
|
78
102
|
const absPath = resolve(filePath);
|
|
79
103
|
const mtime = Math.trunc(statSync(absPath).mtimeMs);
|
|
@@ -143,6 +167,35 @@ export async function reindex(
|
|
|
143
167
|
db.addRelationship(edge.sourceId, edge.targetId, "untyped", edge.context);
|
|
144
168
|
}
|
|
145
169
|
|
|
170
|
+
// Content-hash cache for Contextual Retrieval prefixes. The outer mtime
|
|
171
|
+
// skip at line ~75 already short-circuits the overwhelming majority of
|
|
172
|
+
// unchanged docs (no embedder or LLM calls). This inner hash check is
|
|
173
|
+
// specifically for the rare case where mtime differs but content is
|
|
174
|
+
// byte-identical (e.g., git checkout touching the file). When hash matches
|
|
175
|
+
// AND we have a live LLM AND chunks already exist, we reuse the prior
|
|
176
|
+
// context_prefix map and skip the per-chunk LLM round-trips.
|
|
177
|
+
//
|
|
178
|
+
// Simpler alternative considered: rely entirely on mtime. Rejected because
|
|
179
|
+
// the feature spec (Task 6.4 acceptance) explicitly requires re-running
|
|
180
|
+
// reindex without content changes to reuse existing context_prefix.
|
|
181
|
+
const contentHash = createHash("sha256").update(parsed.content).digest("hex").slice(0, 16);
|
|
182
|
+
const hashKey = `content_hash:${parsed.id}`;
|
|
183
|
+
const priorHash = db.getMeta(hashKey);
|
|
184
|
+
|
|
185
|
+
let cachedPrefixes: Map<number, string> | undefined;
|
|
186
|
+
if (llm && priorHash === contentHash) {
|
|
187
|
+
const priorChunks = db.db
|
|
188
|
+
.prepare(
|
|
189
|
+
"SELECT chunk_index, context_prefix FROM chunks WHERE document_id = ? ORDER BY chunk_index"
|
|
190
|
+
)
|
|
191
|
+
.all(parsed.id) as Array<{ chunk_index: number; context_prefix: string }>;
|
|
192
|
+
if (priorChunks.length > 0) {
|
|
193
|
+
cachedPrefixes = new Map(
|
|
194
|
+
priorChunks.map(r => [r.chunk_index, r.context_prefix ?? ""] as [number, string])
|
|
195
|
+
);
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
|
|
146
199
|
// Chunk-aware embedding: emit one embedding per chunk, persist to both
|
|
147
200
|
// the `chunks` table and the `documents_vec` virtual table with chunk ids
|
|
148
201
|
// of the form `${doc.id}#c${index}`.
|
|
@@ -156,9 +209,12 @@ export async function reindex(
|
|
|
156
209
|
vec.deleteEmbedding(parsed.id);
|
|
157
210
|
|
|
158
211
|
try {
|
|
159
|
-
const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content
|
|
212
|
+
const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content, {
|
|
213
|
+
llm,
|
|
214
|
+
cachedPrefixes,
|
|
215
|
+
});
|
|
160
216
|
const insertChunk = db.db.prepare(
|
|
161
|
-
"INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
|
|
217
|
+
"INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end, context_prefix) VALUES (?, ?, ?, ?, ?, ?, ?)"
|
|
162
218
|
);
|
|
163
219
|
for (const chunk of chunks) {
|
|
164
220
|
const chunkId = `${parsed.id}#c${chunk.index}`;
|
|
@@ -169,9 +225,16 @@ export async function reindex(
|
|
|
169
225
|
chunk.content,
|
|
170
226
|
chunk.charStart,
|
|
171
227
|
chunk.charEnd,
|
|
228
|
+
chunk.contextPrefix ?? "",
|
|
172
229
|
);
|
|
173
230
|
vec.upsertEmbedding(chunkId, chunk.embedding);
|
|
231
|
+
totalChunks++;
|
|
232
|
+
if (totalChunks % 50 === 0) {
|
|
233
|
+
console.log(` ${totalChunks} chunks embedded`);
|
|
234
|
+
}
|
|
174
235
|
}
|
|
236
|
+
// Record the content hash for the next reindex cache check.
|
|
237
|
+
db.setMeta(hashKey, contentHash);
|
|
175
238
|
} catch (e) {
|
|
176
239
|
console.warn(`Failed to embed ${id}: ${(e as Error).message}`);
|
|
177
240
|
}
|