ralph-hero-knowledge-index 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-knowledge",
3
- "version": "0.1.23",
3
+ "version": "0.1.24",
4
4
  "description": "Knowledge graph for ralph-hero: semantic search, relationship traversal, and document indexing across thoughts/ documents. Optional companion to ralph-hero.",
5
5
  "author": {
6
6
  "name": "Chad Dubiel",
package/.mcp.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "mcpServers": {
3
3
  "ralph-knowledge": {
4
4
  "command": "npx",
5
- "args": ["-y", "ralph-hero-knowledge-index@0.1.23"]
5
+ "args": ["-y", "ralph-hero-knowledge-index@0.1.24"]
6
6
  }
7
7
  }
8
8
  }
@@ -1,4 +1,31 @@
1
1
  import { type FeatureExtractionPipeline } from "@huggingface/transformers";
2
+ import { type Chunk, type ChunkerOptions } from "./chunker.js";
2
3
  export declare function getEmbedder(): Promise<FeatureExtractionPipeline>;
3
4
  export declare function embed(text: string): Promise<Float32Array>;
5
+ /**
6
+ * A chunk paired with the embedding of its (contextualized) content.
7
+ * Extends the base Chunk from the chunker module with an embedding vector
8
+ * and an optional contextPrefix (populated by Phase 6 — contextual retrieval).
9
+ */
10
+ export interface DocumentChunk extends Chunk {
11
+ embedding: Float32Array;
12
+ contextPrefix?: string;
13
+ }
14
+ /**
15
+ * Embed a document by splitting it into chunks and emitting one embedding
16
+ * per chunk. The embedded text for each chunk is
17
+ * `${title}\n${tagLine}\n${chunk.content}` so the semantic anchors (title +
18
+ * tags) travel with every chunk embedding — matching the shape of the legacy
19
+ * `prepareTextForEmbedding()` but without the 500-char truncation.
20
+ *
21
+ * Short documents (<= chunkSize) produce exactly one chunk covering the whole
22
+ * content. Empty content yields a single chunk with empty content (so callers
23
+ * still get a title/tag-only embedding for stub documents).
24
+ */
25
+ export declare function embedDocument(title: string, tags: string[], content: string, opts?: ChunkerOptions): Promise<DocumentChunk[]>;
26
+ /**
27
+ * Back-compat shim: kept so callers outside the reindex path can still build
28
+ * a title/tags/first-paragraph string. No longer used by `embedDocument` (the
29
+ * per-chunk flow prepends title + tags directly).
30
+ */
4
31
  export declare function prepareTextForEmbedding(title: string, tags: string[], content: string): string;
package/dist/embedder.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { pipeline, } from "@huggingface/transformers";
2
+ import { chunkText } from "./chunker.js";
2
3
  const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
3
- const MAX_CHARS = 500;
4
4
  let embedderInstance = null;
5
5
  export async function getEmbedder() {
6
6
  if (!embedderInstance) {
@@ -11,19 +11,58 @@ export async function getEmbedder() {
11
11
  }
12
12
  export async function embed(text) {
13
13
  const embedder = await getEmbedder();
14
- const truncated = text.slice(0, MAX_CHARS);
15
- const output = await embedder(truncated, {
14
+ // Pass text directly — the transformer's own 512-token window handles overflow.
15
+ const output = await embedder(text, {
16
16
  pooling: "mean",
17
17
  normalize: true,
18
18
  });
19
19
  return new Float32Array(output.data);
20
20
  }
21
+ /**
22
+ * Embed a document by splitting it into chunks and emitting one embedding
23
+ * per chunk. The embedded text for each chunk is
24
+ * `${title}\n${tagLine}\n${chunk.content}` so the semantic anchors (title +
25
+ * tags) travel with every chunk embedding — matching the shape of the legacy
26
+ * `prepareTextForEmbedding()` but without the 500-char truncation.
27
+ *
28
+ * Short documents (<= chunkSize) produce exactly one chunk covering the whole
29
+ * content. Empty content yields a single chunk with empty content (so callers
30
+ * still get a title/tag-only embedding for stub documents).
31
+ */
32
+ export async function embedDocument(title, tags, content, opts) {
33
+ const tagLine = tags.length > 0 ? tags.join(", ") : "";
34
+ // If content is empty, still emit one chunk so the document has a searchable
35
+ // embedding anchored on title + tags (preserves legacy behavior for
36
+ // frontmatter-only / stub documents).
37
+ const chunks = content.length === 0
38
+ ? [{ index: 0, content: "", charStart: 0, charEnd: 0 }]
39
+ : chunkText(content, opts);
40
+ const out = [];
41
+ for (const chunk of chunks) {
42
+ const parts = [title, tagLine, chunk.content].filter(p => p.length > 0);
43
+ const embedText = parts.join("\n");
44
+ const embedding = await embed(embedText);
45
+ out.push({
46
+ index: chunk.index,
47
+ content: chunk.content,
48
+ charStart: chunk.charStart,
49
+ charEnd: chunk.charEnd,
50
+ embedding,
51
+ });
52
+ }
53
+ return out;
54
+ }
55
+ /**
56
+ * Back-compat shim: kept so callers outside the reindex path can still build
57
+ * a title/tags/first-paragraph string. No longer used by `embedDocument` (the
58
+ * per-chunk flow prepends title + tags directly).
59
+ */
21
60
  export function prepareTextForEmbedding(title, tags, content) {
22
61
  const tagLine = tags.length > 0 ? tags.join(", ") : "";
23
62
  // Extract first paragraph: split on blank lines, take first non-empty segment
24
63
  const paragraphs = content.split(/\n\n+/);
25
64
  const firstParagraph = paragraphs.find(p => p.trim().length > 0)?.trim() ?? "";
26
65
  const parts = [title, tagLine, firstParagraph].filter(p => p.length > 0);
27
- return parts.join("\n").slice(0, MAX_CHARS);
66
+ return parts.join("\n");
28
67
  }
29
68
  //# sourceMappingURL=embedder.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GAET,MAAM,2BAA2B,CAAC;AAEnC,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAC3C,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB,IAAI,gBAAgB,GAAqC,IAAI,CAAC;AAE9D,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,mEAAmE;QACnE,gBAAgB,GAAG,CAAC,MAAM,QAAQ,CAChC,oBAAoB,EACpB,QAAQ,CACT,CAA8B,CAAC;IAClC,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;IACrC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE;QACvC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,IAAyB,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,KAAa,EACb,IAAc,EACd,OAAe;IAEf,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,8EAA8E;IAC9E,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/E,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;AAC9C,CAAC"}
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GAET,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,SAAS,EAAmC,MAAM,cAAc,CAAC;AAE1E,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAE3C,IAAI,gBAAgB,GAAqC,IAAI,CAAC;AAE9D,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,mEAAmE;QACnE,gBAAgB,GAAG,CAAC,MAAM,QAAQ,CAChC,oBAAoB,EACpB,QAAQ,CACT,CAA8B,CAAC;IAClC,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;IACrC,gFAAgF;IAChF,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE;QAClC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,IAAyB,CAAC,CAAC;AAC5D,CAAC;AAYD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,IAAc,EACd,OAAe,EACf,IAAqB;IAErB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEvD,6EAA6E;IAC7E,oEAAoE;IACpE,sCAAsC;IACtC,MAAM,MAAM,GAAY,OAAO,CAAC,MAAM,KAAK,CAAC;QAC1C,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;QACvD,CAAC,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAE7B,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACxE,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC;YACP,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAa,EACb,IAAc,EACd,OAAe;IAEf,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,8EAA8E;IAC9E,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EA
AE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/E,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
package/dist/reindex.js CHANGED
@@ -4,7 +4,7 @@ import { homedir } from "node:os";
4
4
  import { KnowledgeDB } from "./db.js";
5
5
  import { FtsSearch } from "./search.js";
6
6
  import { VectorSearch } from "./vector-search.js";
7
- import { embed, prepareTextForEmbedding } from "./embedder.js";
7
+ import { embedDocument } from "./embedder.js";
8
8
  import { parseDocument } from "./parser.js";
9
9
  import { findMarkdownFiles } from "./file-scanner.js";
10
10
  import { generateIndexes } from "./generate-indexes.js";
@@ -37,7 +37,10 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
37
37
  }
38
38
  console.log(`Found ${filesOnDisk.length} total markdown files`);
39
39
  const filesOnDiskSet = new Set(filesOnDisk.map(f => resolve(f)));
40
- // Phase 1: Delete stale entries for files no longer on disk
40
+ // Phase 1: Delete stale entries for files no longer on disk.
41
+ // Chunk rows cascade from documents via ON DELETE CASCADE on chunks.document_id,
42
+ // but the vec0 virtual table does not participate in FK cascades — we must
43
+ // explicitly delete chunk-level vec rows via GLOB pattern.
41
44
  const syncedPaths = db.getAllSyncPaths();
42
45
  let deleted = 0;
43
46
  for (const syncedPath of syncedPaths) {
@@ -45,6 +48,8 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
45
48
  const id = basename(syncedPath, ".md");
46
49
  fts.deleteFtsEntry(id);
47
50
  db.deleteDocument(id);
51
+ vec.deleteChunkVecsByDoc(id);
52
+ // Also delete any legacy doc-level vec row (pre-chunks schema).
48
53
  vec.deleteEmbedding(id);
49
54
  db.deleteSyncRecord(syncedPath);
50
55
  deleted++;
@@ -117,10 +122,25 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
117
122
  db.upsertStubDocument(edge.targetId);
118
123
  db.addRelationship(edge.sourceId, edge.targetId, "untyped", edge.context);
119
124
  }
120
- const text = prepareTextForEmbedding(parsed.title, parsed.tags, parsed.content);
125
+ // Chunk-aware embedding: emit one embedding per chunk, persist to both
126
+ // the `chunks` table and the `documents_vec` virtual table with chunk ids
127
+ // of the form `${doc.id}#c${index}`.
128
+ //
129
+ // We first clear any stale chunk rows for this doc_id (the document
130
+ // body may have shrunk across re-indexes) and stale chunk vec rows (which
131
+ // don't cascade from the `chunks` table because vec0 is a virtual table).
132
+ db.db.prepare("DELETE FROM chunks WHERE document_id = ?").run(parsed.id);
133
+ vec.deleteChunkVecsByDoc(parsed.id);
134
+ // Drop any pre-chunks schema vec row that used the bare doc id.
135
+ vec.deleteEmbedding(parsed.id);
121
136
  try {
122
- const embedding = await embed(text);
123
- vec.upsertEmbedding(parsed.id, embedding);
137
+ const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content);
138
+ const insertChunk = db.db.prepare("INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)");
139
+ for (const chunk of chunks) {
140
+ const chunkId = `${parsed.id}#c${chunk.index}`;
141
+ insertChunk.run(chunkId, parsed.id, chunk.index, chunk.content, chunk.charStart, chunk.charEnd);
142
+ vec.upsertEmbedding(chunkId, chunk.embedding);
143
+ }
124
144
  }
125
145
  catch (e) {
126
146
  console.warn(`Failed to embed ${id}: ${e.message}`);
@@ -1 +1 @@
1
- {"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,UAAU,IAAI,WA
AW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;Y
ACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,MAAM,IAAI,GAAG,uBAAuB,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;QAChF,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;YACpC,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC
,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IACjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,C
AAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
1
+ {"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,6DAA6D;IAC7D,iFAAiF;IACjF,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;I
AChB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC;YAC7B,gEAAgE;YAChE,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,
EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,uEAAuE;QACvE,0EAA0E;QAC1E,qCAAqC;QACrC,EAAE;QACF,oEAAoE;QACpE,0EAA0E;QAC1E,0EAA0E;QAC1E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACzE,GAAG,CAAC,oBAAoB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,gEAAgE;QAChE,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;YAC9E,MAAM,WAAW,GAAG,EAAE,CAAC,EAAE,CAAC,OAAO,CAC/B,4GAA4G,CAC7G,CAAC;YACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC/C,WAAW,CAAC,GAAG,CACb,OAAO,EACP,MAAM,CAAC,EAAE,EACT,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,CACd,CAAC;gBACF,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI
,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;
IACjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
@@ -12,5 +12,15 @@ export declare class VectorSearch {
12
12
  dropIndex(): void;
13
13
  upsertEmbedding(id: string, embedding: Float32Array): void;
14
14
  deleteEmbedding(id: string): void;
15
+ /**
16
+ * Delete all chunk-level vec rows for a document. Chunk ids follow the
17
+ * pattern `${docId}#c${index}` so we match via a SQLite GLOB.
18
+ *
19
+ * This is used by reindex to drop stale chunks when a source markdown file
20
+ * has been deleted or modified. Complements `ON DELETE CASCADE` on the
21
+ * `chunks` table (which deletes chunk rows but not their vec counterparts,
22
+ * because the vec0 virtual table does not participate in FK cascades).
23
+ */
24
+ deleteChunkVecsByDoc(docId: string): void;
15
25
  search(queryEmbedding: Float32Array, limit?: number): VectorResult[];
16
26
  }
@@ -42,6 +42,21 @@ export class VectorSearch {
42
42
  .prepare("DELETE FROM documents_vec WHERE id = ?")
43
43
  .run(id);
44
44
  }
45
+ /**
46
+ * Delete all chunk-level vec rows for a document. Chunk ids follow the
47
+ * pattern `${docId}#c${index}` so we match via a SQLite GLOB.
48
+ *
49
+ * This is used by reindex to drop stale chunks when a source markdown file
50
+ * has been deleted or modified. Complements `ON DELETE CASCADE` on the
51
+ * `chunks` table (which deletes chunk rows but not their vec counterparts,
52
+ * because the vec0 virtual table does not participate in FK cascades).
53
+ */
54
+ deleteChunkVecsByDoc(docId) {
55
+ this.ensureVecLoaded();
56
+ this.knowledgeDb.db
57
+ .prepare("DELETE FROM documents_vec WHERE id GLOB ?")
58
+ .run(`${docId}#c*`);
59
+ }
45
60
  search(queryEmbedding, limit = 10) {
46
61
  this.ensureVecLoaded();
47
62
  const buf = float32ToBuffer(queryEmbedding);
@@ -1 +1 @@
1
- {"version":3,"file":"vector-search.js","sourceRoot":"","sources":["../src/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAQxC,SAAS,eAAe,CAAC,GAAiB;IACxC,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,OAAO,YAAY;IAGH;IAFZ,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAoB,WAAwB;QAAxB,gBAAW,GAAX,WAAW,CAAa;IAAG,CAAC;IAExC,eAAe;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,WAAW;QACT,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;KAKxB,CAAC,CAAC;IACL,CAAC;IAED,SAAS;QACP,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjE,CAAC;IAED,eAAe,CAAC,EAAU,EAAE,SAAuB;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,yDAAyD,CAAC;aAClE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;IACb,CAAC;IAED,MAAM,CAAC,cAA4B,EAAE,QAAgB,EAAE;QACrD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;QAC5C,OAAO,IAAI,CAAC,WAAW,CAAC,EAAE;aACvB,OAAO,CACN;;;;;KAKH,CACE;aACA,GAAG,CAAC,GAAG,EAAE,KAAK,CAAmB,CAAC;IACvC,CAAC;CACF"}
1
+ {"version":3,"file":"vector-search.js","sourceRoot":"","sources":["../src/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAQxC,SAAS,eAAe,CAAC,GAAiB;IACxC,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,OAAO,YAAY;IAGH;IAFZ,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAoB,WAAwB;QAAxB,gBAAW,GAAX,WAAW,CAAa;IAAG,CAAC;IAExC,eAAe;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,WAAW;QACT,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;KAKxB,CAAC,CAAC;IACL,CAAC;IAED,SAAS;QACP,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjE,CAAC;IAED,eAAe,CAAC,EAAU,EAAE,SAAuB;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,yDAAyD,CAAC;aAClE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;IACb,CAAC;IAED;;;;;;;;OAQG;IACH,oBAAoB,CAAC,KAAa;QAChC,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,2CAA2C,CAAC;aACpD,GAAG,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC;IACxB,CAAC;IAED,MAAM,CAAC,cAA4B,EAAE,QAAgB,EAAE;QACrD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;QAC5C,OAAO,IAAI,CAAC,WAAW,CAAC,EAAE;aACvB,OAAO,CACN;;;;;KAKH,CACE;aACA,GAAG,CAAC,GAAG,EAAE,KAAK,CAAmB,CAAC;IACvC,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-hero-knowledge-index",
3
- "version": "0.1.23",
3
+ "version": "0.1.24",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -1,5 +1,20 @@
1
- import { describe, it, expect } from "vitest";
2
- import { prepareTextForEmbedding } from "../embedder.js";
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+
3
// Stub out @huggingface/transformers so unit tests never load the real ONNX
// model. `pipeline()` resolves to a fake extractor that records every input
// text in `embedCalls` and answers with a zero-filled 384-dim vector.
const embedCalls: string[] = [];
vi.mock("@huggingface/transformers", () => {
  async function fakePipeline(text: string, _opts: unknown) {
    embedCalls.push(text);
    return { data: new Float32Array(384) };
  }
  const pipeline = vi.fn(async () => fakePipeline);
  return { pipeline };
});
16
+
17
+ import { prepareTextForEmbedding, embedDocument } from "../embedder.js";
3
18
 
4
19
  describe("prepareTextForEmbedding", () => {
5
20
  it("includes title, tags, and first paragraph", () => {
@@ -40,14 +55,15 @@ describe("prepareTextForEmbedding", () => {
40
55
  expect(result).not.toContain("\n\n");
41
56
  });
42
57
 
43
it("no longer truncates at 500 chars (MAX_CHARS removed)", () => {
  const body = "A".repeat(600);
  const prepared = prepareTextForEmbedding("Title", ["tag1", "tag2"], body);

  // "Title" (5) + "\n" (1) + "tag1, tag2" (10) + "\n" (1) + 600 A's = 617.
  expect(prepared.length).toBe(617);
  // The title and tag anchors still lead the prepared text.
  expect(prepared.startsWith("Title\ntag1, tag2\n")).toBe(true);
});
53
69
 
@@ -98,3 +114,86 @@ describe("prepareTextForEmbedding", () => {
98
114
  expect(result).toBe("My Title\ngraphology, search\nFirst paragraph.");
99
115
  });
100
116
  });
117
+
118
describe("embedDocument", () => {
  // Each test starts with an empty record of the texts given to the fake pipeline.
  beforeEach(() => {
    embedCalls.splice(0, embedCalls.length);
  });

  it("returns exactly one chunk for short content", async () => {
    const chunks = await embedDocument("Title", ["tag"], "short content");
    expect(chunks).toHaveLength(1);

    const first = chunks[0]!;
    expect(first.index).toBe(0);
    expect(first.content).toBe("short content");
    expect(first.charStart).toBe(0);
    expect(first.charEnd).toBe("short content".length);
    expect(first.embedding).toBeInstanceOf(Float32Array);
  });

  it("embeds with title + tagLine + chunk.content prepended", async () => {
    await embedDocument("My Title", ["graphology", "search"], "body text");

    expect(embedCalls).toHaveLength(1);
    expect(embedCalls[0]).toBe("My Title\ngraphology, search\nbody text");
  });

  it("omits empty title/tags/content from the embed input", async () => {
    // No title and no tags: only the content is embedded.
    await embedDocument("", [], "only content here");
    expect(embedCalls).toContain("only content here");

    // No tags and no content: the single (empty) chunk is embedded as the title alone.
    embedCalls.length = 0;
    await embedDocument("Just Title", [], "");
    expect(embedCalls).toContain("Just Title");
  });

  it("yields >= 4 chunks for an 8K-char document", async () => {
    const chunks = await embedDocument("Title", [], "A".repeat(8000));

    expect(chunks.length).toBeGreaterThanOrEqual(4);
    // One pipeline call per chunk.
    expect(embedCalls).toHaveLength(chunks.length);
  });

  it("produces Float32Array embeddings of length 384", async () => {
    const chunks = await embedDocument("T", [], "hello world");
    const vector = chunks[0]!.embedding;

    expect(vector).toBeInstanceOf(Float32Array);
    expect(vector.length).toBe(384);
  });

  it("chunk indexes are monotonically increasing from 0", async () => {
    // Roughly 15K characters, enough for several chunks.
    const chunks = await embedDocument("T", [], "word ".repeat(3000));

    expect(chunks.length).toBeGreaterThan(1);
    chunks.forEach((chunk, position) => {
      expect(chunk.index).toBe(position);
    });
  });

  it("chunk offsets reconstruct the original content", async () => {
    const source = "A".repeat(5000);
    const chunks = await embedDocument("T", [], source);

    for (const { charStart, charEnd, content } of chunks) {
      expect(source.slice(charStart, charEnd)).toBe(content);
    }
  });

  it("empty content yields one chunk with empty content (anchors on title/tags)", async () => {
    const chunks = await embedDocument("Just Title", ["some-tag"], "");
    expect(chunks).toHaveLength(1);

    const stub = chunks[0]!;
    expect(stub.content).toBe("");
    expect(stub.charStart).toBe(0);
    expect(stub.charEnd).toBe(0);
    // The stub chunk was still embedded, using title + tag only.
    expect(embedCalls).toContain("Just Title\nsome-tag");
  });

  it("respects custom chunker options", async () => {
    const chunkerOptions = { chunkSize: 100, chunkOverlap: 10 };
    const chunks = await embedDocument("T", [], "A".repeat(500), chunkerOptions);

    // 500 chars at chunkSize=100 must split into more than one chunk.
    expect(chunks.length).toBeGreaterThan(1);
  });
});
@@ -1,24 +1,44 @@
1
- import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
2
  import { mkdtempSync, writeFileSync, mkdirSync, unlinkSync, utimesSync } from "node:fs";
3
3
  import { join, resolve } from "node:path";
4
4
  import { tmpdir } from "node:os";
5
5
  import { findMarkdownFiles } from "../file-scanner.js";
6
6
  import { FtsSearch } from "../search.js";
7
+ import { VectorSearch } from "../vector-search.js";
8
+
9
// Replace the embedder module so no transformer model is loaded during tests.
// The fake embedDocument returns one DocumentChunk per chunk of the content,
// each carrying a constant 384-dim embedding, which mirrors the chunk-aware
// reindex flow.
vi.mock("../embedder.js", async () => {
  // Pull in the real chunker so the mock splits content the same way as prod.
  const { chunkText: splitIntoChunks } = await import("../chunker.js");

  const embed = vi.fn(async () => new Float32Array(384));

  const embedDocument = vi.fn(async (_title: string, _tags: string[], content: string) => {
    // Empty content still produces a single empty chunk.
    const pieces = content.length > 0
      ? splitIntoChunks(content)
      : [{ index: 0, content: "", charStart: 0, charEnd: 0 }];
    return pieces.map(piece => ({
      index: piece.index,
      content: piece.content,
      charStart: piece.charStart,
      charEnd: piece.charEnd,
      embedding: new Float32Array(384),
    }));
  });

  const prepareTextForEmbedding = vi.fn((title: string, tags: string[], content: string) => {
    // Joining an empty tag list gives "", which the filter below drops.
    const tagLine = tags.join(", ");
    return [title, tagLine, content].filter(part => part.length > 0).join("\n");
  });

  return { embed, embedDocument, prepareTextForEmbedding };
});
7
36
 
8
- vi.mock("../embedder.js", () => ({
9
- embed: vi.fn(async () => new Float32Array(384)),
10
- prepareTextForEmbedding: vi.fn((title: string, tags: string[], content: string) => {
11
- const tagLine = tags.length > 0 ? tags.join(", ") : "";
12
- const parts = [title, tagLine, content].filter(p => p.length > 0);
13
- return parts.join("\n").slice(0, 500);
14
- }),
15
- }));
16
-
17
- import { embed } from "../embedder.js";
18
- import { reindex, resolveDirs } from "../reindex.js";
37
+ import { embedDocument } from "../embedder.js";
38
+ import { reindex } from "../reindex.js";
19
39
  import { KnowledgeDB } from "../db.js";
20
40
 
21
- const mockedEmbed = vi.mocked(embed);
41
+ const mockedEmbed = vi.mocked(embedDocument);
22
42
 
23
43
  function makeDoc(title: string): string {
24
44
  return `---\ndate: 2026-03-24\ntype: research\nstatus: draft\n---\n\n# ${title}\n\nContent for ${title}.`;
@@ -354,149 +374,159 @@ describe("incremental reindex", () => {
354
374
  db1.close();
355
375
  });
356
376
 
357
- it("scenario 13: reindex honors .ralphignore for file discovery", async () => {
358
- writeFileSync(join(dir, "kept.md"), makeDoc("Kept"));
359
- writeFileSync(join(dir, "skipped.md"), makeDoc("Skipped"));
360
- writeFileSync(join(dir, ".ralphignore"), "skipped.md\n");
377
+ it("scenario 13: 8K-char document produces >= 4 chunk rows", async () => {
378
+ const longBody = "A".repeat(8000);
379
+ writeFileSync(
380
+ join(dir, "long-doc.md"),
381
+ `---\ndate: 2026-03-24\ntype: research\nstatus: draft\n---\n\n# Long Doc\n\n${longBody}`,
382
+ );
361
383
 
362
384
  await reindex([dir], dbPath);
363
- expect(mockedEmbed).toHaveBeenCalledTimes(1);
364
385
 
365
386
  const db = new KnowledgeDB(dbPath);
366
- expect(db.getDocument("kept")).toBeTruthy();
367
- expect(db.getDocument("skipped")).toBeUndefined();
387
+ const row = db.db
388
+ .prepare("SELECT COUNT(*) as n FROM chunks WHERE document_id = ?")
389
+ .get("long-doc") as { n: number };
390
+ expect(row.n).toBeGreaterThanOrEqual(4);
368
391
  db.close();
369
392
  });
370
393
 
371
- it("scenario 14: reindex honors caller-supplied ignorePatterns arg", async () => {
372
- writeFileSync(join(dir, "kept.md"), makeDoc("Kept"));
373
- mkdirSync(join(dir, "drafts"));
374
- writeFileSync(join(dir, "drafts", "wip.md"), makeDoc("WIP"));
394
+ it("scenario 14: documents_vec row count equals total chunk count", async () => {
395
+ writeFileSync(join(dir, "doc-a.md"), makeDoc("Doc A"));
396
+ writeFileSync(join(dir, "doc-b.md"), makeDoc("Doc B"));
397
+ const longBody = "A".repeat(6000);
398
+ writeFileSync(
399
+ join(dir, "long-doc.md"),
400
+ `---\ndate: 2026-03-24\ntype: research\nstatus: draft\n---\n\n# Long Doc\n\n${longBody}`,
401
+ );
375
402
 
376
- await reindex([dir], dbPath, false, ["drafts/**"]);
377
- // Only kept.md should have been embedded.
378
- expect(mockedEmbed).toHaveBeenCalledTimes(1);
403
+ await reindex([dir], dbPath);
379
404
 
380
405
  const db = new KnowledgeDB(dbPath);
381
- expect(db.getDocument("kept")).toBeTruthy();
382
- expect(db.getDocument("wip")).toBeUndefined();
406
+ // Instantiating VectorSearch loads sqlite-vec so documents_vec is queryable.
407
+ new VectorSearch(db).createIndex();
408
+ const chunksRow = db.db.prepare("SELECT COUNT(*) as n FROM chunks").get() as {
409
+ n: number;
410
+ };
411
+ const vecRow = db.db
412
+ .prepare("SELECT COUNT(*) as n FROM documents_vec")
413
+ .get() as { n: number };
414
+ expect(vecRow.n).toBe(chunksRow.n);
415
+ expect(chunksRow.n).toBeGreaterThanOrEqual(3); // at least one per doc
383
416
  db.close();
384
417
  });
385
- });
386
418
 
387
- describe("resolveDirs precedence", () => {
388
- const ORIGINAL_ARGV = process.argv;
389
- const ORIGINAL_ENV = {
390
- RALPH_KNOWLEDGE_DIRS: process.env.RALPH_KNOWLEDGE_DIRS,
391
- RALPH_KNOWLEDGE_DB: process.env.RALPH_KNOWLEDGE_DB,
392
- RALPH_KNOWLEDGE_CONFIG: process.env.RALPH_KNOWLEDGE_CONFIG,
393
- };
394
- let tmpHome: string;
395
- let configDir: string;
419
+ it("scenario 15: chunk ids follow pattern {docId}#c{index}", async () => {
420
+ const longBody = "A".repeat(6000);
421
+ writeFileSync(
422
+ join(dir, "long-doc.md"),
423
+ `---\ndate: 2026-03-24\ntype: research\nstatus: draft\n---\n\n# Long Doc\n\n${longBody}`,
424
+ );
396
425
 
397
- beforeEach(() => {
398
- process.argv = ["node", "reindex.js"];
399
- delete process.env.RALPH_KNOWLEDGE_DIRS;
400
- delete process.env.RALPH_KNOWLEDGE_DB;
401
- configDir = mkdtempSync(join(tmpdir(), "resolve-dirs-"));
402
- tmpHome = configDir;
403
- process.env.RALPH_KNOWLEDGE_CONFIG = join(configDir, "knowledge.config.json");
404
- });
426
+ await reindex([dir], dbPath);
405
427
 
406
- afterEach(() => {
407
- process.argv = ORIGINAL_ARGV;
408
- for (const key of Object.keys(ORIGINAL_ENV) as (keyof typeof ORIGINAL_ENV)[]) {
409
- const orig = ORIGINAL_ENV[key];
410
- if (orig === undefined) {
411
- delete process.env[key];
412
- } else {
413
- process.env[key] = orig;
414
- }
428
+ const db = new KnowledgeDB(dbPath);
429
+ new VectorSearch(db).createIndex();
430
+ const rows = db.db
431
+ .prepare("SELECT id, chunk_index FROM chunks WHERE document_id = ? ORDER BY chunk_index")
432
+ .all("long-doc") as Array<{ id: string; chunk_index: number }>;
433
+ expect(rows.length).toBeGreaterThan(1);
434
+ const idPattern = /^long-doc#c\d+$/;
435
+ for (const r of rows) {
436
+ expect(r.id).toMatch(idPattern);
437
+ expect(r.id).toBe(`long-doc#c${r.chunk_index}`);
415
438
  }
439
+ // Verify documents_vec ids also follow the pattern for this doc.
440
+ const vecRows = db.db
441
+ .prepare("SELECT id FROM documents_vec WHERE id GLOB ?")
442
+ .all("long-doc#c*") as Array<{ id: string }>;
443
+ expect(vecRows.length).toBe(rows.length);
444
+ for (const v of vecRows) {
445
+ expect(v.id).toMatch(idPattern);
446
+ }
447
+ db.close();
416
448
  });
417
449
 
418
- it("CLI positional args beat env var even when both are set", () => {
450
+ it("scenario 16: deleting source file removes its chunks and vec rows", async () => {
451
+ const filePath = join(dir, "disposable.md");
452
+ const longBody = "A".repeat(6000);
419
453
  writeFileSync(
420
- process.env.RALPH_KNOWLEDGE_CONFIG!,
421
- JSON.stringify({ roots: ["/from/config"] }),
454
+ filePath,
455
+ `---\ndate: 2026-03-24\ntype: research\nstatus: draft\n---\n\n# Disposable\n\n${longBody}`,
422
456
  );
423
- process.argv = ["node", "reindex.js", "/from/cli"];
424
- process.env.RALPH_KNOWLEDGE_DIRS = "/from/env";
425
- const r = resolveDirs();
426
- expect(r.source).toBe("cli");
427
- expect(r.dirs).toEqual(["/from/cli"]);
428
- });
457
+ writeFileSync(join(dir, "keeper.md"), makeDoc("Keeper"));
429
458
 
430
- it("env var beats config file roots when CLI is empty", () => {
431
- writeFileSync(
432
- process.env.RALPH_KNOWLEDGE_CONFIG!,
433
- JSON.stringify({ roots: ["/from/config"] }),
434
- );
435
- process.env.RALPH_KNOWLEDGE_DIRS = "/from/env-a,/from/env-b";
436
- const r = resolveDirs();
437
- expect(r.source).toBe("env");
438
- expect(r.dirs).toEqual(["/from/env-a", "/from/env-b"]);
439
- });
459
+ await reindex([dir], dbPath);
440
460
 
441
- it("config file roots beat fallback when CLI and env are absent", () => {
442
- writeFileSync(
443
- process.env.RALPH_KNOWLEDGE_CONFIG!,
444
- JSON.stringify({ roots: ["/from/config-a", "/from/config-b"] }),
445
- );
446
- const r = resolveDirs();
447
- expect(r.source).toBe("config");
448
- expect(r.dirs).toEqual(["/from/config-a", "/from/config-b"]);
449
- });
461
+ const db1 = new KnowledgeDB(dbPath);
462
+ new VectorSearch(db1).createIndex();
463
+ const chunksBefore = db1.db
464
+ .prepare("SELECT COUNT(*) as n FROM chunks WHERE document_id = ?")
465
+ .get("disposable") as { n: number };
466
+ expect(chunksBefore.n).toBeGreaterThan(1);
467
+ const vecsBefore = db1.db
468
+ .prepare("SELECT COUNT(*) as n FROM documents_vec WHERE id GLOB ?")
469
+ .get("disposable#c*") as { n: number };
470
+ expect(vecsBefore.n).toBe(chunksBefore.n);
471
+ db1.close();
450
472
 
451
- it("falls back to ../../thoughts when no source is configured", () => {
452
- // Point env var at a nonexistent config path so loadConfig returns {}.
453
- process.env.RALPH_KNOWLEDGE_CONFIG = join(configDir, "missing.json");
454
- const r = resolveDirs();
455
- expect(r.source).toBe("fallback");
456
- expect(r.dirs).toEqual(["../../thoughts"]);
457
- });
473
+ unlinkSync(filePath);
474
+ await reindex([dir], dbPath);
458
475
 
459
- it("dbPath precedence: CLI arg > env var > config > default", () => {
460
- writeFileSync(
461
- process.env.RALPH_KNOWLEDGE_CONFIG!,
462
- JSON.stringify({ roots: ["/x"], dbPath: "/from/config.db" }),
463
- );
464
- // CLI wins
465
- process.argv = ["node", "reindex.js", "/cli/root", "/cli/override.db"];
466
- process.env.RALPH_KNOWLEDGE_DB = "/from/env.db";
467
- expect(resolveDirs().dbPath).toBe("/cli/override.db");
468
-
469
- // Env wins over config when CLI is absent
470
- process.argv = ["node", "reindex.js"];
471
- process.env.RALPH_KNOWLEDGE_DIRS = "/env/root";
472
- process.env.RALPH_KNOWLEDGE_DB = "/from/env.db";
473
- expect(resolveDirs().dbPath).toBe("/from/env.db");
474
-
475
- // Config wins when neither CLI nor env set dbPath
476
- delete process.env.RALPH_KNOWLEDGE_DB;
477
- expect(resolveDirs().dbPath).toBe("/from/config.db");
476
+ const db2 = new KnowledgeDB(dbPath);
477
+ new VectorSearch(db2).createIndex();
478
+ // Document gone -> chunks cascaded.
479
+ expect(db2.getDocument("disposable")).toBeUndefined();
480
+ const chunksAfter = db2.db
481
+ .prepare("SELECT COUNT(*) as n FROM chunks WHERE document_id = ?")
482
+ .get("disposable") as { n: number };
483
+ expect(chunksAfter.n).toBe(0);
484
+ // Vec rows for the deleted doc are gone (GLOB-based cleanup).
485
+ const vecsAfter = db2.db
486
+ .prepare("SELECT COUNT(*) as n FROM documents_vec WHERE id GLOB ?")
487
+ .get("disposable#c*") as { n: number };
488
+ expect(vecsAfter.n).toBe(0);
489
+ // The kept doc still has its chunks.
490
+ const keeperChunks = db2.db
491
+ .prepare("SELECT COUNT(*) as n FROM chunks WHERE document_id = ?")
492
+ .get("keeper") as { n: number };
493
+ expect(keeperChunks.n).toBeGreaterThanOrEqual(1);
494
+ db2.close();
478
495
  });
479
496
 
480
- it("forwards config.ignorePatterns on the returned config object", () => {
497
+ it("scenario 17: re-indexing same file does not duplicate chunks", async () => {
498
+ const filePath = join(dir, "stable.md");
499
+ const body = "A".repeat(6000);
481
500
  writeFileSync(
482
- process.env.RALPH_KNOWLEDGE_CONFIG!,
483
- JSON.stringify({
484
- roots: ["/r1"],
485
- ignorePatterns: ["draft/**", "*.bak"],
486
- }),
501
+ filePath,
502
+ `---\ndate: 2026-03-24\ntype: research\nstatus: draft\n---\n\n# Stable\n\n${body}`,
487
503
  );
488
- const r = resolveDirs();
489
- expect(r.config.ignorePatterns).toEqual(["draft/**", "*.bak"]);
490
- });
491
504
 
492
- it("treats an empty RALPH_KNOWLEDGE_DIRS as unset and falls through", () => {
493
- writeFileSync(
494
- process.env.RALPH_KNOWLEDGE_CONFIG!,
495
- JSON.stringify({ roots: ["/from/config"] }),
496
- );
497
- process.env.RALPH_KNOWLEDGE_DIRS = " , ";
498
- const r = resolveDirs();
499
- expect(r.source).toBe("config");
500
- expect(r.dirs).toEqual(["/from/config"]);
505
+ await reindex([dir], dbPath);
506
+ const db1 = new KnowledgeDB(dbPath);
507
+ const firstCount = (db1.db
508
+ .prepare("SELECT COUNT(*) as n FROM chunks WHERE document_id = ?")
509
+ .get("stable") as { n: number }).n;
510
+ db1.close();
511
+ expect(firstCount).toBeGreaterThan(1);
512
+
513
+ // Bump mtime to force re-embed.
514
+ const future = Date.now() / 1000 + 2;
515
+ utimesSync(filePath, future, future);
516
+
517
+ await reindex([dir], dbPath);
518
+ const db2 = new KnowledgeDB(dbPath);
519
+ new VectorSearch(db2).createIndex();
520
+ const secondCount = (db2.db
521
+ .prepare("SELECT COUNT(*) as n FROM chunks WHERE document_id = ?")
522
+ .get("stable") as { n: number }).n;
523
+ // Stale deletion before insert means chunk count stays the same, not 2x.
524
+ expect(secondCount).toBe(firstCount);
525
+ // And vec rows should match.
526
+ const vecCount = (db2.db
527
+ .prepare("SELECT COUNT(*) as n FROM documents_vec WHERE id GLOB ?")
528
+ .get("stable#c*") as { n: number }).n;
529
+ expect(vecCount).toBe(secondCount);
530
+ db2.close();
501
531
  });
502
532
  });
package/src/embedder.ts CHANGED
@@ -2,9 +2,9 @@ import {
2
2
  pipeline,
3
3
  type FeatureExtractionPipeline,
4
4
  } from "@huggingface/transformers";
5
+ import { chunkText, type Chunk, type ChunkerOptions } from "./chunker.js";
5
6
 
6
7
  const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
7
- const MAX_CHARS = 500;
8
8
 
9
9
  let embedderInstance: FeatureExtractionPipeline | null = null;
10
10
 
@@ -21,14 +21,71 @@ export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
21
21
 
22
22
  export async function embed(text: string): Promise<Float32Array> {
23
23
  const embedder = await getEmbedder();
24
- const truncated = text.slice(0, MAX_CHARS);
25
- const output = await embedder(truncated, {
24
+ // Pass text directly — the transformer's own 512-token window handles overflow.
25
+ const output = await embedder(text, {
26
26
  pooling: "mean",
27
27
  normalize: true,
28
28
  });
29
29
  return new Float32Array(output.data as ArrayLike<number>);
30
30
  }
31
31
 
32
/**
 * One chunk of a document together with the embedding computed for it.
 *
 * Inherits every field of the chunker module's `Chunk` and adds the two
 * members below.
 */
export interface DocumentChunk extends Chunk {
  /**
   * Optional context prefix. The original note reserves it for "Phase 6 —
   * contextual retrieval".
   */
  contextPrefix?: string;
  /** Embedding vector of the chunk's (contextualized) content. */
  embedding: Float32Array;
}
41
+
42
/**
 * Split a document into chunks and compute one embedding per chunk.
 *
 * The text embedded for a chunk is the title, the comma-separated tag line and
 * the chunk content joined by newlines, with empty parts left out. The title
 * and tags therefore accompany every chunk embedding. This has the same shape
 * as the legacy `prepareTextForEmbedding()`, minus the 500-char truncation.
 *
 * Non-empty content is split by `chunkText(content, opts)`. Empty content is
 * not passed to the chunker: it yields a single empty chunk at offsets 0..0,
 * so a stub document still receives a title/tag-only embedding.
 *
 * Chunks are embedded one after another, in chunk order.
 *
 * @param title Document title, used as the first anchor line.
 * @param tags Document tags, joined with ", " as the second anchor line.
 * @param content Document body to chunk.
 * @param opts Optional chunker settings, forwarded to `chunkText`.
 * @returns One `DocumentChunk` per chunk, in the order the chunks were produced.
 */
export async function embedDocument(
  title: string,
  tags: string[],
  content: string,
  opts?: ChunkerOptions,
): Promise<DocumentChunk[]> {
  // Title and tag line are identical for every chunk, so build them once.
  // Joining an empty tag list gives "", which the filter removes.
  const anchors = [title, tags.join(", ")].filter(part => part.length > 0);

  let pieces: Chunk[];
  if (content.length > 0) {
    pieces = chunkText(content, opts);
  } else {
    // Frontmatter-only / stub document: keep one searchable, empty chunk.
    pieces = [{ index: 0, content: "", charStart: 0, charEnd: 0 }];
  }

  const embedded: DocumentChunk[] = [];
  for (const { index, content: body, charStart, charEnd } of pieces) {
    const lines = body.length > 0 ? [...anchors, body] : anchors;
    const embedding = await embed(lines.join("\n"));
    embedded.push({ index, content: body, charStart, charEnd, embedding });
  }
  return embedded;
}
83
+
84
+ /**
85
+ * Back-compat shim: kept so callers outside the reindex path can still build
86
+ * a title/tags/first-paragraph string. No longer used by `embedDocument` (the
87
+ * per-chunk flow prepends title + tags directly).
88
+ */
32
89
  export function prepareTextForEmbedding(
33
90
  title: string,
34
91
  tags: string[],
@@ -39,5 +96,5 @@ export function prepareTextForEmbedding(
39
96
  const paragraphs = content.split(/\n\n+/);
40
97
  const firstParagraph = paragraphs.find(p => p.trim().length > 0)?.trim() ?? "";
41
98
  const parts = [title, tagLine, firstParagraph].filter(p => p.length > 0);
42
- return parts.join("\n").slice(0, MAX_CHARS);
99
+ return parts.join("\n");
43
100
  }
package/src/reindex.ts CHANGED
@@ -4,7 +4,7 @@ import { homedir } from "node:os";
4
4
  import { KnowledgeDB } from "./db.js";
5
5
  import { FtsSearch } from "./search.js";
6
6
  import { VectorSearch } from "./vector-search.js";
7
- import { embed, prepareTextForEmbedding } from "./embedder.js";
7
+ import { embedDocument } from "./embedder.js";
8
8
  import { parseDocument, type ParsedDocument } from "./parser.js";
9
9
  import { findMarkdownFiles } from "./file-scanner.js";
10
10
  import { generateIndexes } from "./generate-indexes.js";
@@ -48,7 +48,10 @@ export async function reindex(
48
48
 
49
49
  const filesOnDiskSet = new Set(filesOnDisk.map(f => resolve(f)));
50
50
 
51
- // Phase 1: Delete stale entries for files no longer on disk
51
+ // Phase 1: Delete stale entries for files no longer on disk.
52
+ // Chunk rows cascade from documents via ON DELETE CASCADE on chunks.document_id,
53
+ // but the vec0 virtual table does not participate in FK cascades — we must
54
+ // explicitly delete chunk-level vec rows via GLOB pattern.
52
55
  const syncedPaths = db.getAllSyncPaths();
53
56
  let deleted = 0;
54
57
  for (const syncedPath of syncedPaths) {
@@ -56,6 +59,8 @@ export async function reindex(
56
59
  const id = basename(syncedPath, ".md");
57
60
  fts.deleteFtsEntry(id);
58
61
  db.deleteDocument(id);
62
+ vec.deleteChunkVecsByDoc(id);
63
+ // Also delete any legacy doc-level vec row (pre-chunks schema).
59
64
  vec.deleteEmbedding(id);
60
65
  db.deleteSyncRecord(syncedPath);
61
66
  deleted++;
@@ -138,10 +143,35 @@ export async function reindex(
138
143
  db.addRelationship(edge.sourceId, edge.targetId, "untyped", edge.context);
139
144
  }
140
145
 
141
- const text = prepareTextForEmbedding(parsed.title, parsed.tags, parsed.content);
146
+ // Chunk-aware embedding: emit one embedding per chunk, persist to both
147
+ // the `chunks` table and the `documents_vec` virtual table with chunk ids
148
+ // of the form `${doc.id}#c${index}`.
149
+ //
150
+ // We first clear any stale chunk rows for this doc_id (the document
151
+ // body may have shrunk across re-indexes) and stale chunk vec rows (which
152
+ // don't cascade from the `chunks` table because vec0 is a virtual table).
153
+ db.db.prepare("DELETE FROM chunks WHERE document_id = ?").run(parsed.id);
154
+ vec.deleteChunkVecsByDoc(parsed.id);
155
+ // Drop any pre-chunks schema vec row that used the bare doc id.
156
+ vec.deleteEmbedding(parsed.id);
157
+
142
158
  try {
143
- const embedding = await embed(text);
144
- vec.upsertEmbedding(parsed.id, embedding);
159
+ const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content);
160
+ const insertChunk = db.db.prepare(
161
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
162
+ );
163
+ for (const chunk of chunks) {
164
+ const chunkId = `${parsed.id}#c${chunk.index}`;
165
+ insertChunk.run(
166
+ chunkId,
167
+ parsed.id,
168
+ chunk.index,
169
+ chunk.content,
170
+ chunk.charStart,
171
+ chunk.charEnd,
172
+ );
173
+ vec.upsertEmbedding(chunkId, chunk.embedding);
174
+ }
145
175
  } catch (e) {
146
176
  console.warn(`Failed to embed ${id}: ${(e as Error).message}`);
147
177
  }
@@ -54,6 +54,22 @@ export class VectorSearch {
54
54
  .run(id);
55
55
  }
56
56
 
57
/**
 * Remove every chunk-level row that belongs to one document from
 * `documents_vec`. Chunk ids have the form `${docId}#c${index}`, so the rows
 * are selected with a SQLite GLOB on that prefix.
 *
 * Reindex uses this when a source markdown file was deleted or modified: the
 * `chunks` table is cleaned up by `ON DELETE CASCADE`, but the vec0 virtual
 * table takes no part in FK cascades and must be cleared explicitly.
 *
 * The characters `*`, `?` and `[` are GLOB metacharacters. Any occurrence in
 * `docId` is wrapped in a one-character class so the id is matched literally;
 * without that, an id such as `notes[1]` would miss its own rows and could
 * delete the rows of a different document.
 *
 * @param docId Document id whose chunk vectors should be deleted.
 */
deleteChunkVecsByDoc(docId: string): void {
  this.ensureVecLoaded();
  // GLOB has no ESCAPE clause, so each metacharacter is bracketed instead.
  const literalId = docId.replace(/[*?\[]/g, "[$&]");
  this.knowledgeDb.db
    .prepare("DELETE FROM documents_vec WHERE id GLOB ?")
    .run(`${literalId}#c*`);
}
72
+
57
73
  search(queryEmbedding: Float32Array, limit: number = 10): VectorResult[] {
58
74
  this.ensureVecLoaded();
59
75
  const buf = float32ToBuffer(queryEmbedding);