ralph-hero-knowledge-index 0.1.23 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-knowledge",
3
- "version": "0.1.23",
3
+ "version": "0.1.25",
4
4
  "description": "Knowledge graph for ralph-hero: semantic search, relationship traversal, and document indexing across thoughts/ documents. Optional companion to ralph-hero.",
5
5
  "author": {
6
6
  "name": "Chad Dubiel",
package/.mcp.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "mcpServers": {
3
3
  "ralph-knowledge": {
4
4
  "command": "npx",
5
- "args": ["-y", "ralph-hero-knowledge-index@0.1.23"]
5
+ "args": ["-y", "ralph-hero-knowledge-index@0.1.25"]
6
6
  }
7
7
  }
8
8
  }
@@ -1,4 +1,31 @@
1
1
  import { type FeatureExtractionPipeline } from "@huggingface/transformers";
2
+ import { type Chunk, type ChunkerOptions } from "./chunker.js";
2
3
  export declare function getEmbedder(): Promise<FeatureExtractionPipeline>;
3
4
  export declare function embed(text: string): Promise<Float32Array>;
5
+ /**
6
+ * A chunk paired with the embedding of its (contextualized) content.
7
+ * Extends the base Chunk from the chunker module with an embedding vector
8
+ * and an optional contextPrefix (populated by Phase 6 — contextual retrieval).
9
+ */
10
+ export interface DocumentChunk extends Chunk {
11
+ embedding: Float32Array;
12
+ contextPrefix?: string;
13
+ }
14
+ /**
15
+ * Embed a document by splitting it into chunks and emitting one embedding
16
+ * per chunk. The embedded text for each chunk is
17
+ * `${title}\n${tagLine}\n${chunk.content}` so the semantic anchors (title +
18
+ * tags) travel with every chunk embedding — matching the shape of the legacy
19
+ * `prepareTextForEmbedding()` but without the 500-char truncation.
20
+ *
21
+ * Short documents (<= chunkSize) produce exactly one chunk covering the whole
22
+ * content. Empty content yields a single chunk with empty content (so callers
23
+ * still get a title/tag-only embedding for stub documents).
24
+ */
25
+ export declare function embedDocument(title: string, tags: string[], content: string, opts?: ChunkerOptions): Promise<DocumentChunk[]>;
26
+ /**
27
+ * Back-compat shim: kept so callers outside the reindex path can still build
28
+ * a title/tags/first-paragraph string. No longer used by `embedDocument` (the
29
+ * per-chunk flow prepends title + tags directly).
30
+ */
4
31
  export declare function prepareTextForEmbedding(title: string, tags: string[], content: string): string;
package/dist/embedder.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { pipeline, } from "@huggingface/transformers";
2
+ import { chunkText } from "./chunker.js";
2
3
  const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
3
- const MAX_CHARS = 500;
4
4
  let embedderInstance = null;
5
5
  export async function getEmbedder() {
6
6
  if (!embedderInstance) {
@@ -11,19 +11,58 @@ export async function getEmbedder() {
11
11
  }
12
12
  export async function embed(text) {
13
13
  const embedder = await getEmbedder();
14
- const truncated = text.slice(0, MAX_CHARS);
15
- const output = await embedder(truncated, {
14
+ // Pass text directly — the transformer's own 512-token window handles overflow.
15
+ const output = await embedder(text, {
16
16
  pooling: "mean",
17
17
  normalize: true,
18
18
  });
19
19
  return new Float32Array(output.data);
20
20
  }
21
+ /**
22
+ * Embed a document by splitting it into chunks and emitting one embedding
23
+ * per chunk. The embedded text for each chunk is
24
+ * `${title}\n${tagLine}\n${chunk.content}` so the semantic anchors (title +
25
+ * tags) travel with every chunk embedding — matching the shape of the legacy
26
+ * `prepareTextForEmbedding()` but without the 500-char truncation.
27
+ *
28
+ * Short documents (<= chunkSize) produce exactly one chunk covering the whole
29
+ * content. Empty content yields a single chunk with empty content (so callers
30
+ * still get a title/tag-only embedding for stub documents).
31
+ */
32
+ export async function embedDocument(title, tags, content, opts) {
33
+ const tagLine = tags.length > 0 ? tags.join(", ") : "";
34
+ // If content is empty, still emit one chunk so the document has a searchable
35
+ // embedding anchored on title + tags (preserves legacy behavior for
36
+ // frontmatter-only / stub documents).
37
+ const chunks = content.length === 0
38
+ ? [{ index: 0, content: "", charStart: 0, charEnd: 0 }]
39
+ : chunkText(content, opts);
40
+ const out = [];
41
+ for (const chunk of chunks) {
42
+ const parts = [title, tagLine, chunk.content].filter(p => p.length > 0);
43
+ const embedText = parts.join("\n");
44
+ const embedding = await embed(embedText);
45
+ out.push({
46
+ index: chunk.index,
47
+ content: chunk.content,
48
+ charStart: chunk.charStart,
49
+ charEnd: chunk.charEnd,
50
+ embedding,
51
+ });
52
+ }
53
+ return out;
54
+ }
55
+ /**
56
+ * Back-compat shim: kept so callers outside the reindex path can still build
57
+ * a title/tags/first-paragraph string. No longer used by `embedDocument` (the
58
+ * per-chunk flow prepends title + tags directly).
59
+ */
21
60
  export function prepareTextForEmbedding(title, tags, content) {
22
61
  const tagLine = tags.length > 0 ? tags.join(", ") : "";
23
62
  // Extract first paragraph: split on blank lines, take first non-empty segment
24
63
  const paragraphs = content.split(/\n\n+/);
25
64
  const firstParagraph = paragraphs.find(p => p.trim().length > 0)?.trim() ?? "";
26
65
  const parts = [title, tagLine, firstParagraph].filter(p => p.length > 0);
27
- return parts.join("\n").slice(0, MAX_CHARS);
66
+ return parts.join("\n");
28
67
  }
29
68
  //# sourceMappingURL=embedder.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GAET,MAAM,2BAA2B,CAAC;AAEnC,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAC3C,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB,IAAI,gBAAgB,GAAqC,IAAI,CAAC;AAE9D,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,mEAAmE;QACnE,gBAAgB,GAAG,CAAC,MAAM,QAAQ,CAChC,oBAAoB,EACpB,QAAQ,CACT,CAA8B,CAAC;IAClC,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;IACrC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE;QACvC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,IAAyB,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,KAAa,EACb,IAAc,EACd,OAAe;IAEf,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,8EAA8E;IAC9E,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/E,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;AAC9C,CAAC"}
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,GAET,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,SAAS,EAAmC,MAAM,cAAc,CAAC;AAE1E,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAE3C,IAAI,gBAAgB,GAAqC,IAAI,CAAC;AAE9D,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,mEAAmE;QACnE,gBAAgB,GAAG,CAAC,MAAM,QAAQ,CAChC,oBAAoB,EACpB,QAAQ,CACT,CAA8B,CAAC;IAClC,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;IACrC,gFAAgF;IAChF,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE;QAClC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,IAAyB,CAAC,CAAC;AAC5D,CAAC;AAYD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,IAAc,EACd,OAAe,EACf,IAAqB;IAErB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEvD,6EAA6E;IAC7E,oEAAoE;IACpE,sCAAsC;IACtC,MAAM,MAAM,GAAY,OAAO,CAAC,MAAM,KAAK,CAAC;QAC1C,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;QACvD,CAAC,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAE7B,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACxE,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC;YACP,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAa,EACb,IAAc,EACd,OAAe;IAEf,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,8EAA8E;IAC9E,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EA
AE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC/E,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
@@ -1,3 +1,8 @@
1
+ /**
2
+ * Maximum snippet length (in characters) when the snippet is sourced from a
3
+ * chunk's content. Keeps the MCP payload compact while still representative.
4
+ */
5
+ const SNIPPET_MAX_CHARS = 300;
1
6
  export class HybridSearch {
2
7
  db;
3
8
  fts;
@@ -53,9 +58,22 @@ export class HybridSearch {
53
58
  });
54
59
  const queryEmbedding = await this.embedFn(query);
55
60
  const vecResults = this.vec.search(queryEmbedding, limit * 2);
56
- // Build RRF score map, keyed by document_id. When vec ids are chunk ids
57
- // like `{doc}#c{n}`, we collapse to the parent doc for scoring but
58
- // remember the best-scoring chunk id per doc for later meta enrichment.
61
+ // Bucket vector results by doc_id, keeping the best-ranked chunk per doc.
62
+ // vecResults is already sorted by distance ascending, so the first
63
+ // occurrence of a given doc_id has the smallest rank (best match).
64
+ const buckets = new Map();
65
+ for (let i = 0; i < vecResults.length; i++) {
66
+ const hit = vecResults[i];
67
+ const docId = this.docIdFromVecId(hit.id);
68
+ if (buckets.has(docId))
69
+ continue; // Already have best rank for this doc
70
+ buckets.set(docId, {
71
+ bestRank: i,
72
+ bestChunkId: hit.id,
73
+ bestContent: hit.content ?? "",
74
+ });
75
+ }
76
+ // Build RRF score map (keyed by doc_id for both FTS and vector buckets)
59
77
  const scores = new Map();
60
78
  const bestChunkByDoc = new Map();
61
79
  for (let i = 0; i < ftsResults.length; i++) {
@@ -63,16 +81,13 @@ export class HybridSearch {
63
81
  const rrfScore = 1 / (HybridSearch.RRF_K + i + 1);
64
82
  scores.set(id, (scores.get(id) ?? 0) + rrfScore);
65
83
  }
66
- for (let i = 0; i < vecResults.length; i++) {
67
- const vecId = vecResults[i].id;
68
- const docId = this.docIdFromVecId(vecId);
69
- const rrfScore = 1 / (HybridSearch.RRF_K + i + 1);
84
+ for (const [docId, bucket] of buckets) {
85
+ const rrfScore = 1 / (HybridSearch.RRF_K + bucket.bestRank + 1);
70
86
  scores.set(docId, (scores.get(docId) ?? 0) + rrfScore);
71
- if (vecId !== docId) {
72
- const existing = bestChunkByDoc.get(docId);
73
- if (!existing || i < existing.rank) {
74
- bestChunkByDoc.set(docId, { chunkId: vecId, rank: i });
75
- }
87
+ // Track best chunk for later enrichment
88
+ const existing = bestChunkByDoc.get(docId);
89
+ if (!existing || bucket.bestRank < existing.rank) {
90
+ bestChunkByDoc.set(docId, { chunkId: bucket.bestChunkId, rank: bucket.bestRank });
76
91
  }
77
92
  }
78
93
  // Build a lookup of FTS results by id for quick access
@@ -80,12 +95,20 @@ export class HybridSearch {
80
95
  for (const r of ftsResults) {
81
96
  ftsById.set(r.id, r);
82
97
  }
83
- // Assemble combined results
98
+ // Assemble combined results. For vector-hit docs, replace the snippet
99
+ // with the winning chunk's content (truncated). FTS-only hits keep the
100
+ // FTS snippet.
84
101
  const combined = [];
85
102
  for (const [id, rrfScore] of scores) {
86
103
  const ftsHit = ftsById.get(id);
104
+ const bucket = buckets.get(id);
87
105
  if (ftsHit) {
88
- combined.push({ ...ftsHit, score: rrfScore });
106
+ // FTS hit (possibly also a vector hit): prefer the chunk snippet when
107
+ // the vector side contributed real chunk content.
108
+ const snippet = bucket && bucket.bestContent
109
+ ? bucket.bestContent.slice(0, SNIPPET_MAX_CHARS)
110
+ : ftsHit.snippet;
111
+ combined.push({ ...ftsHit, score: rrfScore, snippet });
89
112
  }
90
113
  else {
91
114
  // Vector-only result: fetch document metadata from db
@@ -93,6 +116,9 @@ export class HybridSearch {
93
116
  // Skip stub documents — they have no real content or path
94
117
  if (!doc || doc.isStub)
95
118
  continue;
119
+ const snippet = bucket
120
+ ? bucket.bestContent.slice(0, SNIPPET_MAX_CHARS)
121
+ : "";
96
122
  combined.push({
97
123
  id: doc.id,
98
124
  path: doc.path,
@@ -101,7 +127,7 @@ export class HybridSearch {
101
127
  status: doc.status,
102
128
  date: doc.date,
103
129
  score: rrfScore,
104
- snippet: "",
130
+ snippet,
105
131
  });
106
132
  }
107
133
  }
@@ -1 +1 @@
1
- {"version":3,"file":"hybrid-search.js","sourceRoot":"","sources":["../src/hybrid-search.ts"],"names":[],"mappings":"AAgBA,MAAM,OAAO,YAAY;IAIJ;IACA;IACA;IACA;IANX,MAAM,CAAU,KAAK,GAAG,EAAE,CAAC;IAEnC,YACmB,EAAe,EACf,GAAc,EACd,GAAiB,EACjB,OAAgB;QAHhB,OAAE,GAAF,EAAE,CAAa;QACf,QAAG,GAAH,GAAG,CAAW;QACd,QAAG,GAAH,GAAG,CAAc;QACjB,YAAO,GAAP,OAAO,CAAS;IAChC,CAAC;IAEJ;;;OAGG;IACK,iBAAiB;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,EAAE;aACnB,OAAO,CACN,qEAAqE,CACtE;aACA,GAAG,EAAE,CAAC;QACT,OAAO,GAAG,KAAK,SAAS,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACK,cAAc,CAAC,KAAa;QAClC,MAAM,MAAM,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,MAAM,KAAK,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAChC,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,OAAO,KAAK,CAAC;QAC/D,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IAChC,CAAC;IAEO,UAAU,CAAC,OAAe;QAChC,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE;YAAE,OAAO,SAAS,CAAC;QAChD,OAAO,IAAI,CAAC,EAAE,CAAC,EAAE;aACd,OAAO,CACN;kCAC0B,CAC3B;aACA,GAAG,CAAC,OAAO,CAAyB,CAAC;IAC1C,CAAC;IAED,KAAK,CAAC,MAAM,CACV,KAAa,EACb,UAAyB,EAAE;QAE3B,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,iBAAiB,GAAG,KAAK,EAAE,KAAK,GAAG,EAAE,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;QAElF,0EAA0E;QAC1E,gCAAgC;QAChC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE;YACxC,iBAAiB,EAAE,IAAI;YACvB,KAAK,EAAE,KAAK,GAAG,CAAC;YAChB,UAAU;SACX,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACjD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,cAAc,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;QAE9D,wEAAwE;QACxE,mEAAmE;QACnE,wEAAwE;QACxE,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;QACzC,MAAM,cAAc,GAAG,IAAI,GAAG,EAA6C,CAAC;QAE5E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,EAAE,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAClD,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;QACnD,CAAC;QAED,
KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;YACzC,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAClD,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;YACvD,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;gBACpB,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAC3C,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACnC,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAC;QAChD,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QACvB,CAAC;QAED,4BAA4B;QAC5B,MAAM,QAAQ,GAAmB,EAAE,CAAC;QAEpC,KAAK,MAAM,CAAC,EAAE,EAAE,QAAQ,CAAC,IAAI,MAAM,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC/B,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;YAChD,CAAC;iBAAM,CAAC;gBACN,sDAAsD;gBACtD,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;gBACpC,0DAA0D;gBAC1D,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM;oBAAE,SAAS;gBACjC,QAAQ,CAAC,IAAI,CAAC;oBACZ,EAAE,EAAE,GAAG,CAAC,EAAE;oBACV,IAAI,EAAE,GAAG,CAAC,IAAc;oBACxB,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,MAAM,EAAE,GAAG,CAAC,MAAM;oBAClB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,KAAK,EAAE,QAAQ;oBACf,OAAO,EAAE,EAAE;iBACZ,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE3C,0BAA0B;QAC1B,IAAI,QAAQ,GAAG,QAAQ,CAAC;QACxB,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,YAAY,CAAC,CAAC;QAC/D,CAAC;QAED,oBAAoB;QACpB,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QACrD,CAAC;QAE
D,oBAAoB;QACpB,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7B,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACtC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC5C,CAAC,CAAC,CAAC;QACL,CAAC;QAED,sEAAsE;QACtE,uEAAuE;QACvE,sDAAsD;QACtD,IAAI,UAAU,IAAI,UAAU,KAAK,KAAK,EAAE,CAAC;YACvC,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACzC,gDAAgD;gBAChD,OAAO,CAAC,IAAI,IAAI,KAAK,CAAC,KAAK,UAAU,CAAC;YACxC,CAAC,CAAC,CAAC;QACL,CAAC;QAED,oEAAoE;QACpE,kBAAkB;QAClB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACtC,IAAI,CAAC,IAAI;gBAAE,SAAS;YACpB,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC5C,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,EAAE,CAAC;YACzB,CAAC,CAAC,UAAU,GAAG,KAAK,CAAC,WAAW,CAAC;YACjC,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,UAAU,CAAC;YAC/B,CAAC,CAAC,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC;YAC3B,CAAC,CAAC,aAAa,GAAG,KAAK,CAAC,cAAc,CAAC;QACzC,CAAC;QAED,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAClC,CAAC"}
1
+ {"version":3,"file":"hybrid-search.js","sourceRoot":"","sources":["../src/hybrid-search.ts"],"names":[],"mappings":"AAgBA;;;GAGG;AACH,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAa9B,MAAM,OAAO,YAAY;IAIJ;IACA;IACA;IACA;IANX,MAAM,CAAU,KAAK,GAAG,EAAE,CAAC;IAEnC,YACmB,EAAe,EACf,GAAc,EACd,GAAiB,EACjB,OAAgB;QAHhB,OAAE,GAAF,EAAE,CAAa;QACf,QAAG,GAAH,GAAG,CAAW;QACd,QAAG,GAAH,GAAG,CAAc;QACjB,YAAO,GAAP,OAAO,CAAS;IAChC,CAAC;IAEJ;;;OAGG;IACK,iBAAiB;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,EAAE;aACnB,OAAO,CACN,qEAAqE,CACtE;aACA,GAAG,EAAE,CAAC;QACT,OAAO,GAAG,KAAK,SAAS,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACK,cAAc,CAAC,KAAa;QAClC,MAAM,MAAM,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,MAAM,KAAK,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAChC,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,OAAO,KAAK,CAAC;QAC/D,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IAChC,CAAC;IAEO,UAAU,CAAC,OAAe;QAChC,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE;YAAE,OAAO,SAAS,CAAC;QAChD,OAAO,IAAI,CAAC,EAAE,CAAC,EAAE;aACd,OAAO,CACN;kCAC0B,CAC3B;aACA,GAAG,CAAC,OAAO,CAAyB,CAAC;IAC1C,CAAC;IAED,KAAK,CAAC,MAAM,CACV,KAAa,EACb,UAAyB,EAAE;QAE3B,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,iBAAiB,GAAG,KAAK,EAAE,KAAK,GAAG,EAAE,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;QAElF,0EAA0E;QAC1E,gCAAgC;QAChC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE;YACxC,iBAAiB,EAAE,IAAI;YACvB,KAAK,EAAE,KAAK,GAAG,CAAC;YAChB,UAAU;SACX,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACjD,MAAM,UAAU,GAAmB,IAAI,CAAC,GAAG,CAAC,MAAM,CAChD,cAAc,EACd,KAAK,GAAG,CAAC,CACV,CAAC;QAEF,0EAA0E;QAC1E,mEAAmE;QACnE,mEAAmE;QACnE,MAAM,OAAO,GAAG,IAAI,GAAG,EAAqB,CAAC;QAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,GAAG,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS,CAAC,sCAAsC;YACxE,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE;gBACjB,QAAQ,EAAE,CAAC;gBACX,WAAW,EAAE,GAAG,
CAAC,EAAE;gBACnB,WAAW,EAAE,GAAG,CAAC,OAAO,IAAI,EAAE;aAC/B,CAAC,CAAC;QACL,CAAC;QAED,wEAAwE;QACxE,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;QACzC,MAAM,cAAc,GAAG,IAAI,GAAG,EAA6C,CAAC;QAE5E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,EAAE,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAClD,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;QACnD,CAAC;QAED,KAAK,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACtC,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,GAAG,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;YAChE,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;YACvD,wCAAwC;YACxC,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAC3C,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACjD,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;YACpF,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAC;QAChD,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QACvB,CAAC;QAED,sEAAsE;QACtE,uEAAuE;QACvE,eAAe;QACf,MAAM,QAAQ,GAAmB,EAAE,CAAC;QAEpC,KAAK,MAAM,CAAC,EAAE,EAAE,QAAQ,CAAC,IAAI,MAAM,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC/B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC/B,IAAI,MAAM,EAAE,CAAC;gBACX,sEAAsE;gBACtE,kDAAkD;gBAClD,MAAM,OAAO,GACX,MAAM,IAAI,MAAM,CAAC,WAAW;oBAC1B,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,CAAC;oBAChD,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;YACzD,CAAC;iBAAM,CAAC;gBACN,sDAAsD;gBACtD,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;gBACpC,0DAA0D;gBAC1D,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM;oBAAE,SAAS;gBACjC,MAAM,OAAO,GAAG,MAAM;oBACpB,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,iBA
AiB,CAAC;oBAChD,CAAC,CAAC,EAAE,CAAC;gBACP,QAAQ,CAAC,IAAI,CAAC;oBACZ,EAAE,EAAE,GAAG,CAAC,EAAE;oBACV,IAAI,EAAE,GAAG,CAAC,IAAc;oBACxB,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,MAAM,EAAE,GAAG,CAAC,MAAM;oBAClB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,KAAK,EAAE,QAAQ;oBACf,OAAO;iBACR,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE3C,0BAA0B;QAC1B,IAAI,QAAQ,GAAG,QAAQ,CAAC;QACxB,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,YAAY,CAAC,CAAC;QAC/D,CAAC;QAED,oBAAoB;QACpB,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QACrD,CAAC;QAED,oBAAoB;QACpB,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7B,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACtC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC5C,CAAC,CAAC,CAAC;QACL,CAAC;QAED,sEAAsE;QACtE,uEAAuE;QACvE,sDAAsD;QACtD,IAAI,UAAU,IAAI,UAAU,KAAK,KAAK,EAAE,CAAC;YACvC,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACzC,gDAAgD;gBAChD,OAAO,CAAC,IAAI,IAAI,KAAK,CAAC,KAAK,UAAU,CAAC;YACxC,CAAC,CAAC,CAAC;QACL,CAAC;QAED,oEAAoE;QACpE,kBAAkB;QAClB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACtC,IAAI,CAAC,IAAI;gBAAE,SAAS;YACpB,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC5C,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,EAAE,CAAC;YACzB,CAAC,CAAC,UAAU,GAAG,KAAK,CAAC,WAAW,CAAC;YACjC,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,UAAU,CAAC;YAC/B,CAAC,CAAC,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC;YAC3B,CAAC,CAAC,aAAa,GAAG,KAAK,CAAC,cAAc,CAAC;QACzC,CAAC;QAED,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAA
E,KAAK,CAAC,CAAC;IAClC,CAAC"}
package/dist/reindex.js CHANGED
@@ -4,7 +4,7 @@ import { homedir } from "node:os";
4
4
  import { KnowledgeDB } from "./db.js";
5
5
  import { FtsSearch } from "./search.js";
6
6
  import { VectorSearch } from "./vector-search.js";
7
- import { embed, prepareTextForEmbedding } from "./embedder.js";
7
+ import { embedDocument } from "./embedder.js";
8
8
  import { parseDocument } from "./parser.js";
9
9
  import { findMarkdownFiles } from "./file-scanner.js";
10
10
  import { generateIndexes } from "./generate-indexes.js";
@@ -37,7 +37,10 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
37
37
  }
38
38
  console.log(`Found ${filesOnDisk.length} total markdown files`);
39
39
  const filesOnDiskSet = new Set(filesOnDisk.map(f => resolve(f)));
40
- // Phase 1: Delete stale entries for files no longer on disk
40
+ // Phase 1: Delete stale entries for files no longer on disk.
41
+ // Chunk rows cascade from documents via ON DELETE CASCADE on chunks.document_id,
42
+ // but the vec0 virtual table does not participate in FK cascades — we must
43
+ // explicitly delete chunk-level vec rows via GLOB pattern.
41
44
  const syncedPaths = db.getAllSyncPaths();
42
45
  let deleted = 0;
43
46
  for (const syncedPath of syncedPaths) {
@@ -45,6 +48,8 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
45
48
  const id = basename(syncedPath, ".md");
46
49
  fts.deleteFtsEntry(id);
47
50
  db.deleteDocument(id);
51
+ vec.deleteChunkVecsByDoc(id);
52
+ // Also delete any legacy doc-level vec row (pre-chunks schema).
48
53
  vec.deleteEmbedding(id);
49
54
  db.deleteSyncRecord(syncedPath);
50
55
  deleted++;
@@ -117,10 +122,25 @@ export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
117
122
  db.upsertStubDocument(edge.targetId);
118
123
  db.addRelationship(edge.sourceId, edge.targetId, "untyped", edge.context);
119
124
  }
120
- const text = prepareTextForEmbedding(parsed.title, parsed.tags, parsed.content);
125
+ // Chunk-aware embedding: emit one embedding per chunk, persist to both
126
+ // the `chunks` table and the `documents_vec` virtual table with chunk ids
127
+ // of the form `${doc.id}#c${index}`.
128
+ //
129
+ // We first clear any stale chunk rows for this doc_id (the document
130
+ // body may have shrunk across re-indexes) and stale chunk vec rows (which
131
+ // don't cascade from the `chunks` table because vec0 is a virtual table).
132
+ db.db.prepare("DELETE FROM chunks WHERE document_id = ?").run(parsed.id);
133
+ vec.deleteChunkVecsByDoc(parsed.id);
134
+ // Drop any pre-chunks schema vec row that used the bare doc id.
135
+ vec.deleteEmbedding(parsed.id);
121
136
  try {
122
- const embedding = await embed(text);
123
- vec.upsertEmbedding(parsed.id, embedding);
137
+ const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content);
138
+ const insertChunk = db.db.prepare("INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)");
139
+ for (const chunk of chunks) {
140
+ const chunkId = `${parsed.id}#c${chunk.index}`;
141
+ insertChunk.run(chunkId, parsed.id, chunk.index, chunk.content, chunk.charStart, chunk.charEnd);
142
+ vec.upsertEmbedding(chunkId, chunk.embedding);
143
+ }
124
144
  }
125
145
  catch (e) {
126
146
  console.warn(`Failed to embed ${id}: ${e.message}`);
@@ -1 +1 @@
1
- {"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,UAAU,IAAI,WA
AW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;Y
ACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,MAAM,IAAI,GAAG,uBAAuB,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;QAChF,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;YACpC,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC
,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IACjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,C
AAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
1
+ {"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,6DAA6D;IAC7D,iFAAiF;IACjF,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;I
AChB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC;YAC7B,gEAAgE;YAChE,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,
EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,uEAAuE;QACvE,0EAA0E;QAC1E,qCAAqC;QACrC,EAAE;QACF,oEAAoE;QACpE,0EAA0E;QAC1E,0EAA0E;QAC1E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACzE,GAAG,CAAC,oBAAoB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,gEAAgE;QAChE,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;YAC9E,MAAM,WAAW,GAAG,EAAE,CAAC,EAAE,CAAC,OAAO,CAC/B,4GAA4G,CAC7G,CAAC;YACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC/C,WAAW,CAAC,GAAG,CACb,OAAO,EACP,MAAM,CAAC,EAAE,EACT,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,CACd,CAAC;gBACF,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI
,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;
IACjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
@@ -2,6 +2,12 @@ import type { KnowledgeDB } from "./db.js";
2
2
  export interface VectorResult {
3
3
  id: string;
4
4
  distance: number;
5
+ /**
6
+ * Chunk content populated via LEFT JOIN to `chunks` table when the vec id
7
+ * matches a chunk row. When the vec id is doc-level (back-compat / legacy
8
+ * fixtures) or no matching chunks row exists, this is `null`.
9
+ */
10
+ content?: string | null;
5
11
  }
6
12
  export declare class VectorSearch {
7
13
  private knowledgeDb;
@@ -12,5 +18,15 @@ export declare class VectorSearch {
12
18
  dropIndex(): void;
13
19
  upsertEmbedding(id: string, embedding: Float32Array): void;
14
20
  deleteEmbedding(id: string): void;
21
+ /**
22
+ * Delete all chunk-level vec rows for a document. Chunk ids follow the
23
+ * pattern `${docId}#c${index}` so we match via a SQLite GLOB.
24
+ *
25
+ * This is used by reindex to drop stale chunks when a source markdown file
26
+ * has been deleted or modified. Complements `ON DELETE CASCADE` on the
27
+ * `chunks` table (which deletes chunk rows but not their vec counterparts,
28
+ * because the vec0 virtual table does not participate in FK cascades).
29
+ */
30
+ deleteChunkVecsByDoc(docId: string): void;
15
31
  search(queryEmbedding: Float32Array, limit?: number): VectorResult[];
16
32
  }
@@ -42,13 +42,32 @@ export class VectorSearch {
42
42
  .prepare("DELETE FROM documents_vec WHERE id = ?")
43
43
  .run(id);
44
44
  }
45
+ /**
46
+ * Delete all chunk-level vec rows for a document. Chunk ids follow the
47
+ * pattern `${docId}#c${index}` so we match via a SQLite GLOB.
48
+ *
49
+ * This is used by reindex to drop stale chunks when a source markdown file
50
+ * has been deleted or modified. Complements `ON DELETE CASCADE` on the
51
+ * `chunks` table (which deletes chunk rows but not their vec counterparts,
52
+ * because the vec0 virtual table does not participate in FK cascades).
53
+ */
54
+ deleteChunkVecsByDoc(docId) {
55
+ this.ensureVecLoaded();
56
+ this.knowledgeDb.db
57
+ .prepare("DELETE FROM documents_vec WHERE id GLOB ?")
58
+ .run(`${docId}#c*`);
59
+ }
45
60
  search(queryEmbedding, limit = 10) {
46
61
  this.ensureVecLoaded();
47
62
  const buf = float32ToBuffer(queryEmbedding);
63
+ // LEFT JOIN to `chunks` so chunk-level vec rows surface their content.
64
+ // Doc-level vec ids (no matching chunks row) return content = NULL, which
65
+ // preserves back-compat for pre-chunks callers and legacy test fixtures.
48
66
  return this.knowledgeDb.db
49
67
  .prepare(`
50
- SELECT id, distance
68
+ SELECT documents_vec.id, distance, chunks.content
51
69
  FROM documents_vec
70
+ LEFT JOIN chunks ON chunks.id = documents_vec.id
52
71
  WHERE embedding MATCH ? AND k = ?
53
72
  ORDER BY distance
54
73
  `)
@@ -1 +1 @@
1
- {"version":3,"file":"vector-search.js","sourceRoot":"","sources":["../src/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAQxC,SAAS,eAAe,CAAC,GAAiB;IACxC,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,OAAO,YAAY;IAGH;IAFZ,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAoB,WAAwB;QAAxB,gBAAW,GAAX,WAAW,CAAa;IAAG,CAAC;IAExC,eAAe;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,WAAW;QACT,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;KAKxB,CAAC,CAAC;IACL,CAAC;IAED,SAAS;QACP,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjE,CAAC;IAED,eAAe,CAAC,EAAU,EAAE,SAAuB;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,yDAAyD,CAAC;aAClE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;IACb,CAAC;IAED,MAAM,CAAC,cAA4B,EAAE,QAAgB,EAAE;QACrD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;QAC5C,OAAO,IAAI,CAAC,WAAW,CAAC,EAAE;aACvB,OAAO,CACN;;;;;KAKH,CACE;aACA,GAAG,CAAC,GAAG,EAAE,KAAK,CAAmB,CAAC;IACvC,CAAC;CACF"}
1
+ {"version":3,"file":"vector-search.js","sourceRoot":"","sources":["../src/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAcxC,SAAS,eAAe,CAAC,GAAiB;IACxC,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,OAAO,YAAY;IAGH;IAFZ,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAoB,WAAwB;QAAxB,gBAAW,GAAX,WAAW,CAAa;IAAG,CAAC;IAExC,eAAe;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,WAAW;QACT,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;KAKxB,CAAC,CAAC;IACL,CAAC;IAED,SAAS;QACP,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjE,CAAC;IAED,eAAe,CAAC,EAAU,EAAE,SAAuB;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,yDAAyD,CAAC;aAClE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;IACb,CAAC;IAED;;;;;;;;OAQG;IACH,oBAAoB,CAAC,KAAa;QAChC,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,2CAA2C,CAAC;aACpD,GAAG,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC;IACxB,CAAC;IAED,MAAM,CAAC,cAA4B,EAAE,QAAgB,EAAE;QACrD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;QAC5C,uEAAuE;QACvE,0EAA0E;QAC1E,yEAAyE;QACzE,OAAO,IAAI,CAAC,WAAW,CAAC,EAAE;aACvB,OAAO,CACN;;;;;;KAMH,CACE;aACA,GAAG,CAAC,GAAG,EAAE,KAAK,CAAmB,CAAC;IACvC,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-hero-knowledge-index",
3
- "version": "0.1.23",
3
+ "version": "0.1.25",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -1,5 +1,20 @@
1
- import { describe, it, expect } from "vitest";
2
- import { prepareTextForEmbedding } from "../embedder.js";
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+
3
// Replace @huggingface/transformers with a stub so unit tests never load the
// real model. Every text handed to the stub pipeline is appended to
// `embedCalls`, which lets tests assert on what was embedded and how often.
const embedCalls: Array<string> = [];
vi.mock("@huggingface/transformers", () => {
  // Records the input text and answers with a zero-filled 384-element vector.
  async function fakePipeline(text: string, _opts: unknown) {
    embedCalls.push(text);
    const data = new Float32Array(384);
    return { data };
  }

  // `pipeline()` resolves to the same stub on every call.
  const pipeline = vi.fn(async () => fakePipeline);
  return { pipeline };
});
16
+
17
+ import { prepareTextForEmbedding, embedDocument } from "../embedder.js";
3
18
 
4
19
  describe("prepareTextForEmbedding", () => {
5
20
  it("includes title, tags, and first paragraph", () => {
@@ -40,14 +55,15 @@ describe("prepareTextForEmbedding", () => {
40
55
  expect(result).not.toContain("\n\n");
41
56
  });
42
57
 
43
- it("truncates at MAX_CHARS (500) total", () => {
58
+ it("no longer truncates at 500 chars (MAX_CHARS removed)", () => {
44
59
  const longParagraph = "A".repeat(600);
45
60
  const result = prepareTextForEmbedding(
46
61
  "Title",
47
62
  ["tag1", "tag2"],
48
63
  longParagraph,
49
64
  );
50
- expect(result.length).toBe(500);
65
+ // Title (5) + \n + tag1, tag2 (10) + \n + 600 A's = 617 chars
66
+ expect(result.length).toBe(617);
51
67
  expect(result.startsWith("Title\ntag1, tag2\n")).toBe(true);
52
68
  });
53
69
 
@@ -98,3 +114,86 @@ describe("prepareTextForEmbedding", () => {
98
114
  expect(result).toBe("My Title\ngraphology, search\nFirst paragraph.");
99
115
  });
100
116
  });
117
+
118
+ describe("embedDocument", () => {
119
+ beforeEach(() => {
120
+ embedCalls.length = 0;
121
+ });
122
+
123
+ it("returns exactly one chunk for short content", async () => {
124
+ const result = await embedDocument("Title", ["tag"], "short content");
125
+ expect(result).toHaveLength(1);
126
+ expect(result[0]!.index).toBe(0);
127
+ expect(result[0]!.content).toBe("short content");
128
+ expect(result[0]!.charStart).toBe(0);
129
+ expect(result[0]!.charEnd).toBe("short content".length);
130
+ expect(result[0]!.embedding).toBeInstanceOf(Float32Array);
131
+ });
132
+
133
+ it("embeds with title + tagLine + chunk.content prepended", async () => {
134
+ await embedDocument("My Title", ["graphology", "search"], "body text");
135
+ expect(embedCalls).toHaveLength(1);
136
+ expect(embedCalls[0]).toBe("My Title\ngraphology, search\nbody text");
137
+ });
138
+
139
+ it("omits empty title/tags/content from the embed input", async () => {
140
+ await embedDocument("", [], "only content here");
141
+ expect(embedCalls).toContain("only content here");
142
+
143
+ embedCalls.length = 0;
144
+ await embedDocument("Just Title", [], "");
145
+ // Empty content -> one chunk with empty string, only title is non-empty.
146
+ expect(embedCalls).toContain("Just Title");
147
+ });
148
+
149
+ it("yields >= 4 chunks for an 8K-char document", async () => {
150
+ const longContent = "A".repeat(8000);
151
+ const result = await embedDocument("Title", [], longContent);
152
+ expect(result.length).toBeGreaterThanOrEqual(4);
153
+ // Each chunk gets its own embedding.
154
+ expect(embedCalls).toHaveLength(result.length);
155
+ });
156
+
157
+ it("produces Float32Array embeddings of length 384", async () => {
158
+ const result = await embedDocument("T", [], "hello world");
159
+ expect(result[0]!.embedding).toBeInstanceOf(Float32Array);
160
+ expect(result[0]!.embedding.length).toBe(384);
161
+ });
162
+
163
+ it("chunk indexes are monotonically increasing from 0", async () => {
164
+ const longContent = "word ".repeat(3000); // ~15K chars, many chunks
165
+ const result = await embedDocument("T", [], longContent);
166
+ expect(result.length).toBeGreaterThan(1);
167
+ for (let i = 0; i < result.length; i++) {
168
+ expect(result[i]!.index).toBe(i);
169
+ }
170
+ });
171
+
172
+ it("chunk offsets reconstruct the original content", async () => {
173
+ const content = "A".repeat(5000);
174
+ const result = await embedDocument("T", [], content);
175
+ for (const chunk of result) {
176
+ expect(content.slice(chunk.charStart, chunk.charEnd)).toBe(chunk.content);
177
+ }
178
+ });
179
+
180
+ it("empty content yields one chunk with empty content (anchors on title/tags)", async () => {
181
+ const result = await embedDocument("Just Title", ["some-tag"], "");
182
+ expect(result).toHaveLength(1);
183
+ expect(result[0]!.content).toBe("");
184
+ expect(result[0]!.charStart).toBe(0);
185
+ expect(result[0]!.charEnd).toBe(0);
186
+ // Still got embedded using title + tag.
187
+ expect(embedCalls).toContain("Just Title\nsome-tag");
188
+ });
189
+
190
+ it("respects custom chunker options", async () => {
191
+ const content = "A".repeat(500);
192
+ const result = await embedDocument("T", [], content, {
193
+ chunkSize: 100,
194
+ chunkOverlap: 10,
195
+ });
196
+ // With chunkSize=100 over 500 chars, we expect multiple chunks.
197
+ expect(result.length).toBeGreaterThan(1);
198
+ });
199
+ });