ralph-hero-knowledge-index 0.1.21 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.mcp.json +1 -1
- package/README.md +109 -0
- package/dist/config.d.ts +32 -0
- package/dist/config.js +75 -0
- package/dist/config.js.map +1 -0
- package/dist/db.d.ts +7 -0
- package/dist/db.js +17 -0
- package/dist/db.js.map +1 -1
- package/dist/embedder.d.ts +27 -0
- package/dist/embedder.js +43 -4
- package/dist/embedder.js.map +1 -1
- package/dist/file-scanner.d.ts +13 -1
- package/dist/file-scanner.js +30 -3
- package/dist/file-scanner.js.map +1 -1
- package/dist/hybrid-search.d.ts +12 -0
- package/dist/hybrid-search.js +74 -5
- package/dist/hybrid-search.js.map +1 -1
- package/dist/ignore.d.ts +29 -0
- package/dist/ignore.js +65 -0
- package/dist/ignore.js.map +1 -0
- package/dist/index.d.ts +9 -1
- package/dist/index.js +166 -6
- package/dist/index.js.map +1 -1
- package/dist/llm-client.d.ts +41 -0
- package/dist/llm-client.js +98 -0
- package/dist/llm-client.js.map +1 -0
- package/dist/reindex.d.ts +22 -3
- package/dist/reindex.js +85 -13
- package/dist/reindex.js.map +1 -1
- package/dist/search.d.ts +12 -0
- package/dist/search.js +15 -1
- package/dist/search.js.map +1 -1
- package/dist/vector-search.d.ts +10 -0
- package/dist/vector-search.js +15 -0
- package/dist/vector-search.js.map +1 -1
- package/package.json +2 -1
- package/src/__tests__/config.test.ts +173 -0
- package/src/__tests__/embedder.test.ts +103 -4
- package/src/__tests__/file-scanner.test.ts +88 -0
- package/src/__tests__/hybrid-search.test.ts +107 -0
- package/src/__tests__/ignore.test.ts +86 -0
- package/src/__tests__/index.test.ts +450 -0
- package/src/__tests__/llm-client.test.ts +349 -0
- package/src/__tests__/memory-stats.test.ts +204 -0
- package/src/__tests__/reindex.test.ts +187 -11
- package/src/__tests__/search.test.ts +37 -0
- package/src/config.ts +105 -0
- package/src/db.ts +17 -0
- package/src/embedder.ts +61 -4
- package/src/file-scanner.ts +28 -3
- package/src/hybrid-search.ts +88 -5
- package/src/ignore.ts +82 -0
- package/src/index.ts +202 -7
- package/src/llm-client.ts +136 -0
- package/src/reindex.ts +115 -14
- package/src/search.ts +27 -1
- package/src/vector-search.ts +16 -0
package/dist/reindex.js
CHANGED
|
@@ -4,11 +4,13 @@ import { homedir } from "node:os";
|
|
|
4
4
|
import { KnowledgeDB } from "./db.js";
|
|
5
5
|
import { FtsSearch } from "./search.js";
|
|
6
6
|
import { VectorSearch } from "./vector-search.js";
|
|
7
|
-
import {
|
|
7
|
+
import { embedDocument } from "./embedder.js";
|
|
8
8
|
import { parseDocument } from "./parser.js";
|
|
9
9
|
import { findMarkdownFiles } from "./file-scanner.js";
|
|
10
10
|
import { generateIndexes } from "./generate-indexes.js";
|
|
11
|
-
|
|
11
|
+
import { loadConfig } from "./config.js";
|
|
12
|
+
import { loadIgnoreForRoot } from "./ignore.js";
|
|
13
|
+
export async function reindex(dirs, dbPath, generate = false, ignorePatterns) {
|
|
12
14
|
console.log(`Indexing ${dirs.join(", ")} -> ${dbPath}`);
|
|
13
15
|
const db = new KnowledgeDB(dbPath);
|
|
14
16
|
const fts = new FtsSearch(db);
|
|
@@ -28,13 +30,17 @@ export async function reindex(dirs, dbPath, generate = false) {
|
|
|
28
30
|
// Phase 1: Discover files on disk
|
|
29
31
|
const filesOnDisk = [];
|
|
30
32
|
for (const dir of dirs) {
|
|
31
|
-
const
|
|
33
|
+
const matcher = loadIgnoreForRoot(dir, ignorePatterns);
|
|
34
|
+
const found = findMarkdownFiles(dir, matcher);
|
|
32
35
|
console.log(` ${dir}: ${found.length} files`);
|
|
33
36
|
filesOnDisk.push(...found);
|
|
34
37
|
}
|
|
35
38
|
console.log(`Found ${filesOnDisk.length} total markdown files`);
|
|
36
39
|
const filesOnDiskSet = new Set(filesOnDisk.map(f => resolve(f)));
|
|
37
|
-
// Phase 1: Delete stale entries for files no longer on disk
|
|
40
|
+
// Phase 1: Delete stale entries for files no longer on disk.
|
|
41
|
+
// Chunk rows cascade from documents via ON DELETE CASCADE on chunks.document_id,
|
|
42
|
+
// but the vec0 virtual table does not participate in FK cascades — we must
|
|
43
|
+
// explicitly delete chunk-level vec rows via GLOB pattern.
|
|
38
44
|
const syncedPaths = db.getAllSyncPaths();
|
|
39
45
|
let deleted = 0;
|
|
40
46
|
for (const syncedPath of syncedPaths) {
|
|
@@ -42,6 +48,8 @@ export async function reindex(dirs, dbPath, generate = false) {
|
|
|
42
48
|
const id = basename(syncedPath, ".md");
|
|
43
49
|
fts.deleteFtsEntry(id);
|
|
44
50
|
db.deleteDocument(id);
|
|
51
|
+
vec.deleteChunkVecsByDoc(id);
|
|
52
|
+
// Also delete any legacy doc-level vec row (pre-chunks schema).
|
|
45
53
|
vec.deleteEmbedding(id);
|
|
46
54
|
db.deleteSyncRecord(syncedPath);
|
|
47
55
|
deleted++;
|
|
@@ -114,10 +122,25 @@ export async function reindex(dirs, dbPath, generate = false) {
|
|
|
114
122
|
db.upsertStubDocument(edge.targetId);
|
|
115
123
|
db.addRelationship(edge.sourceId, edge.targetId, "untyped", edge.context);
|
|
116
124
|
}
|
|
117
|
-
|
|
125
|
+
// Chunk-aware embedding: emit one embedding per chunk, persist to both
|
|
126
|
+
// the `chunks` table and the `documents_vec` virtual table with chunk ids
|
|
127
|
+
// of the form `${doc.id}#c${index}`.
|
|
128
|
+
//
|
|
129
|
+
// We first clear any stale chunk rows for this doc_id (the document
|
|
130
|
+
// body may have shrunk across re-indexes) and stale chunk vec rows (which
|
|
131
|
+
// don't cascade from the `chunks` table because vec0 is a virtual table).
|
|
132
|
+
db.db.prepare("DELETE FROM chunks WHERE document_id = ?").run(parsed.id);
|
|
133
|
+
vec.deleteChunkVecsByDoc(parsed.id);
|
|
134
|
+
// Drop any pre-chunks schema vec row that used the bare doc id.
|
|
135
|
+
vec.deleteEmbedding(parsed.id);
|
|
118
136
|
try {
|
|
119
|
-
const
|
|
120
|
-
|
|
137
|
+
const chunks = await embedDocument(parsed.title, parsed.tags, parsed.content);
|
|
138
|
+
const insertChunk = db.db.prepare("INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)");
|
|
139
|
+
for (const chunk of chunks) {
|
|
140
|
+
const chunkId = `${parsed.id}#c${chunk.index}`;
|
|
141
|
+
insertChunk.run(chunkId, parsed.id, chunk.index, chunk.content, chunk.charStart, chunk.charEnd);
|
|
142
|
+
vec.upsertEmbedding(chunkId, chunk.embedding);
|
|
143
|
+
}
|
|
121
144
|
}
|
|
122
145
|
catch (e) {
|
|
123
146
|
console.warn(`Failed to embed ${id}: ${e.message}`);
|
|
@@ -158,28 +181,77 @@ export async function reindex(dirs, dbPath, generate = false) {
|
|
|
158
181
|
}
|
|
159
182
|
}
|
|
160
183
|
const DEFAULT_DB_PATH = join(homedir(), ".ralph-hero", "knowledge.db");
|
|
184
|
+
/**
|
|
185
|
+
* Resolve the set of roots, database path, and generate flag for a reindex
|
|
186
|
+
* run. Precedence (highest to lowest):
|
|
187
|
+
* 1. CLI positional args
|
|
188
|
+
* 2. `RALPH_KNOWLEDGE_DIRS` environment variable
|
|
189
|
+
* 3. `config.roots` from `~/.ralph/knowledge.config.json`
|
|
190
|
+
* 4. `"../../thoughts"` fallback
|
|
191
|
+
*
|
|
192
|
+
* `dbPath` precedence is independent: CLI `.db` positional > `RALPH_KNOWLEDGE_DB`
|
|
193
|
+
* env var > `config.dbPath` > {@link DEFAULT_DB_PATH}.
|
|
194
|
+
*
|
|
195
|
+
* The returned `config` is forwarded to the caller so `ignorePatterns` can be
|
|
196
|
+
* threaded into {@link reindex}.
|
|
197
|
+
*/
|
|
161
198
|
export function resolveDirs() {
|
|
162
199
|
const cliArgs = process.argv.slice(2);
|
|
163
200
|
const noGenerate = cliArgs.includes("--no-generate");
|
|
164
201
|
const positional = cliArgs.filter(a => !a.startsWith("--"));
|
|
165
202
|
const cliDb = positional.find(a => a.endsWith(".db"));
|
|
166
203
|
const cliDirs = positional.filter(a => !a.endsWith(".db"));
|
|
204
|
+
const config = loadConfig();
|
|
205
|
+
const resolveDbPath = () => cliDb ??
|
|
206
|
+
process.env.RALPH_KNOWLEDGE_DB ??
|
|
207
|
+
config.dbPath ??
|
|
208
|
+
DEFAULT_DB_PATH;
|
|
167
209
|
if (cliDirs.length > 0) {
|
|
168
|
-
|
|
210
|
+
console.log("Using roots from: CLI");
|
|
211
|
+
return {
|
|
212
|
+
dirs: cliDirs,
|
|
213
|
+
dbPath: resolveDbPath(),
|
|
214
|
+
generate: !noGenerate,
|
|
215
|
+
source: "cli",
|
|
216
|
+
config,
|
|
217
|
+
};
|
|
169
218
|
}
|
|
170
219
|
const envDirs = process.env.RALPH_KNOWLEDGE_DIRS;
|
|
171
220
|
if (envDirs) {
|
|
221
|
+
const parsed = envDirs.split(",").map(d => d.trim()).filter(Boolean);
|
|
222
|
+
if (parsed.length > 0) {
|
|
223
|
+
console.log("Using roots from: env");
|
|
224
|
+
return {
|
|
225
|
+
dirs: parsed,
|
|
226
|
+
dbPath: resolveDbPath(),
|
|
227
|
+
generate: !noGenerate,
|
|
228
|
+
source: "env",
|
|
229
|
+
config,
|
|
230
|
+
};
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
if (config.roots && config.roots.length > 0) {
|
|
234
|
+
console.log("Using roots from: config");
|
|
172
235
|
return {
|
|
173
|
-
dirs:
|
|
174
|
-
dbPath:
|
|
236
|
+
dirs: config.roots,
|
|
237
|
+
dbPath: resolveDbPath(),
|
|
175
238
|
generate: !noGenerate,
|
|
239
|
+
source: "config",
|
|
240
|
+
config,
|
|
176
241
|
};
|
|
177
242
|
}
|
|
178
|
-
|
|
243
|
+
console.log("Using roots from: fallback");
|
|
244
|
+
return {
|
|
245
|
+
dirs: ["../../thoughts"],
|
|
246
|
+
dbPath: resolveDbPath(),
|
|
247
|
+
generate: !noGenerate,
|
|
248
|
+
source: "fallback",
|
|
249
|
+
config,
|
|
250
|
+
};
|
|
179
251
|
}
|
|
180
252
|
const isMain = process.argv[1]?.endsWith("reindex.js");
|
|
181
253
|
if (isMain) {
|
|
182
|
-
const { dirs, dbPath, generate } = resolveDirs();
|
|
183
|
-
reindex(dirs, dbPath, generate).catch(console.error);
|
|
254
|
+
const { dirs, dbPath, generate, config } = resolveDirs();
|
|
255
|
+
reindex(dirs, dbPath, generate, config.ignorePatterns).catch(console.error);
|
|
184
256
|
}
|
|
185
257
|
//# sourceMappingURL=reindex.js.map
|
package/dist/reindex.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"reindex.js","sourceRoot":"","sources":["../src/reindex.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAwB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,MAAc,EACd,WAAoB,KAAK,EACzB,cAAyB;IAEzB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,MAAM,EAAE,CAAC,CAAC;IAExD,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9B,GAAG,CAAC,WAAW,EAAE,CAAC;IAClB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;IACjC,GAAG,CAAC,WAAW,EAAE,CAAC;IAElB,8EAA8E;IAC9E,MAAM,cAAc,GAAG,GAAG,CAAC;IAC3B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAChC,IAAI,cAAc,KAAK,cAAc,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,uEAAuE,CAAC,CAAC;QACrF,EAAE,CAAC,gBAAgB,EAAE,CAAC;QACtB,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QAC7C,mBAAmB,GAAG,IAAI,CAAC;IAC7B,CAAC;IAED,kCAAkC;IAClC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QAC/C,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,SAAS,WAAW,CAAC,MAAM,uBAAuB,CAAC,CAAC;IAEhE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,6DAA6D;IAC7D,iFAAiF;IACjF,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,WAAW,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IACzC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACvC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACvB,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;YACtB,GAAG,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC;YAC7B,gEAAgE;YAChE,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;YACxB,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAChC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QAEpD,8CAA8C;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,UAAU,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAC7C,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,SAAS;YACvB,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC;YAC7C,CAAC,CAAC,QAAQ,CAAC;QACb,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,aAAa,CAAC,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC/C,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,MAAM;YAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,yBAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,uEAAuE;QACvE,IAAI,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;QAED,EAAE,CAAC,cAAc,CAAC;YAChB,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,oCAAoC;QACpC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC;QAED,4EAA4E;QAC5E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE9E,6EAA6E;QAC7E,gFAAgF;QAChF,8EAA8E;QAC9E,8CAA8C;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACpC,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACvC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5E,CAAC;QAED,uEAAuE;QACvE,0EAA0E;QAC1E,qCAAqC;QACrC,EAAE;QACF,oEAAoE;QACpE,0EAA0E;QAC1E,0EAA0E;QAC1E,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACzE,GAAG,CAAC,oBAAoB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,gEAAgE;QAChE,GAAG,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;YAC9E,MAAM,WAAW,GAAG,EAAE,CAAC,EAAE,CAAC,OAAO,CAC/B,4GAA4G,CAC7G,CAAC;YACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,EAAE,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC/C,WAAW,CAAC,GAAG,CACb,OAAO,EACP,MAAM,CAAC,EAAE,EACT,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,CACd,CAAC;gBACF,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO,EAAE,CAAC;QACV,IAAI,OAAO,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,8FAA8F;IAC9F,IAAI,mBAAmB,EAAE,CAAC;QACxB,GAAG,CAAC,YAAY,EAAE,CAAC;IACrB,CAAC;IAED,gGAAgG;IAChG,MAAM,YAAY,GAAG,IAAI,GAAG,CACzB,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAmC;SAClG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CACzB,CAAC;IAEF,uEAAuE;IACvE,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChC,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,sCAAsC,CAAC,CAAC;IAE1E,IAAI,CAAC;QACH,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;YACzC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,uBAAuB,OAAO,uBAAuB,CAAC,CAAC;QACnF,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;AAYvE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,aAAa,GAAG,GAAW,EAAE,CACjC,KAAK;QACL,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAC9B,MAAM,CAAC,MAAM;QACb,eAAe,CAAC;IAElB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,MAAM;SACP,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IACjD,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,aAAa,EAAE;gBACvB,QAAQ,EAAE,CAAC,UAAU;gBACrB,MAAM,EAAE,KAAK;gBACb,MAAM;aACP,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,MAAM,EAAE,aAAa,EAAE;YACvB,QAAQ,EAAE,CAAC,UAAU;YACrB,MAAM,EAAE,QAAQ;YAChB,MAAM;SACP,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,CAAC,gBAAgB,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE;QACvB,QAAQ,EAAE,CAAC,UAAU;QACrB,MAAM,EAAE,UAAU;QAClB,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,CAAC;AACvD,IAAI,MAAM,EAAE,CAAC;IACX,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzD,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC"}
|
package/dist/search.d.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import type { KnowledgeDB } from "./db.js";
|
|
2
|
+
export type MemoryTier = "doc" | "raw" | "reflection" | "any";
|
|
2
3
|
export interface SearchOptions {
|
|
3
4
|
type?: string;
|
|
4
5
|
tags?: string[];
|
|
5
6
|
includeSuperseded?: boolean;
|
|
6
7
|
limit?: number;
|
|
8
|
+
memoryTier?: MemoryTier;
|
|
7
9
|
}
|
|
8
10
|
export interface SearchResult {
|
|
9
11
|
id: string;
|
|
@@ -14,6 +16,11 @@ export interface SearchResult {
|
|
|
14
16
|
date: string | null;
|
|
15
17
|
score: number;
|
|
16
18
|
snippet: string;
|
|
19
|
+
chunkIndex?: number;
|
|
20
|
+
charStart?: number;
|
|
21
|
+
charEnd?: number;
|
|
22
|
+
contextPrefix?: string;
|
|
23
|
+
bestChunkId?: string;
|
|
17
24
|
}
|
|
18
25
|
export declare class FtsSearch {
|
|
19
26
|
private readonly db;
|
|
@@ -37,5 +44,10 @@ export declare class FtsSearch {
|
|
|
37
44
|
ensureTable(): void;
|
|
38
45
|
rebuildIndex(): void;
|
|
39
46
|
private escapeFts5Query;
|
|
47
|
+
/**
|
|
48
|
+
* Returns true when the `documents.memory_tier` column exists (schema v3+).
|
|
49
|
+
* On v2 schemas this is false and the memoryTier filter is silently ignored.
|
|
50
|
+
*/
|
|
51
|
+
private memoryTierColumnExists;
|
|
40
52
|
search(query: string, options?: SearchOptions): SearchResult[];
|
|
41
53
|
}
|
package/dist/search.js
CHANGED
|
@@ -66,8 +66,18 @@ export class FtsSearch {
|
|
|
66
66
|
return '""';
|
|
67
67
|
return tokens.map(t => '"' + t.replace(/"/g, '""') + '"').join(" ");
|
|
68
68
|
}
|
|
69
|
+
/**
|
|
70
|
+
* Returns true when the `documents.memory_tier` column exists (schema v3+).
|
|
71
|
+
* On v2 schemas this is false and the memoryTier filter is silently ignored.
|
|
72
|
+
*/
|
|
73
|
+
memoryTierColumnExists() {
|
|
74
|
+
const rows = this.db.db
|
|
75
|
+
.prepare("PRAGMA table_info(documents)")
|
|
76
|
+
.all();
|
|
77
|
+
return rows.some((r) => r.name === "memory_tier");
|
|
78
|
+
}
|
|
69
79
|
search(query, options = {}) {
|
|
70
|
-
const { type, tags, includeSuperseded = false, limit = 20 } = options;
|
|
80
|
+
const { type, tags, includeSuperseded = false, limit = 20, memoryTier } = options;
|
|
71
81
|
const conditions = ["documents_fts MATCH @query"];
|
|
72
82
|
const params = { query: this.escapeFts5Query(query), limit };
|
|
73
83
|
if (!includeSuperseded) {
|
|
@@ -77,6 +87,10 @@ export class FtsSearch {
|
|
|
77
87
|
conditions.push("d.type = @type");
|
|
78
88
|
params.type = type;
|
|
79
89
|
}
|
|
90
|
+
if (memoryTier && memoryTier !== "any" && this.memoryTierColumnExists()) {
|
|
91
|
+
conditions.push("d.memory_tier = @memoryTier");
|
|
92
|
+
params.memoryTier = memoryTier;
|
|
93
|
+
}
|
|
80
94
|
let joinClause = "";
|
|
81
95
|
if (tags && tags.length > 0) {
|
|
82
96
|
joinClause = "JOIN tags t ON t.doc_id = d.id";
|
package/dist/search.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AA8BA,MAAM,OAAO,SAAS;IACH,EAAE,CAAc;IAEjC,YAAY,EAAe;QACzB,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;IACf,CAAC;IAED;;;;OAIG;IACH,cAAc,CAAC,KAAa;QAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAC5B,gEAAgE,CACjE,CAAC,GAAG,CAAC,KAAK,CAAgF,CAAC;QAC5F,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAChB,oGAAoG,CACrG,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IACrD,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,KAAa;QAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAC5B,gEAAgE,CACjE,CAAC,GAAG,CAAC,KAAK,CAAgF,CAAC;QAC5F,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAChB,2EAA2E,CAC5E,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IACrD,CAAC;IAED;;;;OAIG;IACH,WAAW;QACT,4DAA4D;QAC5D,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAC/B,4EAA4E,CAC7E,CAAC,GAAG,EAAE,CAAC;QACR,IAAI,MAAM;YAAE,OAAO;QACnB,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;KAQf,CAAC,CAAC;IACL,CAAC;IAED,YAAY;QACV,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACtD,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;KAQf,CAAC,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC;;;KAGf,CAAC,CAAC;IACL,CAAC;IAEO,eAAe,CAAC,GAAW;QACjC,MAAM,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAChD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACrC,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACtE,CAAC;IAED;;;OAGG;IACK,sBAAsB;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,EAAE;aACpB,OAAO,CAAC,8BAA8B,CAAC;aACvC,GAAG,EAA6B,CAAC;QACpC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC;IACpD,CAAC;IAED,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QAC/C,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,iBAAiB,GAAG,KAAK,EAAE,KAAK,GAAG,EAAE,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;QAElF,MAAM,UAAU,GAAa,CAAC,4BAA4B,CAAC,CAAC;QAC5D,MAAM,MAAM,GAA4B,EAAE,KAAK,EAAE,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,CAAC;QAEtF,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,UAAU,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QAED,IAAI,IAAI,EAAE,CAAC;YACT,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;QACrB,CAAC;QAED,IAAI,UAAU,IAAI,UAAU,KAAK,KAAK,IAAI,IAAI,CAAC,sBAAsB,EAAE,EAAE,CAAC;YACxE,UAAU,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;YAC/C,MAAM,CAAC,UAAU,GAAG,UAAU,CAAC;QACjC,CAAC;QAED,IAAI,UAAU,GAAG,EAAE,CAAC;QACpB,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,UAAU,GAAG,gCAAgC,CAAC;YAC9C,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACvD,UAAU,CAAC,IAAI,CAAC,aAAa,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC5D,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;gBACtB,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC;YAC1B,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,WAAW,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAE7C,MAAM,GAAG,GAAG;;;;;;;;;;;;QAYR,UAAU;cACJ,WAAW;;;KAGpB,CAAC;QAEF,OAAO,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAmB,CAAC;IAC/D,CAAC;CACF"}
|
package/dist/vector-search.d.ts
CHANGED
|
@@ -12,5 +12,15 @@ export declare class VectorSearch {
|
|
|
12
12
|
dropIndex(): void;
|
|
13
13
|
upsertEmbedding(id: string, embedding: Float32Array): void;
|
|
14
14
|
deleteEmbedding(id: string): void;
|
|
15
|
+
/**
|
|
16
|
+
* Delete all chunk-level vec rows for a document. Chunk ids follow the
|
|
17
|
+
* pattern `${docId}#c${index}` so we match via a SQLite GLOB.
|
|
18
|
+
*
|
|
19
|
+
* This is used by reindex to drop stale chunks when a source markdown file
|
|
20
|
+
* has been deleted or modified. Complements `ON DELETE CASCADE` on the
|
|
21
|
+
* `chunks` table (which deletes chunk rows but not their vec counterparts,
|
|
22
|
+
* because the vec0 virtual table does not participate in FK cascades).
|
|
23
|
+
*/
|
|
24
|
+
deleteChunkVecsByDoc(docId: string): void;
|
|
15
25
|
search(queryEmbedding: Float32Array, limit?: number): VectorResult[];
|
|
16
26
|
}
|
package/dist/vector-search.js
CHANGED
|
@@ -42,6 +42,21 @@ export class VectorSearch {
|
|
|
42
42
|
.prepare("DELETE FROM documents_vec WHERE id = ?")
|
|
43
43
|
.run(id);
|
|
44
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Delete all chunk-level vec rows for a document. Chunk ids follow the
|
|
47
|
+
* pattern `${docId}#c${index}` so we match via a SQLite GLOB.
|
|
48
|
+
*
|
|
49
|
+
* This is used by reindex to drop stale chunks when a source markdown file
|
|
50
|
+
* has been deleted or modified. Complements `ON DELETE CASCADE` on the
|
|
51
|
+
* `chunks` table (which deletes chunk rows but not their vec counterparts,
|
|
52
|
+
* because the vec0 virtual table does not participate in FK cascades).
|
|
53
|
+
*/
|
|
54
|
+
deleteChunkVecsByDoc(docId) {
|
|
55
|
+
this.ensureVecLoaded();
|
|
56
|
+
this.knowledgeDb.db
|
|
57
|
+
.prepare("DELETE FROM documents_vec WHERE id GLOB ?")
|
|
58
|
+
.run(`${docId}#c*`);
|
|
59
|
+
}
|
|
45
60
|
search(queryEmbedding, limit = 10) {
|
|
46
61
|
this.ensureVecLoaded();
|
|
47
62
|
const buf = float32ToBuffer(queryEmbedding);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vector-search.js","sourceRoot":"","sources":["../src/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAQxC,SAAS,eAAe,CAAC,GAAiB;IACxC,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,OAAO,YAAY;IAGH;IAFZ,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAoB,WAAwB;QAAxB,gBAAW,GAAX,WAAW,CAAa;IAAG,CAAC;IAExC,eAAe;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,WAAW;QACT,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;KAKxB,CAAC,CAAC;IACL,CAAC;IAED,SAAS;QACP,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjE,CAAC;IAED,eAAe,CAAC,EAAU,EAAE,SAAuB;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,yDAAyD,CAAC;aAClE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;IACb,CAAC;IAED,MAAM,CAAC,cAA4B,EAAE,QAAgB,EAAE;QACrD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;QAC5C,OAAO,IAAI,CAAC,WAAW,CAAC,EAAE;aACvB,OAAO,CACN;;;;;KAKH,CACE;aACA,GAAG,CAAC,GAAG,EAAE,KAAK,CAAmB,CAAC;IACvC,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"vector-search.js","sourceRoot":"","sources":["../src/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAQxC,SAAS,eAAe,CAAC,GAAiB;IACxC,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,OAAO,YAAY;IAGH;IAFZ,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAoB,WAAwB;QAAxB,gBAAW,GAAX,WAAW,CAAa;IAAG,CAAC;IAExC,eAAe;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,WAAW;QACT,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;KAKxB,CAAC,CAAC;IACL,CAAC;IAED,SAAS;QACP,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjE,CAAC;IAED,eAAe,CAAC,EAAU,EAAE,SAAuB;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,yDAAyD,CAAC;aAClE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,wCAAwC,CAAC;aACjD,GAAG,CAAC,EAAE,CAAC,CAAC;IACb,CAAC;IAED;;;;;;;;OAQG;IACH,oBAAoB,CAAC,KAAa;QAChC,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,CAAC,WAAW,CAAC,EAAE;aAChB,OAAO,CAAC,2CAA2C,CAAC;aACpD,GAAG,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC;IACxB,CAAC;IAED,MAAM,CAAC,cAA4B,EAAE,QAAgB,EAAE;QACrD,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;QAC5C,OAAO,IAAI,CAAC,WAAW,CAAC,EAAE;aACvB,OAAO,CACN;;;;;KAKH,CACE;aACA,GAAG,CAAC,GAAG,EAAE,KAAK,CAAmB,CAAC;IACvC,CAAC;CACF"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ralph-hero-knowledge-index",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.24",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
"graphology-simple-path": "^0.2.0",
|
|
32
32
|
"graphology-traversal": "^0.3.1",
|
|
33
33
|
"graphology-types": "^0.24.8",
|
|
34
|
+
"ignore": "^5.3.2",
|
|
34
35
|
"sqlite-vec": "^0.1.7-alpha.10",
|
|
35
36
|
"yaml": "^2.7.0",
|
|
36
37
|
"zod": "^3.25.0"
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
2
|
+
import { mkdtempSync, writeFileSync, mkdirSync, existsSync, rmSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { tmpdir, homedir } from "node:os";
|
|
5
|
+
import { loadConfig, expandHome, resolveConfigPath } from "../config.js";
|
|
6
|
+
|
|
7
|
+
describe("expandHome", () => {
|
|
8
|
+
it("returns input unchanged when it does not start with ~", () => {
|
|
9
|
+
expect(expandHome("/absolute/path")).toBe("/absolute/path");
|
|
10
|
+
expect(expandHome("relative/path")).toBe("relative/path");
|
|
11
|
+
expect(expandHome("")).toBe("");
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
it("expands a lone ~", () => {
|
|
15
|
+
expect(expandHome("~")).toBe(homedir());
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
it("expands ~/ prefix to homedir/rest", () => {
|
|
19
|
+
expect(expandHome("~/thoughts")).toBe(join(homedir(), "thoughts"));
|
|
20
|
+
expect(expandHome("~/foo/bar")).toBe(join(homedir(), "foo/bar"));
|
|
21
|
+
});
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
describe("loadConfig", () => {
|
|
25
|
+
let originalEnv: string | undefined;
|
|
26
|
+
let tmpDir: string;
|
|
27
|
+
|
|
28
|
+
beforeEach(() => {
|
|
29
|
+
originalEnv = process.env.RALPH_KNOWLEDGE_CONFIG;
|
|
30
|
+
tmpDir = mkdtempSync(join(tmpdir(), "ralph-config-"));
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
afterEach(() => {
|
|
34
|
+
if (originalEnv === undefined) {
|
|
35
|
+
delete process.env.RALPH_KNOWLEDGE_CONFIG;
|
|
36
|
+
} else {
|
|
37
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = originalEnv;
|
|
38
|
+
}
|
|
39
|
+
if (existsSync(tmpDir)) {
|
|
40
|
+
rmSync(tmpDir, { recursive: true, force: true });
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
it("returns {} when the config file is missing", () => {
|
|
45
|
+
// Point env var at a nonexistent path so we don't read the real ~/.ralph file.
|
|
46
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = join(tmpDir, "nope.json");
|
|
47
|
+
expect(loadConfig()).toEqual({});
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it("returns {} and warns on malformed JSON", () => {
|
|
51
|
+
const configPath = join(tmpDir, "broken.json");
|
|
52
|
+
writeFileSync(configPath, "{ not: valid json");
|
|
53
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
54
|
+
const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
|
|
55
|
+
const result = loadConfig();
|
|
56
|
+
expect(result).toEqual({});
|
|
57
|
+
expect(warn).toHaveBeenCalledTimes(1);
|
|
58
|
+
const msg = warn.mock.calls[0][0] as string;
|
|
59
|
+
expect(msg).toContain("Malformed JSON");
|
|
60
|
+
warn.mockRestore();
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("returns {} and warns when top-level is not an object", () => {
|
|
64
|
+
const configPath = join(tmpDir, "array.json");
|
|
65
|
+
writeFileSync(configPath, JSON.stringify(["not", "an", "object"]));
|
|
66
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
67
|
+
const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
|
|
68
|
+
expect(loadConfig()).toEqual({});
|
|
69
|
+
expect(warn).toHaveBeenCalledTimes(1);
|
|
70
|
+
warn.mockRestore();
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it("expands ~ prefixes in roots[] to absolute paths", () => {
|
|
74
|
+
const configPath = join(tmpDir, "tilde.json");
|
|
75
|
+
writeFileSync(
|
|
76
|
+
configPath,
|
|
77
|
+
JSON.stringify({ roots: ["~/thoughts", "/absolute/dir"] }),
|
|
78
|
+
);
|
|
79
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
80
|
+
const cfg = loadConfig();
|
|
81
|
+
expect(cfg.roots).toEqual([
|
|
82
|
+
join(homedir(), "thoughts"),
|
|
83
|
+
"/absolute/dir",
|
|
84
|
+
]);
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
it("expands ~ in dbPath", () => {
|
|
88
|
+
const configPath = join(tmpDir, "db.json");
|
|
89
|
+
writeFileSync(
|
|
90
|
+
configPath,
|
|
91
|
+
JSON.stringify({ dbPath: "~/.ralph-hero/knowledge.db" }),
|
|
92
|
+
);
|
|
93
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
94
|
+
const cfg = loadConfig();
|
|
95
|
+
expect(cfg.dbPath).toBe(join(homedir(), ".ralph-hero/knowledge.db"));
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it("loads ignorePatterns as provided (no expansion)", () => {
|
|
99
|
+
const configPath = join(tmpDir, "ignore.json");
|
|
100
|
+
writeFileSync(
|
|
101
|
+
configPath,
|
|
102
|
+
JSON.stringify({ ignorePatterns: ["**/drafts/**", "*.bak"] }),
|
|
103
|
+
);
|
|
104
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
105
|
+
const cfg = loadConfig();
|
|
106
|
+
expect(cfg.ignorePatterns).toEqual(["**/drafts/**", "*.bak"]);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it("honors RALPH_KNOWLEDGE_CONFIG env var override", () => {
|
|
110
|
+
const configPath = join(tmpDir, "override.json");
|
|
111
|
+
writeFileSync(
|
|
112
|
+
configPath,
|
|
113
|
+
JSON.stringify({ roots: ["/x"], ignorePatterns: ["y/**"], dbPath: "/z.db" }),
|
|
114
|
+
);
|
|
115
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
116
|
+
const cfg = loadConfig();
|
|
117
|
+
expect(cfg).toEqual({
|
|
118
|
+
roots: ["/x"],
|
|
119
|
+
ignorePatterns: ["y/**"],
|
|
120
|
+
dbPath: "/z.db",
|
|
121
|
+
});
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
it("drops non-string roots and ignorePatterns entries", () => {
|
|
125
|
+
const configPath = join(tmpDir, "mixed.json");
|
|
126
|
+
writeFileSync(
|
|
127
|
+
configPath,
|
|
128
|
+
JSON.stringify({
|
|
129
|
+
roots: ["/a", 42, null, "/b"],
|
|
130
|
+
ignorePatterns: ["good", 7, "more"],
|
|
131
|
+
}),
|
|
132
|
+
);
|
|
133
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = configPath;
|
|
134
|
+
const cfg = loadConfig();
|
|
135
|
+
expect(cfg.roots).toEqual(["/a", "/b"]);
|
|
136
|
+
expect(cfg.ignorePatterns).toEqual(["good", "more"]);
|
|
137
|
+
});
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
describe("resolveConfigPath", () => {
|
|
141
|
+
let originalEnv: string | undefined;
|
|
142
|
+
|
|
143
|
+
beforeEach(() => {
|
|
144
|
+
originalEnv = process.env.RALPH_KNOWLEDGE_CONFIG;
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
afterEach(() => {
|
|
148
|
+
if (originalEnv === undefined) {
|
|
149
|
+
delete process.env.RALPH_KNOWLEDGE_CONFIG;
|
|
150
|
+
} else {
|
|
151
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = originalEnv;
|
|
152
|
+
}
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
it("defaults to ~/.ralph/knowledge.config.json when env var is unset", () => {
|
|
156
|
+
delete process.env.RALPH_KNOWLEDGE_CONFIG;
|
|
157
|
+
expect(resolveConfigPath()).toBe(
|
|
158
|
+
join(homedir(), ".ralph", "knowledge.config.json"),
|
|
159
|
+
);
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
it("expands ~ prefix in RALPH_KNOWLEDGE_CONFIG", () => {
|
|
163
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = "~/custom/knowledge.json";
|
|
164
|
+
expect(resolveConfigPath()).toBe(
|
|
165
|
+
join(homedir(), "custom/knowledge.json"),
|
|
166
|
+
);
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
it("uses RALPH_KNOWLEDGE_CONFIG absolute path verbatim", () => {
|
|
170
|
+
process.env.RALPH_KNOWLEDGE_CONFIG = "/etc/ralph/knowledge.json";
|
|
171
|
+
expect(resolveConfigPath()).toBe("/etc/ralph/knowledge.json");
|
|
172
|
+
});
|
|
173
|
+
});
|
|
@@ -1,5 +1,20 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
// Mock @huggingface/transformers so we don't need to load the real ONNX
|
|
4
|
+
// model during unit tests. The fake pipeline returns a constant 384-dim
|
|
5
|
+
// vector; we track call count via the `embedCalls` array below.
|
|
6
|
+
const embedCalls: string[] = [];
|
|
7
|
+
vi.mock("@huggingface/transformers", () => {
|
|
8
|
+
const fakePipeline = async (text: string, _opts: unknown) => {
|
|
9
|
+
embedCalls.push(text);
|
|
10
|
+
return { data: new Float32Array(384) };
|
|
11
|
+
};
|
|
12
|
+
return {
|
|
13
|
+
pipeline: vi.fn(async () => fakePipeline),
|
|
14
|
+
};
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
import { prepareTextForEmbedding, embedDocument } from "../embedder.js";
|
|
3
18
|
|
|
4
19
|
describe("prepareTextForEmbedding", () => {
|
|
5
20
|
it("includes title, tags, and first paragraph", () => {
|
|
@@ -40,14 +55,15 @@ describe("prepareTextForEmbedding", () => {
|
|
|
40
55
|
expect(result).not.toContain("\n\n");
|
|
41
56
|
});
|
|
42
57
|
|
|
43
|
-
it("truncates at MAX_CHARS
|
|
58
|
+
it("no longer truncates at 500 chars (MAX_CHARS removed)", () => {
|
|
44
59
|
const longParagraph = "A".repeat(600);
|
|
45
60
|
const result = prepareTextForEmbedding(
|
|
46
61
|
"Title",
|
|
47
62
|
["tag1", "tag2"],
|
|
48
63
|
longParagraph,
|
|
49
64
|
);
|
|
50
|
-
|
|
65
|
+
// Title (5) + \n + tag1, tag2 (10) + \n + 600 A's = 617 chars
|
|
66
|
+
expect(result.length).toBe(617);
|
|
51
67
|
expect(result.startsWith("Title\ntag1, tag2\n")).toBe(true);
|
|
52
68
|
});
|
|
53
69
|
|
|
@@ -98,3 +114,86 @@ describe("prepareTextForEmbedding", () => {
|
|
|
98
114
|
expect(result).toBe("My Title\ngraphology, search\nFirst paragraph.");
|
|
99
115
|
});
|
|
100
116
|
});
|
|
117
|
+
|
|
118
|
+
describe("embedDocument", () => {
|
|
119
|
+
beforeEach(() => {
|
|
120
|
+
embedCalls.length = 0;
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
it("returns exactly one chunk for short content", async () => {
|
|
124
|
+
const result = await embedDocument("Title", ["tag"], "short content");
|
|
125
|
+
expect(result).toHaveLength(1);
|
|
126
|
+
expect(result[0]!.index).toBe(0);
|
|
127
|
+
expect(result[0]!.content).toBe("short content");
|
|
128
|
+
expect(result[0]!.charStart).toBe(0);
|
|
129
|
+
expect(result[0]!.charEnd).toBe("short content".length);
|
|
130
|
+
expect(result[0]!.embedding).toBeInstanceOf(Float32Array);
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it("embeds with title + tagLine + chunk.content prepended", async () => {
|
|
134
|
+
await embedDocument("My Title", ["graphology", "search"], "body text");
|
|
135
|
+
expect(embedCalls).toHaveLength(1);
|
|
136
|
+
expect(embedCalls[0]).toBe("My Title\ngraphology, search\nbody text");
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
it("omits empty title/tags/content from the embed input", async () => {
|
|
140
|
+
await embedDocument("", [], "only content here");
|
|
141
|
+
expect(embedCalls).toContain("only content here");
|
|
142
|
+
|
|
143
|
+
embedCalls.length = 0;
|
|
144
|
+
await embedDocument("Just Title", [], "");
|
|
145
|
+
// Empty content -> one chunk with empty string, only title is non-empty.
|
|
146
|
+
expect(embedCalls).toContain("Just Title");
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
it("yields >= 4 chunks for an 8K-char document", async () => {
|
|
150
|
+
const longContent = "A".repeat(8000);
|
|
151
|
+
const result = await embedDocument("Title", [], longContent);
|
|
152
|
+
expect(result.length).toBeGreaterThanOrEqual(4);
|
|
153
|
+
// Each chunk gets its own embedding.
|
|
154
|
+
expect(embedCalls).toHaveLength(result.length);
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
it("produces Float32Array embeddings of length 384", async () => {
|
|
158
|
+
const result = await embedDocument("T", [], "hello world");
|
|
159
|
+
expect(result[0]!.embedding).toBeInstanceOf(Float32Array);
|
|
160
|
+
expect(result[0]!.embedding.length).toBe(384);
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
it("chunk indexes are monotonically increasing from 0", async () => {
|
|
164
|
+
const longContent = "word ".repeat(3000); // ~15K chars, many chunks
|
|
165
|
+
const result = await embedDocument("T", [], longContent);
|
|
166
|
+
expect(result.length).toBeGreaterThan(1);
|
|
167
|
+
for (let i = 0; i < result.length; i++) {
|
|
168
|
+
expect(result[i]!.index).toBe(i);
|
|
169
|
+
}
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
it("chunk offsets reconstruct the original content", async () => {
|
|
173
|
+
const content = "A".repeat(5000);
|
|
174
|
+
const result = await embedDocument("T", [], content);
|
|
175
|
+
for (const chunk of result) {
|
|
176
|
+
expect(content.slice(chunk.charStart, chunk.charEnd)).toBe(chunk.content);
|
|
177
|
+
}
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
it("empty content yields one chunk with empty content (anchors on title/tags)", async () => {
|
|
181
|
+
const result = await embedDocument("Just Title", ["some-tag"], "");
|
|
182
|
+
expect(result).toHaveLength(1);
|
|
183
|
+
expect(result[0]!.content).toBe("");
|
|
184
|
+
expect(result[0]!.charStart).toBe(0);
|
|
185
|
+
expect(result[0]!.charEnd).toBe(0);
|
|
186
|
+
// Still got embedded using title + tag.
|
|
187
|
+
expect(embedCalls).toContain("Just Title\nsome-tag");
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
it("respects custom chunker options", async () => {
|
|
191
|
+
const content = "A".repeat(500);
|
|
192
|
+
const result = await embedDocument("T", [], content, {
|
|
193
|
+
chunkSize: 100,
|
|
194
|
+
chunkOverlap: 10,
|
|
195
|
+
});
|
|
196
|
+
// With chunkSize=100 over 500 chars, we expect multiple chunks.
|
|
197
|
+
expect(result.length).toBeGreaterThan(1);
|
|
198
|
+
});
|
|
199
|
+
});
|