@tichopad/notes-query-tool 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +24 -0
- package/dist/main.js +1416 -24
- package/dist/main.js.map +2 -2
- package/package.json +53 -60
package/dist/main.js
CHANGED
|
@@ -1,26 +1,1418 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
2
|
+
|
|
3
|
+
// src/main.ts
|
|
4
|
+
import { defineCommand as defineCommand4, runMain } from "citty";
|
|
5
|
+
|
|
6
|
+
// package.json
|
|
7
|
+
var package_default = {
|
|
8
|
+
name: "@tichopad/notes-query-tool",
|
|
9
|
+
type: "module",
|
|
10
|
+
version: "0.1.2",
|
|
11
|
+
description: "CLI tool for indexing and querying Markdown notes",
|
|
12
|
+
license: "Unlicense",
|
|
13
|
+
repository: {
|
|
14
|
+
url: "https://github.com/tichopad/notes-query-tool"
|
|
15
|
+
},
|
|
16
|
+
engines: {
|
|
17
|
+
node: ">=24"
|
|
18
|
+
},
|
|
19
|
+
packageManager: "pnpm@11.5.1",
|
|
20
|
+
main: "dist/main.js",
|
|
21
|
+
bin: {
|
|
22
|
+
nqt: "dist/main.js"
|
|
23
|
+
},
|
|
24
|
+
files: [
|
|
25
|
+
"dist"
|
|
26
|
+
],
|
|
27
|
+
scripts: {
|
|
28
|
+
"db:delete-stale-lock": "rm ./dbdata/postmaster.pid",
|
|
29
|
+
"db:generate": "drizzle-kit generate",
|
|
30
|
+
"db:query": "node scripts/query.ts",
|
|
31
|
+
"db:push": "drizzle-kit push",
|
|
32
|
+
"db:studio": "drizzle-kit studio",
|
|
33
|
+
"benchdata:load": "pnpm run dev load --glob 'benchdata/**/*.md'",
|
|
34
|
+
"benchdata:reindex": "rm -rf dbdata && pnpm run benchdata:load",
|
|
35
|
+
"testdata:load": "pnpm run dev load --glob 'testdata/**/*.md'",
|
|
36
|
+
"testdata:reindex": "rm -rf dbdata && pnpm run testdata:load",
|
|
37
|
+
dev: "node src/main.ts",
|
|
38
|
+
build: "node scripts/build.ts",
|
|
39
|
+
test: "node --test --test-reporter=spec 'src/**/*.test.ts'",
|
|
40
|
+
bench: "node bench/retrieval.ts",
|
|
41
|
+
check: "biome check . && tsc --noEmit && pnpm run test",
|
|
42
|
+
fix: "biome check --write ."
|
|
43
|
+
},
|
|
44
|
+
dependencies: {
|
|
45
|
+
"@electric-sql/pglite": "0.5.1",
|
|
46
|
+
"@electric-sql/pglite-pgvector": "0.0.2",
|
|
47
|
+
"@huggingface/transformers": "4.2.0",
|
|
48
|
+
citty: "0.2.2",
|
|
49
|
+
consola: "3.4.2",
|
|
50
|
+
"drizzle-orm": "0.45.2",
|
|
51
|
+
marked: "18.0.4",
|
|
52
|
+
yaml: "2.9.0"
|
|
53
|
+
},
|
|
54
|
+
devDependencies: {
|
|
55
|
+
"@biomejs/biome": "2.4.16",
|
|
56
|
+
"@types/node": "24.12.4",
|
|
57
|
+
"drizzle-kit": "0.31.10",
|
|
58
|
+
esbuild: "0.28.0",
|
|
59
|
+
typescript: "6.0.3"
|
|
60
|
+
}
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
// src/commands/drop.ts
|
|
64
|
+
import { createInterface } from "node:readline";
|
|
65
|
+
import { defineCommand } from "citty";
|
|
66
|
+
|
|
67
|
+
// src/database/base-repository.ts
|
|
68
|
+
import { eq } from "drizzle-orm";
|
|
69
|
+
|
|
70
|
+
// src/database/client.ts
|
|
71
|
+
import { PGlite } from "@electric-sql/pglite";
|
|
72
|
+
import { pg_trgm } from "@electric-sql/pglite/contrib/pg_trgm";
|
|
73
|
+
import { unaccent } from "@electric-sql/pglite/contrib/unaccent";
|
|
74
|
+
import { vector } from "@electric-sql/pglite-pgvector";
|
|
75
|
+
import { drizzle } from "drizzle-orm/pglite";
|
|
76
|
+
|
|
77
|
+
// src/config.ts
|
|
78
|
+
import { homedir } from "node:os";
|
|
79
|
+
import { join } from "node:path";
|
|
80
|
+
var MODEL_ID = "onnx-community/embeddinggemma-300m-ONNX";
|
|
81
|
+
var MODEL_DTYPE = "fp32";
|
|
82
|
+
var EMBEDDING_DIMS = 768;
|
|
83
|
+
/**
 * Resolves the directory that holds the tool's database files.
 *
 * Uses `$XDG_DATA_HOME` when it is set to a non-empty string, otherwise
 * falls back to `<home>/.local/share`.
 *
 * @returns {string} The `nqt` data directory path, always ending in "/".
 */
function getDataDir() {
  const { XDG_DATA_HOME: xdgHome } = process.env;
  let root;
  if (xdgHome && xdgHome.length > 0) {
    root = xdgHome;
  } else {
    root = join(homedir(), ".local", "share");
  }
  const appDir = join(root, "nqt");
  return `${appDir}/`;
}
|
|
88
|
+
var DB_DATA_DIR = getDataDir();
|
|
89
|
+
var CHUNK_LIMIT_CHARS = 2e3;
|
|
90
|
+
var VECTOR_LIMIT = 30;
|
|
91
|
+
var FTS_LIMIT = 20;
|
|
92
|
+
var TRIGRAM_LIMIT = 20;
|
|
93
|
+
var VECTOR_WEIGHT = 0.3;
|
|
94
|
+
var FTS_WEIGHT = 0.4;
|
|
95
|
+
var TRIGRAM_WEIGHT = 0.3;
|
|
96
|
+
var TRIGRAM_THRESHOLD = 0.3;
|
|
97
|
+
var LINK_BOOST = 0.2;
|
|
98
|
+
var LINK_BOOST_CAP = 0.4;
|
|
99
|
+
var LINK_SOURCE_TOP_N = 10;
|
|
100
|
+
|
|
101
|
+
// src/database/client.ts
|
|
102
|
+
var DB_EXTENSIONS = {
|
|
103
|
+
unaccent,
|
|
104
|
+
vector,
|
|
105
|
+
pg_trgm
|
|
106
|
+
};
|
|
107
|
+
/**
 * Opens a PGlite database with the configured extensions and wraps it in
 * a drizzle client.
 *
 * @param {string} [dataDir=DB_DATA_DIR] Directory passed to PGlite as `dataDir`.
 * @returns The drizzle client bound to the new PGlite instance.
 */
function createDbClient(dataDir = DB_DATA_DIR) {
  const client = new PGlite({ dataDir, extensions: DB_EXTENSIONS });
  return drizzle({ client });
}
|
|
114
|
+
// Lazily created, module-wide database client (see getDb).
var _db;

/**
 * Returns the shared database client, creating it with the default data
 * directory on first use.
 */
function getDb() {
  _db ??= createDbClient();
  return _db;
}
|
|
121
|
+
|
|
122
|
+
// src/database/schema/bases.ts
|
|
123
|
+
import { sql } from "drizzle-orm";
|
|
124
|
+
import { integer, pgTable, text, timestamp } from "drizzle-orm/pg-core";
|
|
125
|
+
// Table "bases": one row per named knowledge base.
var basesTable = pgTable("bases", {
  // Identity primary key.
  id: integer("id")
    .primaryKey()
    .generatedAlwaysAsIdentity(),
  // Base name; required and unique.
  name: text("name")
    .notNull()
    .unique(),
  // Both timestamps default to now() in the database.
  createdAt: timestamp("created_at")
    .notNull()
    .default(sql`now()`),
  updatedAt: timestamp("updated_at")
    .notNull()
    .default(sql`now()`)
});
|
|
131
|
+
|
|
132
|
+
// src/database/base-repository.ts
|
|
133
|
+
/**
 * Data access for rows of the "bases" table.
 */
var DbBaseRepository = class {
  db;

  /**
   * @param [db] Database client to use; the shared client from getDb()
   *   is used when omitted.
   */
  constructor(db) {
    this.db = db ?? getDb();
  }

  /**
   * Looks up a base by its name.
   * @param {string} name
   * @returns The matching row, or undefined when no row has that name.
   */
  async getBaseByName(name) {
    const rows = await this.db
      .select()
      .from(basesTable)
      .where(eq(basesTable.name, name))
      .limit(1);
    return rows[0];
  }

  /**
   * Returns the base with the given name, inserting it first if absent.
   * @param {string} name
   * @throws {Error} When the insert returns no row.
   */
  async getOrCreateBase(name) {
    const found = await this.getBaseByName(name);
    if (found) {
      return found;
    }
    const inserted = await this.db.insert(basesTable).values({ name }).returning();
    const created = inserted[0];
    if (created) {
      return created;
    }
    throw new Error(`Failed to create base: ${name}`);
  }

  /**
   * Deletes the base row with the given name.
   * @param {string} name
   */
  async deleteBase(name) {
    const byName = eq(basesTable.name, name);
    await this.db.delete(basesTable).where(byName);
  }
};
|
|
157
|
+
|
|
158
|
+
// src/commands/drop.ts
|
|
159
|
+
// CLI command "drop": removes a knowledge base after an optional
// interactive confirmation.
var dropCommand = defineCommand({
  meta: {
    name: "drop",
    description: "Drop a knowledge base and all its indexed data"
  },
  args: {
    base: {
      type: "string",
      description: "Knowledge base name to drop",
      default: "default"
    },
    force: {
      type: "boolean",
      description: "Skip confirmation prompt",
      default: false
    }
  },
  async run({ args }) {
    const baseName = args.base;
    const baseRepo = new DbBaseRepository();

    // Refuse to continue when the base does not exist.
    const existing = await baseRepo.getBaseByName(baseName);
    if (!existing) {
      console.error(`Error: Base '${baseName}' not found.`);
      process.exit(1);
    }

    // --force skips the prompt entirely.
    const proceed = args.force || await confirm(
      `Are you sure you want to drop base '${baseName}'? [y/N] `
    );
    if (!proceed) {
      console.log("Aborted.");
      return;
    }

    await baseRepo.deleteBase(baseName);
    console.log(`Base '${baseName}' dropped.`);
  }
});
|
|
196
|
+
/**
 * Asks a yes/no question on the terminal.
 *
 * The answer is trimmed and compared case-insensitively, so "y", "Yes" and
 * " yes " all count as confirmation. Anything else resolves to false, and
 * so does the input stream closing before an answer is given.
 *
 * @param {string} prompt Text written before waiting for input.
 * @returns {Promise<boolean>} true only for an explicit "y" / "yes".
 */
function confirm(prompt) {
  return new Promise((resolve) => {
    const rl = createInterface({
      input: process.stdin,
      output: process.stdout
    });
    let answered = false;
    // If stdin ends before the question is answered, the question callback
    // never runs; treat that as "no" so the promise always settles.
    rl.once("close", () => {
      if (!answered) {
        resolve(false);
      }
    });
    rl.question(prompt, (answer) => {
      answered = true;
      rl.close();
      const normalized = answer.trim().toLowerCase();
      resolve(normalized === "y" || normalized === "yes");
    });
  });
}
|
|
208
|
+
|
|
209
|
+
// src/commands/load.ts
|
|
210
|
+
import { createHash } from "node:crypto";
|
|
211
|
+
import { readFile } from "node:fs/promises";
|
|
212
|
+
import { defineCommand as defineCommand2 } from "citty";
|
|
213
|
+
|
|
214
|
+
// src/embedder.ts
|
|
215
|
+
import {
|
|
216
|
+
pipeline
|
|
217
|
+
} from "@huggingface/transformers";
|
|
218
|
+
|
|
219
|
+
// src/logger.ts
|
|
220
|
+
import { createConsola } from "consola";
|
|
221
|
+
/**
 * Reads the initial log level from the NQT_LOG_LEVEL environment variable.
 *
 * Only integer values from 0 to 5 are accepted. An unset, empty, blank,
 * non-numeric or out-of-range value yields the default level 3.
 *
 * @returns {number} Log level in the range 0..5.
 */
function resolveLevel() {
  const DEFAULT_LEVEL = 3;
  // Trim first: Number("") and Number("  ") are both 0, which would
  // otherwise turn an empty variable into level 0.
  const raw = process.env.NQT_LOG_LEVEL?.trim();
  if (!raw) {
    return DEFAULT_LEVEL;
  }
  const level = Number(raw);
  if (Number.isInteger(level) && level >= 0 && level <= 5) {
    return level;
  }
  return DEFAULT_LEVEL;
}
|
|
231
|
+
var _consola = createConsola({ level: resolveLevel() });
|
|
232
|
+
/**
 * Changes the level of the shared consola instance.
 * @param {number} level New log level.
 */
function setLogLevel(level) {
  const target = _consola;
  target.level = level;
}
|
|
235
|
+
// Thin logging facade: every method forwards its arguments unchanged to
// the same-named method of the shared consola instance.
var logger = {
  info: (message, ...args) => {
    _consola.info(message, ...args);
  },
  warn: (message, ...args) => {
    _consola.warn(message, ...args);
  },
  error: (message, ...args) => {
    _consola.error(message, ...args);
  },
  debug: (message, ...args) => {
    _consola.debug(message, ...args);
  },
  trace: (message, ...args) => {
    _consola.trace(message, ...args);
  }
};
|
|
252
|
+
|
|
253
|
+
// src/embedder.ts
|
|
254
|
+
/**
 * Creates the feature-extraction pipeline for MODEL_ID on the given device
 * and reports progress events while it loads.
 *
 * When stderr is not a TTY, "init", "done" and "ready" events are logged as
 * plain lines. On a TTY, a single status line is rewritten in place.
 *
 * @param {string} device Device name handed to the pipeline (e.g. "webgpu", "cpu").
 * @returns The pipeline returned by `pipeline(...)`.
 */
async function createEmbedder(device) {
  const reportProgress = (event) => {
    const label = event.file || event.model || event.name || "model files";
    const { status } = event;

    if (!process.stderr.isTTY) {
      switch (status) {
        case "init":
          logger.info(`Starting download: ${label}`);
          break;
        case "done":
          logger.info(`Finished download: ${label}`);
          break;
        case "ready":
          logger.info(`Model ready: ${label}`);
          break;
      }
      return;
    }

    // "\x1B[2K\r" clears the current terminal line and returns to column 0.
    switch (status) {
      case "progress": {
        const pct = Math.round(event.progress || 0);
        process.stderr.write(`\x1B[2K\rDownloading ${label}: ${pct}%`);
        break;
      }
      case "done":
        process.stderr.write(`\x1B[2K\rDownloaded ${label}\n`);
        break;
      case "ready":
        process.stderr.write(`\x1B[2K\rModel ready\n`);
        break;
    }
  };

  return await pipeline("feature-extraction", MODEL_ID, {
    device,
    dtype: MODEL_DTYPE,
    progress_callback: reportProgress
  });
}
|
|
283
|
+
/**
 * Copies an iterable/array-like of numbers into a plain array.
 *
 * @param data Numeric values (e.g. a typed array).
 * @returns {number[]} The copied values, or an empty array when any value
 *   is not a finite number.
 */
function extractVector(data) {
  const values = Array.from(data);
  const allFinite = values.every((v) => Number.isFinite(v));
  return allFinite ? values : [];
}
|
|
290
|
+
var QUERY_PREFIX = "task: search result | query: ";
|
|
291
|
+
var DOC_PREFIX_PREFIX = "title: ";
|
|
292
|
+
var DOC_PREFIX_INFIX = " | text: ";
|
|
293
|
+
var DEFAULT_TITLE = "none";
|
|
294
|
+
/**
 * Loads the embedding pipeline and returns helpers for embedding text.
 *
 * WebGPU is tried first; if creating that pipeline throws, a CPU pipeline
 * is created instead. If a WebGPU pipeline later produces non-finite
 * values, it is replaced by a CPU pipeline (and disposed) and the text is
 * embedded again.
 *
 * @returns {Promise<{
 *   embedQuery(text: string): Promise<number[]>,
 *   embedDocument(body: string, title?: string): Promise<number[]>,
 *   dispose(): Promise<void>
 * }>}
 * @throws {Error} From getEmbedding when the CPU pipeline also yields
 *   non-finite values.
 */
async function initEmbedder() {
  let embed;
  let device = "webgpu";
  try {
    embed = await createEmbedder("webgpu");
    logger.debug("Embedder loaded on WebGPU");
  } catch (err) {
    logger.warn("WebGPU unavailable, using CPU.");
    // Keep the underlying reason available for troubleshooting instead of
    // discarding it.
    logger.debug("WebGPU initialization error:", err);
    device = "cpu";
    embed = await createEmbedder("cpu");
    logger.debug("Embedder loaded on CPU");
  }

  // Embeds one already-prefixed string with mean pooling and normalization.
  async function getEmbedding(text4) {
    const result = await embed(text4, {
      pooling: "mean",
      normalize: true
    });
    const vector3 = extractVector(result.data);
    if (vector3.length > 0) {
      return vector3;
    }
    if (device === "webgpu") {
      logger.warn("WebGPU produced invalid embeddings, falling back to CPU...");
      const stale = embed;
      device = "cpu";
      embed = await createEmbedder("cpu");
      logger.debug("Embedder loaded on CPU");
      // Release the replaced WebGPU pipeline; a failure here must not
      // prevent the CPU retry.
      try {
        await stale.dispose();
      } catch (err) {
        logger.debug("Failed to dispose WebGPU embedder:", err);
      }
      return getEmbedding(text4);
    }
    throw new Error("Embedding model produced non-finite values");
  }

  return {
    embedQuery(text4) {
      return getEmbedding(QUERY_PREFIX + text4);
    },
    embedDocument(body, title) {
      // A missing or blank title falls back to DEFAULT_TITLE.
      const t = title?.trim() || DEFAULT_TITLE;
      return getEmbedding(DOC_PREFIX_PREFIX + t + DOC_PREFIX_INFIX + body);
    },
    async dispose() {
      await embed.dispose();
    }
  };
}
|
|
337
|
+
|
|
338
|
+
// src/files/chunker.ts
|
|
339
|
+
import { marked } from "marked";
|
|
340
|
+
var MAX_HEADER_LEVEL = 6;
|
|
341
|
+
/**
 * Splits a Markdown document into chunks of at most `limit` characters.
 *
 * @param {string} md Markdown source.
 * @param {number} limit Maximum chunk size; must be greater than 0.
 * @returns {Array} Chunks; empty for empty or whitespace-only input.
 * @throws {Error} When `limit` is not greater than 0.
 */
function chunkMarkdown(md, limit) {
  if (limit <= 0) {
    throw new Error("limit must be > 0");
  }
  if (md.length === 0 || md.trim().length === 0) {
    return [];
  }
  const positioned = lexWithOffsets(md);
  if (positioned.length === 0) {
    return [];
  }
  const rendered = renderSection(buildRootSection(positioned), [], limit);
  return greedyMerge(rendered, limit);
}
|
|
351
|
+
/**
 * Lexes Markdown with `marked` and annotates each top-level token with
 * character offsets derived from the cumulative length of the tokens' raw
 * text.
 *
 * @param {string} md Markdown source.
 * @returns {Array<{token: object, start: number, end: number, raw: string}>}
 */
function lexWithOffsets(md) {
  let cursor = 0;
  return marked.lexer(md).map((token) => {
    const raw = token.raw ?? "";
    const start = cursor;
    cursor += raw.length;
    return { token, start, end: cursor, raw };
  });
}
|
|
364
|
+
/**
 * Wraps all positioned tokens in a heading-less, level-0 root section.
 *
 * @param {Array<{start: number, end: number}>} tokens Positioned tokens.
 * @returns {{heading: null, level: number, body: Array, start: number, end: number}}
 *   `start`/`end` come from the first/last token, or 0 when there are none.
 */
function buildRootSection(tokens) {
  const first = tokens[0];
  const last = tokens[tokens.length - 1];
  return {
    heading: null,
    level: 0,
    body: tokens,
    start: first?.start ?? 0,
    end: last?.end ?? 0
  };
}
|
|
373
|
+
/**
 * Groups positioned tokens into sections delimited by the shallowest
 * heading depth found in `body`.
 *
 * Tokens that precede the first such heading form a heading-less section
 * (level 0). Deeper headings stay inside the body of their section.
 *
 * @param {Array} body Positioned tokens.
 * @returns {Array|null} Sections in document order, or null when `body`
 *   contains no heading at all.
 */
function groupByShallowestHeading(body) {
  const shallowest = body.reduce(
    (min, pt) => (isHeading(pt.token) && pt.token.depth < min ? pt.token.depth : min),
    MAX_HEADER_LEVEL + 1
  );
  if (shallowest > MAX_HEADER_LEVEL) {
    return null;
  }

  const sections = [];
  let open = null;
  const closeOpenSection = () => {
    if (open) {
      sections.push(finalizeSection(open));
    }
  };

  for (const pt of body) {
    const startsSection = isHeading(pt.token) && pt.token.depth === shallowest;
    if (startsSection) {
      closeOpenSection();
      open = {
        heading: pt,
        level: shallowest,
        body: [],
        start: pt.start,
        end: pt.end
      };
      continue;
    }
    // Content before the first delimiter heading gets its own section.
    open ??= {
      heading: null,
      level: 0,
      body: [],
      start: pt.start,
      end: pt.end
    };
    open.body.push(pt);
    open.end = pt.end;
  }

  closeOpenSection();
  return sections;
}
|
|
411
|
+
/**
 * Sets a section's `end` to the end of its last body token, or to the end
 * of its heading when the body is empty. Mutates and returns `s`.
 *
 * @param {{heading: object|null, body: Array, end: number}} s
 * @returns The same section object.
 */
function finalizeSection(s) {
  const tail = s.body[s.body.length - 1] || s.heading;
  if (tail) {
    s.end = tail.end;
  }
  return s;
}
|
|
420
|
+
/**
 * Tells whether a lexer token is a heading.
 * @param {{type: string}} t Token to inspect.
 * @returns {boolean} true when `t.type` is "heading".
 */
function isHeading(t) {
  const { type } = t;
  return type === "heading";
}
|
|
423
|
+
/**
 * Turns a section into chunks.
 *
 * If the whole section (own heading line, blank line, body) is non-empty
 * and within `limit`, it becomes one chunk. Otherwise the body is grouped
 * by its shallowest heading and each sub-section is rendered recursively.
 * A body without sub-headings is split token by token.
 *
 * @param {object} section Section with `heading`, `body`, `start`, `end`.
 * @param {string[]} ancestors Heading lines of the enclosing sections.
 * @param {number} limit Maximum chunk text length.
 * @returns {Array<{text: string, breadcrumb: string[], startOffset: number, endOffset: number}>}
 */
function renderSection(section, ancestors, limit) {
  const { heading, body } = section;
  const headingLine = heading ? headingLineOf(heading) : null;
  // Children see this section's heading appended to the breadcrumb.
  const childAncestors = headingLine ? [...ancestors, headingLine] : ancestors;

  const bodyStart = body[0]?.start ?? heading?.end ?? section.start;
  const bodyEnd = body[body.length - 1]?.end ?? heading?.end ?? section.end;

  let wholeText = concatRaw(body);
  if (heading) {
    wholeText = `${headingLine}\n\n${wholeText}`;
  }
  const fullBody = trimTrailingNewlines(wholeText);

  const assembled = assemble(ancestors, fullBody, limit);
  const fitsWhole = assembled !== null && assembled.text.length <= limit && fullBody.length > 0;
  if (fitsWhole) {
    return [
      {
        text: assembled.text,
        breadcrumb: assembled.breadcrumb,
        startOffset: heading ? heading.start : bodyStart,
        endOffset: bodyEnd
      }
    ];
  }

  const subs = groupByShallowestHeading(body);
  const hasHeadedSub = Boolean(subs) && subs.length > 0 && subs.some((s) => s.heading !== null);
  if (!hasHeadedSub) {
    return splitBody(body, childAncestors, limit);
  }

  return subs.flatMap((sub) => {
    if (sub.heading !== null) {
      return renderSection(sub, childAncestors, limit);
    }
    // Heading-less preamble: keep it only if it has visible content.
    const hasContent = sub.body.some((t) => t.raw.trim().length > 0);
    return hasContent ? splitBody(sub.body, childAncestors, limit) : [];
  });
}
|
|
460
|
+
/**
 * Emits one chunk per non-blank body token. Tokens that do not fit within
 * `limit` are handed to splitOversizeBlock.
 *
 * @param {Array} body Positioned tokens.
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {number} limit Maximum chunk text length.
 * @returns {Array} Chunks in token order; empty for an empty body.
 */
function splitBody(body, ancestors, limit) {
  return body.flatMap((pt) => {
    const text = trimTrailingNewlines(pt.raw);
    if (text.trim().length === 0) {
      return [];
    }
    const fit = assemble(ancestors, text, limit);
    if (fit === null || fit.text.length > limit) {
      return splitOversizeBlock(pt, ancestors, limit);
    }
    return [
      {
        text: fit.text,
        breadcrumb: fit.breadcrumb,
        startOffset: pt.start,
        endOffset: pt.end
      }
    ];
  });
}
|
|
480
|
+
/**
 * Splits a single token that is too large for one chunk.
 *
 * When the text yields more than one sentence slice, the slices are packed
 * into chunks and any slice that is still too long is split by words/chars.
 * Otherwise the text goes straight to the word/char splitter.
 *
 * @param {{raw: string, start: number}} pt Positioned token.
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {number} limit Maximum chunk text length.
 * @returns {Array} Chunks covering the token.
 */
function splitOversizeBlock(pt, ancestors, limit) {
  const raw = trimTrailingNewlines(pt.raw);
  const byWords = (text) => splitWordsThenChars(
    text,
    limit,
    breadcrumbBudgetSize(ancestors, limit)
  );

  const sentences = splitSentences(raw);
  if (sentences.length <= 1) {
    return assembleSlicesRaw(byWords(raw), pt.start, ancestors, limit);
  }
  return assembleSlices(sentences, pt.start, ancestors, limit, byWords);
}
|
|
503
|
+
/**
 * Splits text into sentence-like slices that together cover all of `raw`.
 *
 * A slice ends at a run of `.`, `!` or `?` followed by whitespace or the
 * end of the text. Slices are contiguous: text the pattern cannot start a
 * match on (leading punctuation, or a fragment such as "example." in
 * "example.com") is attached to the front of the next slice rather than
 * dropped, and any remainder after the last match becomes a final slice.
 *
 * @param {string} raw Text to split.
 * @returns {Array<{text: string, offset: number}>} Slices with their start
 *   offset in `raw`; a single slice holding `raw` when nothing else matched.
 */
function splitSentences(raw) {
  const slices = [];
  const sentencePattern = /[^.!?]+[.!?]+(?:\s+|$)/g;
  let lastEnd = 0;
  for (const m of raw.matchAll(sentencePattern)) {
    const end = m.index + m[0].length;
    // Start at lastEnd, not m.index, so a gap before the match is kept.
    slices.push({ text: raw.slice(lastEnd, end), offset: lastEnd });
    lastEnd = end;
  }
  if (lastEnd < raw.length) {
    slices.push({ text: raw.slice(lastEnd), offset: lastEnd });
  }
  return slices.length === 0 ? [{ text: raw, offset: 0 }] : slices;
}
|
|
517
|
+
/**
 * Packs whitespace-delimited words into slices of at most
 * `max(1, limit - budget)` characters. A single word longer than that is
 * cut into fixed-size pieces with hardSlice.
 *
 * @param {string} raw Text to split.
 * @param {number} limit Maximum chunk length.
 * @param {number} budget Characters reserved out of `limit`.
 * @returns {Array<{text: string, offset: number}>} Slices with offsets
 *   relative to `raw`.
 */
function splitWordsThenChars(raw, limit, budget) {
  const avail = Math.max(1, limit - budget);
  // Each "word" carries its trailing whitespace along.
  const words = Array.from(raw.matchAll(/\S+\s*/g), (m) => ({
    text: m[0],
    offset: m.index
  }));
  if (words.length === 0) {
    return hardSlice(raw, avail, 0);
  }

  const slices = [];
  let pending = "";
  let pendingOffset = words[0].offset;
  const flush = () => {
    slices.push({ text: pending, offset: pendingOffset });
  };

  for (const { text, offset } of words) {
    if (text.length > avail) {
      // The word alone is too long: emit what is buffered, then hard-cut it.
      if (pending.length > 0) {
        flush();
        pending = "";
      }
      slices.push(...hardSlice(text, avail, offset));
      pendingOffset = offset + text.length;
    } else if (pending.length + text.length > avail) {
      flush();
      pending = text;
      pendingOffset = offset;
    } else {
      if (pending.length === 0) {
        pendingOffset = offset;
      }
      pending += text;
    }
  }

  if (pending.length > 0) {
    flush();
  }
  return slices;
}
|
|
552
|
+
/**
 * Cuts text into consecutive pieces of `size` characters (the last piece
 * may be shorter).
 *
 * @param {string} text4 Text to cut.
 * @param {number} size Piece length; callers pass a value of at least 1.
 * @param {number} baseOffset Value added to each piece's position.
 * @returns {Array<{text: string, offset: number}>}
 */
function hardSlice(text4, size, baseOffset) {
  const pieces = [];
  let pos = 0;
  while (pos < text4.length) {
    pieces.push({
      text: text4.slice(pos, pos + size),
      offset: baseOffset + pos
    });
    pos += size;
  }
  return pieces;
}
|
|
559
|
+
/**
 * Packs consecutive slices into chunks of at most
 * `max(1, limit - breadcrumb budget)` characters.
 *
 * A slice that is too long on its own is passed to `furtherSplit` and each
 * resulting piece is emitted as its own chunk.
 *
 * @param {Array<{text: string, offset: number}>} slices Slices with offsets
 *   relative to the block they came from.
 * @param {number} baseOffset Offset of that block, added to slice offsets.
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {number} limit Maximum chunk text length.
 * @param {(text: string) => Array<{text: string, offset: number}>} furtherSplit
 *   Splitter for oversize slices; its offsets are relative to the slice.
 * @returns {Array} Emitted chunks in order.
 */
function assembleSlices(slices, baseOffset, ancestors, limit, furtherSplit) {
  const avail = Math.max(1, limit - breadcrumbBudgetSize(ancestors, limit));
  const chunks = [];
  let pending = "";
  let pendingOffset = slices[0]?.offset ?? 0;
  const flush = () => {
    chunks.push(emit(pending, baseOffset + pendingOffset, ancestors, limit));
  };

  for (const slice of slices) {
    const size = slice.text.length;
    if (size > avail) {
      if (pending.length > 0) {
        flush();
        pending = "";
      }
      for (const piece of furtherSplit(slice.text)) {
        chunks.push(
          emit(piece.text, baseOffset + slice.offset + piece.offset, ancestors, limit)
        );
      }
    } else if (pending.length + size > avail) {
      flush();
      pending = slice.text;
      pendingOffset = slice.offset;
    } else {
      if (pending.length === 0) {
        pendingOffset = slice.offset;
      }
      pending += slice.text;
    }
  }

  if (pending.length > 0) {
    flush();
  }
  return chunks;
}
|
|
592
|
+
/**
 * Emits every slice as its own chunk, without packing slices together.
 *
 * @param {Array<{text: string, offset: number}>} slices
 * @param {number} baseOffset Added to each slice offset.
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {number} limit Maximum chunk text length.
 * @returns {Array} One chunk per slice.
 */
function assembleSlicesRaw(slices, baseOffset, ancestors, limit) {
  return slices.map((slice) => emit(slice.text, baseOffset + slice.offset, ancestors, limit));
}
|
|
599
|
+
/**
 * Builds one chunk from already-split text.
 *
 * Trailing newlines are removed from the chunk text, while `endOffset` is
 * computed from the untrimmed `body` length.
 *
 * @param {string} body Text of the chunk.
 * @param {number} startOffset Start position of `body`.
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {number} limit Maximum chunk text length.
 * @returns {{text: string, breadcrumb: string[], startOffset: number, endOffset: number}}
 * @throws {Error} When the trimmed text cannot be assembled within `limit`.
 */
function emit(body, startOffset, ancestors, limit) {
  const trimmed = trimTrailingNewlines(body);
  const assembled = assemble(ancestors, trimmed, limit);
  if (!assembled) {
    throw new Error(
      `emit: assemble returned null for body length ${trimmed.length}, limit ${limit}`
    );
  }
  if (assembled.text.length > limit) {
    throw new Error(
      `emit: assembled text length ${assembled.text.length} exceeds limit ${limit} \u2014 body was not pre-split correctly`
    );
  }
  return {
    text: assembled.text,
    breadcrumb: assembled.breadcrumb,
    startOffset,
    endOffset: startOffset + body.length
  };
}
|
|
618
|
+
/**
 * Pairs chunk text with its breadcrumb if the text fits within `limit`.
 *
 * Only `body.length` is compared with `limit`; the breadcrumb does not
 * count towards it. The previous implementation looped, dropping ancestors
 * one by one, but its exit condition never depended on them, so the
 * outcome was always decided by this single check.
 *
 * NOTE(review): the removed loop suggests the breadcrumb length may once
 * have been meant to count towards `limit` — confirm before changing.
 *
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {string} body Chunk text.
 * @param {number} limit Maximum text length.
 * @returns {{text: string, breadcrumb: string[]}|null} The text with a copy
 *   of `ancestors`, or null when `body` is longer than `limit`.
 */
function assemble(ancestors, body, limit) {
  if (body.length > limit) {
    return null;
  }
  return { text: body, breadcrumb: [...ancestors] };
}
|
|
626
|
+
/**
 * Computes how many characters the breadcrumb prefix (heading lines joined
 * by newlines, followed by a blank line) would occupy.
 *
 * @param {string[]} ancestors Breadcrumb heading lines.
 * @param {number} limit Maximum chunk length.
 * @returns {number} The prefix length, or 0 when there are no ancestors or
 *   the prefix alone would reach `limit`.
 */
function breadcrumbBudgetSize(ancestors, limit) {
  if (ancestors.length === 0) {
    return 0;
  }
  // +2 accounts for the "\n\n" that follows the joined heading lines.
  const prefixLength = ancestors.join("\n").length + 2;
  return prefixLength >= limit ? 0 : prefixLength;
}
|
|
636
|
+
/**
 * Concatenates the `raw` text of the given tokens in order.
 * @param {Array<{raw: string}>} tokens
 * @returns {string}
 */
function concatRaw(tokens) {
  return tokens.reduce((joined, t) => joined + t.raw, "");
}
|
|
641
|
+
/**
 * Returns a heading token's raw text without its trailing newlines.
 * @param {{raw: string}} pt Positioned heading token.
 * @returns {string}
 */
function headingLineOf(pt) {
  const { raw } = pt;
  return raw.replace(/\n+$/, "");
}
|
|
644
|
+
/**
 * Removes every "\n" at the end of a string; other trailing whitespace is
 * left alone.
 * @param {string} s
 * @returns {string}
 */
function trimTrailingNewlines(s) {
  let end = s.length;
  while (end > 0 && s[end - 1] === "\n") {
    end -= 1;
  }
  return s.slice(0, end);
}
|
|
647
|
+
/**
 * Merges neighbouring chunks that share the same breadcrumb, as long as the
 * breadcrumb prefix plus the joined text stays within `limit`.
 *
 * Merged texts are separated by a blank line, and the merged chunk spans
 * from the first chunk's start offset to the last chunk's end offset.
 *
 * @param {Array<{text: string, breadcrumb: string[], startOffset: number, endOffset: number}>} chunks
 * @param {number} limit Maximum combined length.
 * @returns {Array} The merged chunks; the input array itself when it holds
 *   at most one chunk.
 */
function greedyMerge(chunks, limit) {
  if (chunks.length <= 1) {
    return chunks;
  }
  const [head, ...rest] = chunks;
  if (!head) {
    return chunks;
  }

  const merged = [];
  let acc = head;
  for (const next of rest) {
    if (!next) {
      continue;
    }
    let combined = null;
    if (sameBreadcrumb(acc.breadcrumb, next.breadcrumb)) {
      const joined = `${acc.text}\n\n${next.text}`;
      const prefix = acc.breadcrumb.length > 0 ? `${acc.breadcrumb.join("\n")}\n\n` : "";
      if (prefix.length + joined.length <= limit) {
        combined = {
          text: joined,
          breadcrumb: acc.breadcrumb,
          startOffset: acc.startOffset,
          endOffset: next.endOffset
        };
      }
    }
    if (combined) {
      acc = combined;
    } else {
      merged.push(acc);
      acc = next;
    }
  }
  merged.push(acc);
  return merged;
}
|
|
682
|
+
/**
 * Compares two breadcrumbs element by element.
 * @param {string[]} a
 * @param {string[]} b
 * @returns {boolean} true when both have the same length and equal entries.
 */
function sameBreadcrumb(a, b) {
  return a.length === b.length && a.every((line, i) => line === b[i]);
}
|
|
687
|
+
|
|
688
|
+
// src/files/load-files.ts
|
|
689
|
+
import { glob } from "node:fs/promises";
|
|
690
|
+
import { basename } from "node:path";
|
|
691
|
+
/**
 * Yields the paths of regular files matching a glob pattern, skipping
 * entries whose name starts with a dot.
 *
 * @param {string} globPattern Pattern passed to `glob` from node:fs/promises.
 * @yields {string} `<parentPath>/<name>` for each matching file.
 */
async function* loadFilesByGlob(globPattern) {
  // Excludes dot-prefixed names, except the "." entry itself.
  const isHidden = (dirent) => {
    const name = basename(dirent.name);
    return name !== "." && name.startsWith(".");
  };
  const entries = glob(globPattern, {
    withFileTypes: true,
    exclude: isHidden
  });
  for await (const entry of entries) {
    if (!entry.isFile()) {
      continue;
    }
    yield `${entry.parentPath}/${entry.name}`;
  }
}
|
|
704
|
+
|
|
705
|
+
// src/commands/load/load-repository.ts
|
|
706
|
+
import { and, count, eq as eq2, sql as sql4 } from "drizzle-orm";
|
|
707
|
+
|
|
708
|
+
// src/database/schema/chunks.ts
|
|
709
|
+
import { sql as sql3 } from "drizzle-orm";
|
|
710
|
+
import {
|
|
711
|
+
customType,
|
|
712
|
+
index,
|
|
713
|
+
integer as integer3,
|
|
714
|
+
pgTable as pgTable3,
|
|
715
|
+
text as text3,
|
|
716
|
+
timestamp as timestamp3,
|
|
717
|
+
vector as vector2
|
|
718
|
+
} from "drizzle-orm/pg-core";
|
|
719
|
+
|
|
720
|
+
// src/database/schema/files.ts
|
|
721
|
+
import { sql as sql2 } from "drizzle-orm";
|
|
722
|
+
import {
|
|
723
|
+
integer as integer2,
|
|
724
|
+
jsonb,
|
|
725
|
+
pgTable as pgTable2,
|
|
726
|
+
text as text2,
|
|
727
|
+
timestamp as timestamp2,
|
|
728
|
+
unique
|
|
729
|
+
} from "drizzle-orm/pg-core";
|
|
730
|
+
// Table "files": one row per indexed file within a base.
var filesTable = pgTable2(
  "files",
  {
    id: integer2("id")
      .primaryKey()
      .generatedAlwaysAsIdentity(),
    // Owning base; file rows are removed when their base is deleted.
    baseId: integer2("base_id")
      .notNull()
      .references(() => basesTable.id, { onDelete: "cascade" }),
    filePath: text2("file_path").notNull(),
    contentHash: text2("content_hash").notNull(),
    // Optional JSON attributes stored alongside the file.
    attributes: jsonb("attributes").$type(),
    createdAt: timestamp2("created_at")
      .notNull()
      .default(sql2`now()`),
    updatedAt: timestamp2("updated_at")
      .notNull()
      .default(sql2`now()`)
  },
  // A path may appear only once per base.
  (table) => [unique().on(table.baseId, table.filePath)]
);
|
|
743
|
+
|
|
744
|
+
// src/database/schema/chunks.ts
|
|
745
|
+
// Custom drizzle column type that maps to the SQL type "tsvector".
var tsvector = customType({
  dataType: () => "tsvector"
});
|
|
750
|
+
// Table "chunks": the text chunks of each file with their search data.
var chunksTable = pgTable3(
  "chunks",
  {
    id: integer3("id")
      .primaryKey()
      .generatedAlwaysAsIdentity(),
    // Owning file; chunk rows are removed when their file is deleted.
    fileId: integer3("file_id")
      .notNull()
      .references(() => filesTable.id, { onDelete: "cascade" }),
    chunkIndex: integer3("chunk_index").notNull(),
    content: text3("content").notNull(),
    breadcrumbs: text3("breadcrumbs")
      .array()
      .notNull(),
    // Nullable vector column with EMBEDDING_DIMS dimensions.
    embedding: vector2("embedding", { dimensions: EMBEDDING_DIMS }),
    // Nullable full-text search vector.
    fts: tsvector("fts"),
    createdAt: timestamp3("created_at")
      .notNull()
      .default(sql3`now()`),
    updatedAt: timestamp3("updated_at")
      .notNull()
      .default(sql3`now()`)
  },
  // GIN index over the full-text column.
  (table) => [index("chunks_fts_idx").using("gin", table.fts)]
);
|
|
765
|
+
|
|
766
|
+
// src/commands/load/load-repository.ts
|
|
767
|
+
/**
 * Data access used while loading files: file rows and their chunks.
 */
var DbLoadRepository = class {
  db;

  /**
   * @param [db] Database client to use; the shared client from getDb()
   *   is used when omitted.
   */
  constructor(db) {
    this.db = db ?? getDb();
  }

  /**
   * Reports what is stored for a file in a base.
   *
   * @param {string} filePath
   * @param {number} baseId
   * @returns {Promise<{fileId: number, contentHash: string, hasStoredChunksWithEmbeddings: boolean}|null>}
   *   null when the file has no row. `hasStoredChunksWithEmbeddings` is true
   *   only when at least one chunk exists and every chunk has an embedding.
   */
  async getFileProcessingState(filePath, baseId) {
    const matchesFile = and(
      eq2(filesTable.filePath, filePath),
      eq2(filesTable.baseId, baseId)
    );
    const fileRows = await this.db
      .select({ id: filesTable.id, contentHash: filesTable.contentHash })
      .from(filesTable)
      .where(matchesFile)
      .limit(1);
    const file = fileRows[0];
    if (!file) {
      return null;
    }

    const countRows = await this.db
      .select({
        total: count(),
        withEmbedding: count(
          sql4`CASE WHEN ${chunksTable.embedding} IS NOT NULL THEN 1 END`
        )
      })
      .from(chunksTable)
      .where(eq2(chunksTable.fileId, file.id));
    const counts = countRows[0];
    const total = counts?.total ?? 0;
    const withEmbedding = counts?.withEmbedding ?? 0;

    return {
      fileId: file.id,
      contentHash: file.contentHash,
      hasStoredChunksWithEmbeddings: total > 0 && withEmbedding === total
    };
  }

  /**
   * Inserts a file row, or updates hash, attributes and `updatedAt` when a
   * row for the same base and path already exists.
   *
   * @param {string} filePath
   * @param {string} contentHash
   * @param {string|null} title Stored as `attributes.title` unless null.
   * @param _updatedAt Unused.
   * @param {number} baseId
   * @returns {Promise<{id: number}>} Id of the inserted or updated row.
   * @throws {Error} When the statement returns no row.
   */
  async upsertFile(filePath, contentHash, title, _updatedAt, baseId) {
    const attributes = title !== null ? { title } : {};
    const rows = await this.db
      .insert(filesTable)
      .values({ filePath, contentHash, attributes, baseId })
      .onConflictDoUpdate({
        target: [filesTable.baseId, filesTable.filePath],
        set: {
          contentHash,
          attributes,
          updatedAt: sql4`now()`
        }
      })
      .returning({ id: filesTable.id });
    const file = rows[0];
    if (!file) {
      throw new Error(`Failed to upsert file: ${filePath}`);
    }
    return { id: file.id };
  }

  /**
   * Replaces all chunks of a file inside one transaction: existing chunks
   * are deleted, then the new ones (if any) are inserted.
   *
   * @param {number} fileId
   * @param {Array<{chunkIndex: number, content: string, breadcrumbs: string[], embedding: number[]}>} chunks
   */
  async replaceFileChunks(fileId, chunks) {
    await this.db.transaction(async (tx) => {
      await tx.delete(chunksTable).where(eq2(chunksTable.fileId, fileId));
      if (chunks.length === 0) {
        return;
      }
      const rows = chunks.map(({ chunkIndex, content, breadcrumbs, embedding }) => ({
        fileId,
        chunkIndex,
        content,
        breadcrumbs,
        embedding,
        // The search vector is computed by the database from the content.
        fts: sql4`to_tsvector('simple', unaccent(${content}))`
      }));
      await tx.insert(chunksTable).values(rows);
    });
  }
};
|
|
831
|
+
|
|
832
|
+
// src/commands/load/process-file.ts
|
|
833
|
+
import path from "node:path";
|
|
834
|
+
|
|
835
|
+
// src/files/frontmatter.ts
|
|
836
|
+
import { parse as parseYaml } from "yaml";
|
|
837
|
+
// Matches a leading block delimited by "---" lines; group 1 is the text
// between the delimiters.
var FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?/;

/**
 * Separates YAML frontmatter from the rest of a document.
 *
 * @param {string} content Full file content.
 * @returns {{attributes: object|null, body: string}} `attributes` is the
 *   parsed frontmatter when it is a non-array object, otherwise null. `body`
 *   is the content after the frontmatter block. When there is no block, or
 *   the YAML fails to parse, `body` is the unchanged content.
 */
function extractFrontmatter(content) {
  const match = content.match(FRONTMATTER_RE);
  if (!match) {
    return { attributes: null, body: content };
  }
  const [block, yamlSource] = match;
  try {
    const parsed = parseYaml(yamlSource);
    const isMapping = parsed && typeof parsed === "object" && !Array.isArray(parsed);
    return {
      attributes: isMapping ? parsed : null,
      body: content.slice(block.length)
    };
  } catch {
    // Unparseable frontmatter: hand the document back untouched.
    return { attributes: null, body: content };
  }
}
|
|
851
|
+
|
|
852
|
+
// src/commands/load/build-document-header.ts
|
|
853
|
+
/**
 * Returns the trimmed title, or null when the value is not a string or is
 * empty after trimming.
 */
function normalizeTitle(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return null;
}
|
|
858
|
+
/**
 * Returns the trimmed, non-empty string entries of an array, in order.
 * Non-array input yields an empty list; non-string entries are dropped.
 */
function normalizeStringList(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  const cleaned = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed) cleaned.push(trimmed);
  }
  return cleaned;
}
|
|
866
|
+
/**
 * Reduces frontmatter attributes to the normalized `title`, `aliases` and
 * `tags` fields. Returns null when no attributes object is given.
 */
function normalizeDocumentMetadata(attributes) {
  return attributes
    ? {
        title: normalizeTitle(attributes.title),
        aliases: normalizeStringList(attributes.aliases),
        tags: normalizeStringList(attributes.tags)
      }
    : null;
}
|
|
874
|
+
/**
 * Builds the two text artefacts derived from a note's location and metadata.
 *
 * @param basename2   File name without the `.md` extension.
 * @param parentDir   Name of the directory containing the file.
 * @param attributes  Raw frontmatter attributes, or null.
 * @returns `headerPrefix`: newline-joined `File:` / `Path:` lines followed by
 *   optional `Title:` / `Aliases:` / `Tags:` lines; `titleString`: the basename
 *   followed by the title (when different), aliases and tags, joined by "; ".
 */
function buildDocumentHeader(basename2, parentDir, attributes) {
  const headerLines = [`File: ${basename2}`, `Path: ${parentDir}`];
  const titleParts = [basename2];
  const metadata = normalizeDocumentMetadata(attributes);
  if (metadata) {
    const { title, aliases, tags } = metadata;
    if (title) {
      headerLines.push(`Title: ${title}`);
      // Avoid repeating the file name when the title is identical to it.
      if (title !== basename2) titleParts.push(title);
    }
    if (aliases.length > 0) {
      const aliasList = aliases.join(", ");
      headerLines.push(`Aliases: ${aliasList}`);
      titleParts.push(`aliases: ${aliasList}`);
    }
    if (tags.length > 0) {
      const tagList = tags.join(", ");
      headerLines.push(`Tags: ${tagList}`);
      titleParts.push(`tags: ${tagList}`);
    }
  }
  return {
    headerPrefix: headerLines.join("\n"),
    titleString: titleParts.join("; ")
  };
}
|
|
897
|
+
|
|
898
|
+
// src/commands/load/decide-file-processing.ts
|
|
899
|
+
/**
 * Decides whether a file has to be (re)processed.
 *
 * A file is skipped only when a stored state exists, its content hash equals
 * `nextContentHash`, and its stored chunks all carry embeddings; every other
 * case yields "process".
 *
 * @returns {{ action: "process" | "skip" }}
 */
function decideFileProcessing(nextContentHash, existing) {
  const isUpToDate =
    existing !== null &&
    existing.contentHash === nextContentHash &&
    Boolean(existing.hasStoredChunksWithEmbeddings);
  return { action: isUpToDate ? "skip" : "process" };
}
|
|
905
|
+
|
|
906
|
+
// src/commands/load/process-file.ts
|
|
907
|
+
/**
 * Reads, chunks, embeds and stores a single notes file.
 *
 * @param filePath  Path of the file to process.
 * @param deps      Injected collaborators: `repo` (getFileProcessingState,
 *   upsertFile, replaceFileChunks), `baseId`, `readText`, `hashContent`,
 *   `chunkMarkdown` and `embedDocument`.
 * @returns `{ status: "skipped", chunkCount: 0 }` when the stored state is up
 *   to date, otherwise `{ status: "processed", chunkCount }`.
 *
 * All embeddings are computed BEFORE anything is written. If the file row
 * (with its new content hash) were written first and embedding then failed,
 * the new hash would sit next to the previous chunks; as long as those chunks
 * have embeddings, decideFileProcessing would report "skip" on the next run
 * and the stale chunks would never be replaced.
 */
async function processLoadedFile(filePath, deps) {
  const { repo, baseId, readText, hashContent, chunkMarkdown: chunkMarkdown2, embedDocument } = deps;
  const content = await readText(filePath);
  const contentHash = hashContent(content);
  const existingState = await repo.getFileProcessingState(filePath, baseId);
  const decision = decideFileProcessing(contentHash, existingState);
  logger.debug(`[${filePath}] decision: ${decision.action}`);
  if (decision.action === "skip") {
    logger.info(`${filePath} -> skipped (unchanged)`);
    return { status: "skipped", chunkCount: 0 };
  }

  const { attributes, body } = extractFrontmatter(content);
  const basename2 = path.basename(filePath, ".md");
  const parentDir = path.basename(path.dirname(filePath));
  const { headerPrefix, titleString } = buildDocumentHeader(
    basename2,
    parentDir,
    attributes
  );
  // Fall back to the full content when nothing is left after the frontmatter.
  const chunks = chunkMarkdown2(body || content, CHUNK_LIMIT_CHARS);
  logger.debug(`[${filePath}] produced ${chunks.length} chunks`);

  const chunkDocs = await Promise.all(
    chunks.map(async (chunk, i) => {
      // Stored content carries the document header in front of the chunk text.
      const augmented = `${headerPrefix}\n\n${chunk.text}`;
      const bodyText = chunk.text.trim();
      // Embed the bare chunk text; only a blank chunk falls back to the
      // header-augmented text.
      const embedding = await embedDocument(bodyText ? bodyText : augmented, titleString);
      logger.trace(`[${filePath}] chunk ${i} embedded (${embedding.length}d)`);
      return {
        content: augmented,
        embedding,
        chunkIndex: i,
        breadcrumbs: chunk.breadcrumb
      };
    })
  );

  // Persist only once every chunk has been embedded successfully.
  const { id: fileId } = await repo.upsertFile(
    filePath,
    contentHash,
    null,
    /* @__PURE__ */ new Date(),
    baseId
  );
  await repo.replaceFileChunks(fileId, chunkDocs);
  logger.debug(`[${filePath}] chunks written to DB`);
  logger.info(`${filePath} \u2192 ${chunks.length} chunks`);
  return { status: "processed", chunkCount: chunks.length };
}
|
|
956
|
+
|
|
957
|
+
// src/commands/load.ts
|
|
958
|
+
// `load` sub-command: walks the files matched by --glob, processes each one
// through processLoadedFile and prints a summary with counts and elapsed time.
var loadCommand = defineCommand2({
  meta: {
    name: "load",
    description: "Load notes files"
  },
  args: {
    glob: {
      type: "string",
      description: "Files glob (e.g. 'notes/**/*.md')",
      required: true
    },
    base: {
      type: "string",
      description: "Knowledge base name to use",
      default: "default"
    }
  },
  async run({ args }) {
    const start = performance.now();
    logger.debug("Starting load...");
    const repo = new DbLoadRepository();
    const baseRepo = new DbBaseRepository();
    const base = await baseRepo.getOrCreateBase(args.base);
    logger.debug(`Using base: ${base.name} (id=${base.id})`);

    // The embedder is created lazily, on the first chunk that needs it. The
    // in-flight initialisation PROMISE is cached rather than the resolved
    // instance: processLoadedFile embeds a file's chunks concurrently, and
    // caching only the instance let every concurrent first call see "no
    // embedder yet" and start its own initEmbedder().
    let embedderPromise = null;
    const getEmbedDocument = async (body, title) => {
      embedderPromise ??= (async () => {
        const instance = await initEmbedder();
        logger.debug("Embedder initialised");
        return instance;
      })();
      const embedder = await embedderPromise;
      return embedder.embedDocument(body, title);
    };

    let filesSeen = 0;
    let filesSkipped = 0;
    let filesProcessed = 0;
    let chunksProduced = 0;
    // Files are processed one at a time, in the order the glob yields them.
    for await (const filePath of loadFilesByGlob(args.glob)) {
      logger.debug(`Processing file: ${filePath}`);
      const result = await processLoadedFile(filePath, {
        repo,
        baseId: base.id,
        readText: (p) => readFile(p, "utf8"),
        hashContent: (content) => createHash("sha256").update(content).digest("hex"),
        chunkMarkdown,
        embedDocument: getEmbedDocument
      });
      filesSeen++;
      if (result.status === "skipped") {
        filesSkipped++;
      } else {
        filesProcessed++;
        chunksProduced += result.chunkCount;
      }
    }

    console.log(
      `Done. ${filesSeen} files seen, ${filesProcessed} processed, ${filesSkipped} skipped, ${chunksProduced} chunks total.`
    );
    console.log(
      `Time taken: ${((performance.now() - start) / 1e3).toFixed(2)}s`
    );
  }
});
|
|
1026
|
+
|
|
1027
|
+
// src/commands/query.ts
|
|
1028
|
+
import { defineCommand as defineCommand3 } from "citty";
|
|
1029
|
+
|
|
1030
|
+
// src/query/execute.ts
|
|
1031
|
+
import { and as and2, cosineDistance, desc, eq as eq3, gt, sql as sql5 } from "drizzle-orm";
|
|
1032
|
+
|
|
1033
|
+
// src/query/scoring.ts
|
|
1034
|
+
import path2 from "node:path";
|
|
1035
|
+
/**
 * Merges vector, full-text and trigram hits into one map keyed by chunk id.
 *
 * Each list is normalised by its own maximum (floored at 1e-9 so an empty or
 * all-zero list never divides by zero) and scaled by the matching weight. A
 * chunk that appears in several lists gets the sum of its contributions.
 *
 * @param vectorResults   Hits carrying `similarity`.
 * @param ftsResults      Hits carrying `rank`.
 * @param trigramResults  Hits carrying `score`.
 * @param weights         `{ vector, fts, trigram }` multipliers.
 * @returns Map of chunk id to `{ id, filePath, chunkIndex, breadcrumbs, content, score }`.
 */
function fuseScores(vectorResults, ftsResults, trigramResults, weights) {
  logger.trace(
    `Fusing scores \u2014 vector: ${vectorResults.length}, fts: ${ftsResults.length}, trigram: ${trigramResults.length}`
  );
  const flooredMax = (values) => Math.max(...values, 1e-9);
  const maxSimilarity = flooredMax(vectorResults.map((hit) => hit.similarity));
  const maxRank = flooredMax(ftsResults.map((hit) => hit.rank));
  const maxTrigram = flooredMax(trigramResults.map((hit) => hit.score));

  const toEntry = ({ id, filePath, chunkIndex, breadcrumbs, content }, score) => ({
    id,
    filePath,
    chunkIndex,
    breadcrumbs,
    content,
    score
  });

  const merged = /* @__PURE__ */ new Map();
  // Vector hits seed the map; the other two lists add to existing entries.
  for (const hit of vectorResults) {
    merged.set(hit.id, toEntry(hit, hit.similarity / maxSimilarity * weights.vector));
  }
  const accumulate = (hit, contribution) => {
    const entry = merged.get(hit.id);
    if (entry) {
      entry.score += contribution;
      return;
    }
    merged.set(hit.id, toEntry(hit, contribution));
  };
  for (const hit of ftsResults) {
    accumulate(hit, hit.rank / maxRank * weights.fts);
  }
  for (const hit of trigramResults) {
    accumulate(hit, hit.score / maxTrigram * weights.trigram);
  }
  return merged;
}
|
|
1090
|
+
/**
 * Collects the distinct targets of `[[wikilinks]]` in a text, in order of
 * first appearance. Only the part before an optional `|alias` or `#heading`
 * is kept, trimmed of surrounding whitespace.
 */
function extractWikilinks(content) {
  const linkPattern = /\[\[([^\]|#]+)(?:[|#][^\]]*)?\]\]/g;
  const targets = /* @__PURE__ */ new Set();
  for (let hit = linkPattern.exec(content); hit !== null; hit = linkPattern.exec(content)) {
    const target = hit[1];
    if (target !== void 0) {
      targets.add(target.trim());
    }
  }
  return Array.from(targets);
}
|
|
1100
|
+
/**
 * Boosts chunks whose file is wikilinked from the highest-scoring chunks.
 *
 * The `topN` best chunks act as link sources. Each link is resolved by file
 * basename (without `.md`) against `allFilePaths`. A linked file that is not
 * the source's own file and already has chunks in `merged` accumulates
 * `linkBoost * sourceScore`, capped at `linkBoostCap` per file. The boost is
 * added to every chunk of that file.
 *
 * @returns A new Map with the same keys and order; boosted chunks are copies,
 *   unboosted chunks are the original objects.
 */
function rerankByWikilinks(merged, allFilePaths, topN = LINK_SOURCE_TOP_N, linkBoost = LINK_BOOST, linkBoostCap = LINK_BOOST_CAP) {
  // basename -> every file path sharing that basename
  const pathsByBasename = /* @__PURE__ */ new Map();
  for (const filePath of allFilePaths) {
    const key = path2.basename(filePath, ".md");
    const bucket = pathsByBasename.get(key);
    if (bucket) {
      bucket.add(filePath);
    } else {
      pathsByBasename.set(key, /* @__PURE__ */ new Set([filePath]));
    }
  }

  const chunks = [...merged.values()];
  const pathsWithHits = new Set(chunks.map((chunk) => chunk.filePath));
  const linkSources = [...chunks].sort((a, b) => b.score - a.score).slice(0, topN);

  const boostByPath = /* @__PURE__ */ new Map();
  for (const source of linkSources) {
    for (const link of extractWikilinks(source.content)) {
      const candidates = pathsByBasename.get(link);
      if (!candidates) continue;
      for (const target of candidates) {
        const isSelfLink = target === source.filePath;
        if (isSelfLink || !pathsWithHits.has(target)) continue;
        const previous = boostByPath.get(target) ?? 0;
        const capped = Math.min(previous + linkBoost * source.score, linkBoostCap);
        boostByPath.set(target, capped);
        logger.trace(`Boosting ${target} by ${(capped - previous).toFixed(4)}`);
      }
    }
  }

  const reranked = /* @__PURE__ */ new Map();
  merged.forEach((chunk, id) => {
    const boost = boostByPath.get(chunk.filePath) ?? 0;
    if (boost > 0) {
      reranked.set(id, { ...chunk, score: chunk.score + boost });
    } else {
      reranked.set(id, chunk);
    }
  });
  return reranked;
}
|
|
1139
|
+
/**
 * Keeps the best-scoring chunk of every file and returns the top `topK` of
 * them, highest score first.
 *
 * The first chunk seen wins a score tie within a file. Files with equal best
 * scores keep the order in which they first appeared in `merged`, because the
 * sort is stable.
 *
 * @param merged  Map of chunk id to scored chunk (needs `filePath` and `score`).
 * @param topK    Maximum number of results.
 * @returns Array of the original chunk objects.
 */
function poolByFile(merged, topK) {
  // Only the best chunk per file is needed; nothing else is tracked.
  const bestByFile = /* @__PURE__ */ new Map();
  for (const chunk of merged.values()) {
    const best = bestByFile.get(chunk.filePath);
    if (!best || chunk.score > best.score) {
      bestByFile.set(chunk.filePath, chunk);
    }
  }
  return [...bestByFile.values()].sort((a, b) => b.score - a.score).slice(0, topK);
}
|
|
1154
|
+
|
|
1155
|
+
// src/query/execute.ts
|
|
1156
|
+
/**
 * Runs the hybrid search: a vector-similarity query, a full-text query and a
 * trigram query are issued together, their scores are fused, re-ranked by
 * wikilinks and pooled to one result per file.
 *
 * @param opts.vectorText        Text embedded with `embedQuery` for the vector search.
 * @param opts.queryText         Full-text query (passed to websearch_to_tsquery).
 * @param opts.trigramText       Trigram query text; defaults to `queryText`.
 * @param opts.embedQuery        Async function turning text into a query vector.
 * @param opts.db                Database handle; defaults to `getDb()`.
 * @param opts.baseId            Only files of this base are searched.
 * @param opts.weights           `{ vector, fts, trigram }` fusion weights.
 * @param opts.limits            `{ vector, fts, trigram }` per-query row limits.
 * @param opts.trigramThreshold  Minimum similarity for the trigram operator.
 * @param opts.trigramMode       "strict" (`<<%`) or anything else (`<%`).
 * @param opts.topK              Maximum number of pooled results (default 10).
 * @returns Array of pooled, scored chunks, best first.
 */
async function executeQuery(opts) {
  const {
    vectorText,
    queryText,
    trigramText,
    embedQuery,
    db = getDb(),
    baseId,
    weights = {
      vector: VECTOR_WEIGHT,
      fts: FTS_WEIGHT,
      trigram: TRIGRAM_WEIGHT
    },
    limits = {
      vector: VECTOR_LIMIT,
      fts: FTS_LIMIT,
      trigram: TRIGRAM_LIMIT
    },
    trigramThreshold = TRIGRAM_THRESHOLD,
    trigramMode = "strict",
    topK = 10
  } = opts;
  const queryVector = await embedQuery(vectorText);
  const effectiveTrigramText = trigramText ?? queryText;
  logger.debug(
    `Executing query \u2014 vector: "${vectorText}", fulltext: "${queryText}"`
  );
  const similarity = sql5`1 - (${cosineDistance(chunksTable.embedding, queryVector)})`;

  // Function, operator and threshold setting all come from this fixed pair of
  // choices; nothing user-supplied ever reaches sql.raw().
  const isStrict = trigramMode === "strict";
  const trigramFn = isStrict ? "strict_word_similarity" : "word_similarity";
  const trigramOp = isStrict ? sql5.raw("<<%") : sql5.raw("<%");
  // Per the pg_trgm documentation, `<<%` and `<%` compare against these
  // settings. set_limit() only changes the threshold of the plain `%`
  // operator, so it had no effect on the operators used below.
  const trigramThresholdSetting = isStrict ? "pg_trgm.strict_word_similarity_threshold" : "pg_trgm.word_similarity_threshold";
  const trigramScore = sql5`${sql5.raw(trigramFn)}(${effectiveTrigramText}, ${chunksTable.content})`;

  const [vectorResults, ftsResults, trigramResults] = await Promise.all([
    // 1) Vector search: positive cosine similarity, best first.
    db.select({
      id: chunksTable.id,
      filePath: filesTable.filePath,
      chunkIndex: chunksTable.chunkIndex,
      breadcrumbs: chunksTable.breadcrumbs,
      content: chunksTable.content,
      similarity
    }).from(chunksTable).innerJoin(filesTable, eq3(chunksTable.fileId, filesTable.id)).where(and2(gt(similarity, 0), eq3(filesTable.baseId, baseId))).orderBy(desc(similarity)).limit(limits.vector),
    // 2) Full-text search ranked with ts_rank.
    db.select({
      id: chunksTable.id,
      filePath: filesTable.filePath,
      chunkIndex: chunksTable.chunkIndex,
      breadcrumbs: chunksTable.breadcrumbs,
      content: chunksTable.content,
      rank: sql5`ts_rank(${chunksTable.fts}, websearch_to_tsquery('simple', unaccent(${queryText})))`
    }).from(chunksTable).innerJoin(filesTable, eq3(chunksTable.fileId, filesTable.id)).where(
      and2(
        sql5`${chunksTable.fts} @@ websearch_to_tsquery('simple', unaccent(${queryText}))`,
        eq3(filesTable.baseId, baseId)
      )
    ).orderBy(
      desc(
        sql5`ts_rank(${chunksTable.fts}, websearch_to_tsquery('simple', unaccent(${queryText})))`
      )
    ).limit(limits.fts),
    // 3) Trigram search. The threshold is set with is_local = true, so it
    //    applies to this transaction only.
    db.transaction(async (tx) => {
      await tx.execute(
        sql5`SELECT set_config(${trigramThresholdSetting}, ${String(trigramThreshold)}, true)`
      );
      return tx.select({
        id: chunksTable.id,
        filePath: filesTable.filePath,
        chunkIndex: chunksTable.chunkIndex,
        breadcrumbs: chunksTable.breadcrumbs,
        content: chunksTable.content,
        score: trigramScore
      }).from(chunksTable).innerJoin(filesTable, eq3(chunksTable.fileId, filesTable.id)).where(
        and2(
          sql5`${effectiveTrigramText} ${trigramOp} ${chunksTable.content}`,
          eq3(filesTable.baseId, baseId)
        )
      ).orderBy(desc(trigramScore)).limit(limits.trigram);
    })
  ]);
  logger.debug(
    `Vector: ${vectorResults.length} hits, FTS: ${ftsResults.length} hits, Trigram: ${trigramResults.length} hits`
  );

  // Every file path of the base is needed to resolve wikilink targets.
  const allFiles = await db.select({ filePath: filesTable.filePath }).from(filesTable).where(eq3(filesTable.baseId, baseId));
  const fused = fuseScores(vectorResults, ftsResults, trigramResults, weights);
  logger.debug(`After fusion: ${fused.size} unique chunks`);
  const reranked = rerankByWikilinks(
    fused,
    allFiles.map((f) => f.filePath)
  );
  logger.debug(`After wikilink rerank: ${reranked.size} chunks`);
  const results = poolByFile(reranked, topK);
  logger.debug(`Returning ${results.length} results`);
  return results;
}
|
|
1245
|
+
|
|
1246
|
+
// src/commands/query.ts
|
|
1247
|
+
// `query` sub-command: validates the arguments, runs the hybrid search against
// the chosen base and prints every result as a `<file>` block on stdout.
var queryCommand = defineCommand3({
  meta: {
    name: "query",
    description: "Search notes by semantic query"
  },
  args: {
    vector: {
      type: "string",
      alias: "v",
      description: "Semantic query for vector search",
      required: true
    },
    fulltext: {
      type: "string",
      alias: "f",
      description: 'Keyword query for full-text search (supports PostgreSQL websearch syntax: OR, -word, "phrases")',
      required: true
    },
    trigram: {
      type: "string",
      alias: "g",
      description: "Plain-text keyword for trigram search (defaults to --fulltext)",
      required: false
    },
    trigramMode: {
      type: "string",
      alias: "t",
      description: "Trigram operator: 'strict' (strict_word_similarity, <<%) or 'word' (word_similarity, <%)",
      default: "strict"
    },
    base: {
      type: "string",
      description: "Knowledge base name to use",
      default: "default"
    }
  },
  async run({ args }) {
    const mode = args.trigramMode;
    if (mode !== "strict" && mode !== "word") {
      throw new Error(
        `Invalid --trigram-mode "${mode}". Must be "strict" or "word".`
      );
    }
    const baseRepo = new DbBaseRepository();
    const base = await baseRepo.getBaseByName(args.base);
    if (!base) {
      // Throw, like the mode validation above, instead of calling
      // process.exit(): exit terminates the process on the spot, while an
      // error propagates to the caller, which can still run its shutdown
      // logic before the process ends with a failure.
      throw new Error(`Base '${args.base}' does not exist.`);
    }
    const embedder = await initEmbedder();
    const results = await executeQuery({
      vectorText: args.vector,
      queryText: args.fulltext,
      trigramText: args.trigram,
      embedQuery: embedder.embedQuery.bind(embedder),
      trigramMode: mode,
      baseId: base.id
    });
    if (results.length === 0) {
      logger.info("No matching chunks found.");
      return;
    }
    for (const row of results) {
      const lines = [
        `<file path="${row.filePath}">`,
        "<meta>",
        `Chunk index: ${row.chunkIndex}`,
        `Score: ${Number(row.score).toFixed(3)}`
      ];
      if (row.breadcrumbs.length > 0) {
        lines.push(`Breadcrumbs: ${row.breadcrumbs.join(" > ")}`);
      }
      lines.push("</meta>", "<content>", `${row.content}`, "</content>", "</file>");
      // Every line, including the last, ends with a newline; console.log then
      // adds the blank line that separates results.
      console.log(`${lines.join("\n")}\n`);
    }
  }
});
|
|
1337
|
+
|
|
1338
|
+
// src/database/migrate.ts
|
|
1339
|
+
import { join as join2 } from "node:path";
|
|
1340
|
+
import { fileURLToPath } from "node:url";
|
|
1341
|
+
import { migrate } from "drizzle-orm/pglite/migrator";
|
|
1342
|
+
// Location of the migration files relative to this module. The bundled build
// always selected this branch of the original build-time conditional.
var migrationsRelativePath = "./drizzle";
// Absolute path of the migrations folder, resolved from this module's own URL.
var moduleDirectory = fileURLToPath(new URL(".", import.meta.url));
var migrationsFolder = join2(moduleDirectory, migrationsRelativePath);
|
|
1347
|
+
/**
 * Applies the migrations found in `migrationsFolder` to the given database.
 *
 * @param db  Database handle passed to the migrator.
 * @throws {Error} "Migration failed: <reason>" with the original error
 *   attached as `cause`.
 */
async function runMigrations(db) {
  try {
    logger.debug("Running DB migrations...");
    await migrate(db, { migrationsFolder });
    logger.debug("Migrations complete");
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Migration failed: ${reason}`, { cause: err });
  }
}
|
|
1359
|
+
|
|
1360
|
+
// src/main.ts
|
|
1361
|
+
// Root command: declares the global flags, wires up the sub-commands and
// brackets every run with database setup and teardown.
var main = defineCommand4({
  meta: {
    name: "notes-query-tool",
    version: package_default.version,
    description: package_default.description,
    alias: "nqt"
  },
  args: {
    verbose: {
      type: "boolean",
      description: "Enable verbose logging (sets log level to max)",
      default: false
    },
    base: {
      type: "string",
      description: "Knowledge base name to use",
      default: "default"
    }
  },
  subCommands: {
    // load: reads notes, chunks them and indexes them in the database
    load: loadCommand,
    // query: searches the indexed notes
    query: queryCommand,
    // drop: removes a knowledge base together with all its indexed data
    drop: dropCommand
  },
  // Runs before any sub-command: optional verbose logging, then the database
  // is awaited until ready and migrated.
  async setup(context) {
    if (context.args.verbose) setLogLevel(999);
    const database = getDb();
    await database.$client.waitReady;
    await runMigrations(database);
  },
  // Runs after the sub-command finishes: closes the database client.
  async cleanup() {
    const database = getDb();
    await database.$client.close();
  }
});
|
|
1402
|
+
/**
 * Logs `message`, closes the database client and exits with `code`.
 *
 * The close call runs inside a promise chain so that a synchronous throw from
 * getDb() or close() also ends in process.exit(code) instead of escaping from
 * the handler that called this function.
 */
function closeDbAndExit(message, code) {
  logger.error(message);
  Promise.resolve().then(() => getDb().$client.close()).finally(() => process.exit(code));
}

// Handlers are registered BEFORE the command is awaited. Registered after the
// top-level await, as before, they only became active once the command had
// already finished, so an interrupt or stray error during a run bypassed them.
process.on("unhandledRejection", (reason) => {
  closeDbAndExit(`Unhandled Rejection: ${reason}`, 1);
});
process.on("uncaughtException", (error) => {
  closeDbAndExit(`Uncaught Exception: ${error}`, 1);
});
process.on("SIGINT", () => {
  closeDbAndExit("Received SIGINT, shutting down...", 0);
});

await runMain(main).catch((error) => {
  closeDbAndExit(`Error: ${error}`, 1);
});
|
|
26
1418
|
//# sourceMappingURL=main.js.map
|