@tobilu/qmd 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +110 -0
- package/README.md +62 -2
- package/bin/qmd +24 -4
- package/dist/ast.d.ts +64 -0
- package/dist/ast.js +324 -0
- package/dist/bench/bench.d.ts +21 -0
- package/dist/bench/bench.js +185 -0
- package/dist/bench/score.d.ts +26 -0
- package/dist/bench/score.js +67 -0
- package/dist/bench/types.d.ts +67 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.js +5 -1
- package/dist/cli/qmd.d.ts +2 -1
- package/dist/cli/qmd.js +338 -21
- package/dist/collections.d.ts +11 -0
- package/dist/db.d.ts +8 -0
- package/dist/db.js +44 -3
- package/dist/embedded-skills.d.ts +6 -0
- package/dist/embedded-skills.js +14 -0
- package/dist/index.d.ts +7 -1
- package/dist/index.js +13 -3
- package/dist/llm.d.ts +12 -3
- package/dist/llm.js +95 -25
- package/dist/mcp/server.js +29 -5
- package/dist/store.d.ts +56 -6
- package/dist/store.js +401 -138
- package/package.json +34 -17
package/dist/db.js
CHANGED
|
@@ -4,6 +4,11 @@
|
|
|
4
4
|
* Provides a unified Database export that works under both Bun (bun:sqlite)
|
|
5
5
|
* and Node.js (better-sqlite3). The APIs are nearly identical — the main
|
|
6
6
|
* difference is the import path.
|
|
7
|
+
*
|
|
8
|
+
* On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
|
|
9
|
+
* which prevents loading native extensions like sqlite-vec. When running under
|
|
10
|
+
* Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
|
|
11
|
+
* SQLite build before creating any database instances.
|
|
7
12
|
*/
|
|
8
13
|
export const isBun = typeof globalThis.Bun !== "undefined";
|
|
9
14
|
let _Database;
|
|
@@ -11,9 +16,35 @@ let _sqliteVecLoad;
|
|
|
11
16
|
if (isBun) {
|
|
12
17
|
// Dynamic string prevents tsc from resolving bun:sqlite on Node.js builds
|
|
13
18
|
const bunSqlite = "bun:" + "sqlite";
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
19
|
+
const BunDatabase = (await import(/* @vite-ignore */ bunSqlite)).Database;
|
|
20
|
+
// See: https://bun.com/docs/runtime/sqlite#setcustomsqlite
|
|
21
|
+
if (process.platform === "darwin") {
|
|
22
|
+
const homebrewPaths = [
|
|
23
|
+
"/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
|
|
24
|
+
"/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel
|
|
25
|
+
];
|
|
26
|
+
for (const p of homebrewPaths) {
|
|
27
|
+
try {
|
|
28
|
+
BunDatabase.setCustomSQLite(p);
|
|
29
|
+
break;
|
|
30
|
+
}
|
|
31
|
+
catch { }
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
_Database = BunDatabase;
|
|
35
|
+
// setCustomSQLite may have silently failed — test that extensions actually work.
|
|
36
|
+
try {
|
|
37
|
+
const { getLoadablePath } = await import("sqlite-vec");
|
|
38
|
+
const vecPath = getLoadablePath();
|
|
39
|
+
const testDb = new BunDatabase(":memory:");
|
|
40
|
+
testDb.loadExtension(vecPath);
|
|
41
|
+
testDb.close();
|
|
42
|
+
_sqliteVecLoad = (db) => db.loadExtension(vecPath);
|
|
43
|
+
}
|
|
44
|
+
catch {
|
|
45
|
+
// Vector search won't work, but BM25 and other operations are unaffected.
|
|
46
|
+
_sqliteVecLoad = null;
|
|
47
|
+
}
|
|
17
48
|
}
|
|
18
49
|
else {
|
|
19
50
|
_Database = (await import("better-sqlite3")).default;
|
|
@@ -28,7 +59,17 @@ export function openDatabase(path) {
|
|
|
28
59
|
}
|
|
29
60
|
/**
 * Load the sqlite-vec extension into a database.
 *
 * Delegates to the module-level `_sqliteVecLoad` loader when one is set.
 * When the loader is falsy, throws an Error whose message carries a fix hint:
 * Homebrew/npm instructions for Bun on macOS, a generic reinstall hint
 * everywhere else.
 *
 * @param db - Database handle handed straight to the loader.
 * @throws {Error} When the sqlite-vec loader is unavailable.
 */
export function loadSqliteVec(db) {
    // Happy path first: a loader exists, so just run it.
    if (_sqliteVecLoad) {
        _sqliteVecLoad(db);
        return;
    }
    let hint;
    if (isBun && process.platform === "darwin") {
        hint = "On macOS with Bun, install Homebrew SQLite: brew install sqlite\n" +
            "Or install qmd with npm instead: npm install -g @tobilu/qmd";
    }
    else {
        hint = "Ensure the sqlite-vec native module is installed correctly.";
    }
    throw new Error(`sqlite-vec extension is unavailable. ${hint}`);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Generated from skills/qmd source files. Keep this in sync when updating the packaged skill.
|
|
2
|
+
const EMBEDDED_QMD_SKILL_BASE64 = {
|
|
3
|
+
"SKILL.md": "LS0tCm5hbWU6IHFtZApkZXNjcmlwdGlvbjogU2VhcmNoIG1hcmtkb3duIGtub3dsZWRnZSBiYXNlcywgbm90ZXMsIGFuZCBkb2N1bWVudGF0aW9uIHVzaW5nIFFNRC4gVXNlIHdoZW4gdXNlcnMgYXNrIHRvIHNlYXJjaCBub3RlcywgZmluZCBkb2N1bWVudHMsIG9yIGxvb2sgdXAgaW5mb3JtYXRpb24uCmxpY2Vuc2U6IE1JVApjb21wYXRpYmlsaXR5OiBSZXF1aXJlcyBxbWQgQ0xJIG9yIE1DUCBzZXJ2ZXIuIEluc3RhbGwgdmlhIGBucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZGAuCm1ldGFkYXRhOgogIGF1dGhvcjogdG9iaQogIHZlcnNpb246ICIyLjAuMCIKYWxsb3dlZC10b29sczogQmFzaChxbWQ6KiksIG1jcF9fcW1kX18qCi0tLQoKIyBRTUQgLSBRdWljayBNYXJrZG93biBTZWFyY2gKCkxvY2FsIHNlYXJjaCBlbmdpbmUgZm9yIG1hcmtkb3duIGNvbnRlbnQuCgojIyBTdGF0dXMKCiFgcW1kIHN0YXR1cyAyPi9kZXYvbnVsbCB8fCBlY2hvICJOb3QgaW5zdGFsbGVkOiBucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZCJgCgojIyBNQ1A6IGBxdWVyeWAKCmBgYGpzb24KewogICJzZWFyY2hlcyI6IFsKICAgIHsgInR5cGUiOiAibGV4IiwgInF1ZXJ5IjogIkNBUCB0aGVvcmVtIGNvbnNpc3RlbmN5IiB9LAogICAgeyAidHlwZSI6ICJ2ZWMiLCAicXVlcnkiOiAidHJhZGVvZmYgYmV0d2VlbiBjb25zaXN0ZW5jeSBhbmQgYXZhaWxhYmlsaXR5IiB9CiAgXSwKICAiY29sbGVjdGlvbnMiOiBbImRvY3MiXSwKICAibGltaXQiOiAxMAp9CmBgYAoKIyMjIFF1ZXJ5IFR5cGVzCgp8IFR5cGUgfCBNZXRob2QgfCBJbnB1dCB8CnwtLS0tLS18LS0tLS0tLS18LS0tLS0tLXwKfCBgbGV4YCB8IEJNMjUgfCBLZXl3b3JkcyDigJQgZXhhY3QgdGVybXMsIG5hbWVzLCBjb2RlIHwKfCBgdmVjYCB8IFZlY3RvciB8IFF1ZXN0aW9uIOKAlCBuYXR1cmFsIGxhbmd1YWdlIHwKfCBgaHlkZWAgfCBWZWN0b3IgfCBBbnN3ZXIg4oCUIGh5cG90aGV0aWNhbCByZXN1bHQgKDUwLTEwMCB3b3JkcykgfAoKIyMjIFdyaXRpbmcgR29vZCBRdWVyaWVzCgoqKmxleCAoa2V5d29yZCkqKgotIDItNSB0ZXJtcywgbm8gZmlsbGVyIHdvcmRzCi0gRXhhY3QgcGhyYXNlOiBgImNvbm5lY3Rpb24gcG9vbCJgIChxdW90ZWQpCi0gRXhjbHVkZSB0ZXJtczogYHBlcmZvcm1hbmNlIC1zcG9ydHNgIChtaW51cyBwcmVmaXgpCi0gQ29kZSBpZGVudGlmaWVycyB3b3JrOiBgaGFuZGxlRXJyb3IgYXN5bmNgCgoqKnZlYyAoc2VtYW50aWMpKioKLSBGdWxsIG5hdHVyYWwgbGFuZ3VhZ2UgcXVlc3Rpb24KLSBCZSBzcGVjaWZpYzogYCJob3cgZG9lcyB0aGUgcmF0ZSBsaW1pdGVyIGhhbmRsZSBidXJzdCB0cmFmZmljImAKLSBJbmNsdWRlIGNvbnRleHQ6IGAiaW4gdGhlIHBheW1lbnQgc2VydmljZSwgaG93IGFyZSByZWZ1bmRzIHByb2Nlc3NlZCJgCgoqKmh5ZGUgKGh5cG90aGV0aWNhbCBkb2N1bWVudCkqKgotIFdyaXRlIDUwLTEwMCB3b3JkcyBvZiB3aGF0IHRoZSAqYW5zd2V
yKiBsb29rcyBsaWtlCi0gVXNlIHRoZSB2b2NhYnVsYXJ5IHlvdSBleHBlY3QgaW4gdGhlIHJlc3VsdAoKKipleHBhbmQgKGF1dG8tZXhwYW5kKSoqCi0gVXNlIGEgc2luZ2xlLWxpbmUgcXVlcnkgKGltcGxpY2l0KSBvciBgZXhwYW5kOiBxdWVzdGlvbmAgb24gaXRzIG93biBsaW5lCi0gTGV0cyB0aGUgbG9jYWwgTExNIGdlbmVyYXRlIGxleC92ZWMvaHlkZSB2YXJpYXRpb25zCi0gRG8gbm90IG1peCBgZXhwYW5kOmAgd2l0aCBvdGhlciB0eXBlZCBsaW5lcyDigJQgaXQncyBlaXRoZXIgYSBzdGFuZGFsb25lIGV4cGFuZCBxdWVyeSBvciBhIGZ1bGwgcXVlcnkgZG9jdW1lbnQKCiMjIyBJbnRlbnQgKERpc2FtYmlndWF0aW9uKQoKV2hlbiBhIHF1ZXJ5IHRlcm0gaXMgYW1iaWd1b3VzLCBhZGQgYGludGVudGAgdG8gc3RlZXIgcmVzdWx0czoKCmBgYGpzb24KewogICJzZWFyY2hlcyI6IFsKICAgIHsgInR5cGUiOiAibGV4IiwgInF1ZXJ5IjogInBlcmZvcm1hbmNlIiB9CiAgXSwKICAiaW50ZW50IjogIndlYiBwYWdlIGxvYWQgdGltZXMgYW5kIENvcmUgV2ViIFZpdGFscyIKfQpgYGAKCkludGVudCBhZmZlY3RzIGV4cGFuc2lvbiwgcmVyYW5raW5nLCBjaHVuayBzZWxlY3Rpb24sIGFuZCBzbmlwcGV0IGV4dHJhY3Rpb24uIEl0IGRvZXMgbm90IHNlYXJjaCBvbiBpdHMgb3duIOKAlCBpdCdzIGEgc3RlZXJpbmcgc2lnbmFsIHRoYXQgZGlzYW1iaWd1YXRlcyBxdWVyaWVzIGxpa2UgInBlcmZvcm1hbmNlIiAod2ViLXBlcmYgdnMgdGVhbSBoZWFsdGggdnMgZml0bmVzcykuCgojIyMgQ29tYmluaW5nIFR5cGVzCgp8IEdvYWwgfCBBcHByb2FjaCB8CnwtLS0tLS18LS0tLS0tLS0tLXwKfCBLbm93IGV4YWN0IHRlcm1zIHwgYGxleGAgb25seSB8CnwgRG9uJ3Qga25vdyB2b2NhYnVsYXJ5IHwgVXNlIGEgc2luZ2xlLWxpbmUgcXVlcnkgKGltcGxpY2l0IGBleHBhbmQ6YCkgb3IgYHZlY2AgfAp8IEJlc3QgcmVjYWxsIHwgYGxleGAgKyBgdmVjYCB8CnwgQ29tcGxleCB0b3BpYyB8IGBsZXhgICsgYHZlY2AgKyBgaHlkZWAgfAp8IEFtYmlndW91cyBxdWVyeSB8IEFkZCBgaW50ZW50YCB0byBhbnkgY29tYmluYXRpb24gYWJvdmUgfAoKRmlyc3QgcXVlcnkgZ2V0cyAyeCB3ZWlnaHQgaW4gZnVzaW9uIOKAlCBwdXQgeW91ciBiZXN0IGd1ZXNzIGZpcnN0LgoKIyMjIExleCBRdWVyeSBTeW50YXgKCnwgU3ludGF4IHwgTWVhbmluZyB8IEV4YW1wbGUgfAp8LS0tLS0tLS18LS0tLS0tLS0tfC0tLS0tLS0tLXwKfCBgdGVybWAgfCBQcmVmaXggbWF0Y2ggfCBgcGVyZmAgbWF0Y2hlcyAicGVyZm9ybWFuY2UiIHwKfCBgInBocmFzZSJgIHwgRXhhY3QgcGhyYXNlIHwgYCJyYXRlIGxpbWl0ZXIiYCB8CnwgYC10ZXJtYCB8IEV4Y2x1ZGUgfCBgcGVyZm9ybWFuY2UgLXNwb3J0c2AgfAoKTm90ZTogYC10ZXJtYCBvbmx5IHdvcmtzIGluIGxleCBxdWVyaWVzLCBub3QgdmVjL2h5ZGUuCgojIyMgQ29sbGVjdGlvbiBGaWx0ZXJpbmcKCmBgYGpzb24KeyAiY29sbGVjdGl
vbnMiOiBbImRvY3MiXSB9ICAgICAgICAgICAgICAvLyBTaW5nbGUKeyAiY29sbGVjdGlvbnMiOiBbImRvY3MiLCAibm90ZXMiXSB9ICAgICAvLyBNdWx0aXBsZSAoT1IpCmBgYAoKT21pdCB0byBzZWFyY2ggYWxsIGNvbGxlY3Rpb25zLgoKIyMgT3RoZXIgTUNQIFRvb2xzCgp8IFRvb2wgfCBVc2UgfAp8LS0tLS0tfC0tLS0tfAp8IGBnZXRgIHwgUmV0cmlldmUgZG9jIGJ5IHBhdGggb3IgYCNkb2NpZGAgfAp8IGBtdWx0aV9nZXRgIHwgUmV0cmlldmUgbXVsdGlwbGUgYnkgZ2xvYi9saXN0IHwKfCBgc3RhdHVzYCB8IENvbGxlY3Rpb25zIGFuZCBoZWFsdGggfAoKIyMgQ0xJCgpgYGBiYXNoCnFtZCBxdWVyeSAicXVlc3Rpb24iICAgICAgICAgICAgICAjIEF1dG8tZXhwYW5kICsgcmVyYW5rCnFtZCBxdWVyeSAkJ2xleDogWFxudmVjOiBZJyAgICAgICAjIFN0cnVjdHVyZWQKcW1kIHF1ZXJ5ICQnZXhwYW5kOiBxdWVzdGlvbicgICAgICMgRXhwbGljaXQgZXhwYW5kCnFtZCBxdWVyeSAtLWpzb24gLS1leHBsYWluICJxIiAgICAjIFNob3cgc2NvcmUgdHJhY2VzIChSUkYgKyByZXJhbmsgYmxlbmQpCnFtZCBzZWFyY2ggImtleXdvcmRzIiAgICAgICAgICAgICAjIEJNMjUgb25seSAobm8gTExNKQpxbWQgZ2V0ICIjYWJjMTIzIiAgICAgICAgICAgICAgICAgIyBCeSBkb2NpZApxbWQgbXVsdGktZ2V0ICJqb3VybmFscy8yMDI2LSoubWQiIC1sIDQwICAjIEJhdGNoIHB1bGwgc25pcHBldHMgYnkgZ2xvYgpxbWQgbXVsdGktZ2V0IG5vdGVzL2Zvby5tZCxub3Rlcy9iYXIubWQgICAjIENvbW1hLXNlcGFyYXRlZCBsaXN0LCBwcmVzZXJ2ZXMgb3JkZXIKYGBgCgojIyBIVFRQIEFQSQoKYGBgYmFzaApjdXJsIC1YIFBPU1QgaHR0cDovL2xvY2FsaG9zdDo4MTgxL3F1ZXJ5IFwKICAtSCAiQ29udGVudC1UeXBlOiBhcHBsaWNhdGlvbi9qc29uIiBcCiAgLWQgJ3sic2VhcmNoZXMiOiBbeyJ0eXBlIjogImxleCIsICJxdWVyeSI6ICJ0ZXN0In1dfScKYGBgCgojIyBTZXR1cAoKYGBgYmFzaApucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZApxbWQgY29sbGVjdGlvbiBhZGQgfi9ub3RlcyAtLW5hbWUgbm90ZXMKcW1kIGVtYmVkCmBgYAo=",
|
|
4
|
+
"references/mcp-setup.md": "IyBRTUQgTUNQIFNlcnZlciBTZXR1cAoKIyMgSW5zdGFsbAoKYGBgYmFzaApucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZApxbWQgY29sbGVjdGlvbiBhZGQgfi9wYXRoL3RvL21hcmtkb3duIC0tbmFtZSBteWtub3dsZWRnZQpxbWQgZW1iZWQKYGBgCgojIyBDb25maWd1cmUgTUNQIENsaWVudAoKKipDbGF1ZGUgQ29kZSoqIChgfi8uY2xhdWRlL3NldHRpbmdzLmpzb25gKToKYGBganNvbgp7CiAgIm1jcFNlcnZlcnMiOiB7CiAgICAicW1kIjogeyAiY29tbWFuZCI6ICJxbWQiLCAiYXJncyI6IFsibWNwIl0gfQogIH0KfQpgYGAKCioqQ2xhdWRlIERlc2t0b3AqKiAoYH4vTGlicmFyeS9BcHBsaWNhdGlvbiBTdXBwb3J0L0NsYXVkZS9jbGF1ZGVfZGVza3RvcF9jb25maWcuanNvbmApOgpgYGBqc29uCnsKICAibWNwU2VydmVycyI6IHsKICAgICJxbWQiOiB7ICJjb21tYW5kIjogInFtZCIsICJhcmdzIjogWyJtY3AiXSB9CiAgfQp9CmBgYAoKKipPcGVuQ2xhdyoqIChgfi8ub3BlbmNsYXcvb3BlbmNsYXcuanNvbmApOgpgYGBqc29uCnsKICAibWNwIjogewogICAgInNlcnZlcnMiOiB7CiAgICAgICJxbWQiOiB7ICJjb21tYW5kIjogInFtZCIsICJhcmdzIjogWyJtY3AiXSB9CiAgICB9CiAgfQp9CmBgYAoKIyMgSFRUUCBNb2RlCgpgYGBiYXNoCnFtZCBtY3AgLS1odHRwICAgICAgICAgICAgICAjIFBvcnQgODE4MQpxbWQgbWNwIC0taHR0cCAtLWRhZW1vbiAgICAgIyBCYWNrZ3JvdW5kCnFtZCBtY3Agc3RvcCAgICAgICAgICAgICAgICAjIFN0b3AgZGFlbW9uCmBgYAoKIyMgVG9vbHMKCiMjIyBzdHJ1Y3R1cmVkX3NlYXJjaAoKU2VhcmNoIHdpdGggcHJlLWV4cGFuZGVkIHF1ZXJpZXMuCgpgYGBqc29uCnsKICAic2VhcmNoZXMiOiBbCiAgICB7ICJ0eXBlIjogImxleCIsICJxdWVyeSI6ICJrZXl3b3JkIHBocmFzZXMiIH0sCiAgICB7ICJ0eXBlIjogInZlYyIsICJxdWVyeSI6ICJuYXR1cmFsIGxhbmd1YWdlIHF1ZXN0aW9uIiB9LAogICAgeyAidHlwZSI6ICJoeWRlIiwgInF1ZXJ5IjogImh5cG90aGV0aWNhbCBhbnN3ZXIgcGFzc2FnZS4uLiIgfQogIF0sCiAgImxpbWl0IjogMTAsCiAgImNvbGxlY3Rpb24iOiAib3B0aW9uYWwiLAogICJtaW5TY29yZSI6IDAuMAp9CmBgYAoKfCBUeXBlIHwgTWV0aG9kIHwgSW5wdXQgfAp8LS0tLS0tfC0tLS0tLS0tfC0tLS0tLS18CnwgYGxleGAgfCBCTTI1IHwgS2V5d29yZHMgKDItNSB0ZXJtcykgfAp8IGB2ZWNgIHwgVmVjdG9yIHwgUXVlc3Rpb24gfAp8IGBoeWRlYCB8IFZlY3RvciB8IEFuc3dlciBwYXNzYWdlICg1MC0xMDAgd29yZHMpIHwKCiMjIyBnZXQKClJldHJpZXZlIGRvY3VtZW50IGJ5IHBhdGggb3IgYCNkb2NpZGAuCgp8IFBhcmFtIHwgVHlwZSB8IERlc2NyaXB0aW9uIHwKfC0tLS0tLS18LS0tLS0tfC0tLS0tLS0tLS0tLS18CnwgYHBhdGhgIHwgc3RyaW5nIHwgRmlsZSBwYXRoIG9yIGAjZG9jaWRgIHwKfCBgZnVsbGAgfCBib29sPyB8IFJldHVybiBm
dWxsIGNvbnRlbnQgfAp8IGBsaW5lTnVtYmVyc2AgfCBib29sPyB8IEFkZCBsaW5lIG51bWJlcnMgfAoKIyMjIG11bHRpX2dldAoKUmV0cmlldmUgbXVsdGlwbGUgZG9jdW1lbnRzLgoKfCBQYXJhbSB8IFR5cGUgfCBEZXNjcmlwdGlvbiB8CnwtLS0tLS0tfC0tLS0tLXwtLS0tLS0tLS0tLS0tfAp8IGBwYXR0ZXJuYCB8IHN0cmluZyB8IEdsb2Igb3IgY29tbWEtc2VwYXJhdGVkIGxpc3QgfAp8IGBtYXhCeXRlc2AgfCBudW1iZXI/IHwgU2tpcCBsYXJnZSBmaWxlcyAoZGVmYXVsdCAxMEtCKSB8CgojIyMgc3RhdHVzCgpJbmRleCBoZWFsdGggYW5kIGNvbGxlY3Rpb25zLiBObyBwYXJhbXMuCgojIyBUcm91Ymxlc2hvb3RpbmcKCi0gKipOb3Qgc3RhcnRpbmcqKjogYHdoaWNoIHFtZGAsIGBxbWQgbWNwYCBtYW51YWxseQotICoqTm8gcmVzdWx0cyoqOiBgcW1kIGNvbGxlY3Rpb24gbGlzdGAsIGBxbWQgZW1iZWRgCi0gKipTbG93IGZpcnN0IHNlYXJjaCoqOiBOb3JtYWwsIG1vZGVscyBsb2FkaW5nICh+M0dCKQo="
|
|
5
|
+
};
|
|
6
|
+
/**
 * Decode every file of the embedded qmd skill.
 *
 * @returns One `{ relativePath, content }` object per key of
 *   EMBEDDED_QMD_SKILL_BASE64, in key order, with `content` being the
 *   base64 payload decoded as UTF-8 text.
 */
export function getEmbeddedQmdSkillFiles() {
    const files = [];
    for (const relativePath of Object.keys(EMBEDDED_QMD_SKILL_BASE64)) {
        const encoded = EMBEDDED_QMD_SKILL_BASE64[relativePath];
        const content = Buffer.from(encoded, 'base64').toString('utf8');
        files.push({ relativePath, content });
    }
    return files;
}
|
|
12
|
+
/**
 * Decode the embedded skill's main document.
 *
 * @returns The "SKILL.md" entry of EMBEDDED_QMD_SKILL_BASE64, base64-decoded
 *   to a UTF-8 string.
 */
export function getEmbeddedQmdSkillContent() {
    const encodedSkill = EMBEDDED_QMD_SKILL_BASE64["SKILL.md"];
    const decodedBytes = Buffer.from(encodedSkill, "base64");
    return decodedBytes.toString("utf8");
}
|
package/dist/index.d.ts
CHANGED
|
@@ -16,11 +16,12 @@
|
|
|
16
16
|
* const results = await store.search({ query: "how does auth work?" })
|
|
17
17
|
* await store.close()
|
|
18
18
|
*/
|
|
19
|
-
import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult } from "./store.js";
|
|
19
|
+
import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult, type ChunkStrategy } from "./store.js";
|
|
20
20
|
import { type Collection, type CollectionConfig, type NamedCollection, type ContextMap } from "./collections.js";
|
|
21
21
|
export type { DocumentResult, DocumentNotFound, SearchResult, HybridQueryResult, HybridQueryOptions, HybridQueryExplain, ExpandedQuery, StructuredSearchOptions, MultiGetResult, IndexStatus, IndexHealthInfo, SearchHooks, ReindexProgress, ReindexResult, EmbedProgress, EmbedResult, Collection, CollectionConfig, NamedCollection, ContextMap, };
|
|
22
22
|
export type { InternalStore };
|
|
23
23
|
export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
|
|
24
|
+
export type { ChunkStrategy } from "./store.js";
|
|
24
25
|
export { getDefaultDbPath } from "./store.js";
|
|
25
26
|
export { Maintenance } from "./maintenance.js";
|
|
26
27
|
/**
|
|
@@ -65,6 +66,8 @@ export interface SearchOptions {
|
|
|
65
66
|
minScore?: number;
|
|
66
67
|
/** Include explain traces */
|
|
67
68
|
explain?: boolean;
|
|
69
|
+
/** Chunk strategy: "auto" (default, uses AST for code files) or "regex" (legacy) */
|
|
70
|
+
chunkStrategy?: ChunkStrategy;
|
|
68
71
|
}
|
|
69
72
|
/**
|
|
70
73
|
* Options for searchLex() — BM25 keyword search.
|
|
@@ -183,6 +186,9 @@ export interface QMDStore {
|
|
|
183
186
|
embed(options?: {
|
|
184
187
|
force?: boolean;
|
|
185
188
|
model?: string;
|
|
189
|
+
maxDocsPerBatch?: number;
|
|
190
|
+
maxBatchBytes?: number;
|
|
191
|
+
chunkStrategy?: ChunkStrategy;
|
|
186
192
|
onProgress?: (info: EmbedProgress) => void;
|
|
187
193
|
}): Promise<EmbedResult>;
|
|
188
194
|
/** Get index status (document counts, collections, embedding state) */
|
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
|
|
20
20
|
import { LlamaCpp, } from "./llm.js";
|
|
21
21
|
import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
|
|
22
|
-
// Re-export utility functions used by frontends
|
|
22
|
+
// Re-export utility functions and types used by frontends
|
|
23
23
|
export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
|
|
24
24
|
// Re-export getDefaultDbPath for CLI/MCP that need the default database location
|
|
25
25
|
export { getDefaultDbPath } from "./store.js";
|
|
@@ -63,21 +63,26 @@ export async function createStore(options) {
|
|
|
63
63
|
// Track whether we have a YAML config path for write-through
|
|
64
64
|
const hasYamlConfig = !!options.configPath;
|
|
65
65
|
// Sync config into SQLite store_collections
|
|
66
|
+
let config;
|
|
66
67
|
if (options.configPath) {
|
|
67
68
|
// YAML mode: inject config source for write-through, sync to DB
|
|
68
69
|
setConfigSource({ configPath: options.configPath });
|
|
69
|
-
|
|
70
|
+
config = loadConfig();
|
|
70
71
|
syncConfigToDb(db, config);
|
|
71
72
|
}
|
|
72
73
|
else if (options.config) {
|
|
73
74
|
// Inline config mode: inject config source for mutations, sync to DB
|
|
74
75
|
setConfigSource({ config: options.config });
|
|
75
|
-
|
|
76
|
+
config = options.config;
|
|
77
|
+
syncConfigToDb(db, config);
|
|
76
78
|
}
|
|
77
79
|
// else: DB-only mode — no external config, use existing store_collections
|
|
78
80
|
// Create a per-store LlamaCpp instance — lazy-loads models on first use,
|
|
79
81
|
// auto-unloads after 5 min inactivity to free VRAM.
|
|
80
82
|
const llm = new LlamaCpp({
|
|
83
|
+
embedModel: config?.models?.embed,
|
|
84
|
+
generateModel: config?.models?.generate,
|
|
85
|
+
rerankModel: config?.models?.rerank,
|
|
81
86
|
inactivityTimeoutMs: 5 * 60 * 1000,
|
|
82
87
|
disposeModelsOnInactivity: true,
|
|
83
88
|
});
|
|
@@ -105,6 +110,7 @@ export async function createStore(options) {
|
|
|
105
110
|
explain: opts.explain,
|
|
106
111
|
intent: opts.intent,
|
|
107
112
|
skipRerank,
|
|
113
|
+
chunkStrategy: opts.chunkStrategy,
|
|
108
114
|
});
|
|
109
115
|
}
|
|
110
116
|
// Simple query string — use hybridQuery (expand + search + rerank)
|
|
@@ -115,6 +121,7 @@ export async function createStore(options) {
|
|
|
115
121
|
explain: opts.explain,
|
|
116
122
|
intent: opts.intent,
|
|
117
123
|
skipRerank,
|
|
124
|
+
chunkStrategy: opts.chunkStrategy,
|
|
118
125
|
});
|
|
119
126
|
},
|
|
120
127
|
searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
|
|
@@ -210,6 +217,9 @@ export async function createStore(options) {
|
|
|
210
217
|
return generateEmbeddings(internal, {
|
|
211
218
|
force: embedOpts?.force,
|
|
212
219
|
model: embedOpts?.model,
|
|
220
|
+
maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
|
|
221
|
+
maxBatchBytes: embedOpts?.maxBatchBytes,
|
|
222
|
+
chunkStrategy: embedOpts?.chunkStrategy,
|
|
213
223
|
onProgress: embedOpts?.onProgress,
|
|
214
224
|
});
|
|
215
225
|
},
|
package/dist/llm.d.ts
CHANGED
|
@@ -105,7 +105,7 @@ export type LLMSessionOptions = {
|
|
|
105
105
|
*/
|
|
106
106
|
export interface ILLMSession {
|
|
107
107
|
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
|
|
108
|
-
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
|
|
108
|
+
embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
|
|
109
109
|
expandQuery(query: string, options?: {
|
|
110
110
|
context?: string;
|
|
111
111
|
includeLexical?: boolean;
|
|
@@ -137,7 +137,7 @@ export type RerankDocument = {
|
|
|
137
137
|
};
|
|
138
138
|
export declare const LFM2_GENERATE_MODEL = "hf:LiquidAI/LFM2-1.2B-GGUF/LFM2-1.2B-Q4_K_M.gguf";
|
|
139
139
|
export declare const LFM2_INSTRUCT_MODEL = "hf:LiquidAI/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf";
|
|
140
|
-
export declare const DEFAULT_EMBED_MODEL_URI:
|
|
140
|
+
export declare const DEFAULT_EMBED_MODEL_URI = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
141
141
|
export declare const DEFAULT_RERANK_MODEL_URI = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
142
142
|
export declare const DEFAULT_GENERATE_MODEL_URI = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
143
143
|
export declare const DEFAULT_MODEL_CACHE_DIR: string;
|
|
@@ -232,6 +232,7 @@ export declare class LlamaCpp implements LLM {
|
|
|
232
232
|
private disposeModelsOnInactivity;
|
|
233
233
|
private disposed;
|
|
234
234
|
constructor(config?: LlamaCppConfig);
|
|
235
|
+
get embedModelName(): string;
|
|
235
236
|
/**
|
|
236
237
|
* Reset the inactivity timer. Called after each model operation.
|
|
237
238
|
* When timer fires, models are unloaded to free memory (if no active sessions).
|
|
@@ -306,6 +307,7 @@ export declare class LlamaCpp implements LLM {
|
|
|
306
307
|
* - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
|
|
307
308
|
*/
|
|
308
309
|
private static readonly RERANK_CONTEXT_SIZE;
|
|
310
|
+
private static readonly EMBED_CONTEXT_SIZE;
|
|
309
311
|
private ensureRerankContexts;
|
|
310
312
|
/**
|
|
311
313
|
* Tokenize text using the embedding model's tokenizer
|
|
@@ -320,12 +322,19 @@ export declare class LlamaCpp implements LLM {
|
|
|
320
322
|
* Detokenize token IDs back to text
|
|
321
323
|
*/
|
|
322
324
|
detokenize(tokens: readonly LlamaToken[]): Promise<string>;
|
|
325
|
+
/**
|
|
326
|
+
* Truncate text to fit within the embedding model's context window.
|
|
327
|
+
* Uses the model's own tokenizer for accurate token counting, then
|
|
328
|
+
* detokenizes back to text if truncation is needed.
|
|
329
|
+
* Returns the (possibly truncated) text and whether truncation occurred.
|
|
330
|
+
*/
|
|
331
|
+
private truncateToContextSize;
|
|
323
332
|
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
|
|
324
333
|
/**
|
|
325
334
|
* Batch embed multiple texts efficiently
|
|
326
335
|
* Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
|
|
327
336
|
*/
|
|
328
|
-
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
|
|
337
|
+
embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
|
|
329
338
|
generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
|
|
330
339
|
modelExists(modelUri: string): Promise<ModelInfo>;
|
|
331
340
|
expandQuery(query: string, options?: {
|
package/dist/llm.js
CHANGED
|
@@ -47,8 +47,8 @@ export function formatDocForEmbedding(text, title, modelUri) {
|
|
|
47
47
|
// =============================================================================
|
|
48
48
|
// HuggingFace model URIs for node-llama-cpp
|
|
49
49
|
// Format: hf:<user>/<repo>/<file>
|
|
50
|
-
// Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/
|
|
51
|
-
const DEFAULT_EMBED_MODEL =
|
|
50
|
+
// Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)
|
|
51
|
+
const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
52
52
|
const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
53
53
|
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
|
|
54
54
|
const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
@@ -61,7 +61,9 @@ export const DEFAULT_EMBED_MODEL_URI = DEFAULT_EMBED_MODEL;
|
|
|
61
61
|
export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
|
|
62
62
|
export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
|
|
63
63
|
// Local model cache directory
|
|
64
|
-
const MODEL_CACHE_DIR =
|
|
64
|
+
const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
|
|
65
|
+
? join(process.env.XDG_CACHE_HOME, "qmd", "models")
|
|
66
|
+
: join(homedir(), ".cache", "qmd", "models");
|
|
65
67
|
export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR;
|
|
66
68
|
function parseHfUri(model) {
|
|
67
69
|
if (!model.startsWith("hf:"))
|
|
@@ -187,14 +189,17 @@ export class LlamaCpp {
|
|
|
187
189
|
// Track disposal state to prevent double-dispose
|
|
188
190
|
disposed = false;
|
|
189
191
|
constructor(config = {}) {
|
|
190
|
-
this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
|
|
191
|
-
this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
|
|
192
|
-
this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
|
|
192
|
+
this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
|
|
193
|
+
this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
|
|
194
|
+
this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
|
|
193
195
|
this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
|
|
194
196
|
this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
|
|
195
197
|
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
|
196
198
|
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
|
|
197
199
|
}
|
|
200
|
+
get embedModelName() {
|
|
201
|
+
return this.embedModelUri;
|
|
202
|
+
}
|
|
198
203
|
/**
|
|
199
204
|
* Reset the inactivity timer. Called after each model operation.
|
|
200
205
|
* When timer fires, models are unloaded to free memory (if no active sessions).
|
|
@@ -289,11 +294,29 @@ export class LlamaCpp {
|
|
|
289
294
|
*/
|
|
290
295
|
async ensureLlama() {
|
|
291
296
|
if (!this.llama) {
|
|
292
|
-
|
|
293
|
-
|
|
297
|
+
// Allow override via QMD_LLAMA_GPU: "false" | "off" | "none" forces CPU
|
|
298
|
+
const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase();
|
|
299
|
+
const forceCpu = ["false", "off", "none", "disable", "disabled", "0"].includes(gpuOverride);
|
|
300
|
+
const loadLlama = async (gpu) => await getLlama({
|
|
294
301
|
build: "autoAttempt",
|
|
295
|
-
logLevel: LlamaLogLevel.error
|
|
302
|
+
logLevel: LlamaLogLevel.error,
|
|
303
|
+
gpu,
|
|
296
304
|
});
|
|
305
|
+
let llama;
|
|
306
|
+
if (forceCpu) {
|
|
307
|
+
llama = await loadLlama(false);
|
|
308
|
+
}
|
|
309
|
+
else {
|
|
310
|
+
try {
|
|
311
|
+
llama = await loadLlama("auto");
|
|
312
|
+
}
|
|
313
|
+
catch (err) {
|
|
314
|
+
// GPU backend (e.g. Vulkan on headless/driverless machines) can throw at init.
|
|
315
|
+
// Fall back to CPU so qmd still works.
|
|
316
|
+
process.stderr.write(`QMD Warning: GPU init failed (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`);
|
|
317
|
+
llama = await loadLlama(false);
|
|
318
|
+
}
|
|
319
|
+
}
|
|
297
320
|
if (llama.gpu === false) {
|
|
298
321
|
process.stderr.write("QMD Warning: no GPU acceleration, running on CPU (slow). Run 'qmd status' for details.\n");
|
|
299
322
|
}
|
|
@@ -394,6 +417,7 @@ export class LlamaCpp {
|
|
|
394
417
|
for (let i = 0; i < n; i++) {
|
|
395
418
|
try {
|
|
396
419
|
this.embedContexts.push(await model.createEmbeddingContext({
|
|
420
|
+
contextSize: LlamaCpp.EMBED_CONTEXT_SIZE,
|
|
397
421
|
...(threads > 0 ? { threads } : {}),
|
|
398
422
|
}));
|
|
399
423
|
}
|
|
@@ -484,9 +508,20 @@ export class LlamaCpp {
|
|
|
484
508
|
* - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
|
|
485
509
|
*/
|
|
486
510
|
// Qwen3 reranker template adds ~200 tokens overhead (system prompt, tags, etc.)
|
|
487
|
-
//
|
|
488
|
-
//
|
|
489
|
-
|
|
511
|
+
// Default 2048 was too small for longer documents (e.g. session transcripts,
|
|
512
|
+
// CJK text, or large markdown files) — callers hit "input lengths exceed
|
|
513
|
+
// context size" errors even after truncation because the overhead estimate
|
|
514
|
+
// was insufficient. 4096 comfortably fits the largest real-world chunks
|
|
515
|
+
// while staying well below the 40 960-token auto size.
|
|
516
|
+
// Override with QMD_RERANK_CONTEXT_SIZE env var if you need more headroom.
|
|
517
|
+
static RERANK_CONTEXT_SIZE = (() => {
|
|
518
|
+
const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
|
|
519
|
+
return Number.isFinite(v) && v > 0 ? v : 4096;
|
|
520
|
+
})();
|
|
521
|
+
static EMBED_CONTEXT_SIZE = (() => {
|
|
522
|
+
const v = parseInt(process.env.QMD_EMBED_CONTEXT_SIZE ?? "", 10);
|
|
523
|
+
return Number.isFinite(v) && v > 0 ? v : 2048;
|
|
524
|
+
})();
|
|
490
525
|
async ensureRerankContexts() {
|
|
491
526
|
if (this.rerankContexts.length === 0) {
|
|
492
527
|
const model = await this.ensureRerankModel();
|
|
@@ -555,15 +590,41 @@ export class LlamaCpp {
|
|
|
555
590
|
// ==========================================================================
|
|
556
591
|
// Core API methods
|
|
557
592
|
// ==========================================================================
|
|
593
|
+
/**
|
|
594
|
+
* Truncate text to fit within the embedding model's context window.
|
|
595
|
+
* Uses the model's own tokenizer for accurate token counting, then
|
|
596
|
+
* detokenizes back to text if truncation is needed.
|
|
597
|
+
* Returns the (possibly truncated) text and whether truncation occurred.
|
|
598
|
+
*/
|
|
599
|
+
async truncateToContextSize(text) {
|
|
600
|
+
if (!this.embedModel)
|
|
601
|
+
return { text, truncated: false };
|
|
602
|
+
const maxTokens = this.embedModel.trainContextSize;
|
|
603
|
+
if (maxTokens <= 0)
|
|
604
|
+
return { text, truncated: false };
|
|
605
|
+
const tokens = this.embedModel.tokenize(text);
|
|
606
|
+
if (tokens.length <= maxTokens)
|
|
607
|
+
return { text, truncated: false };
|
|
608
|
+
// Leave a small margin (4 tokens) for BOS/EOS overhead
|
|
609
|
+
const safeLimit = Math.max(1, maxTokens - 4);
|
|
610
|
+
const truncatedTokens = tokens.slice(0, safeLimit);
|
|
611
|
+
const truncatedText = this.embedModel.detokenize(truncatedTokens);
|
|
612
|
+
return { text: truncatedText, truncated: true };
|
|
613
|
+
}
|
|
558
614
|
async embed(text, options = {}) {
|
|
559
615
|
// Ping activity at start to keep models alive during this operation
|
|
560
616
|
this.touchActivity();
|
|
561
617
|
try {
|
|
562
618
|
const context = await this.ensureEmbedContext();
|
|
563
|
-
|
|
619
|
+
// Guard: truncate text that exceeds model context window to prevent GGML crash
|
|
620
|
+
const { text: safeText, truncated } = await this.truncateToContextSize(text);
|
|
621
|
+
if (truncated) {
|
|
622
|
+
console.warn(`⚠ Text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
|
|
623
|
+
}
|
|
624
|
+
const embedding = await context.getEmbeddingFor(safeText);
|
|
564
625
|
return {
|
|
565
626
|
embedding: Array.from(embedding.vector),
|
|
566
|
-
model: this.embedModelUri,
|
|
627
|
+
model: options.model ?? this.embedModelUri,
|
|
567
628
|
};
|
|
568
629
|
}
|
|
569
630
|
catch (error) {
|
|
@@ -575,7 +636,7 @@ export class LlamaCpp {
|
|
|
575
636
|
* Batch embed multiple texts efficiently
|
|
576
637
|
* Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
|
|
577
638
|
*/
|
|
578
|
-
async embedBatch(texts) {
|
|
639
|
+
async embedBatch(texts, options = {}) {
|
|
579
640
|
if (this._ciMode)
|
|
580
641
|
throw new Error("LLM operations are disabled in CI (set CI=true)");
|
|
581
642
|
// Ping activity at start to keep models alive during this operation
|
|
@@ -591,9 +652,13 @@ export class LlamaCpp {
|
|
|
591
652
|
const embeddings = [];
|
|
592
653
|
for (const text of texts) {
|
|
593
654
|
try {
|
|
594
|
-
const
|
|
655
|
+
const { text: safeText, truncated } = await this.truncateToContextSize(text);
|
|
656
|
+
if (truncated) {
|
|
657
|
+
console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
|
|
658
|
+
}
|
|
659
|
+
const embedding = await context.getEmbeddingFor(safeText);
|
|
595
660
|
this.touchActivity();
|
|
596
|
-
embeddings.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
|
|
661
|
+
embeddings.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
|
|
597
662
|
}
|
|
598
663
|
catch (err) {
|
|
599
664
|
console.error("Embedding error for text:", err);
|
|
@@ -610,9 +675,13 @@ export class LlamaCpp {
|
|
|
610
675
|
const results = [];
|
|
611
676
|
for (const text of chunk) {
|
|
612
677
|
try {
|
|
613
|
-
const
|
|
678
|
+
const { text: safeText, truncated } = await this.truncateToContextSize(text);
|
|
679
|
+
if (truncated) {
|
|
680
|
+
console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
|
|
681
|
+
}
|
|
682
|
+
const embedding = await ctx.getEmbeddingFor(safeText);
|
|
614
683
|
this.touchActivity();
|
|
615
|
-
results.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
|
|
684
|
+
results.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
|
|
616
685
|
}
|
|
617
686
|
catch (err) {
|
|
618
687
|
console.error("Embedding error for text:", err);
|
|
@@ -767,8 +836,10 @@ export class LlamaCpp {
|
|
|
767
836
|
await genContext.dispose();
|
|
768
837
|
}
|
|
769
838
|
}
|
|
770
|
-
// Qwen3 reranker chat template overhead (system prompt, tags, separators)
|
|
771
|
-
|
|
839
|
+
// Qwen3 reranker chat template overhead (system prompt, tags, separators).
|
|
840
|
+
// Measured at ~350 tokens on real queries; use 512 as a safe upper bound so
|
|
841
|
+
// the truncation budget never lets a document slip past the context limit.
|
|
842
|
+
static RERANK_TEMPLATE_OVERHEAD = 512;
|
|
772
843
|
static RERANK_TARGET_DOCS_PER_CONTEXT = 10;
|
|
773
844
|
async rerank(query, documents, options = {}) {
|
|
774
845
|
if (this._ciMode)
|
|
@@ -1028,8 +1099,8 @@ class LLMSession {
|
|
|
1028
1099
|
async embed(text, options) {
|
|
1029
1100
|
return this.withOperation(() => this.manager.getLlamaCpp().embed(text, options));
|
|
1030
1101
|
}
|
|
1031
|
-
async embedBatch(texts) {
|
|
1032
|
-
return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts));
|
|
1102
|
+
async embedBatch(texts, options) {
|
|
1103
|
+
return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts, options));
|
|
1033
1104
|
}
|
|
1034
1105
|
async expandQuery(query, options) {
|
|
1035
1106
|
return this.withOperation(() => this.manager.getLlamaCpp().expandQuery(query, options));
|
|
@@ -1106,8 +1177,7 @@ let defaultLlamaCpp = null;
|
|
|
1106
1177
|
*/
|
|
1107
1178
|
export function getDefaultLlamaCpp() {
|
|
1108
1179
|
if (!defaultLlamaCpp) {
|
|
1109
|
-
|
|
1110
|
-
defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
|
|
1180
|
+
defaultLlamaCpp = new LlamaCpp();
|
|
1111
1181
|
}
|
|
1112
1182
|
return defaultLlamaCpp;
|
|
1113
1183
|
}
|
package/dist/mcp/server.js
CHANGED
|
@@ -8,13 +8,17 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import { createServer } from "node:http";
|
|
10
10
|
import { randomUUID } from "node:crypto";
|
|
11
|
+
import { readFileSync } from "node:fs";
|
|
12
|
+
import { join, dirname } from "node:path";
|
|
11
13
|
import { fileURLToPath } from "url";
|
|
12
14
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
13
15
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
14
16
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
15
17
|
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
|
|
16
18
|
import { z } from "zod";
|
|
19
|
+
import { existsSync } from "fs";
|
|
17
20
|
import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
|
|
21
|
+
import { getConfigPath } from "../collections.js";
|
|
18
22
|
// =============================================================================
|
|
19
23
|
// Helper functions
|
|
20
24
|
// =============================================================================
|
|
@@ -39,6 +43,16 @@ function formatSearchSummary(results, query) {
|
|
|
39
43
|
}
|
|
40
44
|
return lines.join('\n');
|
|
41
45
|
}
|
|
46
|
+
/**
 * Read this package's version from the package.json two directories above
 * this module.
 *
 * @returns {string} The `version` field when it is a non-empty string;
 *   otherwise the literal "unknown" (file missing, unreadable, not valid
 *   JSON, or `version` absent / not a usable string).
 */
function getPackageVersion() {
    try {
        const pkgPath = join(dirname(fileURLToPath(import.meta.url)), "../../package.json");
        const { version } = JSON.parse(readFileSync(pkgPath, "utf-8"));
        // `??` alone would let "" or a non-string value through; callers use
        // the result as a version string, so anything else maps to "unknown".
        return typeof version === "string" && version.length > 0 ? version : "unknown";
    }
    catch {
        // Best-effort lookup: a read or parse failure must not prevent the
        // caller from starting, so fall back to a placeholder.
        return "unknown";
    }
}
|
|
42
56
|
// =============================================================================
|
|
43
57
|
// MCP Server
|
|
44
58
|
// =============================================================================
|
|
@@ -108,7 +122,7 @@ async function buildInstructions(store) {
|
|
|
108
122
|
* Shared by both stdio and HTTP transports.
|
|
109
123
|
*/
|
|
110
124
|
async function createMcpServer(store) {
|
|
111
|
-
const server = new McpServer({ name: "qmd", version:
|
|
125
|
+
const server = new McpServer({ name: "qmd", version: getPackageVersion() }, { instructions: await buildInstructions(store) });
|
|
112
126
|
// Pre-fetch default collection names for search tools
|
|
113
127
|
const defaultCollectionNames = await store.getDefaultCollectionNames();
|
|
114
128
|
// ---------------------------------------------------------------------------
|
|
@@ -218,8 +232,9 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
218
232
|
candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
|
|
219
233
|
collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
|
|
220
234
|
intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
|
|
235
|
+
rerank: z.boolean().optional().default(true).describe("Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."),
|
|
221
236
|
},
|
|
222
|
-
}, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
|
|
237
|
+
}, async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
|
|
223
238
|
// Map to internal format
|
|
224
239
|
const queries = searches.map(s => ({
|
|
225
240
|
type: s.type,
|
|
@@ -232,6 +247,7 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
232
247
|
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
233
248
|
limit,
|
|
234
249
|
minScore,
|
|
250
|
+
rerank,
|
|
235
251
|
intent,
|
|
236
252
|
});
|
|
237
253
|
// Use first lex or vec query for snippet extraction
|
|
@@ -387,7 +403,7 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
387
403
|
` Collections: ${status.collections.length}`,
|
|
388
404
|
];
|
|
389
405
|
for (const col of status.collections) {
|
|
390
|
-
summary.push(` - ${col.path} (${col.documents} docs)`);
|
|
406
|
+
summary.push(` - ${col.name}: ${col.path} (${col.documents} docs)`);
|
|
391
407
|
}
|
|
392
408
|
return {
|
|
393
409
|
content: [{ type: "text", text: summary.join('\n') }],
|
|
@@ -400,7 +416,11 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
400
416
|
// Transport: stdio (default)
|
|
401
417
|
// =============================================================================
|
|
402
418
|
export async function startMcpServer() {
|
|
403
|
-
const
|
|
419
|
+
const configPath = getConfigPath();
|
|
420
|
+
const store = await createStore({
|
|
421
|
+
dbPath: getDefaultDbPath(),
|
|
422
|
+
...(existsSync(configPath) ? { configPath } : {}),
|
|
423
|
+
});
|
|
404
424
|
const server = await createMcpServer(store);
|
|
405
425
|
const transport = new StdioServerTransport();
|
|
406
426
|
await server.connect(transport);
|
|
@@ -410,7 +430,11 @@ export async function startMcpServer() {
|
|
|
410
430
|
* Binds to localhost only. Returns a handle for shutdown and port discovery.
|
|
411
431
|
*/
|
|
412
432
|
export async function startMcpHttpServer(port, options) {
|
|
413
|
-
const
|
|
433
|
+
const configPath = getConfigPath();
|
|
434
|
+
const store = await createStore({
|
|
435
|
+
dbPath: getDefaultDbPath(),
|
|
436
|
+
...(existsSync(configPath) ? { configPath } : {}),
|
|
437
|
+
});
|
|
414
438
|
// Pre-fetch default collection names for REST endpoint
|
|
415
439
|
const defaultCollectionNames = await store.getDefaultCollectionNames();
|
|
416
440
|
// Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
|