@tobilu/qmd 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/db.js CHANGED
@@ -4,6 +4,11 @@
4
4
  * Provides a unified Database export that works under both Bun (bun:sqlite)
5
5
  * and Node.js (better-sqlite3). The APIs are nearly identical — the main
6
6
  * difference is the import path.
7
+ *
8
+ * On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
9
+ * which prevents loading native extensions like sqlite-vec. When running under
10
+ * Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
11
+ * SQLite build before creating any database instances.
7
12
  */
8
13
  export const isBun = typeof globalThis.Bun !== "undefined";
9
14
  let _Database;
@@ -11,9 +16,35 @@ let _sqliteVecLoad;
11
16
  if (isBun) {
12
17
  // Dynamic string prevents tsc from resolving bun:sqlite on Node.js builds
13
18
  const bunSqlite = "bun:" + "sqlite";
14
- _Database = (await import(/* @vite-ignore */ bunSqlite)).Database;
15
- const { getLoadablePath } = await import("sqlite-vec");
16
- _sqliteVecLoad = (db) => db.loadExtension(getLoadablePath());
19
+ const BunDatabase = (await import(/* @vite-ignore */ bunSqlite)).Database;
20
+ // See: https://bun.com/docs/runtime/sqlite#setcustomsqlite
21
+ if (process.platform === "darwin") {
22
+ const homebrewPaths = [
23
+ "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
24
+ "/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel
25
+ ];
26
+ for (const p of homebrewPaths) {
27
+ try {
28
+ BunDatabase.setCustomSQLite(p);
29
+ break;
30
+ }
31
+ catch { }
32
+ }
33
+ }
34
+ _Database = BunDatabase;
35
+ // setCustomSQLite may have silently failed — test that extensions actually work.
36
+ try {
37
+ const { getLoadablePath } = await import("sqlite-vec");
38
+ const vecPath = getLoadablePath();
39
+ const testDb = new BunDatabase(":memory:");
40
+ testDb.loadExtension(vecPath);
41
+ testDb.close();
42
+ _sqliteVecLoad = (db) => db.loadExtension(vecPath);
43
+ }
44
+ catch {
45
+ // Vector search won't work, but BM25 and other operations are unaffected.
46
+ _sqliteVecLoad = null;
47
+ }
17
48
  }
18
49
  else {
19
50
  _Database = (await import("better-sqlite3")).default;
@@ -28,7 +59,17 @@ export function openDatabase(path) {
28
59
  }
29
60
  /**
30
61
  * Load the sqlite-vec extension into a database.
62
+ *
63
+ * Throws with platform-specific fix instructions when the extension is
64
+ * unavailable.
31
65
  */
32
66
  export function loadSqliteVec(db) {
67
+ if (!_sqliteVecLoad) {
68
+ const hint = isBun && process.platform === "darwin"
69
+ ? "On macOS with Bun, install Homebrew SQLite: brew install sqlite\n" +
70
+ "Or install qmd with npm instead: npm install -g @tobilu/qmd"
71
+ : "Ensure the sqlite-vec native module is installed correctly.";
72
+ throw new Error(`sqlite-vec extension is unavailable. ${hint}`);
73
+ }
33
74
  _sqliteVecLoad(db);
34
75
  }
@@ -0,0 +1,6 @@
1
+ export type EmbeddedSkillFile = {
2
+ relativePath: string;
3
+ content: string;
4
+ };
5
+ export declare function getEmbeddedQmdSkillFiles(): EmbeddedSkillFile[];
6
+ export declare function getEmbeddedQmdSkillContent(): string;
@@ -0,0 +1,14 @@
1
+ // Generated from skills/qmd source files. Keep this in sync when updating the packaged skill.
2
+ const EMBEDDED_QMD_SKILL_BASE64 = {
3
+ "SKILL.md": "LS0tCm5hbWU6IHFtZApkZXNjcmlwdGlvbjogU2VhcmNoIG1hcmtkb3duIGtub3dsZWRnZSBiYXNlcywgbm90ZXMsIGFuZCBkb2N1bWVudGF0aW9uIHVzaW5nIFFNRC4gVXNlIHdoZW4gdXNlcnMgYXNrIHRvIHNlYXJjaCBub3RlcywgZmluZCBkb2N1bWVudHMsIG9yIGxvb2sgdXAgaW5mb3JtYXRpb24uCmxpY2Vuc2U6IE1JVApjb21wYXRpYmlsaXR5OiBSZXF1aXJlcyBxbWQgQ0xJIG9yIE1DUCBzZXJ2ZXIuIEluc3RhbGwgdmlhIGBucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZGAuCm1ldGFkYXRhOgogIGF1dGhvcjogdG9iaQogIHZlcnNpb246ICIyLjAuMCIKYWxsb3dlZC10b29sczogQmFzaChxbWQ6KiksIG1jcF9fcW1kX18qCi0tLQoKIyBRTUQgLSBRdWljayBNYXJrZG93biBTZWFyY2gKCkxvY2FsIHNlYXJjaCBlbmdpbmUgZm9yIG1hcmtkb3duIGNvbnRlbnQuCgojIyBTdGF0dXMKCiFgcW1kIHN0YXR1cyAyPi9kZXYvbnVsbCB8fCBlY2hvICJOb3QgaW5zdGFsbGVkOiBucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZCJgCgojIyBNQ1A6IGBxdWVyeWAKCmBgYGpzb24KewogICJzZWFyY2hlcyI6IFsKICAgIHsgInR5cGUiOiAibGV4IiwgInF1ZXJ5IjogIkNBUCB0aGVvcmVtIGNvbnNpc3RlbmN5IiB9LAogICAgeyAidHlwZSI6ICJ2ZWMiLCAicXVlcnkiOiAidHJhZGVvZmYgYmV0d2VlbiBjb25zaXN0ZW5jeSBhbmQgYXZhaWxhYmlsaXR5IiB9CiAgXSwKICAiY29sbGVjdGlvbnMiOiBbImRvY3MiXSwKICAibGltaXQiOiAxMAp9CmBgYAoKIyMjIFF1ZXJ5IFR5cGVzCgp8IFR5cGUgfCBNZXRob2QgfCBJbnB1dCB8CnwtLS0tLS18LS0tLS0tLS18LS0tLS0tLXwKfCBgbGV4YCB8IEJNMjUgfCBLZXl3b3JkcyDigJQgZXhhY3QgdGVybXMsIG5hbWVzLCBjb2RlIHwKfCBgdmVjYCB8IFZlY3RvciB8IFF1ZXN0aW9uIOKAlCBuYXR1cmFsIGxhbmd1YWdlIHwKfCBgaHlkZWAgfCBWZWN0b3IgfCBBbnN3ZXIg4oCUIGh5cG90aGV0aWNhbCByZXN1bHQgKDUwLTEwMCB3b3JkcykgfAoKIyMjIFdyaXRpbmcgR29vZCBRdWVyaWVzCgoqKmxleCAoa2V5d29yZCkqKgotIDItNSB0ZXJtcywgbm8gZmlsbGVyIHdvcmRzCi0gRXhhY3QgcGhyYXNlOiBgImNvbm5lY3Rpb24gcG9vbCJgIChxdW90ZWQpCi0gRXhjbHVkZSB0ZXJtczogYHBlcmZvcm1hbmNlIC1zcG9ydHNgIChtaW51cyBwcmVmaXgpCi0gQ29kZSBpZGVudGlmaWVycyB3b3JrOiBgaGFuZGxlRXJyb3IgYXN5bmNgCgoqKnZlYyAoc2VtYW50aWMpKioKLSBGdWxsIG5hdHVyYWwgbGFuZ3VhZ2UgcXVlc3Rpb24KLSBCZSBzcGVjaWZpYzogYCJob3cgZG9lcyB0aGUgcmF0ZSBsaW1pdGVyIGhhbmRsZSBidXJzdCB0cmFmZmljImAKLSBJbmNsdWRlIGNvbnRleHQ6IGAiaW4gdGhlIHBheW1lbnQgc2VydmljZSwgaG93IGFyZSByZWZ1bmRzIHByb2Nlc3NlZCJgCgoqKmh5ZGUgKGh5cG90aGV0aWNhbCBkb2N1bWVudCkqKgotIFdyaXRlIDUwLTEwMCB3b3JkcyBvZiB3aGF0IHRoZSAqYW5zd
2VyKiBsb29rcyBsaWtlCi0gVXNlIHRoZSB2b2NhYnVsYXJ5IHlvdSBleHBlY3QgaW4gdGhlIHJlc3VsdAoKKipleHBhbmQgKGF1dG8tZXhwYW5kKSoqCi0gVXNlIGEgc2luZ2xlLWxpbmUgcXVlcnkgKGltcGxpY2l0KSBvciBgZXhwYW5kOiBxdWVzdGlvbmAgb24gaXRzIG93biBsaW5lCi0gTGV0cyB0aGUgbG9jYWwgTExNIGdlbmVyYXRlIGxleC92ZWMvaHlkZSB2YXJpYXRpb25zCi0gRG8gbm90IG1peCBgZXhwYW5kOmAgd2l0aCBvdGhlciB0eXBlZCBsaW5lcyDigJQgaXQncyBlaXRoZXIgYSBzdGFuZGFsb25lIGV4cGFuZCBxdWVyeSBvciBhIGZ1bGwgcXVlcnkgZG9jdW1lbnQKCiMjIyBJbnRlbnQgKERpc2FtYmlndWF0aW9uKQoKV2hlbiBhIHF1ZXJ5IHRlcm0gaXMgYW1iaWd1b3VzLCBhZGQgYGludGVudGAgdG8gc3RlZXIgcmVzdWx0czoKCmBgYGpzb24KewogICJzZWFyY2hlcyI6IFsKICAgIHsgInR5cGUiOiAibGV4IiwgInF1ZXJ5IjogInBlcmZvcm1hbmNlIiB9CiAgXSwKICAiaW50ZW50IjogIndlYiBwYWdlIGxvYWQgdGltZXMgYW5kIENvcmUgV2ViIFZpdGFscyIKfQpgYGAKCkludGVudCBhZmZlY3RzIGV4cGFuc2lvbiwgcmVyYW5raW5nLCBjaHVuayBzZWxlY3Rpb24sIGFuZCBzbmlwcGV0IGV4dHJhY3Rpb24uIEl0IGRvZXMgbm90IHNlYXJjaCBvbiBpdHMgb3duIOKAlCBpdCdzIGEgc3RlZXJpbmcgc2lnbmFsIHRoYXQgZGlzYW1iaWd1YXRlcyBxdWVyaWVzIGxpa2UgInBlcmZvcm1hbmNlIiAod2ViLXBlcmYgdnMgdGVhbSBoZWFsdGggdnMgZml0bmVzcykuCgojIyMgQ29tYmluaW5nIFR5cGVzCgp8IEdvYWwgfCBBcHByb2FjaCB8CnwtLS0tLS18LS0tLS0tLS0tLXwKfCBLbm93IGV4YWN0IHRlcm1zIHwgYGxleGAgb25seSB8CnwgRG9uJ3Qga25vdyB2b2NhYnVsYXJ5IHwgVXNlIGEgc2luZ2xlLWxpbmUgcXVlcnkgKGltcGxpY2l0IGBleHBhbmQ6YCkgb3IgYHZlY2AgfAp8IEJlc3QgcmVjYWxsIHwgYGxleGAgKyBgdmVjYCB8CnwgQ29tcGxleCB0b3BpYyB8IGBsZXhgICsgYHZlY2AgKyBgaHlkZWAgfAp8IEFtYmlndW91cyBxdWVyeSB8IEFkZCBgaW50ZW50YCB0byBhbnkgY29tYmluYXRpb24gYWJvdmUgfAoKRmlyc3QgcXVlcnkgZ2V0cyAyeCB3ZWlnaHQgaW4gZnVzaW9uIOKAlCBwdXQgeW91ciBiZXN0IGd1ZXNzIGZpcnN0LgoKIyMjIExleCBRdWVyeSBTeW50YXgKCnwgU3ludGF4IHwgTWVhbmluZyB8IEV4YW1wbGUgfAp8LS0tLS0tLS18LS0tLS0tLS0tfC0tLS0tLS0tLXwKfCBgdGVybWAgfCBQcmVmaXggbWF0Y2ggfCBgcGVyZmAgbWF0Y2hlcyAicGVyZm9ybWFuY2UiIHwKfCBgInBocmFzZSJgIHwgRXhhY3QgcGhyYXNlIHwgYCJyYXRlIGxpbWl0ZXIiYCB8CnwgYC10ZXJtYCB8IEV4Y2x1ZGUgfCBgcGVyZm9ybWFuY2UgLXNwb3J0c2AgfAoKTm90ZTogYC10ZXJtYCBvbmx5IHdvcmtzIGluIGxleCBxdWVyaWVzLCBub3QgdmVjL2h5ZGUuCgojIyMgQ29sbGVjdGlvbiBGaWx0ZXJpbmcKCmBgYGpzb24KeyAiY29sbGVjd
GlvbnMiOiBbImRvY3MiXSB9ICAgICAgICAgICAgICAvLyBTaW5nbGUKeyAiY29sbGVjdGlvbnMiOiBbImRvY3MiLCAibm90ZXMiXSB9ICAgICAvLyBNdWx0aXBsZSAoT1IpCmBgYAoKT21pdCB0byBzZWFyY2ggYWxsIGNvbGxlY3Rpb25zLgoKIyMgT3RoZXIgTUNQIFRvb2xzCgp8IFRvb2wgfCBVc2UgfAp8LS0tLS0tfC0tLS0tfAp8IGBnZXRgIHwgUmV0cmlldmUgZG9jIGJ5IHBhdGggb3IgYCNkb2NpZGAgfAp8IGBtdWx0aV9nZXRgIHwgUmV0cmlldmUgbXVsdGlwbGUgYnkgZ2xvYi9saXN0IHwKfCBgc3RhdHVzYCB8IENvbGxlY3Rpb25zIGFuZCBoZWFsdGggfAoKIyMgQ0xJCgpgYGBiYXNoCnFtZCBxdWVyeSAicXVlc3Rpb24iICAgICAgICAgICAgICAjIEF1dG8tZXhwYW5kICsgcmVyYW5rCnFtZCBxdWVyeSAkJ2xleDogWFxudmVjOiBZJyAgICAgICAjIFN0cnVjdHVyZWQKcW1kIHF1ZXJ5ICQnZXhwYW5kOiBxdWVzdGlvbicgICAgICMgRXhwbGljaXQgZXhwYW5kCnFtZCBxdWVyeSAtLWpzb24gLS1leHBsYWluICJxIiAgICAjIFNob3cgc2NvcmUgdHJhY2VzIChSUkYgKyByZXJhbmsgYmxlbmQpCnFtZCBzZWFyY2ggImtleXdvcmRzIiAgICAgICAgICAgICAjIEJNMjUgb25seSAobm8gTExNKQpxbWQgZ2V0ICIjYWJjMTIzIiAgICAgICAgICAgICAgICAgIyBCeSBkb2NpZApxbWQgbXVsdGktZ2V0ICJqb3VybmFscy8yMDI2LSoubWQiIC1sIDQwICAjIEJhdGNoIHB1bGwgc25pcHBldHMgYnkgZ2xvYgpxbWQgbXVsdGktZ2V0IG5vdGVzL2Zvby5tZCxub3Rlcy9iYXIubWQgICAjIENvbW1hLXNlcGFyYXRlZCBsaXN0LCBwcmVzZXJ2ZXMgb3JkZXIKYGBgCgojIyBIVFRQIEFQSQoKYGBgYmFzaApjdXJsIC1YIFBPU1QgaHR0cDovL2xvY2FsaG9zdDo4MTgxL3F1ZXJ5IFwKICAtSCAiQ29udGVudC1UeXBlOiBhcHBsaWNhdGlvbi9qc29uIiBcCiAgLWQgJ3sic2VhcmNoZXMiOiBbeyJ0eXBlIjogImxleCIsICJxdWVyeSI6ICJ0ZXN0In1dfScKYGBgCgojIyBTZXR1cAoKYGBgYmFzaApucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZApxbWQgY29sbGVjdGlvbiBhZGQgfi9ub3RlcyAtLW5hbWUgbm90ZXMKcW1kIGVtYmVkCmBgYAo=",
4
+ "references/mcp-setup.md": "IyBRTUQgTUNQIFNlcnZlciBTZXR1cAoKIyMgSW5zdGFsbAoKYGBgYmFzaApucG0gaW5zdGFsbCAtZyBAdG9iaWx1L3FtZApxbWQgY29sbGVjdGlvbiBhZGQgfi9wYXRoL3RvL21hcmtkb3duIC0tbmFtZSBteWtub3dsZWRnZQpxbWQgZW1iZWQKYGBgCgojIyBDb25maWd1cmUgTUNQIENsaWVudAoKKipDbGF1ZGUgQ29kZSoqIChgfi8uY2xhdWRlL3NldHRpbmdzLmpzb25gKToKYGBganNvbgp7CiAgIm1jcFNlcnZlcnMiOiB7CiAgICAicW1kIjogeyAiY29tbWFuZCI6ICJxbWQiLCAiYXJncyI6IFsibWNwIl0gfQogIH0KfQpgYGAKCioqQ2xhdWRlIERlc2t0b3AqKiAoYH4vTGlicmFyeS9BcHBsaWNhdGlvbiBTdXBwb3J0L0NsYXVkZS9jbGF1ZGVfZGVza3RvcF9jb25maWcuanNvbmApOgpgYGBqc29uCnsKICAibWNwU2VydmVycyI6IHsKICAgICJxbWQiOiB7ICJjb21tYW5kIjogInFtZCIsICJhcmdzIjogWyJtY3AiXSB9CiAgfQp9CmBgYAoKKipPcGVuQ2xhdyoqIChgfi8ub3BlbmNsYXcvb3BlbmNsYXcuanNvbmApOgpgYGBqc29uCnsKICAibWNwIjogewogICAgInNlcnZlcnMiOiB7CiAgICAgICJxbWQiOiB7ICJjb21tYW5kIjogInFtZCIsICJhcmdzIjogWyJtY3AiXSB9CiAgICB9CiAgfQp9CmBgYAoKIyMgSFRUUCBNb2RlCgpgYGBiYXNoCnFtZCBtY3AgLS1odHRwICAgICAgICAgICAgICAjIFBvcnQgODE4MQpxbWQgbWNwIC0taHR0cCAtLWRhZW1vbiAgICAgIyBCYWNrZ3JvdW5kCnFtZCBtY3Agc3RvcCAgICAgICAgICAgICAgICAjIFN0b3AgZGFlbW9uCmBgYAoKIyMgVG9vbHMKCiMjIyBzdHJ1Y3R1cmVkX3NlYXJjaAoKU2VhcmNoIHdpdGggcHJlLWV4cGFuZGVkIHF1ZXJpZXMuCgpgYGBqc29uCnsKICAic2VhcmNoZXMiOiBbCiAgICB7ICJ0eXBlIjogImxleCIsICJxdWVyeSI6ICJrZXl3b3JkIHBocmFzZXMiIH0sCiAgICB7ICJ0eXBlIjogInZlYyIsICJxdWVyeSI6ICJuYXR1cmFsIGxhbmd1YWdlIHF1ZXN0aW9uIiB9LAogICAgeyAidHlwZSI6ICJoeWRlIiwgInF1ZXJ5IjogImh5cG90aGV0aWNhbCBhbnN3ZXIgcGFzc2FnZS4uLiIgfQogIF0sCiAgImxpbWl0IjogMTAsCiAgImNvbGxlY3Rpb24iOiAib3B0aW9uYWwiLAogICJtaW5TY29yZSI6IDAuMAp9CmBgYAoKfCBUeXBlIHwgTWV0aG9kIHwgSW5wdXQgfAp8LS0tLS0tfC0tLS0tLS0tfC0tLS0tLS18CnwgYGxleGAgfCBCTTI1IHwgS2V5d29yZHMgKDItNSB0ZXJtcykgfAp8IGB2ZWNgIHwgVmVjdG9yIHwgUXVlc3Rpb24gfAp8IGBoeWRlYCB8IFZlY3RvciB8IEFuc3dlciBwYXNzYWdlICg1MC0xMDAgd29yZHMpIHwKCiMjIyBnZXQKClJldHJpZXZlIGRvY3VtZW50IGJ5IHBhdGggb3IgYCNkb2NpZGAuCgp8IFBhcmFtIHwgVHlwZSB8IERlc2NyaXB0aW9uIHwKfC0tLS0tLS18LS0tLS0tfC0tLS0tLS0tLS0tLS18CnwgYHBhdGhgIHwgc3RyaW5nIHwgRmlsZSBwYXRoIG9yIGAjZG9jaWRgIHwKfCBgZnVsbGAgfCBib29sPyB8IFJldHVybi
BmdWxsIGNvbnRlbnQgfAp8IGBsaW5lTnVtYmVyc2AgfCBib29sPyB8IEFkZCBsaW5lIG51bWJlcnMgfAoKIyMjIG11bHRpX2dldAoKUmV0cmlldmUgbXVsdGlwbGUgZG9jdW1lbnRzLgoKfCBQYXJhbSB8IFR5cGUgfCBEZXNjcmlwdGlvbiB8CnwtLS0tLS0tfC0tLS0tLXwtLS0tLS0tLS0tLS0tfAp8IGBwYXR0ZXJuYCB8IHN0cmluZyB8IEdsb2Igb3IgY29tbWEtc2VwYXJhdGVkIGxpc3QgfAp8IGBtYXhCeXRlc2AgfCBudW1iZXI/IHwgU2tpcCBsYXJnZSBmaWxlcyAoZGVmYXVsdCAxMEtCKSB8CgojIyMgc3RhdHVzCgpJbmRleCBoZWFsdGggYW5kIGNvbGxlY3Rpb25zLiBObyBwYXJhbXMuCgojIyBUcm91Ymxlc2hvb3RpbmcKCi0gKipOb3Qgc3RhcnRpbmcqKjogYHdoaWNoIHFtZGAsIGBxbWQgbWNwYCBtYW51YWxseQotICoqTm8gcmVzdWx0cyoqOiBgcW1kIGNvbGxlY3Rpb24gbGlzdGAsIGBxbWQgZW1iZWRgCi0gKipTbG93IGZpcnN0IHNlYXJjaCoqOiBOb3JtYWwsIG1vZGVscyBsb2FkaW5nICh+M0dCKQo="
5
+ };
6
+ export function getEmbeddedQmdSkillFiles() {
7
+ return Object.entries(EMBEDDED_QMD_SKILL_BASE64).map(([relativePath, encoded]) => ({
8
+ relativePath,
9
+ content: Buffer.from(encoded, 'base64').toString('utf8'),
10
+ }));
11
+ }
12
+ export function getEmbeddedQmdSkillContent() {
13
+ return Buffer.from(EMBEDDED_QMD_SKILL_BASE64["SKILL.md"], "base64").toString("utf8");
14
+ }
package/dist/index.d.ts CHANGED
@@ -16,11 +16,12 @@
16
16
  * const results = await store.search({ query: "how does auth work?" })
17
17
  * await store.close()
18
18
  */
19
- import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult } from "./store.js";
19
+ import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult, type ChunkStrategy } from "./store.js";
20
20
  import { type Collection, type CollectionConfig, type NamedCollection, type ContextMap } from "./collections.js";
21
21
  export type { DocumentResult, DocumentNotFound, SearchResult, HybridQueryResult, HybridQueryOptions, HybridQueryExplain, ExpandedQuery, StructuredSearchOptions, MultiGetResult, IndexStatus, IndexHealthInfo, SearchHooks, ReindexProgress, ReindexResult, EmbedProgress, EmbedResult, Collection, CollectionConfig, NamedCollection, ContextMap, };
22
22
  export type { InternalStore };
23
23
  export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
24
+ export type { ChunkStrategy } from "./store.js";
24
25
  export { getDefaultDbPath } from "./store.js";
25
26
  export { Maintenance } from "./maintenance.js";
26
27
  /**
@@ -65,6 +66,8 @@ export interface SearchOptions {
65
66
  minScore?: number;
66
67
  /** Include explain traces */
67
68
  explain?: boolean;
69
+ /** Chunk strategy: "auto" (default, uses AST for code files) or "regex" (legacy) */
70
+ chunkStrategy?: ChunkStrategy;
68
71
  }
69
72
  /**
70
73
  * Options for searchLex() — BM25 keyword search.
@@ -183,6 +186,9 @@ export interface QMDStore {
183
186
  embed(options?: {
184
187
  force?: boolean;
185
188
  model?: string;
189
+ maxDocsPerBatch?: number;
190
+ maxBatchBytes?: number;
191
+ chunkStrategy?: ChunkStrategy;
186
192
  onProgress?: (info: EmbedProgress) => void;
187
193
  }): Promise<EmbedResult>;
188
194
  /** Get index status (document counts, collections, embedding state) */
package/dist/index.js CHANGED
@@ -19,7 +19,7 @@
19
19
  import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
20
20
  import { LlamaCpp, } from "./llm.js";
21
21
  import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
22
- // Re-export utility functions used by frontends
22
+ // Re-export utility functions and types used by frontends
23
23
  export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
24
24
  // Re-export getDefaultDbPath for CLI/MCP that need the default database location
25
25
  export { getDefaultDbPath } from "./store.js";
@@ -63,21 +63,26 @@ export async function createStore(options) {
63
63
  // Track whether we have a YAML config path for write-through
64
64
  const hasYamlConfig = !!options.configPath;
65
65
  // Sync config into SQLite store_collections
66
+ let config;
66
67
  if (options.configPath) {
67
68
  // YAML mode: inject config source for write-through, sync to DB
68
69
  setConfigSource({ configPath: options.configPath });
69
- const config = loadConfig();
70
+ config = loadConfig();
70
71
  syncConfigToDb(db, config);
71
72
  }
72
73
  else if (options.config) {
73
74
  // Inline config mode: inject config source for mutations, sync to DB
74
75
  setConfigSource({ config: options.config });
75
- syncConfigToDb(db, options.config);
76
+ config = options.config;
77
+ syncConfigToDb(db, config);
76
78
  }
77
79
  // else: DB-only mode — no external config, use existing store_collections
78
80
  // Create a per-store LlamaCpp instance — lazy-loads models on first use,
79
81
  // auto-unloads after 5 min inactivity to free VRAM.
80
82
  const llm = new LlamaCpp({
83
+ embedModel: config?.models?.embed,
84
+ generateModel: config?.models?.generate,
85
+ rerankModel: config?.models?.rerank,
81
86
  inactivityTimeoutMs: 5 * 60 * 1000,
82
87
  disposeModelsOnInactivity: true,
83
88
  });
@@ -105,6 +110,7 @@ export async function createStore(options) {
105
110
  explain: opts.explain,
106
111
  intent: opts.intent,
107
112
  skipRerank,
113
+ chunkStrategy: opts.chunkStrategy,
108
114
  });
109
115
  }
110
116
  // Simple query string — use hybridQuery (expand + search + rerank)
@@ -115,6 +121,7 @@ export async function createStore(options) {
115
121
  explain: opts.explain,
116
122
  intent: opts.intent,
117
123
  skipRerank,
124
+ chunkStrategy: opts.chunkStrategy,
118
125
  });
119
126
  },
120
127
  searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
@@ -210,6 +217,9 @@ export async function createStore(options) {
210
217
  return generateEmbeddings(internal, {
211
218
  force: embedOpts?.force,
212
219
  model: embedOpts?.model,
220
+ maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
221
+ maxBatchBytes: embedOpts?.maxBatchBytes,
222
+ chunkStrategy: embedOpts?.chunkStrategy,
213
223
  onProgress: embedOpts?.onProgress,
214
224
  });
215
225
  },
package/dist/llm.d.ts CHANGED
@@ -105,7 +105,7 @@ export type LLMSessionOptions = {
105
105
  */
106
106
  export interface ILLMSession {
107
107
  embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
108
- embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
108
+ embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
109
109
  expandQuery(query: string, options?: {
110
110
  context?: string;
111
111
  includeLexical?: boolean;
@@ -137,7 +137,7 @@ export type RerankDocument = {
137
137
  };
138
138
  export declare const LFM2_GENERATE_MODEL = "hf:LiquidAI/LFM2-1.2B-GGUF/LFM2-1.2B-Q4_K_M.gguf";
139
139
  export declare const LFM2_INSTRUCT_MODEL = "hf:LiquidAI/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf";
140
- export declare const DEFAULT_EMBED_MODEL_URI: string;
140
+ export declare const DEFAULT_EMBED_MODEL_URI = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
141
141
  export declare const DEFAULT_RERANK_MODEL_URI = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
142
142
  export declare const DEFAULT_GENERATE_MODEL_URI = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
143
143
  export declare const DEFAULT_MODEL_CACHE_DIR: string;
@@ -232,6 +232,7 @@ export declare class LlamaCpp implements LLM {
232
232
  private disposeModelsOnInactivity;
233
233
  private disposed;
234
234
  constructor(config?: LlamaCppConfig);
235
+ get embedModelName(): string;
235
236
  /**
236
237
  * Reset the inactivity timer. Called after each model operation.
237
238
  * When timer fires, models are unloaded to free memory (if no active sessions).
@@ -306,6 +307,7 @@ export declare class LlamaCpp implements LLM {
306
307
  * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
307
308
  */
308
309
  private static readonly RERANK_CONTEXT_SIZE;
310
+ private static readonly EMBED_CONTEXT_SIZE;
309
311
  private ensureRerankContexts;
310
312
  /**
311
313
  * Tokenize text using the embedding model's tokenizer
@@ -320,12 +322,19 @@ export declare class LlamaCpp implements LLM {
320
322
  * Detokenize token IDs back to text
321
323
  */
322
324
  detokenize(tokens: readonly LlamaToken[]): Promise<string>;
325
+ /**
326
+ * Truncate text to fit within the embedding model's context window.
327
+ * Uses the model's own tokenizer for accurate token counting, then
328
+ * detokenizes back to text if truncation is needed.
329
+ * Returns the (possibly truncated) text and whether truncation occurred.
330
+ */
331
+ private truncateToContextSize;
323
332
  embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
324
333
  /**
325
334
  * Batch embed multiple texts efficiently
326
335
  * Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
327
336
  */
328
- embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
337
+ embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
329
338
  generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
330
339
  modelExists(modelUri: string): Promise<ModelInfo>;
331
340
  expandQuery(query: string, options?: {
package/dist/llm.js CHANGED
@@ -47,8 +47,8 @@ export function formatDocForEmbedding(text, title, modelUri) {
47
47
  // =============================================================================
48
48
  // HuggingFace model URIs for node-llama-cpp
49
49
  // Format: hf:<user>/<repo>/<file>
50
- // Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf)
51
- const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ?? "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
50
+ // Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)
51
+ const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
52
52
  const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
53
53
  // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
54
54
  const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
@@ -61,7 +61,9 @@ export const DEFAULT_EMBED_MODEL_URI = DEFAULT_EMBED_MODEL;
61
61
  export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
62
62
  export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
63
63
  // Local model cache directory
64
- const MODEL_CACHE_DIR = join(homedir(), ".cache", "qmd", "models");
64
+ const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
65
+ ? join(process.env.XDG_CACHE_HOME, "qmd", "models")
66
+ : join(homedir(), ".cache", "qmd", "models");
65
67
  export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR;
66
68
  function parseHfUri(model) {
67
69
  if (!model.startsWith("hf:"))
@@ -187,14 +189,17 @@ export class LlamaCpp {
187
189
  // Track disposal state to prevent double-dispose
188
190
  disposed = false;
189
191
  constructor(config = {}) {
190
- this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
191
- this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
192
- this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
192
+ this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
193
+ this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
194
+ this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
193
195
  this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
194
196
  this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
195
197
  this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
196
198
  this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
197
199
  }
200
+ get embedModelName() {
201
+ return this.embedModelUri;
202
+ }
198
203
  /**
199
204
  * Reset the inactivity timer. Called after each model operation.
200
205
  * When timer fires, models are unloaded to free memory (if no active sessions).
@@ -289,11 +294,29 @@ export class LlamaCpp {
289
294
  */
290
295
  async ensureLlama() {
291
296
  if (!this.llama) {
292
- const llama = await getLlama({
293
- // attempt to build
297
+ // Allow override via QMD_LLAMA_GPU (case-insensitive): "false" | "off" | "none" | "disable" | "disabled" | "0" forces CPU
298
+ const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase();
299
+ const forceCpu = ["false", "off", "none", "disable", "disabled", "0"].includes(gpuOverride);
300
+ const loadLlama = async (gpu) => await getLlama({
294
301
  build: "autoAttempt",
295
- logLevel: LlamaLogLevel.error
302
+ logLevel: LlamaLogLevel.error,
303
+ gpu,
296
304
  });
305
+ let llama;
306
+ if (forceCpu) {
307
+ llama = await loadLlama(false);
308
+ }
309
+ else {
310
+ try {
311
+ llama = await loadLlama("auto");
312
+ }
313
+ catch (err) {
314
+ // GPU backend (e.g. Vulkan on headless/driverless machines) can throw at init.
315
+ // Fall back to CPU so qmd still works.
316
+ process.stderr.write(`QMD Warning: GPU init failed (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`);
317
+ llama = await loadLlama(false);
318
+ }
319
+ }
297
320
  if (llama.gpu === false) {
298
321
  process.stderr.write("QMD Warning: no GPU acceleration, running on CPU (slow). Run 'qmd status' for details.\n");
299
322
  }
@@ -394,6 +417,7 @@ export class LlamaCpp {
394
417
  for (let i = 0; i < n; i++) {
395
418
  try {
396
419
  this.embedContexts.push(await model.createEmbeddingContext({
420
+ contextSize: LlamaCpp.EMBED_CONTEXT_SIZE,
397
421
  ...(threads > 0 ? { threads } : {}),
398
422
  }));
399
423
  }
@@ -484,9 +508,20 @@ export class LlamaCpp {
484
508
  * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
485
509
  */
486
510
  // Qwen3 reranker template adds ~200 tokens overhead (system prompt, tags, etc.)
487
- // Chunks are max 800 tokens, so 800 + 200 + query ≈ 1100 tokens typical.
488
- // Use 2048 for safety margin. Still 17× less than auto (40960).
489
- static RERANK_CONTEXT_SIZE = 2048;
511
+ // Default 2048 was too small for longer documents (e.g. session transcripts,
512
+ // CJK text, or large markdown files): callers hit "input lengths exceed
513
+ // context size" errors even after truncation because the overhead estimate
514
+ // was insufficient. 4096 comfortably fits the largest real-world chunks
515
+ // while staying well below the 40 960-token auto size.
516
+ // Override with QMD_RERANK_CONTEXT_SIZE env var if you need more headroom.
517
+ static RERANK_CONTEXT_SIZE = (() => {
518
+ const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
519
+ return Number.isFinite(v) && v > 0 ? v : 4096;
520
+ })();
521
+ static EMBED_CONTEXT_SIZE = (() => {
522
+ const v = parseInt(process.env.QMD_EMBED_CONTEXT_SIZE ?? "", 10);
523
+ return Number.isFinite(v) && v > 0 ? v : 2048;
524
+ })();
490
525
  async ensureRerankContexts() {
491
526
  if (this.rerankContexts.length === 0) {
492
527
  const model = await this.ensureRerankModel();
@@ -555,15 +590,41 @@ export class LlamaCpp {
555
590
  // ==========================================================================
556
591
  // Core API methods
557
592
  // ==========================================================================
593
+ /**
594
+ * Truncate text to fit within the embedding model's context window.
595
+ * Uses the model's own tokenizer for accurate token counting, then
596
+ * detokenizes back to text if truncation is needed.
597
+ * Returns the (possibly truncated) text and whether truncation occurred.
598
+ */
599
+ async truncateToContextSize(text) {
600
+ if (!this.embedModel)
601
+ return { text, truncated: false };
602
+ const maxTokens = this.embedModel.trainContextSize;
603
+ if (maxTokens <= 0)
604
+ return { text, truncated: false };
605
+ const tokens = this.embedModel.tokenize(text);
606
+ if (tokens.length <= maxTokens)
607
+ return { text, truncated: false };
608
+ // Leave a small margin (4 tokens) for BOS/EOS overhead
609
+ const safeLimit = Math.max(1, maxTokens - 4);
610
+ const truncatedTokens = tokens.slice(0, safeLimit);
611
+ const truncatedText = this.embedModel.detokenize(truncatedTokens);
612
+ return { text: truncatedText, truncated: true };
613
+ }
558
614
  async embed(text, options = {}) {
559
615
  // Ping activity at start to keep models alive during this operation
560
616
  this.touchActivity();
561
617
  try {
562
618
  const context = await this.ensureEmbedContext();
563
- const embedding = await context.getEmbeddingFor(text);
619
+ // Guard: truncate text that exceeds model context window to prevent GGML crash
620
+ const { text: safeText, truncated } = await this.truncateToContextSize(text);
621
+ if (truncated) {
622
+ console.warn(`⚠ Text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
623
+ }
624
+ const embedding = await context.getEmbeddingFor(safeText);
564
625
  return {
565
626
  embedding: Array.from(embedding.vector),
566
- model: this.embedModelUri,
627
+ model: options.model ?? this.embedModelUri,
567
628
  };
568
629
  }
569
630
  catch (error) {
@@ -575,7 +636,7 @@ export class LlamaCpp {
575
636
  * Batch embed multiple texts efficiently
576
637
  * Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
577
638
  */
578
- async embedBatch(texts) {
639
+ async embedBatch(texts, options = {}) {
579
640
  if (this._ciMode)
580
641
  throw new Error("LLM operations are disabled in CI (set CI=true)");
581
642
  // Ping activity at start to keep models alive during this operation
@@ -591,9 +652,13 @@ export class LlamaCpp {
591
652
  const embeddings = [];
592
653
  for (const text of texts) {
593
654
  try {
594
- const embedding = await context.getEmbeddingFor(text);
655
+ const { text: safeText, truncated } = await this.truncateToContextSize(text);
656
+ if (truncated) {
657
+ console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
658
+ }
659
+ const embedding = await context.getEmbeddingFor(safeText);
595
660
  this.touchActivity();
596
- embeddings.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
661
+ embeddings.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
597
662
  }
598
663
  catch (err) {
599
664
  console.error("Embedding error for text:", err);
@@ -610,9 +675,13 @@ export class LlamaCpp {
610
675
  const results = [];
611
676
  for (const text of chunk) {
612
677
  try {
613
- const embedding = await ctx.getEmbeddingFor(text);
678
+ const { text: safeText, truncated } = await this.truncateToContextSize(text);
679
+ if (truncated) {
680
+ console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
681
+ }
682
+ const embedding = await ctx.getEmbeddingFor(safeText);
614
683
  this.touchActivity();
615
- results.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
684
+ results.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
616
685
  }
617
686
  catch (err) {
618
687
  console.error("Embedding error for text:", err);
@@ -767,8 +836,10 @@ export class LlamaCpp {
767
836
  await genContext.dispose();
768
837
  }
769
838
  }
770
- // Qwen3 reranker chat template overhead (system prompt, tags, separators)
771
- static RERANK_TEMPLATE_OVERHEAD = 200;
839
+ // Qwen3 reranker chat template overhead (system prompt, tags, separators).
840
+ // Measured at ~350 tokens on real queries; use 512 as a safe upper bound so
841
+ // the truncation budget never lets a document slip past the context limit.
842
+ static RERANK_TEMPLATE_OVERHEAD = 512;
772
843
  static RERANK_TARGET_DOCS_PER_CONTEXT = 10;
773
844
  async rerank(query, documents, options = {}) {
774
845
  if (this._ciMode)
@@ -1028,8 +1099,8 @@ class LLMSession {
1028
1099
  async embed(text, options) {
1029
1100
  return this.withOperation(() => this.manager.getLlamaCpp().embed(text, options));
1030
1101
  }
1031
- async embedBatch(texts) {
1032
- return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts));
1102
+ async embedBatch(texts, options) {
1103
+ return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts, options));
1033
1104
  }
1034
1105
  async expandQuery(query, options) {
1035
1106
  return this.withOperation(() => this.manager.getLlamaCpp().expandQuery(query, options));
@@ -1106,8 +1177,7 @@ let defaultLlamaCpp = null;
1106
1177
  */
1107
1178
  export function getDefaultLlamaCpp() {
1108
1179
  if (!defaultLlamaCpp) {
1109
- const embedModel = process.env.QMD_EMBED_MODEL;
1110
- defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
1180
+ defaultLlamaCpp = new LlamaCpp();
1111
1181
  }
1112
1182
  return defaultLlamaCpp;
1113
1183
  }
@@ -8,13 +8,17 @@
8
8
  */
9
9
  import { createServer } from "node:http";
10
10
  import { randomUUID } from "node:crypto";
11
+ import { readFileSync } from "node:fs";
12
+ import { join, dirname } from "node:path";
11
13
  import { fileURLToPath } from "url";
12
14
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
13
15
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
14
16
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
15
17
  import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
16
18
  import { z } from "zod";
19
+ import { existsSync } from "fs";
17
20
  import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
21
+ import { getConfigPath } from "../collections.js";
18
22
  // =============================================================================
19
23
  // Helper functions
20
24
  // =============================================================================
@@ -39,6 +43,16 @@ function formatSearchSummary(results, query) {
39
43
  }
40
44
  return lines.join('\n');
41
45
  }
46
+ function getPackageVersion() {
47
+ try {
48
+ const pkgPath = join(dirname(fileURLToPath(import.meta.url)), "../../package.json");
49
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
50
+ return pkg.version ?? "unknown";
51
+ }
52
+ catch {
53
+ return "unknown";
54
+ }
55
+ }
42
56
  // =============================================================================
43
57
  // MCP Server
44
58
  // =============================================================================
@@ -108,7 +122,7 @@ async function buildInstructions(store) {
108
122
  * Shared by both stdio and HTTP transports.
109
123
  */
110
124
  async function createMcpServer(store) {
111
- const server = new McpServer({ name: "qmd", version: "0.9.9" }, { instructions: await buildInstructions(store) });
125
+ const server = new McpServer({ name: "qmd", version: getPackageVersion() }, { instructions: await buildInstructions(store) });
112
126
  // Pre-fetch default collection names for search tools
113
127
  const defaultCollectionNames = await store.getDefaultCollectionNames();
114
128
  // ---------------------------------------------------------------------------
@@ -218,8 +232,9 @@ Intent-aware lex (C++ performance, not sports):
218
232
  candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
219
233
  collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
220
234
  intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
235
+ rerank: z.boolean().optional().default(true).describe("Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."),
221
236
  },
222
- }, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
237
+ }, async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
223
238
  // Map to internal format
224
239
  const queries = searches.map(s => ({
225
240
  type: s.type,
@@ -232,6 +247,7 @@ Intent-aware lex (C++ performance, not sports):
232
247
  collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
233
248
  limit,
234
249
  minScore,
250
+ rerank,
235
251
  intent,
236
252
  });
237
253
  // Use first lex or vec query for snippet extraction
@@ -387,7 +403,7 @@ Intent-aware lex (C++ performance, not sports):
387
403
  ` Collections: ${status.collections.length}`,
388
404
  ];
389
405
  for (const col of status.collections) {
390
- summary.push(` - ${col.path} (${col.documents} docs)`);
406
+ summary.push(` - ${col.name}: ${col.path} (${col.documents} docs)`);
391
407
  }
392
408
  return {
393
409
  content: [{ type: "text", text: summary.join('\n') }],
@@ -400,7 +416,11 @@ Intent-aware lex (C++ performance, not sports):
400
416
  // Transport: stdio (default)
401
417
  // =============================================================================
402
418
  export async function startMcpServer() {
403
- const store = await createStore({ dbPath: getDefaultDbPath() });
419
+ const configPath = getConfigPath();
420
+ const store = await createStore({
421
+ dbPath: getDefaultDbPath(),
422
+ ...(existsSync(configPath) ? { configPath } : {}),
423
+ });
404
424
  const server = await createMcpServer(store);
405
425
  const transport = new StdioServerTransport();
406
426
  await server.connect(transport);
@@ -410,7 +430,11 @@ export async function startMcpServer() {
410
430
  * Binds to localhost only. Returns a handle for shutdown and port discovery.
411
431
  */
412
432
  export async function startMcpHttpServer(port, options) {
413
- const store = await createStore({ dbPath: getDefaultDbPath() });
433
+ const configPath = getConfigPath();
434
+ const store = await createStore({
435
+ dbPath: getDefaultDbPath(),
436
+ ...(existsSync(configPath) ? { configPath } : {}),
437
+ });
414
438
  // Pre-fetch default collection names for REST endpoint
415
439
  const defaultCollectionNames = await store.getDefaultCollectionNames();
416
440
  // Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).