@tobilu/qmd 2.0.1 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +177 -0
- package/README.md +64 -1
- package/bin/qmd +49 -4
- package/dist/ast.d.ts +65 -0
- package/dist/ast.js +334 -0
- package/dist/bench/bench.d.ts +23 -0
- package/dist/bench/bench.js +280 -0
- package/dist/bench/score.d.ts +33 -0
- package/dist/bench/score.js +88 -0
- package/dist/bench/types.d.ts +80 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.js +5 -1
- package/dist/cli/qmd.d.ts +27 -0
- package/dist/cli/qmd.js +1328 -115
- package/dist/collections.d.ts +20 -0
- package/dist/collections.js +32 -7
- package/dist/db.d.ts +14 -3
- package/dist/db.js +45 -4
- package/dist/index.d.ts +11 -1
- package/dist/index.js +18 -5
- package/dist/llm.d.ts +77 -6
- package/dist/llm.js +445 -62
- package/dist/mcp/server.d.ts +6 -3
- package/dist/mcp/server.js +68 -29
- package/dist/paths.d.ts +1 -0
- package/dist/paths.js +4 -0
- package/dist/store.d.ts +148 -23
- package/dist/store.js +1018 -255
- package/package.json +48 -20
- package/scripts/build.mjs +29 -0
- package/scripts/check-package-grammars.mjs +29 -0
- package/scripts/package-smoke.mjs +65 -0
- package/scripts/test-all.mjs +27 -0
- package/skills/qmd/SKILL.md +203 -0
- package/skills/qmd/references/mcp-setup.md +102 -0
- package/skills/release/SKILL.md +139 -0
- package/skills/release/scripts/install-hooks.sh +38 -0
- package/dist/embedded-skills.d.ts +0 -6
- package/dist/embedded-skills.js +0 -14
package/dist/collections.d.ts
CHANGED
|
@@ -21,12 +21,23 @@ export interface Collection {
|
|
|
21
21
|
update?: string;
|
|
22
22
|
includeByDefault?: boolean;
|
|
23
23
|
}
|
|
24
|
+
/**
|
|
25
|
+
* Model configuration for embedding, reranking, and generation
|
|
26
|
+
*/
|
|
27
|
+
export interface ModelsConfig {
|
|
28
|
+
embed?: string;
|
|
29
|
+
rerank?: string;
|
|
30
|
+
generate?: string;
|
|
31
|
+
}
|
|
24
32
|
/**
|
|
25
33
|
* The complete configuration file structure
|
|
26
34
|
*/
|
|
27
35
|
export interface CollectionConfig {
|
|
28
36
|
global_context?: string;
|
|
37
|
+
editor_uri?: string;
|
|
38
|
+
editor_uri_template?: string;
|
|
29
39
|
collections: Record<string, Collection>;
|
|
40
|
+
models?: ModelsConfig;
|
|
30
41
|
}
|
|
31
42
|
/**
|
|
32
43
|
* Collection with its name (for return values)
|
|
@@ -49,6 +60,15 @@ export declare function setConfigSource(source?: {
|
|
|
49
60
|
* Config file will be ~/.config/qmd/{indexName}.yml
|
|
50
61
|
*/
|
|
51
62
|
export declare function setConfigIndexName(name: string): void;
|
|
63
|
+
/**
|
|
64
|
+
* Find a project-local QMD config by walking upward from startDir.
|
|
65
|
+
* The local config lives at .qmd/index.yaml or .qmd/index.yml and,
|
|
66
|
+
* when used by the CLI, keeps both config and index DB writes inside
|
|
67
|
+
* the project instead of the global ~/.config / ~/.cache locations.
|
|
68
|
+
*/
|
|
69
|
+
export declare function findLocalConfigPath(startDir?: string): string | undefined;
|
|
70
|
+
/** Return the local SQLite index path paired with a local .qmd/index.yaml file. */
|
|
71
|
+
export declare function getLocalDbPath(configPath: string): string;
|
|
52
72
|
/**
|
|
53
73
|
* Load configuration from the configured source.
|
|
54
74
|
* - Inline config: returns the in-memory object directly
|
package/dist/collections.js
CHANGED
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* Collections define which directories to index and their associated contexts.
|
|
6
6
|
*/
|
|
7
7
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
|
-
import { join, dirname } from "path";
|
|
9
|
-
import {
|
|
8
|
+
import { join, dirname, resolve } from "path";
|
|
9
|
+
import { qmdHomedir } from "./paths.js";
|
|
10
10
|
import YAML from "yaml";
|
|
11
11
|
// ============================================================================
|
|
12
12
|
// Configuration paths
|
|
@@ -47,9 +47,7 @@ export function setConfigSource(source) {
|
|
|
47
47
|
export function setConfigIndexName(name) {
|
|
48
48
|
// Resolve relative paths to absolute paths and sanitize for use as filename
|
|
49
49
|
if (name.includes('/')) {
|
|
50
|
-
const
|
|
51
|
-
const { cwd } = require('process');
|
|
52
|
-
const absolutePath = resolve(cwd(), name);
|
|
50
|
+
const absolutePath = resolve(process.cwd(), name);
|
|
53
51
|
// Replace path separators with underscores to create a valid filename
|
|
54
52
|
currentIndexName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
|
|
55
53
|
}
|
|
@@ -66,11 +64,37 @@ function getConfigDir() {
|
|
|
66
64
|
if (process.env.XDG_CONFIG_HOME) {
|
|
67
65
|
return join(process.env.XDG_CONFIG_HOME, "qmd");
|
|
68
66
|
}
|
|
69
|
-
return join(
|
|
67
|
+
return join(qmdHomedir(), ".config", "qmd");
|
|
70
68
|
}
|
|
71
69
|
function getConfigFilePath() {
|
|
72
70
|
return join(getConfigDir(), `${currentIndexName}.yml`);
|
|
73
71
|
}
|
|
72
|
+
/**
|
|
73
|
+
* Find a project-local QMD config by walking upward from startDir.
|
|
74
|
+
* The local config lives at .qmd/index.yaml or .qmd/index.yml and,
|
|
75
|
+
* when used by the CLI, keeps both config and index DB writes inside
|
|
76
|
+
* the project instead of the global ~/.config / ~/.cache locations.
|
|
77
|
+
*/
|
|
78
|
+
export function findLocalConfigPath(startDir = process.cwd()) {
|
|
79
|
+
let dir = resolve(startDir);
|
|
80
|
+
while (true) {
|
|
81
|
+
const qmdDir = join(dir, ".qmd");
|
|
82
|
+
const yamlPath = join(qmdDir, "index.yaml");
|
|
83
|
+
if (existsSync(yamlPath))
|
|
84
|
+
return yamlPath;
|
|
85
|
+
const ymlPath = join(qmdDir, "index.yml");
|
|
86
|
+
if (existsSync(ymlPath))
|
|
87
|
+
return ymlPath;
|
|
88
|
+
const parent = dirname(dir);
|
|
89
|
+
if (parent === dir)
|
|
90
|
+
return undefined;
|
|
91
|
+
dir = parent;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
/** Return the local SQLite index path paired with a local .qmd/index.yaml file. */
|
|
95
|
+
export function getLocalDbPath(configPath) {
|
|
96
|
+
return join(dirname(configPath), "index.sqlite");
|
|
97
|
+
}
|
|
74
98
|
/**
|
|
75
99
|
* Ensure config directory exists
|
|
76
100
|
*/
|
|
@@ -101,7 +125,8 @@ export function loadConfig() {
|
|
|
101
125
|
}
|
|
102
126
|
try {
|
|
103
127
|
const content = readFileSync(configPath, "utf-8");
|
|
104
|
-
const
|
|
128
|
+
const parsed = YAML.parse(content);
|
|
129
|
+
const config = parsed ?? { collections: {} };
|
|
105
130
|
// Ensure collections object exists
|
|
106
131
|
if (!config.collections) {
|
|
107
132
|
config.collections = {};
|
package/dist/db.d.ts
CHANGED
|
@@ -4,8 +4,15 @@
|
|
|
4
4
|
* Provides a unified Database export that works under both Bun (bun:sqlite)
|
|
5
5
|
* and Node.js (better-sqlite3). The APIs are nearly identical — the main
|
|
6
6
|
* difference is the import path.
|
|
7
|
+
*
|
|
8
|
+
* On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
|
|
9
|
+
* which prevents loading native extensions like sqlite-vec. When running under
|
|
10
|
+
* Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
|
|
11
|
+
* SQLite build before creating any database instances.
|
|
7
12
|
*/
|
|
8
13
|
export declare const isBun: boolean;
|
|
14
|
+
export type SQLiteValue = string | number | bigint | Buffer | Uint8Array | Float32Array | null;
|
|
15
|
+
export type SQLiteParams = readonly SQLiteValue[];
|
|
9
16
|
/**
|
|
10
17
|
* Open a SQLite database. Works with both bun:sqlite and better-sqlite3.
|
|
11
18
|
*/
|
|
@@ -17,17 +24,21 @@ export interface Database {
|
|
|
17
24
|
exec(sql: string): void;
|
|
18
25
|
prepare(sql: string): Statement;
|
|
19
26
|
loadExtension(path: string): void;
|
|
27
|
+
transaction<T extends (...args: SQLiteValue[]) => unknown>(fn: T): T;
|
|
20
28
|
close(): void;
|
|
21
29
|
}
|
|
22
30
|
export interface Statement {
|
|
23
|
-
run(...params:
|
|
31
|
+
run(...params: SQLiteValue[]): {
|
|
24
32
|
changes: number;
|
|
25
33
|
lastInsertRowid: number | bigint;
|
|
26
34
|
};
|
|
27
|
-
get(...params:
|
|
28
|
-
all(...params:
|
|
35
|
+
get<T = unknown>(...params: SQLiteValue[]): T | undefined;
|
|
36
|
+
all<T = unknown>(...params: SQLiteValue[]): T[];
|
|
29
37
|
}
|
|
30
38
|
/**
|
|
31
39
|
* Load the sqlite-vec extension into a database.
|
|
40
|
+
*
|
|
41
|
+
* Throws with platform-specific fix instructions when the extension is
|
|
42
|
+
* unavailable.
|
|
32
43
|
*/
|
|
33
44
|
export declare function loadSqliteVec(db: Database): void;
|
package/dist/db.js
CHANGED
|
@@ -4,16 +4,47 @@
|
|
|
4
4
|
* Provides a unified Database export that works under both Bun (bun:sqlite)
|
|
5
5
|
* and Node.js (better-sqlite3). The APIs are nearly identical — the main
|
|
6
6
|
* difference is the import path.
|
|
7
|
+
*
|
|
8
|
+
* On macOS, Apple's system SQLite is compiled with SQLITE_OMIT_LOAD_EXTENSION,
|
|
9
|
+
* which prevents loading native extensions like sqlite-vec. When running under
|
|
10
|
+
* Bun we call Database.setCustomSQLite() to swap in Homebrew's full-featured
|
|
11
|
+
* SQLite build before creating any database instances.
|
|
7
12
|
*/
|
|
8
|
-
export const isBun =
|
|
13
|
+
export const isBun = "Bun" in globalThis;
|
|
9
14
|
let _Database;
|
|
10
15
|
let _sqliteVecLoad;
|
|
11
16
|
if (isBun) {
|
|
12
17
|
// Dynamic string prevents tsc from resolving bun:sqlite on Node.js builds
|
|
13
18
|
const bunSqlite = "bun:" + "sqlite";
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
19
|
+
const BunDatabase = (await import(/* @vite-ignore */ bunSqlite)).Database;
|
|
20
|
+
// See: https://bun.com/docs/runtime/sqlite#setcustomsqlite
|
|
21
|
+
if (process.platform === "darwin") {
|
|
22
|
+
const homebrewPaths = [
|
|
23
|
+
"/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
|
|
24
|
+
"/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel
|
|
25
|
+
];
|
|
26
|
+
for (const p of homebrewPaths) {
|
|
27
|
+
try {
|
|
28
|
+
BunDatabase.setCustomSQLite(p);
|
|
29
|
+
break;
|
|
30
|
+
}
|
|
31
|
+
catch { }
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
_Database = BunDatabase;
|
|
35
|
+
// setCustomSQLite may have silently failed — test that extensions actually work.
|
|
36
|
+
try {
|
|
37
|
+
const { getLoadablePath } = await import("sqlite-vec");
|
|
38
|
+
const vecPath = getLoadablePath();
|
|
39
|
+
const testDb = new BunDatabase(":memory:");
|
|
40
|
+
testDb.loadExtension(vecPath);
|
|
41
|
+
testDb.close();
|
|
42
|
+
_sqliteVecLoad = (db) => db.loadExtension(vecPath);
|
|
43
|
+
}
|
|
44
|
+
catch {
|
|
45
|
+
// Vector search won't work, but BM25 and other operations are unaffected.
|
|
46
|
+
_sqliteVecLoad = null;
|
|
47
|
+
}
|
|
17
48
|
}
|
|
18
49
|
else {
|
|
19
50
|
_Database = (await import("better-sqlite3")).default;
|
|
@@ -28,7 +59,17 @@ export function openDatabase(path) {
|
|
|
28
59
|
}
|
|
29
60
|
/**
|
|
30
61
|
* Load the sqlite-vec extension into a database.
|
|
62
|
+
*
|
|
63
|
+
* Throws with platform-specific fix instructions when the extension is
|
|
64
|
+
* unavailable.
|
|
31
65
|
*/
|
|
32
66
|
export function loadSqliteVec(db) {
|
|
67
|
+
if (!_sqliteVecLoad) {
|
|
68
|
+
const hint = isBun && process.platform === "darwin"
|
|
69
|
+
? "On macOS with Bun, install Homebrew SQLite: brew install sqlite\n" +
|
|
70
|
+
"Or install qmd with npm instead: npm install -g @tobilu/qmd"
|
|
71
|
+
: "Ensure the sqlite-vec native module is installed correctly.";
|
|
72
|
+
throw new Error(`sqlite-vec extension is unavailable. ${hint}`);
|
|
73
|
+
}
|
|
33
74
|
_sqliteVecLoad(db);
|
|
34
75
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -16,11 +16,12 @@
|
|
|
16
16
|
* const results = await store.search({ query: "how does auth work?" })
|
|
17
17
|
* await store.close()
|
|
18
18
|
*/
|
|
19
|
-
import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult } from "./store.js";
|
|
19
|
+
import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult, type ChunkStrategy } from "./store.js";
|
|
20
20
|
import { type Collection, type CollectionConfig, type NamedCollection, type ContextMap } from "./collections.js";
|
|
21
21
|
export type { DocumentResult, DocumentNotFound, SearchResult, HybridQueryResult, HybridQueryOptions, HybridQueryExplain, ExpandedQuery, StructuredSearchOptions, MultiGetResult, IndexStatus, IndexHealthInfo, SearchHooks, ReindexProgress, ReindexResult, EmbedProgress, EmbedResult, Collection, CollectionConfig, NamedCollection, ContextMap, };
|
|
22
22
|
export type { InternalStore };
|
|
23
23
|
export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
|
|
24
|
+
export type { ChunkStrategy } from "./store.js";
|
|
24
25
|
export { getDefaultDbPath } from "./store.js";
|
|
25
26
|
export { Maintenance } from "./maintenance.js";
|
|
26
27
|
/**
|
|
@@ -61,10 +62,14 @@ export interface SearchOptions {
|
|
|
61
62
|
collections?: string[];
|
|
62
63
|
/** Max results (default: 10) */
|
|
63
64
|
limit?: number;
|
|
65
|
+
/** Max candidates to rerank (default: 40) */
|
|
66
|
+
candidateLimit?: number;
|
|
64
67
|
/** Minimum score threshold */
|
|
65
68
|
minScore?: number;
|
|
66
69
|
/** Include explain traces */
|
|
67
70
|
explain?: boolean;
|
|
71
|
+
/** Chunk strategy: "auto" (default, uses AST for code files) or "regex" (legacy) */
|
|
72
|
+
chunkStrategy?: ChunkStrategy;
|
|
68
73
|
}
|
|
69
74
|
/**
|
|
70
75
|
* Options for searchLex() — BM25 keyword search.
|
|
@@ -183,6 +188,11 @@ export interface QMDStore {
|
|
|
183
188
|
embed(options?: {
|
|
184
189
|
force?: boolean;
|
|
185
190
|
model?: string;
|
|
191
|
+
/** Restrict embedding to documents in one collection. */
|
|
192
|
+
collection?: string;
|
|
193
|
+
maxDocsPerBatch?: number;
|
|
194
|
+
maxBatchBytes?: number;
|
|
195
|
+
chunkStrategy?: ChunkStrategy;
|
|
186
196
|
onProgress?: (info: EmbedProgress) => void;
|
|
187
197
|
}): Promise<EmbedResult>;
|
|
188
198
|
/** Get index status (document counts, collections, embedding state) */
|
package/dist/index.js
CHANGED
|
@@ -16,10 +16,10 @@
|
|
|
16
16
|
* const results = await store.search({ query: "how does auth work?" })
|
|
17
17
|
* await store.close()
|
|
18
18
|
*/
|
|
19
|
-
import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers,
|
|
19
|
+
import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
|
|
20
20
|
import { LlamaCpp, } from "./llm.js";
|
|
21
21
|
import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
|
|
22
|
-
// Re-export utility functions used by frontends
|
|
22
|
+
// Re-export utility functions and types used by frontends
|
|
23
23
|
export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
|
|
24
24
|
// Re-export getDefaultDbPath for CLI/MCP that need the default database location
|
|
25
25
|
export { getDefaultDbPath } from "./store.js";
|
|
@@ -63,21 +63,26 @@ export async function createStore(options) {
|
|
|
63
63
|
// Track whether we have a YAML config path for write-through
|
|
64
64
|
const hasYamlConfig = !!options.configPath;
|
|
65
65
|
// Sync config into SQLite store_collections
|
|
66
|
+
let config;
|
|
66
67
|
if (options.configPath) {
|
|
67
68
|
// YAML mode: inject config source for write-through, sync to DB
|
|
68
69
|
setConfigSource({ configPath: options.configPath });
|
|
69
|
-
|
|
70
|
+
config = loadConfig();
|
|
70
71
|
syncConfigToDb(db, config);
|
|
71
72
|
}
|
|
72
73
|
else if (options.config) {
|
|
73
74
|
// Inline config mode: inject config source for mutations, sync to DB
|
|
74
75
|
setConfigSource({ config: options.config });
|
|
75
|
-
|
|
76
|
+
config = options.config;
|
|
77
|
+
syncConfigToDb(db, config);
|
|
76
78
|
}
|
|
77
79
|
// else: DB-only mode — no external config, use existing store_collections
|
|
78
80
|
// Create a per-store LlamaCpp instance — lazy-loads models on first use,
|
|
79
81
|
// auto-unloads after 5 min inactivity to free VRAM.
|
|
80
82
|
const llm = new LlamaCpp({
|
|
83
|
+
embedModel: config?.models?.embed,
|
|
84
|
+
generateModel: config?.models?.generate,
|
|
85
|
+
rerankModel: config?.models?.rerank,
|
|
81
86
|
inactivityTimeoutMs: 5 * 60 * 1000,
|
|
82
87
|
disposeModelsOnInactivity: true,
|
|
83
88
|
});
|
|
@@ -104,7 +109,9 @@ export async function createStore(options) {
|
|
|
104
109
|
minScore: opts.minScore,
|
|
105
110
|
explain: opts.explain,
|
|
106
111
|
intent: opts.intent,
|
|
112
|
+
candidateLimit: opts.candidateLimit,
|
|
107
113
|
skipRerank,
|
|
114
|
+
chunkStrategy: opts.chunkStrategy,
|
|
108
115
|
});
|
|
109
116
|
}
|
|
110
117
|
// Simple query string — use hybridQuery (expand + search + rerank)
|
|
@@ -114,11 +121,13 @@ export async function createStore(options) {
|
|
|
114
121
|
minScore: opts.minScore,
|
|
115
122
|
explain: opts.explain,
|
|
116
123
|
intent: opts.intent,
|
|
124
|
+
candidateLimit: opts.candidateLimit,
|
|
117
125
|
skipRerank,
|
|
126
|
+
chunkStrategy: opts.chunkStrategy,
|
|
118
127
|
});
|
|
119
128
|
},
|
|
120
129
|
searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
|
|
121
|
-
searchVector: async (q, opts) => internal.searchVec(q,
|
|
130
|
+
searchVector: async (q, opts) => internal.searchVec(q, llm.embedModelName, opts?.limit, opts?.collection),
|
|
122
131
|
expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
|
|
123
132
|
get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
|
|
124
133
|
getDocumentBody: async (pathOrDocid, opts) => {
|
|
@@ -210,6 +219,10 @@ export async function createStore(options) {
|
|
|
210
219
|
return generateEmbeddings(internal, {
|
|
211
220
|
force: embedOpts?.force,
|
|
212
221
|
model: embedOpts?.model,
|
|
222
|
+
collection: embedOpts?.collection,
|
|
223
|
+
maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
|
|
224
|
+
maxBatchBytes: embedOpts?.maxBatchBytes,
|
|
225
|
+
chunkStrategy: embedOpts?.chunkStrategy,
|
|
213
226
|
onProgress: embedOpts?.onProgress,
|
|
214
227
|
});
|
|
215
228
|
},
|
package/dist/llm.d.ts
CHANGED
|
@@ -3,7 +3,27 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Provides embeddings, text generation, and reranking using local GGUF models.
|
|
5
5
|
*/
|
|
6
|
-
import {
|
|
6
|
+
import type { Llama, Token as LlamaToken } from "node-llama-cpp";
|
|
7
|
+
type NodeLlamaCppModule = {
|
|
8
|
+
getLlama: (options: Record<string, unknown>) => Promise<Llama>;
|
|
9
|
+
getLlamaGpuTypes?: (include?: "supported" | "allValid") => Promise<LlamaGpuMode[]>;
|
|
10
|
+
resolveModelFile: (model: string, cacheDir: string) => Promise<string>;
|
|
11
|
+
LlamaChatSession: new (options: {
|
|
12
|
+
contextSequence: unknown;
|
|
13
|
+
}) => {
|
|
14
|
+
prompt: (prompt: string, options?: Record<string, unknown>) => Promise<string>;
|
|
15
|
+
};
|
|
16
|
+
LlamaLogLevel: {
|
|
17
|
+
error: unknown;
|
|
18
|
+
};
|
|
19
|
+
};
|
|
20
|
+
export declare function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null): void;
|
|
21
|
+
/**
|
|
22
|
+
* Some node-llama-cpp native build/probe paths write library noise to stdout.
|
|
23
|
+
* JSON APIs must reserve stdout for machine-readable payloads, so route that
|
|
24
|
+
* noise to stderr while native llama initialization is in progress.
|
|
25
|
+
*/
|
|
26
|
+
export declare function withNativeStdoutRedirectedToStderr<T>(fn: () => Promise<T>): Promise<T>;
|
|
7
27
|
/**
|
|
8
28
|
* Detect if a model URI uses the Qwen3-Embedding format.
|
|
9
29
|
* Qwen3-Embedding uses a different prompting style than nomic/embeddinggemma.
|
|
@@ -105,7 +125,7 @@ export type LLMSessionOptions = {
|
|
|
105
125
|
*/
|
|
106
126
|
export interface ILLMSession {
|
|
107
127
|
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
|
|
108
|
-
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
|
|
128
|
+
embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
|
|
109
129
|
expandQuery(query: string, options?: {
|
|
110
130
|
context?: string;
|
|
111
131
|
includeLexical?: boolean;
|
|
@@ -137,9 +157,18 @@ export type RerankDocument = {
|
|
|
137
157
|
};
|
|
138
158
|
export declare const LFM2_GENERATE_MODEL = "hf:LiquidAI/LFM2-1.2B-GGUF/LFM2-1.2B-Q4_K_M.gguf";
|
|
139
159
|
export declare const LFM2_INSTRUCT_MODEL = "hf:LiquidAI/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf";
|
|
140
|
-
export declare const DEFAULT_EMBED_MODEL_URI:
|
|
160
|
+
export declare const DEFAULT_EMBED_MODEL_URI = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
141
161
|
export declare const DEFAULT_RERANK_MODEL_URI = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
142
162
|
export declare const DEFAULT_GENERATE_MODEL_URI = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
163
|
+
export type ModelResolutionConfig = {
|
|
164
|
+
embed?: string;
|
|
165
|
+
generate?: string;
|
|
166
|
+
rerank?: string;
|
|
167
|
+
};
|
|
168
|
+
export declare function resolveEmbedModel(config?: ModelResolutionConfig): string;
|
|
169
|
+
export declare function resolveGenerateModel(config?: ModelResolutionConfig): string;
|
|
170
|
+
export declare function resolveRerankModel(config?: ModelResolutionConfig): string;
|
|
171
|
+
export declare function resolveModels(config?: ModelResolutionConfig): Required<ModelResolutionConfig>;
|
|
143
172
|
export declare const DEFAULT_MODEL_CACHE_DIR: string;
|
|
144
173
|
export type PullResult = {
|
|
145
174
|
model: string;
|
|
@@ -147,6 +176,19 @@ export type PullResult = {
|
|
|
147
176
|
sizeBytes: number;
|
|
148
177
|
refreshed: boolean;
|
|
149
178
|
};
|
|
179
|
+
export type GgufFileInspection = {
|
|
180
|
+
exists: boolean;
|
|
181
|
+
valid: boolean;
|
|
182
|
+
kind: "missing" | "gguf" | "html" | "invalid";
|
|
183
|
+
sizeBytes?: number;
|
|
184
|
+
magic?: string;
|
|
185
|
+
details: string;
|
|
186
|
+
};
|
|
187
|
+
/**
|
|
188
|
+
* Inspect a potential GGUF model file without mutating it.
|
|
189
|
+
* Used by doctor for early diagnostics and by runtime validation before load.
|
|
190
|
+
*/
|
|
191
|
+
export declare function inspectGgufFile(filePath: string): GgufFileInspection;
|
|
150
192
|
export declare function pullModels(models: string[], options?: {
|
|
151
193
|
refresh?: boolean;
|
|
152
194
|
cacheDir?: string;
|
|
@@ -211,6 +253,16 @@ export type LlamaCppConfig = {
|
|
|
211
253
|
*/
|
|
212
254
|
disposeModelsOnInactivity?: boolean;
|
|
213
255
|
};
|
|
256
|
+
export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
|
|
257
|
+
type ParallelismOptions = {
|
|
258
|
+
gpu: string | false;
|
|
259
|
+
platform?: NodeJS.Platform;
|
|
260
|
+
computed: number;
|
|
261
|
+
envValue?: string;
|
|
262
|
+
};
|
|
263
|
+
export declare function resolveParallelismOverride(envValue?: string | undefined): number | undefined;
|
|
264
|
+
export declare function resolveSafeParallelism(options: ParallelismOptions): number;
|
|
265
|
+
export declare function resolveLlamaGpuMode(envValue?: string | undefined, forceCpuValue?: string | undefined): LlamaGpuMode;
|
|
214
266
|
export declare class LlamaCpp implements LLM {
|
|
215
267
|
private readonly _ciMode;
|
|
216
268
|
private llama;
|
|
@@ -232,6 +284,9 @@ export declare class LlamaCpp implements LLM {
|
|
|
232
284
|
private disposeModelsOnInactivity;
|
|
233
285
|
private disposed;
|
|
234
286
|
constructor(config?: LlamaCppConfig);
|
|
287
|
+
get embedModelName(): string;
|
|
288
|
+
get generateModelName(): string;
|
|
289
|
+
get rerankModelName(): string;
|
|
235
290
|
/**
|
|
236
291
|
* Reset the inactivity timer. Called after each model operation.
|
|
237
292
|
* When timer fires, models are unloaded to free memory (if no active sessions).
|
|
@@ -256,8 +311,12 @@ export declare class LlamaCpp implements LLM {
|
|
|
256
311
|
* Initialize the llama instance (lazy)
|
|
257
312
|
*/
|
|
258
313
|
private ensureLlama;
|
|
314
|
+
private isCpuOffloadForced;
|
|
315
|
+
private modelLoadOptions;
|
|
259
316
|
/**
|
|
260
|
-
* Resolve a model URI to a local path, downloading if needed
|
|
317
|
+
* Resolve a model URI to a local path, downloading if needed.
|
|
318
|
+
* Validates the downloaded file is actually a GGUF model (not an HTML error page
|
|
319
|
+
* from a proxy or firewall).
|
|
261
320
|
*/
|
|
262
321
|
private resolveModel;
|
|
263
322
|
/**
|
|
@@ -306,6 +365,7 @@ export declare class LlamaCpp implements LLM {
|
|
|
306
365
|
* - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
|
|
307
366
|
*/
|
|
308
367
|
private static readonly RERANK_CONTEXT_SIZE;
|
|
368
|
+
private static readonly EMBED_CONTEXT_SIZE;
|
|
309
369
|
private ensureRerankContexts;
|
|
310
370
|
/**
|
|
311
371
|
* Tokenize text using the embedding model's tokenizer
|
|
@@ -320,12 +380,20 @@ export declare class LlamaCpp implements LLM {
|
|
|
320
380
|
* Detokenize token IDs back to text
|
|
321
381
|
*/
|
|
322
382
|
detokenize(tokens: readonly LlamaToken[]): Promise<string>;
|
|
383
|
+
/**
|
|
384
|
+
* Truncate text to fit within the embedding model's context window.
|
|
385
|
+
* Uses the model's own tokenizer for accurate token counting, then
|
|
386
|
+
* detokenizes back to text if truncation is needed.
|
|
387
|
+
* Returns the (possibly truncated) text and whether truncation occurred.
|
|
388
|
+
*/
|
|
389
|
+
private resolveEmbedTokenLimit;
|
|
390
|
+
private truncateToContextSize;
|
|
323
391
|
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
|
|
324
392
|
/**
|
|
325
393
|
* Batch embed multiple texts efficiently
|
|
326
394
|
* Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
|
|
327
395
|
*/
|
|
328
|
-
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
|
|
396
|
+
embedBatch(texts: string[], options?: EmbedOptions): Promise<(EmbeddingResult | null)[]>;
|
|
329
397
|
generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult | null>;
|
|
330
398
|
modelExists(modelUri: string): Promise<ModelInfo>;
|
|
331
399
|
expandQuery(query: string, options?: {
|
|
@@ -340,7 +408,9 @@ export declare class LlamaCpp implements LLM {
|
|
|
340
408
|
* Get device/GPU info for status display.
|
|
341
409
|
* Initializes llama if not already done.
|
|
342
410
|
*/
|
|
343
|
-
getDeviceInfo(
|
|
411
|
+
getDeviceInfo(options?: {
|
|
412
|
+
allowBuild?: boolean;
|
|
413
|
+
}): Promise<{
|
|
344
414
|
gpu: string | false;
|
|
345
415
|
gpuOffloading: boolean;
|
|
346
416
|
gpuDevices: string[];
|
|
@@ -397,3 +467,4 @@ export declare function setDefaultLlamaCpp(llm: LlamaCpp | null): void;
|
|
|
397
467
|
* Call this before process exit to prevent NAPI crashes.
|
|
398
468
|
*/
|
|
399
469
|
export declare function disposeDefaultLlamaCpp(): Promise<void>;
|
|
470
|
+
export {};
|