@ambicuity/kindx 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +51 -0
- package/README.md +409 -129
- package/bin/kindx +38 -0
- package/capabilities/kindx/SKILL.md +127 -0
- package/capabilities/kindx/references/mcp-setup.md +102 -0
- package/dist/catalogs.js +57 -16
- package/dist/inference.d.ts +82 -7
- package/dist/inference.js +241 -49
- package/dist/kindx.js +425 -91
- package/dist/migrate.d.ts +2 -0
- package/dist/migrate.js +133 -0
- package/dist/protocol.d.ts +2 -1
- package/dist/protocol.js +110 -6
- package/dist/remote-llm.d.ts +23 -0
- package/dist/remote-llm.js +307 -0
- package/dist/repository.d.ts +18 -1
- package/dist/repository.js +260 -35
- package/dist/watcher.d.ts +29 -0
- package/dist/watcher.js +243 -0
- package/package.json +26 -11
package/bin/kindx
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import child_process from 'child_process';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import { fileURLToPath, pathToFileURL } from 'url';
|
|
6
|
+
|
|
7
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
8
|
+
const __dirname = path.dirname(__filename);
|
|
9
|
+
|
|
10
|
+
// Path to the compiled kindx executable
|
|
11
|
+
const binPath = path.join(__dirname, '..', 'dist', 'kindx.js');
|
|
12
|
+
|
|
13
|
+
if (!fs.existsSync(binPath)) {
|
|
14
|
+
console.error("Executable not found at " + binPath);
|
|
15
|
+
console.error("Did you run 'npm run build'?");
|
|
16
|
+
process.exit(1);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// Detect if running under bun. Bun has ABI mismatches with node's better-sqlite3
|
|
20
|
+
if (process.versions && process.versions.bun) {
  console.warn("WARNING: Running KINDX under Bun is known to cause better-sqlite3 ABI crashes.");
  console.warn("Attempting to spawn the process under Node.js instead...");

  try {
    // Re-run the compiled CLI under Node.js, forwarding the user's arguments,
    // environment and stdio unchanged.
    const result = child_process.spawnSync('node', [binPath, ...process.argv.slice(2)], {
      stdio: 'inherit',
      env: process.env
    });

    // spawnSync does not throw when the child cannot be started (for example
    // when `node` is not on PATH); it reports the failure through result.error.
    // Re-throw so the handler below runs instead of silently exiting with 0.
    if (result.error) {
      throw result.error;
    }

    // status is null when the child was terminated by a signal; report that as
    // a failure rather than as success.
    process.exit(result.status ?? 1);
  } catch (err) {
    console.error("Failed to spawn node. Please ensure Node.js is installed.");
    if (err instanceof Error && err.message) {
      console.error(err.message);
    }
    process.exit(1);
  }
} else {
  // If not bun, just import the compiled js directly.
  // argv[1] is pointed at the compiled entry before it is loaded.
  process.argv[1] = binPath;
  import(pathToFileURL(binPath).href);
}
|
|
package/capabilities/kindx/SKILL.md
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: kindx
|
|
3
|
+
description: Search markdown knowledge bases, notes, and documentation using KINDX. Use when users ask to search notes, find documents, or look up information.
|
|
4
|
+
license: MIT
|
|
5
|
+
compatibility: Requires kindx CLI or MCP server. Install via `npm install -g @ambicuity/kindx`.
|
|
6
|
+
metadata:
|
|
7
|
+
author: riteshrana
|
|
8
|
+
version: "2.0.0"
|
|
9
|
+
allowed-tools: Bash(kindx:*), mcp__kindx__*
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# KINDX - Knowledge INDexer
|
|
13
|
+
|
|
14
|
+
Local search engine for markdown content.
|
|
15
|
+
|
|
16
|
+
## Status
|
|
17
|
+
|
|
18
|
+
!`kindx status 2>/dev/null || echo "Not installed: npm install -g @ambicuity/kindx"`
|
|
19
|
+
|
|
20
|
+
## MCP: `query`
|
|
21
|
+
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"searches": [
|
|
25
|
+
{ "type": "lex", "query": "CAP theorem consistency" },
|
|
26
|
+
{ "type": "vec", "query": "tradeoff between consistency and availability" }
|
|
27
|
+
],
|
|
28
|
+
"collections": ["docs"],
|
|
29
|
+
"limit": 10
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Query Types
|
|
34
|
+
|
|
35
|
+
| Type | Method | Input |
|
|
36
|
+
|------|--------|-------|
|
|
37
|
+
| `lex` | BM25 | Keywords — exact terms, names, code |
|
|
38
|
+
| `vec` | Vector | Question — natural language |
|
|
39
|
+
| `hyde` | Vector | Answer — hypothetical result (50-100 words) |
|
|
40
|
+
|
|
41
|
+
### Writing Good Queries
|
|
42
|
+
|
|
43
|
+
**lex (keyword)**
|
|
44
|
+
- 2-5 terms, no filler words
|
|
45
|
+
- Exact phrase: `"connection pool"` (quoted)
|
|
46
|
+
- Exclude terms: `performance -sports` (minus prefix)
|
|
47
|
+
- Code identifiers work: `handleError async`
|
|
48
|
+
|
|
49
|
+
**vec (semantic)**
|
|
50
|
+
- Full natural language question
|
|
51
|
+
- Be specific: `"how does the rate limiter handle burst traffic"`
|
|
52
|
+
- Include context: `"in the payment service, how are refunds processed"`
|
|
53
|
+
|
|
54
|
+
**hyde (hypothetical document)**
|
|
55
|
+
- Write 50-100 words of what the *answer* looks like
|
|
56
|
+
- Use the vocabulary you expect in the result
|
|
57
|
+
|
|
58
|
+
**expand (auto-expand)**
|
|
59
|
+
- Use a single-line query (implicit) or `expand: question` on its own line
|
|
60
|
+
- Lets the local LLM generate lex/vec/hyde variations
|
|
61
|
+
- Do not mix `expand:` with other typed lines — it's either a standalone expand query or a full query document
|
|
62
|
+
|
|
63
|
+
### Combining Types
|
|
64
|
+
|
|
65
|
+
| Goal | Approach |
|
|
66
|
+
|------|----------|
|
|
67
|
+
| Know exact terms | `lex` only |
|
|
68
|
+
| Don't know vocabulary | Use a single-line query (implicit `expand:`) or `vec` |
|
|
69
|
+
| Best recall | `lex` + `vec` |
|
|
70
|
+
| Complex topic | `lex` + `vec` + `hyde` |
|
|
71
|
+
|
|
72
|
+
First query gets 2x weight in fusion — put your best guess first.
|
|
73
|
+
|
|
74
|
+
### Lex Query Syntax
|
|
75
|
+
|
|
76
|
+
| Syntax | Meaning | Example |
|
|
77
|
+
|--------|---------|---------|
|
|
78
|
+
| `term` | Prefix match | `perf` matches "performance" |
|
|
79
|
+
| `"phrase"` | Exact phrase | `"rate limiter"` |
|
|
80
|
+
| `-term` | Exclude | `performance -sports` |
|
|
81
|
+
|
|
82
|
+
Note: `-term` only works in lex queries, not vec/hyde.
|
|
83
|
+
|
|
84
|
+
### Collection Filtering
|
|
85
|
+
|
|
86
|
+
```jsonc
|
|
87
|
+
{ "collections": ["docs"] } // Single
|
|
88
|
+
{ "collections": ["docs", "notes"] } // Multiple (OR)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Omit to search all collections.
|
|
92
|
+
|
|
93
|
+
## Other MCP Tools
|
|
94
|
+
|
|
95
|
+
| Tool | Use |
|
|
96
|
+
|------|-----|
|
|
97
|
+
| `get` | Retrieve doc by path or `#docid` |
|
|
98
|
+
| `multi_get` | Retrieve multiple by glob/list |
|
|
99
|
+
| `status` | Collections and health |
|
|
100
|
+
|
|
101
|
+
## CLI
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
kindx query "question" # Auto-expand + rerank
|
|
105
|
+
kindx query $'lex: X\nvec: Y' # Structured
|
|
106
|
+
kindx query $'expand: question' # Explicit expand
|
|
107
|
+
kindx search "keywords" # BM25 only (no LLM)
|
|
108
|
+
kindx get "#abc123" # By docid
|
|
109
|
+
kindx multi-get "journals/2026-*.md" -l 40 # Batch pull snippets by glob
|
|
110
|
+
kindx multi-get notes/foo.md,notes/bar.md # Comma-separated list, preserves order
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## HTTP API
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
curl -X POST http://localhost:8181/query \
|
|
117
|
+
-H "Content-Type: application/json" \
|
|
118
|
+
-d '{"searches": [{"type": "lex", "query": "test"}]}'
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Setup
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
npm install -g @ambicuity/kindx
|
|
125
|
+
kindx collection add ~/notes --name notes
|
|
126
|
+
kindx embed
|
|
127
|
+
```
|
|
package/capabilities/kindx/references/mcp-setup.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# KINDX MCP Server Setup
|
|
2
|
+
|
|
3
|
+
## Install
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npm install -g @ambicuity/kindx
|
|
7
|
+
kindx collection add ~/path/to/markdown --name myknowledge
|
|
8
|
+
kindx embed
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Configure MCP Client
|
|
12
|
+
|
|
13
|
+
**Claude Code** (`~/.claude/settings.json`):
|
|
14
|
+
```json
|
|
15
|
+
{
|
|
16
|
+
"mcpServers": {
|
|
17
|
+
"kindx": { "command": "kindx", "args": ["mcp"] }
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
23
|
+
```json
|
|
24
|
+
{
|
|
25
|
+
"mcpServers": {
|
|
26
|
+
"kindx": { "command": "kindx", "args": ["mcp"] }
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**OpenClaw** (`~/.openclaw/openclaw.json`):
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcp": {
|
|
35
|
+
"servers": {
|
|
36
|
+
"kindx": { "command": "kindx", "args": ["mcp"] }
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## HTTP Mode
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
kindx mcp --http # Port 8181
|
|
46
|
+
kindx mcp --http --daemon # Background
|
|
47
|
+
kindx mcp stop # Stop daemon
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Tools
|
|
51
|
+
|
|
52
|
+
### structured_search
|
|
53
|
+
|
|
54
|
+
Search with pre-expanded queries.
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
{
|
|
58
|
+
"searches": [
|
|
59
|
+
{ "type": "lex", "query": "keyword phrases" },
|
|
60
|
+
{ "type": "vec", "query": "natural language question" },
|
|
61
|
+
{ "type": "hyde", "query": "hypothetical answer passage..." }
|
|
62
|
+
],
|
|
63
|
+
"limit": 10,
|
|
64
|
+
"collection": "optional",
|
|
65
|
+
"minScore": 0.0
|
|
66
|
+
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
| Type | Method | Input |
|
|
70
|
+
|------|--------|-------|
|
|
71
|
+
| `lex` | BM25 | Keywords (2-5 terms) |
|
|
72
|
+
| `vec` | Vector | Question |
|
|
73
|
+
| `hyde` | Vector | Answer passage (50-100 words) |
|
|
74
|
+
|
|
75
|
+
### get
|
|
76
|
+
|
|
77
|
+
Retrieve document by path or `#docid`.
|
|
78
|
+
|
|
79
|
+
| Param | Type | Description |
|
|
80
|
+
|-------|------|-------------|
|
|
81
|
+
| `path` | string | File path or `#docid` |
|
|
82
|
+
| `full` | bool? | Return full content |
|
|
83
|
+
| `lineNumbers` | bool? | Add line numbers |
|
|
84
|
+
|
|
85
|
+
### multi_get
|
|
86
|
+
|
|
87
|
+
Retrieve multiple documents.
|
|
88
|
+
|
|
89
|
+
| Param | Type | Description |
|
|
90
|
+
|-------|------|-------------|
|
|
91
|
+
| `pattern` | string | Glob or comma-separated list |
|
|
92
|
+
| `maxBytes` | number? | Skip large files (default 10KB) |
|
|
93
|
+
|
|
94
|
+
### status
|
|
95
|
+
|
|
96
|
+
Index health and collections. No params.
|
|
97
|
+
|
|
98
|
+
## Troubleshooting
|
|
99
|
+
|
|
100
|
+
- **Not starting**: confirm the binary is on your PATH with `which kindx`, then run `kindx mcp` manually and check its output
|
|
101
|
+
- **No results**: check that your collections are registered with `kindx collection list`, then run `kindx embed`
|
|
102
|
+
- **Slow first search**: Normal, models loading (~3GB)
|
package/dist/catalogs.js
CHANGED
|
@@ -101,7 +101,13 @@ export function saveConfig(config) {
|
|
|
101
101
|
*/
|
|
102
102
|
export function getCollection(name) {
|
|
103
103
|
const config = loadConfig();
|
|
104
|
-
|
|
104
|
+
let collection;
|
|
105
|
+
if (Array.isArray(config.collections)) {
|
|
106
|
+
collection = config.collections.find((c) => c.name === name);
|
|
107
|
+
}
|
|
108
|
+
else {
|
|
109
|
+
collection = config.collections[name];
|
|
110
|
+
}
|
|
105
111
|
if (!collection) {
|
|
106
112
|
return null;
|
|
107
113
|
}
|
|
@@ -134,7 +140,13 @@ export function getDefaultCollectionNames() {
|
|
|
134
140
|
*/
|
|
135
141
|
export function updateCollectionSettings(name, settings) {
|
|
136
142
|
const config = loadConfig();
|
|
137
|
-
|
|
143
|
+
let collection;
|
|
144
|
+
if (Array.isArray(config.collections)) {
|
|
145
|
+
collection = config.collections.find((c) => c.name === name);
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
collection = config.collections[name];
|
|
149
|
+
}
|
|
138
150
|
if (!collection)
|
|
139
151
|
return false;
|
|
140
152
|
if (settings.update !== undefined) {
|
|
@@ -162,11 +174,22 @@ export function updateCollectionSettings(name, settings) {
|
|
|
162
174
|
*/
|
|
163
175
|
export function addCollection(name, path, pattern = "**/*.md") {
|
|
164
176
|
const config = loadConfig();
|
|
165
|
-
config.collections
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
177
|
+
if (Array.isArray(config.collections)) {
|
|
178
|
+
const existingIdx = config.collections.findIndex((c) => c.name === name);
|
|
179
|
+
if (existingIdx >= 0) {
|
|
180
|
+
config.collections[existingIdx] = { name, path, pattern, context: config.collections[existingIdx].context };
|
|
181
|
+
}
|
|
182
|
+
else {
|
|
183
|
+
config.collections.push({ name, path, pattern });
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
else {
|
|
187
|
+
config.collections[name] = {
|
|
188
|
+
path,
|
|
189
|
+
pattern,
|
|
190
|
+
context: config.collections[name]?.context, // Preserve existing context
|
|
191
|
+
};
|
|
192
|
+
}
|
|
170
193
|
saveConfig(config);
|
|
171
194
|
}
|
|
172
195
|
/**
|
|
@@ -174,10 +197,18 @@ export function addCollection(name, path, pattern = "**/*.md") {
|
|
|
174
197
|
*/
|
|
175
198
|
export function removeCollection(name) {
|
|
176
199
|
const config = loadConfig();
|
|
177
|
-
if (
|
|
178
|
-
|
|
200
|
+
if (Array.isArray(config.collections)) {
|
|
201
|
+
const idx = config.collections.findIndex((c) => c.name === name);
|
|
202
|
+
if (idx === -1)
|
|
203
|
+
return false;
|
|
204
|
+
config.collections.splice(idx, 1);
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
if (!config.collections[name]) {
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
210
|
+
delete config.collections[name];
|
|
179
211
|
}
|
|
180
|
-
delete config.collections[name];
|
|
181
212
|
saveConfig(config);
|
|
182
213
|
return true;
|
|
183
214
|
}
|
|
@@ -186,14 +217,24 @@ export function removeCollection(name) {
|
|
|
186
217
|
*/
|
|
187
218
|
export function renameCollection(oldName, newName) {
|
|
188
219
|
const config = loadConfig();
|
|
189
|
-
if (
|
|
190
|
-
|
|
220
|
+
if (Array.isArray(config.collections)) {
|
|
221
|
+
if (config.collections.some((c) => c.name === newName))
|
|
222
|
+
throw new Error(`Collection '${newName}' already exists`);
|
|
223
|
+
const idx = config.collections.findIndex((c) => c.name === oldName);
|
|
224
|
+
if (idx === -1)
|
|
225
|
+
return false;
|
|
226
|
+
config.collections[idx].name = newName;
|
|
191
227
|
}
|
|
192
|
-
|
|
193
|
-
|
|
228
|
+
else {
|
|
229
|
+
if (!config.collections[oldName]) {
|
|
230
|
+
return false;
|
|
231
|
+
}
|
|
232
|
+
if (config.collections[newName]) {
|
|
233
|
+
throw new Error(`Collection '${newName}' already exists`);
|
|
234
|
+
}
|
|
235
|
+
config.collections[newName] = config.collections[oldName];
|
|
236
|
+
delete config.collections[oldName];
|
|
194
237
|
}
|
|
195
|
-
config.collections[newName] = config.collections[oldName];
|
|
196
|
-
delete config.collections[oldName];
|
|
197
238
|
saveConfig(config);
|
|
198
239
|
return true;
|
|
199
240
|
}
|
package/dist/inference.d.ts
CHANGED
|
@@ -159,6 +159,10 @@ export interface LLM {
|
|
|
159
159
|
* Get embeddings for text
|
|
160
160
|
*/
|
|
161
161
|
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;
|
|
162
|
+
/**
|
|
163
|
+
* Batch get embeddings for text
|
|
164
|
+
*/
|
|
165
|
+
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;
|
|
162
166
|
/**
|
|
163
167
|
* Generate text completion
|
|
164
168
|
*/
|
|
@@ -180,6 +184,28 @@ export interface LLM {
|
|
|
180
184
|
* Returns list of documents with relevance scores (higher = more relevant)
|
|
181
185
|
*/
|
|
182
186
|
rerank(query: string, documents: RerankDocument[], options?: RerankOptions): Promise<RerankResult>;
|
|
187
|
+
/**
|
|
188
|
+
* Tokenize text into backend-specific tokens (optional, implemented by local models)
|
|
189
|
+
*/
|
|
190
|
+
tokenize?(text: string): Promise<readonly any[]>;
|
|
191
|
+
/**
|
|
192
|
+
* Detokenize token IDs back to text (optional)
|
|
193
|
+
*/
|
|
194
|
+
detokenize?(tokens: readonly any[]): Promise<string>;
|
|
195
|
+
/**
|
|
196
|
+
* Get device and GPU accelerator info (optional)
|
|
197
|
+
*/
|
|
198
|
+
getDeviceInfo?(): Promise<{
|
|
199
|
+
gpu: string | false;
|
|
200
|
+
gpuOffloading: boolean;
|
|
201
|
+
gpuDevices: string[];
|
|
202
|
+
vram?: {
|
|
203
|
+
total: number;
|
|
204
|
+
used: number;
|
|
205
|
+
free: number;
|
|
206
|
+
};
|
|
207
|
+
cpuCores: number;
|
|
208
|
+
}>;
|
|
183
209
|
/**
|
|
184
210
|
* Dispose of resources
|
|
185
211
|
*/
|
|
@@ -216,6 +242,42 @@ export type LlamaCppConfig = {
|
|
|
216
242
|
* memory reclaim.
|
|
217
243
|
*/
|
|
218
244
|
disposeModelsOnInactivity?: boolean;
|
|
245
|
+
/**
|
|
246
|
+
* Force low-VRAM mode on/off.
|
|
247
|
+
* When undefined, KINDX auto-detects low VRAM from free GPU memory.
|
|
248
|
+
* Can also be set via KINDX_LOW_VRAM=1|0.
|
|
249
|
+
*/
|
|
250
|
+
lowVram?: boolean;
|
|
251
|
+
/**
|
|
252
|
+
* Optional VRAM budget in MB. When set, KINDX constrains context sizing and
|
|
253
|
+
* parallelism to fit this budget. Can also be set via KINDX_VRAM_BUDGET_MB.
|
|
254
|
+
*/
|
|
255
|
+
vramBudgetMB?: number;
|
|
256
|
+
/**
|
|
257
|
+
* Free VRAM threshold in MB for auto low-VRAM mode (default: 6144 MB).
|
|
258
|
+
* Can also be set via KINDX_LOW_VRAM_THRESHOLD_MB.
|
|
259
|
+
*/
|
|
260
|
+
lowVramThresholdMB?: number;
|
|
261
|
+
/**
|
|
262
|
+
* Parallelism cap for embedding contexts when low-VRAM mode is active (default: 2).
|
|
263
|
+
* Can also be set via KINDX_LOW_VRAM_EMBED_PARALLELISM.
|
|
264
|
+
*/
|
|
265
|
+
lowVramEmbedParallelism?: number;
|
|
266
|
+
/**
|
|
267
|
+
* Parallelism cap for reranker contexts when low-VRAM mode is active (default: 1).
|
|
268
|
+
* Can also be set via KINDX_LOW_VRAM_RERANK_PARALLELISM.
|
|
269
|
+
*/
|
|
270
|
+
lowVramRerankParallelism?: number;
|
|
271
|
+
/**
|
|
272
|
+
* Expansion context size used when low-VRAM mode is active (default: 1024).
|
|
273
|
+
* Can also be set via KINDX_LOW_VRAM_EXPAND_CONTEXT_SIZE.
|
|
274
|
+
*/
|
|
275
|
+
lowVramExpandContextSize?: number;
|
|
276
|
+
/**
|
|
277
|
+
* Rerank context size used when low-VRAM mode is active (default: 1024).
|
|
278
|
+
* Can also be set via KINDX_LOW_VRAM_RERANK_CONTEXT_SIZE.
|
|
279
|
+
*/
|
|
280
|
+
lowVramRerankContextSize?: number;
|
|
219
281
|
};
|
|
220
282
|
export declare class LlamaCpp implements LLM {
|
|
221
283
|
private llama;
|
|
@@ -230,6 +292,15 @@ export declare class LlamaCpp implements LLM {
|
|
|
230
292
|
private modelCacheDir;
|
|
231
293
|
private rerankContextSize;
|
|
232
294
|
private expandContextSize;
|
|
295
|
+
private lowVramOverride;
|
|
296
|
+
private vramBudgetMB;
|
|
297
|
+
private lowVramThresholdMB;
|
|
298
|
+
private lowVramEmbedParallelism;
|
|
299
|
+
private lowVramRerankParallelism;
|
|
300
|
+
private lowVramExpandContextSize;
|
|
301
|
+
private lowVramRerankContextSize;
|
|
302
|
+
private memoryPolicyPromise;
|
|
303
|
+
private lowVramWarningShown;
|
|
233
304
|
private embedModelLoadPromise;
|
|
234
305
|
private generateModelLoadPromise;
|
|
235
306
|
private rerankModelLoadPromise;
|
|
@@ -270,10 +341,14 @@ export declare class LlamaCpp implements LLM {
|
|
|
270
341
|
* Load embedding model (lazy)
|
|
271
342
|
*/
|
|
272
343
|
private ensureEmbedModel;
|
|
344
|
+
private showLowVramWarning;
|
|
345
|
+
private resolveMemoryPolicy;
|
|
346
|
+
private effectiveExpandContextSize;
|
|
347
|
+
private effectiveRerankContextSize;
|
|
273
348
|
/**
|
|
274
349
|
* Compute how many parallel contexts to create.
|
|
275
350
|
*
|
|
276
|
-
* GPU: constrained by VRAM
|
|
351
|
+
* GPU: constrained by free VRAM / budget and low-VRAM policy caps.
|
|
277
352
|
* CPU: constrained by cores. Splitting threads across contexts enables
|
|
278
353
|
* true parallelism (each context runs on its own cores). Use at most
|
|
279
354
|
* half the math cores, with at least 4 threads per context.
|
|
@@ -384,15 +459,15 @@ export declare function withLLMSession<T>(fn: (session: ILLMSession) => Promise<
|
|
|
384
459
|
*/
|
|
385
460
|
export declare function canUnloadLLM(): boolean;
|
|
386
461
|
/**
|
|
387
|
-
* Get the default
|
|
462
|
+
* Get the default LLM instance (creates one if needed)
|
|
388
463
|
*/
|
|
389
|
-
export declare function
|
|
464
|
+
export declare function getDefaultLLM(): LLM;
|
|
390
465
|
/**
|
|
391
|
-
* Set a custom default
|
|
466
|
+
* Set a custom default LLM instance (useful for testing)
|
|
392
467
|
*/
|
|
393
|
-
export declare function
|
|
468
|
+
export declare function setDefaultLLM(llm: LLM | null): void;
|
|
394
469
|
/**
|
|
395
|
-
* Dispose the default
|
|
470
|
+
* Dispose the default LLM instance if it exists.
|
|
396
471
|
* Call this before process exit to prevent NAPI crashes.
|
|
397
472
|
*/
|
|
398
|
-
export declare function
|
|
473
|
+
export declare function disposeDefaultLLM(): Promise<void>;
|