@gmickel/gno 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/assets/skill/SKILL.md +112 -0
- package/assets/skill/cli-reference.md +327 -0
- package/assets/skill/examples.md +234 -0
- package/assets/skill/mcp-reference.md +159 -0
- package/package.json +90 -0
- package/src/app/constants.ts +313 -0
- package/src/cli/colors.ts +65 -0
- package/src/cli/commands/ask.ts +545 -0
- package/src/cli/commands/cleanup.ts +105 -0
- package/src/cli/commands/collection/add.ts +120 -0
- package/src/cli/commands/collection/index.ts +10 -0
- package/src/cli/commands/collection/list.ts +108 -0
- package/src/cli/commands/collection/remove.ts +64 -0
- package/src/cli/commands/collection/rename.ts +95 -0
- package/src/cli/commands/context/add.ts +67 -0
- package/src/cli/commands/context/check.ts +153 -0
- package/src/cli/commands/context/index.ts +10 -0
- package/src/cli/commands/context/list.ts +109 -0
- package/src/cli/commands/context/rm.ts +52 -0
- package/src/cli/commands/doctor.ts +393 -0
- package/src/cli/commands/embed.ts +462 -0
- package/src/cli/commands/get.ts +356 -0
- package/src/cli/commands/index-cmd.ts +119 -0
- package/src/cli/commands/index.ts +102 -0
- package/src/cli/commands/init.ts +328 -0
- package/src/cli/commands/ls.ts +217 -0
- package/src/cli/commands/mcp/config.ts +300 -0
- package/src/cli/commands/mcp/index.ts +24 -0
- package/src/cli/commands/mcp/install.ts +203 -0
- package/src/cli/commands/mcp/paths.ts +470 -0
- package/src/cli/commands/mcp/status.ts +222 -0
- package/src/cli/commands/mcp/uninstall.ts +158 -0
- package/src/cli/commands/mcp.ts +20 -0
- package/src/cli/commands/models/clear.ts +103 -0
- package/src/cli/commands/models/index.ts +32 -0
- package/src/cli/commands/models/list.ts +214 -0
- package/src/cli/commands/models/path.ts +51 -0
- package/src/cli/commands/models/pull.ts +199 -0
- package/src/cli/commands/models/use.ts +85 -0
- package/src/cli/commands/multi-get.ts +400 -0
- package/src/cli/commands/query.ts +220 -0
- package/src/cli/commands/ref-parser.ts +108 -0
- package/src/cli/commands/reset.ts +191 -0
- package/src/cli/commands/search.ts +136 -0
- package/src/cli/commands/shared.ts +156 -0
- package/src/cli/commands/skill/index.ts +19 -0
- package/src/cli/commands/skill/install.ts +197 -0
- package/src/cli/commands/skill/paths-cmd.ts +81 -0
- package/src/cli/commands/skill/paths.ts +191 -0
- package/src/cli/commands/skill/show.ts +73 -0
- package/src/cli/commands/skill/uninstall.ts +141 -0
- package/src/cli/commands/status.ts +205 -0
- package/src/cli/commands/update.ts +68 -0
- package/src/cli/commands/vsearch.ts +188 -0
- package/src/cli/context.ts +64 -0
- package/src/cli/errors.ts +64 -0
- package/src/cli/format/search-results.ts +211 -0
- package/src/cli/options.ts +183 -0
- package/src/cli/program.ts +1330 -0
- package/src/cli/run.ts +213 -0
- package/src/cli/ui.ts +92 -0
- package/src/config/defaults.ts +20 -0
- package/src/config/index.ts +55 -0
- package/src/config/loader.ts +161 -0
- package/src/config/paths.ts +87 -0
- package/src/config/saver.ts +153 -0
- package/src/config/types.ts +280 -0
- package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
- package/src/converters/adapters/officeparser/adapter.ts +126 -0
- package/src/converters/canonicalize.ts +89 -0
- package/src/converters/errors.ts +218 -0
- package/src/converters/index.ts +51 -0
- package/src/converters/mime.ts +163 -0
- package/src/converters/native/markdown.ts +115 -0
- package/src/converters/native/plaintext.ts +56 -0
- package/src/converters/path.ts +48 -0
- package/src/converters/pipeline.ts +159 -0
- package/src/converters/registry.ts +74 -0
- package/src/converters/types.ts +123 -0
- package/src/converters/versions.ts +24 -0
- package/src/index.ts +27 -0
- package/src/ingestion/chunker.ts +238 -0
- package/src/ingestion/index.ts +32 -0
- package/src/ingestion/language.ts +276 -0
- package/src/ingestion/sync.ts +671 -0
- package/src/ingestion/types.ts +219 -0
- package/src/ingestion/walker.ts +235 -0
- package/src/llm/cache.ts +467 -0
- package/src/llm/errors.ts +191 -0
- package/src/llm/index.ts +58 -0
- package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
- package/src/llm/nodeLlamaCpp/generation.ts +88 -0
- package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
- package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
- package/src/llm/registry.ts +86 -0
- package/src/llm/types.ts +129 -0
- package/src/mcp/resources/index.ts +151 -0
- package/src/mcp/server.ts +229 -0
- package/src/mcp/tools/get.ts +220 -0
- package/src/mcp/tools/index.ts +160 -0
- package/src/mcp/tools/multi-get.ts +263 -0
- package/src/mcp/tools/query.ts +226 -0
- package/src/mcp/tools/search.ts +119 -0
- package/src/mcp/tools/status.ts +81 -0
- package/src/mcp/tools/vsearch.ts +198 -0
- package/src/pipeline/chunk-lookup.ts +44 -0
- package/src/pipeline/expansion.ts +256 -0
- package/src/pipeline/explain.ts +115 -0
- package/src/pipeline/fusion.ts +185 -0
- package/src/pipeline/hybrid.ts +535 -0
- package/src/pipeline/index.ts +64 -0
- package/src/pipeline/query-language.ts +118 -0
- package/src/pipeline/rerank.ts +223 -0
- package/src/pipeline/search.ts +261 -0
- package/src/pipeline/types.ts +328 -0
- package/src/pipeline/vsearch.ts +348 -0
- package/src/store/index.ts +41 -0
- package/src/store/migrations/001-initial.ts +196 -0
- package/src/store/migrations/index.ts +20 -0
- package/src/store/migrations/runner.ts +187 -0
- package/src/store/sqlite/adapter.ts +1242 -0
- package/src/store/sqlite/index.ts +7 -0
- package/src/store/sqlite/setup.ts +129 -0
- package/src/store/sqlite/types.ts +28 -0
- package/src/store/types.ts +506 -0
- package/src/store/vector/index.ts +13 -0
- package/src/store/vector/sqlite-vec.ts +373 -0
- package/src/store/vector/stats.ts +152 -0
- package/src/store/vector/types.ts +115 -0
package/src/llm/nodeLlamaCpp/adapter.ts (new file)
@@ -0,0 +1,133 @@
```ts
/**
 * Main LLM adapter for node-llama-cpp.
 * Factory for creating port instances.
 *
 * @module src/llm/nodeLlamaCpp/adapter
 */

import type { Config } from '../../config/types';
import { ModelCache } from '../cache';
import { getActivePreset, getModelConfig } from '../registry';
import type {
  EmbeddingPort,
  GenerationPort,
  LlmResult,
  RerankPort,
} from '../types';
import { NodeLlamaCppEmbedding } from './embedding';
import { NodeLlamaCppGeneration } from './generation';
import { getModelManager, type ModelManager } from './lifecycle';
import { NodeLlamaCppRerank } from './rerank';

// ─────────────────────────────────────────────────────────────────────────────
// Adapter
// ─────────────────────────────────────────────────────────────────────────────

export class LlmAdapter {
  private readonly manager: ModelManager;
  private readonly cache: ModelCache;
  private readonly config: Config;

  constructor(config: Config, cacheDir?: string) {
    this.config = config;
    const modelConfig = getModelConfig(config);
    this.manager = getModelManager(modelConfig);
    this.cache = new ModelCache(cacheDir);
  }

  /**
   * Create an embedding port.
   */
  async createEmbeddingPort(
    modelUri?: string
  ): Promise<LlmResult<EmbeddingPort>> {
    const preset = getActivePreset(this.config);
    const uri = modelUri ?? preset.embed;

    // Resolve model path from cache
    const resolved = await this.cache.resolve(uri, 'embed');
    if (!resolved.ok) {
      return resolved;
    }

    return {
      ok: true,
      value: new NodeLlamaCppEmbedding(this.manager, uri, resolved.value),
    };
  }

  /**
   * Create a generation port.
   */
  async createGenerationPort(
    modelUri?: string
  ): Promise<LlmResult<GenerationPort>> {
    const preset = getActivePreset(this.config);
    const uri = modelUri ?? preset.gen;

    // Resolve model path from cache
    const resolved = await this.cache.resolve(uri, 'gen');
    if (!resolved.ok) {
      return resolved;
    }

    return {
      ok: true,
      value: new NodeLlamaCppGeneration(this.manager, uri, resolved.value),
    };
  }

  /**
   * Create a rerank port.
   */
  async createRerankPort(modelUri?: string): Promise<LlmResult<RerankPort>> {
    const preset = getActivePreset(this.config);
    const uri = modelUri ?? preset.rerank;

    // Resolve model path from cache
    const resolved = await this.cache.resolve(uri, 'rerank');
    if (!resolved.ok) {
      return resolved;
    }

    return {
      ok: true,
      value: new NodeLlamaCppRerank(this.manager, uri, resolved.value),
    };
  }

  /**
   * Get the model cache instance.
   */
  getCache(): ModelCache {
    return this.cache;
  }

  /**
   * Get the model manager instance.
   */
  getManager(): ModelManager {
    return this.manager;
  }

  /**
   * Dispose all resources.
   */
  async dispose(): Promise<void> {
    await this.manager.disposeAll();
  }
}

// ─────────────────────────────────────────────────────────────────────────────
// Factory
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Create an LLM adapter instance.
 */
export function createLlmAdapter(
  config: Config,
  cacheDir?: string
): LlmAdapter {
  return new LlmAdapter(config, cacheDir);
}
```
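The adapter is a thin factory: each `create*Port` call resolves a model URI (the explicit argument, or the active preset's `embed`/`gen`/`rerank` entry) through the `ModelCache`, then wraps the shared `ModelManager` in a port object. Fallible calls return `LlmResult` values (`{ ok: true, value }` or `{ ok: false, error }`) rather than throwing. A minimal usage sketch, not part of the diff, assuming a `Config` value from the package's config layer and that the port interface exposes the `embed`/`dispose` methods implemented in the next file:

```ts
// Sketch only: embed one string through the adapter.
import type { Config } from '../../config/types';
import { createLlmAdapter } from './adapter';

async function embedOnce(config: Config, text: string): Promise<number[]> {
  const adapter = createLlmAdapter(config);
  const port = await adapter.createEmbeddingPort(); // preset's embed model
  if (!port.ok) {
    throw new Error(`embedding port unavailable: ${String(port.error)}`);
  }
  try {
    const result = await port.value.embed(text);
    if (!result.ok) {
      throw new Error(`embed failed: ${String(result.error)}`);
    }
    return result.value; // the embedding vector
  } finally {
    await port.value.dispose(); // release the embedding context
    await adapter.dispose();    // unload cached models via ModelManager
  }
}
```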
package/src/llm/nodeLlamaCpp/embedding.ts (new file)
@@ -0,0 +1,165 @@
```ts
/**
 * Embedding port implementation using node-llama-cpp.
 *
 * @module src/llm/nodeLlamaCpp/embedding
 */

import { inferenceFailedError } from '../errors';
import type { EmbeddingPort, LlmResult } from '../types';
import type { ModelManager } from './lifecycle';

// ─────────────────────────────────────────────────────────────────────────────
// Types
// ─────────────────────────────────────────────────────────────────────────────

// LlamaModel type from node-llama-cpp
type LlamaModel = Awaited<
  ReturnType<
    Awaited<ReturnType<typeof import('node-llama-cpp').getLlama>>['loadModel']
  >
>;

type LlamaEmbeddingContext = Awaited<
  ReturnType<LlamaModel['createEmbeddingContext']>
>;

// ─────────────────────────────────────────────────────────────────────────────
// Implementation
// ─────────────────────────────────────────────────────────────────────────────

export class NodeLlamaCppEmbedding implements EmbeddingPort {
  private context: LlamaEmbeddingContext | null = null;
  private contextPromise: Promise<LlmResult<LlamaEmbeddingContext>> | null =
    null;
  private dims: number | null = null;
  private readonly manager: ModelManager;
  readonly modelUri: string;
  private readonly modelPath: string;

  constructor(manager: ModelManager, modelUri: string, modelPath: string) {
    this.manager = manager;
    this.modelUri = modelUri;
    this.modelPath = modelPath;
  }

  async init(): Promise<LlmResult<void>> {
    const ctx = await this.getContext();
    if (!ctx.ok) {
      return ctx;
    }
    return { ok: true, value: undefined };
  }

  async embed(text: string): Promise<LlmResult<number[]>> {
    const ctx = await this.getContext();
    if (!ctx.ok) {
      return ctx;
    }

    try {
      const embedding = await ctx.value.getEmbeddingFor(text);
      const vector = Array.from(embedding.vector) as number[];

      // Cache dimensions on first call
      if (this.dims === null) {
        this.dims = vector.length;
      }

      return { ok: true, value: vector };
    } catch (e) {
      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
    }
  }

  async embedBatch(texts: string[]): Promise<LlmResult<number[][]>> {
    const ctx = await this.getContext();
    if (!ctx.ok) {
      return ctx;
    }

    try {
      const results: number[][] = [];
      for (const text of texts) {
        const embedding = await ctx.value.getEmbeddingFor(text);
        const vector = Array.from(embedding.vector) as number[];
        results.push(vector);

        // Cache dimensions on first call
        if (this.dims === null) {
          this.dims = vector.length;
        }
      }
      return { ok: true, value: results };
    } catch (e) {
      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
    }
  }

  dimensions(): number {
    if (this.dims === null) {
      throw new Error('Call init() or embed() first to initialize dimensions');
    }
    return this.dims;
  }

  async dispose(): Promise<void> {
    // Clear promise first to prevent reuse of disposed context
    this.contextPromise = null;
    if (this.context) {
      try {
        await this.context.dispose();
      } catch {
        // Ignore disposal errors
      }
      this.context = null;
    }
  }

  // ───────────────────────────────────────────────────────────────────────────
  // Private
  // ───────────────────────────────────────────────────────────────────────────

  private getContext(): Promise<LlmResult<LlamaEmbeddingContext>> {
    // Return cached context
    if (this.context) {
      return Promise.resolve({ ok: true, value: this.context });
    }

    // Reuse in-flight promise to prevent concurrent context creation
    if (this.contextPromise) {
      return this.contextPromise;
    }

    this.contextPromise = this.createContext();
    return this.contextPromise;
  }

  private async createContext(): Promise<LlmResult<LlamaEmbeddingContext>> {
    const model = await this.manager.loadModel(
      this.modelPath,
      this.modelUri,
      'embed'
    );
    if (!model.ok) {
      this.contextPromise = null; // Allow retry
      return model;
    }

    try {
      // Cast to access createEmbeddingContext
      const llamaModel = model.value.model as LlamaModel;
      this.context = await llamaModel.createEmbeddingContext();

      // Cache dimensions from model (available without running embed)
      const size = llamaModel.embeddingVectorSize;
      if (this.dims === null && typeof size === 'number' && size > 0) {
        this.dims = size;
      }

      return { ok: true, value: this.context };
    } catch (e) {
      this.contextPromise = null; // Allow retry
      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
    }
  }
}
```
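Worth noting in `getContext()`: the embedding context is memoized, and while creation is in flight every caller shares the same promise, so concurrent `embed()` calls cannot build duplicate contexts; on failure `contextPromise` is nulled so the next call retries. The same pattern in isolation (an illustrative sketch, not code from the package; the real implementation returns `LlmResult` values instead of throwing):

```ts
// Illustrative helper showing the memoization-with-retry pattern used by
// NodeLlamaCppEmbedding.getContext(): concurrent callers share one in-flight
// promise, successes are cached, failures are cleared so the next call retries.
class AsyncOnce<T> {
  private value: T | null = null;
  private pending: Promise<T> | null = null;

  constructor(private readonly create: () => Promise<T>) {}

  get(): Promise<T> {
    if (this.value !== null) {
      return Promise.resolve(this.value); // cached result
    }
    if (this.pending) {
      return this.pending; // join the in-flight creation
    }
    this.pending = this.create().then(
      (v) => {
        this.value = v;
        return v;
      },
      (err) => {
        this.pending = null; // allow retry after failure
        throw err;
      }
    );
    return this.pending;
  }
}
```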
package/src/llm/nodeLlamaCpp/generation.ts (new file)
@@ -0,0 +1,88 @@
```ts
/**
 * Generation port implementation using node-llama-cpp.
 *
 * @module src/llm/nodeLlamaCpp/generation
 */

import { inferenceFailedError } from '../errors';
import type { GenerationPort, GenParams, LlmResult } from '../types';
import type { ModelManager } from './lifecycle';

// ─────────────────────────────────────────────────────────────────────────────
// Types
// ─────────────────────────────────────────────────────────────────────────────

type LlamaModel = Awaited<
  ReturnType<
    Awaited<ReturnType<typeof import('node-llama-cpp').getLlama>>['loadModel']
  >
>;

// ─────────────────────────────────────────────────────────────────────────────
// Default Parameters (for determinism)
// ─────────────────────────────────────────────────────────────────────────────

const DEFAULT_TEMPERATURE = 0;
const DEFAULT_SEED = 42;
const DEFAULT_MAX_TOKENS = 256;

// ─────────────────────────────────────────────────────────────────────────────
// Implementation
// ─────────────────────────────────────────────────────────────────────────────

export class NodeLlamaCppGeneration implements GenerationPort {
  private readonly manager: ModelManager;
  readonly modelUri: string;
  private readonly modelPath: string;

  constructor(manager: ModelManager, modelUri: string, modelPath: string) {
    this.manager = manager;
    this.modelUri = modelUri;
    this.modelPath = modelPath;
  }

  async generate(
    prompt: string,
    params?: GenParams
  ): Promise<LlmResult<string>> {
    const model = await this.manager.loadModel(
      this.modelPath,
      this.modelUri,
      'gen'
    );
    if (!model.ok) {
      return model;
    }

    const llamaModel = model.value.model as LlamaModel;
    const context = await llamaModel.createContext();

    try {
      // Import LlamaChatSession dynamically
      const { LlamaChatSession } = await import('node-llama-cpp');
      const session = new LlamaChatSession({
        contextSequence: context.getSequence(),
      });

      // Note: stop sequences not yet supported - requires stopOnTrigger API
      const response = await session.prompt(prompt, {
        temperature: params?.temperature ?? DEFAULT_TEMPERATURE,
        seed: params?.seed ?? DEFAULT_SEED,
        maxTokens: params?.maxTokens ?? DEFAULT_MAX_TOKENS,
      });

      return { ok: true, value: response };
    } catch (e) {
      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
    } finally {
      await context.dispose().catch(() => {
        // Ignore disposal errors
      });
    }
  }

  async dispose(): Promise<void> {
    // Generation doesn't hold persistent context
    // Model cleanup is handled by ModelManager
  }
}
```
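The generation port is stateless between calls: each `generate()` creates a fresh context, prompts a `LlamaChatSession`, and disposes the context in `finally`. The defaults (temperature 0, fixed seed 42) bias toward deterministic output. A usage sketch, not part of the diff, assuming a port obtained from `LlmAdapter.createGenerationPort()` and the `GenParams` fields used above:

```ts
// Sketch only: deterministic one-shot generation through the port.
import type { GenerationPort } from '../types';

async function summarize(port: GenerationPort, text: string): Promise<string> {
  const result = await port.generate(`Summarize:\n\n${text}`, {
    temperature: 0, // matches DEFAULT_TEMPERATURE for reproducible output
    seed: 42,       // matches DEFAULT_SEED
    maxTokens: 128,
  });
  if (!result.ok) {
    throw new Error(`generation failed: ${String(result.error)}`);
  }
  return result.value;
}
```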
package/src/llm/nodeLlamaCpp/lifecycle.ts (new file)
@@ -0,0 +1,317 @@
```ts
/**
 * Model lifecycle manager.
 * Handles lazy loading, caching, and disposal of LLM models.
 *
 * @module src/llm/nodeLlamaCpp/lifecycle
 */

import type { ModelConfig } from '../../config/types';
import { loadFailedError, outOfMemoryError, timeoutError } from '../errors';
import type { LlmResult, LoadedModel, ModelType } from '../types';

// ─────────────────────────────────────────────────────────────────────────────
// Types
// ─────────────────────────────────────────────────────────────────────────────

type Llama = Awaited<ReturnType<typeof import('node-llama-cpp').getLlama>>;
type LlamaModel = Awaited<ReturnType<Llama['loadModel']>>;

interface CachedModel {
  uri: string;
  type: ModelType;
  model: LlamaModel;
  loadedAt: number;
}

// ─────────────────────────────────────────────────────────────────────────────
// ModelManager
// ─────────────────────────────────────────────────────────────────────────────

export class ModelManager {
  private llama: Llama | null = null;
  private readonly models: Map<string, CachedModel> = new Map();
  private readonly disposalTimers: Map<string, ReturnType<typeof setTimeout>> =
    new Map();
  private readonly inflightLoads: Map<string, Promise<LlmResult<LoadedModel>>> =
    new Map();
  private readonly config: ModelConfig;

  constructor(config: ModelConfig) {
    this.config = config;
  }

  /**
   * Get or initialize the Llama instance.
   * Uses lazy loading - only imports node-llama-cpp on first use.
   */
  async getLlama(): Promise<Llama> {
    if (!this.llama) {
      const { getLlama, LlamaLogLevel } = await import('node-llama-cpp');
      // Suppress model loading warnings (vocab tokens, pooling type)
      this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
    }
    return this.llama;
  }

  /**
   * Load a model by path.
   * Uses caching, inflight deduplication, and TTL-based disposal.
   */
  loadModel(
    modelPath: string,
    uri: string,
    type: ModelType
  ): Promise<LlmResult<LoadedModel>> {
    // Check cache first
    const cached = this.models.get(uri);
    if (cached) {
      this.resetDisposalTimer(uri);
      return Promise.resolve({
        ok: true as const,
        value: {
          uri: cached.uri,
          type: cached.type,
          model: cached.model,
          loadedAt: cached.loadedAt,
        },
      });
    }

    // Check for inflight load (deduplicate concurrent requests)
    const inflight = this.inflightLoads.get(uri);
    if (inflight) {
      return inflight;
    }

    // Start new load with cleanup
    const loadPromise = this.loadModelInternal(modelPath, uri, type).finally(
      () => {
        this.inflightLoads.delete(uri);
      }
    );
    this.inflightLoads.set(uri, loadPromise);
    return loadPromise;
  }

  /**
   * Internal model loading with timeout handling.
   */
  private async loadModelInternal(
    modelPath: string,
    uri: string,
    type: ModelType
  ): Promise<LlmResult<LoadedModel>> {
    const timeoutMs = this.config.loadTimeout;
    let timeoutId: ReturnType<typeof setTimeout> | null = null;
    let timedOut = false;

    // Capture loadPromise outside try block so we can dispose it on timeout
    let loadPromise: Promise<LlamaModel> | null = null;

    try {
      const llama = await this.getLlama();
      loadPromise = llama.loadModel({ modelPath });

      // Create timeout with proper cleanup
      const timeoutPromise = new Promise<never>((_, reject) => {
        timeoutId = setTimeout(() => {
          timedOut = true;
          reject(new Error(`Load timeout after ${timeoutMs}ms`));
        }, timeoutMs);
      });

      const model = await Promise.race([loadPromise, timeoutPromise]);

      // Clear timeout on success
      if (timeoutId) {
        clearTimeout(timeoutId);
      }

      const now = Date.now();
      const cachedModel: CachedModel = {
        uri,
        type,
        model,
        loadedAt: now,
      };

      this.models.set(uri, cachedModel);
      this.setDisposalTimer(uri);

      return {
        ok: true,
        value: {
          uri,
          type,
          model,
          loadedAt: now,
        },
      };
    } catch (e) {
      // Clear timeout on error
      if (timeoutId) {
        clearTimeout(timeoutId);
      }

      // Dispose late-arriving model after timeout to prevent memory leak
      if (timedOut && loadPromise) {
        loadPromise.then(
          (model) => {
            // Dispose model that arrived after timeout
            model.dispose().catch(() => {
              // Ignore dispose errors
            });
          },
          () => {
            // Ignore load errors after timeout
          }
        );
      }

      if (e instanceof Error) {
        if (e.message.includes('timeout')) {
          return {
            ok: false,
            error: timeoutError(uri, 'load', this.config.loadTimeout),
          };
        }
        if (e.message.includes('out of memory') || e.message.includes('OOM')) {
          return { ok: false, error: outOfMemoryError(uri, e) };
        }
      }
      return { ok: false, error: loadFailedError(uri, e) };
    }
  }

  /**
   * Get a loaded model by URI (no loading).
   */
  getLoadedModel(uri: string): CachedModel | undefined {
    const model = this.models.get(uri);
    if (model) {
      this.resetDisposalTimer(uri);
    }
    return model;
  }

  /**
   * Check if a model is loaded.
   */
  isLoaded(uri: string): boolean {
    return this.models.has(uri);
  }

  /**
   * Dispose a specific model.
   */
  async dispose(uri: string): Promise<void> {
    const cached = this.models.get(uri);
    if (!cached) {
      return;
    }

    // Clear disposal timer
    const timer = this.disposalTimers.get(uri);
    if (timer) {
      clearTimeout(timer);
      this.disposalTimers.delete(uri);
    }

    // Dispose the model
    try {
      await cached.model.dispose();
    } catch {
      // Ignore disposal errors
    }

    this.models.delete(uri);
  }

  /**
   * Dispose all loaded models.
   */
  async disposeAll(): Promise<void> {
    // Clear all timers
    for (const timer of this.disposalTimers.values()) {
      clearTimeout(timer);
    }
    this.disposalTimers.clear();

    // Dispose all models
    for (const [uri, cached] of this.models) {
      try {
        await cached.model.dispose();
      } catch {
        // Ignore disposal errors
      }
      this.models.delete(uri);
    }

    // Clear llama instance
    this.llama = null;
  }

  /**
   * Get list of loaded models.
   */
  getLoadedModels(): Array<{ uri: string; type: ModelType; loadedAt: number }> {
    return Array.from(this.models.values()).map((m) => ({
      uri: m.uri,
      type: m.type,
      loadedAt: m.loadedAt,
    }));
  }

  // ───────────────────────────────────────────────────────────────────────────
  // Private
  // ───────────────────────────────────────────────────────────────────────────

  private setDisposalTimer(uri: string): void {
    const timer = setTimeout(() => {
      this.dispose(uri).catch(() => {
        // Ignore disposal errors in timer callback
      });
    }, this.config.warmModelTtl);

    // Allow CLI processes to exit without waiting for TTL timer
    if (typeof timer.unref === 'function') {
      timer.unref();
    }

    this.disposalTimers.set(uri, timer);
  }

  private resetDisposalTimer(uri: string): void {
    const existing = this.disposalTimers.get(uri);
    if (existing) {
      clearTimeout(existing);
    }
    this.setDisposalTimer(uri);
  }
}

// ─────────────────────────────────────────────────────────────────────────────
// Singleton
// ─────────────────────────────────────────────────────────────────────────────

let defaultManager: ModelManager | null = null;

/**
 * Get the default ModelManager singleton.
 */
export function getModelManager(config: ModelConfig): ModelManager {
  if (!defaultManager) {
    defaultManager = new ModelManager(config);
  }
  return defaultManager;
}

/**
 * Reset the default manager (for testing).
 */
export async function resetModelManager(): Promise<void> {
  if (defaultManager) {
    await defaultManager.disposeAll();
    defaultManager = null;
  }
}
```
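Two lifecycle behaviors worth calling out: the TTL timer set by `setDisposalTimer` is `unref()`ed so a CLI process can exit before the warm-model TTL fires, and `getModelManager` is a first-wins singleton, so the `ModelConfig` passed on later calls is ignored until `resetModelManager()` clears it. A test-teardown sketch, not part of the diff, using only the exports shown above:

```ts
// Sketch only: ensure each test run starts with a fresh singleton.
import type { ModelConfig } from '../../config/types';
import { getModelManager, resetModelManager } from './lifecycle';

async function withFreshManager(config: ModelConfig): Promise<void> {
  const manager = getModelManager(config);
  // ... load models, run assertions ...
  console.log(manager.getLoadedModels()); // [{ uri, type, loadedAt }, ...]
  await resetModelManager(); // dispose all models and drop the singleton
}
```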