@gmickel/gno 0.3.0

Files changed (131)
  1. package/README.md +256 -0
  2. package/assets/skill/SKILL.md +112 -0
  3. package/assets/skill/cli-reference.md +327 -0
  4. package/assets/skill/examples.md +234 -0
  5. package/assets/skill/mcp-reference.md +159 -0
  6. package/package.json +90 -0
  7. package/src/app/constants.ts +313 -0
  8. package/src/cli/colors.ts +65 -0
  9. package/src/cli/commands/ask.ts +545 -0
  10. package/src/cli/commands/cleanup.ts +105 -0
  11. package/src/cli/commands/collection/add.ts +120 -0
  12. package/src/cli/commands/collection/index.ts +10 -0
  13. package/src/cli/commands/collection/list.ts +108 -0
  14. package/src/cli/commands/collection/remove.ts +64 -0
  15. package/src/cli/commands/collection/rename.ts +95 -0
  16. package/src/cli/commands/context/add.ts +67 -0
  17. package/src/cli/commands/context/check.ts +153 -0
  18. package/src/cli/commands/context/index.ts +10 -0
  19. package/src/cli/commands/context/list.ts +109 -0
  20. package/src/cli/commands/context/rm.ts +52 -0
  21. package/src/cli/commands/doctor.ts +393 -0
  22. package/src/cli/commands/embed.ts +462 -0
  23. package/src/cli/commands/get.ts +356 -0
  24. package/src/cli/commands/index-cmd.ts +119 -0
  25. package/src/cli/commands/index.ts +102 -0
  26. package/src/cli/commands/init.ts +328 -0
  27. package/src/cli/commands/ls.ts +217 -0
  28. package/src/cli/commands/mcp/config.ts +300 -0
  29. package/src/cli/commands/mcp/index.ts +24 -0
  30. package/src/cli/commands/mcp/install.ts +203 -0
  31. package/src/cli/commands/mcp/paths.ts +470 -0
  32. package/src/cli/commands/mcp/status.ts +222 -0
  33. package/src/cli/commands/mcp/uninstall.ts +158 -0
  34. package/src/cli/commands/mcp.ts +20 -0
  35. package/src/cli/commands/models/clear.ts +103 -0
  36. package/src/cli/commands/models/index.ts +32 -0
  37. package/src/cli/commands/models/list.ts +214 -0
  38. package/src/cli/commands/models/path.ts +51 -0
  39. package/src/cli/commands/models/pull.ts +199 -0
  40. package/src/cli/commands/models/use.ts +85 -0
  41. package/src/cli/commands/multi-get.ts +400 -0
  42. package/src/cli/commands/query.ts +220 -0
  43. package/src/cli/commands/ref-parser.ts +108 -0
  44. package/src/cli/commands/reset.ts +191 -0
  45. package/src/cli/commands/search.ts +136 -0
  46. package/src/cli/commands/shared.ts +156 -0
  47. package/src/cli/commands/skill/index.ts +19 -0
  48. package/src/cli/commands/skill/install.ts +197 -0
  49. package/src/cli/commands/skill/paths-cmd.ts +81 -0
  50. package/src/cli/commands/skill/paths.ts +191 -0
  51. package/src/cli/commands/skill/show.ts +73 -0
  52. package/src/cli/commands/skill/uninstall.ts +141 -0
  53. package/src/cli/commands/status.ts +205 -0
  54. package/src/cli/commands/update.ts +68 -0
  55. package/src/cli/commands/vsearch.ts +188 -0
  56. package/src/cli/context.ts +64 -0
  57. package/src/cli/errors.ts +64 -0
  58. package/src/cli/format/search-results.ts +211 -0
  59. package/src/cli/options.ts +183 -0
  60. package/src/cli/program.ts +1330 -0
  61. package/src/cli/run.ts +213 -0
  62. package/src/cli/ui.ts +92 -0
  63. package/src/config/defaults.ts +20 -0
  64. package/src/config/index.ts +55 -0
  65. package/src/config/loader.ts +161 -0
  66. package/src/config/paths.ts +87 -0
  67. package/src/config/saver.ts +153 -0
  68. package/src/config/types.ts +280 -0
  69. package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
  70. package/src/converters/adapters/officeparser/adapter.ts +126 -0
  71. package/src/converters/canonicalize.ts +89 -0
  72. package/src/converters/errors.ts +218 -0
  73. package/src/converters/index.ts +51 -0
  74. package/src/converters/mime.ts +163 -0
  75. package/src/converters/native/markdown.ts +115 -0
  76. package/src/converters/native/plaintext.ts +56 -0
  77. package/src/converters/path.ts +48 -0
  78. package/src/converters/pipeline.ts +159 -0
  79. package/src/converters/registry.ts +74 -0
  80. package/src/converters/types.ts +123 -0
  81. package/src/converters/versions.ts +24 -0
  82. package/src/index.ts +27 -0
  83. package/src/ingestion/chunker.ts +238 -0
  84. package/src/ingestion/index.ts +32 -0
  85. package/src/ingestion/language.ts +276 -0
  86. package/src/ingestion/sync.ts +671 -0
  87. package/src/ingestion/types.ts +219 -0
  88. package/src/ingestion/walker.ts +235 -0
  89. package/src/llm/cache.ts +467 -0
  90. package/src/llm/errors.ts +191 -0
  91. package/src/llm/index.ts +58 -0
  92. package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
  93. package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
  94. package/src/llm/nodeLlamaCpp/generation.ts +88 -0
  95. package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
  96. package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
  97. package/src/llm/registry.ts +86 -0
  98. package/src/llm/types.ts +129 -0
  99. package/src/mcp/resources/index.ts +151 -0
  100. package/src/mcp/server.ts +229 -0
  101. package/src/mcp/tools/get.ts +220 -0
  102. package/src/mcp/tools/index.ts +160 -0
  103. package/src/mcp/tools/multi-get.ts +263 -0
  104. package/src/mcp/tools/query.ts +226 -0
  105. package/src/mcp/tools/search.ts +119 -0
  106. package/src/mcp/tools/status.ts +81 -0
  107. package/src/mcp/tools/vsearch.ts +198 -0
  108. package/src/pipeline/chunk-lookup.ts +44 -0
  109. package/src/pipeline/expansion.ts +256 -0
  110. package/src/pipeline/explain.ts +115 -0
  111. package/src/pipeline/fusion.ts +185 -0
  112. package/src/pipeline/hybrid.ts +535 -0
  113. package/src/pipeline/index.ts +64 -0
  114. package/src/pipeline/query-language.ts +118 -0
  115. package/src/pipeline/rerank.ts +223 -0
  116. package/src/pipeline/search.ts +261 -0
  117. package/src/pipeline/types.ts +328 -0
  118. package/src/pipeline/vsearch.ts +348 -0
  119. package/src/store/index.ts +41 -0
  120. package/src/store/migrations/001-initial.ts +196 -0
  121. package/src/store/migrations/index.ts +20 -0
  122. package/src/store/migrations/runner.ts +187 -0
  123. package/src/store/sqlite/adapter.ts +1242 -0
  124. package/src/store/sqlite/index.ts +7 -0
  125. package/src/store/sqlite/setup.ts +129 -0
  126. package/src/store/sqlite/types.ts +28 -0
  127. package/src/store/types.ts +506 -0
  128. package/src/store/vector/index.ts +13 -0
  129. package/src/store/vector/sqlite-vec.ts +373 -0
  130. package/src/store/vector/stats.ts +152 -0
  131. package/src/store/vector/types.ts +115 -0
package/src/llm/nodeLlamaCpp/adapter.ts
@@ -0,0 +1,133 @@
+/**
+ * Main LLM adapter for node-llama-cpp.
+ * Factory for creating port instances.
+ *
+ * @module src/llm/nodeLlamaCpp/adapter
+ */
+
+import type { Config } from '../../config/types';
+import { ModelCache } from '../cache';
+import { getActivePreset, getModelConfig } from '../registry';
+import type {
+  EmbeddingPort,
+  GenerationPort,
+  LlmResult,
+  RerankPort,
+} from '../types';
+import { NodeLlamaCppEmbedding } from './embedding';
+import { NodeLlamaCppGeneration } from './generation';
+import { getModelManager, type ModelManager } from './lifecycle';
+import { NodeLlamaCppRerank } from './rerank';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Adapter
+// ─────────────────────────────────────────────────────────────────────────────
+
+export class LlmAdapter {
+  private readonly manager: ModelManager;
+  private readonly cache: ModelCache;
+  private readonly config: Config;
+
+  constructor(config: Config, cacheDir?: string) {
+    this.config = config;
+    const modelConfig = getModelConfig(config);
+    this.manager = getModelManager(modelConfig);
+    this.cache = new ModelCache(cacheDir);
+  }
+
+  /**
+   * Create an embedding port.
+   */
+  async createEmbeddingPort(
+    modelUri?: string
+  ): Promise<LlmResult<EmbeddingPort>> {
+    const preset = getActivePreset(this.config);
+    const uri = modelUri ?? preset.embed;
+
+    // Resolve model path from cache
+    const resolved = await this.cache.resolve(uri, 'embed');
+    if (!resolved.ok) {
+      return resolved;
+    }
+
+    return {
+      ok: true,
+      value: new NodeLlamaCppEmbedding(this.manager, uri, resolved.value),
+    };
+  }
+
+  /**
+   * Create a generation port.
+   */
+  async createGenerationPort(
+    modelUri?: string
+  ): Promise<LlmResult<GenerationPort>> {
+    const preset = getActivePreset(this.config);
+    const uri = modelUri ?? preset.gen;
+
+    // Resolve model path from cache
+    const resolved = await this.cache.resolve(uri, 'gen');
+    if (!resolved.ok) {
+      return resolved;
+    }
+
+    return {
+      ok: true,
+      value: new NodeLlamaCppGeneration(this.manager, uri, resolved.value),
+    };
+  }
+
+  /**
+   * Create a rerank port.
+   */
+  async createRerankPort(modelUri?: string): Promise<LlmResult<RerankPort>> {
+    const preset = getActivePreset(this.config);
+    const uri = modelUri ?? preset.rerank;
+
+    // Resolve model path from cache
+    const resolved = await this.cache.resolve(uri, 'rerank');
+    if (!resolved.ok) {
+      return resolved;
+    }
+
+    return {
+      ok: true,
+      value: new NodeLlamaCppRerank(this.manager, uri, resolved.value),
+    };
+  }
+
+  /**
+   * Get the model cache instance.
+   */
+  getCache(): ModelCache {
+    return this.cache;
+  }
+
+  /**
+   * Get the model manager instance.
+   */
+  getManager(): ModelManager {
+    return this.manager;
+  }
+
+  /**
+   * Dispose all resources.
+   */
+  async dispose(): Promise<void> {
+    await this.manager.disposeAll();
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Factory
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Create an LLM adapter instance.
+ */
+export function createLlmAdapter(
+  config: Config,
+  cacheDir?: string
+): LlmAdapter {
+  return new LlmAdapter(config, cacheDir);
+}
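The adapter above is the composition root: `ModelCache` resolves a model URI to a local path, `ModelManager` loads the weights, and each `create*Port` call wraps the result in an `LlmResult`. A minimal usage sketch under those assumptions — the config value and the error formatting are illustrative, not part of this diff:

```ts
import type { Config } from '../../config/types';
import { createLlmAdapter } from './adapter';

// Hypothetical helper: embeds one string via the active preset's embed model.
async function embedOne(config: Config, text: string): Promise<number[]> {
  const adapter = createLlmAdapter(config);
  try {
    // Port creation returns a result value, not a throw.
    const port = await adapter.createEmbeddingPort();
    if (!port.ok) throw new Error(`embed port failed: ${String(port.error)}`);

    const vec = await port.value.embed(text);
    if (!vec.ok) throw new Error(`embedding failed: ${String(vec.error)}`);
    return vec.value;
  } finally {
    // Releases loaded models via ModelManager.disposeAll().
    await adapter.dispose();
  }
}
```

Note that `adapter.dispose()` disposes models held by the manager; the embedding port implementation below also exposes its own `dispose()` for the embedding context it holds.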
package/src/llm/nodeLlamaCpp/embedding.ts
@@ -0,0 +1,165 @@
+/**
+ * Embedding port implementation using node-llama-cpp.
+ *
+ * @module src/llm/nodeLlamaCpp/embedding
+ */
+
+import { inferenceFailedError } from '../errors';
+import type { EmbeddingPort, LlmResult } from '../types';
+import type { ModelManager } from './lifecycle';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Types
+// ─────────────────────────────────────────────────────────────────────────────
+
+// LlamaModel type from node-llama-cpp
+type LlamaModel = Awaited<
+  ReturnType<
+    Awaited<ReturnType<typeof import('node-llama-cpp').getLlama>>['loadModel']
+  >
+>;
+
+type LlamaEmbeddingContext = Awaited<
+  ReturnType<LlamaModel['createEmbeddingContext']>
+>;
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Implementation
+// ─────────────────────────────────────────────────────────────────────────────
+
+export class NodeLlamaCppEmbedding implements EmbeddingPort {
+  private context: LlamaEmbeddingContext | null = null;
+  private contextPromise: Promise<LlmResult<LlamaEmbeddingContext>> | null =
+    null;
+  private dims: number | null = null;
+  private readonly manager: ModelManager;
+  readonly modelUri: string;
+  private readonly modelPath: string;
+
+  constructor(manager: ModelManager, modelUri: string, modelPath: string) {
+    this.manager = manager;
+    this.modelUri = modelUri;
+    this.modelPath = modelPath;
+  }
+
+  async init(): Promise<LlmResult<void>> {
+    const ctx = await this.getContext();
+    if (!ctx.ok) {
+      return ctx;
+    }
+    return { ok: true, value: undefined };
+  }
+
+  async embed(text: string): Promise<LlmResult<number[]>> {
+    const ctx = await this.getContext();
+    if (!ctx.ok) {
+      return ctx;
+    }
+
+    try {
+      const embedding = await ctx.value.getEmbeddingFor(text);
+      const vector = Array.from(embedding.vector) as number[];
+
+      // Cache dimensions on first call
+      if (this.dims === null) {
+        this.dims = vector.length;
+      }
+
+      return { ok: true, value: vector };
+    } catch (e) {
+      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
+    }
+  }
+
+  async embedBatch(texts: string[]): Promise<LlmResult<number[][]>> {
+    const ctx = await this.getContext();
+    if (!ctx.ok) {
+      return ctx;
+    }
+
+    try {
+      const results: number[][] = [];
+      for (const text of texts) {
+        const embedding = await ctx.value.getEmbeddingFor(text);
+        const vector = Array.from(embedding.vector) as number[];
+        results.push(vector);
+
+        // Cache dimensions on first call
+        if (this.dims === null) {
+          this.dims = vector.length;
+        }
+      }
+      return { ok: true, value: results };
+    } catch (e) {
+      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
+    }
+  }
+
+  dimensions(): number {
+    if (this.dims === null) {
+      throw new Error('Call init() or embed() first to initialize dimensions');
+    }
+    return this.dims;
+  }
+
+  async dispose(): Promise<void> {
+    // Clear promise first to prevent reuse of disposed context
+    this.contextPromise = null;
+    if (this.context) {
+      try {
+        await this.context.dispose();
+      } catch {
+        // Ignore disposal errors
+      }
+      this.context = null;
+    }
+  }
+
+  // ───────────────────────────────────────────────────────────────────────────
+  // Private
+  // ───────────────────────────────────────────────────────────────────────────
+
+  private getContext(): Promise<LlmResult<LlamaEmbeddingContext>> {
+    // Return cached context
+    if (this.context) {
+      return Promise.resolve({ ok: true, value: this.context });
+    }
+
+    // Reuse in-flight promise to prevent concurrent context creation
+    if (this.contextPromise) {
+      return this.contextPromise;
+    }
+
+    this.contextPromise = this.createContext();
+    return this.contextPromise;
+  }
+
+  private async createContext(): Promise<LlmResult<LlamaEmbeddingContext>> {
+    const model = await this.manager.loadModel(
+      this.modelPath,
+      this.modelUri,
+      'embed'
+    );
+    if (!model.ok) {
+      this.contextPromise = null; // Allow retry
+      return model;
+    }
+
+    try {
+      // Cast to access createEmbeddingContext
+      const llamaModel = model.value.model as LlamaModel;
+      this.context = await llamaModel.createEmbeddingContext();
+
+      // Cache dimensions from model (available without running embed)
+      const size = llamaModel.embeddingVectorSize;
+      if (this.dims === null && typeof size === 'number' && size > 0) {
+        this.dims = size;
+      }
+
+      return { ok: true, value: this.context };
+    } catch (e) {
+      this.contextPromise = null; // Allow retry
+      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
+    }
+  }
+}
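One detail in this port is the ordering contract: `dimensions()` throws until a context exists, because the size is read from `embeddingVectorSize` at context creation (or from the first embedded vector). A sketch of the expected call order, assuming the URI and path arguments are placeholders that would normally come from `ModelCache.resolve()`:

```ts
import { NodeLlamaCppEmbedding } from './embedding';
import type { ModelManager } from './lifecycle';

// Placeholder arguments; in the real flow LlmAdapter supplies them.
async function printDims(manager: ModelManager, uri: string, path: string) {
  const port = new NodeLlamaCppEmbedding(manager, uri, path);

  const ready = await port.init(); // lazily creates the embedding context
  if (!ready.ok) return;

  console.log(port.dimensions()); // safe only after init() or a first embed()

  await port.dispose(); // frees the context; the model itself stays warm
                        // in ModelManager until its TTL expires
}
```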
package/src/llm/nodeLlamaCpp/generation.ts
@@ -0,0 +1,88 @@
+/**
+ * Generation port implementation using node-llama-cpp.
+ *
+ * @module src/llm/nodeLlamaCpp/generation
+ */
+
+import { inferenceFailedError } from '../errors';
+import type { GenerationPort, GenParams, LlmResult } from '../types';
+import type { ModelManager } from './lifecycle';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Types
+// ─────────────────────────────────────────────────────────────────────────────
+
+type LlamaModel = Awaited<
+  ReturnType<
+    Awaited<ReturnType<typeof import('node-llama-cpp').getLlama>>['loadModel']
+  >
+>;
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Default Parameters (for determinism)
+// ─────────────────────────────────────────────────────────────────────────────
+
+const DEFAULT_TEMPERATURE = 0;
+const DEFAULT_SEED = 42;
+const DEFAULT_MAX_TOKENS = 256;
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Implementation
+// ─────────────────────────────────────────────────────────────────────────────
+
+export class NodeLlamaCppGeneration implements GenerationPort {
+  private readonly manager: ModelManager;
+  readonly modelUri: string;
+  private readonly modelPath: string;
+
+  constructor(manager: ModelManager, modelUri: string, modelPath: string) {
+    this.manager = manager;
+    this.modelUri = modelUri;
+    this.modelPath = modelPath;
+  }
+
+  async generate(
+    prompt: string,
+    params?: GenParams
+  ): Promise<LlmResult<string>> {
+    const model = await this.manager.loadModel(
+      this.modelPath,
+      this.modelUri,
+      'gen'
+    );
+    if (!model.ok) {
+      return model;
+    }
+
+    const llamaModel = model.value.model as LlamaModel;
+    const context = await llamaModel.createContext();
+
+    try {
+      // Import LlamaChatSession dynamically
+      const { LlamaChatSession } = await import('node-llama-cpp');
+      const session = new LlamaChatSession({
+        contextSequence: context.getSequence(),
+      });
+
+      // Note: stop sequences not yet supported - requires stopOnTrigger API
+      const response = await session.prompt(prompt, {
+        temperature: params?.temperature ?? DEFAULT_TEMPERATURE,
+        seed: params?.seed ?? DEFAULT_SEED,
+        maxTokens: params?.maxTokens ?? DEFAULT_MAX_TOKENS,
+      });
+
+      return { ok: true, value: response };
+    } catch (e) {
+      return { ok: false, error: inferenceFailedError(this.modelUri, e) };
+    } finally {
+      await context.dispose().catch(() => {
+        // Ignore disposal errors
+      });
+    }
+  }
+
+  async dispose(): Promise<void> {
+    // Generation doesn't hold persistent context
+    // Model cleanup is handled by ModelManager
+  }
+}
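The defaults above (`temperature: 0`, `seed: 42`, `maxTokens: 256`) make repeated calls reproducible unless the caller opts into sampling. A sketch of both modes — the `GenParams` fields are inferred from their use in `generate()` above:

```ts
import type { GenerationPort } from '../types';

async function summarize(port: GenerationPort, text: string) {
  // No params: runs with the deterministic defaults (temp 0, seed 42),
  // so repeated calls on the same model should agree.
  const stable = await port.generate(`Summarize:\n${text}`);

  // Explicit params override the defaults per call.
  const sampled = await port.generate(`Summarize:\n${text}`, {
    temperature: 0.7,
    maxTokens: 128,
  });

  if (!stable.ok || !sampled.ok) return null;
  return { stable: stable.value, sampled: sampled.value };
}
```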
package/src/llm/nodeLlamaCpp/lifecycle.ts
@@ -0,0 +1,317 @@
+/**
+ * Model lifecycle manager.
+ * Handles lazy loading, caching, and disposal of LLM models.
+ *
+ * @module src/llm/nodeLlamaCpp/lifecycle
+ */
+
+import type { ModelConfig } from '../../config/types';
+import { loadFailedError, outOfMemoryError, timeoutError } from '../errors';
+import type { LlmResult, LoadedModel, ModelType } from '../types';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Types
+// ─────────────────────────────────────────────────────────────────────────────
+
+type Llama = Awaited<ReturnType<typeof import('node-llama-cpp').getLlama>>;
+type LlamaModel = Awaited<ReturnType<Llama['loadModel']>>;
+
+interface CachedModel {
+  uri: string;
+  type: ModelType;
+  model: LlamaModel;
+  loadedAt: number;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// ModelManager
+// ─────────────────────────────────────────────────────────────────────────────
+
+export class ModelManager {
+  private llama: Llama | null = null;
+  private readonly models: Map<string, CachedModel> = new Map();
+  private readonly disposalTimers: Map<string, ReturnType<typeof setTimeout>> =
+    new Map();
+  private readonly inflightLoads: Map<string, Promise<LlmResult<LoadedModel>>> =
+    new Map();
+  private readonly config: ModelConfig;
+
+  constructor(config: ModelConfig) {
+    this.config = config;
+  }
+
+  /**
+   * Get or initialize the Llama instance.
+   * Uses lazy loading - only imports node-llama-cpp on first use.
+   */
+  async getLlama(): Promise<Llama> {
+    if (!this.llama) {
+      const { getLlama, LlamaLogLevel } = await import('node-llama-cpp');
+      // Suppress model loading warnings (vocab tokens, pooling type)
+      this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
+    }
+    return this.llama;
+  }
+
+  /**
+   * Load a model by path.
+   * Uses caching, inflight deduplication, and TTL-based disposal.
+   */
+  loadModel(
+    modelPath: string,
+    uri: string,
+    type: ModelType
+  ): Promise<LlmResult<LoadedModel>> {
+    // Check cache first
+    const cached = this.models.get(uri);
+    if (cached) {
+      this.resetDisposalTimer(uri);
+      return Promise.resolve({
+        ok: true as const,
+        value: {
+          uri: cached.uri,
+          type: cached.type,
+          model: cached.model,
+          loadedAt: cached.loadedAt,
+        },
+      });
+    }
+
+    // Check for inflight load (deduplicate concurrent requests)
+    const inflight = this.inflightLoads.get(uri);
+    if (inflight) {
+      return inflight;
+    }
+
+    // Start new load with cleanup
+    const loadPromise = this.loadModelInternal(modelPath, uri, type).finally(
+      () => {
+        this.inflightLoads.delete(uri);
+      }
+    );
+    this.inflightLoads.set(uri, loadPromise);
+    return loadPromise;
+  }
+
+  /**
+   * Internal model loading with timeout handling.
+   */
+  private async loadModelInternal(
+    modelPath: string,
+    uri: string,
+    type: ModelType
+  ): Promise<LlmResult<LoadedModel>> {
+    const timeoutMs = this.config.loadTimeout;
+    let timeoutId: ReturnType<typeof setTimeout> | null = null;
+    let timedOut = false;
+
+    // Capture loadPromise outside try block so we can dispose it on timeout
+    let loadPromise: Promise<LlamaModel> | null = null;
+
+    try {
+      const llama = await this.getLlama();
+      loadPromise = llama.loadModel({ modelPath });
+
+      // Create timeout with proper cleanup
+      const timeoutPromise = new Promise<never>((_, reject) => {
+        timeoutId = setTimeout(() => {
+          timedOut = true;
+          reject(new Error(`Load timeout after ${timeoutMs}ms`));
+        }, timeoutMs);
+      });
+
+      const model = await Promise.race([loadPromise, timeoutPromise]);
+
+      // Clear timeout on success
+      if (timeoutId) {
+        clearTimeout(timeoutId);
+      }
+
+      const now = Date.now();
+      const cachedModel: CachedModel = {
+        uri,
+        type,
+        model,
+        loadedAt: now,
+      };
+
+      this.models.set(uri, cachedModel);
+      this.setDisposalTimer(uri);
+
+      return {
+        ok: true,
+        value: {
+          uri,
+          type,
+          model,
+          loadedAt: now,
+        },
+      };
+    } catch (e) {
+      // Clear timeout on error
+      if (timeoutId) {
+        clearTimeout(timeoutId);
+      }
+
+      // Dispose late-arriving model after timeout to prevent memory leak
+      if (timedOut && loadPromise) {
+        loadPromise.then(
+          (model) => {
+            // Dispose model that arrived after timeout
+            model.dispose().catch(() => {
+              // Ignore dispose errors
+            });
+          },
+          () => {
+            // Ignore load errors after timeout
+          }
+        );
+      }
+
+      if (e instanceof Error) {
+        if (e.message.includes('timeout')) {
+          return {
+            ok: false,
+            error: timeoutError(uri, 'load', this.config.loadTimeout),
+          };
+        }
+        if (e.message.includes('out of memory') || e.message.includes('OOM')) {
+          return { ok: false, error: outOfMemoryError(uri, e) };
+        }
+      }
+      return { ok: false, error: loadFailedError(uri, e) };
+    }
+  }
+
+  /**
+   * Get a loaded model by URI (no loading).
+   */
+  getLoadedModel(uri: string): CachedModel | undefined {
+    const model = this.models.get(uri);
+    if (model) {
+      this.resetDisposalTimer(uri);
+    }
+    return model;
+  }
+
+  /**
+   * Check if a model is loaded.
+   */
+  isLoaded(uri: string): boolean {
+    return this.models.has(uri);
+  }
+
+  /**
+   * Dispose a specific model.
+   */
+  async dispose(uri: string): Promise<void> {
+    const cached = this.models.get(uri);
+    if (!cached) {
+      return;
+    }
+
+    // Clear disposal timer
+    const timer = this.disposalTimers.get(uri);
+    if (timer) {
+      clearTimeout(timer);
+      this.disposalTimers.delete(uri);
+    }
+
+    // Dispose the model
+    try {
+      await cached.model.dispose();
+    } catch {
+      // Ignore disposal errors
+    }
+
+    this.models.delete(uri);
+  }
+
+  /**
+   * Dispose all loaded models.
+   */
+  async disposeAll(): Promise<void> {
+    // Clear all timers
+    for (const timer of this.disposalTimers.values()) {
+      clearTimeout(timer);
+    }
+    this.disposalTimers.clear();
+
+    // Dispose all models
+    for (const [uri, cached] of this.models) {
+      try {
+        await cached.model.dispose();
+      } catch {
+        // Ignore disposal errors
+      }
+      this.models.delete(uri);
+    }
+
+    // Clear llama instance
+    this.llama = null;
+  }
+
+  /**
+   * Get list of loaded models.
+   */
+  getLoadedModels(): Array<{ uri: string; type: ModelType; loadedAt: number }> {
+    return Array.from(this.models.values()).map((m) => ({
+      uri: m.uri,
+      type: m.type,
+      loadedAt: m.loadedAt,
+    }));
+  }
+
+  // ───────────────────────────────────────────────────────────────────────────
+  // Private
+  // ───────────────────────────────────────────────────────────────────────────
+
+  private setDisposalTimer(uri: string): void {
+    const timer = setTimeout(() => {
+      this.dispose(uri).catch(() => {
+        // Ignore disposal errors in timer callback
+      });
+    }, this.config.warmModelTtl);
+
+    // Allow CLI processes to exit without waiting for TTL timer
+    if (typeof timer.unref === 'function') {
+      timer.unref();
+    }
+
+    this.disposalTimers.set(uri, timer);
+  }
+
+  private resetDisposalTimer(uri: string): void {
+    const existing = this.disposalTimers.get(uri);
+    if (existing) {
+      clearTimeout(existing);
+    }
+    this.setDisposalTimer(uri);
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Singleton
+// ─────────────────────────────────────────────────────────────────────────────
+
+let defaultManager: ModelManager | null = null;
+
+/**
+ * Get the default ModelManager singleton.
+ */
+export function getModelManager(config: ModelConfig): ModelManager {
+  if (!defaultManager) {
+    defaultManager = new ModelManager(config);
+  }
+  return defaultManager;
+}
+
+/**
+ * Reset the default manager (for testing).
+ */
+export async function resetModelManager(): Promise<void> {
+  if (defaultManager) {
+    await defaultManager.disposeAll();
+    defaultManager = null;
+  }
+}
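Because `loadModel()` checks the `models` cache and then the `inflightLoads` map before starting a load, concurrent callers for the same URI share a single underlying `llama.loadModel()`, and every cache hit resets the `warmModelTtl` disposal timer. A sketch of that guarantee — the path and URI are placeholders, and the `ModelConfig` value is assumed to carry `loadTimeout` and `warmModelTtl` as used above:

```ts
import type { ModelConfig } from '../../config/types';
import { getModelManager } from './lifecycle';

// Placeholder path/uri; real values come from the model cache.
async function loadConcurrently(config: ModelConfig, path: string, uri: string) {
  const manager = getModelManager(config);

  // The second call awaits the first's in-flight promise,
  // so the weights are read from disk exactly once.
  const [a, b] = await Promise.all([
    manager.loadModel(path, uri, 'embed'),
    manager.loadModel(path, uri, 'embed'),
  ]);

  console.log(a.ok && b.ok && manager.isLoaded(uri)); // true on success
}
```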