moflo 4.10.2 → 4.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/guidance/shipped/moflo-core-guidance.md +16 -0
- package/README.md +20 -19
- package/bin/session-start-launcher.mjs +112 -5
- package/dist/src/cli/commands/doctor-checks-config.js +99 -21
- package/dist/src/cli/commands/doctor-fixes.js +43 -0
- package/dist/src/cli/commands/doctor-version.js +1 -1
- package/dist/src/cli/commands/index.js +0 -6
- package/dist/src/cli/init/executor.js +2 -2
- package/dist/src/cli/mcp-tools/swarm-tools.js +3 -4
- package/dist/src/cli/services/moflo-paths.js +6 -5
- package/dist/src/cli/services/moflo-require.js +2 -2
- package/dist/src/cli/shared/core/config/loader.js +2 -2
- package/dist/src/cli/version.js +1 -1
- package/package.json +2 -2
- package/dist/src/cli/appliance/gguf-engine.js +0 -425
- package/dist/src/cli/appliance/ruvllm-bridge.js +0 -231
- package/dist/src/cli/appliance/rvfa-builder.js +0 -325
- package/dist/src/cli/appliance/rvfa-distribution.js +0 -370
- package/dist/src/cli/appliance/rvfa-format.js +0 -393
- package/dist/src/cli/appliance/rvfa-runner.js +0 -238
- package/dist/src/cli/appliance/rvfa-signing.js +0 -351
- package/dist/src/cli/commands/appliance-advanced.js +0 -213
- package/dist/src/cli/commands/appliance.js +0 -404
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* MoFlo runtime state directory constants.
|
|
3
3
|
*
|
|
4
|
-
* MoFlo owns its state under `.moflo/` at the project root.
|
|
5
|
-
*
|
|
4
|
+
* MoFlo owns its state under `.moflo/` at the project root. Pre-#699 builds
|
|
5
|
+
* used `.claude-flow/`; both legacy locations are still recognized as
|
|
6
6
|
* read-only sources for the version-bump-gated cherry-pick (#851) but are
|
|
7
7
|
* never relocated or renamed automatically — leaving them in place gives
|
|
8
8
|
* consumers a recovery source and avoids the failure modes that motivated
|
|
@@ -27,11 +27,12 @@ export const MEMORY_DB_FILE = 'moflo.db';
|
|
|
27
27
|
/** HNSW persisted index sidecar. Lives next to the DB at `<root>/.moflo/hnsw.index`. */
|
|
28
28
|
export const HNSW_INDEX_FILE = 'hnsw.index';
|
|
29
29
|
/**
|
|
30
|
-
* Legacy runtime directory
|
|
31
|
-
* migration code paths — production code should use
|
|
30
|
+
* Legacy `.claude-flow/` runtime directory used by pre-#699 moflo builds.
|
|
31
|
+
* Only referenced from migration code paths — production code should use
|
|
32
|
+
* {@link MOFLO_DIR}.
|
|
32
33
|
*/
|
|
33
34
|
export const LEGACY_CLAUDE_FLOW_DIR = '.claude-flow';
|
|
34
|
-
/** Legacy `.swarm/` directory used by
|
|
35
|
+
/** Legacy `.swarm/` directory used by pre-#727 moflo builds for the memory DB. */
|
|
35
36
|
export const LEGACY_SWARM_DIR = '.swarm';
|
|
36
37
|
/** Legacy memory DB filename — only ever inside `.swarm/`. Pre-#727. */
|
|
37
38
|
export const LEGACY_MEMORY_DB_FILE = 'memory.db';
|
|
@@ -116,8 +116,8 @@ export function mofloResolve(specifier) {
|
|
|
116
116
|
// ≈ 6 hops to the moflo root. 12 gives headroom for worktree/monorepo layouts.
|
|
117
117
|
const MAX_WALK_DEPTH = 12;
|
|
118
118
|
// Names a package.json may carry while still being "us" — covers the moflo
|
|
119
|
-
// rename
|
|
120
|
-
const MOFLO_PACKAGE_NAMES = new Set(['moflo', 'claude-flow'
|
|
119
|
+
// rename from the pre-collapse claude-flow workspace.
|
|
120
|
+
const MOFLO_PACKAGE_NAMES = new Set(['moflo', 'claude-flow']);
|
|
121
121
|
// Walk up from this file's dir, returning the first non-null `test(dir)` result.
|
|
122
122
|
function walkUpFromSelf(test) {
|
|
123
123
|
let dir = dirname(fileURLToPath(import.meta.url));
|
|
@@ -11,8 +11,8 @@ import { defaultSystemConfig, mergeWithDefaults } from './defaults.js';
|
|
|
11
11
|
/**
|
|
12
12
|
* Configuration file names to search for. Canonical names come first;
|
|
13
13
|
* `claude-flow.*` names are kept as legacy fallback so consumers upgrading
|
|
14
|
-
* from
|
|
15
|
-
*
|
|
14
|
+
* from pre-#699 moflo builds keep loading their existing config without a
|
|
15
|
+
* manual rename.
|
|
16
16
|
*/
|
|
17
17
|
const CONFIG_FILE_NAMES = [
|
|
18
18
|
'moflo.config.json',
|
package/dist/src/cli/version.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "moflo",
|
|
3
|
-
"version": "4.10.2",
|
|
3
|
+
"version": "4.10.4",
|
|
4
4
|
"description": "MoFlo — AI agent orchestration for Claude Code. A standalone, opinionated toolkit with semantic memory, learned routing, gates, spells, and the /flo issue-execution skill.",
|
|
5
5
|
"main": "dist/src/cli/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -95,7 +95,7 @@
|
|
|
95
95
|
"@typescript-eslint/eslint-plugin": "^7.18.0",
|
|
96
96
|
"@typescript-eslint/parser": "^7.18.0",
|
|
97
97
|
"eslint": "^8.0.0",
|
|
98
|
-
"moflo": "^4.10.
|
|
98
|
+
"moflo": "^4.10.3",
|
|
99
99
|
"tsx": "^4.21.0",
|
|
100
100
|
"typescript": "^5.9.3",
|
|
101
101
|
"vitest": "^4.0.0"
|
|
@@ -1,425 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ruvLLM GGUF Inference Engine -- Pure Node.js GGUF Model Interface
|
|
3
|
-
*
|
|
4
|
-
* Provides:
|
|
5
|
-
* 1. GGUF binary header parsing (metadata without loading weights)
|
|
6
|
-
* 2. Model loading abstraction (node-llama-cpp when available, metadata-only fallback)
|
|
7
|
-
* 3. Token generation interface with async iterator streaming
|
|
8
|
-
* 4. KV-cache persistence to RVF-compatible binary format
|
|
9
|
-
*
|
|
10
|
-
* Zero external dependencies. node-llama-cpp is an optional peer.
|
|
11
|
-
*
|
|
12
|
-
* @module moflo/appliance/gguf-engine
|
|
13
|
-
*/
|
|
14
|
-
import { open, readFile, writeFile, stat as fsStat } from 'node:fs/promises';
|
|
15
|
-
import { createHash } from 'node:crypto';
|
|
16
|
-
import { basename } from 'node:path';
|
|
17
|
-
// ── GGUF Metadata Value Types ───────────────────────────────
|
|
18
|
-
/**
 * GGUF metadata value-type tags as a two-way lookup table:
 * `GgufValueType.STRING === 8` and `GgufValueType[8] === 'STRING'`.
 * A label's position in the list below is its numeric tag.
 */
const GgufValueType = {};
[
    'UINT8', 'INT8', 'UINT16', 'INT16', 'UINT32', 'INT32', 'FLOAT32',
    'BOOL', 'STRING', 'ARRAY', 'UINT64', 'INT64', 'FLOAT64',
].forEach((label, tag) => {
    GgufValueType[label] = tag;
    GgufValueType[tag] = label;
});
/** GGUF file magic: the ASCII bytes "GGUF" read as a little-endian u32. */
const GGUF_MAGIC = 0x46554747;
/** KV-cache file magic: the ASCII bytes "RVKV" read as a little-endian u32. */
const RVKV_MAGIC = 0x564B5652;
/** Version number written to, and required from, KV-cache files. */
const RVKV_VERSION = 1;
|
|
37
|
-
// ── Internal Buffer Reader ──────────────────────────────────
|
|
38
|
-
/**
 * Forward-only, little-endian reader over a Buffer.
 *
 * Every `read*` call decodes one value at the current `offset` and then
 * moves `offset` past it. If the underlying Buffer read throws (for
 * example because the value would run past the end), `offset` is left
 * where it was.
 */
class BufferReader {
    buf;
    offset = 0;
    constructor(buf) {
        this.buf = buf;
    }
    /** Number of bytes between the cursor and the end of the buffer. */
    get remaining() {
        return this.buf.length - this.offset;
    }
    /** Decode with the named Buffer method at the cursor, then advance by `width` bytes. */
    #take(method, width) {
        const value = this.buf[method](this.offset);
        this.offset += width;
        return value;
    }
    readU8() { return this.#take('readUInt8', 1); }
    readI8() { return this.#take('readInt8', 1); }
    readU16() { return this.#take('readUInt16LE', 2); }
    readI16() { return this.#take('readInt16LE', 2); }
    readU32() { return this.#take('readUInt32LE', 4); }
    readI32() { return this.#take('readInt32LE', 4); }
    readF32() { return this.#take('readFloatLE', 4); }
    readF64() { return this.#take('readDoubleLE', 8); }
    /** @returns {bigint} */
    readU64() { return this.#take('readBigUInt64LE', 8); }
    /** @returns {bigint} */
    readI64() { return this.#take('readBigInt64LE', 8); }
    /**
     * Read a u64 and convert it to a plain number. Exact only up to 2^53;
     * larger values lose precision in the conversion.
     */
    readU64AsNumber() {
        return Number(this.readU64());
    }
    /** Read one byte; any non-zero value is `true`. */
    readBool() {
        return this.readU8() !== 0;
    }
    /**
     * Read a GGUF string: a u64 little-endian byte length followed by that
     * many UTF-8 bytes.
     *
     * @returns {string} The decoded text ('' for a zero length).
     * @throws {Error} When the declared length is larger than what is left
     *   in the buffer.
     */
    readString() {
        const byteLength = this.readU64AsNumber();
        if (byteLength === 0) {
            return '';
        }
        if (byteLength > this.remaining) {
            throw new Error(`String length ${byteLength} exceeds remaining buffer`);
        }
        const start = this.offset;
        const end = start + byteLength;
        const text = this.buf.toString('utf-8', start, end);
        this.offset = end;
        return text;
    }
}
|
|
71
|
-
// ── GGUF Value Reading ──────────────────────────────────────
|
|
72
|
-
/**
 * Decode one non-array value of GGUF type `t` from `reader`.
 * Used both for plain values and for the elements of an array.
 *
 * 64-bit integers are converted to plain numbers.
 *
 * @param reader Cursor positioned at the value.
 * @param t Numeric GGUF value-type tag.
 * @returns The decoded value, or `undefined` when `t` is not a scalar tag
 *   handled here (nothing is consumed in that case).
 */
function readScalar(reader, t) {
    const T = GgufValueType;
    if (t === T.UINT8) return reader.readU8();
    if (t === T.INT8) return reader.readI8();
    if (t === T.UINT16) return reader.readU16();
    if (t === T.INT16) return reader.readI16();
    if (t === T.UINT32) return reader.readU32();
    if (t === T.INT32) return reader.readI32();
    if (t === T.FLOAT32) return reader.readF32();
    if (t === T.BOOL) return reader.readBool();
    if (t === T.STRING) return reader.readString();
    if (t === T.UINT64) return Number(reader.readU64());
    if (t === T.INT64) return Number(reader.readI64());
    if (t === T.FLOAT64) return reader.readF64();
    return undefined;
}
|
|
90
|
-
/**
 * Decode one tagged GGUF value: a u32 type tag followed by the payload.
 *
 * For arrays the payload is a u32 element tag, a u64 element count, and
 * then that many scalars of the element type.
 *
 * @param reader Cursor positioned at the type tag.
 * @returns The decoded scalar, or an array of decoded scalars.
 * @throws {Error} When the value tag or the array element tag is not a
 *   scalar type `readScalar` understands.
 */
function readGgufValue(reader) {
    const tag = reader.readU32();
    if (tag !== GgufValueType.ARRAY) {
        const scalar = readScalar(reader, tag);
        if (scalar === undefined) {
            throw new Error(`Unknown GGUF value type: ${tag}`);
        }
        return scalar;
    }
    const elementTag = reader.readU32();
    const count = reader.readU64AsNumber();
    const items = [];
    while (items.length < count) {
        const item = readScalar(reader, elementTag);
        if (item === undefined) {
            throw new Error(`Unknown GGUF array element type: ${elementTag}`);
        }
        items.push(item);
    }
    return items;
}
|
|
110
|
-
// ── GGUF Header Parsing ─────────────────────────────────────
|
|
111
|
-
/**
 * Parse the header and metadata from a GGUF file without loading tensors.
 * Reads at most the first 256 KB of the file.
 *
 * @param {string} path Path to the GGUF file.
 * @returns The parsed header as produced by `parseGgufBuffer`; `fileSize`
 *   is the size reported by `stat`, not the number of bytes read.
 * @throws When the file cannot be stat'ed, opened or read, or when
 *   `parseGgufBuffer` rejects its contents.
 */
export async function parseGgufHeader(path) {
    const fileInfo = await fsStat(path);
    const readSize = Math.min(fileInfo.size, 256 * 1024);
    const fh = await open(path, 'r');
    try {
        const buf = Buffer.alloc(readSize);
        // A single read() may return fewer bytes than requested, and the
        // file may have shrunk since stat(). Keep reading until the window
        // is full or EOF is hit, then parse only what was actually read so
        // the zero-filled tail is never mistaken for metadata.
        let filled = 0;
        while (filled < readSize) {
            const { bytesRead } = await fh.read(buf, filled, readSize - filled, filled);
            if (bytesRead === 0) {
                break;
            }
            filled += bytesRead;
        }
        const window = filled < readSize ? buf.subarray(0, filled) : buf;
        return parseGgufBuffer(window, fileInfo.size, path);
    }
    finally {
        await fh.close();
    }
}
|
|
128
|
-
/**
 * Parse a GGUF header from an in-memory prefix of the file.
 *
 * Layout consumed: magic u32, version u32, tensor count u64, key/value
 * count u64, then the key/value pairs. Parsing of pairs is best-effort:
 * it stops at the first pair that cannot be read, on the assumption that
 * the read window ended, so `metadata` may hold fewer than `kvCount`
 * entries.
 *
 * @param buf Leading bytes of the file.
 * @param fileSize Full file size, passed through to the result.
 * @param filePath File path, used only to guess the quantization.
 * @returns Header fields plus the raw `metadata` map.
 * @throws {Error} On a wrong magic number or a version other than 2 or 3.
 */
function parseGgufBuffer(buf, fileSize, filePath) {
    const reader = new BufferReader(buf);
    const magic = reader.readU32();
    if (magic !== GGUF_MAGIC) {
        throw new Error(`Invalid GGUF magic: 0x${magic.toString(16)} (expected 0x${GGUF_MAGIC.toString(16)})`);
    }
    const version = reader.readU32();
    if (version < 2 || version > 3) {
        throw new Error(`Unsupported GGUF version: ${version} (expected 2 or 3)`);
    }
    const tensorCount = reader.readU64AsNumber();
    const kvCount = reader.readU64AsNumber();
    const metadata = {};
    for (let i = 0; i < kvCount; i++) {
        // 8 bytes of key length + 4 bytes of value tag is the smallest
        // possible pair; anything shorter cannot be a complete entry.
        if (reader.remaining < 12) {
            break;
        }
        try {
            const key = reader.readString();
            const value = readGgufValue(reader);
            // Keys come from the file. Plain assignment of a key named
            // "__proto__" would hit the prototype setter instead of storing
            // the entry, so define an ordinary own property explicitly.
            Object.defineProperty(metadata, key, {
                value,
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        catch {
            break; // reached end of read window
        }
    }
    const arch = asString(metadata['general.architecture']);
    const pfx = arch || 'llama'; // fallback prefix for well-known keys
    return {
        magic: 'GGUF', version, tensorCount, kvCount,
        architecture: arch,
        name: asString(metadata['general.name']),
        contextLength: asNumber(metadata[`${pfx}.context_length`]),
        embeddingLength: asNumber(metadata[`${pfx}.embedding_length`]),
        blockCount: asNumber(metadata[`${pfx}.block_count`]),
        vocabSize: inferVocabSize(metadata),
        quantization: inferQuantFromMetadata(metadata, filePath),
        fileSize, metadata,
    };
}
|
|
166
|
-
// ── Metadata Helpers ────────────────────────────────────────
|
|
167
|
-
/** Pass a string through unchanged; map every other value to `undefined`. */
function asString(v) {
    if (typeof v !== 'string') {
        return undefined;
    }
    return v;
}
|
|
168
|
-
/** Pass a number (including NaN) through unchanged; map every other value to `undefined`. */
function asNumber(v) {
    if (typeof v !== 'number') {
        return undefined;
    }
    return v;
}
|
|
169
|
-
/**
 * Filename patterns and the quantization label each one implies.
 * Tested in order, first match wins. `bf16` must come before `f16`,
 * because the `f16` pattern also matches inside "bf16".
 */
const QUANT_RE = [
    [/q2_k/i, 'Q2_K'], [/q3_k_s/i, 'Q3_K_S'], [/q3_k_m/i, 'Q3_K_M'], [/q3_k_l/i, 'Q3_K_L'],
    [/q4_k_s/i, 'Q4_K_S'], [/q4_k_m/i, 'Q4_K_M'], [/q4_0/i, 'Q4_0'], [/q4_1/i, 'Q4_1'],
    [/q5_k_s/i, 'Q5_K_S'], [/q5_k_m/i, 'Q5_K_M'], [/q5_0/i, 'Q5_0'], [/q5_1/i, 'Q5_1'],
    [/q6_k/i, 'Q6_K'], [/q8_0/i, 'Q8_0'], [/bf16/i, 'BF16'], [/f16/i, 'F16'], [/f32/i, 'F32'],
];
/**
 * Work out a quantization label for a model.
 *
 * @param meta Parsed GGUF metadata map.
 * @param {string} filePath Path to the model file; only its base name is inspected.
 * @returns {string} `file_type_<n>` when `general.file_type` is a number;
 *   otherwise the label of the first filename pattern that matches;
 *   otherwise `'unknown'`.
 */
function inferQuantFromMetadata(meta, filePath) {
    const ft = meta['general.file_type'];
    if (typeof ft === 'number') {
        return `file_type_${ft}`;
    }
    // Match against the file name only, so directory names such as
    // ".../q8_0/model.gguf" cannot influence the result.
    const name = basename(filePath);
    const hit = QUANT_RE.find(([re]) => re.test(name));
    return hit ? hit[1] : 'unknown';
}
|
|
185
|
-
/**
 * Determine the vocabulary size from GGUF metadata.
 *
 * @param meta Parsed GGUF metadata map.
 * @returns The length of `tokenizer.ggml.tokens` when that entry is an
 *   array; otherwise `tokenizer.ggml.vocab_size` when it is a number;
 *   otherwise `undefined`.
 */
function inferVocabSize(meta) {
    const tokenList = meta['tokenizer.ggml.tokens'];
    if (Array.isArray(tokenList)) {
        return tokenList.length;
    }
    const declared = meta['tokenizer.ggml.vocab_size'];
    return typeof declared === 'number' ? declared : undefined;
}
|
|
191
|
-
// ── GGUF Engine ─────────────────────────────────────────────
|
|
192
|
-
/**
 * GGUF model front-end.
 *
 * Always able to parse GGUF headers; performs real inference only when the
 * optional `node-llama-cpp` module can be imported and a model has been
 * loaded through it. Otherwise `generate()` / `stream()` answer with a
 * "[metadata-only]" description of the active model.
 *
 * Also holds an in-memory key/value cache that can be written to and
 * restored from an "RVKV" binary file.
 */
export class GgufEngine {
    config;
    llamaCpp = null;
    llamaModel = null;
    llamaContext = null;
    loadedModels = new Map();
    activeModelPath = null;
    kvCache = new Map();
    /**
     * @param config Optional settings; every field falls back to a default
     *   when null/undefined: `contextSize` 4096, `maxTokens` 512,
     *   `temperature` 0.7, `kvCachePath` '' (no persistence), `verbose` false.
     */
    constructor(config = {}) {
        const options = config ?? {};
        this.config = {
            contextSize: options.contextSize ?? 4096,
            maxTokens: options.maxTokens ?? 512,
            temperature: options.temperature ?? 0.7,
            kvCachePath: options.kvCachePath ?? '',
            verbose: options.verbose ?? false,
        };
    }
    /** Probe for node-llama-cpp availability. */
    async initialize() {
        this.llamaCpp = await this.tryLoadLlamaCpp();
        if (this.config.verbose) {
            console.log(`[gguf-engine] node-llama-cpp: ${this.llamaCpp ? 'available' : 'not found (metadata-only mode)'}`);
        }
    }
    /**
     * Parse the GGUF header at `path`, make it the active model and, when
     * node-llama-cpp is available, load it for inference.
     *
     * A failure to load through node-llama-cpp is not an error: the engine
     * stays in metadata-only mode for this model.
     *
     * @param {string} path Path to the GGUF file.
     * @returns The parsed header metadata.
     * @throws When the header cannot be read or parsed.
     */
    async loadModel(path) {
        const meta = await parseGgufHeader(path);
        this.loadedModels.set(path, meta);
        this.activeModelPath = path;
        if (this.llamaCpp) {
            // Only one model/context pair is kept, so release the previous
            // pair before replacing it instead of dropping it undisposed.
            await this.releaseLlamaResources();
            try {
                const { getLlama } = this.llamaCpp;
                const llama = await getLlama();
                this.llamaModel = await llama.loadModel({ modelPath: path });
                this.llamaContext = await this.llamaModel.createContext({ contextSize: this.config.contextSize });
                if (this.config.verbose) {
                    console.log(`[gguf-engine] Model loaded: ${basename(path)}`);
                }
            }
            catch (err) {
                if (this.config.verbose) {
                    console.warn('[gguf-engine] node-llama-cpp load failed:', err);
                }
                // The model may have loaded before context creation failed;
                // dispose whatever exists and fall back to metadata-only.
                await this.releaseLlamaResources();
            }
        }
        return meta;
    }
    /**
     * Generate text for `request.prompt`.
     *
     * Uses node-llama-cpp when a model and context are loaded. If that is
     * not the case, or inference throws, returns a metadata-only stub.
     *
     * @param request `{ prompt, model?, maxTokens?, temperature?, stopSequences? }`.
     *   `model` only selects which loaded metadata/name is reported.
     * @returns `{ text, model, tokensUsed, latencyMs, metadataOnly }`.
     */
    async generate(request) {
        const start = performance.now();
        const modelPath = request.model ?? this.activeModelPath;
        const modelName = modelPath ? basename(modelPath) : 'none';
        if (this.llamaContext && this.llamaModel) {
            try {
                const session = new this.llamaCpp.LlamaChatSession({
                    contextSequence: this.llamaContext.getSequence(),
                });
                const text = await session.prompt(request.prompt, {
                    maxTokens: request.maxTokens ?? this.config.maxTokens,
                    temperature: request.temperature ?? this.config.temperature,
                    stopGenerationTrigger: request.stopSequences
                        ? request.stopSequences.map((s) => new this.llamaCpp.LlamaText([s]))
                        : undefined,
                });
                // Use llama.cpp tokenizer for accurate count when available, else estimate
                let tokensUsed;
                try {
                    const seq = this.llamaContext.getSequence();
                    tokensUsed = seq.tokenCount ?? Math.ceil(text.length / 4);
                }
                catch {
                    tokensUsed = Math.ceil(text.length / 4); // ~4 chars per token heuristic
                }
                return {
                    text, model: modelName, tokensUsed,
                    latencyMs: performance.now() - start, metadataOnly: false,
                };
            }
            catch (err) {
                if (this.config.verbose) {
                    console.warn('[gguf-engine] Generation failed:', err);
                }
            }
        }
        // Metadata-only fallback
        const meta = modelPath ? this.loadedModels.get(modelPath) : undefined;
        return {
            text: meta
                ? `[metadata-only] Model: ${meta.name ?? modelName}, arch: ${meta.architecture ?? 'unknown'}, ctx: ${meta.contextLength ?? 'unknown'}`
                : '[metadata-only] No model loaded',
            model: modelName, tokensUsed: 0,
            latencyMs: performance.now() - start, metadataOnly: true,
        };
    }
    /**
     * Stream generated text as an async iterator of string chunks.
     *
     * Falls back to yielding the full `generate()` response as one chunk
     * when streaming is unavailable or fails before producing anything.
     * If streaming fails after chunks were already yielded, the stream
     * simply ends: re-running `generate()` would hand the consumer the
     * beginning of the text a second time.
     *
     * @param request Same shape as for `generate()`.
     */
    async *stream(request) {
        if (this.llamaContext && this.llamaModel) {
            let emitted = false;
            try {
                const session = new this.llamaCpp.LlamaChatSession({
                    contextSequence: this.llamaContext.getSequence(),
                });
                // NOTE(review): if promptWithMeta returns a promise rather
                // than an async iterable, it is left un-awaited here and the
                // code below falls back to generate() -- confirm against the
                // node-llama-cpp version in use.
                const it = session.promptWithMeta(request.prompt, {
                    maxTokens: request.maxTokens ?? this.config.maxTokens,
                    temperature: request.temperature ?? this.config.temperature,
                });
                if (it && typeof it[Symbol.asyncIterator] === 'function') {
                    for await (const chunk of it) {
                        if (typeof chunk === 'string') {
                            emitted = true;
                            yield chunk;
                        }
                        else if (chunk?.text) {
                            emitted = true;
                            yield chunk.text;
                        }
                    }
                    return;
                }
            }
            catch (err) {
                if (emitted) {
                    if (this.config.verbose) {
                        console.warn('[gguf-engine] Streaming failed after partial output:', err);
                    }
                    return;
                }
                /* nothing emitted yet: fall through to single-chunk fallback */
            }
        }
        const response = await this.generate(request);
        yield response.text;
    }
    /**
     * Persist the KV cache to an RVF-compatible binary file.
     * Format: RVKV magic | version u32 | model SHA-256 (32B) | entry count u32
     *         entries: [key_len u32, val_len u32, key, val] | footer SHA-256 (32B)
     *
     * The "model SHA-256" is the hash of the active model's path string
     * (or of 'no-model'), not of the model file. The footer hashes the
     * entry bytes only.
     *
     * @param {string} [outputPath] Destination; defaults to `config.kvCachePath`.
     * @throws {Error} When neither `outputPath` nor `config.kvCachePath` is set.
     */
    async persistKvCache(outputPath) {
        const path = outputPath || this.config.kvCachePath;
        if (!path) {
            throw new Error('No KV cache output path specified');
        }
        const modelHash = createHash('sha256').update(this.activeModelPath ?? 'no-model').digest();
        const entryBufs = [];
        // NOTE(review): values are assumed to be Buffers / Uint8Arrays
        // (`value.length` is written as the byte count) -- setKvEntry does
        // not enforce this.
        for (const [key, value] of this.kvCache) {
            const keyBuf = Buffer.from(key, 'utf-8');
            const hdr = Buffer.alloc(8);
            hdr.writeUInt32LE(keyBuf.length, 0);
            hdr.writeUInt32LE(value.length, 4);
            entryBufs.push(hdr, keyBuf, value);
        }
        const entryData = Buffer.concat(entryBufs);
        const footer = createHash('sha256').update(entryData).digest();
        // 4 (magic) + 4 (version) + 32 (model hash) + 4 (entry count)
        const header = Buffer.alloc(44);
        header.writeUInt32LE(RVKV_MAGIC, 0);
        header.writeUInt32LE(RVKV_VERSION, 4);
        modelHash.copy(header, 8);
        header.writeUInt32LE(this.kvCache.size, 40);
        await writeFile(path, Buffer.concat([header, entryData, footer]));
        if (this.config.verbose) {
            console.log(`[gguf-engine] KV cache persisted: ${this.kvCache.size} entries`);
        }
    }
    /**
     * Restore the KV cache from a file written by `persistKvCache`,
     * replacing the current in-memory cache. The cache is only replaced
     * after the whole file has been parsed and its footer hash verified.
     *
     * @param {string} inputPath File to read.
     * @throws {Error} On a short file, wrong magic, unsupported version,
     *   truncated entries, missing footer, or footer hash mismatch.
     */
    async loadKvCache(inputPath) {
        const data = await readFile(inputPath);
        if (data.length < 44) {
            throw new Error('KV cache file too small');
        }
        const magic = data.readUInt32LE(0);
        if (magic !== RVKV_MAGIC) {
            throw new Error(`Invalid KV cache magic: 0x${magic.toString(16)}`);
        }
        const version = data.readUInt32LE(4);
        if (version !== RVKV_VERSION) {
            throw new Error(`Unsupported KV cache version: ${version}`);
        }
        const entryCount = data.readUInt32LE(40);
        let offset = 44;
        const entries = new Map();
        for (let i = 0; i < entryCount; i++) {
            if (offset + 8 > data.length) {
                throw new Error('KV cache file truncated');
            }
            const keyLen = data.readUInt32LE(offset);
            const valLen = data.readUInt32LE(offset + 4);
            offset += 8;
            if (offset + keyLen + valLen > data.length) {
                throw new Error('KV cache file truncated');
            }
            // Copy the value out so the entry does not keep the whole file
            // buffer alive through a shared view.
            entries.set(data.toString('utf-8', offset, offset + keyLen), Buffer.from(data.subarray(offset + keyLen, offset + keyLen + valLen)));
            offset += keyLen + valLen;
        }
        // Verify footer hash (mandatory)
        if (offset + 32 > data.length) {
            throw new Error('KV cache file missing SHA256 footer');
        }
        const stored = data.subarray(offset, offset + 32);
        const computed = createHash('sha256').update(data.subarray(44, offset)).digest();
        if (!stored.equals(computed)) {
            throw new Error('KV cache integrity check failed: hash mismatch');
        }
        this.kvCache = entries;
        if (this.config.verbose) {
            console.log(`[gguf-engine] KV cache loaded: ${entries.size} entries`);
        }
    }
    /** Return metadata for all loaded models. */
    getLoadedModels() { return Array.from(this.loadedModels.values()); }
    /** Store a key-value pair in the in-memory KV cache. */
    setKvEntry(key, value) { this.kvCache.set(key, value); }
    /** Retrieve a key-value pair from the in-memory KV cache. */
    getKvEntry(key) { return this.kvCache.get(key); }
    /**
     * Release resources, unload models, and persist the KV cache first when
     * `config.kvCachePath` is set and the cache is non-empty. A failed
     * persist is logged (verbose only) and does not stop the shutdown.
     */
    async shutdown() {
        if (this.config.kvCachePath && this.kvCache.size > 0) {
            try {
                await this.persistKvCache(this.config.kvCachePath);
            }
            catch (err) {
                if (this.config.verbose) {
                    console.warn('[gguf-engine] KV persist failed:', err);
                }
            }
        }
        await this.releaseLlamaResources();
        this.activeModelPath = null;
        this.loadedModels.clear();
        this.kvCache.clear();
        if (this.config.verbose) {
            console.log('[gguf-engine] Shutdown complete');
        }
    }
    // ── Private ───────────────────────────────────────────────
    /**
     * Dispose the current context and model (context first), ignoring
     * dispose errors, and clear both fields.
     */
    async releaseLlamaResources() {
        const handles = [this.llamaContext, this.llamaModel];
        for (const handle of handles) {
            if (handle?.dispose) {
                try {
                    await handle.dispose();
                }
                catch { /* best-effort cleanup */ }
            }
        }
        this.llamaContext = null;
        this.llamaModel = null;
    }
    /** Import node-llama-cpp if it is installed; resolve to `null` otherwise. */
    async tryLoadLlamaCpp() {
        // @ts-ignore -- optional peer dependency, may not be installed
        try {
            return await import('node-llama-cpp');
        }
        catch {
            return null;
        }
    }
}
|
|
425
|
-
//# sourceMappingURL=gguf-engine.js.map
|