@agorapete/wllama 3.5.1-q2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/.prettierignore +38 -0
- package/AGENTS.md +1 -0
- package/CMakeLists.txt +131 -0
- package/LICENCE +21 -0
- package/README-dev.md +178 -0
- package/README.md +225 -0
- package/README_banner.png +0 -0
- package/assets/screenshot_0.png +0 -0
- package/cpp/generate_glue_prototype.js +115 -0
- package/cpp/glue.hpp +664 -0
- package/cpp/test_glue.cpp +80 -0
- package/cpp/wllama-context.h +1172 -0
- package/cpp/wllama-fs.h +148 -0
- package/cpp/wllama.cpp +187 -0
- package/cpp/wllama.h +6 -0
- package/esm/cache-manager.d.ts +130 -0
- package/esm/debug.d.ts +28 -0
- package/esm/glue/glue.d.ts +22 -0
- package/esm/glue/messages.d.ts +146 -0
- package/esm/huggingface.d.ts +31 -0
- package/esm/index.cjs +3406 -0
- package/esm/index.d.ts +8 -0
- package/esm/index.js +3387 -0
- package/esm/index.min.js +1 -0
- package/esm/index.min.js.map +1 -0
- package/esm/model-manager.d.ts +136 -0
- package/esm/storage/cos.d.ts +36 -0
- package/esm/storage/index.d.ts +33 -0
- package/esm/storage/opfs.d.ts +12 -0
- package/esm/types/oai-compat.d.ts +278 -0
- package/esm/types/types.d.ts +112 -0
- package/esm/utils.d.ts +119 -0
- package/esm/wasm/source-map.d.ts +1 -0
- package/esm/wasm/wllama.wasm +0 -0
- package/esm/wasm-from-cdn.d.ts +8 -0
- package/esm/wllama.d.ts +397 -0
- package/esm/worker.d.ts +92 -0
- package/esm/workers-code/generated.d.ts +4 -0
- package/guides/intro-v2.md +132 -0
- package/guides/intro-v3.1.md +40 -0
- package/guides/intro-v3.md +230 -0
- package/index.ts +1 -0
- package/package.json +71 -0
- package/scripts/bisect_test.sh +33 -0
- package/scripts/build_hf_space.sh +26 -0
- package/scripts/build_source_map.js +269 -0
- package/scripts/build_wasm.sh +19 -0
- package/scripts/build_worker.sh +38 -0
- package/scripts/check_debug_build.js +30 -0
- package/scripts/check_package_size.js +25 -0
- package/scripts/docker-compose.yml +76 -0
- package/scripts/generate_wasm_from_cdn.js +24 -0
- package/scripts/http_server.js +44 -0
- package/scripts/post_build.sh +32 -0
- package/src/cache-manager.ts +358 -0
- package/src/debug.ts +111 -0
- package/src/glue/glue.ts +291 -0
- package/src/glue/messages.ts +773 -0
- package/src/huggingface.ts +151 -0
- package/src/index.ts +8 -0
- package/src/mjs.test.ts +44 -0
- package/src/model-manager.test.ts +200 -0
- package/src/model-manager.ts +359 -0
- package/src/storage/cos.test.ts +83 -0
- package/src/storage/cos.ts +171 -0
- package/src/storage/index.ts +40 -0
- package/src/storage/opfs.ts +119 -0
- package/src/types/oai-compat.ts +342 -0
- package/src/types/types.ts +133 -0
- package/src/utils.test.ts +231 -0
- package/src/utils.ts +403 -0
- package/src/wasm/source-map.ts +7 -0
- package/src/wasm/wllama.js +1 -0
- package/src/wasm/wllama.wasm +0 -0
- package/src/wasm-from-cdn.ts +13 -0
- package/src/wllama.test.ts +392 -0
- package/src/wllama.ts +1138 -0
- package/src/wllama.wgpu.test.ts +62 -0
- package/src/worker.ts +443 -0
- package/src/workers-code/generated.ts +11 -0
- package/src/workers-code/llama-cpp.js +511 -0
- package/src/workers-code/opfs-utils.js +150 -0
- package/tsconfig.build.json +34 -0
- package/tsup.config.ts +23 -0
- package/vitest.config.ts +61 -0
package/cpp/glue.hpp
ADDED
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
* Simple serializer / deserializer inspired by protobuf
|
|
4
|
+
*
|
|
5
|
+
* Structure:
|
|
6
|
+
* - 4 bytes magic number (GLUE_MAGIC)
|
|
7
|
+
* - 4 bytes version number (GLUE_VERSION)
|
|
8
|
+
* - 8 bytes message prototype ID
|
|
9
|
+
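* - 4 bytes message length, unsigned number (note: glue_handler::serialize() and deserialize() in this header neither write nor read this field)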
|
|
10
|
+
* - message data
|
|
11
|
+
*
|
|
12
|
+
* Each field in the message is encoded as:
|
|
13
|
+
* - 4 bytes data type
|
|
14
|
+
* - 4 bytes size, unsigned number (only for array and string)
|
|
15
|
+
* - data
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
22
|
+
|
|
23
|
+
// reserved for future, do not edit the version number for now
|
|
24
|
+
// Format version: written right after the magic by glue_handler::serialize and
// required to match exactly in glue_handler::deserialize.
#define GLUE_VERSION 1
|
|
25
|
+
|
|
26
|
+
// First u32 of every message. append_u32 stores it in native byte order, so on a
// little-endian target the four bytes read "GLUE".
#define GLUE_MAGIC 0x45554c47 // "GLUE"
|
|
27
|
+
// Number of bytes of the prototype ID that follows the version field.
#define GLUE_PROTO_ID_LEN 8
|
|
28
|
+
|
|
29
|
+
// Debug logging hook. Call sites in this header pass printf-style format strings.
// Unless GLUE_DEBUG is already defined before this point, it expands to nothing.
#ifndef GLUE_DEBUG
|
|
30
|
+
#define GLUE_DEBUG(...)
|
|
31
|
+
#endif
|
|
32
|
+
|
|
33
|
+
// Converts a bit width to a byte count using integer division (32 -> 4).
#define BITS_TO_BYTES(x) ((x) / 8)
|
|
34
|
+
|
|
35
|
+
// Data types
|
|
36
|
+
// Note: we're doing polymorphism using enum to prevent using virtual functions
|
|
37
|
+
|
|
38
|
+
// Type tag written as a u32 in front of every serialized field.
// The numeric values are part of the wire format, so they are spelled out
// explicitly; do not renumber or reorder them.
enum glue_dtype
{
  GLUE_DTYPE_NULL = 0,
  GLUE_DTYPE_BOOL = 1,
  GLUE_DTYPE_INT = 2,
  GLUE_DTYPE_FLOAT = 3,
  GLUE_DTYPE_STRING = 4,
  GLUE_DTYPE_RAW = 5,
  GLUE_DTYPE_ARRAY_BOOL = 6,
  GLUE_DTYPE_ARRAY_INT = 7,
  GLUE_DTYPE_ARRAY_FLOAT = 8,
  GLUE_DTYPE_ARRAY_STRING = 9,
  GLUE_DTYPE_ARRAY_RAW = 10,
};
|
|
52
|
+
|
|
53
|
+
// Read-only byte pointer into a serialized message; used as the base and cursor of glue_inbuf.
using glue_data_ptr = const char *;
|
|
54
|
+
|
|
55
|
+
struct glue_outbuf
|
|
56
|
+
{
|
|
57
|
+
std::vector<char> data;
|
|
58
|
+
glue_outbuf()
|
|
59
|
+
{
|
|
60
|
+
data.reserve(1024);
|
|
61
|
+
}
|
|
62
|
+
void append(const char *val, size_t size)
|
|
63
|
+
{
|
|
64
|
+
GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
|
|
65
|
+
data.insert(data.end(), val, val + size);
|
|
66
|
+
}
|
|
67
|
+
void append_str(const std::string &val)
|
|
68
|
+
{
|
|
69
|
+
GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
|
|
70
|
+
data.insert(data.end(), val.begin(), val.end());
|
|
71
|
+
}
|
|
72
|
+
void append_u32(uint32_t val)
|
|
73
|
+
{
|
|
74
|
+
GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
|
|
75
|
+
data.insert(data.end(), (char *)&val, (char *)&val + BITS_TO_BYTES(32));
|
|
76
|
+
}
|
|
77
|
+
void append_i32(int32_t val)
|
|
78
|
+
{
|
|
79
|
+
GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
|
|
80
|
+
data.insert(data.end(), (char *)&val, (char *)&val + BITS_TO_BYTES(32));
|
|
81
|
+
}
|
|
82
|
+
void append_f32(float val)
|
|
83
|
+
{
|
|
84
|
+
GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
|
|
85
|
+
data.insert(data.end(), (char *)&val, (char *)&val + BITS_TO_BYTES(32));
|
|
86
|
+
}
|
|
87
|
+
void clear() {
|
|
88
|
+
data.clear();
|
|
89
|
+
data.reserve(1024);
|
|
90
|
+
}
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
struct glue_inbuf
|
|
94
|
+
{
|
|
95
|
+
glue_data_ptr base;
|
|
96
|
+
glue_data_ptr cur;
|
|
97
|
+
glue_inbuf(glue_data_ptr data) : base(data), cur(data) {}
|
|
98
|
+
uint32_t read_u32()
|
|
99
|
+
{
|
|
100
|
+
GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
|
|
101
|
+
uint32_t val = *(uint32_t *)cur;
|
|
102
|
+
cur += BITS_TO_BYTES(32);
|
|
103
|
+
return val;
|
|
104
|
+
}
|
|
105
|
+
int32_t read_i32()
|
|
106
|
+
{
|
|
107
|
+
GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
|
|
108
|
+
int32_t val = *(int32_t *)cur;
|
|
109
|
+
cur += BITS_TO_BYTES(32);
|
|
110
|
+
return val;
|
|
111
|
+
}
|
|
112
|
+
float read_f32()
|
|
113
|
+
{
|
|
114
|
+
GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
|
|
115
|
+
float val = *(float *)cur;
|
|
116
|
+
cur += BITS_TO_BYTES(32);
|
|
117
|
+
return val;
|
|
118
|
+
}
|
|
119
|
+
std::string read_str(uint32_t size)
|
|
120
|
+
{
|
|
121
|
+
GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
|
|
122
|
+
std::string val(cur, size);
|
|
123
|
+
cur += size;
|
|
124
|
+
return val;
|
|
125
|
+
}
|
|
126
|
+
std::vector<char> read_raw(uint32_t size)
|
|
127
|
+
{
|
|
128
|
+
GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
|
|
129
|
+
std::vector<char> val;
|
|
130
|
+
val.reserve(size);
|
|
131
|
+
val.insert(val.end(), cur, cur + size);
|
|
132
|
+
cur += size;
|
|
133
|
+
return val;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// for array
|
|
137
|
+
void read(uint32_t &out) { out = read_u32(); }
|
|
138
|
+
void read(int32_t &out) { out = read_i32(); }
|
|
139
|
+
void read(float &out) { out = read_f32(); }
|
|
140
|
+
void read(std::string &out)
|
|
141
|
+
{
|
|
142
|
+
uint32_t size = read_u32();
|
|
143
|
+
out = read_str(size);
|
|
144
|
+
}
|
|
145
|
+
void read(std::vector<char> &out)
|
|
146
|
+
{
|
|
147
|
+
uint32_t size = read_u32();
|
|
148
|
+
out = read_raw(size);
|
|
149
|
+
}
|
|
150
|
+
void read(std::vector<uint8_t> &out)
|
|
151
|
+
{
|
|
152
|
+
uint32_t size = read_u32();
|
|
153
|
+
auto tmp = read_raw(size);
|
|
154
|
+
out.assign((uint8_t*)tmp.data(), (uint8_t*)tmp.data() + tmp.size());
|
|
155
|
+
}
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
struct glue_type_base;
|
|
159
|
+
struct glue_handler
|
|
160
|
+
{
|
|
161
|
+
const char *name = nullptr;
|
|
162
|
+
std::vector<glue_type_base *> fields;
|
|
163
|
+
|
|
164
|
+
glue_handler(const char *name) : name(name) {}
|
|
165
|
+
void register_field(glue_type_base *field)
|
|
166
|
+
{
|
|
167
|
+
fields.push_back(field);
|
|
168
|
+
};
|
|
169
|
+
void serialize(glue_outbuf &output);
|
|
170
|
+
void deserialize(glue_inbuf &input);
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
struct glue_type_base
|
|
174
|
+
{
|
|
175
|
+
const char *name = nullptr;
|
|
176
|
+
glue_dtype dtype = GLUE_DTYPE_NULL;
|
|
177
|
+
glue_handler handler;
|
|
178
|
+
|
|
179
|
+
glue_type_base() = delete;
|
|
180
|
+
glue_type_base(const char *name, glue_handler &handler, glue_dtype dtype) : name(name), handler(handler), dtype(dtype)
|
|
181
|
+
{
|
|
182
|
+
handler.register_field(this);
|
|
183
|
+
}
|
|
184
|
+
bool is_null() { return dtype == GLUE_DTYPE_NULL; }
|
|
185
|
+
bool not_null() { return !is_null(); }
|
|
186
|
+
void set_null() { dtype = GLUE_DTYPE_NULL; }
|
|
187
|
+
bool parse_type(glue_inbuf &input)
|
|
188
|
+
{
|
|
189
|
+
dtype = (glue_dtype)input.read_u32();
|
|
190
|
+
if (dtype == GLUE_DTYPE_NULL)
|
|
191
|
+
{
|
|
192
|
+
GLUE_DEBUG(" >> null\n");
|
|
193
|
+
return true;
|
|
194
|
+
}
|
|
195
|
+
return false;
|
|
196
|
+
}
|
|
197
|
+
};
|
|
198
|
+
|
|
199
|
+
struct glue_bool : glue_type_base
|
|
200
|
+
{
|
|
201
|
+
bool value = false;
|
|
202
|
+
|
|
203
|
+
glue_bool(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_BOOL) {}
|
|
204
|
+
void parse(glue_inbuf &input)
|
|
205
|
+
{
|
|
206
|
+
if (parse_type(input))
|
|
207
|
+
return;
|
|
208
|
+
value = (bool)input.read_u32();
|
|
209
|
+
GLUE_DEBUG(" >> bool %d\n", value);
|
|
210
|
+
}
|
|
211
|
+
void serialize(glue_outbuf &output)
|
|
212
|
+
{
|
|
213
|
+
GLUE_DEBUG(" << bool %d\n", value);
|
|
214
|
+
output.append_u32(dtype);
|
|
215
|
+
output.append_u32(value);
|
|
216
|
+
}
|
|
217
|
+
};
|
|
218
|
+
|
|
219
|
+
struct glue_int : glue_type_base
|
|
220
|
+
{
|
|
221
|
+
int32_t value = 0;
|
|
222
|
+
|
|
223
|
+
glue_int(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_INT) {}
|
|
224
|
+
void parse(glue_inbuf &input)
|
|
225
|
+
{
|
|
226
|
+
if (parse_type(input))
|
|
227
|
+
return;
|
|
228
|
+
value = input.read_i32();
|
|
229
|
+
GLUE_DEBUG(" >> int %d\n", value);
|
|
230
|
+
}
|
|
231
|
+
void serialize(glue_outbuf &output)
|
|
232
|
+
{
|
|
233
|
+
GLUE_DEBUG(" << int %d\n", value);
|
|
234
|
+
output.append_u32(dtype);
|
|
235
|
+
output.append_i32(value);
|
|
236
|
+
}
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
struct glue_float : glue_type_base
|
|
240
|
+
{
|
|
241
|
+
float value = 0.0f;
|
|
242
|
+
|
|
243
|
+
glue_float(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_FLOAT) {}
|
|
244
|
+
void parse(glue_inbuf &input)
|
|
245
|
+
{
|
|
246
|
+
if (parse_type(input))
|
|
247
|
+
return;
|
|
248
|
+
value = input.read_f32();
|
|
249
|
+
GLUE_DEBUG(" >> float %f\n", value);
|
|
250
|
+
}
|
|
251
|
+
void serialize(glue_outbuf &output)
|
|
252
|
+
{
|
|
253
|
+
GLUE_DEBUG(" << float %f\n", value);
|
|
254
|
+
output.append_u32(dtype);
|
|
255
|
+
output.append_f32(value);
|
|
256
|
+
}
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
struct glue_str : glue_type_base
|
|
260
|
+
{
|
|
261
|
+
std::string value;
|
|
262
|
+
|
|
263
|
+
glue_str(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_STRING) {}
|
|
264
|
+
void parse(glue_inbuf &input)
|
|
265
|
+
{
|
|
266
|
+
if (parse_type(input))
|
|
267
|
+
return;
|
|
268
|
+
uint32_t size = input.read_u32();
|
|
269
|
+
value = input.read_str(size);
|
|
270
|
+
GLUE_DEBUG(" >> string %s\n", value.c_str());
|
|
271
|
+
}
|
|
272
|
+
void serialize(glue_outbuf &output)
|
|
273
|
+
{
|
|
274
|
+
GLUE_DEBUG(" << string %s\n", value.c_str());
|
|
275
|
+
output.append_u32(dtype);
|
|
276
|
+
output.append_u32(value.size());
|
|
277
|
+
output.append_str(value);
|
|
278
|
+
}
|
|
279
|
+
};
|
|
280
|
+
|
|
281
|
+
struct glue_raw : glue_type_base
|
|
282
|
+
{
|
|
283
|
+
std::vector<char> buf;
|
|
284
|
+
|
|
285
|
+
glue_raw(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_RAW) {}
|
|
286
|
+
void parse(glue_inbuf &input)
|
|
287
|
+
{
|
|
288
|
+
if (parse_type(input))
|
|
289
|
+
return;
|
|
290
|
+
uint32_t size = input.read_u32();
|
|
291
|
+
buf = input.read_raw(size);
|
|
292
|
+
GLUE_DEBUG(" >> raw, size = %zu\n", buf.size());
|
|
293
|
+
}
|
|
294
|
+
void serialize(glue_outbuf &output)
|
|
295
|
+
{
|
|
296
|
+
GLUE_DEBUG(" << raw, size = %zu\n", buf.size());
|
|
297
|
+
output.append_u32(dtype);
|
|
298
|
+
output.append_u32(buf.size());
|
|
299
|
+
output.append(buf.data(), buf.size());
|
|
300
|
+
}
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
template <typename T>
|
|
304
|
+
struct glue_arr : glue_type_base
|
|
305
|
+
{
|
|
306
|
+
std::vector<T> arr;
|
|
307
|
+
std::function<void(T &, glue_outbuf &)> serialize_elem;
|
|
308
|
+
|
|
309
|
+
glue_arr(const char *name, glue_handler &handler, glue_dtype dtype) : glue_type_base(name, handler, dtype) {}
|
|
310
|
+
void parse(glue_inbuf &input)
|
|
311
|
+
{
|
|
312
|
+
if (parse_type(input))
|
|
313
|
+
return;
|
|
314
|
+
uint32_t size = input.read_u32();
|
|
315
|
+
GLUE_DEBUG(" >> array[%u]\n", size);
|
|
316
|
+
arr.reserve(size);
|
|
317
|
+
for (uint32_t i = 0; i < size; i++)
|
|
318
|
+
{
|
|
319
|
+
T elem;
|
|
320
|
+
input.read(elem);
|
|
321
|
+
arr.push_back(std::move(elem));
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
void serialize(glue_outbuf &output)
|
|
325
|
+
{
|
|
326
|
+
GLUE_DEBUG(" << array[%zu]\n", arr.size());
|
|
327
|
+
output.append_u32(dtype);
|
|
328
|
+
output.append_u32(arr.size());
|
|
329
|
+
for (auto elem : arr)
|
|
330
|
+
{
|
|
331
|
+
serialize_elem(elem, output);
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
};
|
|
335
|
+
|
|
336
|
+
#define DEF_GLUE_ARR(tname, dtype, enum_type, serialize_fn) \
|
|
337
|
+
struct glue_arr_##tname : glue_arr<dtype> \
|
|
338
|
+
{ \
|
|
339
|
+
glue_arr_##tname(const char *name, glue_handler &handler) : glue_arr<dtype>(name, handler, enum_type) \
|
|
340
|
+
{ \
|
|
341
|
+
serialize_elem = [](dtype & elem, glue_outbuf & output) serialize_fn; \
|
|
342
|
+
} \
|
|
343
|
+
};
|
|
344
|
+
|
|
345
|
+
DEF_GLUE_ARR(bool, uint32_t, GLUE_DTYPE_ARRAY_BOOL, {
|
|
346
|
+
output.append_u32(elem);
|
|
347
|
+
})
|
|
348
|
+
DEF_GLUE_ARR(int, int32_t, GLUE_DTYPE_ARRAY_INT, {
|
|
349
|
+
output.append_i32(elem);
|
|
350
|
+
})
|
|
351
|
+
DEF_GLUE_ARR(float, float, GLUE_DTYPE_ARRAY_FLOAT, {
|
|
352
|
+
output.append_f32(elem);
|
|
353
|
+
})
|
|
354
|
+
DEF_GLUE_ARR(str, std::string, GLUE_DTYPE_ARRAY_STRING, {
|
|
355
|
+
output.append_u32(elem.size());
|
|
356
|
+
output.append_str(elem);
|
|
357
|
+
})
|
|
358
|
+
DEF_GLUE_ARR(raw, std::vector<uint8_t>, GLUE_DTYPE_ARRAY_RAW, {
|
|
359
|
+
output.append_u32(elem.size());
|
|
360
|
+
output.append((const char*)elem.data(), elem.size());
|
|
361
|
+
})
|
|
362
|
+
|
|
363
|
+
// Message base
|
|
364
|
+
|
|
365
|
+
/**
 * Writes the whole message into `output`, replacing its previous content.
 *
 * Layout: GLUE_MAGIC (u32), GLUE_VERSION (u32), the first GLUE_PROTO_ID_LEN
 * bytes of `name`, then every registered field in registration order. A field
 * whose dtype is GLUE_DTYPE_NULL is written as a bare null tag with no payload.
 * `name` must point to at least GLUE_PROTO_ID_LEN readable bytes.
 *
 * Declared inline because this definition lives in a header; without it,
 * including the header from two translation units is a multiple-definition
 * error at link time.
 */
inline void glue_handler::serialize(glue_outbuf &output)
{
  output.clear();
  output.append_u32(GLUE_MAGIC);
  output.append_u32(GLUE_VERSION);
  output.append(name, GLUE_PROTO_ID_LEN);
  GLUE_DEBUG("Serializing message %s\n", name);
  GLUE_DEBUG("Fields: %zu\n", fields.size());
  for (glue_type_base *field : fields)
  {
    GLUE_DEBUG("Serializing field %s, type = %d\n", field->name, field->dtype);
    // dtype identifies the concrete type of the field, so the downcasts below
    // are valid as long as dtype was only ever changed via set_null/parse.
    switch (field->dtype)
    {
    case GLUE_DTYPE_NULL:
      output.append_u32(GLUE_DTYPE_NULL);
      break;
    case GLUE_DTYPE_BOOL:
      static_cast<glue_bool *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_INT:
      static_cast<glue_int *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_FLOAT:
      static_cast<glue_float *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_STRING:
      static_cast<glue_str *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_RAW:
      static_cast<glue_raw *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_ARRAY_BOOL:
      static_cast<glue_arr_bool *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_ARRAY_INT:
      static_cast<glue_arr_int *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_ARRAY_FLOAT:
      static_cast<glue_arr_float *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_ARRAY_STRING:
      static_cast<glue_arr_str *>(field)->serialize(output);
      break;
    case GLUE_DTYPE_ARRAY_RAW:
      static_cast<glue_arr_raw *>(field)->serialize(output);
      break;
    }
  }
}
|
|
414
|
+
|
|
415
|
+
/**
 * Reads a message from `input` into the registered fields.
 *
 * Checks the magic number, the version and the prototype ID, then parses the
 * fields in registration order. Dispatch is on each field's *current* dtype,
 * not on the tag found in the message.
 *
 * NOTE(review): a field that is currently GLUE_DTYPE_NULL only has its type
 * tag consumed. If the message carries a payload for it, that payload is not
 * read and the following fields are parsed from the wrong position.
 *
 * @throws std::runtime_error on a bad magic number, a version other than
 *         GLUE_VERSION, or a prototype ID different from `name`.
 *
 * Declared inline because this definition lives in a header.
 */
inline void glue_handler::deserialize(glue_inbuf &input)
{
  const uint32_t magic = input.read_u32();
  if (magic != GLUE_MAGIC)
  {
    throw std::runtime_error("Invalid magic number");
  }

  const uint32_t version = input.read_u32();
  if (version != GLUE_VERSION)
  {
    throw std::runtime_error("Version mismatch");
  }

  const std::string proto_id = input.read_str(GLUE_PROTO_ID_LEN);
  if (proto_id != name)
  {
    throw std::runtime_error("Prototype ID mismatch " + proto_id + " != " + name);
  }

  GLUE_DEBUG("Deserializing message %s\n", name);
  for (glue_type_base *field : fields)
  {
    GLUE_DEBUG("Deserializing field %s, type = %d\n", field->name, field->dtype);
    switch (field->dtype)
    {
    case GLUE_DTYPE_NULL:
      field->parse_type(input);
      break;
    case GLUE_DTYPE_BOOL:
      static_cast<glue_bool *>(field)->parse(input);
      break;
    case GLUE_DTYPE_INT:
      static_cast<glue_int *>(field)->parse(input);
      break;
    case GLUE_DTYPE_FLOAT:
      static_cast<glue_float *>(field)->parse(input);
      break;
    case GLUE_DTYPE_STRING:
      static_cast<glue_str *>(field)->parse(input);
      break;
    case GLUE_DTYPE_RAW:
      static_cast<glue_raw *>(field)->parse(input);
      // This break was missing: a raw field fell through and was parsed a
      // second time as a bool array, consuming bytes that belong to the next
      // field and treating a glue_raw object as a glue_arr_bool.
      break;
    case GLUE_DTYPE_ARRAY_BOOL:
      static_cast<glue_arr_bool *>(field)->parse(input);
      break;
    case GLUE_DTYPE_ARRAY_INT:
      static_cast<glue_arr_int *>(field)->parse(input);
      break;
    case GLUE_DTYPE_ARRAY_FLOAT:
      static_cast<glue_arr_float *>(field)->parse(input);
      break;
    case GLUE_DTYPE_ARRAY_STRING:
      static_cast<glue_arr_str *>(field)->parse(input);
      break;
    case GLUE_DTYPE_ARRAY_RAW:
      static_cast<glue_arr_raw *>(field)->parse(input);
      break;
    }
  }
}
|
|
475
|
+
|
|
476
|
+
template <std::size_t N>
|
|
477
|
+
constexpr auto &PROTO_ID(char const (&s)[N])
|
|
478
|
+
{
|
|
479
|
+
static_assert(N == GLUE_PROTO_ID_LEN + 1, "Prototype ID must be 8 characters long");
|
|
480
|
+
return s;
|
|
481
|
+
}
|
|
482
|
+
// Declares a member `glue_<type> name`, constructed with the stringified member
// name and the enclosing struct's `handler` member, which the field registers
// itself with. The expansion already ends with `;`.
#define GLUE_FIELD(type, name) glue_##type name = glue_##type(#name, handler);
|
|
483
|
+
// Expands to exactly the same declaration as GLUE_FIELD in this header; the
// separate name only marks the field as nullable for the reader.
#define GLUE_FIELD_NULLABLE(type, name) glue_##type name = glue_##type(#name, handler);
|
|
484
|
+
// Declares the `handler` member that the field macros refer to. `name` must be a
// string literal accepted by PROTO_ID (exactly GLUE_PROTO_ID_LEN characters).
#define GLUE_HANDLER(name) glue_handler handler = glue_handler(PROTO_ID(name));
|
|
485
|
+
|
|
486
|
+
// Message for events
|
|
487
|
+
|
|
488
|
+
// Error message (prototype ID "erro_evt") carrying a single string field.
struct glue_msg_error
|
|
489
|
+
{
|
|
490
|
+
GLUE_HANDLER("erro_evt")
|
|
491
|
+
GLUE_FIELD(str, message)
|
|
492
|
+
};
|
|
493
|
+
|
|
494
|
+
// Message for actions
|
|
495
|
+
|
|
496
|
+
// Request message with prototype ID "load_req".
// Fields register with `handler` in declaration order, and that is the order
// glue_handler serializes and parses them in, so reordering, inserting or
// removing a field changes the wire layout.
struct glue_msg_load_req
|
|
497
|
+
{
|
|
498
|
+
GLUE_HANDLER("load_req")
|
|
499
|
+
GLUE_FIELD(arr_str, model_paths)
|
|
500
|
+
GLUE_FIELD_NULLABLE(str, mmproj_path)
|
|
501
|
+
GLUE_FIELD(bool, n_ctx_auto)
|
|
502
|
+
GLUE_FIELD(bool, use_mmap)
|
|
503
|
+
GLUE_FIELD(bool, use_mlock)
|
|
504
|
+
GLUE_FIELD(int, n_gpu_layers)
|
|
505
|
+
GLUE_FIELD(int, n_ctx)
|
|
506
|
+
GLUE_FIELD(int, n_threads)
|
|
507
|
+
GLUE_FIELD_NULLABLE(str, model_alias)
|
|
508
|
+
GLUE_FIELD_NULLABLE(int, log_level)
|
|
509
|
+
GLUE_FIELD_NULLABLE(bool, embeddings)
|
|
510
|
+
GLUE_FIELD_NULLABLE(bool, offload_kqv)
|
|
511
|
+
GLUE_FIELD_NULLABLE(int, n_batch)
|
|
512
|
+
GLUE_FIELD_NULLABLE(int, n_ubatch)
|
|
513
|
+
GLUE_FIELD_NULLABLE(int, n_parallel)
|
|
514
|
+
GLUE_FIELD_NULLABLE(str, pooling_type)
|
|
515
|
+
GLUE_FIELD_NULLABLE(str, rope_scaling_type)
|
|
516
|
+
GLUE_FIELD_NULLABLE(float, rope_freq_base)
|
|
517
|
+
GLUE_FIELD_NULLABLE(float, rope_freq_scale)
|
|
518
|
+
GLUE_FIELD_NULLABLE(float, yarn_ext_factor)
|
|
519
|
+
GLUE_FIELD_NULLABLE(float, yarn_attn_factor)
|
|
520
|
+
GLUE_FIELD_NULLABLE(float, yarn_beta_fast)
|
|
521
|
+
GLUE_FIELD_NULLABLE(float, yarn_beta_slow)
|
|
522
|
+
GLUE_FIELD_NULLABLE(int, yarn_orig_ctx)
|
|
523
|
+
GLUE_FIELD_NULLABLE(str, cache_type_k)
|
|
524
|
+
GLUE_FIELD_NULLABLE(str, cache_type_v)
|
|
525
|
+
GLUE_FIELD_NULLABLE(bool, kv_unified)
|
|
526
|
+
GLUE_FIELD_NULLABLE(bool, flash_attn)
|
|
527
|
+
GLUE_FIELD_NULLABLE(bool, swa_full)
|
|
528
|
+
GLUE_FIELD_NULLABLE(int, n_ctx_checkpoints)
|
|
529
|
+
GLUE_FIELD_NULLABLE(int, checkpoint_min_step)
|
|
530
|
+
GLUE_FIELD_NULLABLE(str, chat_template)
|
|
531
|
+
GLUE_FIELD_NULLABLE(bool, jinja)
|
|
532
|
+
GLUE_FIELD_NULLABLE(arr_str, default_template_kwargs_keys)
|
|
533
|
+
GLUE_FIELD_NULLABLE(arr_str, default_template_kwargs_vals)
|
|
534
|
+
GLUE_FIELD_NULLABLE(bool, reasoning)
|
|
535
|
+
GLUE_FIELD_NULLABLE(int, image_min_tokens)
|
|
536
|
+
GLUE_FIELD_NULLABLE(int, image_max_tokens)
|
|
537
|
+
GLUE_FIELD_NULLABLE(bool, warmup)
|
|
538
|
+
GLUE_FIELD_NULLABLE(bool, no_kv_offload)
|
|
539
|
+
GLUE_FIELD_NULLABLE(bool, mmproj_offload)
|
|
540
|
+
GLUE_FIELD_NULLABLE(bool, cont_batching)
|
|
541
|
+
GLUE_FIELD_NULLABLE(int, n_keep)
|
|
542
|
+
GLUE_FIELD_NULLABLE(bool, ctx_shift)
|
|
543
|
+
GLUE_FIELD_NULLABLE(bool, cache_idle_slots)
|
|
544
|
+
GLUE_FIELD_NULLABLE(int, n_cache_reuse)
|
|
545
|
+
GLUE_FIELD_NULLABLE(arr_str, lora_paths)
|
|
546
|
+
GLUE_FIELD_NULLABLE(arr_float, lora_scales)
|
|
547
|
+
GLUE_FIELD_NULLABLE(bool, lora_init_without_apply)
|
|
548
|
+
GLUE_FIELD_NULLABLE(str, spec_draft_model)
|
|
549
|
+
GLUE_FIELD_NULLABLE(int, spec_draft_ngl)
|
|
550
|
+
GLUE_FIELD_NULLABLE(int, spec_draft_n_max)
|
|
551
|
+
GLUE_FIELD_NULLABLE(int, spec_draft_n_min)
|
|
552
|
+
GLUE_FIELD_NULLABLE(float, spec_draft_p_min)
|
|
553
|
+
GLUE_FIELD_NULLABLE(int, spec_draft_threads)
|
|
554
|
+
GLUE_FIELD_NULLABLE(int, spec_draft_threads_batch)
|
|
555
|
+
GLUE_FIELD_NULLABLE(arr_str, kv_overrides_keys)
|
|
556
|
+
GLUE_FIELD_NULLABLE(arr_str, kv_overrides_vals)
|
|
557
|
+
GLUE_FIELD_NULLABLE(int, reasoning_budget_tokens)
|
|
558
|
+
GLUE_FIELD_NULLABLE(str, reasoning_budget_message)
|
|
559
|
+
GLUE_FIELD_NULLABLE(str, reasoning_format)
|
|
560
|
+
GLUE_FIELD_NULLABLE(bool, skip_chat_parsing)
|
|
561
|
+
GLUE_FIELD_NULLABLE(bool, prefill_assistant)
|
|
562
|
+
};
|
|
563
|
+
|
|
564
|
+
// Response message with prototype ID "load_res".
// Field declaration order is the wire order (fields register with `handler` as
// they are constructed).
struct glue_msg_load_res
|
|
565
|
+
{
|
|
566
|
+
GLUE_HANDLER("load_res")
|
|
567
|
+
GLUE_FIELD(bool, success)
|
|
568
|
+
GLUE_FIELD(int, n_ctx)
|
|
569
|
+
GLUE_FIELD(int, n_batch)
|
|
570
|
+
GLUE_FIELD(int, n_ubatch)
|
|
571
|
+
GLUE_FIELD(int, n_vocab)
|
|
572
|
+
GLUE_FIELD(int, n_ctx_train)
|
|
573
|
+
GLUE_FIELD(int, n_embd)
|
|
574
|
+
GLUE_FIELD(int, n_layer)
|
|
575
|
+
GLUE_FIELD(arr_str, metadata_key)
|
|
576
|
+
GLUE_FIELD(arr_str, metadata_val)
|
|
577
|
+
GLUE_FIELD(int, token_bos)
|
|
578
|
+
GLUE_FIELD(int, token_eos)
|
|
579
|
+
GLUE_FIELD(int, token_eot)
|
|
580
|
+
GLUE_FIELD(arr_int, list_tokens_eog)
|
|
581
|
+
GLUE_FIELD(bool, add_bos_token)
|
|
582
|
+
GLUE_FIELD(bool, add_eos_token)
|
|
583
|
+
GLUE_FIELD(bool, has_encoder)
|
|
584
|
+
GLUE_FIELD(int, token_decoder_start)
|
|
585
|
+
GLUE_FIELD(str, media_marker)
|
|
586
|
+
GLUE_FIELD(bool, has_image_input)
|
|
587
|
+
GLUE_FIELD(bool, has_audio_input)
|
|
588
|
+
};
|
|
589
|
+
|
|
590
|
+
/////////
|
|
591
|
+
|
|
592
|
+
// Request message with prototype ID "cmpl_req": a bool, a string and an array
// of binary blobs, serialized in that order.
struct glue_msg_completion_req
|
|
593
|
+
{
|
|
594
|
+
GLUE_HANDLER("cmpl_req")
|
|
595
|
+
GLUE_FIELD(bool, is_chat)
|
|
596
|
+
GLUE_FIELD(str, data_json)
|
|
597
|
+
GLUE_FIELD(arr_raw, files)
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
// Response message with prototype ID "cmpl_res" carrying a single bool.
struct glue_msg_completion_res
|
|
601
|
+
{
|
|
602
|
+
GLUE_HANDLER("cmpl_res")
|
|
603
|
+
GLUE_FIELD(bool, success)
|
|
604
|
+
};
|
|
605
|
+
|
|
606
|
+
/////////
|
|
607
|
+
|
|
608
|
+
// Request message with prototype ID "embd_req": a string followed by an array
// of binary blobs.
struct glue_msg_embedding_req
|
|
609
|
+
{
|
|
610
|
+
GLUE_HANDLER("embd_req")
|
|
611
|
+
GLUE_FIELD(str, data_json)
|
|
612
|
+
GLUE_FIELD(arr_raw, files)
|
|
613
|
+
};
|
|
614
|
+
|
|
615
|
+
// Response message with prototype ID "embd_res" carrying a single bool.
struct glue_msg_embedding_res
|
|
616
|
+
{
|
|
617
|
+
GLUE_HANDLER("embd_res")
|
|
618
|
+
GLUE_FIELD(bool, success)
|
|
619
|
+
};
|
|
620
|
+
|
|
621
|
+
/////////
|
|
622
|
+
|
|
623
|
+
// Request message with prototype ID "rrnk_req" carrying a single string.
struct glue_msg_rerank_req
|
|
624
|
+
{
|
|
625
|
+
GLUE_HANDLER("rrnk_req")
|
|
626
|
+
GLUE_FIELD(str, data_json)
|
|
627
|
+
};
|
|
628
|
+
|
|
629
|
+
// Response message with prototype ID "rrnk_res" carrying a single bool.
struct glue_msg_rerank_res
|
|
630
|
+
{
|
|
631
|
+
GLUE_HANDLER("rrnk_res")
|
|
632
|
+
GLUE_FIELD(bool, success)
|
|
633
|
+
};
|
|
634
|
+
|
|
635
|
+
/////////
|
|
636
|
+
|
|
637
|
+
// Request message with prototype ID "gres_req". It declares no fields, so its
// serialized form is just the message header.
struct glue_msg_get_result_req
|
|
638
|
+
{
|
|
639
|
+
GLUE_HANDLER("gres_req")
|
|
640
|
+
};
|
|
641
|
+
|
|
642
|
+
// Response message with prototype ID "gres_res": three bools followed by a
// string, serialized in declaration order.
struct glue_msg_get_result_res
|
|
643
|
+
{
|
|
644
|
+
GLUE_HANDLER("gres_res")
|
|
645
|
+
GLUE_FIELD(bool, success)
|
|
646
|
+
GLUE_FIELD(bool, has_more)
|
|
647
|
+
GLUE_FIELD(bool, is_error)
|
|
648
|
+
GLUE_FIELD(str, data_json)
|
|
649
|
+
};
|
|
650
|
+
|
|
651
|
+
/////////
|
|
652
|
+
|
|
653
|
+
// Request message with prototype ID "tbop_req" carrying a single string array.
struct glue_msg_test_backend_ops_req
|
|
654
|
+
{
|
|
655
|
+
GLUE_HANDLER("tbop_req")
|
|
656
|
+
GLUE_FIELD(arr_str, args)
|
|
657
|
+
};
|
|
658
|
+
|
|
659
|
+
// Response message with prototype ID "tbop_res": an int followed by a bool.
struct glue_msg_test_backend_ops_res
|
|
660
|
+
{
|
|
661
|
+
GLUE_HANDLER("tbop_res")
|
|
662
|
+
GLUE_FIELD(int, retcode)
|
|
663
|
+
GLUE_FIELD(bool, success)
|
|
664
|
+
};
|