@agorapete/wllama 3.5.1-q2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.gitmodules +3 -0
  2. package/.prettierignore +38 -0
  3. package/AGENTS.md +1 -0
  4. package/CMakeLists.txt +131 -0
  5. package/LICENCE +21 -0
  6. package/README-dev.md +178 -0
  7. package/README.md +225 -0
  8. package/README_banner.png +0 -0
  9. package/assets/screenshot_0.png +0 -0
  10. package/cpp/generate_glue_prototype.js +115 -0
  11. package/cpp/glue.hpp +664 -0
  12. package/cpp/test_glue.cpp +80 -0
  13. package/cpp/wllama-context.h +1172 -0
  14. package/cpp/wllama-fs.h +148 -0
  15. package/cpp/wllama.cpp +187 -0
  16. package/cpp/wllama.h +6 -0
  17. package/esm/cache-manager.d.ts +130 -0
  18. package/esm/debug.d.ts +28 -0
  19. package/esm/glue/glue.d.ts +22 -0
  20. package/esm/glue/messages.d.ts +146 -0
  21. package/esm/huggingface.d.ts +31 -0
  22. package/esm/index.cjs +3406 -0
  23. package/esm/index.d.ts +8 -0
  24. package/esm/index.js +3387 -0
  25. package/esm/index.min.js +1 -0
  26. package/esm/index.min.js.map +1 -0
  27. package/esm/model-manager.d.ts +136 -0
  28. package/esm/storage/cos.d.ts +36 -0
  29. package/esm/storage/index.d.ts +33 -0
  30. package/esm/storage/opfs.d.ts +12 -0
  31. package/esm/types/oai-compat.d.ts +278 -0
  32. package/esm/types/types.d.ts +112 -0
  33. package/esm/utils.d.ts +119 -0
  34. package/esm/wasm/source-map.d.ts +1 -0
  35. package/esm/wasm/wllama.wasm +0 -0
  36. package/esm/wasm-from-cdn.d.ts +8 -0
  37. package/esm/wllama.d.ts +397 -0
  38. package/esm/worker.d.ts +92 -0
  39. package/esm/workers-code/generated.d.ts +4 -0
  40. package/guides/intro-v2.md +132 -0
  41. package/guides/intro-v3.1.md +40 -0
  42. package/guides/intro-v3.md +230 -0
  43. package/index.ts +1 -0
  44. package/package.json +71 -0
  45. package/scripts/bisect_test.sh +33 -0
  46. package/scripts/build_hf_space.sh +26 -0
  47. package/scripts/build_source_map.js +269 -0
  48. package/scripts/build_wasm.sh +19 -0
  49. package/scripts/build_worker.sh +38 -0
  50. package/scripts/check_debug_build.js +30 -0
  51. package/scripts/check_package_size.js +25 -0
  52. package/scripts/docker-compose.yml +76 -0
  53. package/scripts/generate_wasm_from_cdn.js +24 -0
  54. package/scripts/http_server.js +44 -0
  55. package/scripts/post_build.sh +32 -0
  56. package/src/cache-manager.ts +358 -0
  57. package/src/debug.ts +111 -0
  58. package/src/glue/glue.ts +291 -0
  59. package/src/glue/messages.ts +773 -0
  60. package/src/huggingface.ts +151 -0
  61. package/src/index.ts +8 -0
  62. package/src/mjs.test.ts +44 -0
  63. package/src/model-manager.test.ts +200 -0
  64. package/src/model-manager.ts +359 -0
  65. package/src/storage/cos.test.ts +83 -0
  66. package/src/storage/cos.ts +171 -0
  67. package/src/storage/index.ts +40 -0
  68. package/src/storage/opfs.ts +119 -0
  69. package/src/types/oai-compat.ts +342 -0
  70. package/src/types/types.ts +133 -0
  71. package/src/utils.test.ts +231 -0
  72. package/src/utils.ts +403 -0
  73. package/src/wasm/source-map.ts +7 -0
  74. package/src/wasm/wllama.js +1 -0
  75. package/src/wasm/wllama.wasm +0 -0
  76. package/src/wasm-from-cdn.ts +13 -0
  77. package/src/wllama.test.ts +392 -0
  78. package/src/wllama.ts +1138 -0
  79. package/src/wllama.wgpu.test.ts +62 -0
  80. package/src/worker.ts +443 -0
  81. package/src/workers-code/generated.ts +11 -0
  82. package/src/workers-code/llama-cpp.js +511 -0
  83. package/src/workers-code/opfs-utils.js +150 -0
  84. package/tsconfig.build.json +34 -0
  85. package/tsup.config.ts +23 -0
  86. package/vitest.config.ts +61 -0
package/cpp/glue.hpp ADDED
@@ -0,0 +1,664 @@
1
+ #pragma once
2
+ /**
3
+ * Simple serializer / deserializer inspired by protobuf
4
+ *
5
+ * Structure:
6
+ * - 4 bytes magic number (GLUE_MAGIC)
7
+ * - 4 bytes version number (GLUE_VERSION)
8
+ * - 8 bytes message prototype ID
9
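+ * - 4 bytes message length, unsigned number (NOTE: glue_handler::serialize/deserialize in this file neither write nor read this field; the message data follows the prototype ID directly)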
10
+ * - message data
11
+ *
12
+ * Each field in the message is encoded as:
13
+ * - 4 bytes data type
14
+ * - 4 bytes size, unsigned number (only for string, raw and array types; for arrays this is the element count)
15
+ * - data
16
+ */
17
+
18
#include <cstdint>
#include <cstring>
#include <functional>
#include <stdexcept>
#include <string>
#include <vector>
22
+
23
+ // reserved for future, do not edit the version number for now
24
+ #define GLUE_VERSION 1
25
+
26
+ #define GLUE_MAGIC 0x45554c47 // "GLUE"
27
+ #define GLUE_PROTO_ID_LEN 8
28
+
29
+ #ifndef GLUE_DEBUG
30
+ #define GLUE_DEBUG(...)
31
+ #endif
32
+
33
+ #define BITS_TO_BYTES(x) ((x) / 8)
34
+
35
+ // Data types
36
+ // Note: we're doing polymorphism using enum to prevent using virtual functions
37
+
38
// Type tag stored in front of every field value.
// The numeric values are written to the wire as 32-bit integers, so they are
// spelled out explicitly here; do not renumber existing entries.
enum glue_dtype
{
  GLUE_DTYPE_NULL = 0,
  GLUE_DTYPE_BOOL = 1,
  GLUE_DTYPE_INT = 2,
  GLUE_DTYPE_FLOAT = 3,
  GLUE_DTYPE_STRING = 4,
  GLUE_DTYPE_RAW = 5,
  GLUE_DTYPE_ARRAY_BOOL = 6,
  GLUE_DTYPE_ARRAY_INT = 7,
  GLUE_DTYPE_ARRAY_FLOAT = 8,
  GLUE_DTYPE_ARRAY_STRING = 9,
  GLUE_DTYPE_ARRAY_RAW = 10,
};
52
+
53
+ using glue_data_ptr = const char *;
54
+
55
+ struct glue_outbuf
56
+ {
57
+ std::vector<char> data;
58
+ glue_outbuf()
59
+ {
60
+ data.reserve(1024);
61
+ }
62
+ void append(const char *val, size_t size)
63
+ {
64
+ GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
65
+ data.insert(data.end(), val, val + size);
66
+ }
67
+ void append_str(const std::string &val)
68
+ {
69
+ GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
70
+ data.insert(data.end(), val.begin(), val.end());
71
+ }
72
+ void append_u32(uint32_t val)
73
+ {
74
+ GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
75
+ data.insert(data.end(), (char *)&val, (char *)&val + BITS_TO_BYTES(32));
76
+ }
77
+ void append_i32(int32_t val)
78
+ {
79
+ GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
80
+ data.insert(data.end(), (char *)&val, (char *)&val + BITS_TO_BYTES(32));
81
+ }
82
+ void append_f32(float val)
83
+ {
84
+ GLUE_DEBUG(" << offset = 0x%02zx\n", data.size());
85
+ data.insert(data.end(), (char *)&val, (char *)&val + BITS_TO_BYTES(32));
86
+ }
87
+ void clear() {
88
+ data.clear();
89
+ data.reserve(1024);
90
+ }
91
+ };
92
+
93
+ struct glue_inbuf
94
+ {
95
+ glue_data_ptr base;
96
+ glue_data_ptr cur;
97
+ glue_inbuf(glue_data_ptr data) : base(data), cur(data) {}
98
+ uint32_t read_u32()
99
+ {
100
+ GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
101
+ uint32_t val = *(uint32_t *)cur;
102
+ cur += BITS_TO_BYTES(32);
103
+ return val;
104
+ }
105
+ int32_t read_i32()
106
+ {
107
+ GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
108
+ int32_t val = *(int32_t *)cur;
109
+ cur += BITS_TO_BYTES(32);
110
+ return val;
111
+ }
112
+ float read_f32()
113
+ {
114
+ GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
115
+ float val = *(float *)cur;
116
+ cur += BITS_TO_BYTES(32);
117
+ return val;
118
+ }
119
+ std::string read_str(uint32_t size)
120
+ {
121
+ GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
122
+ std::string val(cur, size);
123
+ cur += size;
124
+ return val;
125
+ }
126
+ std::vector<char> read_raw(uint32_t size)
127
+ {
128
+ GLUE_DEBUG(" >> offset = 0x%02zx\n", cur - base);
129
+ std::vector<char> val;
130
+ val.reserve(size);
131
+ val.insert(val.end(), cur, cur + size);
132
+ cur += size;
133
+ return val;
134
+ }
135
+
136
+ // for array
137
+ void read(uint32_t &out) { out = read_u32(); }
138
+ void read(int32_t &out) { out = read_i32(); }
139
+ void read(float &out) { out = read_f32(); }
140
+ void read(std::string &out)
141
+ {
142
+ uint32_t size = read_u32();
143
+ out = read_str(size);
144
+ }
145
+ void read(std::vector<char> &out)
146
+ {
147
+ uint32_t size = read_u32();
148
+ out = read_raw(size);
149
+ }
150
+ void read(std::vector<uint8_t> &out)
151
+ {
152
+ uint32_t size = read_u32();
153
+ auto tmp = read_raw(size);
154
+ out.assign((uint8_t*)tmp.data(), (uint8_t*)tmp.data() + tmp.size());
155
+ }
156
+ };
157
+
158
+ struct glue_type_base;
159
+ struct glue_handler
160
+ {
161
+ const char *name = nullptr;
162
+ std::vector<glue_type_base *> fields;
163
+
164
+ glue_handler(const char *name) : name(name) {}
165
+ void register_field(glue_type_base *field)
166
+ {
167
+ fields.push_back(field);
168
+ };
169
+ void serialize(glue_outbuf &output);
170
+ void deserialize(glue_inbuf &input);
171
+ };
172
+
173
+ struct glue_type_base
174
+ {
175
+ const char *name = nullptr;
176
+ glue_dtype dtype = GLUE_DTYPE_NULL;
177
+ glue_handler handler;
178
+
179
+ glue_type_base() = delete;
180
+ glue_type_base(const char *name, glue_handler &handler, glue_dtype dtype) : name(name), handler(handler), dtype(dtype)
181
+ {
182
+ handler.register_field(this);
183
+ }
184
+ bool is_null() { return dtype == GLUE_DTYPE_NULL; }
185
+ bool not_null() { return !is_null(); }
186
+ void set_null() { dtype = GLUE_DTYPE_NULL; }
187
+ bool parse_type(glue_inbuf &input)
188
+ {
189
+ dtype = (glue_dtype)input.read_u32();
190
+ if (dtype == GLUE_DTYPE_NULL)
191
+ {
192
+ GLUE_DEBUG(" >> null\n");
193
+ return true;
194
+ }
195
+ return false;
196
+ }
197
+ };
198
+
199
+ struct glue_bool : glue_type_base
200
+ {
201
+ bool value = false;
202
+
203
+ glue_bool(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_BOOL) {}
204
+ void parse(glue_inbuf &input)
205
+ {
206
+ if (parse_type(input))
207
+ return;
208
+ value = (bool)input.read_u32();
209
+ GLUE_DEBUG(" >> bool %d\n", value);
210
+ }
211
+ void serialize(glue_outbuf &output)
212
+ {
213
+ GLUE_DEBUG(" << bool %d\n", value);
214
+ output.append_u32(dtype);
215
+ output.append_u32(value);
216
+ }
217
+ };
218
+
219
+ struct glue_int : glue_type_base
220
+ {
221
+ int32_t value = 0;
222
+
223
+ glue_int(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_INT) {}
224
+ void parse(glue_inbuf &input)
225
+ {
226
+ if (parse_type(input))
227
+ return;
228
+ value = input.read_i32();
229
+ GLUE_DEBUG(" >> int %d\n", value);
230
+ }
231
+ void serialize(glue_outbuf &output)
232
+ {
233
+ GLUE_DEBUG(" << int %d\n", value);
234
+ output.append_u32(dtype);
235
+ output.append_i32(value);
236
+ }
237
+ };
238
+
239
+ struct glue_float : glue_type_base
240
+ {
241
+ float value = 0.0f;
242
+
243
+ glue_float(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_FLOAT) {}
244
+ void parse(glue_inbuf &input)
245
+ {
246
+ if (parse_type(input))
247
+ return;
248
+ value = input.read_f32();
249
+ GLUE_DEBUG(" >> float %f\n", value);
250
+ }
251
+ void serialize(glue_outbuf &output)
252
+ {
253
+ GLUE_DEBUG(" << float %f\n", value);
254
+ output.append_u32(dtype);
255
+ output.append_f32(value);
256
+ }
257
+ };
258
+
259
+ struct glue_str : glue_type_base
260
+ {
261
+ std::string value;
262
+
263
+ glue_str(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_STRING) {}
264
+ void parse(glue_inbuf &input)
265
+ {
266
+ if (parse_type(input))
267
+ return;
268
+ uint32_t size = input.read_u32();
269
+ value = input.read_str(size);
270
+ GLUE_DEBUG(" >> string %s\n", value.c_str());
271
+ }
272
+ void serialize(glue_outbuf &output)
273
+ {
274
+ GLUE_DEBUG(" << string %s\n", value.c_str());
275
+ output.append_u32(dtype);
276
+ output.append_u32(value.size());
277
+ output.append_str(value);
278
+ }
279
+ };
280
+
281
+ struct glue_raw : glue_type_base
282
+ {
283
+ std::vector<char> buf;
284
+
285
+ glue_raw(const char *name, glue_handler &handler) : glue_type_base(name, handler, GLUE_DTYPE_RAW) {}
286
+ void parse(glue_inbuf &input)
287
+ {
288
+ if (parse_type(input))
289
+ return;
290
+ uint32_t size = input.read_u32();
291
+ buf = input.read_raw(size);
292
+ GLUE_DEBUG(" >> raw, size = %zu\n", buf.size());
293
+ }
294
+ void serialize(glue_outbuf &output)
295
+ {
296
+ GLUE_DEBUG(" << raw, size = %zu\n", buf.size());
297
+ output.append_u32(dtype);
298
+ output.append_u32(buf.size());
299
+ output.append(buf.data(), buf.size());
300
+ }
301
+ };
302
+
303
+ template <typename T>
304
+ struct glue_arr : glue_type_base
305
+ {
306
+ std::vector<T> arr;
307
+ std::function<void(T &, glue_outbuf &)> serialize_elem;
308
+
309
+ glue_arr(const char *name, glue_handler &handler, glue_dtype dtype) : glue_type_base(name, handler, dtype) {}
310
+ void parse(glue_inbuf &input)
311
+ {
312
+ if (parse_type(input))
313
+ return;
314
+ uint32_t size = input.read_u32();
315
+ GLUE_DEBUG(" >> array[%u]\n", size);
316
+ arr.reserve(size);
317
+ for (uint32_t i = 0; i < size; i++)
318
+ {
319
+ T elem;
320
+ input.read(elem);
321
+ arr.push_back(std::move(elem));
322
+ }
323
+ }
324
+ void serialize(glue_outbuf &output)
325
+ {
326
+ GLUE_DEBUG(" << array[%zu]\n", arr.size());
327
+ output.append_u32(dtype);
328
+ output.append_u32(arr.size());
329
+ for (auto elem : arr)
330
+ {
331
+ serialize_elem(elem, output);
332
+ }
333
+ }
334
+ };
335
+
336
+ #define DEF_GLUE_ARR(tname, dtype, enum_type, serialize_fn) \
337
+ struct glue_arr_##tname : glue_arr<dtype> \
338
+ { \
339
+ glue_arr_##tname(const char *name, glue_handler &handler) : glue_arr<dtype>(name, handler, enum_type) \
340
+ { \
341
+ serialize_elem = [](dtype & elem, glue_outbuf & output) serialize_fn; \
342
+ } \
343
+ };
344
+
345
+ DEF_GLUE_ARR(bool, uint32_t, GLUE_DTYPE_ARRAY_BOOL, {
346
+ output.append_u32(elem);
347
+ })
348
+ DEF_GLUE_ARR(int, int32_t, GLUE_DTYPE_ARRAY_INT, {
349
+ output.append_i32(elem);
350
+ })
351
+ DEF_GLUE_ARR(float, float, GLUE_DTYPE_ARRAY_FLOAT, {
352
+ output.append_f32(elem);
353
+ })
354
+ DEF_GLUE_ARR(str, std::string, GLUE_DTYPE_ARRAY_STRING, {
355
+ output.append_u32(elem.size());
356
+ output.append_str(elem);
357
+ })
358
+ DEF_GLUE_ARR(raw, std::vector<uint8_t>, GLUE_DTYPE_ARRAY_RAW, {
359
+ output.append_u32(elem.size());
360
+ output.append((const char*)elem.data(), elem.size());
361
+ })
362
+
363
+ // Message base
364
+
365
+ void glue_handler::serialize(glue_outbuf &output)
366
+ {
367
+ output.clear();
368
+ output.append_u32(GLUE_MAGIC);
369
+ output.append_u32(GLUE_VERSION);
370
+ output.append(name, 8);
371
+ GLUE_DEBUG("Serializing message %s\n", name);
372
+ GLUE_DEBUG("Fields: %zu\n", fields.size());
373
+ for (auto field : fields)
374
+ {
375
+ GLUE_DEBUG("Serializing field %s, type = %d\n", field->name, field->dtype);
376
+ switch (field->dtype)
377
+ {
378
+ case GLUE_DTYPE_NULL:
379
+ output.append_u32(GLUE_DTYPE_NULL);
380
+ break;
381
+ case GLUE_DTYPE_BOOL:
382
+ ((glue_bool *)field)->serialize(output);
383
+ break;
384
+ case GLUE_DTYPE_INT:
385
+ ((glue_int *)field)->serialize(output);
386
+ break;
387
+ case GLUE_DTYPE_FLOAT:
388
+ ((glue_float *)field)->serialize(output);
389
+ break;
390
+ case GLUE_DTYPE_STRING:
391
+ ((glue_str *)field)->serialize(output);
392
+ break;
393
+ case GLUE_DTYPE_RAW:
394
+ ((glue_raw *)field)->serialize(output);
395
+ break;
396
+ case GLUE_DTYPE_ARRAY_BOOL:
397
+ ((glue_arr_bool *)field)->serialize(output);
398
+ break;
399
+ case GLUE_DTYPE_ARRAY_INT:
400
+ ((glue_arr_int *)field)->serialize(output);
401
+ break;
402
+ case GLUE_DTYPE_ARRAY_FLOAT:
403
+ ((glue_arr_float *)field)->serialize(output);
404
+ break;
405
+ case GLUE_DTYPE_ARRAY_STRING:
406
+ ((glue_arr_str *)field)->serialize(output);
407
+ break;
408
+ case GLUE_DTYPE_ARRAY_RAW:
409
+ ((glue_arr_raw *)field)->serialize(output);
410
+ break;
411
+ }
412
+ }
413
+ }
414
+
415
+ void glue_handler::deserialize(glue_inbuf &input)
416
+ {
417
+ uint32_t magic = input.read_u32();
418
+ if (magic != GLUE_MAGIC)
419
+ {
420
+ throw std::runtime_error("Invalid magic number");
421
+ }
422
+
423
+ uint32_t version = input.read_u32();
424
+ if (version != GLUE_VERSION)
425
+ {
426
+ throw std::runtime_error("Version mismatch");
427
+ }
428
+
429
+ std::string proto_id = input.read_str(GLUE_PROTO_ID_LEN);
430
+ if (proto_id != name)
431
+ {
432
+ throw std::runtime_error("Prototype ID mismatch " + proto_id + " != " + name);
433
+ }
434
+
435
+ GLUE_DEBUG("Deserializing message %s\n", name);
436
+ for (auto field : fields)
437
+ {
438
+ GLUE_DEBUG("Deserializing field %s, type = %d\n", field->name, field->dtype);
439
+ switch (field->dtype)
440
+ {
441
+ case GLUE_DTYPE_NULL:
442
+ field->parse_type(input);
443
+ break;
444
+ case GLUE_DTYPE_BOOL:
445
+ ((glue_bool *)field)->parse(input);
446
+ break;
447
+ case GLUE_DTYPE_INT:
448
+ ((glue_int *)field)->parse(input);
449
+ break;
450
+ case GLUE_DTYPE_FLOAT:
451
+ ((glue_float *)field)->parse(input);
452
+ break;
453
+ case GLUE_DTYPE_STRING:
454
+ ((glue_str *)field)->parse(input);
455
+ break;
456
+ case GLUE_DTYPE_RAW:
457
+ ((glue_raw *)field)->parse(input);
458
+ case GLUE_DTYPE_ARRAY_BOOL:
459
+ ((glue_arr_bool *)field)->parse(input);
460
+ break;
461
+ case GLUE_DTYPE_ARRAY_INT:
462
+ ((glue_arr_int *)field)->parse(input);
463
+ break;
464
+ case GLUE_DTYPE_ARRAY_FLOAT:
465
+ ((glue_arr_float *)field)->parse(input);
466
+ break;
467
+ case GLUE_DTYPE_ARRAY_STRING:
468
+ ((glue_arr_str *)field)->parse(input);
469
+ break;
470
+ case GLUE_DTYPE_ARRAY_RAW:
471
+ ((glue_arr_raw *)field)->parse(input);
472
+ }
473
+ }
474
+ }
475
+
476
+ template <std::size_t N>
477
+ constexpr auto &PROTO_ID(char const (&s)[N])
478
+ {
479
+ static_assert(N == GLUE_PROTO_ID_LEN + 1, "Prototype ID must be 8 characters long");
480
+ return s;
481
+ }
482
// Helpers for declaring message structs.
// GLUE_FIELD declares a member `glue_<type> <name>` bound to the struct's
// `handler`, so GLUE_HANDLER must appear before any field in the struct.
#define GLUE_FIELD(type, name) glue_##type name = glue_##type(#name, handler);
// Expands to exactly the same code as GLUE_FIELD; the distinct name only
// marks the field as optional for readers.
#define GLUE_FIELD_NULLABLE(type, name) glue_##type name = glue_##type(#name, handler);
// Declares the `handler` member; PROTO_ID enforces the ID length at compile time.
#define GLUE_HANDLER(name) glue_handler handler = glue_handler(PROTO_ID(name));
485
+
486
+ // Message for events
487
+
488
// Message with prototype ID "erro_evt" carrying a single string `message`.
struct glue_msg_error
{
  GLUE_HANDLER("erro_evt")
  GLUE_FIELD(str, message)
};
493
+
494
+ // Message for actions
495
+
496
// Message with prototype ID "load_req".
// Fields register with `handler` in declaration order and are serialized in
// that order, so the order below is part of the wire format: do not reorder.
// NOTE(review): presumably the model-loading request; the meaning of the
// individual options is defined by the consumer of this struct, not here.
struct glue_msg_load_req
{
  GLUE_HANDLER("load_req")
  GLUE_FIELD(arr_str, model_paths)
  GLUE_FIELD_NULLABLE(str, mmproj_path)
  GLUE_FIELD(bool, n_ctx_auto)
  GLUE_FIELD(bool, use_mmap)
  GLUE_FIELD(bool, use_mlock)
  GLUE_FIELD(int, n_gpu_layers)
  GLUE_FIELD(int, n_ctx)
  GLUE_FIELD(int, n_threads)
  GLUE_FIELD_NULLABLE(str, model_alias)
  GLUE_FIELD_NULLABLE(int, log_level)
  GLUE_FIELD_NULLABLE(bool, embeddings)
  GLUE_FIELD_NULLABLE(bool, offload_kqv)
  GLUE_FIELD_NULLABLE(int, n_batch)
  GLUE_FIELD_NULLABLE(int, n_ubatch)
  GLUE_FIELD_NULLABLE(int, n_parallel)
  GLUE_FIELD_NULLABLE(str, pooling_type)
  GLUE_FIELD_NULLABLE(str, rope_scaling_type)
  GLUE_FIELD_NULLABLE(float, rope_freq_base)
  GLUE_FIELD_NULLABLE(float, rope_freq_scale)
  GLUE_FIELD_NULLABLE(float, yarn_ext_factor)
  GLUE_FIELD_NULLABLE(float, yarn_attn_factor)
  GLUE_FIELD_NULLABLE(float, yarn_beta_fast)
  GLUE_FIELD_NULLABLE(float, yarn_beta_slow)
  GLUE_FIELD_NULLABLE(int, yarn_orig_ctx)
  GLUE_FIELD_NULLABLE(str, cache_type_k)
  GLUE_FIELD_NULLABLE(str, cache_type_v)
  GLUE_FIELD_NULLABLE(bool, kv_unified)
  GLUE_FIELD_NULLABLE(bool, flash_attn)
  GLUE_FIELD_NULLABLE(bool, swa_full)
  GLUE_FIELD_NULLABLE(int, n_ctx_checkpoints)
  GLUE_FIELD_NULLABLE(int, checkpoint_min_step)
  GLUE_FIELD_NULLABLE(str, chat_template)
  GLUE_FIELD_NULLABLE(bool, jinja)
  GLUE_FIELD_NULLABLE(arr_str, default_template_kwargs_keys)
  GLUE_FIELD_NULLABLE(arr_str, default_template_kwargs_vals)
  GLUE_FIELD_NULLABLE(bool, reasoning)
  GLUE_FIELD_NULLABLE(int, image_min_tokens)
  GLUE_FIELD_NULLABLE(int, image_max_tokens)
  GLUE_FIELD_NULLABLE(bool, warmup)
  GLUE_FIELD_NULLABLE(bool, no_kv_offload)
  GLUE_FIELD_NULLABLE(bool, mmproj_offload)
  GLUE_FIELD_NULLABLE(bool, cont_batching)
  GLUE_FIELD_NULLABLE(int, n_keep)
  GLUE_FIELD_NULLABLE(bool, ctx_shift)
  GLUE_FIELD_NULLABLE(bool, cache_idle_slots)
  GLUE_FIELD_NULLABLE(int, n_cache_reuse)
  GLUE_FIELD_NULLABLE(arr_str, lora_paths)
  GLUE_FIELD_NULLABLE(arr_float, lora_scales)
  GLUE_FIELD_NULLABLE(bool, lora_init_without_apply)
  GLUE_FIELD_NULLABLE(str, spec_draft_model)
  GLUE_FIELD_NULLABLE(int, spec_draft_ngl)
  GLUE_FIELD_NULLABLE(int, spec_draft_n_max)
  GLUE_FIELD_NULLABLE(int, spec_draft_n_min)
  GLUE_FIELD_NULLABLE(float, spec_draft_p_min)
  GLUE_FIELD_NULLABLE(int, spec_draft_threads)
  GLUE_FIELD_NULLABLE(int, spec_draft_threads_batch)
  GLUE_FIELD_NULLABLE(arr_str, kv_overrides_keys)
  GLUE_FIELD_NULLABLE(arr_str, kv_overrides_vals)
  GLUE_FIELD_NULLABLE(int, reasoning_budget_tokens)
  GLUE_FIELD_NULLABLE(str, reasoning_budget_message)
  GLUE_FIELD_NULLABLE(str, reasoning_format)
  GLUE_FIELD_NULLABLE(bool, skip_chat_parsing)
  GLUE_FIELD_NULLABLE(bool, prefill_assistant)
};
563
+
564
// Message with prototype ID "load_res".
// Declaration order of the fields is the wire order: do not reorder.
// NOTE(review): presumably the reply to "load_req"; confirm against the
// code that fills it in.
struct glue_msg_load_res
{
  GLUE_HANDLER("load_res")
  GLUE_FIELD(bool, success)
  GLUE_FIELD(int, n_ctx)
  GLUE_FIELD(int, n_batch)
  GLUE_FIELD(int, n_ubatch)
  GLUE_FIELD(int, n_vocab)
  GLUE_FIELD(int, n_ctx_train)
  GLUE_FIELD(int, n_embd)
  GLUE_FIELD(int, n_layer)
  GLUE_FIELD(arr_str, metadata_key)
  GLUE_FIELD(arr_str, metadata_val)
  GLUE_FIELD(int, token_bos)
  GLUE_FIELD(int, token_eos)
  GLUE_FIELD(int, token_eot)
  GLUE_FIELD(arr_int, list_tokens_eog)
  GLUE_FIELD(bool, add_bos_token)
  GLUE_FIELD(bool, add_eos_token)
  GLUE_FIELD(bool, has_encoder)
  GLUE_FIELD(int, token_decoder_start)
  GLUE_FIELD(str, media_marker)
  GLUE_FIELD(bool, has_image_input)
  GLUE_FIELD(bool, has_audio_input)
};
589
+
590
+ /////////
591
+
592
// Message with prototype ID "cmpl_req": a flag, a string payload and an
// array of raw byte blobs. Declaration order is the wire order.
struct glue_msg_completion_req
{
  GLUE_HANDLER("cmpl_req")
  GLUE_FIELD(bool, is_chat)
  GLUE_FIELD(str, data_json)
  GLUE_FIELD(arr_raw, files)
};
599
+
600
// Message with prototype ID "cmpl_res" carrying a single `success` flag.
struct glue_msg_completion_res
{
  GLUE_HANDLER("cmpl_res")
  GLUE_FIELD(bool, success)
};
605
+
606
+ /////////
607
+
608
// Message with prototype ID "embd_req": a string payload and an array of
// raw byte blobs. Declaration order is the wire order.
struct glue_msg_embedding_req
{
  GLUE_HANDLER("embd_req")
  GLUE_FIELD(str, data_json)
  GLUE_FIELD(arr_raw, files)
};
614
+
615
// Message with prototype ID "embd_res" carrying a single `success` flag.
struct glue_msg_embedding_res
{
  GLUE_HANDLER("embd_res")
  GLUE_FIELD(bool, success)
};
620
+
621
+ /////////
622
+
623
// Message with prototype ID "rrnk_req" carrying a single string payload.
struct glue_msg_rerank_req
{
  GLUE_HANDLER("rrnk_req")
  GLUE_FIELD(str, data_json)
};
628
+
629
// Message with prototype ID "rrnk_res" carrying a single `success` flag.
struct glue_msg_rerank_res
{
  GLUE_HANDLER("rrnk_res")
  GLUE_FIELD(bool, success)
};
634
+
635
+ /////////
636
+
637
// Message with prototype ID "gres_req"; it has no fields, so only the
// message header is serialized.
struct glue_msg_get_result_req
{
  GLUE_HANDLER("gres_req")
};
641
+
642
// Message with prototype ID "gres_res": three flags followed by a string
// payload. Declaration order is the wire order.
struct glue_msg_get_result_res
{
  GLUE_HANDLER("gres_res")
  GLUE_FIELD(bool, success)
  GLUE_FIELD(bool, has_more)
  GLUE_FIELD(bool, is_error)
  GLUE_FIELD(str, data_json)
};
650
+
651
+ /////////
652
+
653
// Message with prototype ID "tbop_req" carrying an array of strings `args`.
struct glue_msg_test_backend_ops_req
{
  GLUE_HANDLER("tbop_req")
  GLUE_FIELD(arr_str, args)
};
658
+
659
// Message with prototype ID "tbop_res": an integer `retcode` followed by a
// `success` flag. Declaration order is the wire order.
struct glue_msg_test_backend_ops_res
{
  GLUE_HANDLER("tbop_res")
  GLUE_FIELD(int, retcode)
  GLUE_FIELD(bool, success)
};