@fugood/llama.node 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +60 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +112 -11
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -3
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +110 -67
- package/src/llama.cpp/examples/server/server.cpp +82 -87
- package/src/llama.cpp/examples/server/utils.hpp +94 -107
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +5 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1396 -386
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1432 -151
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +220 -116
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +168 -721
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +146 -42
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +8 -3
- package/src/llama.cpp/include/llama.h +19 -5
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-kv-cache.h +1 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +69 -5
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +147 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +166 -110
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/src/llama.cpp/src/llama-grammar.cpp

@@ -345,194 +345,194 @@ const char * llama_grammar_parser::parse_sequence(
     size_t last_sym_start = rule.size();
     const char * pos = src;

+    auto handle_repetitions = [&](int min_times, int max_times) {

+        if (last_sym_start == rule.size()) {
+            throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
+        }

+        // apply transformation to previous symbol (last_sym_start to end) according to
+        // the following rewrite rules:
+        // S{m,n} --> S S S (m times) S'(n-m)
+        //            S'(x)   ::= S S'(x-1) |
+        //            (... n-m definitions of these S' rules ...)
+        //            S'(1)   ::= S |
+        // S{m,}  --> S S S (m times) S'
+        //            S'     ::= S S' |
+        // S*     --> S{0,}
+        //        --> S'     ::= S S' |
+        // S+     --> S{1,}
+        //        --> S S'
+        //            S'     ::= S S' |
+        // S?     --> S{0,1}
+        //        --> S'
+        //            S'     ::= S |
+
+        llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
+        if (min_times == 0) {
+            rule.resize(last_sym_start);
+        } else {
+            // Repeat the previous elements (min_times - 1) times
+            for (int i = 1; i < min_times; i++) {
+                rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
             }
+        }

+        uint32_t last_rec_rule_id = 0;
+        auto n_opt = max_times < 0 ? 1 : max_times - min_times;

+        llama_grammar_rule rec_rule(prev_rule);
+        for (int i = 0; i < n_opt; i++) {
+            rec_rule.resize(prev_rule.size());
+            uint32_t rec_rule_id = generate_symbol_id( rule_name);
+            if (i > 0 || max_times < 0) {
+                rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
             }
+            rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
+            rec_rule.push_back({LLAMA_GRETYPE_END, 0});
+            add_rule( rec_rule_id, rec_rule);
+            last_rec_rule_id = rec_rule_id;
+        }
+        if (n_opt > 0) {
+            rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
+        }
+    };

+    while (*pos) {
+        if (*pos == '"') { // literal string
+            pos++;
+            last_sym_start = rule.size();
+            while (*pos != '"') {
+                if (!*pos) {
+                    throw std::runtime_error("unexpected end of input");
                 }
+                auto char_pair = parse_char(pos);
+                pos = char_pair.second;
+                rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
+            }
+            pos = parse_space(pos + 1, is_nested);
+        } else if (*pos == '[') { // char range(s)
+            pos++;
+            enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
+            if (*pos == '^') {
                 pos++;
+                start_type = LLAMA_GRETYPE_CHAR_NOT;
+            }
+            last_sym_start = rule.size();
+            while (*pos != ']') {
+                if (!*pos) {
+                    throw std::runtime_error("unexpected end of input");
                 }
+                auto char_pair = parse_char(pos);
+                pos = char_pair.second;
+                enum llama_gretype type = last_sym_start < rule.size()
+                    ? LLAMA_GRETYPE_CHAR_ALT
+                    : start_type;
+
+                rule.push_back({type, char_pair.first});
+                if (pos[0] == '-' && pos[1] != ']') {
+                    if (!pos[1]) {
                         throw std::runtime_error("unexpected end of input");
                     }
+                    auto endchar_pair = parse_char(pos + 1);
+                    pos = endchar_pair.second;
+                    rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
                 }
+            }
+            pos = parse_space(pos + 1, is_nested);
+        } else if (is_word_char(*pos)) { // rule reference
+            const char * name_end = parse_name(pos);
+            uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
+            pos = parse_space(name_end, is_nested);
+            last_sym_start = rule.size();
+            rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
+        } else if (*pos == '(') { // grouping
+            // parse nested alternates into synthesized rule
+            pos = parse_space(pos + 1, true);
+            uint32_t sub_rule_id = generate_symbol_id(rule_name);
+            pos = parse_alternates(pos, rule_name, sub_rule_id, true);
+            last_sym_start = rule.size();
+            // output reference to synthesized rule
+            rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
+            if (*pos != ')') {
+                throw std::runtime_error(std::string("expecting ')' at ") + pos);
+            }
+            pos = parse_space(pos + 1, is_nested);
+        } else if (*pos == '.') { // any char
+            last_sym_start = rule.size();
+            rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
+            pos = parse_space(pos + 1, is_nested);
+        } else if (*pos == '*') {
+            pos = parse_space(pos + 1, is_nested);
+            handle_repetitions(0, -1);
+        } else if (*pos == '+') {
+            pos = parse_space(pos + 1, is_nested);
+            handle_repetitions(1, -1);
+        } else if (*pos == '?') {
+            pos = parse_space(pos + 1, is_nested);
+            handle_repetitions(0, 1);
+        } else if (*pos == '{') {
+            pos = parse_space(pos + 1, is_nested);
+
+            if (!is_digit_char(*pos)) {
+                throw std::runtime_error(std::string("expecting an int at ") + pos);
+            }
+            const char * int_end = parse_int(pos);
+            int min_times = std::stoul(std::string(pos, int_end - pos));
+            pos = parse_space(int_end, is_nested);
+
+            int max_times = -1;
+
+            if (*pos == '}') {
+                max_times = min_times;
                 pos = parse_space(pos + 1, is_nested);
+            } else if (*pos == ',') {
                 pos = parse_space(pos + 1, is_nested);

+                if (is_digit_char(*pos)) {
+                    const char * int_end = parse_int(pos);
+                    max_times = std::stoul(std::string(pos, int_end - pos));
+                    pos = parse_space(int_end, is_nested);
                 }

+                if (*pos != '}') {
+                    throw std::runtime_error(std::string("expecting '}' at ") + pos);
                 }
+                pos = parse_space(pos + 1, is_nested);
             } else {
+                throw std::runtime_error(std::string("expecting ',' at ") + pos);
             }
+            handle_repetitions(min_times, max_times);
+        } else {
+            break;
         }
     }
+    return pos;
+}

 const char * llama_grammar_parser::parse_rule(const char * src) {
+    const char * name_end = parse_name(src);
+    const char * pos      = parse_space(name_end, false);
+    size_t       name_len = name_end - src;
+    uint32_t     rule_id  = get_symbol_id(src, name_len);
+    const std::string name(src, name_len);
+
+    if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
+        throw std::runtime_error(std::string("expecting ::= at ") + pos);
+    }
+    pos = parse_space(pos + 3, true);

+    pos = parse_alternates(pos, name, rule_id, false);

+    if (*pos == '\r') {
+        pos += pos[1] == '\n' ? 2 : 1;
+    } else if (*pos == '\n') {
+        pos++;
+    } else if (*pos) {
+        throw std::runtime_error(std::string("expecting newline or end at ") + pos);
     }
+    return parse_space(pos, true);
+}

 bool llama_grammar_parser::parse(const char * src) {
     try {
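As a worked example of the rewrite rules documented above, a bounded repetition such as item{2,4} is expanded along these lines (the primed rule names are illustrative; the parser actually creates numbered auxiliary rules via generate_symbol_id):

    item{2,4}      -->  item item item'(2)
    item'(2)  ::=  item item'(1) |
    item'(1)  ::=  item |

Each auxiliary rule carries an empty alternative, so the expansion accepts two, three, or four items; *, +, and ? reduce to {0,}, {1,}, and {0,1} and go through the same handle_repetitions path.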
@@ -969,7 +969,7 @@ struct llama_grammar * llama_grammar_init_impl(
         /* .awaiting_trigger = */ false,
         /* .trigger_buffer   = */ "",
         /* .trigger_tokens   = */ {},
+        /* .trigger_patterns = */ {},
     };
 }

@@ -978,19 +978,15 @@ struct llama_grammar * llama_grammar_init_impl(
         const char * grammar_str,
         const char * grammar_root,
         bool lazy,
+        const char ** trigger_patterns,
+        size_t num_trigger_patterns,
         const llama_token * trigger_tokens,
         size_t num_trigger_tokens) {
     llama_grammar_parser parser;

     // if there is a grammar, parse it
+    // rules will be empty (default) if there are parse errors
+    if (!parser.parse(grammar_str) || parser.rules.empty()) {
         fprintf(stderr, "%s: failed to parse grammar\n", __func__);
         return nullptr;
     }
@@ -1054,14 +1050,16 @@ struct llama_grammar * llama_grammar_init_impl(
     } while (true);

     std::vector<llama_token> vec_trigger_tokens;
+    std::vector<llama_grammar_trigger_pattern> vec_trigger_patterns;
     for (size_t i = 0; i < num_trigger_tokens; i++) {
         GGML_ASSERT(trigger_tokens != nullptr);
         vec_trigger_tokens.push_back(trigger_tokens[i]);
     }
+    for (size_t i = 0; i < num_trigger_patterns; i++) {
+        GGML_ASSERT(trigger_patterns != nullptr);
+        auto & trigger = vec_trigger_patterns.emplace_back();
+        trigger.pattern = trigger_patterns[i];
+        trigger.regex = std::regex(trigger.pattern);
     }

     // Important: vec_rules has to be moved here, not copied, because stacks contains
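The loop above eagerly compiles every trigger pattern string into a std::regex when the grammar is created. A standalone sketch of the same step (not the llama.cpp API; the struct and helper names here are illustrative) shows how a malformed pattern surfaces immediately as std::regex_error rather than later at sampling time:

    #include <cstdio>
    #include <regex>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for llama_grammar_trigger_pattern: the source pattern
    // kept alongside its compiled std::regex, mirroring the loop in the diff above.
    struct trigger_pattern {
        std::string pattern;
        std::regex  regex;
    };

    static std::vector<trigger_pattern> compile_patterns(const char ** patterns, size_t n) {
        std::vector<trigger_pattern> out;
        for (size_t i = 0; i < n; i++) {
            auto & trigger = out.emplace_back();
            trigger.pattern = patterns[i];
            trigger.regex   = std::regex(trigger.pattern); // throws std::regex_error on a bad pattern
        }
        return out;
    }

    int main() {
        const char * patterns[] = { "[\\s\\S]*?(<tool_call>[\\s\\S]*)" }; // illustrative pattern
        try {
            auto compiled = compile_patterns(patterns, 1);
            std::printf("compiled %zu trigger pattern(s)\n", compiled.size());
        } catch (const std::regex_error & e) {
            std::fprintf(stderr, "invalid trigger pattern: %s\n", e.what());
        }
        return 0;
    }

Storing both the pattern string and the compiled regex matches the llama_grammar_trigger_pattern struct this diff introduces, so the original text stays available for cloning and logging.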
@@ -1076,7 +1074,7 @@ struct llama_grammar * llama_grammar_init_impl(
         /* .awaiting_trigger = */ lazy,
         /* .trigger_buffer   = */ "",
         std::move(vec_trigger_tokens),
+        std::move(vec_trigger_patterns),
     };
 }

@@ -1089,7 +1087,7 @@ void llama_grammar_free_impl(struct llama_grammar * grammar) {
 }

 struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
+    auto * result = new llama_grammar {
         grammar.vocab,
         grammar.rules,
         grammar.stacks,
@@ -1098,7 +1096,7 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & gra
         grammar.awaiting_trigger,
         grammar.trigger_buffer,
         grammar.trigger_tokens,
+        grammar.trigger_patterns,
     };

     // redirect elements in stacks to point to new rules
@@ -1173,16 +1171,18 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
             LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
             return;
         } else {
-            // TODO: consider a smarter incremental substring search algorithm (store last position to search from).
             grammar.trigger_buffer += piece;
+
+            std::smatch match;
+            for (const auto & trigger_pattern : grammar.trigger_patterns) {
+                if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) {
                     grammar.awaiting_trigger = false;
+                    // get from the first match to the end of the string
+                    auto constrained_str = grammar.trigger_buffer.substr(match.position(1));
+                    // std::string constrained_str(match[1].first, grammar.trigger_buffer.end());
                     grammar.trigger_buffer.clear();
                     llama_grammar_accept_str(grammar, constrained_str);
+                    LLAMA_LOG_DEBUG("Grammar triggered on regex: '%s'\n", constrained_str.c_str());
                     return;
                 }
             }
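The trigger check above only fires when a pattern matches the entire buffered output, and the grammar is then fed the text starting at the first capture group (match.position(1)). A minimal standalone sketch of that behaviour, using an assumed <tool_call> trigger pattern and a stand-in for grammar.trigger_buffer:

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        // Assumed trigger pattern: anything, then a capture group starting at "<tool_call>".
        const std::regex trigger("[\\s\\S]*?(<tool_call>[\\s\\S]*)");

        // Stand-in for grammar.trigger_buffer: output accumulated while the lazy grammar waits.
        std::string buffer = "Let me call a tool.\n<tool_call>{\"name\":\"get_time\"}";

        std::smatch match;
        if (std::regex_match(buffer, match, trigger)) {                  // must match the whole buffer
            std::string constrained = buffer.substr(match.position(1));  // from the first capture group on
            std::cout << "grammar would be fed: " << constrained << "\n";
        } else {
            std::cout << "not triggered yet\n";
        }
        return 0;
    }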
package/src/llama.cpp/src/llama-grammar.h

@@ -3,6 +3,7 @@
 #include "llama.h"

 #include <map>
+#include <regex>
 #include <string>
 #include <vector>

@@ -105,6 +106,11 @@ struct llama_grammar_parser {
     void print(FILE * file);
 };

+struct llama_grammar_trigger_pattern {
+    std::string pattern;
+    std::regex  regex;
+};
+
 struct llama_grammar {
     // note: allow null vocab for testing (not great)
     const llama_vocab * vocab;
@@ -122,7 +128,10 @@ struct llama_grammar {
     bool                     awaiting_trigger = false; // Initialized to true for lazy grammars only
     std::string              trigger_buffer;           // Output buffered by lazy grammar. Will be cleared once trigger is found.
     std::vector<llama_token> trigger_tokens;           // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
+    std::vector<llama_grammar_trigger_pattern>
+                             trigger_patterns;         // Regular expressions that trigger a lazy grammar. Must be a full match of the entire generated
+                                                       // string, and the grammar will be given the string from the first match group onwards.
+
 };

 //
@@ -141,8 +150,8 @@ struct llama_grammar * llama_grammar_init_impl(
         const char * grammar_str,
         const char * grammar_root,
         bool lazy,
+        const char ** trigger_patterns,
+        size_t num_trigger_patterns,
         const llama_token * trigger_tokens,
         size_t num_trigger_tokens);

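For context, a hedged sketch of how this updated internal entry point might be invoked to build a lazy, regex-triggered grammar. The leading vocab argument (nullable per the "allow null vocab for testing" note above) and the GBNF/trigger strings are illustrative assumptions, not taken from this diff:

    // Sketch only: exercises the llama_grammar_init_impl() signature shown above.
    // The vocab parameter and the toy grammar/trigger strings are assumptions.
    #include "llama-grammar.h"

    static llama_grammar * make_lazy_json_grammar() {
        const char * grammar_str = "root ::= \"{\" [^}]* \"}\"";   // toy GBNF, illustrative only
        const char * trigger_patterns[] = {
            "[\\s\\S]*?(\\{[\\s\\S]*)",                             // fire once a '{' appears, keep from there on
        };

        return llama_grammar_init_impl(
            /* vocab                */ nullptr,                     // allowed for testing per the header comment
            grammar_str,
            /* grammar_root         */ "root",
            /* lazy                 */ true,
            trigger_patterns,
            /* num_trigger_patterns */ 1,
            /* trigger_tokens       */ nullptr,
            /* num_trigger_tokens   */ 0);
    }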
package/src/llama.cpp/src/llama-mmap.cpp

@@ -8,6 +8,7 @@
 #include <climits>
 #include <stdexcept>
 #include <cerrno>
+#include <algorithm>

 #ifdef __has_include
 #if __has_include(<unistd.h>)
@@ -34,6 +35,10 @@
 #include <io.h>
 #endif

+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
 // TODO: consider moving to llama-impl.h if needed in more places
 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
@@ -471,7 +476,11 @@ struct llama_mlock::impl {

         char* errmsg = std::strerror(errno);
         bool suggest = (errno == ENOMEM);
+#if defined(TARGET_OS_VISION) || defined(TARGET_OS_TV)
+        // visionOS/tvOS dont't support RLIMIT_MEMLOCK
+        // Skip resource limit checks on visionOS/tvOS
+        suggest = false;
+#else
         struct rlimit lock_limit;
         if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
             suggest = false;
@@ -479,6 +488,7 @@ struct llama_mlock::impl {
         if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
             suggest = false;
         }
+#endif

         LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
                 size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");