@fugood/llama.node 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +60 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +112 -11
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -3
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +110 -67
- package/src/llama.cpp/examples/server/server.cpp +82 -87
- package/src/llama.cpp/examples/server/utils.hpp +94 -107
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +5 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1396 -386
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1432 -151
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +220 -116
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +168 -721
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +146 -42
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +8 -3
- package/src/llama.cpp/include/llama.h +19 -5
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-kv-cache.h +1 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +69 -5
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +147 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +166 -110
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/bin/** (prebuilt llama-node.node and node.lib binaries)
CHANGED
Binary files; no textual diff is shown for the sixteen platform binaries listed above.
package/lib/binding.ts
CHANGED
@@ -87,7 +87,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
-  grammar_triggers?: { word: string; at_start: boolean }[]
+  grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
 }
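The grammar_triggers option above gains a numeric type field that mirrors llama.cpp's common_grammar_trigger_type enum. Below is a minimal illustrative sketch of the updated option shape as declared in binding.ts; the grammar string, trigger word, enum value, and the completion() call are assumptions for illustration only, not part of this diff:

    // Sketch only: populating the 0.3.14 LlamaCompletionOptions fields shown above.
    const grammar = 'root ::= "<tool_call>"'   // placeholder GBNF grammar string (assumption)
    const options = {
      grammar,
      grammar_lazy: true,                      // enforce the grammar only once a trigger fires
      grammar_triggers: [
        // `type` is the new field; its number maps to llama.cpp's common_grammar_trigger_type
        // enum, and the value used here is a placeholder assumption.
        { type: 1, word: '<tool_call>', at_start: false },
      ],
      preserved_tokens: ['<tool_call>'],
    }
    // e.g. await context.completion(options, onToken)  // method and callback names assumed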
package/package.json
CHANGED
package/src/LlamaContext.cpp
CHANGED
@@ -272,7 +272,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   _sess = sess;
   _info = common_params_get_system_info(params);

-  _templates =
+  _templates = common_chat_templates_init(model, params.chat_template);
 }

 // getSystemInfo(): string
@@ -355,22 +355,22 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   Napi::Object minja = Napi::Object::New(info.Env());
   minja.Set("default", validateModelChatTemplate(model, true, ""));
   Napi::Object defaultCaps = Napi::Object::New(info.Env());
-  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
-  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
-  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
-  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
-  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
-  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  defaultCaps.Set("tools", _templates.get()->template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.get()->template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.get()->template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.get()->template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.get()->template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.get()->template_default->original_caps().supports_tool_call_id);
   minja.Set("defaultCaps", defaultCaps);
   minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
-  if (_templates.template_tool_use) {
+  if (_templates.get()->template_tool_use) {
     Napi::Object toolUseCaps = Napi::Object::New(info.Env());
-    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
-    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
-    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
-    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
-    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
-    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUseCaps.Set("tools", _templates.get()->template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.get()->template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.get()->template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.get()->template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.get()->template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.get()->template_tool_use->original_caps().supports_tool_call_id);
     minja.Set("toolUseCaps", toolUseCaps);
   }
   chatTemplates.Set("minja", minja);
@@ -385,7 +385,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {

 common_chat_params getFormattedChatWithJinja(
     const std::shared_ptr<LlamaSession> &sess,
-    const
+    const common_chat_templates_ptr &templates,
     const std::string &messages,
     const std::string &chat_template,
     const std::string &json_schema,
@@ -393,72 +393,46 @@ common_chat_params getFormattedChatWithJinja(
     const bool &parallel_tool_calls,
     const std::string &tool_choice
 ) {
-
-    inputs.messages = json::parse(messages);
+    common_chat_templates_inputs inputs;
+    inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
     auto useTools = !tools.empty();
     if (useTools) {
-
+        inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
     }
     inputs.parallel_tool_calls = parallel_tool_calls;
     if (!tool_choice.empty()) {
-
+        inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
     }
     if (!json_schema.empty()) {
-
+        inputs.json_schema = json::parse(json_schema);
     }
     inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    inputs.stream = true;

     // If chat_template is provided, create new one and use it (probably slow)
     if (!chat_template.empty()) {
-        auto
-
-        if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-            inputs.parallel_tool_calls = false;
-        }
-        return common_chat_params_init(*template_ptr, inputs);
+        auto tmps = common_chat_templates_init(sess->model(), chat_template);
+        return common_chat_templates_apply(tmps.get(), inputs);
     } else {
-
-        if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-            inputs.parallel_tool_calls = false;
-        }
-        return common_chat_params_init(*template_ptr, inputs);
+        return common_chat_templates_apply(templates.get(), inputs);
     }
 }

 std::string getFormattedChat(
     const struct llama_model * model,
-    const
+    const common_chat_templates_ptr &templates,
     const std::string &messages,
     const std::string &chat_template
 ) {
-
-
-
-    std::vector<common_chat_msg> chat_msgs;
-    for (const auto &msg : chat_json) {
-        chat_msgs.push_back({
-            msg["role"].get<std::string>(),
-            msg["content"].get<std::string>()
-        });
-    }
+    common_chat_templates_inputs inputs;
+    inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+    inputs.use_jinja = false;

     // If chat_template is provided, create new one and use it (probably slow)
     if (!chat_template.empty()) {
-
-
-            *tmp.template_default,
-            chat_msgs,
-            true,
-            false
-        );
+        auto tmps = common_chat_templates_init(model, chat_template);
+        return common_chat_templates_apply(tmps.get(), inputs).prompt;
     } else {
-
-            *templates.template_default,
-            chat_msgs,
-            true,
-            false
-        );
+        return common_chat_templates_apply(templates.get(), inputs).prompt;
     }
 }

@@ -504,20 +478,21 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);

   Napi::Object result = Napi::Object::New(env);
-  result.Set("prompt", chatParams.prompt
+  result.Set("prompt", chatParams.prompt);
   // chat_format: int
   result.Set("chat_format", static_cast<int>(chatParams.format));
   // grammar: string
   result.Set("grammar", chatParams.grammar);
   // grammar_lazy: boolean
   result.Set("grammea_lazy", chatParams.grammar_lazy);
-  // grammar_triggers: [{
+  // grammar_triggers: [{ value: string, token: number }]
   Napi::Array grammar_triggers = Napi::Array::New(env);
   for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
     const auto & trigger = chatParams.grammar_triggers[i];
     Napi::Object triggerObj = Napi::Object::New(env);
-    triggerObj.Set("
-    triggerObj.Set("
+    triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+    triggerObj.Set("value", Napi::String::New(env, trigger.value));
+    triggerObj.Set("token", Napi::Number::New(env, trigger.token));
     grammar_triggers.Set(i, triggerObj);
   }
   result.Set("grammar_triggers", grammar_triggers);
@@ -594,6 +569,60 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }

+  // Handle preserved_tokens from options
+  if (options.Has("preserved_tokens")) {
+    auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
+    for (size_t i = 0; i < preserved_tokens.Length(); i++) {
+      auto token = preserved_tokens.Get(i).ToString().Utf8Value();
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+  }
+
+  // Handle grammar_triggers from options
+  if (options.Has("grammar_triggers")) {
+    auto grammar_triggers = options.Get("grammar_triggers").As<Napi::Array>();
+    for (size_t i = 0; i < grammar_triggers.Length(); i++) {
+      auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();
+
+      auto type = static_cast<common_grammar_trigger_type>(trigger_obj.Get("type").ToNumber().Int32Value());
+      auto word = trigger_obj.Get("value").ToString().Utf8Value();
+
+      if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+        auto ids = common_tokenize(_sess->context(), word, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          auto token = ids[0];
+          if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
+            throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+          }
+          common_grammar_trigger trigger;
+          trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+          trigger.value = word;
+          trigger.token = token;
+          params.sampling.grammar_triggers.push_back(std::move(trigger));
+        } else {
+          params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+        }
+      } else {
+        common_grammar_trigger trigger;
+        trigger.type = type;
+        trigger.value = word;
+        if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+          auto token = (llama_token) trigger_obj.Get("token").ToNumber().Int32Value();
+          trigger.token = token;
+        }
+        params.sampling.grammar_triggers.push_back(std::move(trigger));
+      }
+    }
+  }
+
+  // Handle grammar_lazy from options
+  if (options.Has("grammar_lazy")) {
+    params.sampling.grammar_lazy = options.Get("grammar_lazy").ToBoolean().Value();
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
     auto chat_template = get_option<std::string>(options, "chat_template", "");
@@ -616,33 +645,26 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
       tool_choice
     );

-    params.prompt = chatParams.prompt
+    params.prompt = chatParams.prompt;

     chat_format = chatParams.format;

+    for (const auto & token : chatParams.preserved_tokens) {
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+
     if (!has_grammar_set) {
       // grammar param always wins jinja template & json_schema
       params.sampling.grammar = chatParams.grammar;
       params.sampling.grammar_lazy = chatParams.grammar_lazy;
-
       for (const auto & trigger : chatParams.grammar_triggers) {
-
-        if (ids.size() == 1) {
-          params.sampling.grammar_trigger_tokens.push_back(ids[0]);
-          params.sampling.preserved_tokens.insert(ids[0]);
-          continue;
-        }
-        params.sampling.grammar_trigger_words.push_back(trigger);
+        params.sampling.grammar_triggers.push_back(trigger);
       }
       has_grammar_set = true;
     }
-
-    for (const auto & token : chatParams.preserved_tokens) {
-      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
-      if (ids.size() == 1) {
-        params.sampling.preserved_tokens.insert(ids[0]);
-      }
-    }

     for (const auto & stop : chatParams.additional_stops) {
       stop_words.push_back(stop);
package/src/LlamaContext.h
CHANGED
@@ -28,7 +28,7 @@ private:
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
-
+  common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp
CHANGED
package/src/llama.cpp/.github/workflows/build.yml
CHANGED
@@ -173,7 +173,15 @@ jobs:
             name: llama-bin-macos-x64.zip

   ubuntu-cpu-cmake:
-
+    strategy:
+      matrix:
+        include:
+          - build: 'x64'
+            os: ubuntu-22.04
+          - build: 'arm64'
+            os: ubuntu-22.04-arm
+
+    runs-on: ${{ matrix.os }}

     steps:
       - name: Clone
@@ -239,14 +247,14 @@ jobs:
         run: |
           cp LICENSE ./build/bin/
           cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*

       - name: Upload artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         uses: actions/upload-artifact@v4
         with:
-          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu
-          name: llama-bin-ubuntu
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
+          name: llama-bin-ubuntu-${{ matrix.build }}.zip

   ubuntu-latest-cmake-sanitizer:
     runs-on: ubuntu-latest
@@ -459,6 +467,7 @@ jobs:
         run: |
           cmake -B build -S . \
             -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+            -DGGML_HIP_ROCWMMA_FATTN=ON \
             -DGGML_HIP=ON
           cmake --build build --config Release -j $(nproc)

@@ -468,6 +477,7 @@ jobs:
           cmake -B build2 -S . \
             -DCMAKE_C_COMPILER=hipcc \
             -DCMAKE_CXX_COMPILER=hipcc \
+            -DGGML_HIP_ROCWMMA_FATTN=ON \
             -DGGML_HIP=ON
           cmake --build build2 --config Release -j $(nproc)

@@ -702,12 +712,11 @@ jobs:
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
-          sudo cmake --install build --config Release

       - name: xcodebuild for swift package
         id: xcodebuild
         run: |
-
+          ./build-xcframework.sh

   windows-msys2:
     runs-on: windows-latest
@@ -765,7 +774,7 @@ jobs:
     env:
       OPENBLAS_VERSION: 0.3.23
       SDE_VERSION: 9.33.0-2024-01-07
-      VULKAN_VERSION: 1.
+      VULKAN_VERSION: 1.4.304.1

     strategy:
       matrix:
@@ -1195,6 +1204,11 @@ jobs:
         id: checkout
         uses: actions/checkout@v4

+      - name: Clone rocWMMA repository
+        id: clone_rocwmma
+        run: |
+          git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
       - name: Install
         id: depends
         run: |
@@ -1224,8 +1238,10 @@ jobs:
           cmake -G "Unix Makefiles" -B build -S . `
             -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
             -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+            -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
             -DCMAKE_BUILD_TYPE=Release `
             -DGGML_HIP=ON `
+            -DGGML_HIP_ROCWMMA_FATTN=ON `
             -DGGML_RPC=ON
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

@@ -1244,6 +1260,11 @@ jobs:
         with:
           fetch-depth: 0

+      - name: Clone rocWMMA repository
+        id: clone_rocwmma
+        run: |
+          git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
         with:
@@ -1273,8 +1294,10 @@ jobs:
           cmake -G "Unix Makefiles" -B build -S . `
             -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
             -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+            -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
             -DCMAKE_BUILD_TYPE=Release `
             -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
+            -DGGML_HIP_ROCWMMA_FATTN=ON `
             -DGGML_HIP=ON `
             -DGGML_RPC=ON
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

@@ -1313,6 +1336,8 @@ jobs:
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

       - name: Build
         id: cmake_build
@@ -1328,15 +1353,40 @@ jobs:
             -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
             -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
-          sudo cmake --install build --config Release

       - name: xcodebuild for swift package
         id: xcodebuild
         run: |
-
+          ./build-xcframework.sh

       - name: Build Xcode project
-        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          zip -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
+          name: llama-${{ steps.tag.outputs.name }}-xcframework

   android-build:
     runs-on: ubuntu-latest
package/src/llama.cpp/common/CMakeLists.txt
CHANGED
@@ -57,8 +57,7 @@ add_library(${TARGET} STATIC
     arg.h
     base64.hpp
     chat.cpp
-    chat.
-    chat-template.hpp
+    chat.h
     common.cpp
     common.h
     console.cpp
@@ -68,7 +67,8 @@ add_library(${TARGET} STATIC
     llguidance.cpp
     log.cpp
     log.h
-    minja.hpp
+    minja/chat-template.hpp
+    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     sampling.cpp