@fugood/llama.node 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +89 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +132 -13
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +33 -174
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +10 -9
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +4 -2
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +3 -4
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +5 -5
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +7 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +79 -34
- package/src/llama.cpp/examples/parallel/parallel.cpp +6 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +15 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +196 -108
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +113 -101
- package/src/llama.cpp/examples/server/utils.hpp +94 -105
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +263 -151
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -1
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +29 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -34
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -7
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +139 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1546 -387
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1645 -113
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +242 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -6
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -138
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +117 -36
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +147 -16
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +307 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +262 -746
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -78
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +498 -188
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +16 -3
- package/src/llama.cpp/ggml/src/ggml.c +93 -5
- package/src/llama.cpp/include/llama.h +105 -27
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +123 -16
- package/src/llama.cpp/src/llama-arch.h +19 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -109
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +8230 -122
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +51 -9837
- package/src/llama.cpp/tests/test-backend-ops.cpp +247 -112
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
- /package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/bin/** CHANGED
Binary files (the prebuilt llama-node.node and node.lib binaries listed above); no textual diff is shown for binary content.
package/lib/binding.ts CHANGED
@@ -87,7 +87,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
-  grammar_triggers?: { word: string; at_start: boolean }[]
+  grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
 }
 
package/package.json
CHANGED
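A usage note on the LlamaCompletionOptions change in package/lib/binding.ts above (not part of the published diff): grammar trigger entries now carry a numeric type discriminator alongside word/at_start. The sketch below only restates that shape; the concrete numeric type codes come from llama.cpp's common_grammar_trigger_type enum (not shown in this diff), and the context.completion() call is an assumed API shape, not a documented one.

// Sketch only: option field names follow the 0.3.15 type above.
// The trigger `type` value and the completion() call are assumptions.
const options = {
  grammar_lazy: true,                // grammar is only enforced after a trigger fires
  grammar_triggers: [
    { type: 0 /* placeholder code */, word: '<tool_call>', at_start: false },
  ],
  preserved_tokens: ['<tool_call>'], // keep trigger words intact as single tokens
  stop: ['</s>'],
}
// await context.completion(options, onToken)  // assumed method name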
package/src/LlamaContext.cpp CHANGED
@@ -272,7 +272,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   _sess = sess;
   _info = common_params_get_system_info(params);
 
-  _templates =
+  _templates = common_chat_templates_init(model, params.chat_template);
 }
 
 // getSystemInfo(): string
@@ -355,22 +355,22 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   Napi::Object minja = Napi::Object::New(info.Env());
   minja.Set("default", validateModelChatTemplate(model, true, ""));
   Napi::Object defaultCaps = Napi::Object::New(info.Env());
-  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
-  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
-  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
-  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
-  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
-  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  defaultCaps.Set("tools", _templates.get()->template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.get()->template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.get()->template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.get()->template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.get()->template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.get()->template_default->original_caps().supports_tool_call_id);
   minja.Set("defaultCaps", defaultCaps);
   minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
-  if (_templates.template_tool_use) {
+  if (_templates.get()->template_tool_use) {
     Napi::Object toolUseCaps = Napi::Object::New(info.Env());
-    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
-    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
-    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
-    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
-    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
-    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUseCaps.Set("tools", _templates.get()->template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.get()->template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.get()->template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.get()->template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.get()->template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.get()->template_tool_use->original_caps().supports_tool_call_id);
     minja.Set("toolUseCaps", toolUseCaps);
   }
   chatTemplates.Set("minja", minja);
@@ -385,7 +385,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
 
 common_chat_params getFormattedChatWithJinja(
     const std::shared_ptr<LlamaSession> &sess,
-    const
+    const common_chat_templates_ptr &templates,
     const std::string &messages,
     const std::string &chat_template,
     const std::string &json_schema,
@@ -393,72 +393,46 @@ common_chat_params getFormattedChatWithJinja(
     const bool &parallel_tool_calls,
     const std::string &tool_choice
 ) {
-
-  inputs.messages = json::parse(messages);
+  common_chat_templates_inputs inputs;
+  inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   auto useTools = !tools.empty();
   if (useTools) {
-
+    inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
   }
   inputs.parallel_tool_calls = parallel_tool_calls;
   if (!tool_choice.empty()) {
-
+    inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
   }
   if (!json_schema.empty()) {
-
+    inputs.json_schema = json::parse(json_schema);
   }
   inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
-  inputs.stream = true;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-    auto
-
-    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-      inputs.parallel_tool_calls = false;
-    }
-    return common_chat_params_init(*template_ptr, inputs);
+    auto tmps = common_chat_templates_init(sess->model(), chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs);
   } else {
-
-    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-      inputs.parallel_tool_calls = false;
-    }
-    return common_chat_params_init(*template_ptr, inputs);
+    return common_chat_templates_apply(templates.get(), inputs);
   }
 }
 
 std::string getFormattedChat(
     const struct llama_model * model,
-    const
+    const common_chat_templates_ptr &templates,
     const std::string &messages,
     const std::string &chat_template
 ) {
-
-
-
-  std::vector<common_chat_msg> chat_msgs;
-  for (const auto &msg : chat_json) {
-    chat_msgs.push_back({
-      msg["role"].get<std::string>(),
-      msg["content"].get<std::string>()
-    });
-  }
+  common_chat_templates_inputs inputs;
+  inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+  inputs.use_jinja = false;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-
-
-      *tmp.template_default,
-      chat_msgs,
-      true,
-      false
-    );
+    auto tmps = common_chat_templates_init(model, chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs).prompt;
   } else {
-
-      *templates.template_default,
-      chat_msgs,
-      true,
-      false
-    );
+    return common_chat_templates_apply(templates.get(), inputs).prompt;
   }
 }
 
@@ -504,20 +478,21 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
 
   Napi::Object result = Napi::Object::New(env);
-  result.Set("prompt", chatParams.prompt
+  result.Set("prompt", chatParams.prompt);
   // chat_format: int
   result.Set("chat_format", static_cast<int>(chatParams.format));
   // grammar: string
   result.Set("grammar", chatParams.grammar);
   // grammar_lazy: boolean
   result.Set("grammea_lazy", chatParams.grammar_lazy);
-  // grammar_triggers: [{
+  // grammar_triggers: [{ value: string, token: number }]
   Napi::Array grammar_triggers = Napi::Array::New(env);
   for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
     const auto & trigger = chatParams.grammar_triggers[i];
     Napi::Object triggerObj = Napi::Object::New(env);
-    triggerObj.Set("
-    triggerObj.Set("
+    triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+    triggerObj.Set("value", Napi::String::New(env, trigger.value));
+    triggerObj.Set("token", Napi::Number::New(env, trigger.token));
     grammar_triggers.Set(i, triggerObj);
   }
   result.Set("grammar_triggers", grammar_triggers);
@@ -594,6 +569,60 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
+  // Handle preserved_tokens from options
+  if (options.Has("preserved_tokens")) {
+    auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
+    for (size_t i = 0; i < preserved_tokens.Length(); i++) {
+      auto token = preserved_tokens.Get(i).ToString().Utf8Value();
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+  }
+
+  // Handle grammar_triggers from options
+  if (options.Has("grammar_triggers")) {
+    auto grammar_triggers = options.Get("grammar_triggers").As<Napi::Array>();
+    for (size_t i = 0; i < grammar_triggers.Length(); i++) {
+      auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();
+
+      auto type = static_cast<common_grammar_trigger_type>(trigger_obj.Get("type").ToNumber().Int32Value());
+      auto word = trigger_obj.Get("value").ToString().Utf8Value();
+
+      if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+        auto ids = common_tokenize(_sess->context(), word, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          auto token = ids[0];
+          if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
+            throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+          }
+          common_grammar_trigger trigger;
+          trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+          trigger.value = word;
+          trigger.token = token;
+          params.sampling.grammar_triggers.push_back(std::move(trigger));
+        } else {
+          params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+        }
+      } else {
+        common_grammar_trigger trigger;
+        trigger.type = type;
+        trigger.value = word;
+        if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+          auto token = (llama_token) trigger_obj.Get("token").ToNumber().Int32Value();
+          trigger.token = token;
+        }
+        params.sampling.grammar_triggers.push_back(std::move(trigger));
+      }
+    }
+  }
+
+  // Handle grammar_lazy from options
+  if (options.Has("grammar_lazy")) {
+    params.sampling.grammar_lazy = options.Get("grammar_lazy").ToBoolean().Value();
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
     auto chat_template = get_option<std::string>(options, "chat_template", "");
@@ -616,33 +645,26 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
       tool_choice
     );
 
-    params.prompt = chatParams.prompt
+    params.prompt = chatParams.prompt;
 
     chat_format = chatParams.format;
 
+    for (const auto & token : chatParams.preserved_tokens) {
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+
     if (!has_grammar_set) {
       // grammar param always wins jinja template & json_schema
       params.sampling.grammar = chatParams.grammar;
      params.sampling.grammar_lazy = chatParams.grammar_lazy;
-
       for (const auto & trigger : chatParams.grammar_triggers) {
-
-        if (ids.size() == 1) {
-          params.sampling.grammar_trigger_tokens.push_back(ids[0]);
-          params.sampling.preserved_tokens.insert(ids[0]);
-          continue;
-        }
-        params.sampling.grammar_trigger_words.push_back(trigger);
+        params.sampling.grammar_triggers.push_back(trigger);
      }
      has_grammar_set = true;
    }
-
-    for (const auto & token : chatParams.preserved_tokens) {
-      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
-      if (ids.size() == 1) {
-        params.sampling.preserved_tokens.insert(ids[0]);
-      }
-    }
 
     for (const auto & stop : chatParams.additional_stops) {
       stop_words.push_back(stop);
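Not part of the diff: a TypeScript sketch of how the object built by GetFormattedChat above could be fed back into a completion call under the new trigger handling. The field names (prompt, chat_format, grammar, grammea_lazy, grammar_triggers with type/value/token) are taken from the hunks above; the getFormattedChat/completion method names and argument shapes are assumptions for illustration only.

// Shape assembled by LlamaContext::GetFormattedChat (see the C++ hunks above).
type FormattedChat = {
  prompt: string
  chat_format: number
  grammar: string
  grammea_lazy: boolean   // key spelled this way in the native source
  grammar_triggers: { type: number; value: string; token: number }[]
}

async function completeWithTemplate(context: any, messages: object[]) {
  // Assumed binding method names; the real API surface may differ.
  const chat: FormattedChat = context.getFormattedChat(JSON.stringify(messages))

  // The native Completion handler reads `type`/`value`/`token` from each
  // trigger object (see the grammar_triggers loop above), so the array
  // returned here can be passed through unchanged.
  return context.completion({
    prompt: chat.prompt,
    grammar: chat.grammar,
    grammar_lazy: chat.grammea_lazy,
    grammar_triggers: chat.grammar_triggers,
  })
}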
package/src/LlamaContext.h CHANGED
@@ -28,7 +28,7 @@ private:
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
-
+  common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp
CHANGED
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -173,7 +173,15 @@ jobs:
           name: llama-bin-macos-x64.zip
 
   ubuntu-cpu-cmake:
-
+    strategy:
+      matrix:
+        include:
+          - build: 'x64'
+            os: ubuntu-22.04
+          - build: 'arm64'
+            os: ubuntu-22.04-arm
+
+    runs-on: ${{ matrix.os }}
 
     steps:
       - name: Clone
@@ -239,14 +247,14 @@ jobs:
         run: |
           cp LICENSE ./build/bin/
           cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
 
       - name: Upload artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         uses: actions/upload-artifact@v4
         with:
-          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu
-          name: llama-bin-ubuntu
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
+          name: llama-bin-ubuntu-${{ matrix.build }}.zip
 
   ubuntu-latest-cmake-sanitizer:
     runs-on: ubuntu-latest
@@ -459,6 +467,7 @@ jobs:
         run: |
          cmake -B build -S . \
            -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+           -DGGML_HIP_ROCWMMA_FATTN=ON \
            -DGGML_HIP=ON
          cmake --build build --config Release -j $(nproc)
 
@@ -468,6 +477,7 @@ jobs:
          cmake -B build2 -S . \
            -DCMAKE_C_COMPILER=hipcc \
            -DCMAKE_CXX_COMPILER=hipcc \
+           -DGGML_HIP_ROCWMMA_FATTN=ON \
            -DGGML_HIP=ON
          cmake --build build2 --config Release -j $(nproc)
 
@@ -666,6 +676,35 @@ jobs:
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
 
+  macOS-latest-cmake-visionos:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=visionOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
   macOS-latest-swift:
     runs-on: macos-latest
 
@@ -702,12 +741,11 @@ jobs:
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
-         sudo cmake --install build --config Release
 
       - name: xcodebuild for swift package
         id: xcodebuild
         run: |
-
+          ./build-xcframework.sh
 
   windows-msys2:
     runs-on: windows-latest
@@ -765,7 +803,7 @@ jobs:
     env:
       OPENBLAS_VERSION: 0.3.23
       SDE_VERSION: 9.33.0-2024-01-07
-      VULKAN_VERSION: 1.
+      VULKAN_VERSION: 1.4.304.1
 
     strategy:
       matrix:
@@ -1195,6 +1233,11 @@ jobs:
         id: checkout
        uses: actions/checkout@v4
 
+      - name: Clone rocWMMA repository
+        id: clone_rocwmma
+        run: |
+          git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
       - name: Install
         id: depends
         run: |
@@ -1224,8 +1267,10 @@ jobs:
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+           -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
            -DCMAKE_BUILD_TYPE=Release `
            -DGGML_HIP=ON `
+           -DGGML_HIP_ROCWMMA_FATTN=ON `
            -DGGML_RPC=ON
          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
 
@@ -1244,6 +1289,11 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Clone rocWMMA repository
+        id: clone_rocwmma
+        run: |
+          git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
         with:
@@ -1273,8 +1323,10 @@ jobs:
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+           -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
            -DCMAKE_BUILD_TYPE=Release `
            -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
+           -DGGML_HIP_ROCWMMA_FATTN=ON `
            -DGGML_HIP=ON `
            -DGGML_RPC=ON
          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
@@ -1313,6 +1365,8 @@ jobs:
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Build
         id: cmake_build
@@ -1328,15 +1382,40 @@ jobs:
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
-         sudo cmake --install build --config Release
 
       - name: xcodebuild for swift package
         id: xcodebuild
         run: |
-
+          ./build-xcframework.sh
 
       - name: Build Xcode project
-        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
+          name: llama-${{ steps.tag.outputs.name }}-xcframework
 
   android-build:
     runs-on: ubuntu-latest
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -29,6 +29,8 @@ else()
     set(LLAMA_STANDALONE OFF)
 endif()
 
+option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
+
 if (EMSCRIPTEN)
     set(BUILD_SHARED_LIBS_DEFAULT OFF)
 
@@ -145,7 +147,13 @@ endif()
 # 3rd-party
 #
 
-if (
+if (LLAMA_USE_SYSTEM_GGML)
+    message(STATUS "Using system-provided libggml, skipping ggml build")
+    find_package(ggml REQUIRED)
+    add_library(ggml ALIAS ggml::ggml)
+endif()
+
+if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -57,8 +57,7 @@ add_library(${TARGET} STATIC
     arg.h
     base64.hpp
     chat.cpp
-    chat.
-    chat-template.hpp
+    chat.h
     common.cpp
     common.h
     console.cpp
@@ -68,7 +67,8 @@ add_library(${TARGET} STATIC
     llguidance.cpp
     log.cpp
     log.h
-    minja.hpp
+    minja/chat-template.hpp
+    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     sampling.cpp