@fugood/llama.node 0.3.11 → 0.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -0
- package/lib/index.js +26 -20
- package/lib/index.ts +32 -28
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +13 -4
- package/src/llama.cpp/.github/workflows/build.yml +35 -3
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +20 -3
- package/src/llama.cpp/common/arg.cpp +180 -3
- package/src/llama.cpp/common/chat-template.hpp +21 -7
- package/src/llama.cpp/common/chat.cpp +220 -101
- package/src/llama.cpp/common/chat.hpp +3 -0
- package/src/llama.cpp/common/common.h +15 -7
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/minja.hpp +24 -9
- package/src/llama.cpp/common/sampling.cpp +52 -46
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/run/run.cpp +5 -12
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +58 -47
- package/src/llama.cpp/examples/server/utils.hpp +7 -5
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +6 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +852 -268
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +200 -107
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +2 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +26 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +6 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +812 -569
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +25 -1
- package/src/llama.cpp/ggml/src/ggml.c +1 -1
- package/src/llama.cpp/include/llama.h +14 -10
- package/src/llama.cpp/src/llama-grammar.cpp +1 -1
- package/src/llama.cpp/src/llama-grammar.h +1 -1
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +131 -57
- package/src/llama.cpp/src/llama.cpp +7 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +5 -5
- package/src/llama.cpp/tests/test-chat.cpp +237 -69
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file
package/bin/darwin/x64/llama-node.node
CHANGED
Binary file
package/bin/linux/arm64/llama-node.node
CHANGED
Binary file
package/bin/linux/x64/llama-node.node
CHANGED
Binary file
package/bin/linux-cuda/arm64/llama-node.node
CHANGED
Binary file
package/bin/linux-cuda/x64/llama-node.node
CHANGED
Binary file
package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file
package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file
package/bin/win32/arm64/llama-node.node
CHANGED
Binary file
package/bin/win32/arm64/node.lib
CHANGED
Binary file
package/bin/win32/x64/llama-node.node
CHANGED
Binary file
package/bin/win32/x64/node.lib
CHANGED
Binary file
package/bin/win32-vulkan/arm64/llama-node.node
CHANGED
Binary file
package/bin/win32-vulkan/arm64/node.lib
CHANGED
Binary file
package/bin/win32-vulkan/x64/llama-node.node
CHANGED
Binary file
package/bin/win32-vulkan/x64/node.lib
CHANGED
Binary file
package/lib/binding.ts
CHANGED
package/lib/index.js
CHANGED
@@ -23,15 +23,39 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = exports.toggleNativeLog = void 0;
 exports.addNativeLogListener = addNativeLogListener;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
 const mods = {};
+const logListeners = [];
+const logCallback = (level, text) => {
+    logListeners.forEach((listener) => listener(level, text));
+};
+let logEnabled = false;
+const refreshNativeLogSetup = () => {
+    Object.entries(mods).forEach(([, mod]) => {
+        mod.LlamaContext.toggleNativeLog(logEnabled, logCallback);
+    });
+};
+const toggleNativeLog = (enable) => __awaiter(void 0, void 0, void 0, function* () {
+    logEnabled = enable;
+    refreshNativeLogSetup();
+});
+exports.toggleNativeLog = toggleNativeLog;
+function addNativeLogListener(listener) {
+    logListeners.push(listener);
+    return {
+        remove: () => {
+            logListeners.splice(logListeners.indexOf(listener), 1);
+        },
+    };
+}
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
+    refreshNativeLogSetup();
     return new mods[variant].LlamaContext(options);
 });
 exports.loadModel = loadModel;
@@ -47,25 +71,7 @@ const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function*
     var _a;
     const variant = 'default';
     (_a = mods[variant]) !== null && _a !== void 0 ? _a : (mods[variant] = yield (0, binding_1.loadModule)(variant));
+    refreshNativeLogSetup();
     return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
 });
 exports.loadLlamaModelInfo = loadLlamaModelInfo;
-const logListeners = [];
-const logCallback = (level, text) => {
-    logListeners.forEach((listener) => listener(level, text));
-};
-const toggleNativeLog = (enable, options) => __awaiter(void 0, void 0, void 0, function* () {
-    var _a, _b;
-    const v = (_a = options === null || options === void 0 ? void 0 : options.variant) !== null && _a !== void 0 ? _a : 'default';
-    (_b = mods[v]) !== null && _b !== void 0 ? _b : (mods[v] = yield (0, binding_1.loadModule)(v));
-    return mods[v].LlamaContext.toggleNativeLog(enable, logCallback);
-});
-exports.toggleNativeLog = toggleNativeLog;
-function addNativeLogListener(listener) {
-    logListeners.push(listener);
-    return {
-        remove: () => {
-            logListeners.splice(logListeners.indexOf(listener), 1);
-        },
-    };
-}
package/lib/index.ts
CHANGED
@@ -9,11 +9,42 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
+const logListeners: Array<(level: string, text: string) => void> = []
+
+const logCallback = (level: string, text: string) => {
+  logListeners.forEach((listener) => listener(level, text))
+}
+
+let logEnabled = false
+
+const refreshNativeLogSetup = () => {
+  Object.entries(mods).forEach(([, mod]) => {
+    mod.LlamaContext.toggleNativeLog(logEnabled, logCallback)
+  })
+}
+
+export const toggleNativeLog = async (enable: boolean) => {
+  logEnabled = enable
+  refreshNativeLogSetup()
+}
+
+export function addNativeLogListener(
+  listener: (level: string, text: string) => void,
+): { remove: () => void } {
+  logListeners.push(listener)
+  return {
+    remove: () => {
+      logListeners.splice(logListeners.indexOf(listener), 1)
+    },
+  }
+}
+
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
 ): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
+  refreshNativeLogSetup()
   return new mods[variant].LlamaContext(options)
 }
 
@@ -30,33 +61,6 @@ const modelInfoSkip = [
 export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
   const variant = 'default'
   mods[variant] ??= await loadModule(variant)
+  refreshNativeLogSetup()
   return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
 }
-
-const logListeners: Array<(level: string, text: string) => void> = []
-
-const logCallback = (level: string, text: string) => {
-  logListeners.forEach((listener) => listener(level, text))
-}
-
-export const toggleNativeLog = async (
-  enable: boolean,
-  options?: {
-    variant?: LibVariant
-  },
-) => {
-  const v = options?.variant ?? 'default'
-  mods[v] ??= await loadModule(v)
-  return mods[v].LlamaContext.toggleNativeLog(enable, logCallback)
-}
-
-export function addNativeLogListener(
-  listener: (level: string, text: string) => void,
-): { remove: () => void } {
-  logListeners.push(listener)
-  return {
-    remove: () => {
-      logListeners.splice(logListeners.indexOf(listener), 1)
-    },
-  }
-}
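Note: taken together, the lib/index.js and lib/index.ts changes above move native log wiring into module load, so toggleNativeLog no longer takes a variant option and every loaded variant picks up the current log setting. A minimal usage sketch of the new logging API; the model path field and file name below are placeholders, not part of this diff:

  import { loadModel, toggleNativeLog, addNativeLogListener } from '@fugood/llama.node'

  // Forward native llama.cpp logs to JS listeners (applied to all loaded variants).
  await toggleNativeLog(true)

  // Subscribe to log lines; remove() detaches this listener again.
  const nativeLog = addNativeLogListener((level, text) => {
    console.log(`[llama:${level}] ${text}`)
  })

  // loadModel() refreshes the native log setup for the module it loads.
  const context = await loadModel({ model: './model.gguf' }) // placeholder path

  nativeLog.remove()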
package/package.json
CHANGED
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -165,9 +165,17 @@ void LlamaCompletionWorker::OnOK() {
              Napi::String::New(env, _result.text.c_str()));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  std::string * reasoning_content = nullptr;
+  std::string * content = nullptr;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      if (!message.reasoning_content.empty()) {
+        reasoning_content = &message.reasoning_content;
+      }
+      if (!message.content.empty()) {
+        content = &message.content;
+      }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
         Napi::Object tool_call = Napi::Object::New(env);
@@ -188,6 +196,12 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
+  if (reasoning_content) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content->c_str()));
+  }
+  if (content) {
+    result.Set("content", Napi::String::New(env, content->c_str()));
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
package/src/LlamaContext.cpp
CHANGED
@@ -185,6 +185,13 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.chat_template = get_option<std::string>(options, "chat_template", "");
 
+  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
+  if (reasoning_format == "deepseek") {
+    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+  } else {
+    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+  }
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -377,7 +384,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
 }
 
 common_chat_params getFormattedChatWithJinja(
-    const
+    const std::shared_ptr<LlamaSession> &sess,
     const common_chat_templates &templates,
     const std::string &messages,
     const std::string &chat_template,
@@ -399,11 +406,12 @@ common_chat_params getFormattedChatWithJinja(
   if (!json_schema.empty()) {
     inputs.json_schema = json::parse(json_schema);
   }
+  inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
   inputs.stream = true;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-    auto tmp = common_chat_templates_from_model(model, chat_template);
+    auto tmp = common_chat_templates_from_model(sess->model(), chat_template);
     const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
     if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
       inputs.parallel_tool_calls = false;
@@ -493,7 +501,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
 
-  auto chatParams = getFormattedChatWithJinja(_sess
+  auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
 
   Napi::Object result = Napi::Object::New(env);
   result.Set("prompt", chatParams.prompt.get<std::string>());
@@ -598,7 +606,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
 
   auto chatParams = getFormattedChatWithJinja(
-      _sess
+      _sess,
       _templates,
      json_stringify(messages),
      chat_template,
@@ -685,6 +693,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.top_n_sigma = get_option<float>(options, "top_n_sigma", -1.0f);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
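Note: with the LlamaContext.cpp change above, a context created with reasoning_format: 'deepseek' enables reasoning extraction in the chat templating path, and the LlamaCompletionWorker.cpp change surfaces the parsed reasoning_content / content / tool_calls fields on the completion result; top_n_sigma is read as a new sampling option. A hedged sketch of how this might be used from JS, assuming the package's existing completion() method and message shape (neither is shown in this diff; the model path is a placeholder):

  import { loadModel } from '@fugood/llama.node'

  // 'deepseek' maps to COMMON_REASONING_FORMAT_DEEPSEEK in the native layer;
  // any other value falls back to COMMON_REASONING_FORMAT_NONE.
  const context = await loadModel({
    model: './deepseek-r1-distill.gguf', // placeholder path
    reasoning_format: 'deepseek',
  })

  const result = await context.completion({
    messages: [{ role: 'user', content: 'Why is the sky blue?' }],
    top_n_sigma: 2.0, // forwarded to params.sampling.top_n_sigma (default -1.0 disables it)
  })

  // Populated from common_chat_parse on the native side:
  console.log(result.reasoning_content) // extracted reasoning, when present
  console.log(result.content)           // remaining answer text, when present
  console.log(result.tool_calls)        // parsed tool calls, if any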
package/src/llama.cpp/.github/workflows/build.yml
CHANGED
@@ -129,7 +129,7 @@ jobs:
         run: |
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
-          # https://github.com/
+          # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
           cmake -B build \
             -DCMAKE_BUILD_RPATH="@loader_path" \
             -DLLAMA_FATAL_WARNINGS=ON \
@@ -374,6 +374,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
@@ -401,7 +403,35 @@ jobs:
         run: |
           cd build
           # This is using llvmpipe and runs slower than other backends
-          ctest -L main --verbose --timeout
+          ctest -L main --verbose --timeout 2700
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
+          name: llama-bin-ubuntu-vulkan-x64.zip
 
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
@@ -443,7 +473,7 @@ jobs:
 
   ubuntu-22-cmake-musa:
     runs-on: ubuntu-22.04
-    container: mthreads/musa:rc3.1.
+    container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
 
     steps:
       - name: Clone
@@ -1345,8 +1375,10 @@ jobs:
 
     needs:
       - ubuntu-cpu-cmake
+      - ubuntu-22-cmake-vulkan
      - windows-latest-cmake
      - windows-2019-cmake-cuda
+      - windows-latest-cmake-sycl
      - windows-latest-cmake-hip-release
      - macOS-latest-cmake-arm64
      - macOS-latest-cmake-x64
package/src/llama.cpp/common/CMakeLists.txt
CHANGED
@@ -96,6 +96,22 @@ if (LLAMA_LLGUIDANCE)
     include(ExternalProject)
     set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
     set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
+
+    # Set the correct library file extension based on platform
+    if (WIN32)
+        set(LLGUIDANCE_LIB_NAME "llguidance.lib")
+        # Add Windows-specific libraries
+        set(LLGUIDANCE_PLATFORM_LIBS
+            ws2_32   # Windows Sockets API
+            userenv  # For GetUserProfileDirectoryW
+            ntdll    # For NT functions
+            bcrypt   # For BCryptGenRandom
+        )
+    else()
+        set(LLGUIDANCE_LIB_NAME "libllguidance.a")
+        set(LLGUIDANCE_PLATFORM_LIBS "")
+    endif()
+
     ExternalProject_Add(llguidance_ext
         GIT_REPOSITORY https://github.com/guidance-ai/llguidance
         # v0.6.12:
@@ -106,17 +122,18 @@ if (LLAMA_LLGUIDANCE)
         CONFIGURE_COMMAND ""
         BUILD_COMMAND cargo build --release
         INSTALL_COMMAND ""
-        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}
+        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h
         UPDATE_COMMAND ""
     )
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
 
     add_library(llguidance STATIC IMPORTED)
-    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}
+    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME})
     add_dependencies(llguidance llguidance_ext)
 
     target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
-
+    # Add platform libraries to the main target
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
 target_include_directories(${TARGET} PUBLIC .)