@fugood/llama.node 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +1 -0
  18. package/lib/index.js +26 -20
  19. package/lib/index.ts +32 -28
  20. package/package.json +1 -1
  21. package/src/LlamaCompletionWorker.cpp +14 -0
  22. package/src/LlamaContext.cpp +13 -4
  23. package/src/llama.cpp/.github/workflows/build.yml +35 -3
  24. package/src/llama.cpp/.github/workflows/docker.yml +2 -0
  25. package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
  26. package/src/llama.cpp/common/CMakeLists.txt +20 -3
  27. package/src/llama.cpp/common/arg.cpp +180 -3
  28. package/src/llama.cpp/common/chat-template.hpp +21 -7
  29. package/src/llama.cpp/common/chat.cpp +220 -101
  30. package/src/llama.cpp/common/chat.hpp +3 -0
  31. package/src/llama.cpp/common/common.h +15 -7
  32. package/src/llama.cpp/common/llguidance.cpp +3 -3
  33. package/src/llama.cpp/common/log.cpp +1 -0
  34. package/src/llama.cpp/common/log.h +2 -1
  35. package/src/llama.cpp/common/minja.hpp +24 -9
  36. package/src/llama.cpp/common/sampling.cpp +52 -46
  37. package/src/llama.cpp/common/speculative.h +1 -1
  38. package/src/llama.cpp/docs/build.md +2 -2
  39. package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -1
  40. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
  41. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
  43. package/src/llama.cpp/examples/run/run.cpp +5 -12
  44. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/server/httplib.h +381 -292
  46. package/src/llama.cpp/examples/server/server.cpp +58 -47
  47. package/src/llama.cpp/examples/server/utils.hpp +7 -5
  48. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -1
  49. package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
  50. package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
  51. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  52. package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
  53. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +6 -12
  54. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +852 -268
  55. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +200 -107
  56. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -5
  57. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
  58. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +2 -2
  59. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +26 -4
  60. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +6 -7
  61. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +812 -569
  62. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +25 -1
  63. package/src/llama.cpp/ggml/src/ggml.c +1 -1
  64. package/src/llama.cpp/include/llama.h +14 -10
  65. package/src/llama.cpp/src/llama-grammar.cpp +1 -1
  66. package/src/llama.cpp/src/llama-grammar.h +1 -1
  67. package/src/llama.cpp/src/llama-impl.h +6 -6
  68. package/src/llama.cpp/src/llama-kv-cache.h +1 -1
  69. package/src/llama.cpp/src/llama-mmap.h +1 -0
  70. package/src/llama.cpp/src/llama-model.cpp +1 -1
  71. package/src/llama.cpp/src/llama-sampling.cpp +131 -57
  72. package/src/llama.cpp/src/llama.cpp +7 -5
  73. package/src/llama.cpp/src/unicode.cpp +9 -2
  74. package/src/llama.cpp/tests/test-backend-ops.cpp +5 -5
  75. package/src/llama.cpp/tests/test-chat.cpp +237 -69
  76. package/src/llama.cpp/tests/test-gguf.cpp +4 -4
  77. package/src/llama.cpp/tests/test-sampling.cpp +15 -0
package/lib/binding.ts CHANGED
@@ -8,6 +8,7 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   chat_template?: string
+  reasoning_format?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
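
The new `reasoning_format` option is read at model load time. A minimal sketch of setting it from the JavaScript side, assuming the `loadModel` export shown below in lib/index.ts; the model path is a placeholder, and 'deepseek' is the value mapped in LlamaContext.cpp further down (any other value falls back to "none"):

import { loadModel } from '@fugood/llama.node'

async function main() {
  // Placeholder model path; 'deepseek' is currently the only value mapped to
  // COMMON_REASONING_FORMAT_DEEPSEEK -- anything else is treated as "none".
  const context = await loadModel({
    model: './models/deepseek-r1-distill-qwen-1.5b-q4_k_m.gguf',
    reasoning_format: 'deepseek',
    n_ctx: 2048,
  })
  return context
}

main()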
package/lib/index.js CHANGED
@@ -23,15 +23,39 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.toggleNativeLog = exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = exports.toggleNativeLog = void 0;
 exports.addNativeLogListener = addNativeLogListener;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
 const mods = {};
+const logListeners = [];
+const logCallback = (level, text) => {
+    logListeners.forEach((listener) => listener(level, text));
+};
+let logEnabled = false;
+const refreshNativeLogSetup = () => {
+    Object.entries(mods).forEach(([, mod]) => {
+        mod.LlamaContext.toggleNativeLog(logEnabled, logCallback);
+    });
+};
+const toggleNativeLog = (enable) => __awaiter(void 0, void 0, void 0, function* () {
+    logEnabled = enable;
+    refreshNativeLogSetup();
+});
+exports.toggleNativeLog = toggleNativeLog;
+function addNativeLogListener(listener) {
+    logListeners.push(listener);
+    return {
+        remove: () => {
+            logListeners.splice(logListeners.indexOf(listener), 1);
+        },
+    };
+}
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
+    refreshNativeLogSetup();
     return new mods[variant].LlamaContext(options);
 });
 exports.loadModel = loadModel;
@@ -47,25 +71,7 @@ const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function*
     var _a;
     const variant = 'default';
     (_a = mods[variant]) !== null && _a !== void 0 ? _a : (mods[variant] = yield (0, binding_1.loadModule)(variant));
+    refreshNativeLogSetup();
     return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
 });
 exports.loadLlamaModelInfo = loadLlamaModelInfo;
-const logListeners = [];
-const logCallback = (level, text) => {
-    logListeners.forEach((listener) => listener(level, text));
-};
-const toggleNativeLog = (enable, options) => __awaiter(void 0, void 0, void 0, function* () {
-    var _a, _b;
-    const v = (_a = options === null || options === void 0 ? void 0 : options.variant) !== null && _a !== void 0 ? _a : 'default';
-    (_b = mods[v]) !== null && _b !== void 0 ? _b : (mods[v] = yield (0, binding_1.loadModule)(v));
-    return mods[v].LlamaContext.toggleNativeLog(enable, logCallback);
-});
-exports.toggleNativeLog = toggleNativeLog;
-function addNativeLogListener(listener) {
-    logListeners.push(listener);
-    return {
-        remove: () => {
-            logListeners.splice(logListeners.indexOf(listener), 1);
-        },
-    };
-}
package/lib/index.ts CHANGED
@@ -9,11 +9,42 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
+const logListeners: Array<(level: string, text: string) => void> = []
+
+const logCallback = (level: string, text: string) => {
+  logListeners.forEach((listener) => listener(level, text))
+}
+
+let logEnabled = false
+
+const refreshNativeLogSetup = () => {
+  Object.entries(mods).forEach(([, mod]) => {
+    mod.LlamaContext.toggleNativeLog(logEnabled, logCallback)
+  })
+}
+
+export const toggleNativeLog = async (enable: boolean) => {
+  logEnabled = enable
+  refreshNativeLogSetup()
+}
+
+export function addNativeLogListener(
+  listener: (level: string, text: string) => void,
+): { remove: () => void } {
+  logListeners.push(listener)
+  return {
+    remove: () => {
+      logListeners.splice(logListeners.indexOf(listener), 1)
+    },
+  }
+}
+
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
 ): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
+  refreshNativeLogSetup()
   return new mods[variant].LlamaContext(options)
 }
 
@@ -30,33 +61,6 @@ const modelInfoSkip = [
 export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
   const variant = 'default'
   mods[variant] ??= await loadModule(variant)
+  refreshNativeLogSetup()
   return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
 }
-
-const logListeners: Array<(level: string, text: string) => void> = []
-
-const logCallback = (level: string, text: string) => {
-  logListeners.forEach((listener) => listener(level, text))
-}
-
-export const toggleNativeLog = async (
-  enable: boolean,
-  options?: {
-    variant?: LibVariant
-  },
-) => {
-  const v = options?.variant ?? 'default'
-  mods[v] ??= await loadModule(v)
-  return mods[v].LlamaContext.toggleNativeLog(enable, logCallback)
-}
-
-export function addNativeLogListener(
-  listener: (level: string, text: string) => void,
-): { remove: () => void } {
-  logListeners.push(listener)
-  return {
-    remove: () => {
-      logListeners.splice(logListeners.indexOf(listener), 1)
-    },
-  }
-}
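
The TypeScript source shows the reshaped logging API behind the compiled output above: `toggleNativeLog` no longer takes a `{ variant }` option or loads a native module itself; it just records the flag, and `refreshNativeLogSetup()` re-applies it to every module already loaded (with `loadModel`/`loadLlamaModelInfo` re-applying it to modules loaded later). A usage sketch, with the model path as a placeholder:

import { loadModel, toggleNativeLog, addNativeLogListener } from '@fugood/llama.node'

async function main() {
  // New signature: just a boolean; the setting follows every lib variant.
  await toggleNativeLog(true)

  const subscription = addNativeLogListener((level, text) => {
    console.log(`[llama ${level}] ${text.trimEnd()}`)
  })

  // The flag set above is applied to this module as soon as it is loaded.
  const context = await loadModel({ model: './models/model.gguf' })

  // ... use context ...

  subscription.remove() // stop receiving native log lines
  return context
}

main()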
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.11",
+  "version": "0.3.13",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -165,9 +165,17 @@ void LlamaCompletionWorker::OnOK() {
              Napi::String::New(env, _result.text.c_str()));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  std::string * reasoning_content = nullptr;
+  std::string * content = nullptr;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      if (!message.reasoning_content.empty()) {
+        reasoning_content = &message.reasoning_content;
+      }
+      if (!message.content.empty()) {
+        content = &message.content;
+      }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
         Napi::Object tool_call = Napi::Object::New(env);
@@ -188,6 +196,12 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
+  if (reasoning_content) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content->c_str()));
+  }
+  if (content) {
+    result.Set("content", Napi::String::New(env, content->c_str()));
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
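
With these additions the completion result object can carry `reasoning_content` (the extracted reasoning block) and `content` (the remaining message body) alongside `text` and `tool_calls`. An illustrative sketch of consuming them from the JavaScript side; the prompt is a placeholder, and the published typings may not declare the new fields yet, hence the loose cast:

import type { LlamaContext } from '@fugood/llama.node'

async function ask(context: LlamaContext) {
  const result = (await context.completion({
    messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  })) as Record<string, any> // typings may lag behind the native result

  if (result.reasoning_content) {
    console.log('reasoning:', result.reasoning_content) // parsed reasoning block
  }
  console.log('answer:', result.content ?? result.text)
  if (result.tool_calls) {
    console.log('tool calls:', result.tool_calls)
  }
}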
package/src/LlamaContext.cpp CHANGED
@@ -185,6 +185,13 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.chat_template = get_option<std::string>(options, "chat_template", "");
 
+  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
+  if (reasoning_format == "deepseek") {
+    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+  } else {
+    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+  }
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -377,7 +384,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
 }
 
 common_chat_params getFormattedChatWithJinja(
-    const struct llama_model * model,
+    const std::shared_ptr<LlamaSession> &sess,
     const common_chat_templates &templates,
     const std::string &messages,
     const std::string &chat_template,
@@ -399,11 +406,12 @@ common_chat_params getFormattedChatWithJinja(
   if (!json_schema.empty()) {
     inputs.json_schema = json::parse(json_schema);
   }
+  inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
   inputs.stream = true;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-    auto tmp = common_chat_templates_from_model(model, chat_template);
+    auto tmp = common_chat_templates_from_model(sess->model(), chat_template);
     const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
     if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
       inputs.parallel_tool_calls = false;
@@ -493,7 +501,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
 
-  auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+  auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
 
   Napi::Object result = Napi::Object::New(env);
   result.Set("prompt", chatParams.prompt.get<std::string>());
@@ -598,7 +606,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
 
   auto chatParams = getFormattedChatWithJinja(
-      _sess->model(),
+      _sess,
       _templates,
       json_stringify(messages),
       chat_template,
@@ -685,6 +693,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.top_n_sigma = get_option<float>(options, "top_n_sigma", -1.0f);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
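
The Completion handler also starts forwarding a `top_n_sigma` sampling option (default -1.0, which leaves the filter disabled). A sketch of passing it per request; the prompt and values are illustrative, and the option may not appear in the published typings yet since it is read natively via get_option:

import type { LlamaContext } from '@fugood/llama.node'

async function generate(context: LlamaContext) {
  const out = await context.completion({
    prompt: 'Write a haiku about GPUs.',
    n_predict: 64,
    temperature: 1.0,
    top_n_sigma: 2.0, // new option; -1 (the default) disables top-n-sigma filtering
  } as Record<string, any>)
  console.log(out.text)
}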
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -129,7 +129,7 @@ jobs:
         run: |
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
-          # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
+          # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
           cmake -B build \
             -DCMAKE_BUILD_RPATH="@loader_path" \
             -DLLAMA_FATAL_WARNINGS=ON \
@@ -374,6 +374,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
@@ -401,7 +403,35 @@ jobs:
         run: |
           cd build
           # This is using llvmpipe and runs slower than other backends
-          ctest -L main --verbose --timeout 1800
+          ctest -L main --verbose --timeout 2700
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
+          name: llama-bin-ubuntu-vulkan-x64.zip
 
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
@@ -443,7 +473,7 @@ jobs:
 
   ubuntu-22-cmake-musa:
     runs-on: ubuntu-22.04
-    container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
+    container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
 
     steps:
       - name: Clone
@@ -1345,8 +1375,10 @@ jobs:
 
     needs:
      - ubuntu-cpu-cmake
+      - ubuntu-22-cmake-vulkan
      - windows-latest-cmake
      - windows-2019-cmake-cuda
+      - windows-latest-cmake-sycl
      - windows-latest-cmake-hip-release
      - macOS-latest-cmake-arm64
      - macOS-latest-cmake-x64
package/src/llama.cpp/.github/workflows/docker.yml CHANGED
@@ -51,6 +51,8 @@ jobs:
 
       - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0-28
 
       - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
package/src/llama.cpp/.github/workflows/labeler.yml CHANGED
@@ -11,7 +11,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
       with:
-        repository: "ggerganov/llama.cpp"
+        repository: "ggml-org/llama.cpp"
    - uses: actions/labeler@v5
      with:
        configuration-path: '.github/labeler.yml'
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -96,6 +96,22 @@ if (LLAMA_LLGUIDANCE)
     include(ExternalProject)
     set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
     set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
+
+    # Set the correct library file extension based on platform
+    if (WIN32)
+        set(LLGUIDANCE_LIB_NAME "llguidance.lib")
+        # Add Windows-specific libraries
+        set(LLGUIDANCE_PLATFORM_LIBS
+            ws2_32  # Windows Sockets API
+            userenv # For GetUserProfileDirectoryW
+            ntdll   # For NT functions
+            bcrypt  # For BCryptGenRandom
+        )
+    else()
+        set(LLGUIDANCE_LIB_NAME "libllguidance.a")
+        set(LLGUIDANCE_PLATFORM_LIBS "")
+    endif()
+
     ExternalProject_Add(llguidance_ext
         GIT_REPOSITORY https://github.com/guidance-ai/llguidance
         # v0.6.12:
@@ -106,17 +122,18 @@ if (LLAMA_LLGUIDANCE)
         CONFIGURE_COMMAND ""
         BUILD_COMMAND cargo build --release
         INSTALL_COMMAND ""
-        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h
+        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h
         UPDATE_COMMAND ""
     )
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
 
     add_library(llguidance STATIC IMPORTED)
-    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a)
+    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME})
    add_dependencies(llguidance llguidance_ext)
 
    target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
-    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
+    # Add platform libraries to the main target
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
 target_include_directories(${TARGET} PUBLIC .)