@fugood/llama.node 0.4.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/CMakeLists.txt +4 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/lib/binding.ts +66 -6
  11. package/lib/index.js +59 -17
  12. package/lib/index.ts +74 -23
  13. package/package.json +1 -1
  14. package/src/DecodeAudioTokenWorker.cpp +40 -0
  15. package/src/DecodeAudioTokenWorker.h +22 -0
  16. package/src/EmbeddingWorker.cpp +7 -5
  17. package/src/LlamaCompletionWorker.cpp +68 -54
  18. package/src/LlamaCompletionWorker.h +7 -8
  19. package/src/LlamaContext.cpp +551 -235
  20. package/src/LlamaContext.h +26 -4
  21. package/src/LoadSessionWorker.cpp +4 -2
  22. package/src/SaveSessionWorker.cpp +10 -6
  23. package/src/TokenizeWorker.cpp +23 -14
  24. package/src/TokenizeWorker.h +2 -2
  25. package/src/addons.cc +8 -11
  26. package/src/common.hpp +129 -126
  27. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  28. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  29. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  30. package/src/llama.cpp/common/arg.cpp +14 -13
  31. package/src/llama.cpp/common/common.cpp +4 -75
  32. package/src/llama.cpp/common/common.h +7 -12
  33. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  34. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  35. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  36. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  37. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  38. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  39. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  40. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  41. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  42. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  43. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  44. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  45. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  48. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  51. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  52. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  53. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  54. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  55. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  56. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  57. package/src/llama.cpp/include/llama.h +24 -124
  58. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  61. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  62. package/src/llama.cpp/src/llama-context.cpp +60 -110
  63. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  64. package/src/llama.cpp/src/llama-graph.h +49 -7
  65. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  66. package/src/llama.cpp/src/llama-hparams.h +34 -5
  67. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  68. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  69. package/src/llama.cpp/src/llama-memory.h +3 -2
  70. package/src/llama.cpp/src/llama-model.cpp +273 -94
  71. package/src/llama.cpp/src/llama-model.h +4 -1
  72. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  73. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  74. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  75. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  76. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  77. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  78. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  79. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  82. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  83. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  84. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  85. package/src/llama.cpp/tools/run/run.cpp +2 -2
  86. package/src/llama.cpp/tools/server/server.cpp +158 -47
  87. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  88. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
  89. package/src/tts_utils.cpp +342 -0
  90. package/src/tts_utils.h +62 -0
  91. package/bin/win32/arm64/llama-node.node +0 -0
  92. package/bin/win32/arm64/node.lib +0 -0
  93. package/bin/win32/x64/llama-node.node +0 -0
  94. package/bin/win32/x64/node.lib +0 -0
  95. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  96. package/bin/win32-vulkan/arm64/node.lib +0 -0
  97. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  98. package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/llama.cpp/.github/workflows/release.yml

@@ -1,4 +1,4 @@
- name: Create Release
+ name: Release

  on:
  workflow_dispatch: # allows manual triggering
@@ -227,6 +227,66 @@ jobs:
  path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
  name: llama-bin-ubuntu-vulkan-x64.zip

+ windows-cpu:
+ runs-on: windows-latest
+
+ strategy:
+ matrix:
+ include:
+ - arch: 'x64'
+ - arch: 'arm64'
+
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: windows-latest-cmake-cpu-${{ matrix.arch }}
+ variant: ccache
+ evict-old-files: 1d
+
+ - name: Install Ninja
+ run: |
+ choco install ninja
+
+ - name: libCURL
+ id: get_libcurl
+ uses: ./.github/actions/windows-setup-curl
+ with:
+ architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
+
+ - name: Build
+ env:
+ CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
+ run: |
+ cmake -S . -B build -G "Ninja Multi-Config" `
+ -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake `
+ -DGGML_NATIVE=OFF `
+ -DGGML_BACKEND_DL=ON `
+ -DGGML_CPU_ALL_VARIANTS=ON `
+ -DGGML_OPENMP=OFF `
+ -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" `
+ ${{ env.CMAKE_ARGS }}
+ cmake --build build --config Release
+
+ - name: Pack artifacts
+ id: pack_artifacts
+ env:
+ CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
+ run: |
+ Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
+ 7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-bin-win-cpu-${{ matrix.arch }}.zip
+ name: llama-bin-win-cpu-${{ matrix.arch }}.zip
+
  windows:
  runs-on: windows-latest

@@ -237,52 +297,30 @@ jobs:
  strategy:
  matrix:
  include:
- - build: 'cpu-x64'
+ - backend: 'vulkan'
  arch: 'x64'
- defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
- #- build: 'openblas-x64'
- # arch: 'x64'
- # defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
- - build: 'vulkan-x64'
- arch: 'x64'
- defines: '-DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
- - build: 'cpu-arm64'
- arch: 'arm64'
- defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF'
- - build: 'opencl-adreno-arm64'
+ defines: '-DGGML_VULKAN=ON'
+ target: 'ggml-vulkan'
+ - backend: 'opencl-adreno'
  arch: 'arm64'
  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
+ target: 'ggml-opencl'

  steps:
  - name: Clone
  id: checkout
  uses: actions/checkout@v4
- with:
- fetch-depth: 0

  - name: ccache
  uses: hendrikmuhs/ccache-action@v1.2.16
  with:
- key: windows-latest-cmake-${{ matrix.build }}
+ key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }}
  variant: ccache
  evict-old-files: 1d

- - name: Download OpenBLAS
- id: get_openblas
- if: ${{ matrix.build == 'openblas-x64' }}
- run: |
- curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
- curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
- mkdir $env:RUNNER_TEMP/openblas
- tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
- $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
- $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
- $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
- & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
-
  - name: Install Vulkan SDK
  id: get_vulkan
- if: ${{ matrix.build == 'vulkan-x64' }}
+ if: ${{ matrix.backend == 'vulkan' }}
  run: |
  curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
  & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
@@ -296,7 +334,7 @@ jobs:

  - name: Install OpenCL Headers and Libs
  id: install_opencl
- if: ${{ matrix.build == 'opencl-adreno-arm64' }}
+ if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }}
  run: |
  git clone https://github.com/KhronosGroup/OpenCL-Headers
  cd OpenCL-Headers
@@ -314,46 +352,22 @@ jobs:
  -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
  cmake --build build-arm64-release --target install --config release

- - name: libCURL
- id: get_libcurl
- uses: ./.github/actions/windows-setup-curl
- with:
- architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
-
  - name: Build
  id: cmake_build
- env:
- CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
- run: |
- cmake -S . -B build ${{ matrix.defines }} `
- -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" `
- ${{ env.CMAKE_ARGS }}
- cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
-
- - name: Add libopenblas.dll
- id: add_libopenblas_dll
- if: ${{ matrix.build == 'openblas-x64' }}
  run: |
- cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
- cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
-
- - name: Determine tag name
- id: tag
- uses: ./.github/actions/get-tag-name
+ cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF
+ cmake --build build --config Release --target ${{ matrix.target }}

  - name: Pack artifacts
  id: pack_artifacts
- env:
- CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
  run: |
- Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
+ 7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll

  - name: Upload artifacts
  uses: actions/upload-artifact@v4
  with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
- name: llama-bin-win-${{ matrix.build }}.zip
+ path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
+ name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip

  windows-cuda:
  runs-on: windows-2019
@@ -366,8 +380,6 @@ jobs:
  - name: Clone
  id: checkout
  uses: actions/checkout@v4
- with:
- fetch-depth: 0

  - name: Install ccache
  uses: hendrikmuhs/ccache-action@v1.2.16
@@ -386,45 +398,30 @@ jobs:
  run: |
  choco install ninja

- - name: libCURL
- id: get_libcurl
- uses: ./.github/actions/windows-setup-curl
-
  - name: Build
  id: cmake_build
  shell: cmd
- env:
- CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
  run: |
  call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
  cmake -S . -B build -G "Ninja Multi-Config" ^
- -DGGML_NATIVE=OFF ^
  -DGGML_BACKEND_DL=ON ^
- -DGGML_CPU_ALL_VARIANTS=ON ^
+ -DGGML_NATIVE=OFF ^
+ -DGGML_CPU=OFF ^
  -DGGML_CUDA=ON ^
- -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^
- ${{ env.CMAKE_ARGS }}
+ -DLLAMA_CURL=OFF
  set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
- cmake --build build --config Release -j %NINJA_JOBS% -t ggml
- cmake --build build --config Release
-
- - name: Determine tag name
- id: tag
- uses: ./.github/actions/get-tag-name
+ cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda

  - name: Pack artifacts
  id: pack_artifacts
- env:
- CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
  run: |
- cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
+ 7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll

  - name: Upload artifacts
  uses: actions/upload-artifact@v4
  with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip
- name: llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
+ path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
+ name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  - name: Copy and pack Cuda runtime
  run: |
@@ -432,13 +429,13 @@ jobs:
  $dst='.\build\bin\cudart\'
  robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
  robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
- 7z a cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip $dst\*
+ 7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

  - name: Upload Cuda runtime
  uses: actions/upload-artifact@v4
  with:
- path: cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
- name: cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
+ path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
+ name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  windows-sycl:
  runs-on: windows-latest
@@ -448,15 +445,14 @@ jobs:
  shell: bash

  env:
- WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
  WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
  ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
+
  steps:
  - name: Clone
  id: checkout
  uses: actions/checkout@v4
- with:
- fetch-depth: 0

  - name: ccache
  uses: hendrikmuhs/ccache-action@v1.2.16
@@ -469,15 +465,18 @@ jobs:
  run: |
  scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

- # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args
-
  - name: Build
  id: cmake_build
- run: examples/sycl/win-build-sycl.bat
-
- - name: Determine tag name
- id: tag
- uses: ./.github/actions/get-tag-name
+ shell: cmd
+ run: |
+ call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
+ cmake -G "Ninja" -B build ^
+ -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^
+ -DCMAKE_BUILD_TYPE=Release ^
+ -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^
+ -DGGML_CPU=OFF -DGGML_SYCL=ON ^
+ -DLLAMA_CURL=OFF
+ cmake --build build --target ggml-sycl -j

  - name: Build the release package
  id: pack_artifacts
@@ -502,12 +501,12 @@ jobs:
  cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin

  echo "cp oneAPI running time dll files to ./build/bin done"
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
+ 7z a llama-bin-win-sycl-x64.zip ./build/bin/*

  - name: Upload the release package
  uses: actions/upload-artifact@v4
  with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+ path: llama-bin-win-sycl-x64.zip
  name: llama-bin-win-sycl-x64.zip

  windows-hip:
@@ -515,14 +514,14 @@

  strategy:
  matrix:
- gpu_target: [gfx1100, gfx1101, gfx1030]
+ include:
+ - name: "radeon"
+ gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"

  steps:
  - name: Clone
  id: checkout
  uses: actions/checkout@v4
- with:
- fetch-depth: 0

  - name: Clone rocWMMA repository
  id: clone_rocwmma
@@ -532,7 +531,7 @@
  - name: ccache
  uses: hendrikmuhs/ccache-action@v1.2.16
  with:
- key: windows-latest-cmake-hip-release
+ key: windows-latest-cmake-hip-${{ matrix.name }}-x64
  evict-old-files: 1d

  - name: Install
@@ -550,50 +549,39 @@
  run: |
  & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

- - name: libCURL
- id: get_libcurl
- uses: ./.github/actions/windows-setup-curl
-
  - name: Build
  id: cmake_build
- env:
- CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
  run: |
  $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
  $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
  cmake -G "Unix Makefiles" -B build -S . `
  -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
  -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
- -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
  -DCMAKE_BUILD_TYPE=Release `
- -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
+ -DGGML_BACKEND_DL=ON `
+ -DGGML_NATIVE=OFF `
+ -DGGML_CPU=OFF `
+ -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" `
  -DGGML_HIP_ROCWMMA_FATTN=ON `
  -DGGML_HIP=ON `
- -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" `
- ${{ env.CMAKE_ARGS }}
- cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+ -DLLAMA_CURL=OFF
+ cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
  md "build\bin\rocblas\library\"
  cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
  cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
  cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"

- - name: Determine tag name
- id: tag
- uses: ./.github/actions/get-tag-name
-
  - name: Pack artifacts
  id: pack_artifacts
- env:
- CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
  run: |
- cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\libcurl-x64.dll
- 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+ 7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*

  - name: Upload artifacts
  uses: actions/upload-artifact@v4
  with:
- path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
- name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+ path: llama-bin-win-hip-${{ matrix.name }}-x64.zip
+ name: llama-bin-win-hip-${{ matrix.name }}-x64.zip

  ios-xcode-build:
  runs-on: macos-latest
@@ -655,14 +643,16 @@
  runs-on: ubuntu-latest

  needs:
- - ubuntu-22-cpu
- - ubuntu-22-vulkan
  - windows
+ - windows-cpu
  - windows-cuda
  - windows-sycl
  - windows-hip
+ - ubuntu-22-cpu
+ - ubuntu-22-vulkan
  - macOS-arm64
  - macOS-x64
+ - ios-xcode-build

  steps:
  - name: Clone
@@ -680,10 +670,43 @@
  uses: actions/download-artifact@v4
  with:
  path: ./artifact
+ merge-multiple: true

  - name: Move artifacts
  id: move_artifacts
- run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
+ run: |
+ mkdir -p release
+
+ echo "Adding CPU backend files to existing zips..."
+ for arch in x64 arm64; do
+ cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
+ temp_dir=$(mktemp -d)
+ echo "Extracting CPU backend for $arch..."
+ unzip "$cpu_zip" -d "$temp_dir"
+
+ echo "Adding CPU files to $arch zips..."
+ for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
+ if [[ "$target_zip" == "$cpu_zip" ]]; then
+ continue
+ fi
+ echo "Adding CPU backend to $(basename "$target_zip")"
+ realpath_target_zip=$(realpath "$target_zip")
+ (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
+ done
+
+ rm -rf "$temp_dir"
+ done
+
+ echo "Renaming and moving zips to release..."
+ for zip_file in artifact/llama-bin-win-*.zip; do
+ base_name=$(basename "$zip_file" .zip)
+ zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
+ echo "Moving $zip_file to release/$zip_name"
+ mv "$zip_file" "release/$zip_name"
+ done
+
+ echo "Moving other artifacts..."
+ mv -v artifact/*.zip release

  - name: Create release
  id: create_release
@@ -702,7 +725,7 @@
  const path = require('path');
  const fs = require('fs');
  const release_id = '${{ steps.create_release.outputs.id }}';
- for (let file of await fs.readdirSync('./artifact/release')) {
+ for (let file of await fs.readdirSync('./release')) {
  if (path.extname(file) === '.zip') {
  console.log('uploadReleaseAsset', file);
  await github.repos.uploadReleaseAsset({
@@ -710,7 +733,7 @@
  repo: context.repo.repo,
  release_id: release_id,
  name: file,
- data: await fs.readFileSync(`./artifact/release/${file}`)
+ data: await fs.readFileSync(`./release/${file}`)
  });
  }
  }
package/src/llama.cpp/.github/workflows/winget.yml (new file)

@@ -0,0 +1,42 @@
+ name: Update Winget Package
+
+ on:
+ workflow_dispatch: # allows manual triggering
+ schedule:
+ - cron: '28 5 * * *' # Update every day at 5:28 UTC
+
+ jobs:
+ update:
+ name: Update Winget Package
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Install cargo binstall
+ uses: cargo-bins/cargo-binstall@268643a6b5ea099f5718ee5cd3ff7dc89a5eb49b
+
+ - name: Install komac
+ run: |
+ cargo binstall komac@2.11.2 -y
+
+ - name: Find latest release
+ id: find_latest_release
+ uses: actions/github-script@v6
+ with:
+ script: |
+ const { data: releases } = await github.rest.repos.listReleases({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ });
+ console.log("Latest release:", releases[0].tag_name);
+ return releases[0].tag_name;
+
+ - name: Update manifest
+ env:
+ VERSION: ${{ steps.find_latest_release.outputs.result }}
+ run: |
+ echo "Updating manifest..."
+ komac update --version ${{ env.VERSION }} \
+ --urls "https://github.com/ggml-org/llama.cpp/releases/download/${{ env.VERSION }}/llama-${{ env.VERSION }}-bin-win-vulkan-x64.zip" \
+ --token ${{ secrets.WINGET_GITHUB_TOKEN }} \
+ --submit \
+ ggml.llamacpp
package/src/llama.cpp/common/arg.cpp

@@ -39,7 +39,7 @@
  using json = nlohmann::ordered_json;

  std::initializer_list<enum llama_example> mmproj_examples = {
- LLAMA_EXAMPLE_LLAVA,
+ LLAMA_EXAMPLE_MTMD,
  LLAMA_EXAMPLE_SERVER,
  };

@@ -1445,6 +1445,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  params.n_keep = value;
  }
  ));
+ add_opt(common_arg(
+ {"--swa-full"},
+ string_format("use full-size SWA cache (default: %s)\n"
+ "[(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)", params.swa_full ? "true" : "false"),
+ [](common_params & params) {
+ params.swa_full = true;
+ }
+ ).set_env("LLAMA_ARG_SWA_FULL"));
  add_opt(common_arg(
  {"--no-context-shift"},
  string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
@@ -1670,7 +1678,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params) {
  params.warmup = false;
  }
- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING}));
+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL}));
  add_opt(common_arg(
  {"--spm-infill"},
  string_format(
@@ -2057,13 +2065,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  params.grp_attn_w = value;
  }
  ).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN}));
- add_opt(common_arg(
- {"-dkvc", "--dump-kv-cache"},
- "verbose print of the KV cache",
- [](common_params & params) {
- params.dump_kv_cache = true;
- }
- ));
  add_opt(common_arg(
  {"-nkvo", "--no-kv-offload"},
  "disable KV offload",
@@ -2232,12 +2233,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  }
  ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
  add_opt(common_arg(
- {"--image"}, "FILE",
- "path to an image file. use with multimodal models. Specify multiple times for batching",
+ {"--image", "--audio"}, "FILE",
+ "path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",
  [](common_params & params, const std::string & value) {
  params.image.emplace_back(value);
  }
- ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+ ).set_examples({LLAMA_EXAMPLE_MTMD}));
  if (llama_supports_rpc()) {
  add_opt(common_arg(
  {"--rpc"}, "SERVERS",
@@ -2867,7 +2868,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params, const std::string & value) {
  params.chat_template = value;
  }
- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LLAVA}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
  add_opt(common_arg(
  {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
  string_format(