@fugood/llama.node 0.4.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +4 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.ts +66 -6
- package/lib/index.js +59 -17
- package/lib/index.ts +74 -23
- package/package.json +1 -1
- package/src/DecodeAudioTokenWorker.cpp +40 -0
- package/src/DecodeAudioTokenWorker.h +22 -0
- package/src/EmbeddingWorker.cpp +7 -5
- package/src/LlamaCompletionWorker.cpp +68 -54
- package/src/LlamaCompletionWorker.h +7 -8
- package/src/LlamaContext.cpp +551 -235
- package/src/LlamaContext.h +26 -4
- package/src/LoadSessionWorker.cpp +4 -2
- package/src/SaveSessionWorker.cpp +10 -6
- package/src/TokenizeWorker.cpp +23 -14
- package/src/TokenizeWorker.h +2 -2
- package/src/addons.cc +8 -11
- package/src/common.hpp +129 -126
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
- package/src/tts_utils.cpp +342 -0
- package/src/tts_utils.h +62 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/llama.cpp/.github/workflows/release.yml

@@ -1,4 +1,4 @@
-name:
+name: Release
 
 on:
   workflow_dispatch: # allows manual triggering
@@ -227,6 +227,66 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
           name: llama-bin-ubuntu-vulkan-x64.zip
 
+  windows-cpu:
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        include:
+          - arch: 'x64'
+          - arch: 'arm64'
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-cpu-${{ matrix.arch }}
+          variant: ccache
+          evict-old-files: 1d
+
+      - name: Install Ninja
+        run: |
+          choco install ninja
+
+      - name: libCURL
+        id: get_libcurl
+        uses: ./.github/actions/windows-setup-curl
+        with:
+          architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
+
+      - name: Build
+        env:
+          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
+        run: |
+          cmake -S . -B build -G "Ninja Multi-Config" `
+            -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake `
+            -DGGML_NATIVE=OFF `
+            -DGGML_BACKEND_DL=ON `
+            -DGGML_CPU_ALL_VARIANTS=ON `
+            -DGGML_OPENMP=OFF `
+            -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" `
+            ${{ env.CMAKE_ARGS }}
+          cmake --build build --config Release
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        env:
+          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
+        run: |
+          Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
+          7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-bin-win-cpu-${{ matrix.arch }}.zip
+          name: llama-bin-win-cpu-${{ matrix.arch }}.zip
+
   windows:
     runs-on: windows-latest
 
@@ -237,52 +297,30 @@ jobs:
     strategy:
       matrix:
         include:
-          -
+          - backend: 'vulkan'
             arch: 'x64'
-            defines: '-
-
-
-            # defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
-          - build: 'vulkan-x64'
-            arch: 'x64'
-            defines: '-DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
-          - build: 'cpu-arm64'
-            arch: 'arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF'
-          - build: 'opencl-adreno-arm64'
+            defines: '-DGGML_VULKAN=ON'
+            target: 'ggml-vulkan'
+          - backend: 'opencl-adreno'
             arch: 'arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
+            target: 'ggml-opencl'
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
         with:
-          key: windows-latest-cmake-${{ matrix.
+          key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }}
           variant: ccache
           evict-old-files: 1d
 
-      - name: Download OpenBLAS
-        id: get_openblas
-        if: ${{ matrix.build == 'openblas-x64' }}
-        run: |
-          curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
-          curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
-          mkdir $env:RUNNER_TEMP/openblas
-          tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
-          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
-          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
-          $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
-          & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
-
       - name: Install Vulkan SDK
         id: get_vulkan
-        if: ${{ matrix.
+        if: ${{ matrix.backend == 'vulkan' }}
         run: |
           curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
           & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
@@ -296,7 +334,7 @@ jobs:
 
       - name: Install OpenCL Headers and Libs
         id: install_opencl
-        if: ${{ matrix.
+        if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }}
         run: |
           git clone https://github.com/KhronosGroup/OpenCL-Headers
           cd OpenCL-Headers
@@ -314,46 +352,22 @@ jobs:
             -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
           cmake --build build-arm64-release --target install --config release
 
-      - name: libCURL
-        id: get_libcurl
-        uses: ./.github/actions/windows-setup-curl
-        with:
-          architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
-
       - name: Build
         id: cmake_build
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
-        run: |
-          cmake -S . -B build ${{ matrix.defines }} `
-            -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" `
-            ${{ env.CMAKE_ARGS }}
-          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
-
-      - name: Add libopenblas.dll
-        id: add_libopenblas_dll
-        if: ${{ matrix.build == 'openblas-x64' }}
         run: |
-
-
-
-      - name: Determine tag name
-        id: tag
-        uses: ./.github/actions/get-tag-name
+          cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF
+          cmake --build build --config Release --target ${{ matrix.target }}
 
       - name: Pack artifacts
         id: pack_artifacts
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
-
-          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
+          7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
 
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          path: llama-${{
-          name: llama-bin-win-${{ matrix.
+          path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
+          name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
 
   windows-cuda:
     runs-on: windows-2019
@@ -366,8 +380,6 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
       - name: Install ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
@@ -386,45 +398,30 @@ jobs:
         run: |
           choco install ninja
 
-      - name: libCURL
-        id: get_libcurl
-        uses: ./.github/actions/windows-setup-curl
-
       - name: Build
         id: cmake_build
         shell: cmd
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
           call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
           cmake -S . -B build -G "Ninja Multi-Config" ^
-            -DGGML_NATIVE=OFF ^
             -DGGML_BACKEND_DL=ON ^
-            -
+            -DGGML_NATIVE=OFF ^
+            -DGGML_CPU=OFF ^
             -DGGML_CUDA=ON ^
-            -
-            ${{ env.CMAKE_ARGS }}
+            -DLLAMA_CURL=OFF
           set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
-          cmake --build build --config Release -j %NINJA_JOBS%
-          cmake --build build --config Release
-
-      - name: Determine tag name
-        id: tag
-        uses: ./.github/actions/get-tag-name
+          cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda
 
       - name: Pack artifacts
         id: pack_artifacts
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
-
-          7z a llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
+          7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
 
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          path: llama
-          name: llama-bin-win-cuda
+          path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
+          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
 
       - name: Copy and pack Cuda runtime
         run: |
@@ -432,13 +429,13 @@ jobs:
           $dst='.\build\bin\cudart\'
           robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
           robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
-          7z a cudart-llama-bin-win-cuda
+          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*
 
       - name: Upload Cuda runtime
         uses: actions/upload-artifact@v4
         with:
-          path: cudart-llama-bin-win-cuda
-          name: cudart-llama-bin-win-cuda
+          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
+          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
 
   windows-sycl:
     runs-on: windows-latest
@@ -448,15 +445,14 @@ jobs:
         shell: bash
 
     env:
-      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
       WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
       ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
+
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
@@ -469,15 +465,18 @@ jobs:
         run: |
           scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
 
-      # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args
-
       - name: Build
         id: cmake_build
-
-
-
-
-
+        shell: cmd
+        run: |
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
+          cmake -G "Ninja" -B build ^
+            -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^
+            -DCMAKE_BUILD_TYPE=Release ^
+            -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^
+            -DGGML_CPU=OFF -DGGML_SYCL=ON ^
+            -DLLAMA_CURL=OFF
+          cmake --build build --target ggml-sycl -j
 
       - name: Build the release package
         id: pack_artifacts
@@ -502,12 +501,12 @@ jobs:
           cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
 
           echo "cp oneAPI running time dll files to ./build/bin done"
-          7z a llama
+          7z a llama-bin-win-sycl-x64.zip ./build/bin/*
 
       - name: Upload the release package
         uses: actions/upload-artifact@v4
         with:
-          path: llama
+          path: llama-bin-win-sycl-x64.zip
           name: llama-bin-win-sycl-x64.zip
 
   windows-hip:
@@ -515,14 +514,14 @@ jobs:
 
     strategy:
       matrix:
-
+        include:
+          - name: "radeon"
+            gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
       - name: Clone rocWMMA repository
         id: clone_rocwmma
@@ -532,7 +531,7 @@ jobs:
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
         with:
-          key: windows-latest-cmake-hip-
+          key: windows-latest-cmake-hip-${{ matrix.name }}-x64
           evict-old-files: 1d
 
       - name: Install
@@ -550,50 +549,39 @@ jobs:
         run: |
           & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
 
-      - name: libCURL
-        id: get_libcurl
-        uses: ./.github/actions/windows-setup-curl
-
       - name: Build
         id: cmake_build
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
           cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
-           -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
+           -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
            -DCMAKE_BUILD_TYPE=Release `
-           -
+           -DGGML_BACKEND_DL=ON `
+           -DGGML_NATIVE=OFF `
+           -DGGML_CPU=OFF `
+           -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" `
            -DGGML_HIP_ROCWMMA_FATTN=ON `
            -DGGML_HIP=ON `
-           -
-
-          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+           -DLLAMA_CURL=OFF
+          cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
          md "build\bin\rocblas\library\"
          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
 
-      - name: Determine tag name
-        id: tag
-        uses: ./.github/actions/get-tag-name
-
       - name: Pack artifacts
         id: pack_artifacts
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
-
-          7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+          7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
 
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          path: llama
-          name: llama-bin-win-hip
+          path: llama-bin-win-hip-${{ matrix.name }}-x64.zip
+          name: llama-bin-win-hip-${{ matrix.name }}-x64.zip
 
   ios-xcode-build:
     runs-on: macos-latest
@@ -655,14 +643,16 @@ jobs:
     runs-on: ubuntu-latest
 
     needs:
-      - ubuntu-22-cpu
-      - ubuntu-22-vulkan
       - windows
+      - windows-cpu
       - windows-cuda
       - windows-sycl
       - windows-hip
+      - ubuntu-22-cpu
+      - ubuntu-22-vulkan
       - macOS-arm64
       - macOS-x64
+      - ios-xcode-build
 
     steps:
       - name: Clone
@@ -680,10 +670,43 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           path: ./artifact
+          merge-multiple: true
 
       - name: Move artifacts
         id: move_artifacts
-        run:
+        run: |
+          mkdir -p release
+
+          echo "Adding CPU backend files to existing zips..."
+          for arch in x64 arm64; do
+            cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
+            temp_dir=$(mktemp -d)
+            echo "Extracting CPU backend for $arch..."
+            unzip "$cpu_zip" -d "$temp_dir"
+
+            echo "Adding CPU files to $arch zips..."
+            for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
+              if [[ "$target_zip" == "$cpu_zip" ]]; then
+                continue
+              fi
+              echo "Adding CPU backend to $(basename "$target_zip")"
+              realpath_target_zip=$(realpath "$target_zip")
+              (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
+            done
+
+            rm -rf "$temp_dir"
+          done
+
+          echo "Renaming and moving zips to release..."
+          for zip_file in artifact/llama-bin-win-*.zip; do
+            base_name=$(basename "$zip_file" .zip)
+            zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
+            echo "Moving $zip_file to release/$zip_name"
+            mv "$zip_file" "release/$zip_name"
+          done
+
+          echo "Moving other artifacts..."
+          mv -v artifact/*.zip release
 
       - name: Create release
         id: create_release
@@ -702,7 +725,7 @@ jobs:
             const path = require('path');
             const fs = require('fs');
             const release_id = '${{ steps.create_release.outputs.id }}';
-            for (let file of await fs.readdirSync('./
+            for (let file of await fs.readdirSync('./release')) {
               if (path.extname(file) === '.zip') {
                 console.log('uploadReleaseAsset', file);
                 await github.repos.uploadReleaseAsset({
@@ -710,7 +733,7 @@ jobs:
                   repo: context.repo.repo,
                   release_id: release_id,
                   name: file,
-                  data: await fs.readFileSync(`./
+                  data: await fs.readFileSync(`./release/${file}`)
                 });
               }
             }
package/src/llama.cpp/.github/workflows/winget.yml

@@ -0,0 +1,42 @@
+name: Update Winget Package
+
+on:
+  workflow_dispatch: # allows manual triggering
+  schedule:
+    - cron: '28 5 * * *' # Update every day at 5:28 UTC
+
+jobs:
+  update:
+    name: Update Winget Package
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Install cargo binstall
+        uses: cargo-bins/cargo-binstall@268643a6b5ea099f5718ee5cd3ff7dc89a5eb49b
+
+      - name: Install komac
+        run: |
+          cargo binstall komac@2.11.2 -y
+
+      - name: Find latest release
+        id: find_latest_release
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const { data: releases } = await github.rest.repos.listReleases({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+            });
+            console.log("Latest release:", releases[0].tag_name);
+            return releases[0].tag_name;
+
+      - name: Update manifest
+        env:
+          VERSION: ${{ steps.find_latest_release.outputs.result }}
+        run: |
+          echo "Updating manifest..."
+          komac update --version ${{ env.VERSION }} \
+            --urls "https://github.com/ggml-org/llama.cpp/releases/download/${{ env.VERSION }}/llama-${{ env.VERSION }}-bin-win-vulkan-x64.zip" \
+            --token ${{ secrets.WINGET_GITHUB_TOKEN }} \
+            --submit \
+            ggml.llamacpp
package/src/llama.cpp/common/arg.cpp

@@ -39,7 +39,7 @@
 using json = nlohmann::ordered_json;
 
 std::initializer_list<enum llama_example> mmproj_examples = {
-
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_SERVER,
 };
 
@@ -1445,6 +1445,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.n_keep = value;
         }
     ));
+    add_opt(common_arg(
+        {"--swa-full"},
+        string_format("use full-size SWA cache (default: %s)\n"
+            "[(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)", params.swa_full ? "true" : "false"),
+        [](common_params & params) {
+            params.swa_full = true;
+        }
+    ).set_env("LLAMA_ARG_SWA_FULL"));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
@@ -1670,7 +1678,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.warmup = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL}));
     add_opt(common_arg(
         {"--spm-infill"},
         string_format(
@@ -2057,13 +2065,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.grp_attn_w = value;
         }
     ).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN}));
-    add_opt(common_arg(
-        {"-dkvc", "--dump-kv-cache"},
-        "verbose print of the KV cache",
-        [](common_params & params) {
-            params.dump_kv_cache = true;
-        }
-    ));
     add_opt(common_arg(
         {"-nkvo", "--no-kv-offload"},
         "disable KV offload",
@@ -2232,12 +2233,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
     add_opt(common_arg(
-        {"--image"}, "FILE",
-        "path to an image file. use with multimodal models
+        {"--image", "--audio"}, "FILE",
+        "path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",
         [](common_params & params, const std::string & value) {
             params.image.emplace_back(value);
         }
-    ).set_examples({
+    ).set_examples({LLAMA_EXAMPLE_MTMD}));
     if (llama_supports_rpc()) {
         add_opt(common_arg(
             {"--rpc"}, "SERVERS",
@@ -2867,7 +2868,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.chat_template = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER,
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
     add_opt(common_arg(
         {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
         string_format(