@fugood/llama.node 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +9 -0
- package/README.md +1 -1
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +2 -1
- package/patches/llama.patch +22 -0
- package/src/LlamaContext.cpp +2 -2
- package/src/TokenizeWorker.cpp +1 -1
- package/src/llama.cpp/CMakeLists.txt +82 -54
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +6 -0
- package/src/llama.cpp/common/common.cpp +748 -754
- package/src/llama.cpp/common/common.h +49 -41
- package/src/llama.cpp/common/grammar-parser.cpp +10 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +6 -6
- package/src/llama.cpp/common/log.h +5 -5
- package/src/llama.cpp/common/sampling.cpp +92 -10
- package/src/llama.cpp/common/sampling.h +6 -1
- package/src/llama.cpp/common/train.cpp +2 -2
- package/src/llama.cpp/examples/CMakeLists.txt +3 -0
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +13 -4
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +2 -2
- package/src/llama.cpp/examples/finetune/finetune.cpp +4 -3
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -2
- package/src/llama.cpp/examples/infill/infill.cpp +8 -8
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +57 -8
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +55 -0
- package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/CMakeLists.txt +7 -8
- package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/llama-android.cpp +14 -14
- package/src/llama.cpp/examples/llava/clip.h +1 -1
- package/src/llama.cpp/examples/llava/llava-cli.cpp +27 -7
- package/src/llama.cpp/examples/llava/llava.cpp +0 -15
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +29 -17
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +9 -9
- package/src/llama.cpp/examples/quantize/quantize.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +2 -2
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +2 -0
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +134 -0
- package/src/llama.cpp/examples/server/server.cpp +33 -25
- package/src/llama.cpp/examples/server/utils.hpp +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +359 -9
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +4 -3
- package/src/llama.cpp/ggml-backend.c +2 -3
- package/src/llama.cpp/ggml-common.h +0 -54
- package/src/llama.cpp/ggml-cuda.h +1 -0
- package/src/llama.cpp/ggml-impl.h +51 -0
- package/src/llama.cpp/ggml-kompute.cpp +13 -3
- package/src/llama.cpp/ggml-opencl.cpp +4 -1
- package/src/llama.cpp/ggml-quants.c +3715 -2050
- package/src/llama.cpp/ggml-rpc.cpp +1155 -0
- package/src/llama.cpp/ggml-rpc.h +24 -0
- package/src/llama.cpp/ggml-sycl.cpp +119 -673
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- package/src/llama.cpp/ggml-vulkan.cpp +203 -224
- package/src/llama.cpp/ggml.c +1208 -1483
- package/src/llama.cpp/ggml.h +71 -46
- package/src/llama.cpp/llama.cpp +1374 -938
- package/src/llama.cpp/llama.h +22 -6
- package/src/llama.cpp/requirements.txt +0 -2
- package/src/llama.cpp/tests/CMakeLists.txt +1 -1
- package/src/llama.cpp/tests/test-backend-ops.cpp +120 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +16 -4
- package/src/llama.cpp/tests/test-grad0.cpp +43 -83
- package/src/llama.cpp/tests/test-grammar-integration.cpp +46 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +27 -3
- package/src/llama.cpp/unicode-data.cpp +6969 -2169
- package/src/llama.cpp/unicode-data.h +15 -12
- package/src/llama.cpp/unicode.cpp +89 -111
- package/src/llama.cpp/unicode.h +44 -12
- package/src/llama.cpp/build.zig +0 -172
- package/src/llama.cpp/ggml-mpi.c +0 -216
- package/src/llama.cpp/ggml-mpi.h +0 -39
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -2
package/CMakeLists.txt
CHANGED
@@ -64,6 +64,15 @@ if (VULKAN_SDK)
   find_package(Vulkan REQUIRED)
 endif()
 
+find_program(PATCH patch REQUIRED)
+
+add_custom_target(
+  patch ALL
+  COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
+  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
+  COMMENT "Applying patches"
+)
+
 set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
 add_subdirectory("src/llama.cpp")
 
package/README.md
CHANGED
@@ -30,7 +30,7 @@ const context = await loadModel({
 })
 
 // Do completion
-const { text
+const { text } = await context.completion(
   {
     prompt: 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
     n_predict: 100,
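For context, the corrected line completes the README's quick-start example. A minimal end-to-end sketch in the same style (the model path and the per-token callback shape are illustrative assumptions, not taken from this diff):

import { loadModel } from '@fugood/llama.node'

const context = await loadModel({
  model: 'path/to/model.gguf', // placeholder path
})

// Do completion; second argument assumed to be a streaming token callback
const { text } = await context.completion(
  {
    prompt: 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
    n_predict: 100,
  },
  (data) => process.stdout.write(data.token),
)
console.log('Result:', text)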
package/bin/** — 16 binary files changed (darwin, linux, linux-vulkan, win32, and win32-vulkan builds of llama-node.node, plus default.metallib and node.lib; see the file list above)
package/lib/binding.ts
CHANGED
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.2.0",
+  "version": "0.2.2",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -38,6 +38,7 @@
     ]
   },
   "files": [
+    "patches/*.patch",
     "bin/**/*",
     "src/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
     "lib/*.js",
package/patches/llama.patch
ADDED
@@ -0,0 +1,22 @@
+diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
+index b9449be0..cfa0f774 100644
+--- a/ggml-vulkan.cpp
++++ b/ggml-vulkan.cpp
+@@ -525,9 +525,15 @@ static void ggml_vk_create_pipeline(ggml_backend_vk_context * ctx, vk_pipeline&
+         vk::PipelineCreateFlags(),
+         pipeline_shader_create_info,
+         pipeline->layout);
+-    pipeline->pipeline = ctx->device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value;
+
+-    ctx->device->pipelines.push_back(pipeline);
++    try {
++        pipeline->pipeline = ctx->device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value;
++        ctx->device->pipelines.push_back(pipeline);
++    } catch (vk::UnknownError const&) {
++        std::cerr << "ggml_vk_create_pipeline: Failed to create pipeline " << name << std::endl;
++        ggml_vk_destroy_pipeline(ctx->device->device, pipeline);
++        pipeline.reset();
++    }
+ }
+
+ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline) {
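The bundled patch turns a fatal Vulkan pipeline-creation error into a logged, recoverable one (destroy the pipeline, reset the handle). From the JS side this suggests a retry pattern like the hedged sketch below; n_gpu_layers is the option read in src/LlamaContext.cpp, while surfacing a failed GPU init as a rejected promise is an assumption:

import { loadModel } from '@fugood/llama.node'

async function loadWithCpuFallback(modelPath: string) {
  try {
    // -1 = offload all layers (the binding's default, per LlamaContext.cpp)
    return await loadModel({ model: modelPath, n_gpu_layers: -1 })
  } catch (err) {
    // Assumption: a GPU/Vulkan init failure rejects instead of crashing
    console.warn('GPU load failed, retrying on CPU:', err)
    return await loadModel({ model: modelPath, n_gpu_layers: 0 })
  }
}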
package/src/LlamaContext.cpp
CHANGED
@@ -61,7 +61,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_threads =
-      get_option<int32_t>(options, "n_threads",
+      get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
@@ -81,7 +81,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
 
   _sess = std::make_shared<LlamaSession>(model, ctx, params);
-  _info =
+  _info = gpt_params_get_system_info(params);
 }
 
 // getSystemInfo(): string
package/src/TokenizeWorker.cpp
CHANGED
@@ -7,7 +7,7 @@ TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
 
 void TokenizeWorker::Execute() {
   const auto tokens = ::llama_tokenize(_sess->context(), _text, false);
-  _result
+  _result.tokens = std::move(tokens);
 }
 
 void TokenizeWorker::OnOK() {
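The fix stores the tokenizer output in _result.tokens, which is what the JS-side tokenize() promise resolves with. A minimal usage sketch, assuming the { tokens: number[] } result shape implied by that assignment:

const { tokens } = await context.tokenize('Hello, Llama!')
console.log(`${tokens.length} tokens:`, tokens)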
package/src/llama.cpp/CMakeLists.txt
CHANGED
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.14)
+cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
 project("llama.cpp" C CXX)
 include(CheckIncludeFileCXX)
 
@@ -72,11 +72,13 @@ else()
     set(INS_ENB ON)
 endif()
 
+option(LLAMA_SVE "llama: enable SVE" OFF)
 option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
 option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
 option(LLAMA_AVX512 "llama: enable AVX512" OFF)
 option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
 option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_AVX512_BF16 "llama: enable AVX512-BF16" OFF)
 option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
 # in MSVC F16C is implied with AVX2/AVX512
 if (NOT MSVC)
@@ -122,8 +124,7 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
     "llama: metal minimum macOS version")
 set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
 option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
-option(LLAMA_MPI "llama: use MPI" OFF)
-option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
+option(LLAMA_RPC "llama: use RPC" OFF)
 option(LLAMA_SYCL "llama: use SYCL" OFF)
 option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
 set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
@@ -133,6 +134,8 @@ set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeli
 option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER "llama: build server example" ON)
+option(LLAMA_LASX "llama: enable lasx" ON)
+option(LLAMA_LSX "llama: enable lsx" ON)
 
 # add perf arguments
 option(LLAMA_PERF "llama: enable perf" OFF)
@@ -296,7 +299,7 @@ if (LLAMA_BLAS)
     if (LLAMA_STATIC)
         set(BLA_STATIC ON)
     endif()
-    if (
+    if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
         set(BLA_SIZEOF_INTEGER 8)
     endif()
 
@@ -381,10 +384,6 @@ if (LLAMA_LLAMAFILE)
     set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
 endif()
 
-if (LLAMA_QKK_64)
-    add_compile_definitions(GGML_QKK_64)
-endif()
-
 if (LLAMA_CUBLAS)
     message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead")
     set(LLAMA_CUDA ON)
@@ -465,33 +464,15 @@ if (LLAMA_CUDA)
     endif()
 endif()
 
-if (LLAMA_MPI)
-
-    find_package(MPI)
-    if (MPI_C_FOUND)
-        message(STATUS "MPI found")
-
-        set(GGML_HEADERS_MPI ggml-mpi.h)
-        set(GGML_SOURCES_MPI ggml-mpi.c)
-
-        add_compile_definitions(GGML_USE_MPI)
-        add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
-
-        if (NOT MSVC)
-            add_compile_options(-Wno-cast-qual)
-        endif()
+if (LLAMA_RPC)
+    add_compile_definitions(GGML_USE_RPC)
 
-
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
-
-        # Even if you're only using the C header, C++ programs may bring in MPI
-        # C++ functions, so more linkage is needed
-        if (MPI_CXX_FOUND)
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
-        endif()
-    else()
-        message(WARNING "MPI not found")
+    if (WIN32)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ws2_32)
     endif()
+
+    set(GGML_HEADERS_RPC ggml-rpc.h)
+    set(GGML_SOURCES_RPC ggml-rpc.cpp)
 endif()
 
 if (LLAMA_CLBLAST)
@@ -520,6 +501,12 @@ if (LLAMA_VULKAN)
 
     add_compile_definitions(GGML_USE_VULKAN)
 
+    # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
+    # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
+    if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+        add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
+    endif()
+
     if (LLAMA_VULKAN_CHECK_RESULTS)
         add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
     endif()
@@ -543,16 +530,37 @@ if (LLAMA_VULKAN)
     endif()
 endif()
 
 if (LLAMA_HIPBLAS)
-    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
-
-    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
-        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+    if ($ENV{ROCM_PATH})
+        set(ROCM_PATH $ENV{ROCM_PATH})
+    else()
+        set(ROCM_PATH /opt/rocm)
     endif()
+    list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
 
-    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
-        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+    # CMake on Windows doesn't support the HIP language yet
+    if(WIN32)
+        set(CXX_IS_HIPCC TRUE)
+    else()
+        string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
     endif()
 
+    if(CXX_IS_HIPCC)
+        if(LINUX)
+            if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+                message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+            endif()
+
+            message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
+                    " Prefer setting the HIP compiler directly. See README for details.")
+        endif()
+    else()
+        # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
+        if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
+            set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
+        endif()
+        cmake_minimum_required(VERSION 3.21)
+        enable_language(HIP)
+    endif()
     find_package(hip     REQUIRED)
     find_package(hipblas REQUIRED)
     find_package(rocblas REQUIRED)
@@ -586,13 +594,18 @@ if (LLAMA_HIPBLAS)
     add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
     add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
 
-    set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+    if (CXX_IS_HIPCC)
+        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
+    else()
+        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
+    endif()
 
     if (LLAMA_STATIC)
         message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
     endif()
 
-    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
+    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
 endif()
 
 if (LLAMA_SYCL)
@@ -995,6 +1008,11 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
         if (GGML_COMPILER_SUPPORT_DOTPROD)
             add_compile_definitions(__ARM_FEATURE_DOTPROD)
         endif ()
+        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
+        if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
+            add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
+        endif ()
+
         check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
         if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
             add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
@@ -1023,6 +1041,9 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
             # Raspberry Pi 3, 4, Zero 2 (32-bit)
             list(APPEND ARCH_FLAGS -mno-unaligned-access)
         endif()
+        if (LLAMA_SVE)
+            list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
+        endif()
     endif()
 elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
         (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
@@ -1047,6 +1068,10 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
             add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
             add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
         endif()
+        if (LLAMA_AVX512_BF16)
+            add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
+            add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
+        endif()
     elseif (LLAMA_AVX2)
         list(APPEND ARCH_FLAGS /arch:AVX2)
     elseif (LLAMA_AVX)
@@ -1078,6 +1103,9 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
         if (LLAMA_AVX512_VNNI)
             list(APPEND ARCH_FLAGS -mavx512vnni)
         endif()
+        if (LLAMA_AVX512_BF16)
+            list(APPEND ARCH_FLAGS -mavx512bf16)
+        endif()
     endif()
 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
     message(STATUS "PowerPC detected")
@@ -1087,6 +1115,17 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
         list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
         #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
     endif()
+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
+    message(STATUS "loongarch64 detected")
+
+    list(APPEND ARCH_FLAGS -march=loongarch64)
+    if (LLAMA_LASX)
+        list(APPEND ARCH_FLAGS -mlasx)
+    endif()
+    if (LLAMA_LSX)
+        list(APPEND ARCH_FLAGS -mlsx)
+    endif()
+
 else()
     message(STATUS "Unknown architecture")
 endif()
@@ -1175,7 +1214,7 @@ add_library(ggml OBJECT
             ${GGML_SOURCES_CUDA}    ${GGML_HEADERS_CUDA}
             ${GGML_SOURCES_OPENCL}  ${GGML_HEADERS_OPENCL}
             ${GGML_SOURCES_METAL}   ${GGML_HEADERS_METAL}
-            ${GGML_SOURCES_MPI}     ${GGML_HEADERS_MPI}
+            ${GGML_SOURCES_RPC}     ${GGML_HEADERS_RPC}
             ${GGML_SOURCES_EXTRA}   ${GGML_HEADERS_EXTRA}
             ${GGML_SOURCES_SYCL}    ${GGML_HEADERS_SYCL}
             ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
@@ -1262,7 +1301,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
 
 set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
     "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
-    "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
+    "${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 install(TARGETS ggml PUBLIC_HEADER)
@@ -1281,17 +1320,6 @@ install(
         WORLD_READ
         WORLD_EXECUTE
     DESTINATION ${CMAKE_INSTALL_BINDIR})
-install(
-    FILES convert-lora-to-ggml.py
-    PERMISSIONS
-        OWNER_READ
-        OWNER_WRITE
-        OWNER_EXECUTE
-        GROUP_READ
-        GROUP_EXECUTE
-        WORLD_READ
-        WORLD_EXECUTE
-    DESTINATION ${CMAKE_INSTALL_BINDIR})
 if (LLAMA_METAL)
     install(
         FILES ggml-metal.metal
package/src/llama.cpp/cmake/arm64-windows-llvm.cmake
ADDED
@@ -0,0 +1,16 @@
+set( CMAKE_SYSTEM_NAME Windows )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-pc-windows-msvc )
+
+set( CMAKE_C_COMPILER clang )
+set( CMAKE_CXX_COMPILER clang++ )
+
+set( CMAKE_C_COMPILER_TARGET ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
+
+set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast" )
+set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
+
+set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )