@fugood/llama.node 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/CMakeLists.txt +9 -0
  2. package/README.md +1 -1
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  10. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  11. package/bin/win32/arm64/llama-node.node +0 -0
  12. package/bin/win32/arm64/node.lib +0 -0
  13. package/bin/win32/x64/llama-node.node +0 -0
  14. package/bin/win32/x64/node.lib +0 -0
  15. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/arm64/node.lib +0 -0
  17. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  18. package/bin/win32-vulkan/x64/node.lib +0 -0
  19. package/lib/binding.ts +1 -1
  20. package/package.json +2 -1
  21. package/patches/llama.patch +22 -0
  22. package/src/LlamaContext.cpp +2 -2
  23. package/src/TokenizeWorker.cpp +1 -1
  24. package/src/llama.cpp/CMakeLists.txt +82 -54
  25. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  26. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +6 -0
  27. package/src/llama.cpp/common/common.cpp +748 -754
  28. package/src/llama.cpp/common/common.h +49 -41
  29. package/src/llama.cpp/common/grammar-parser.cpp +10 -1
  30. package/src/llama.cpp/common/json-schema-to-grammar.cpp +6 -6
  31. package/src/llama.cpp/common/log.h +5 -5
  32. package/src/llama.cpp/common/sampling.cpp +92 -10
  33. package/src/llama.cpp/common/sampling.h +6 -1
  34. package/src/llama.cpp/common/train.cpp +2 -2
  35. package/src/llama.cpp/examples/CMakeLists.txt +3 -0
  36. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  37. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  38. package/src/llama.cpp/examples/embedding/embedding.cpp +13 -4
  39. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +2 -2
  40. package/src/llama.cpp/examples/finetune/finetune.cpp +4 -3
  41. package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -2
  42. package/src/llama.cpp/examples/infill/infill.cpp +8 -8
  43. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +57 -8
  44. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +55 -0
  45. package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/CMakeLists.txt +7 -8
  46. package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/llama-android.cpp +14 -14
  47. package/src/llama.cpp/examples/llava/clip.h +1 -1
  48. package/src/llama.cpp/examples/llava/llava-cli.cpp +27 -7
  49. package/src/llama.cpp/examples/llava/llava.cpp +0 -15
  50. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  51. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  52. package/src/llama.cpp/examples/main/main.cpp +29 -17
  53. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  54. package/src/llama.cpp/examples/perplexity/perplexity.cpp +9 -9
  55. package/src/llama.cpp/examples/quantize/quantize.cpp +2 -2
  56. package/src/llama.cpp/examples/retrieval/retrieval.cpp +2 -2
  57. package/src/llama.cpp/examples/rpc/CMakeLists.txt +2 -0
  58. package/src/llama.cpp/examples/rpc/rpc-server.cpp +134 -0
  59. package/src/llama.cpp/examples/server/server.cpp +33 -25
  60. package/src/llama.cpp/examples/server/utils.hpp +1 -1
  61. package/src/llama.cpp/examples/tokenize/tokenize.cpp +359 -9
  62. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +4 -3
  63. package/src/llama.cpp/ggml-backend.c +2 -3
  64. package/src/llama.cpp/ggml-common.h +0 -54
  65. package/src/llama.cpp/ggml-cuda.h +1 -0
  66. package/src/llama.cpp/ggml-impl.h +51 -0
  67. package/src/llama.cpp/ggml-kompute.cpp +13 -3
  68. package/src/llama.cpp/ggml-opencl.cpp +4 -1
  69. package/src/llama.cpp/ggml-quants.c +3715 -2050
  70. package/src/llama.cpp/ggml-rpc.cpp +1155 -0
  71. package/src/llama.cpp/ggml-rpc.h +24 -0
  72. package/src/llama.cpp/ggml-sycl.cpp +119 -673
  73. package/src/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
  74. package/src/llama.cpp/ggml-vulkan.cpp +203 -224
  75. package/src/llama.cpp/ggml.c +1208 -1483
  76. package/src/llama.cpp/ggml.h +71 -46
  77. package/src/llama.cpp/llama.cpp +1374 -938
  78. package/src/llama.cpp/llama.h +22 -6
  79. package/src/llama.cpp/requirements.txt +0 -2
  80. package/src/llama.cpp/tests/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/tests/test-backend-ops.cpp +120 -57
  82. package/src/llama.cpp/tests/test-chat-template.cpp +16 -4
  83. package/src/llama.cpp/tests/test-grad0.cpp +43 -83
  84. package/src/llama.cpp/tests/test-grammar-integration.cpp +46 -0
  85. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +27 -3
  86. package/src/llama.cpp/unicode-data.cpp +6969 -2169
  87. package/src/llama.cpp/unicode-data.h +15 -12
  88. package/src/llama.cpp/unicode.cpp +89 -111
  89. package/src/llama.cpp/unicode.h +44 -12
  90. package/src/llama.cpp/build.zig +0 -172
  91. package/src/llama.cpp/ggml-mpi.c +0 -216
  92. package/src/llama.cpp/ggml-mpi.h +0 -39
  93. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -2
  94. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -2
package/CMakeLists.txt CHANGED
@@ -64,6 +64,15 @@ if (VULKAN_SDK)
  find_package(Vulkan REQUIRED)
  endif()
 
+ find_program(PATCH patch REQUIRED)
+
+ add_custom_target(
+ patch ALL
+ COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
+ COMMENT "Applying patches"
+ )
+
  set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
  add_subdirectory("src/llama.cpp")
 
package/README.md CHANGED
@@ -30,7 +30,7 @@ const context = await loadModel({
  })
 
  // Do completion
- const { text, timings } = await context.completion(
+ const { text } = await context.completion(
  {
  prompt: 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
  n_predict: 100,
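
For reference, a minimal end-to-end sketch of the completion call as documented in the updated README; the import path, the `model` option key, and the callback-free call shape are assumptions for illustration, not verbatim package API:

```ts
// Minimal sketch based on the README snippet above (assumptions noted in the lead-in).
import { loadModel } from '@fugood/llama.node'

const main = async () => {
  const context = await loadModel({
    model: './llama.gguf', // hypothetical path to a GGUF model file
  })

  // As of 0.2.2 the README destructures only `text`; `timings` is no longer shown.
  const { text } = await context.completion({
    prompt:
      'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
    n_predict: 100,
  })

  console.log(text)
}

main()
```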
Binary files under package/bin/ changed; contents not shown in this diff.
package/lib/binding.ts CHANGED
@@ -62,7 +62,7 @@ export interface Module {
  LlamaContext: LlamaContext
  }
 
- export type LibVariant = 'default' | 'opencl'
+ export type LibVariant = 'default' | 'vulkan'
 
  const setupEnv = (variant?: string) => {
  const postfix = variant ? `-${variant}` : ''
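
The selected variant feeds `setupEnv`, which appends it as a postfix when picking the prebuilt binary folder (compare the `bin/linux-vulkan/*` and `bin/win32-vulkan/*` entries in the file list above). A minimal sketch of that mapping, assuming the folder layout shipped in this package; the real resolution logic lives inside `lib/binding.ts`:

```ts
// Sketch of how a LibVariant maps to a prebuilt binary folder, mirroring the
// postfix logic in setupEnv above. Paths are illustrative, not the package's API.
type LibVariant = 'default' | 'vulkan'

const bindingDir = (platform: string, arch: string, variant?: LibVariant): string => {
  // Mirrors setupEnv: the postfix is `-${variant}` when a variant is given,
  // empty otherwise; the default build is selected by omitting the variant.
  const postfix = variant ? `-${variant}` : ''
  return `bin/${platform}${postfix}/${arch}`
}

console.log(bindingDir('linux', 'x64', 'vulkan')) // bin/linux-vulkan/x64
console.log(bindingDir('win32', 'arm64'))         // bin/win32/arm64
```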
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "0.2.0",
+ "version": "0.2.2",
  "description": "Llama.cpp for Node.js",
  "main": "lib/index.js",
  "scripts": {
@@ -38,6 +38,7 @@
  ]
  },
  "files": [
+ "patches/*.patch",
  "bin/**/*",
  "src/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
  "lib/*.js",
package/patches/llama.patch ADDED
@@ -0,0 +1,22 @@
+ diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
+ index b9449be0..cfa0f774 100644
+ --- a/ggml-vulkan.cpp
+ +++ b/ggml-vulkan.cpp
+ @@ -525,9 +525,15 @@ static void ggml_vk_create_pipeline(ggml_backend_vk_context * ctx, vk_pipeline&
+ vk::PipelineCreateFlags(),
+ pipeline_shader_create_info,
+ pipeline->layout);
+ - pipeline->pipeline = ctx->device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value;
+
+ - ctx->device->pipelines.push_back(pipeline);
+ + try {
+ + pipeline->pipeline = ctx->device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value;
+ + ctx->device->pipelines.push_back(pipeline);
+ + } catch (vk::UnknownError const&) {
+ + std::cerr << "ggml_vk_create_pipeline: Failed to create pipeline " << name << std::endl;
+ + ggml_vk_destroy_pipeline(ctx->device->device, pipeline);
+ + pipeline.reset();
+ + }
+ }
+
+ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline) {
package/src/LlamaContext.cpp CHANGED
@@ -61,7 +61,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
  params.n_threads =
- get_option<int32_t>(options, "n_threads", get_math_cpu_count() / 2);
+ get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
  params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
  params.use_mlock = get_option<bool>(options, "use_mlock", false);
  params.use_mmap = get_option<bool>(options, "use_mmap", true);
@@ -81,7 +81,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  }
 
  _sess = std::make_shared<LlamaSession>(model, ctx, params);
- _info = get_system_info(params);
+ _info = gpt_params_get_system_info(params);
  }
 
  // getSystemInfo(): string
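
The `get_option` calls above define the context options (and their defaults) read from the JavaScript side when a context is constructed. A hedged sketch of the corresponding options object; the `loadModel` import path and the `model` key are assumptions, while the numeric keys and defaults follow the C++ above:

```ts
import { loadModel } from '@fugood/llama.node' // assumed import path

const context = await loadModel({
  model: './llama.gguf', // assumed key for the model path
  n_ctx: 512,        // context size; default 512
  n_batch: 2048,     // logical batch size; default 2048
  n_threads: 4,      // default is cpu_get_num_math() / 2 on the native side
  n_gpu_layers: -1,  // default -1
  use_mlock: false,  // default false
  use_mmap: true,    // default true
})
```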
package/src/TokenizeWorker.cpp CHANGED
@@ -7,7 +7,7 @@ TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
 
  void TokenizeWorker::Execute() {
  const auto tokens = ::llama_tokenize(_sess->context(), _text, false);
- _result = {.tokens = std::move(tokens)};
+ _result.tokens = std::move(tokens);
  }
 
  void TokenizeWorker::OnOK() {
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -1,4 +1,4 @@
- cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
+ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
  project("llama.cpp" C CXX)
  include(CheckIncludeFileCXX)
 
@@ -72,11 +72,13 @@ else()
  set(INS_ENB ON)
  endif()
 
+ option(LLAMA_SVE "llama: enable SVE" OFF)
  option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
  option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
  option(LLAMA_AVX512 "llama: enable AVX512" OFF)
  option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
  option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+ option(LLAMA_AVX512_BF16 "llama: enable AVX512-BF16" OFF)
  option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
  # in MSVC F16C is implied with AVX2/AVX512
  if (NOT MSVC)
@@ -122,8 +124,7 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
  "llama: metal minimum macOS version")
  set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
  option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
- option(LLAMA_MPI "llama: use MPI" OFF)
- option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
+ option(LLAMA_RPC "llama: use RPC" OFF)
  option(LLAMA_SYCL "llama: use SYCL" OFF)
  option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
  set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
@@ -133,6 +134,8 @@ set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeli
  option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
  option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
  option(LLAMA_BUILD_SERVER "llama: build server example" ON)
+ option(LLAMA_LASX "llama: enable lasx" ON)
+ option(LLAMA_LSX "llama: enable lsx" ON)
 
  # add perf arguments
  option(LLAMA_PERF "llama: enable perf" OFF)
@@ -296,7 +299,7 @@ if (LLAMA_BLAS)
  if (LLAMA_STATIC)
  set(BLA_STATIC ON)
  endif()
- if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22)
+ if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
  set(BLA_SIZEOF_INTEGER 8)
  endif()
 
@@ -381,10 +384,6 @@ if (LLAMA_LLAMAFILE)
  set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
  endif()
 
- if (LLAMA_QKK_64)
- add_compile_definitions(GGML_QKK_64)
- endif()
-
  if (LLAMA_CUBLAS)
  message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead")
  set(LLAMA_CUDA ON)
@@ -465,33 +464,15 @@ if (LLAMA_CUDA)
  endif()
  endif()
 
- if (LLAMA_MPI)
- cmake_minimum_required(VERSION 3.10)
- find_package(MPI)
- if (MPI_C_FOUND)
- message(STATUS "MPI found")
-
- set(GGML_HEADERS_MPI ggml-mpi.h)
- set(GGML_SOURCES_MPI ggml-mpi.c)
-
- add_compile_definitions(GGML_USE_MPI)
- add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
-
- if (NOT MSVC)
- add_compile_options(-Wno-cast-qual)
- endif()
+ if (LLAMA_RPC)
+ add_compile_definitions(GGML_USE_RPC)
 
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
- set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
-
- # Even if you're only using the C header, C++ programs may bring in MPI
- # C++ functions, so more linkage is needed
- if (MPI_CXX_FOUND)
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
- endif()
- else()
- message(WARNING "MPI not found")
+ if (WIN32)
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ws2_32)
  endif()
+
+ set(GGML_HEADERS_RPC ggml-rpc.h)
+ set(GGML_SOURCES_RPC ggml-rpc.cpp)
  endif()
 
  if (LLAMA_CLBLAST)
@@ -520,6 +501,12 @@ if (LLAMA_VULKAN)
 
  add_compile_definitions(GGML_USE_VULKAN)
 
+ # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
+ # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
+ if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
+ endif()
+
  if (LLAMA_VULKAN_CHECK_RESULTS)
  add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
  endif()
@@ -543,16 +530,37 @@ if (LLAMA_VULKAN)
  endif()
 
  if (LLAMA_HIPBLAS)
- list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
-
- if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
- message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+ if ($ENV{ROCM_PATH})
+ set(ROCM_PATH $ENV{ROCM_PATH})
+ else()
+ set(ROCM_PATH /opt/rocm)
  endif()
+ list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
 
- if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
- message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+ # CMake on Windows doesn't support the HIP language yet
+ if(WIN32)
+ set(CXX_IS_HIPCC TRUE)
+ else()
+ string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
  endif()
 
+ if(CXX_IS_HIPCC)
+ if(LINUX)
+ if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+ message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+ endif()
+
+ message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
+ " Prefer setting the HIP compiler directly. See README for details.")
+ endif()
+ else()
+ # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
+ if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
+ set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
+ endif()
+ cmake_minimum_required(VERSION 3.21)
+ enable_language(HIP)
+ endif()
  find_package(hip REQUIRED)
  find_package(hipblas REQUIRED)
  find_package(rocblas REQUIRED)
@@ -586,13 +594,18 @@ if (LLAMA_HIPBLAS)
  add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
  add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
 
- set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+ if (CXX_IS_HIPCC)
+ set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
+ else()
+ set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
+ endif()
 
  if (LLAMA_STATIC)
  message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
  endif()
 
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
  endif()
 
  if (LLAMA_SYCL)
@@ -995,6 +1008,11 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
  if (GGML_COMPILER_SUPPORT_DOTPROD)
  add_compile_definitions(__ARM_FEATURE_DOTPROD)
  endif ()
+ check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
+ if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
+ add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
+ endif ()
+
  check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
  if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
  add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
@@ -1023,6 +1041,9 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
  # Raspberry Pi 3, 4, Zero 2 (32-bit)
  list(APPEND ARCH_FLAGS -mno-unaligned-access)
  endif()
+ if (LLAMA_SVE)
+ list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
+ endif()
  endif()
  elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
  (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
@@ -1047,6 +1068,10 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
  add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
  add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
  endif()
+ if (LLAMA_AVX512_BF16)
+ add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
+ add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
+ endif()
  elseif (LLAMA_AVX2)
  list(APPEND ARCH_FLAGS /arch:AVX2)
  elseif (LLAMA_AVX)
@@ -1078,6 +1103,9 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
  if (LLAMA_AVX512_VNNI)
  list(APPEND ARCH_FLAGS -mavx512vnni)
  endif()
+ if (LLAMA_AVX512_BF16)
+ list(APPEND ARCH_FLAGS -mavx512bf16)
+ endif()
  endif()
  elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
  message(STATUS "PowerPC detected")
@@ -1087,6 +1115,17 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
  list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
  #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
  endif()
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
+ message(STATUS "loongarch64 detected")
+
+ list(APPEND ARCH_FLAGS -march=loongarch64)
+ if (LLAMA_LASX)
+ list(APPEND ARCH_FLAGS -mlasx)
+ endif()
+ if (LLAMA_LSX)
+ list(APPEND ARCH_FLAGS -mlsx)
+ endif()
+
  else()
  message(STATUS "Unknown architecture")
  endif()
@@ -1175,7 +1214,7 @@ add_library(ggml OBJECT
  ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
  ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
  ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
- ${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
+ ${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
  ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
  ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
  ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
@@ -1262,7 +1301,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
 
  set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
  "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
- "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
+ "${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
 
  set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
  install(TARGETS ggml PUBLIC_HEADER)
@@ -1281,17 +1320,6 @@ install(
  WORLD_READ
  WORLD_EXECUTE
  DESTINATION ${CMAKE_INSTALL_BINDIR})
- install(
- FILES convert-lora-to-ggml.py
- PERMISSIONS
- OWNER_READ
- OWNER_WRITE
- OWNER_EXECUTE
- GROUP_READ
- GROUP_EXECUTE
- WORLD_READ
- WORLD_EXECUTE
- DESTINATION ${CMAKE_INSTALL_BINDIR})
  if (LLAMA_METAL)
  install(
  FILES ggml-metal.metal
package/src/llama.cpp/cmake/arm64-windows-llvm.cmake ADDED
@@ -0,0 +1,16 @@
+ set( CMAKE_SYSTEM_NAME Windows )
+ set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+ set( target arm64-pc-windows-msvc )
+
+ set( CMAKE_C_COMPILER clang )
+ set( CMAKE_CXX_COMPILER clang++ )
+
+ set( CMAKE_C_COMPILER_TARGET ${target} )
+ set( CMAKE_CXX_COMPILER_TARGET ${target} )
+
+ set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast" )
+ set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
+
+ set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
+ set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
package/src/llama.cpp/cmake/arm64-windows-msvc.cmake ADDED
@@ -0,0 +1,6 @@
+ set( CMAKE_SYSTEM_NAME Windows )
+ set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+ set( target arm64-pc-windows-msvc )
+ set( CMAKE_C_COMPILER_TARGET ${target} )
+ set( CMAKE_CXX_COMPILER_TARGET ${target} )