@fugood/llama.node 0.3.14 → 0.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/package.json +1 -1
  18. package/src/llama.cpp/.github/workflows/build.yml +30 -1
  19. package/src/llama.cpp/CMakeLists.txt +9 -1
  20. package/src/llama.cpp/cmake/common.cmake +2 -0
  21. package/src/llama.cpp/common/arg.cpp +20 -2
  22. package/src/llama.cpp/common/common.cpp +6 -3
  23. package/src/llama.cpp/common/speculative.cpp +4 -4
  24. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
  25. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +1 -1
  26. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -1
  27. package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
  28. package/src/llama.cpp/examples/imatrix/imatrix.cpp +1 -1
  29. package/src/llama.cpp/examples/infill/infill.cpp +2 -2
  30. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
  31. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +4 -4
  32. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +1 -1
  33. package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -6
  34. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  35. package/src/llama.cpp/examples/main/main.cpp +6 -6
  36. package/src/llama.cpp/examples/parallel/parallel.cpp +5 -5
  37. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  38. package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
  39. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
  40. package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
  41. package/src/llama.cpp/examples/run/run.cpp +91 -46
  42. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
  43. package/src/llama.cpp/examples/server/server.cpp +37 -15
  44. package/src/llama.cpp/examples/server/utils.hpp +3 -1
  45. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  46. package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
  47. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  48. package/src/llama.cpp/examples/tts/tts.cpp +20 -9
  49. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  50. package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
  51. package/src/llama.cpp/ggml/include/ggml.h +24 -0
  52. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -28
  53. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
  54. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -5
  55. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -7
  56. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +1493 -12
  57. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +150 -1
  58. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +284 -29
  59. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
  60. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
  61. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +7 -0
  62. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -4
  63. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +95 -22
  64. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +35 -12
  65. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -1
  66. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +93 -27
  67. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  68. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +12 -13
  69. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
  70. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +12 -43
  71. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -2
  72. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +109 -40
  73. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
  74. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +19 -20
  75. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
  76. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
  77. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -1
  78. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  79. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +398 -158
  81. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
  82. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +7 -2
  83. package/src/llama.cpp/ggml/src/ggml.c +85 -2
  84. package/src/llama.cpp/include/llama.h +86 -22
  85. package/src/llama.cpp/src/CMakeLists.txt +5 -2
  86. package/src/llama.cpp/src/llama-adapter.cpp +19 -20
  87. package/src/llama.cpp/src/llama-adapter.h +11 -9
  88. package/src/llama.cpp/src/llama-arch.cpp +103 -16
  89. package/src/llama.cpp/src/llama-arch.h +18 -0
  90. package/src/llama.cpp/src/llama-batch.h +2 -2
  91. package/src/llama.cpp/src/llama-context.cpp +2253 -1222
  92. package/src/llama.cpp/src/llama-context.h +214 -77
  93. package/src/llama.cpp/src/llama-cparams.h +1 -0
  94. package/src/llama.cpp/src/llama-graph.cpp +1662 -0
  95. package/src/llama.cpp/src/llama-graph.h +574 -0
  96. package/src/llama.cpp/src/llama-hparams.cpp +8 -0
  97. package/src/llama.cpp/src/llama-hparams.h +9 -0
  98. package/src/llama.cpp/src/llama-io.cpp +15 -0
  99. package/src/llama.cpp/src/llama-io.h +35 -0
  100. package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
  101. package/src/llama.cpp/src/llama-kv-cache.h +178 -110
  102. package/src/llama.cpp/src/llama-memory.cpp +1 -0
  103. package/src/llama.cpp/src/llama-memory.h +21 -0
  104. package/src/llama.cpp/src/llama-model.cpp +8244 -173
  105. package/src/llama.cpp/src/llama-model.h +34 -1
  106. package/src/llama.cpp/src/llama-quant.cpp +10 -1
  107. package/src/llama.cpp/src/llama.cpp +51 -9984
  108. package/src/llama.cpp/tests/test-backend-ops.cpp +145 -23
  109. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
  110. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
@@ -1,4 +1,5 @@
1
1
  include(CheckCXXCompilerFlag)
2
+ include("../cmake/common.cmake")
2
3
 
3
4
  add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
4
5
 
@@ -24,33 +25,6 @@ if (NOT MSVC)
24
25
  endif()
25
26
  endif()
26
27
 
27
- function(ggml_get_flags CCID CCVER)
28
- set(C_FLAGS "")
29
- set(CXX_FLAGS "")
30
-
31
- if (CCID MATCHES "Clang")
32
- set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
33
- set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
34
-
35
- if (
36
- (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
37
- (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
38
- )
39
- list(APPEND C_FLAGS -Wdouble-promotion)
40
- endif()
41
- elseif (CCID STREQUAL "GNU")
42
- set(C_FLAGS -Wdouble-promotion)
43
- set(CXX_FLAGS -Wno-array-bounds)
44
-
45
- if (CCVER VERSION_GREATER_EQUAL 8.1.0)
46
- list(APPEND CXX_FLAGS -Wextra-semi)
47
- endif()
48
- endif()
49
-
50
- set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
51
- set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
52
- endfunction()
53
-
54
28
  if (GGML_FATAL_WARNINGS)
55
29
  if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
56
30
  list(APPEND C_FLAGS -Werror)
@@ -102,7 +76,11 @@ if (GGML_CCACHE)
102
76
  set(GGML_CCACHE_VARIANT sccache)
103
77
  endif()
104
78
  # TODO: should not be set globally
105
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
79
+ if (GGML_SYCL AND GGML_CCACHE_FOUND AND WIN32)
80
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache compiler_type=icl")
81
+ else ()
82
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
83
+ endif ()
106
84
  set(ENV{CCACHE_SLOPPINESS} time_macros)
107
85
  message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
108
86
  else()
@@ -351,6 +329,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
351
329
  target_link_libraries(ggml-base PRIVATE dl)
352
330
  endif()
353
331
 
332
+ if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
333
+ target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
334
+ endif()
335
+
354
336
  if (BUILD_SHARED_LIBS)
355
337
  foreach (target ggml-base ggml)
356
338
  set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -2790,10 +2790,14 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
2790
2790
  (char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
2791
2791
  output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,
2792
2792
  output_ne_offset);
2793
+ int64_t antiquantGroupSize = 0;
2794
+ if (src0->ne[0] > QK8_0) {
2795
+ antiquantGroupSize = QK8_0;
2796
+ }
2793
2797
 
2794
2798
  ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
2795
2799
  acl_input_tensor, acl_weight_tensor, acl_scale_tensor, nullptr,
2796
- nullptr, nullptr, nullptr, QK8_0, acl_output_tensor,
2800
+ nullptr, nullptr, nullptr, antiquantGroupSize, acl_output_tensor,
2797
2801
  &workspaceSize, &executor));
2798
2802
  if (workspaceAddr == nullptr) {
2799
2803
  workspaceAddr = workspace_allocator.alloc(workspaceSize);
@@ -2833,7 +2837,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
2833
2837
 
2834
2838
  ACL_CHECK(aclnnWeightQuantBatchMatmulV2GetWorkspaceSize(
2835
2839
  acl_input_tensor, acl_weight_tensor, acl_scale_tensor,
2836
- nullptr, nullptr, nullptr, nullptr, QK8_0,
2840
+ nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
2837
2841
  acl_output_tensor, &workspaceSize, &executor));
2838
2842
  ACL_CHECK(aclnnWeightQuantBatchMatmulV2(
2839
2843
  workspaceAddr, workspaceSize, executor, ctx.stream()));
@@ -1689,11 +1689,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1689
1689
  case GGML_OP_MUL_MAT: {
1690
1690
  switch (op->src[0]->type) {
1691
1691
  case GGML_TYPE_Q8_0:
1692
- // Current groupsize should not be greater than k-1 in
1693
- // aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
1694
- if (op->src[0]->ne[0] <= QK8_0) {
1695
- return false;
1696
- }
1697
1692
  case GGML_TYPE_F16:
1698
1693
  case GGML_TYPE_F32:
1699
1694
  case GGML_TYPE_Q4_0:
@@ -287,17 +287,25 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
287
287
  endif()
288
288
  endif()
289
289
  endif()
290
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
290
+ elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
291
291
  message(STATUS "PowerPC detected")
292
- execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
293
- if (${POWER_M} MATCHES "POWER10")
294
- list(APPEND ARCH_FLAGS -mcpu=power10)
295
- elseif (${POWER_M} MATCHES "POWER9")
296
- list(APPEND ARCH_FLAGS -mcpu=power9)
292
+ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
293
+ file(READ "/proc/cpuinfo" POWER10_M)
294
+ elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
295
+ execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
296
+ endif()
297
+
298
+ string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
299
+ string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
300
+
301
+ if (EXTRACTED_NUMBER GREATER_EQUAL 10)
302
+ list(APPEND ARCH_FLAGS -mcpu=power10 -mpowerpc64)
303
+ elseif (EXTRACTED_NUMBER EQUAL 9)
304
+ list(APPEND ARCH_FLAGS -mcpu=power9 -mpowerpc64)
297
305
  elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
298
306
  list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
299
307
  else()
300
- list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
308
+ list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
301
309
  endif()
302
310
  elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
303
311
  message(STATUS "loongarch64 detected")