@fugood/llama.node 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/package.json +14 -14
  2. package/scripts/llama.cpp.patch +8 -8
  3. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  4. package/src/llama.cpp/common/arg.cpp +44 -999
  5. package/src/llama.cpp/common/arg.h +2 -2
  6. package/src/llama.cpp/common/chat.cpp +17 -2
  7. package/src/llama.cpp/common/common.cpp +33 -0
  8. package/src/llama.cpp/common/common.h +15 -1
  9. package/src/llama.cpp/common/download.cpp +1054 -0
  10. package/src/llama.cpp/common/download.h +55 -0
  11. package/src/llama.cpp/ggml/CMakeLists.txt +1 -1
  12. package/src/llama.cpp/ggml/include/ggml.h +2 -0
  13. package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
  14. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -11
  15. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  16. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  17. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
  18. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  19. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
  20. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +21 -21
  21. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -75
  22. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +0 -4
  23. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
  24. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
  25. package/src/llama.cpp/include/llama.h +7 -3
  26. package/src/llama.cpp/src/CMakeLists.txt +95 -0
  27. package/src/llama.cpp/src/llama-arch.cpp +108 -0
  28. package/src/llama.cpp/src/llama-arch.h +11 -0
  29. package/src/llama.cpp/src/llama-batch.cpp +63 -31
  30. package/src/llama.cpp/src/llama-batch.h +12 -1
  31. package/src/llama.cpp/src/llama-chat.cpp +32 -0
  32. package/src/llama.cpp/src/llama-chat.h +1 -0
  33. package/src/llama.cpp/src/llama-context.cpp +36 -13
  34. package/src/llama.cpp/src/llama-context.h +5 -5
  35. package/src/llama.cpp/src/llama-cparams.h +1 -0
  36. package/src/llama.cpp/src/llama-graph.cpp +3 -3
  37. package/src/llama.cpp/src/llama-hparams.cpp +11 -1
  38. package/src/llama.cpp/src/llama-hparams.h +6 -0
  39. package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
  40. package/src/llama.cpp/src/llama-kv-cache.cpp +33 -1
  41. package/src/llama.cpp/src/llama-kv-cells.h +44 -2
  42. package/src/llama.cpp/src/llama-memory-recurrent.cpp +4 -3
  43. package/src/llama.cpp/src/llama-model.cpp +320 -13171
  44. package/src/llama.cpp/src/llama-model.h +8 -0
  45. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  46. package/src/llama.cpp/src/llama-vocab.cpp +5 -0
  47. package/src/llama.cpp/src/llama-vocab.h +1 -0
  48. package/src/llama.cpp/src/models/apertus.cpp +125 -0
  49. package/src/llama.cpp/src/models/arcee.cpp +135 -0
  50. package/src/llama.cpp/src/models/arctic.cpp +138 -0
  51. package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
  52. package/src/llama.cpp/src/models/baichuan.cpp +122 -0
  53. package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
  54. package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  55. package/src/llama.cpp/src/models/bert.cpp +176 -0
  56. package/src/llama.cpp/src/models/bitnet.cpp +160 -0
  57. package/src/llama.cpp/src/models/bloom.cpp +101 -0
  58. package/src/llama.cpp/src/models/chameleon.cpp +178 -0
  59. package/src/llama.cpp/src/models/chatglm.cpp +132 -0
  60. package/src/llama.cpp/src/models/codeshell.cpp +111 -0
  61. package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
  62. package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  63. package/src/llama.cpp/src/models/command-r.cpp +122 -0
  64. package/src/llama.cpp/src/models/dbrx.cpp +123 -0
  65. package/src/llama.cpp/src/models/deci.cpp +135 -0
  66. package/src/llama.cpp/src/models/deepseek.cpp +144 -0
  67. package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
  68. package/src/llama.cpp/src/models/dots1.cpp +134 -0
  69. package/src/llama.cpp/src/models/dream.cpp +105 -0
  70. package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  71. package/src/llama.cpp/src/models/ernie4-5.cpp +110 -0
  72. package/src/llama.cpp/src/models/exaone.cpp +114 -0
  73. package/src/llama.cpp/src/models/exaone4.cpp +123 -0
  74. package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
  75. package/src/llama.cpp/src/models/falcon.cpp +120 -0
  76. package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  77. package/src/llama.cpp/src/models/gemma.cpp +112 -0
  78. package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  79. package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  80. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  81. package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
  82. package/src/llama.cpp/src/models/glm4.cpp +127 -0
  83. package/src/llama.cpp/src/models/gpt2.cpp +105 -0
  84. package/src/llama.cpp/src/models/gptneox.cpp +144 -0
  85. package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  86. package/src/llama.cpp/src/models/granite.cpp +211 -0
  87. package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  88. package/src/llama.cpp/src/models/grok.cpp +159 -0
  89. package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
  90. package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  91. package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  92. package/src/llama.cpp/src/models/internlm2.cpp +120 -0
  93. package/src/llama.cpp/src/models/jais.cpp +86 -0
  94. package/src/llama.cpp/src/models/jamba.cpp +106 -0
  95. package/src/llama.cpp/src/models/lfm2.cpp +173 -0
  96. package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
  97. package/src/llama.cpp/src/models/llada.cpp +99 -0
  98. package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
  99. package/src/llama.cpp/src/models/llama.cpp +155 -0
  100. package/src/llama.cpp/src/models/mamba.cpp +55 -0
  101. package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
  102. package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
  103. package/src/llama.cpp/src/models/models.h +481 -0
  104. package/src/llama.cpp/src/models/mpt.cpp +126 -0
  105. package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
  106. package/src/llama.cpp/src/models/nemotron.cpp +122 -0
  107. package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
  108. package/src/llama.cpp/src/models/olmo.cpp +121 -0
  109. package/src/llama.cpp/src/models/olmo2.cpp +150 -0
  110. package/src/llama.cpp/src/models/olmoe.cpp +124 -0
  111. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
  112. package/src/llama.cpp/src/models/openelm.cpp +124 -0
  113. package/src/llama.cpp/src/models/orion.cpp +123 -0
  114. package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  115. package/src/llama.cpp/src/models/phi2.cpp +121 -0
  116. package/src/llama.cpp/src/models/phi3.cpp +152 -0
  117. package/src/llama.cpp/src/models/plamo.cpp +110 -0
  118. package/src/llama.cpp/src/models/plamo2.cpp +316 -0
  119. package/src/llama.cpp/src/models/plm.cpp +168 -0
  120. package/src/llama.cpp/src/models/qwen.cpp +108 -0
  121. package/src/llama.cpp/src/models/qwen2.cpp +117 -0
  122. package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
  123. package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
  124. package/src/llama.cpp/src/models/qwen3.cpp +117 -0
  125. package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
  126. package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  127. package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
  128. package/src/llama.cpp/src/models/refact.cpp +94 -0
  129. package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  130. package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
  131. package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  132. package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  133. package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
  134. package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
  135. package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
  136. package/src/llama.cpp/src/models/smollm3.cpp +128 -0
  137. package/src/llama.cpp/src/models/stablelm.cpp +146 -0
  138. package/src/llama.cpp/src/models/starcoder.cpp +100 -0
  139. package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
  140. package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
  141. package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
  142. package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  143. package/src/llama.cpp/src/models/xverse.cpp +108 -0
@@ -0,0 +1,55 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+
5
+ struct common_params_model;
6
+
7
+ //
8
+ // download functionalities
9
+ //
10
+
11
+ struct common_cached_model_info {
12
+ std::string manifest_path;
13
+ std::string user;
14
+ std::string model;
15
+ std::string tag;
16
+ size_t size = 0; // GGUF size in bytes
17
+ std::string to_string() const {
18
+ return user + "/" + model + ":" + tag;
19
+ }
20
+ };
21
+
22
+ struct common_hf_file_res {
23
+ std::string repo; // repo name with ":tag" removed
24
+ std::string ggufFile;
25
+ std::string mmprojFile;
26
+ };
27
+
28
+ /**
29
+ * Allow getting the HF file from the HF repo with tag (like ollama), for example:
30
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
31
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
32
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
33
+ * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
34
+ *
35
+ * Return pair of <repo, file> (with "repo" already having tag removed)
36
+ *
37
+ * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
38
+ */
39
+ common_hf_file_res common_get_hf_file(
40
+ const std::string & hf_repo_with_tag,
41
+ const std::string & bearer_token,
42
+ bool offline);
43
+
44
+ // returns true if download succeeded
45
+ bool common_download_model(
46
+ const common_params_model & model,
47
+ const std::string & bearer_token,
48
+ bool offline);
49
+
50
+ // returns list of cached models
51
+ std::vector<common_cached_model_info> common_list_cached_models();
52
+
53
+ // resolve and download model from Docker registry
54
+ // return local path to downloaded model file
55
+ std::string common_docker_resolve_model(const std::string & docker);
@@ -168,7 +168,7 @@ option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
168
168
  option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
169
169
  option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
170
170
  option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
171
- option(GGML_VXE "ggml: enable vxe" ON)
171
+ option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})
172
172
 
173
173
  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
174
174
  set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
@@ -242,6 +242,7 @@
242
242
  #define GGML_ROPE_TYPE_NEOX 2
243
243
  #define GGML_ROPE_TYPE_MROPE 8
244
244
  #define GGML_ROPE_TYPE_VISION 24
245
+ #define GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
245
246
 
246
247
  #define GGML_MROPE_SECTIONS 4
247
248
 
@@ -2107,6 +2108,7 @@ extern "C" {
2107
2108
  enum ggml_scale_mode {
2108
2109
  GGML_SCALE_MODE_NEAREST = 0,
2109
2110
  GGML_SCALE_MODE_BILINEAR = 1,
2111
+ GGML_SCALE_MODE_BICUBIC = 2,
2110
2112
 
2111
2113
  GGML_SCALE_MODE_COUNT
2112
2114
  };
@@ -308,6 +308,10 @@ function(ggml_add_cpu_backend_variant tag_name)
308
308
  set(GGML_INTERNAL_${feat} ON)
309
309
  endforeach()
310
310
  elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
311
+ foreach (feat VXE2 NNPA)
312
+ set(GGML_INTERNAL_${feat} OFF)
313
+ endforeach()
314
+
311
315
  foreach (feat ${ARGN})
312
316
  set(GGML_INTERNAL_${feat} ON)
313
317
  endforeach()
@@ -377,9 +381,8 @@ if (GGML_CPU_ALL_VARIANTS)
377
381
  endif()
378
382
  elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
379
383
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
380
- ggml_add_cpu_backend_variant(s390x_z15 Z15 VXE)
381
- # ggml_add_cpu_backend_variant(s390x_z16 Z16 VXE)
382
- # ggml_add_cpu_backend_variant(s390x_z17 Z17 VXE)
384
+ ggml_add_cpu_backend_variant(z15 Z15 VXE2)
385
+ ggml_add_cpu_backend_variant(z16 Z16 VXE2 NNPA)
383
386
  else()
384
387
  message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
385
388
  endif()
@@ -126,25 +126,36 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
126
126
  )
127
127
  if (NOT ARM_MCPU_RESULT)
128
128
  string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
129
+ string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")
130
+
131
+ # on some old GCC we need to read -march=
132
+ if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
133
+ set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
134
+ elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
135
+ set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
136
+ endif()
129
137
  endif()
130
- if ("${ARM_MCPU_FLAG}" STREQUAL "")
131
- set(ARM_MCPU_FLAG -mcpu=native)
132
- message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
138
+
139
+ if ("${ARM_NATIVE_FLAG}" STREQUAL "")
140
+ set(ARM_NATIVE_FLAG -mcpu=native)
141
+ message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
142
+ else()
143
+ message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
133
144
  endif()
134
145
 
135
146
  include(CheckCXXSourceRuns)
136
147
 
137
148
  function(check_arm_feature tag code)
138
149
  set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
139
- set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
150
+ set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
140
151
  check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
141
152
  if (GGML_MACHINE_SUPPORTS_${tag})
142
- set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
153
+ set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}" PARENT_SCOPE)
143
154
  else()
144
- set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+no${tag}")
155
+ set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
145
156
  check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
146
157
  if (GGML_MACHINE_SUPPORTS_no${tag})
147
- set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
158
+ set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}" PARENT_SCOPE)
148
159
  endif()
149
160
  endif()
150
161
  set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
@@ -155,7 +166,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
155
166
  check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
156
167
  check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
157
168
 
158
- list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
169
+ list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
159
170
  else()
160
171
  if (GGML_CPU_ARM_ARCH)
161
172
  list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
@@ -504,11 +515,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
504
515
  endforeach()
505
516
  endif()
506
517
 
507
- if (GGML_VXE OR GGML_INTERNAL_VXE)
508
- message(STATUS "VX/VXE/VXE2 enabled")
518
+ if (GGML_VXE OR GGML_INTERNAL_VXE2)
519
+ message(STATUS "VXE2 enabled")
509
520
  list(APPEND ARCH_FLAGS -mvx -mzvector)
510
- list(APPEND ARCH_DEFINITIONS GGML_VXE)
521
+ list(APPEND ARCH_DEFINITIONS GGML_USE_VXE2)
511
522
  endif()
523
+
524
+ if (GGML_INTERNAL_NNPA)
525
+ message(STATUS "NNPA enabled")
526
+ list(APPEND ARCH_DEFINITIONS GGML_USE_NNPA)
527
+ endif()
528
+
529
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} s390 ${ARCH_DEFINITIONS})
512
530
  elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
513
531
  message(STATUS "Wasm detected")
514
532
  list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)