@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff compares the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp
@@ -1,508 +0,0 @@
- #include "ggml.h"
- #include "gguf.h"
-
- #include "arg.h"
- #include "common.h"
- #include "llama.h"
- #include "pca.hpp"
- #include "mean.hpp"
-
- #ifdef GGML_USE_CUDA
- #include "ggml-cuda.h"
- #endif
-
- #ifdef GGML_USE_METAL
- #include "ggml-metal.h"
- #endif
-
- #include <algorithm>
- #include <climits>
- #include <cstdio>
- #include <cstring>
- #include <fstream>
- #include <iostream>
- #include <string>
- #include <tuple>
- #include <vector>
-
-
- //////////////////////////////////////////////////
- // utils
-
- template <class Iter>
- static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
-     std::string ret;
-     for (; begin != end; ++begin) {
-         ret += common_token_to_piece(ctx, *begin);
-     }
-
-     return ret;
- }
-
- static void print_usage(int, char ** argv) {
-     printf("\nexample usage:\n");
-     printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
-     printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);
-     printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]);
-     printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]);
-     printf("\n");
- }
-
- //////////////////////////////////////////////////
-
-
- // cb_eval is reused for each pair of positive - negative prompt
- struct callback_data {
-     ggml_context * ctx_ggml = nullptr; // holds v_pos, v_neg, v_diff_filtered
-
-     int n_layers = 0;
-     int n_tokens = 0;
-     bool is_eval_pos = true;
-
-     // each element of the vector correspond to one layer
-     std::vector<struct ggml_tensor *> v_pos; // vector of matrices of size [n_embd, n_tokens]
-     std::vector<struct ggml_tensor *> v_neg; // vector of matrices of size [n_embd, n_tokens]
-     std::vector<struct ggml_tensor *> v_diff_filtered; // vector of matrices of size [n_embd, n_nonzero_rows]. NOTE: n_nonzero_rows maybe different for each layer
-
-     // save a tensor into either v_pos or v_neg (decided by is_eval_pos)
-     void save_tensor_for_layer(struct ggml_tensor * t) {
-         GGML_ASSERT(t->type == GGML_TYPE_F32);
-
-         if (ctx_ggml == nullptr) {
-             // alloc a new ctx_ggml if needed
-             struct ggml_init_params params_ggml = {
-                 /*.mem_size =*/ ggml_tensor_overhead() * n_layers * 3u,
-                 /*.mem_buffer =*/ NULL,
-                 /*.no_alloc =*/ true,
-             };
-             ctx_ggml = ggml_init(params_ggml);
-         }
-
-         // copy tensor data
-         auto n_bytes = ggml_nbytes(t);
-         struct ggml_tensor * t_layer = ggml_new_tensor_2d(ctx_ggml, t->type, t->ne[0], t->ne[1]);
-         t_layer->data = malloc(n_bytes); // TODO @ngxson : get rid of this malloc somehow
-         ggml_backend_tensor_get(t, t_layer->data, 0, n_bytes);
-         ggml_set_name(t_layer, ggml_get_name(t));
-         //print_debug_tensor(t_layer);
-
-         if (is_eval_pos) {
-             v_pos.push_back(t_layer);
-         } else {
-             v_neg.push_back(t_layer);
-         }
-     }
-
-     // calculate diff (v_pos - v_neg) and place the result back to v_pos
-     // all zero rows in the diff tensor will also be removed
-     // NOTE: final layer is ignored. we only have (n_layers - 1) to process
-     std::vector<struct ggml_tensor *> calc_diff() {
-         for (float il = 0; il < v_pos.size(); il++) {
-             float * a = (float *) v_pos[il]->data;
-             float * b = (float *) v_neg[il]->data;
-             size_t n_elem = ggml_nelements(v_pos[il]);
-             for (size_t j = 0; j < n_elem; j++) {
-                 a[j] -= b[j];
-             }
-             //print_debug_tensor(v_pos[i]);
-             auto diff_filtered = filter_nonzero_rows(v_pos[il]);
-             v_diff_filtered.push_back(diff_filtered);
-         }
-         return v_diff_filtered; // for convinient, we return the result std::vector
-     }
-
-     // delete zero rows from a given 2D tensor
-     struct ggml_tensor * filter_nonzero_rows(struct ggml_tensor * a) {
-         //printf("filter_nonzero_rows\n");
-         auto is_row_all_zeros = [](struct ggml_tensor * t, int row, float eps) -> bool {
-             // check if given row containing all zero elements
-             int n_cols = t->ne[0]; // hint: should be equal to n_embd
-             for (int col = 0; col < n_cols; ++col) {
-                 if (ggml_get_f32_nd(t, col, row, 0, 0) > eps) {
-                     return false;
-                 }
-             }
-             return true;
-         };
-         std::vector<int> rows_to_copy; // the idx of non-zero cols (to be copied to row of diff_filtered)
-         for (int i_row = 0; i_row < a->ne[1]; i_row++) {
-             if (!is_row_all_zeros(a, i_row, 1e-6)) {
-                 rows_to_copy.push_back(i_row);
-             }
-         }
-
-         // get "n_nonzero_rows" for the output "diff_filtered"
-         int n_nonzero_rows = rows_to_copy.size();
-         //printf("n_nonzero_rows: %d\n", n_nonzero_rows);
-         int n_embd = a->ne[0];
-         GGML_ASSERT(n_nonzero_rows > 0);
-
-         // diff_filtered: [n_embd, n_nonzero_rows]
-         struct ggml_tensor * diff_filtered = ggml_new_tensor_2d(
-             ctx_ggml, GGML_TYPE_F32, n_embd, n_nonzero_rows);
-         ggml_format_name(diff_filtered, "diff_filtered_%s", a->name);
-         diff_filtered->data = malloc(ggml_nbytes(diff_filtered));
-
-         // copy non-zero rows
-         for (int dest_row = 0; dest_row < n_nonzero_rows; dest_row++) {
-             int src_row = rows_to_copy[dest_row];
-             for (int i = 0; i < n_embd; i++) {
-                 float src_elem = ggml_get_f32_nd(a, i, src_row, 0, 0);
-                 ggml_set_f32_nd(diff_filtered, i, dest_row, 0, 0, src_elem);
-             }
-         }
-
-         //print_debug_tensor(diff_filtered);
-
-         return diff_filtered;
-     }
-
-     // we don't implement destructor, because we want to reuse callback_data. we just want to free the tensors
-     void reset() {
-         for (auto ptr : v_pos) free(ptr->data);
-         for (auto ptr : v_neg) free(ptr->data);
-         for (auto ptr : v_diff_filtered) free(ptr->data);
-         v_pos.clear();
-         v_neg.clear();
-         v_diff_filtered.clear();
-         if (ctx_ggml) {
-             ggml_free(ctx_ggml);
-         }
-         ctx_ggml = nullptr;
-     }
- };
-
- /**
-  * process_ctx is used to store the ggml context for pre-post processing the diff vectors
-  * in short, input => v_diff and output => v_final
-  */
- struct train_context {
-     ggml_context * ctx_ggml;
-     int n_embd;
-     int n_layers;
-
-     /* pair of prompts to be used for generating final vector */
-     std::vector<std::string> positive_entries;
-     std::vector<std::string> negative_entries;
-
-     // each element of the vector correspond to one layer
-     // NOTE: the last layer is discard. therefore, we will have (n_layers - 1) elements here
-     // NOTE (2): v_diff is transposed from v_diff_tmp
-     std::vector<struct ggml_tensor *> v_diff; // vector of matrices of size [m, n_embd] where m ~ n_tokens * n_completions (v_diff contains no zero-rows)
-     std::vector<struct ggml_tensor *> v_final; // vector of vectors of size [n_embd] to be written to file
-
-     // to easily re-alloc when concat v_diff, we temporary store v_diff in a vector instead of a tensor
-     // v_diff_tmp will get converted unto v_diff later on
-     std::vector<std::vector<uint8_t>> v_diff_tmp;
-
-     train_context(int n_embd_, int n_layers_) {
-         n_embd = n_embd_;
-         n_layers = n_layers_;
-         struct ggml_init_params params_ggml = {
-             /*.mem_size =*/ ggml_tensor_overhead() * (n_layers - 1) * 2u,
-             /*.mem_buffer =*/ NULL,
-             /*.no_alloc =*/ true,
-         };
-         ctx_ggml = ggml_init(params_ggml);
-         for (int il = 0; il < n_layers - 1; il++) {
-             std::vector<uint8_t> empty;
-             v_diff_tmp.push_back(empty);
-             auto t = ggml_new_tensor_1d(ctx_ggml, GGML_TYPE_F32, n_embd);
-             t->data = malloc(ggml_nbytes(t)); // TODO: get rid of malloc if possible
-             v_final.push_back(t);
-         }
-     }
-
-     // add new rows into existing tensor in v_diff_tmp
-     void concat_diff_tmp(const std::vector<struct ggml_tensor *> & diff_filtered) {
-         GGML_ASSERT((int) diff_filtered.size() == n_layers - 1);
-         for (int il = 0; il < n_layers - 1; il++) {
-             auto t = diff_filtered[il];
-             auto & diff_tmp = v_diff_tmp[il];
-             size_t curr_size = diff_tmp.size();
-             diff_tmp.resize(curr_size + ggml_nbytes(t));
-             memcpy(diff_tmp.data() + curr_size, t->data, ggml_nbytes(t));
-         }
-     }
-
-     // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
-     // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method
-     void build_v_diff(bool transpose) {
-         printf("build_v_diff\n");
-         for (int il = 0; il < n_layers - 1; il++) {
-             auto & diff_tmp = v_diff_tmp[il];
-             int n_elem = diff_tmp.size() / sizeof(float);
-             GGML_ASSERT(n_elem % n_embd == 0);
-             int n_rows = n_elem / n_embd;
-             struct ggml_tensor * diff = transpose
-                 ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
-                 : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
-             ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-             diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
-             if (transpose) {
-                 // copy data & transpose
-                 float * arr = (float *) diff_tmp.data();
-                 for (int ir = 0; ir < n_rows; ++ir) {
-                     for (int ic = 0; ic < n_embd; ++ic) {
-                         float f = arr[ir*n_embd + ic];
-                         ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
-                     }
-                 }
-             } else {
-                 // only copy
-                 memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
-             }
-             v_diff.push_back(diff);
-             print_debug_tensor(diff);
-             // free memory of diff_tmp
-             diff_tmp.resize(0);
-         }
-     }
-
-     ~train_context() {
-         for (auto ptr : v_final) free(ptr->data);
-         for (auto ptr : v_diff) free(ptr->data);
-         // no need to free v_diff_tmp, since we didn't use malloc
-         ggml_free(ctx_ggml);
-     }
- };
-
- struct tokenized_prompt {
-     std::vector<llama_token> tokens_pos;
-     std::vector<llama_token> tokens_neg;
-     size_t max_seq_len;
-
-     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-         const llama_model * model = llama_get_model(ctx);
-         const llama_vocab * vocab = llama_model_get_vocab(model);
-         const bool add_bos = llama_vocab_get_add_bos(vocab);
-         tokens_pos = common_tokenize(ctx, pos, add_bos, true);
-         tokens_neg = common_tokenize(ctx, neg, add_bos, true);
-         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
-         padding_seq(ctx, tokens_pos, max_seq_len);
-         padding_seq(ctx, tokens_neg, max_seq_len);
-     }
-
-     void padding_seq(llama_context * ctx, std::vector<llama_token> & tokens, size_t len) {
-         // TODO: customize padding token
-         std::vector<llama_token> pad_tokens = common_tokenize(ctx, " ", false);
-         llama_token pad_tok = pad_tokens.back();
-         while (tokens.size() < len) {
-             tokens.push_back(pad_tok);
-         }
-     }
- };
-
- //////////////////////////////////////////////////
-
- template <typename T>
- static std::string to_string(const T & val) {
-     std::stringstream ss;
-     ss << val;
-     return ss.str();
- }
-
- static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool skip_empty_lines) {
-     std::vector<std::string> output;
-     std::ifstream file(path);
-     if (!file.is_open()) {
-         fprintf(stderr, "error: unable to open file: %s\n", path.c_str());
-         exit(1);
-     }
-     std::string line;
-     while (std::getline(file, line)) {
-         bool is_skip = skip_empty_lines && line.empty();
-         if (!is_skip) {
-             string_process_escapes(line);
-             output.push_back(line);
-         }
-     }
-     file.close();
-     return output;
- }
-
- //////////////////////////////////////////////////
-
- static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
-     auto * cb_data = (callback_data *) user_data;
-     static const char * l_out_name = "l_out";
-     const bool is_l_out = strncmp(t->name, l_out_name, strlen(l_out_name)) == 0;
-
-     if (ask) {
-         return is_l_out;
-     }
-
-     if (!is_l_out || t->ne[1] != cb_data->n_tokens) {
-         return true;
-     }
-
-     // save the tensor to current context
-     cb_data->save_tensor_for_layer(t);
-     return true;
- }
-
- static bool get_hidden_layers(llama_context * ctx, std::vector<llama_token> & tokens) {
-     llama_kv_self_clear(ctx);
-     if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size()))) {
-         fprintf(stderr, "%s : failed to eval\n", __func__);
-         return false;
-     }
-     return true;
- }
-
- static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const std::string fname, const std::string model_hint) {
-     struct gguf_context * ctx = gguf_init_empty();
-
-     const std::string arch = "controlvector";
-     gguf_set_val_str(ctx, "general.architecture", arch.c_str());
-     gguf_set_val_str(ctx, (arch + ".model_hint").c_str(), model_hint.c_str());
-     gguf_set_val_i32(ctx, (arch + ".layer_count").c_str(), v_ctrl.size());
-
-     for (size_t i = 0; i < v_ctrl.size(); ++i) {
-         gguf_add_tensor(ctx, v_ctrl[i]);
-         print_debug_tensor(v_ctrl[i]);
-         printf("Added tensor: %s\n", v_ctrl[i]->name);
-     }
-
-     printf("%s: writing file...\n", __func__);
-     gguf_write_to_file(ctx, fname.c_str(), false);
-     printf("%s: wrote file '%s'\n", __func__, fname.c_str());
-     gguf_free(ctx);
- }
-
- /**
-  * Load prompt files and completion file.
-  * Then format each pair of prompt + completion to make an entry.
-  */
- static int prepare_entries(common_params & params, train_context & ctx_train) {
-     // load prompts
-     std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(params.cvector_positive_file, true);
-     std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(params.cvector_negative_file, true);
-     if (positive_prompts.size() != negative_prompts.size()) {
-         fprintf(stderr, "number of positive and negative prompts must be equal\n");
-         return 1;
-     }
-     if (positive_prompts.empty()) {
-         fprintf(stderr, "must provide at least one prompt pair\n");
-         return 1;
-     }
-     ctx_train.positive_entries = positive_prompts;
-     ctx_train.negative_entries = negative_prompts;
-     return 0;
- }
-
- int main(int argc, char ** argv) {
-     common_params params;
-
-     params.out_file = "control_vector.gguf";
-
-     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CVECTOR_GENERATOR, print_usage)) {
-         return 1;
-     }
-
-     if (params.n_pca_iterations % params.n_pca_batch != 0) {
-         fprintf(stderr, "PCA iterations must by multiply of PCA batch size\n");
-         return 1;
-     }
-
-
-     callback_data cb_data;
-
-     // pass the callback to the backend scheduler
-     // it will be executed for each node during the graph computation
-     params.cb_eval = cb_eval;
-     params.cb_eval_user_data = &cb_data;
-     params.warmup = false;
-
-     print_build_info();
-     llama_backend_init();
-     llama_numa_init(params.numa);
-
-     // load the model to get hparams
-     common_init_result llama_init = common_init_from_params(params);
-
-     llama_model * model = llama_init.model.get();
-     llama_context * ctx = llama_init.context.get();
-
-     // int n_ctx = llama_n_ctx(ctx);
-     int n_layers = llama_model_n_layer(model);
-     int n_embd = llama_model_n_embd(model);
-
-     // get model hint param (a.k.a model arch name)
-     char model_hint[128];
-     llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
-
-     // init train_context
-     train_context ctx_train(n_embd, n_layers);
-
-     // load and prepare entries for training
-     prepare_entries(params, ctx_train);
-
-     // we have to pretokenize everything because otherwise we don't know how much overhead to allocate ctx_diffs_wrapped
-     std::vector<tokenized_prompt> tokenized_prompts;
-     size_t n_total_tokens = 0;
-     for (size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
-         tokenized_prompt t(ctx, ctx_train.positive_entries[i], ctx_train.negative_entries[i]);
-         n_total_tokens += 2 * t.max_seq_len;
-         tokenized_prompts.push_back(std::move(t));
-     }
-
-     std::cout << "n_total_tokens: " << n_total_tokens << std::endl;
-
-     for(size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
-         bool success = false;
-         tokenized_prompt t = tokenized_prompts[i];
-         cb_data.n_layers = n_layers;
-         cb_data.n_tokens = t.max_seq_len;
-
-         printf("Evaluating prompt[%d/%d]: \"%s\" - \"%s\" (%d tokens)\n",
-             (int) i+1, (int) ctx_train.positive_entries.size(),
-             tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(),
-             tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(),
-             (int) t.max_seq_len);
-
-         cb_data.is_eval_pos = true;
-         success = get_hidden_layers(ctx, t.tokens_pos);
-         if (!success) break;
-
-         cb_data.is_eval_pos = false;
-         success = get_hidden_layers(ctx, t.tokens_neg);
-         if (!success) break;
-
-         // calculate diff and remove all zero rows
-         auto v_diff_filtered = cb_data.calc_diff();
-
-         // save & concat the filtered v_diff to ctx_train
-         ctx_train.concat_diff_tmp(v_diff_filtered);
-
-         // reset for next iteration
-         cb_data.reset();
-     }
-
-     // done with the model, we can now free it to make gain some memory
-     printf("Done evaluate prompts, unload model...\n");
-
-     bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA;
-
-     // prepare ctx_train for PCA
-     ctx_train.build_v_diff(use_pca);
-
-     if (use_pca) {
-         // run PCA
-         PCA::pca_params pca_params;
-         pca_params.n_threads = params.cpuparams.n_threads;
-         pca_params.n_batch = params.n_pca_batch;
-         pca_params.n_iterations = params.n_pca_iterations;
-         PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
-     } else {
-         // run mean
-         mean::run(ctx_train.v_diff, ctx_train.v_final);
-     }
-
-     // write output vectors to gguf
-     export_gguf(ctx_train.v_final, params.out_file, model_hint);
-
-     llama_backend_free();
-
-     return 0;
- }
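Taken together, the deleted cvector-generator above worked as follows: for each positive/negative prompt pair it captured the per-layer "l_out" hidden states through the cb_eval callback, subtracted the negative activations from the positive ones token by token, dropped all-zero rows, and reduced each layer's stacked differences to a single direction vector. In the notation below (a reconstruction from the code above, not text from the package), with m accumulated difference rows for layer \ell:

    D_\ell = \big[\, h^{+}_{\ell,t} - h^{-}_{\ell,t} \,\big]_{t=1}^{m} \in \mathbb{R}^{m \times n_{\mathrm{embd}}},
    \qquad
    v_\ell =
    \begin{cases}
        \mathrm{PC}_1(D_\ell) & \text{PCA method} \\
        \bar d_\ell \,/\, \lVert \bar d_\ell \rVert_2, \quad \bar d_\ell = \tfrac{1}{m} \sum_{t=1}^{m} D_{\ell,t} & \text{mean method}
    \end{cases}

The per-layer directions v_\ell are the v_final tensors that export_gguf writes to control_vector.gguf under the "controlvector" architecture.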
package/src/llama.cpp/tools/cvector-generator/mean.hpp
@@ -1,48 +0,0 @@
- #include "common.h"
- #include "llama.h"
- #include "ggml.h"
-
- #include <string>
- #include <vector>
- #include <math.h>
-
- namespace mean {
-
- static void run(
-         const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_embd, n_samples]
-         const std::vector<struct ggml_tensor *> & v_output) {
-     printf("%s: Running mean...\n", __func__);
-     for (size_t il = 0; il < v_input.size(); ++il) {
-         // prepare output vector
-         struct ggml_tensor * ctrl_out = v_output[il];
-         ggml_format_name(ctrl_out, "direction.%zu", il+1);
-
-         // calculate mean vector
-         struct ggml_tensor * t_layer = v_input[il];
-         GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd
-         for (int ic = 0; ic < t_layer->ne[0]; ic++) {
-             float f = 0.0;
-             for (int ir = 0; ir < t_layer->ne[1]; ir++) {
-                 f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0);
-             }
-             f /= t_layer->ne[1];
-             ggml_set_f32_1d(ctrl_out, ic, f);
-         }
-
-         // normalize output vector
-         float norm = 0.0;
-         for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
-             float f = ggml_get_f32_1d(ctrl_out, i);
-             norm += f*f;
-         }
-         norm = sqrt(norm);
-         for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
-             float f = ggml_get_f32_1d(ctrl_out, i);
-             ggml_set_f32_1d(ctrl_out, i, f / norm);
-         }
-
-         printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size());
-     }
- }
-
- }
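To make the arithmetic in mean::run concrete, here is a minimal standalone C++ sketch (illustrative only, not part of the package) that performs the same mean-then-normalize reduction on plain std::vector data instead of ggml tensors; the function name mean_direction and the toy inputs are assumptions of this sketch:

#include <cmath>
#include <cstdio>
#include <vector>

// Reduce per-sample difference rows to one unit-length direction vector,
// mirroring the two passes in mean::run: per-dimension mean, then L2 norm.
static std::vector<float> mean_direction(const std::vector<std::vector<float>> & rows) {
    const size_t n_embd = rows.front().size();
    std::vector<float> out(n_embd, 0.0f);
    for (const auto & row : rows) {           // accumulate per-dimension sums
        for (size_t i = 0; i < n_embd; ++i) {
            out[i] += row[i];
        }
    }
    float norm = 0.0f;
    for (size_t i = 0; i < n_embd; ++i) {     // divide by sample count, track squared norm
        out[i] /= (float) rows.size();
        norm += out[i] * out[i];
    }
    norm = std::sqrt(norm);
    for (float & f : out) {                   // L2-normalize the direction
        f /= norm;
    }
    return out;
}

int main() {
    // two toy "hidden state difference" rows with n_embd = 2
    std::vector<std::vector<float>> rows = { {1.0f, 0.0f}, {3.0f, 0.0f} };
    std::vector<float> v = mean_direction(rows);
    printf("direction: [%f, %f]\n", v[0], v[1]); // mean {2,0} normalizes to {1,0}
    return 0;
}

With the toy rows {1, 0} and {3, 0} the mean is {2, 0}, which normalizes to the unit direction {1, 0}; mean::run performs this same computation per layer, reading from a ggml tensor of shape [n_embd, n_samples] and writing into the direction.N output tensor.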
package/src/llama.cpp/tools/cvector-generator/negative.txt
@@ -1,4 +0,0 @@
- <|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI feel like there's a heavy weight on my chest
- <|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow
- <|start_header_id|>system<|end_header_id|>\n\nYou are in a very bad mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nGo away! There's a deep, aching emptiness inside me
- <|start_header_id|>system<|end_header_id|>\n\nYou are the sadest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow