@fugood/llama.node 0.3.12 → 0.3.13

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +1 -0
  18. package/package.json +1 -1
  19. package/src/LlamaCompletionWorker.cpp +14 -0
  20. package/src/LlamaContext.cpp +13 -4
  21. package/src/llama.cpp/.github/workflows/build.yml +35 -3
  22. package/src/llama.cpp/.github/workflows/docker.yml +2 -0
  23. package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
  24. package/src/llama.cpp/common/CMakeLists.txt +20 -3
  25. package/src/llama.cpp/common/arg.cpp +180 -3
  26. package/src/llama.cpp/common/chat-template.hpp +21 -7
  27. package/src/llama.cpp/common/chat.cpp +220 -101
  28. package/src/llama.cpp/common/chat.hpp +3 -0
  29. package/src/llama.cpp/common/common.h +15 -7
  30. package/src/llama.cpp/common/llguidance.cpp +3 -3
  31. package/src/llama.cpp/common/log.cpp +1 -0
  32. package/src/llama.cpp/common/log.h +2 -1
  33. package/src/llama.cpp/common/minja.hpp +24 -9
  34. package/src/llama.cpp/common/sampling.cpp +52 -46
  35. package/src/llama.cpp/common/speculative.h +1 -1
  36. package/src/llama.cpp/docs/build.md +2 -2
  37. package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -1
  38. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
  39. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
  40. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
  41. package/src/llama.cpp/examples/run/run.cpp +5 -12
  42. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
  43. package/src/llama.cpp/examples/server/httplib.h +381 -292
  44. package/src/llama.cpp/examples/server/server.cpp +58 -47
  45. package/src/llama.cpp/examples/server/utils.hpp +7 -5
  46. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -1
  47. package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
  48. package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
  49. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  50. package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
  51. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +6 -12
  52. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +852 -268
  53. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +200 -107
  54. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -5
  55. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
  56. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +2 -2
  57. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +26 -4
  58. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +6 -7
  59. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +812 -569
  60. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +25 -1
  61. package/src/llama.cpp/ggml/src/ggml.c +1 -1
  62. package/src/llama.cpp/include/llama.h +14 -10
  63. package/src/llama.cpp/src/llama-grammar.cpp +1 -1
  64. package/src/llama.cpp/src/llama-grammar.h +1 -1
  65. package/src/llama.cpp/src/llama-impl.h +6 -6
  66. package/src/llama.cpp/src/llama-kv-cache.h +1 -1
  67. package/src/llama.cpp/src/llama-mmap.h +1 -0
  68. package/src/llama.cpp/src/llama-model.cpp +1 -1
  69. package/src/llama.cpp/src/llama-sampling.cpp +131 -57
  70. package/src/llama.cpp/src/llama.cpp +7 -5
  71. package/src/llama.cpp/src/unicode.cpp +9 -2
  72. package/src/llama.cpp/tests/test-backend-ops.cpp +5 -5
  73. package/src/llama.cpp/tests/test-chat.cpp +237 -69
  74. package/src/llama.cpp/tests/test-gguf.cpp +4 -4
  75. package/src/llama.cpp/tests/test-sampling.cpp +15 -0
@@ -103,11 +103,10 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
103
103
  name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
104
104
 
105
105
  auto global_mem_size = prop.get_global_mem_size()/1000000;
106
- std::string xmx = gpu_has_xmx(device) ? "yes" : "no";
107
- GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|%14s|\n", id, device_type.c_str(),
106
+ GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
108
107
  name.c_str(), version.c_str(), prop.get_max_compute_units(),
109
108
  prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
110
- global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str(), xmx.c_str());
109
+ global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
111
110
  }
112
111
 
113
112
  void ggml_backend_sycl_print_sycl_devices() {
@@ -118,16 +117,16 @@ void ggml_backend_sycl_print_sycl_devices() {
118
117
 
119
118
  GGML_LOG_INFO(
120
119
  "| | | | "
121
- " |Max | |Max |Global | | XMX |\n");
120
+ " |Max | |Max |Global | |\n");
122
121
  GGML_LOG_INFO(
123
122
  "| | | | "
124
- " |compute|Max work|sub |mem | | or |\n");
123
+ " |compute|Max work|sub |mem | |\n");
125
124
  GGML_LOG_INFO(
126
125
  "|ID| Device Type| "
127
- "Name|Version|units |group |group|size | Driver version| Tensor Cores |\n");
126
+ "Name|Version|units |group |group|size | Driver version|\n");
128
127
  GGML_LOG_INFO(
129
128
  "|--|-------------------|---------------------------------------|------"
130
- "-|-------|--------|-----|-------|---------------------|--------------|\n");
129
+ "-|-------|--------|-----|-------|---------------------|\n");
131
130
 
132
131
  for (int id = 0; id < device_count; ++id) {
133
132
  sycl::device device = dpct::dev_mgr::instance().get_device(id);