@fugood/llama.node 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CMakeLists.txt +9 -0
  2. package/README.md +1 -1
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  10. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  11. package/bin/win32/arm64/llama-node.node +0 -0
  12. package/bin/win32/arm64/node.lib +0 -0
  13. package/bin/win32/x64/llama-node.node +0 -0
  14. package/bin/win32/x64/node.lib +0 -0
  15. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/arm64/node.lib +0 -0
  17. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  18. package/bin/win32-vulkan/x64/node.lib +0 -0
  19. package/lib/binding.ts +1 -1
  20. package/package.json +2 -1
  21. package/patches/llama.patch +22 -0
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/llama.cpp/CMakeLists.txt +14 -12
  24. package/src/llama.cpp/common/common.cpp +19 -5
  25. package/src/llama.cpp/common/common.h +2 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +9 -0
  27. package/src/llama.cpp/common/sampling.cpp +3 -3
  28. package/src/llama.cpp/common/sampling.h +1 -1
  29. package/src/llama.cpp/examples/CMakeLists.txt +3 -0
  30. package/src/llama.cpp/examples/embedding/embedding.cpp +10 -2
  31. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +56 -7
  32. package/src/llama.cpp/examples/llama.android/{app/src/main/cpp → llama}/CMakeLists.txt +1 -1
  33. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +49 -0
  34. package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/llama-android.cpp +14 -14
  35. package/src/llama.cpp/examples/llava/llava-cli.cpp +26 -6
  36. package/src/llama.cpp/examples/main/main.cpp +5 -1
  37. package/src/llama.cpp/examples/rpc/CMakeLists.txt +2 -0
  38. package/src/llama.cpp/examples/rpc/rpc-server.cpp +70 -0
  39. package/src/llama.cpp/examples/server/server.cpp +12 -16
  40. package/src/llama.cpp/examples/server/utils.hpp +1 -1
  41. package/src/llama.cpp/ggml-backend.c +2 -2
  42. package/src/llama.cpp/ggml-kompute.cpp +9 -3
  43. package/src/llama.cpp/ggml-quants.c +6 -0
  44. package/src/llama.cpp/ggml-rpc.cpp +1023 -0
  45. package/src/llama.cpp/ggml-rpc.h +24 -0
  46. package/src/llama.cpp/ggml-sycl.cpp +20 -143
  47. package/src/llama.cpp/ggml-vulkan.cpp +4 -2
  48. package/src/llama.cpp/ggml.c +116 -271
  49. package/src/llama.cpp/ggml.h +12 -15
  50. package/src/llama.cpp/llama.cpp +451 -265
  51. package/src/llama.cpp/llama.h +3 -0
  52. package/src/llama.cpp/requirements.txt +0 -1
  53. package/src/llama.cpp/tests/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/tests/test-backend-ops.cpp +16 -19
  55. package/src/llama.cpp/tests/test-grammar-integration.cpp +46 -0
  56. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +27 -3
  57. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -2
@@ -468,7 +468,6 @@ extern "C" {
         GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
-        GGML_OP_ALIBI,
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
@@ -520,6 +519,7 @@ extern "C" {
         GGML_UNARY_OP_TANH,
         GGML_UNARY_OP_ELU,
         GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_SIGMOID,
         GGML_UNARY_OP_GELU,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
@@ -1074,6 +1074,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sigmoid(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_gelu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
@@ -1428,15 +1436,13 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

-    // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
+    // fused soft_max(a*scale + mask*(ALiBi slope))
     // mask is optional
-    // pos is required when max_bias > 0.0f
     // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * mask,
-            struct ggml_tensor  * pos,
             float                 scale,
             float                 max_bias);

@@ -1538,16 +1544,6 @@ extern "C" {
             float                 xpos_base,
             bool                  xpos_down);

-    // alibi position embedding
-    // in-place, returns view(a)
-    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            int                   n_past,
-            int                   n_head,
-            float                 bias_max),
-            "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
-
     // clamp
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_clamp(
@@ -1744,7 +1740,8 @@ extern "C" {
             struct ggml_tensor  * k,
             struct ggml_tensor  * v,
             struct ggml_tensor  * mask,
-            float                 scale);
+            float                 scale,
+            float                 max_bias);

     GGML_API void ggml_flash_attn_ext_set_prec(
             struct ggml_tensor  * a,