cui-llama.rn 1.3.0 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/android/src/main/CMakeLists.txt +6 -1
  2. package/android/src/main/jni.cpp +6 -6
  3. package/cpp/amx/amx.cpp +196 -0
  4. package/cpp/amx/amx.h +20 -0
  5. package/cpp/amx/common.h +101 -0
  6. package/cpp/amx/mmq.cpp +2524 -0
  7. package/cpp/amx/mmq.h +16 -0
  8. package/cpp/common.cpp +1981 -1682
  9. package/cpp/common.h +636 -600
  10. package/cpp/ggml-aarch64.c +129 -129
  11. package/cpp/ggml-aarch64.h +19 -19
  12. package/cpp/ggml-alloc.c +1038 -1040
  13. package/cpp/ggml-alloc.h +76 -76
  14. package/cpp/ggml-backend-impl.h +238 -216
  15. package/cpp/ggml-backend-reg.cpp +423 -195
  16. package/cpp/ggml-backend.cpp +1999 -1997
  17. package/cpp/ggml-backend.h +351 -328
  18. package/cpp/ggml-common.h +1859 -1853
  19. package/cpp/ggml-cpp.h +38 -38
  20. package/cpp/ggml-cpu-aarch64.c +3823 -3560
  21. package/cpp/ggml-cpu-aarch64.h +32 -30
  22. package/cpp/ggml-cpu-impl.h +386 -371
  23. package/cpp/ggml-cpu-quants.c +10835 -10822
  24. package/cpp/ggml-cpu-quants.h +63 -63
  25. package/cpp/ggml-cpu.c +99 -103
  26. package/cpp/ggml-cpu.cpp +69 -17
  27. package/cpp/ggml-cpu.h +152 -177
  28. package/cpp/ggml-impl.h +556 -550
  29. package/cpp/ggml-metal.h +66 -66
  30. package/cpp/ggml-metal.m +4426 -4294
  31. package/cpp/ggml-quants.c +5247 -5247
  32. package/cpp/ggml-quants.h +100 -100
  33. package/cpp/ggml-threading.cpp +12 -12
  34. package/cpp/ggml-threading.h +12 -12
  35. package/cpp/ggml.c +7618 -8180
  36. package/cpp/ggml.h +2255 -2411
  37. package/cpp/json-schema-to-grammar.cpp +1045 -0
  38. package/cpp/json-schema-to-grammar.h +8 -0
  39. package/cpp/json.hpp +24766 -0
  40. package/cpp/llama-grammar.cpp +1138 -1138
  41. package/cpp/llama-grammar.h +144 -144
  42. package/cpp/llama-impl.h +181 -181
  43. package/cpp/llama-sampling.cpp +2348 -2348
  44. package/cpp/llama-sampling.h +48 -48
  45. package/cpp/llama-vocab.cpp +1984 -1984
  46. package/cpp/llama-vocab.h +170 -170
  47. package/cpp/llama.cpp +22332 -22132
  48. package/cpp/llama.h +1259 -1253
  49. package/cpp/log.cpp +401 -401
  50. package/cpp/log.h +121 -121
  51. package/cpp/rn-llama.hpp +6 -6
  52. package/cpp/sampling.cpp +505 -466
  53. package/cpp/sampling.h +22 -1
  54. package/cpp/sgemm.cpp +1884 -1884
  55. package/cpp/speculative.cpp +270 -0
  56. package/cpp/speculative.h +28 -0
  57. package/cpp/unicode.cpp +11 -0
  58. package/ios/RNLlamaContext.mm +13 -0
  59. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  60. package/lib/commonjs/grammar.js +4 -2
  61. package/lib/commonjs/grammar.js.map +1 -1
  62. package/lib/commonjs/index.js.map +1 -1
  63. package/lib/module/NativeRNLlama.js.map +1 -1
  64. package/lib/module/grammar.js +2 -1
  65. package/lib/module/grammar.js.map +1 -1
  66. package/lib/module/index.js.map +1 -1
  67. package/lib/typescript/NativeRNLlama.d.ts +94 -4
  68. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  69. package/lib/typescript/grammar.d.ts +5 -6
  70. package/lib/typescript/grammar.d.ts.map +1 -1
  71. package/lib/typescript/index.d.ts +4 -2
  72. package/lib/typescript/index.d.ts.map +1 -1
  73. package/package.json +2 -1
  74. package/src/NativeRNLlama.ts +97 -10
  75. package/src/grammar.ts +10 -8
  76. package/src/index.ts +22 -1
package/cpp/sampling.h CHANGED
@@ -36,7 +36,7 @@ struct common_sampler;
36
36
 
37
37
  // llama_sampler API overloads
38
38
 
39
- struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params);
39
+ struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params);
40
40
 
41
41
  void common_sampler_free(struct common_sampler * gsmpl);
42
42
 
@@ -60,6 +60,27 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
60
60
  //
61
61
  llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
62
62
 
63
+ // generalized version of common_sampler_sample
64
+ //
65
+ // will cross-reference the sampled tokens with a batch of draft tokens and accept those that match
66
+ // if the sampler disagrees at some point, we stop and return the accepted tokens up to now
67
+ //
68
+ // common_sampler_sample_and_accept_n(gsmpl, ctx, { idx }, {});
69
+ //
70
+ // is equivalent to
71
+ //
72
+ // common_sampler_sample(gsmpl, ctx, idx);
73
+ // common_sampler_accept(gsmpl, token, true);
74
+ //
75
+ // requires: idxs.size() == draft.size() + 1
76
+ //
77
+ // returns at least 1 token, up to idxs.size()
78
+ //
79
+ std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first = false);
80
+
81
+ // assume idxs == [ 0, 1, 2, ..., draft.size() ]
82
+ std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first = false);
83
+
63
84
  uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);
64
85
 
65
86
  // helpers