cui-llama.rn 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/android/src/main/CMakeLists.txt +2 -2
  2. package/android/src/main/jni.cpp +12 -10
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/chat-template.hpp +529 -529
  12. package/cpp/chat.cpp +959 -265
  13. package/cpp/chat.h +135 -0
  14. package/cpp/common.cpp +2064 -1996
  15. package/cpp/common.h +700 -744
  16. package/cpp/ggml-alloc.c +1039 -1030
  17. package/cpp/ggml-alloc.h +1 -1
  18. package/cpp/ggml-backend-impl.h +255 -255
  19. package/cpp/ggml-backend-reg.cpp +586 -582
  20. package/cpp/ggml-backend.cpp +2004 -2002
  21. package/cpp/ggml-backend.h +354 -354
  22. package/cpp/ggml-common.h +1851 -1851
  23. package/cpp/ggml-cpp.h +39 -39
  24. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  25. package/cpp/ggml-cpu-aarch64.h +8 -8
  26. package/cpp/ggml-cpu-impl.h +531 -380
  27. package/cpp/ggml-cpu-quants.c +12527 -11517
  28. package/cpp/ggml-cpu-traits.cpp +36 -36
  29. package/cpp/ggml-cpu-traits.h +38 -38
  30. package/cpp/ggml-cpu.c +15766 -14485
  31. package/cpp/ggml-cpu.cpp +655 -633
  32. package/cpp/ggml-cpu.h +138 -135
  33. package/cpp/ggml-impl.h +567 -567
  34. package/cpp/ggml-metal-impl.h +235 -0
  35. package/cpp/ggml-metal.h +66 -66
  36. package/cpp/ggml-metal.m +5146 -5002
  37. package/cpp/ggml-opt.cpp +854 -854
  38. package/cpp/ggml-opt.h +216 -216
  39. package/cpp/ggml-quants.c +5238 -5238
  40. package/cpp/ggml-threading.h +14 -14
  41. package/cpp/ggml.c +6529 -6524
  42. package/cpp/ggml.h +2198 -2194
  43. package/cpp/gguf.cpp +1329 -1329
  44. package/cpp/gguf.h +202 -202
  45. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  46. package/cpp/json-schema-to-grammar.h +21 -22
  47. package/cpp/json.hpp +24766 -24766
  48. package/cpp/llama-adapter.cpp +347 -347
  49. package/cpp/llama-adapter.h +74 -74
  50. package/cpp/llama-arch.cpp +1513 -1492
  51. package/cpp/llama-arch.h +403 -402
  52. package/cpp/llama-batch.cpp +368 -368
  53. package/cpp/llama-batch.h +88 -88
  54. package/cpp/llama-chat.cpp +588 -587
  55. package/cpp/llama-chat.h +53 -53
  56. package/cpp/llama-context.cpp +1775 -1775
  57. package/cpp/llama-context.h +128 -128
  58. package/cpp/llama-cparams.cpp +1 -1
  59. package/cpp/llama-cparams.h +37 -37
  60. package/cpp/llama-cpp.h +30 -30
  61. package/cpp/llama-grammar.cpp +1219 -1219
  62. package/cpp/llama-grammar.h +173 -164
  63. package/cpp/llama-hparams.cpp +71 -71
  64. package/cpp/llama-hparams.h +139 -139
  65. package/cpp/llama-impl.cpp +167 -167
  66. package/cpp/llama-impl.h +61 -61
  67. package/cpp/llama-kv-cache.cpp +718 -718
  68. package/cpp/llama-kv-cache.h +219 -218
  69. package/cpp/llama-mmap.cpp +600 -590
  70. package/cpp/llama-mmap.h +68 -68
  71. package/cpp/llama-model-loader.cpp +1124 -1124
  72. package/cpp/llama-model-loader.h +167 -167
  73. package/cpp/llama-model.cpp +4087 -4023
  74. package/cpp/llama-model.h +370 -370
  75. package/cpp/llama-sampling.cpp +2558 -2525
  76. package/cpp/llama-sampling.h +32 -32
  77. package/cpp/llama-vocab.cpp +3264 -3252
  78. package/cpp/llama-vocab.h +125 -125
  79. package/cpp/llama.cpp +10284 -10137
  80. package/cpp/llama.h +1354 -1340
  81. package/cpp/log.cpp +393 -423
  82. package/cpp/log.h +132 -132
  83. package/cpp/minja/chat-template.hpp +529 -0
  84. package/cpp/minja/minja.hpp +2915 -0
  85. package/cpp/minja.hpp +2915 -2883
  86. package/cpp/rn-llama.cpp +20 -37
  87. package/cpp/rn-llama.h +12 -2
  88. package/cpp/sampling.cpp +570 -532
  89. package/cpp/sgemm.cpp +2598 -2598
  90. package/cpp/sgemm.h +14 -14
  91. package/cpp/speculative.cpp +278 -277
  92. package/cpp/speculative.h +28 -28
  93. package/package.json +1 -1
  94. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  95. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  96. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  97. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  98. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  99. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  100. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  101. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  102. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  103. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  104. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  105. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  106. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  107. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  108. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  109. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  110. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  111. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  112. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  113. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  114. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  115. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  116. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  117. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  118. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  119. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  120. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  122. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  124. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  125. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  126. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  127. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  128. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  129. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  130. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  132. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  134. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  135. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  136. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  194. package/android/src/main/build-arm64/Makefile +0 -1862
  195. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  196. package/cpp/chat.hpp +0 -55
  197. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-batch.h CHANGED
@@ -1,88 +1,88 @@
1
- #pragma once
2
-
3
- #include "llama.h"
4
-
5
- #include <array>
6
- #include <vector>
7
-
8
- // very similar to llama_batch,
9
- // but has more metadata about sequences
10
- struct llama_ubatch {
11
- bool equal_seqs;
12
- // TODO: whole_seqs for embeddings?
13
-
14
- uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs)
15
- uint32_t n_seq_tokens; // tokens per sequence
16
- uint32_t n_seqs;
17
-
18
- llama_token * token; // [n_tokens]
19
- float * embd; // [n_embd, n_tokens]
20
- llama_pos * pos; // [n_tokens]
21
- int32_t * n_seq_id; // [n_seqs]
22
- llama_seq_id ** seq_id; // [n_seqs]
23
- int8_t * output; // [n_tokens]
24
- };
25
-
26
- struct llama_sbatch_seq {
27
- int32_t n_seq_id;
28
-
29
- llama_seq_id * seq_id;
30
-
31
- size_t offset;
32
- size_t length;
33
- };
34
-
35
- // sequence-length-aware batch splitting
36
- struct llama_sbatch {
37
- // tokens left in this batch
38
- size_t n_tokens;
39
-
40
- size_t n_embd;
41
-
42
- bool logits_all; // TODO: remove once lctx.logits_all is removed too
43
-
44
- // sorted indices into the batch
45
- std::vector<size_t> ids;
46
- // batch indices of the output
47
- std::vector<size_t> out_ids;
48
- std::vector<llama_sbatch_seq> seq;
49
-
50
- const llama_batch * batch = nullptr;
51
-
52
- // buffers for the ubatch
53
- std::vector<llama_token> ubatch_token;
54
- std::vector<float> ubatch_embd;
55
- std::vector<llama_pos> ubatch_pos;
56
- std::vector<int32_t> ubatch_n_seq_id;
57
- std::vector<llama_seq_id *> ubatch_seq_id;
58
- std::vector<int8_t> ubatch_output;
59
-
60
- llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);
61
-
62
- void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);
63
-
64
- // simple split, unknown number of sequences of unequal lengths
65
- llama_ubatch split_simple(size_t n_ubatch);
66
-
67
- // make batches of equal-length sequences
68
- llama_ubatch split_equal(size_t n_ubatch);
69
-
70
- // sequence-wise split
71
- llama_ubatch split_seq(size_t n_ubatch);
72
-
73
- void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
74
- };
75
-
76
- // temporary allocate memory for the input batch if needed
77
- struct llama_batch_allocr {
78
- struct llama_batch batch;
79
-
80
- std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
81
- std::vector<llama_pos> pos;
82
- std::vector<int32_t> n_seq_id;
83
- std::vector<llama_seq_id *> seq_id;
84
- std::vector<int8_t> logits;
85
-
86
- // optionally fulfill the batch returned by llama_batch_get_one
87
- llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
88
- };
1
+ #pragma once
2
+
3
+ #include "llama.h"
4
+
5
+ #include <array>
6
+ #include <vector>
7
+
8
+ // very similar to llama_batch,
9
+ // but has more metadata about sequences
10
+ struct llama_ubatch {
11
+ bool equal_seqs;
12
+ // TODO: whole_seqs for embeddings?
13
+
14
+ uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs)
15
+ uint32_t n_seq_tokens; // tokens per sequence
16
+ uint32_t n_seqs;
17
+
18
+ llama_token * token; // [n_tokens]
19
+ float * embd; // [n_embd, n_tokens]
20
+ llama_pos * pos; // [n_tokens]
21
+ int32_t * n_seq_id; // [n_seqs]
22
+ llama_seq_id ** seq_id; // [n_seqs]
23
+ int8_t * output; // [n_tokens]
24
+ };
25
+
26
+ struct llama_sbatch_seq {
27
+ int32_t n_seq_id;
28
+
29
+ llama_seq_id * seq_id;
30
+
31
+ size_t offset;
32
+ size_t length;
33
+ };
34
+
35
+ // sequence-length-aware batch splitting
36
+ struct llama_sbatch {
37
+ // tokens left in this batch
38
+ size_t n_tokens;
39
+
40
+ size_t n_embd;
41
+
42
+ bool logits_all; // TODO: remove once lctx.logits_all is removed too
43
+
44
+ // sorted indices into the batch
45
+ std::vector<size_t> ids;
46
+ // batch indices of the output
47
+ std::vector<size_t> out_ids;
48
+ std::vector<llama_sbatch_seq> seq;
49
+
50
+ const llama_batch * batch = nullptr;
51
+
52
+ // buffers for the ubatch
53
+ std::vector<llama_token> ubatch_token;
54
+ std::vector<float> ubatch_embd;
55
+ std::vector<llama_pos> ubatch_pos;
56
+ std::vector<int32_t> ubatch_n_seq_id;
57
+ std::vector<llama_seq_id *> ubatch_seq_id;
58
+ std::vector<int8_t> ubatch_output;
59
+
60
+ llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);
61
+
62
+ void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);
63
+
64
+ // simple split, unknown number of sequences of unequal lengths
65
+ llama_ubatch split_simple(size_t n_ubatch);
66
+
67
+ // make batches of equal-length sequences
68
+ llama_ubatch split_equal(size_t n_ubatch);
69
+
70
+ // sequence-wise split
71
+ llama_ubatch split_seq(size_t n_ubatch);
72
+
73
+ void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
74
+ };
75
+
76
+ // temporary allocate memory for the input batch if needed
77
+ struct llama_batch_allocr {
78
+ struct llama_batch batch;
79
+
80
+ std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
81
+ std::vector<llama_pos> pos;
82
+ std::vector<int32_t> n_seq_id;
83
+ std::vector<llama_seq_id *> seq_id;
84
+ std::vector<int8_t> logits;
85
+
86
+ // optionally fulfill the batch returned by llama_batch_get_one
87
+ llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
88
+ };