cui-llama.rn 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/android/src/main/CMakeLists.txt +5 -7
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
  3. package/android/src/main/jni.cpp +9 -9
  4. package/cpp/common.cpp +28 -44
  5. package/cpp/common.h +35 -14
  6. package/cpp/ggml-alloc.c +0 -1
  7. package/cpp/ggml-backend-impl.h +38 -20
  8. package/cpp/ggml-backend-reg.cpp +246 -92
  9. package/cpp/ggml-backend.h +1 -0
  10. package/cpp/ggml-common.h +42 -48
  11. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +642 -223
  12. package/cpp/ggml-cpu-aarch64.h +2 -26
  13. package/cpp/ggml-cpu-traits.cpp +36 -0
  14. package/cpp/ggml-cpu-traits.h +38 -0
  15. package/cpp/ggml-cpu.c +14122 -13971
  16. package/cpp/ggml-cpu.cpp +627 -715
  17. package/cpp/ggml-cpu.h +0 -17
  18. package/cpp/ggml-impl.h +22 -6
  19. package/cpp/ggml-metal.m +482 -24
  20. package/cpp/ggml-quants.c +0 -9
  21. package/cpp/ggml-threading.h +4 -2
  22. package/cpp/ggml.c +284 -178
  23. package/cpp/ggml.h +73 -25
  24. package/cpp/llama-grammar.cpp +15 -15
  25. package/cpp/llama-grammar.h +2 -5
  26. package/cpp/llama-sampling.cpp +35 -90
  27. package/cpp/llama-vocab.cpp +7 -2
  28. package/cpp/llama-vocab.h +1 -1
  29. package/cpp/llama.cpp +1782 -586
  30. package/cpp/llama.h +20 -19
  31. package/cpp/sampling.cpp +11 -16
  32. package/cpp/sgemm.cpp +265 -258
  33. package/cpp/sgemm.h +2 -2
  34. package/cpp/speculative.cpp +4 -0
  35. package/cpp/unicode.cpp +51 -51
  36. package/cpp/unicode.h +9 -10
  37. package/lib/commonjs/index.js +38 -1
  38. package/lib/commonjs/index.js.map +1 -1
  39. package/lib/module/index.js +36 -0
  40. package/lib/module/index.js.map +1 -1
  41. package/lib/typescript/NativeRNLlama.d.ts +2 -3
  42. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  43. package/lib/typescript/index.d.ts +36 -2
  44. package/lib/typescript/index.d.ts.map +1 -1
  45. package/package.json +1 -1
  46. package/src/NativeRNLlama.ts +3 -3
  47. package/src/index.ts +46 -2
  48. package/cpp/amx/amx.cpp +0 -196
  49. package/cpp/amx/amx.h +0 -20
  50. package/cpp/amx/common.h +0 -101
  51. package/cpp/amx/mmq.cpp +0 -2524
  52. package/cpp/amx/mmq.h +0 -16
  53. package/cpp/ggml-aarch64.c +0 -129
  54. package/cpp/ggml-aarch64.h +0 -19
package/cpp/amx/mmq.h DELETED
@@ -1,16 +0,0 @@
1
- #pragma once
2
- #include "common.h"
3
-
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
7
-
8
- size_t lm_ggml_backend_amx_get_alloc_size(const struct lm_ggml_tensor * tensor);
9
-
10
- void lm_ggml_backend_amx_convert_weight(struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
11
-
12
- void lm_ggml_backend_amx_mul_mat(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
13
-
14
- #ifdef __cplusplus
15
- }
16
- #endif
package/cpp/ggml-aarch64.c DELETED
@@ -1,129 +0,0 @@
1
- #define LM_GGML_COMMON_DECL_C
2
- #include "ggml-common.h"
3
-
4
- #include "ggml-aarch64.h"
5
- #include "ggml-impl.h"
6
- #include "ggml-quants.h"
7
- #include <assert.h>
8
-
9
- #define UNUSED LM_GGML_UNUSED
10
-
11
- static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
12
- block_q4_0x4 out;
13
-
14
- for (int i = 0; i < 4; i++) {
15
- out.d[i] = in[i].d;
16
- }
17
-
18
- const int end = QK4_0 * 2 / blck_size_interleave;
19
-
20
- if (blck_size_interleave == 8) {
21
- const uint64_t xor_mask = 0x8888888888888888ULL;
22
- for (int i = 0; i < end; ++i) {
23
- int src_id = i % 4;
24
- int src_offset = (i / 4) * blck_size_interleave;
25
- int dst_offset = i * blck_size_interleave;
26
-
27
- uint64_t elems;
28
- // Using memcpy to avoid unaligned memory accesses
29
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
30
- elems ^= xor_mask;
31
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
32
- }
33
- } else if (blck_size_interleave == 4) {
34
- const uint32_t xor_mask = 0x88888888;
35
- for (int i = 0; i < end; ++i) {
36
- int src_id = i % 4;
37
- int src_offset = (i / 4) * blck_size_interleave;
38
- int dst_offset = i * blck_size_interleave;
39
-
40
- uint32_t elems;
41
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
42
- elems ^= xor_mask;
43
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
44
- }
45
- } else {
46
- LM_GGML_ASSERT(false);
47
- }
48
-
49
- return out;
50
- }
51
-
52
- // interleave 8 block_q4_0s in blocks of blck_size_interleave
53
- // returns an interleaved block_q4_0x8
54
- // in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
55
- // first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
56
- static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
57
- block_q4_0x8 out;
58
-
59
- for (int i = 0; i < 8; i++) {
60
- out.d[i] = in[i].d;
61
- }
62
-
63
- const int end = QK4_0 * 4 / blck_size_interleave;
64
- const uint64_t xor_mask = 0x8888888888888888ULL;
65
-
66
- for (int i = 0; i < end; ++i) {
67
- int src_id = i % 8;
68
- int src_offset = (i / 8) * blck_size_interleave;
69
- int dst_offset = i * blck_size_interleave;
70
-
71
- uint64_t elems;
72
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
73
- elems ^= xor_mask;
74
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
75
- }
76
-
77
- return out;
78
- }
79
-
80
- static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
81
- assert(n_per_row % QK4_0 == 0);
82
- const int nb = n_per_row / QK4_0;
83
-
84
- void * out_ptr = NULL;
85
- if (nrows_interleaved == 8) {
86
- out_ptr = (block_q4_0x8 *) dst;
87
- }
88
- else if (nrows_interleaved == 4) {
89
- out_ptr = (block_q4_0x4 *) dst;
90
- }
91
- assert(nrows_interleaved <= 8);
92
- block_q4_0 dst_tmp[8];
93
-
94
- for (int b = 0; b < (nrow * n_per_row); b += nrows_interleaved * n_per_row) {
95
-
96
- for (int64_t x = 0; x < nb; x++) {
97
-
98
- for (int i = 0; i < nrows_interleaved; i++ ) {
99
- quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
100
- }
101
-
102
- if (nrows_interleaved == 8) {
103
- *(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
104
- out_ptr = (block_q4_0x8 *) out_ptr + 1;
105
- }
106
- else if (nrows_interleaved == 4) {
107
- *(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
108
- out_ptr = (block_q4_0x4 *) out_ptr + 1;
109
- }
110
- }
111
- }
112
-
113
- return ((nrow * n_per_row) / QK4_0 * sizeof(block_q4_0));
114
- }
115
-
116
- size_t quantize_q4_0_4x4(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
117
- UNUSED(quant_weights);
118
- return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 4);
119
- }
120
-
121
- size_t quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
122
- UNUSED(quant_weights);
123
- return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 8);
124
- }
125
-
126
- size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
127
- UNUSED(quant_weights);
128
- return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
129
- }
package/cpp/ggml-aarch64.h DELETED
@@ -1,19 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml.h"
4
-
5
- // GGML internal header
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- // Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
12
- size_t quantize_q4_0_4x4(const float * LM_GGML_RESTRICT src, void * LM_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
13
- size_t quantize_q4_0_4x8(const float * LM_GGML_RESTRICT src, void * LM_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
14
- size_t quantize_q4_0_8x8(const float * LM_GGML_RESTRICT src, void * LM_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
15
-
16
- #ifdef __cplusplus
17
- }
18
- #endif
19
-