cui-llama.rn 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnllama/LlamaContext.java +11 -3
- package/android/src/main/jni.cpp +28 -4
- package/cpp/common.cpp +3 -0
- package/cpp/common.h +2 -0
- package/cpp/ggml-aarch64.c +1794 -1368
- package/cpp/ggml-alloc.c +6 -0
- package/cpp/ggml-backend-impl.h +10 -9
- package/cpp/ggml-backend.c +25 -0
- package/cpp/ggml-backend.h +2 -1
- package/cpp/ggml-cpu-impl.h +614 -0
- package/cpp/ggml-impl.h +13 -609
- package/cpp/ggml-metal.m +1 -0
- package/cpp/ggml-quants.c +1 -0
- package/cpp/ggml.c +457 -144
- package/cpp/ggml.h +37 -8
- package/cpp/llama-impl.h +2 -0
- package/cpp/llama-sampling.cpp +7 -5
- package/cpp/llama-vocab.cpp +1 -5
- package/cpp/llama-vocab.h +9 -5
- package/cpp/llama.cpp +202 -30
- package/cpp/llama.h +2 -0
- package/cpp/log.cpp +1 -1
- package/cpp/log.h +2 -0
- package/cpp/sampling.cpp +9 -1
- package/cpp/sgemm.cpp +1 -0
- package/cpp/unicode.cpp +1 -0
- package/lib/commonjs/index.js +8 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +8 -1
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +18 -4
package/cpp/ggml-alloc.c
CHANGED
@@ -294,6 +294,12 @@ static void lm_ggml_dyn_tallocr_reset(struct lm_ggml_dyn_tallocr * alloc) {
|
|
294
294
|
alloc->free_blocks[0].offset = 0;
|
295
295
|
alloc->free_blocks[0].size = SIZE_MAX/2; // restrict maximum size of a measure allocator to half size_t max to avoid overflows
|
296
296
|
alloc->max_size = 0;
|
297
|
+
|
298
|
+
#ifdef LM_GGML_ALLOCATOR_DEBUG
|
299
|
+
for (int i = 0; i < 1024; i++) {
|
300
|
+
alloc->allocated_tensors[i].tensor = NULL;
|
301
|
+
}
|
302
|
+
#endif
|
297
303
|
}
|
298
304
|
|
299
305
|
static struct lm_ggml_dyn_tallocr * lm_ggml_dyn_tallocr_new(size_t alignment) {
|
package/cpp/ggml-backend-impl.h
CHANGED
@@ -38,15 +38,16 @@ extern "C" {
|
|
38
38
|
typedef void * lm_ggml_backend_buffer_context_t;
|
39
39
|
|
40
40
|
struct lm_ggml_backend_buffer_i {
|
41
|
-
const char * (*LM_GGML_CALL get_name) (lm_ggml_backend_buffer_t buffer);
|
42
|
-
void (*LM_GGML_CALL free_buffer)(lm_ggml_backend_buffer_t buffer);
|
43
|
-
void * (*LM_GGML_CALL get_base) (lm_ggml_backend_buffer_t buffer);
|
44
|
-
void (*LM_GGML_CALL init_tensor)(lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor);
|
45
|
-
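void (*LM_GGML_CALL set_tensor) (lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size);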
|
46
|
-
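void (*LM_GGML_CALL get_tensor) (lm_ggml_backend_buffer_t buffer, const struct lm_ggml_tensor * tensor, void * data, size_t offset, size_t size);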
|
47
|
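-
bool (*LM_GGML_CALL cpy_tensor) (lm_ggml_backend_buffer_t buffer, const struct lm_ggml_tensor * src, struct lm_ggml_tensor * dst); // dst is in the buffer, src may be in any buffer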
|
48
|
-
void (*LM_GGML_CALL clear) (lm_ggml_backend_buffer_t buffer, uint8_t value);
|
49
|
-
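void (*LM_GGML_CALL reset) (lm_ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras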
|
41
|
+
const char * (*LM_GGML_CALL get_name) (lm_ggml_backend_buffer_t buffer);
|
42
|
+
void (*LM_GGML_CALL free_buffer) (lm_ggml_backend_buffer_t buffer);
|
43
|
+
void * (*LM_GGML_CALL get_base) (lm_ggml_backend_buffer_t buffer);
|
44
|
+
void (*LM_GGML_CALL init_tensor) (lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor);
|
45
|
+
void (*LM_GGML_CALL memset_tensor) (lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
46
|
+
void (*LM_GGML_CALL set_tensor) (lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
47
|
+
void (*LM_GGML_CALL get_tensor) (lm_ggml_backend_buffer_t buffer, const struct lm_ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
48
|
+
bool (*LM_GGML_CALL cpy_tensor) (lm_ggml_backend_buffer_t buffer, const struct lm_ggml_tensor * src, struct lm_ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
|
49
|
+
void (*LM_GGML_CALL clear) (lm_ggml_backend_buffer_t buffer, uint8_t value);
|
50
|
+
void (*LM_GGML_CALL reset) (lm_ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
|
50
51
|
};
|
51
52
|
|
52
53
|
struct lm_ggml_backend_buffer {
|
package/cpp/ggml-backend.c
CHANGED
@@ -246,6 +246,22 @@ LM_GGML_CALL void lm_ggml_backend_tensor_get(const struct lm_ggml_tensor * tenso
|
|
246
246
|
buf->iface.get_tensor(buf, tensor, data, offset, size);
|
247
247
|
}
|
248
248
|
|
249
|
+
LM_GGML_API LM_GGML_CALL void lm_ggml_backend_tensor_memset(struct lm_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
250
|
+
lm_ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
251
|
+
|
252
|
+
LM_GGML_ASSERT(buf != NULL && "tensor buffer not set");
|
253
|
+
LM_GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
254
|
+
LM_GGML_ASSERT(offset + size <= lm_ggml_nbytes(tensor) && "tensor write out of bounds");
|
255
|
+
|
256
|
+
if (!size) {
|
257
|
+
return;
|
258
|
+
}
|
259
|
+
|
260
|
+
LM_GGML_ASSERT(buf->iface.memset_tensor != NULL && "memset not supported by backend buffer");
|
261
|
+
|
262
|
+
buf->iface.memset_tensor(buf, tensor, value, offset, size);
|
263
|
+
}
|
264
|
+
|
249
265
|
void lm_ggml_backend_synchronize(lm_ggml_backend_t backend) {
|
250
266
|
if (backend->iface.synchronize == NULL) {
|
251
267
|
return;
|
@@ -569,6 +585,12 @@ LM_GGML_CALL static void lm_ggml_backend_cpu_buffer_free_buffer(lm_ggml_backend_
|
|
569
585
|
free(buffer->context);
|
570
586
|
}
|
571
587
|
|
588
|
+
LM_GGML_CALL static void lm_ggml_backend_cpu_buffer_memset_tensor(lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
589
|
+
memset((char *)tensor->data + offset, value, size);
|
590
|
+
|
591
|
+
LM_GGML_UNUSED(buffer);
|
592
|
+
}
|
593
|
+
|
572
594
|
LM_GGML_CALL static void lm_ggml_backend_cpu_buffer_set_tensor(lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
573
595
|
memcpy((char *)tensor->data + offset, data, size);
|
574
596
|
|
@@ -600,6 +622,7 @@ static struct lm_ggml_backend_buffer_i cpu_backend_buffer_i = {
|
|
600
622
|
/* .free_buffer = */ lm_ggml_backend_cpu_buffer_free_buffer,
|
601
623
|
/* .get_base = */ lm_ggml_backend_cpu_buffer_get_base,
|
602
624
|
/* .init_tensor = */ NULL, // no initialization required
|
625
|
+
/* .memset_tensor = */ lm_ggml_backend_cpu_buffer_memset_tensor,
|
603
626
|
/* .set_tensor = */ lm_ggml_backend_cpu_buffer_set_tensor,
|
604
627
|
/* .get_tensor = */ lm_ggml_backend_cpu_buffer_get_tensor,
|
605
628
|
/* .cpy_tensor = */ lm_ggml_backend_cpu_buffer_cpy_tensor,
|
@@ -613,6 +636,7 @@ static struct lm_ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
|
|
613
636
|
/* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
|
614
637
|
/* .get_base = */ lm_ggml_backend_cpu_buffer_get_base,
|
615
638
|
/* .init_tensor = */ NULL, // no initialization required
|
639
|
+
/* .memset_tensor = */ lm_ggml_backend_cpu_buffer_memset_tensor,
|
616
640
|
/* .set_tensor = */ lm_ggml_backend_cpu_buffer_set_tensor,
|
617
641
|
/* .get_tensor = */ lm_ggml_backend_cpu_buffer_get_tensor,
|
618
642
|
/* .cpy_tensor = */ lm_ggml_backend_cpu_buffer_cpy_tensor,
|
@@ -980,6 +1004,7 @@ static struct lm_ggml_backend_buffer_i lm_ggml_backend_multi_buffer_context_inte
|
|
980
1004
|
/* .free_buffer = */ lm_ggml_backend_multi_buffer_free_buffer,
|
981
1005
|
/* .get_base = */ NULL,
|
982
1006
|
/* .init_tensor = */ NULL,
|
1007
|
+
/* .memset_tensor = */ NULL,
|
983
1008
|
/* .set_tensor = */ NULL,
|
984
1009
|
/* .get_tensor = */ NULL,
|
985
1010
|
/* .cpy_tensor = */ NULL,
|
package/cpp/ggml-backend.h
CHANGED
@@ -66,6 +66,7 @@ extern "C" {
|
|
66
66
|
// "offset" refers to the offset of the tensor data for setting/getting data
|
67
67
|
LM_GGML_API LM_GGML_CALL void lm_ggml_backend_tensor_set( struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
68
68
|
LM_GGML_API LM_GGML_CALL void lm_ggml_backend_tensor_get(const struct lm_ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
69
|
+
LM_GGML_API LM_GGML_CALL void lm_ggml_backend_tensor_memset( struct lm_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
69
70
|
|
70
71
|
LM_GGML_API void lm_ggml_backend_synchronize(lm_ggml_backend_t backend);
|
71
72
|
|
@@ -122,7 +123,7 @@ extern "C" {
|
|
122
123
|
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
|
123
124
|
|
124
125
|
LM_GGML_API size_t lm_ggml_backend_reg_get_count(void);
|
125
|
-
LM_GGML_API size_t lm_ggml_backend_reg_find_by_name(const char * name);
|
126
|
+
LM_GGML_API size_t lm_ggml_backend_reg_find_by_name(const char * name); // returns index of backend with name, or SIZE_MAX if not found
|
126
127
|
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
|
127
128
|
LM_GGML_API const char * lm_ggml_backend_reg_get_name(size_t i);
|
128
129
|
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
|