cui-llama.rn 1.2.4 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -2
- package/android/src/main/CMakeLists.txt +1 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +0 -3
- package/android/src/main/jni.cpp +2 -4
- package/cpp/common.cpp +6 -14
- package/cpp/common.h +59 -40
- package/cpp/ggml-aarch64.c +269 -0
- package/cpp/ggml-backend-impl.h +4 -15
- package/cpp/ggml-backend.cpp +1640 -1604
- package/cpp/ggml-backend.h +13 -25
- package/cpp/ggml-cpp.h +38 -0
- package/cpp/ggml-cpu.c +13720 -0
- package/cpp/ggml-cpu.h +150 -0
- package/cpp/ggml-impl.h +87 -0
- package/cpp/ggml-metal.m +185 -71
- package/cpp/ggml-quants.c +38 -51
- package/cpp/ggml.c +4442 -19516
- package/cpp/ggml.h +25 -146
- package/cpp/llama-sampling.cpp +392 -241
- package/cpp/llama-sampling.h +18 -0
- package/cpp/llama-vocab.cpp +16 -0
- package/cpp/llama-vocab.h +5 -0
- package/cpp/llama.cpp +2084 -2007
- package/cpp/llama.h +13 -11
- package/cpp/sampling.cpp +19 -11
- package/cpp/sgemm.cpp +57 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +0 -1
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +0 -1
package/cpp/ggml-backend.h
CHANGED
@@ -114,11 +114,12 @@ extern "C" {
|
|
114
114
|
//
|
115
115
|
|
116
116
|
enum lm_ggml_backend_dev_type {
|
117
|
+
// CPU device using system memory
|
117
118
|
LM_GGML_BACKEND_DEVICE_TYPE_CPU,
|
119
|
+
// GPU device using dedicated memory
|
118
120
|
LM_GGML_BACKEND_DEVICE_TYPE_GPU,
|
119
|
-
// devices
|
120
|
-
|
121
|
-
LM_GGML_BACKEND_DEVICE_TYPE_GPU_FULL
|
121
|
+
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
|
122
|
+
LM_GGML_BACKEND_DEVICE_TYPE_ACCEL
|
122
123
|
};
|
123
124
|
|
124
125
|
// functionality supported by the device
|
@@ -167,10 +168,14 @@ extern "C" {
|
|
167
168
|
LM_GGML_API lm_ggml_backend_dev_t lm_ggml_backend_reg_dev_get(lm_ggml_backend_reg_t reg, size_t index);
|
168
169
|
LM_GGML_API void * lm_ggml_backend_reg_get_proc_address(lm_ggml_backend_reg_t reg, const char * name);
|
169
170
|
|
171
|
+
// Common functions that may be obtained using lm_ggml_backend_reg_get_proc_address
|
170
172
|
|
171
|
-
//
|
172
|
-
typedef lm_ggml_backend_buffer_type_t
|
173
|
-
|
173
|
+
// Split buffer type for tensor parallelism
|
174
|
+
typedef lm_ggml_backend_buffer_type_t (*lm_ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
|
175
|
+
// Set the number of threads for the backend
|
176
|
+
typedef void (*lm_ggml_backend_set_n_threads_t)(lm_ggml_backend_t backend, int n_threads);
|
177
|
+
// Get additional buffer types provided by the device (returns a NULL-terminated array)
|
178
|
+
typedef lm_ggml_backend_buffer_type_t * (*lm_ggml_backend_dev_get_extra_bufts_t)(lm_ggml_backend_dev_t device);
|
174
179
|
|
175
180
|
//
|
176
181
|
// Backend registry
|
@@ -192,7 +197,7 @@ extern "C" {
|
|
192
197
|
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params);
|
193
198
|
// = lm_ggml_backend_dev_init(lm_ggml_backend_dev_by_type(type), params)
|
194
199
|
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params);
|
195
|
-
// = lm_ggml_backend_dev_init(lm_ggml_backend_dev_by_type(
|
200
|
+
// = lm_ggml_backend_dev_init(lm_ggml_backend_dev_by_type(GPU) OR lm_ggml_backend_dev_by_type(CPU), NULL)
|
196
201
|
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_init_best(void);
|
197
202
|
|
198
203
|
//
|
@@ -300,27 +305,10 @@ extern "C" {
|
|
300
305
|
LM_GGML_API void lm_ggml_backend_tensor_alloc(lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor, void * addr);
|
301
306
|
LM_GGML_API void lm_ggml_backend_view_init(struct lm_ggml_tensor * tensor);
|
302
307
|
|
303
|
-
//
|
304
|
-
// CPU backend
|
305
|
-
//
|
306
|
-
|
307
|
-
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_cpu_init(void);
|
308
|
-
|
309
|
-
LM_GGML_API bool lm_ggml_backend_is_cpu (lm_ggml_backend_t backend);
|
310
|
-
LM_GGML_API void lm_ggml_backend_cpu_set_n_threads (lm_ggml_backend_t backend_cpu, int n_threads);
|
311
|
-
LM_GGML_API void lm_ggml_backend_cpu_set_threadpool (lm_ggml_backend_t backend_cpu, lm_ggml_threadpool_t threadpool);
|
312
|
-
LM_GGML_API void lm_ggml_backend_cpu_set_abort_callback(lm_ggml_backend_t backend_cpu, lm_ggml_abort_callback abort_callback, void * abort_callback_data);
|
313
|
-
|
314
|
-
// Create a backend buffer from an existing pointer
|
308
|
+
// CPU buffer types are always available
|
315
309
|
LM_GGML_API lm_ggml_backend_buffer_t lm_ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
316
310
|
LM_GGML_API lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_buffer_type(void);
|
317
311
|
|
318
|
-
LM_GGML_API lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void);
|
319
|
-
|
320
|
-
#ifdef LM_GGML_USE_CPU_HBM
|
321
|
-
LM_GGML_API lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_hbm_buffer_type(void);
|
322
|
-
#endif
|
323
|
-
|
324
312
|
#ifdef __cplusplus
|
325
313
|
}
|
326
314
|
#endif
|
package/cpp/ggml-cpp.h
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#ifndef __cplusplus
|
4
|
+
#error "This header is for C++ only"
|
5
|
+
#endif
|
6
|
+
|
7
|
+
#include "ggml.h"
|
8
|
+
#include "ggml-alloc.h"
|
9
|
+
#include "ggml-backend.h"
|
10
|
+
#include <memory>
|
11
|
+
|
12
|
+
// Smart pointers for ggml types
|
13
|
+
|
14
|
+
// ggml
|
15
|
+
|
16
|
+
// Deleter functor for lm_ggml_context: forwards the pointer to lm_ggml_free.
struct lm_ggml_context_deleter {
    void operator()(lm_ggml_context * context) {
        lm_ggml_free(context);
    }
};
|
17
|
+
// Deleter functor for lm_gguf_context: forwards the pointer to lm_gguf_free.
struct lm_gguf_context_deleter {
    void operator()(lm_gguf_context * context) {
        lm_gguf_free(context);
    }
};
|
18
|
+
|
19
|
+
// Owning pointer to a lm_ggml_context, destroyed through lm_ggml_context_deleter.
using lm_ggml_context_ptr = std::unique_ptr<lm_ggml_context, lm_ggml_context_deleter>;
|
20
|
+
// Owning pointer to a lm_gguf_context, destroyed through lm_gguf_context_deleter.
using lm_gguf_context_ptr = std::unique_ptr<lm_gguf_context, lm_gguf_context_deleter>;
|
21
|
+
|
22
|
+
// ggml-alloc
|
23
|
+
|
24
|
+
// Deleter functor for graph allocators: forwards the handle to lm_ggml_gallocr_free.
struct lm_ggml_gallocr_deleter {
    void operator()(lm_ggml_gallocr_t allocator) {
        lm_ggml_gallocr_free(allocator);
    }
};
|
25
|
+
|
26
|
+
typedef std::unique_ptr<lm_ggml_gallocr_t, lm_ggml_gallocr_deleter> lm_ggml_gallocr_ptr;
|
27
|
+
|
28
|
+
// ggml-backend
|
29
|
+
|
30
|
+
// Deleter functor for backends: forwards the handle to lm_ggml_backend_free.
struct lm_ggml_backend_deleter {
    void operator()(lm_ggml_backend_t handle) {
        lm_ggml_backend_free(handle);
    }
};
|
31
|
+
// Deleter functor for backend buffers: forwards the handle to lm_ggml_backend_buffer_free.
struct lm_ggml_backend_buffer_deleter {
    void operator()(lm_ggml_backend_buffer_t handle) {
        lm_ggml_backend_buffer_free(handle);
    }
};
|
32
|
+
// Deleter functor for backend events: forwards the handle to lm_ggml_backend_event_free.
struct lm_ggml_backend_event_deleter {
    void operator()(lm_ggml_backend_event_t handle) {
        lm_ggml_backend_event_free(handle);
    }
};
|
33
|
+
// Deleter functor for backend schedulers: forwards the handle to lm_ggml_backend_sched_free.
struct lm_ggml_backend_sched_deleter {
    void operator()(lm_ggml_backend_sched_t handle) {
        lm_ggml_backend_sched_free(handle);
    }
};
|
34
|
+
|
35
|
+
// Owning pointer to a lm_ggml_backend, destroyed through lm_ggml_backend_deleter.
using lm_ggml_backend_ptr = std::unique_ptr<lm_ggml_backend, lm_ggml_backend_deleter>;
|
36
|
+
// Owning pointer to a lm_ggml_backend_buffer, destroyed through lm_ggml_backend_buffer_deleter.
using lm_ggml_backend_buffer_ptr = std::unique_ptr<lm_ggml_backend_buffer, lm_ggml_backend_buffer_deleter>;
|
37
|
+
// Owning pointer to a lm_ggml_backend_event, destroyed through lm_ggml_backend_event_deleter.
using lm_ggml_backend_event_ptr = std::unique_ptr<lm_ggml_backend_event, lm_ggml_backend_event_deleter>;
|
38
|
+
// Owning pointer to a lm_ggml_backend_sched, destroyed through lm_ggml_backend_sched_deleter.
using lm_ggml_backend_sched_ptr = std::unique_ptr<lm_ggml_backend_sched, lm_ggml_backend_sched_deleter>;
|