cui-llama.rn 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -39
- package/android/src/main/CMakeLists.txt +12 -2
- package/android/src/main/java/com/rnllama/LlamaContext.java +29 -9
- package/android/src/main/java/com/rnllama/RNLlama.java +33 -1
- package/android/src/main/jni.cpp +62 -8
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +5 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +5 -0
- package/cpp/common.cpp +3237 -3231
- package/cpp/common.h +469 -468
- package/cpp/ggml-aarch64.c +2193 -2193
- package/cpp/ggml-aarch64.h +39 -39
- package/cpp/ggml-alloc.c +1036 -1042
- package/cpp/ggml-backend-impl.h +153 -153
- package/cpp/ggml-backend.c +2240 -2234
- package/cpp/ggml-backend.h +238 -238
- package/cpp/ggml-common.h +1833 -1829
- package/cpp/ggml-impl.h +755 -655
- package/cpp/ggml-metal.h +65 -65
- package/cpp/ggml-metal.m +3269 -3269
- package/cpp/ggml-quants.c +14872 -14860
- package/cpp/ggml-quants.h +132 -132
- package/cpp/ggml.c +22055 -22044
- package/cpp/ggml.h +2453 -2447
- package/cpp/llama-grammar.cpp +539 -0
- package/cpp/llama-grammar.h +39 -0
- package/cpp/llama-impl.h +26 -0
- package/cpp/llama-sampling.cpp +635 -0
- package/cpp/llama-sampling.h +56 -0
- package/cpp/llama-vocab.cpp +1721 -0
- package/cpp/llama-vocab.h +130 -0
- package/cpp/llama.cpp +19171 -21892
- package/cpp/llama.h +1240 -1217
- package/cpp/log.h +737 -737
- package/cpp/rn-llama.hpp +207 -29
- package/cpp/sampling.cpp +460 -460
- package/cpp/sgemm.cpp +1027 -1027
- package/cpp/sgemm.h +14 -14
- package/cpp/unicode.cpp +6 -0
- package/cpp/unicode.h +3 -0
- package/ios/RNLlama.mm +15 -6
- package/ios/RNLlamaContext.h +2 -8
- package/ios/RNLlamaContext.mm +41 -34
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js +37 -0
- package/lib/commonjs/chat.js.map +1 -0
- package/lib/commonjs/index.js +14 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +31 -0
- package/lib/module/chat.js.map +1 -0
- package/lib/module/index.js +14 -1
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +5 -1
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/chat.d.ts +10 -0
- package/lib/typescript/chat.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +9 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +10 -1
- package/src/chat.ts +44 -0
- package/src/index.ts +31 -4
package/cpp/ggml-metal.h
CHANGED
@@ -1,65 +1,65 @@
|
|
1
|
-
// An interface for computing an lm_ggml_cgraph with Metal
|
2
|
-
//
|
3
|
-
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
4
|
-
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
5
|
-
//
|
6
|
-
// How does it work?
|
7
|
-
//
|
8
|
-
// As long as your program can create and evaluate a lm_ggml_cgraph on the CPU, you can use this
|
9
|
-
// interface to evaluate the same graph on the GPU. Instead of using lm_ggml_graph_compute(), you
|
10
|
-
// use lm_ggml_metal_graph_compute() (or lm_ggml_vulkan_graph_compute(), etc.)
|
11
|
-
//
|
12
|
-
// You only need to make sure that all memory buffers that you used during the graph creation
|
13
|
-
// are mapped to the device memory with the lm_ggml_metal_add_buffer() function. This mapping is
|
14
|
-
// used during the graph evaluation to determine the arguments of the compute kernels.
|
15
|
-
//
|
16
|
-
// Synchronization between device and host memory (for example for input and output tensors)
|
17
|
-
// is done with the lm_ggml_metal_set_tensor() and lm_ggml_metal_get_tensor() functions.
|
18
|
-
//
|
19
|
-
|
20
|
-
#pragma once
|
21
|
-
|
22
|
-
#include "ggml.h"
|
23
|
-
#include "ggml-backend.h"
|
24
|
-
|
25
|
-
#include <stddef.h>
|
26
|
-
#include <stdbool.h>
|
27
|
-
|
28
|
-
// max memory buffers that can be mapped to the device
|
29
|
-
#define LM_GGML_METAL_MAX_BUFFERS 64
|
30
|
-
|
31
|
-
struct lm_ggml_tensor;
|
32
|
-
struct lm_ggml_cgraph;
|
33
|
-
|
34
|
-
#ifdef __cplusplus
|
35
|
-
extern "C" {
|
36
|
-
#endif
|
37
|
-
|
38
|
-
//
|
39
|
-
// backend API
|
40
|
-
// user-code should use only these functions
|
41
|
-
//
|
42
|
-
|
43
|
-
LM_GGML_API void lm_ggml_backend_metal_log_set_callback(lm_ggml_log_callback log_callback, void * user_data);
|
44
|
-
|
45
|
-
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_metal_init(void);
|
46
|
-
|
47
|
-
LM_GGML_API bool lm_ggml_backend_is_metal(lm_ggml_backend_t backend);
|
48
|
-
|
49
|
-
LM_GGML_API LM_GGML_CALL lm_ggml_backend_buffer_t lm_ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
|
50
|
-
|
51
|
-
LM_GGML_API void lm_ggml_backend_metal_set_n_cb(lm_ggml_backend_t backend, int n_cb);
|
52
|
-
|
53
|
-
LM_GGML_API LM_GGML_CALL lm_ggml_backend_buffer_type_t lm_ggml_backend_metal_buffer_type(void);
|
54
|
-
|
55
|
-
// helper to check if the device supports a specific family
|
56
|
-
// ideally, the user code should be doing these checks
|
57
|
-
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
58
|
-
LM_GGML_API bool lm_ggml_backend_metal_supports_family(lm_ggml_backend_t backend, int family);
|
59
|
-
|
60
|
-
// capture all command buffers committed the next time `lm_ggml_backend_graph_compute` is called
|
61
|
-
LM_GGML_API void lm_ggml_backend_metal_capture_next_compute(lm_ggml_backend_t backend);
|
62
|
-
|
63
|
-
#ifdef __cplusplus
|
64
|
-
}
|
65
|
-
#endif
|
1
|
+
// An interface for computing an lm_ggml_cgraph with Metal
|
2
|
+
//
|
3
|
+
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
4
|
+
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
5
|
+
//
|
6
|
+
// How does it work?
|
7
|
+
//
|
8
|
+
// As long as your program can create and evaluate a lm_ggml_cgraph on the CPU, you can use this
|
9
|
+
// interface to evaluate the same graph on the GPU. Instead of using lm_ggml_graph_compute(), you
|
10
|
+
// use lm_ggml_metal_graph_compute() (or lm_ggml_vulkan_graph_compute(), etc.)
|
11
|
+
//
|
12
|
+
// You only need to make sure that all memory buffers that you used during the graph creation
|
13
|
+
// are mapped to the device memory with the lm_ggml_metal_add_buffer() function. This mapping is
|
14
|
+
// used during the graph evaluation to determine the arguments of the compute kernels.
|
15
|
+
//
|
16
|
+
// Synchronization between device and host memory (for example for input and output tensors)
|
17
|
+
// is done with the lm_ggml_metal_set_tensor() and lm_ggml_metal_get_tensor() functions.
|
18
|
+
//
|
19
|
+
|
20
|
+
#pragma once
|
21
|
+
|
22
|
+
#include "ggml.h"
|
23
|
+
#include "ggml-backend.h"
|
24
|
+
|
25
|
+
#include <stddef.h>
|
26
|
+
#include <stdbool.h>
|
27
|
+
|
28
|
+
// max memory buffers that can be mapped to the device
|
29
|
+
#define LM_GGML_METAL_MAX_BUFFERS 64
|
30
|
+
|
31
|
+
struct lm_ggml_tensor;
|
32
|
+
struct lm_ggml_cgraph;
|
33
|
+
|
34
|
+
#ifdef __cplusplus
|
35
|
+
extern "C" {
|
36
|
+
#endif
|
37
|
+
|
38
|
+
//
|
39
|
+
// backend API
|
40
|
+
// user-code should use only these functions
|
41
|
+
//
|
42
|
+
|
43
|
+
LM_GGML_API void lm_ggml_backend_metal_log_set_callback(lm_ggml_log_callback log_callback, void * user_data);
|
44
|
+
|
45
|
+
LM_GGML_API lm_ggml_backend_t lm_ggml_backend_metal_init(void);
|
46
|
+
|
47
|
+
LM_GGML_API bool lm_ggml_backend_is_metal(lm_ggml_backend_t backend);
|
48
|
+
|
49
|
+
LM_GGML_API LM_GGML_CALL lm_ggml_backend_buffer_t lm_ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
|
50
|
+
|
51
|
+
LM_GGML_API void lm_ggml_backend_metal_set_n_cb(lm_ggml_backend_t backend, int n_cb);
|
52
|
+
|
53
|
+
LM_GGML_API LM_GGML_CALL lm_ggml_backend_buffer_type_t lm_ggml_backend_metal_buffer_type(void);
|
54
|
+
|
55
|
+
// helper to check if the device supports a specific family
|
56
|
+
// ideally, the user code should be doing these checks
|
57
|
+
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
58
|
+
LM_GGML_API bool lm_ggml_backend_metal_supports_family(lm_ggml_backend_t backend, int family);
|
59
|
+
|
60
|
+
// capture all command buffers committed the next time `lm_ggml_backend_graph_compute` is called
|
61
|
+
LM_GGML_API void lm_ggml_backend_metal_capture_next_compute(lm_ggml_backend_t backend);
|
62
|
+
|
63
|
+
#ifdef __cplusplus
|
64
|
+
}
|
65
|
+
#endif
|