cui-llama.rn 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +330 -0
- package/android/build.gradle +107 -0
- package/android/gradle.properties +5 -0
- package/android/src/main/AndroidManifest.xml +4 -0
- package/android/src/main/CMakeLists.txt +69 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +353 -0
- package/android/src/main/java/com/rnllama/RNLlama.java +446 -0
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -0
- package/android/src/main/jni.cpp +635 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +94 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +95 -0
- package/cpp/README.md +4 -0
- package/cpp/common.cpp +3237 -0
- package/cpp/common.h +467 -0
- package/cpp/ggml-aarch64.c +2193 -0
- package/cpp/ggml-aarch64.h +39 -0
- package/cpp/ggml-alloc.c +1041 -0
- package/cpp/ggml-alloc.h +76 -0
- package/cpp/ggml-backend-impl.h +153 -0
- package/cpp/ggml-backend.c +2225 -0
- package/cpp/ggml-backend.h +236 -0
- package/cpp/ggml-common.h +1829 -0
- package/cpp/ggml-impl.h +655 -0
- package/cpp/ggml-metal.h +65 -0
- package/cpp/ggml-metal.m +3273 -0
- package/cpp/ggml-quants.c +15022 -0
- package/cpp/ggml-quants.h +132 -0
- package/cpp/ggml.c +22034 -0
- package/cpp/ggml.h +2444 -0
- package/cpp/grammar-parser.cpp +536 -0
- package/cpp/grammar-parser.h +29 -0
- package/cpp/json-schema-to-grammar.cpp +1045 -0
- package/cpp/json-schema-to-grammar.h +8 -0
- package/cpp/json.hpp +24766 -0
- package/cpp/llama.cpp +21789 -0
- package/cpp/llama.h +1201 -0
- package/cpp/log.h +737 -0
- package/cpp/rn-llama.hpp +630 -0
- package/cpp/sampling.cpp +460 -0
- package/cpp/sampling.h +160 -0
- package/cpp/sgemm.cpp +1027 -0
- package/cpp/sgemm.h +14 -0
- package/cpp/unicode-data.cpp +7032 -0
- package/cpp/unicode-data.h +20 -0
- package/cpp/unicode.cpp +812 -0
- package/cpp/unicode.h +64 -0
- package/ios/RNLlama.h +11 -0
- package/ios/RNLlama.mm +302 -0
- package/ios/RNLlama.xcodeproj/project.pbxproj +278 -0
- package/ios/RNLlamaContext.h +39 -0
- package/ios/RNLlamaContext.mm +426 -0
- package/jest/mock.js +169 -0
- package/lib/commonjs/NativeRNLlama.js +10 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -0
- package/lib/commonjs/grammar.js +574 -0
- package/lib/commonjs/grammar.js.map +1 -0
- package/lib/commonjs/index.js +151 -0
- package/lib/commonjs/index.js.map +1 -0
- package/lib/module/NativeRNLlama.js +3 -0
- package/lib/module/NativeRNLlama.js.map +1 -0
- package/lib/module/grammar.js +566 -0
- package/lib/module/grammar.js.map +1 -0
- package/lib/module/index.js +129 -0
- package/lib/module/index.js.map +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +107 -0
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -0
- package/lib/typescript/grammar.d.ts +38 -0
- package/lib/typescript/grammar.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +46 -0
- package/lib/typescript/index.d.ts.map +1 -0
- package/llama-rn.podspec +56 -0
- package/package.json +230 -0
- package/src/NativeRNLlama.ts +132 -0
- package/src/grammar.ts +849 -0
- package/src/index.ts +182 -0
@@ -0,0 +1,132 @@
|
|
1
|
+
import type { TurboModule } from 'react-native';
|
2
|
+
import { TurboModuleRegistry } from 'react-native';
|
3
|
+
|
4
|
+
/**
 * Parameters accepted by `initContext` when creating a native llama context.
 * Field names mirror llama.cpp option names (snake_case); this package bundles
 * llama.cpp under `cpp/`, so the native side defines the exact semantics.
 */
export type NativeContextParams = {
  /** Model location — presumably a filesystem path (or asset name when `is_model_asset` is set); confirm against the native loader. */
  model: string
  /** NOTE(review): looks like it marks `model` as a bundled app asset rather than a path — confirm. */
  is_model_asset?: boolean

  /** Enable embedding mode for this context (see `embedding()` in `Spec`). */
  embedding?: boolean

  /** Context window size, in tokens. */
  n_ctx?: number
  /** Batch size used during prompt evaluation. */
  n_batch?: number

  /** Number of CPU threads the native runtime may use. */
  n_threads?: number
  /** Number of model layers to offload to the GPU, where supported. */
  n_gpu_layers?: number

  /** Lock model memory (mlock) to keep it from being swapped out. */
  use_mlock?: boolean
  /** Memory-map the model file instead of reading it fully into RAM. */
  use_mmap?: boolean

  lora?: string // lora_adaptor
  /** Scale applied to the LoRA adapter — confirm range against native impl. */
  lora_scaled?: number
  /** Base model used when applying the LoRA adapter. */
  lora_base?: string

  /** RoPE frequency base (rope scaling). */
  rope_freq_base?: number
  /** RoPE frequency scale (rope scaling). */
  rope_freq_scale?: number
}
|
26
|
+
|
27
|
+
/**
 * Parameters for one `completion()` call. Field names mirror llama.cpp
 * sampling options; a `// -> x` comment records the native name where it differs.
 */
export type NativeCompletionParams = {
  /** Prompt text evaluated before generation starts. */
  prompt: string
  /** Optional GBNF grammar constraining generated output. */
  grammar?: string
  stop?: Array<string> // -> antiprompt

  /** Threads for this call (presumably overrides the context default — confirm). */
  n_threads?: number
  /** Maximum number of tokens to generate. */
  n_predict?: number
  /** When > 0, report the top-N token probabilities per emitted token (see `completion_probabilities`). */
  n_probs?: number
  /** Top-k sampling cutoff. */
  top_k?: number
  /** Top-p (nucleus) sampling cutoff. */
  top_p?: number
  /** Min-p sampling cutoff. */
  min_p?: number
  /** Tail-free sampling parameter z. */
  tfs_z?: number
  /** Locally-typical sampling parameter. */
  typical_p?: number
  temperature?: number // -> temp
  /** How many recent tokens the repetition penalties consider. */
  penalty_last_n?: number
  /** Repetition penalty multiplier. */
  penalty_repeat?: number
  /** Frequency penalty. */
  penalty_freq?: number
  /** Presence penalty. */
  penalty_present?: number
  /** Mirostat sampling mode — presumably 0 = off, 1 = v1, 2 = v2 per llama.cpp; confirm. */
  mirostat?: number
  /** Mirostat target entropy (tau). */
  mirostat_tau?: number
  /** Mirostat learning rate (eta). */
  mirostat_eta?: number
  /** Also apply the repetition penalty to newline tokens. */
  penalize_nl?: boolean
  /** RNG seed for sampling. */
  seed?: number

  /** Suppress the end-of-sequence token during sampling. */
  ignore_eos?: boolean
  /** Presumably `[token_id, bias]` pairs — confirm shape against the native sampler. */
  logit_bias?: Array<Array<number>>

  /** When true, stream partial output while generating — presumably via a native event; confirm mechanism. */
  emit_partial_completion: boolean
}
|
56
|
+
|
57
|
+
/** One candidate token and its probability. */
export type NativeCompletionTokenProbItem = {
  /** Candidate token rendered as text. */
  tok_str: string
  /** Probability assigned to this candidate. */
  prob: number
}
|
61
|
+
|
62
|
+
/** Candidate probabilities recorded for one emitted piece of output. */
export type NativeCompletionTokenProb = {
  /** The emitted text piece. */
  content: string
  /** Top candidates considered at this position (size governed by `n_probs`). */
  probs: Array<NativeCompletionTokenProbItem>
}
|
66
|
+
|
67
|
+
/** Timing statistics reported by the native runtime for one completion. */
export type NativeCompletionResultTimings = {
  /** Number of prompt tokens evaluated. */
  prompt_n: number
  /** Total prompt-evaluation time, in milliseconds. */
  prompt_ms: number
  /** Average prompt-evaluation time per token, in milliseconds. */
  prompt_per_token_ms: number
  /** Prompt tokens evaluated per second. */
  prompt_per_second: number
  /** Number of tokens generated. */
  predicted_n: number
  /** Total generation time, in milliseconds. */
  predicted_ms: number
  /** Average generation time per token, in milliseconds. */
  predicted_per_token_ms: number
  /** Tokens generated per second. */
  predicted_per_second: number
}
|
77
|
+
|
78
|
+
/** Final result of a `completion()` call. */
export type NativeCompletionResult = {
  /** The generated text. */
  text: string

  /** Tokens generated during this call. */
  tokens_predicted: number
  /** Prompt tokens evaluated during this call. */
  tokens_evaluated: number
  /** Presumably true when input was truncated to fit the context window — confirm. */
  truncated: boolean
  /** Generation ended because the model emitted end-of-sequence. */
  stopped_eos: boolean
  /** NOTE(review): typed `string` here though the name reads like a flag — verify against the native payload. */
  stopped_word: string
  /** NOTE(review): typed `number` though the name reads like a flag (hit `n_predict`?) — verify. */
  stopped_limit: number
  /** The stop word that ended generation, if any. */
  stopping_word: string
  /** Tokens kept in the native KV cache — confirm meaning against native impl. */
  tokens_cached: number
  /** Performance breakdown for this call. */
  timings: NativeCompletionResultTimings

  /** Per-position candidate probabilities; presumably present only when `n_probs` was set — confirm. */
  completion_probabilities?: Array<NativeCompletionTokenProb>
}
|
93
|
+
|
94
|
+
/** Result of `tokenize()`: the input text as model token ids. */
export type NativeTokenizeResult = {
  tokens: Array<number>
}
|
97
|
+
|
98
|
+
/** Result of `embedding()`: the embedding vector for the input text. */
export type NativeEmbeddingResult = {
  embedding: Array<number>
}
|
101
|
+
|
102
|
+
/** Handle and device info returned by `initContext`. */
export type NativeLlamaContext = {
  /** Id used to address this context in every other `Spec` method. */
  contextId: number
  /** Whether GPU offload is active for this context. */
  gpu: boolean
  /** Human-readable explanation when `gpu` is false. */
  reasonNoGPU: string
  /** Model metadata. NOTE(review): boxed `Object` type — untyped here; the shape is defined by the native side. Left as `Object` because RN codegen parses this spec file. */
  model: Object
}
|
108
|
+
|
109
|
+
/** Result of `loadSession()`. */
export type NativeSessionLoadResult = {
  /** Number of tokens restored from the session file. */
  tokens_loaded: number
  /** Prompt text associated with the restored state. */
  prompt: string
}
|
113
|
+
|
114
|
+
/**
 * TurboModule spec for the native RNLlama module. Method signatures must stay
 * in sync with the Android/iOS implementations (RNLlama.java / RNLlama.mm);
 * RN codegen parses this interface, so keep the type syntax codegen-friendly.
 */
export interface Spec extends TurboModule {
  /** Set a limit on native contexts — presumably the max held concurrently; confirm against native impl. */
  setContextLimit(limit: number): Promise<void>;
  /** Create a native llama context; resolves with its id and GPU availability info. */
  initContext(params: NativeContextParams): Promise<NativeLlamaContext>;

  /** Restore saved session state from `filepath` into the context. */
  loadSession(contextId: number, filepath: string): Promise<NativeSessionLoadResult>;
  /** Save session state to `filepath`; `size` presumably caps the token count and the result is the count written — confirm. */
  saveSession(contextId: number, filepath: string, size: number): Promise<number>;
  /** Run a completion and resolve with the final result. */
  completion(contextId: number, params: NativeCompletionParams): Promise<NativeCompletionResult>;
  /** Interrupt an in-flight completion on this context. */
  stopCompletion(contextId: number): Promise<void>;
  /** Tokenize `text` with the context's model. */
  tokenize(contextId: number, text: string): Promise<NativeTokenizeResult>;
  /** Convert token ids back into text. */
  detokenize(contextId: number, tokens: number[]): Promise<string>;
  /** Compute an embedding for `text` (context must be initialized with `embedding: true`). */
  embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>;
  /** Run the bundled benchmark; `pp`/`tg`/`pl`/`nr` mirror the native bench arguments and the result is a string report — confirm both against the native code. */
  bench(contextId: number, pp: number, tg: number, pl: number, nr: number): Promise<string>;

  /** Free one native context. */
  releaseContext(contextId: number): Promise<void>;

  /** Free every native context. */
  releaseAllContexts(): Promise<void>;
}
|
131
|
+
|
132
|
+
// Resolve the native module registered under 'RNLlama'.
// NOTE(review): `TurboModuleRegistry.get` returns null when the module is not
// linked, and the `as Spec` cast hides that — failures surface later at the
// call site instead of at import time. `getEnforcing` would fail fast; confirm
// whether the nullable form is intentional (e.g. for jest mocks).
export default TurboModuleRegistry.get<Spec>('RNLlama') as Spec;
|