cui-llama.rn 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +330 -0
- package/android/build.gradle +107 -0
- package/android/gradle.properties +5 -0
- package/android/src/main/AndroidManifest.xml +4 -0
- package/android/src/main/CMakeLists.txt +69 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +353 -0
- package/android/src/main/java/com/rnllama/RNLlama.java +446 -0
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -0
- package/android/src/main/jni.cpp +635 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +94 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +95 -0
- package/cpp/README.md +4 -0
- package/cpp/common.cpp +3237 -0
- package/cpp/common.h +467 -0
- package/cpp/ggml-aarch64.c +2193 -0
- package/cpp/ggml-aarch64.h +39 -0
- package/cpp/ggml-alloc.c +1041 -0
- package/cpp/ggml-alloc.h +76 -0
- package/cpp/ggml-backend-impl.h +153 -0
- package/cpp/ggml-backend.c +2225 -0
- package/cpp/ggml-backend.h +236 -0
- package/cpp/ggml-common.h +1829 -0
- package/cpp/ggml-impl.h +655 -0
- package/cpp/ggml-metal.h +65 -0
- package/cpp/ggml-metal.m +3273 -0
- package/cpp/ggml-quants.c +15022 -0
- package/cpp/ggml-quants.h +132 -0
- package/cpp/ggml.c +22034 -0
- package/cpp/ggml.h +2444 -0
- package/cpp/grammar-parser.cpp +536 -0
- package/cpp/grammar-parser.h +29 -0
- package/cpp/json-schema-to-grammar.cpp +1045 -0
- package/cpp/json-schema-to-grammar.h +8 -0
- package/cpp/json.hpp +24766 -0
- package/cpp/llama.cpp +21789 -0
- package/cpp/llama.h +1201 -0
- package/cpp/log.h +737 -0
- package/cpp/rn-llama.hpp +630 -0
- package/cpp/sampling.cpp +460 -0
- package/cpp/sampling.h +160 -0
- package/cpp/sgemm.cpp +1027 -0
- package/cpp/sgemm.h +14 -0
- package/cpp/unicode-data.cpp +7032 -0
- package/cpp/unicode-data.h +20 -0
- package/cpp/unicode.cpp +812 -0
- package/cpp/unicode.h +64 -0
- package/ios/RNLlama.h +11 -0
- package/ios/RNLlama.mm +302 -0
- package/ios/RNLlama.xcodeproj/project.pbxproj +278 -0
- package/ios/RNLlamaContext.h +39 -0
- package/ios/RNLlamaContext.mm +426 -0
- package/jest/mock.js +169 -0
- package/lib/commonjs/NativeRNLlama.js +10 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -0
- package/lib/commonjs/grammar.js +574 -0
- package/lib/commonjs/grammar.js.map +1 -0
- package/lib/commonjs/index.js +151 -0
- package/lib/commonjs/index.js.map +1 -0
- package/lib/module/NativeRNLlama.js +3 -0
- package/lib/module/NativeRNLlama.js.map +1 -0
- package/lib/module/grammar.js +566 -0
- package/lib/module/grammar.js.map +1 -0
- package/lib/module/index.js +129 -0
- package/lib/module/index.js.map +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +107 -0
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -0
- package/lib/typescript/grammar.d.ts +38 -0
- package/lib/typescript/grammar.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +46 -0
- package/lib/typescript/index.d.ts.map +1 -0
- package/llama-rn.podspec +56 -0
- package/package.json +230 -0
- package/src/NativeRNLlama.ts +132 -0
- package/src/grammar.ts +849 -0
- package/src/index.ts +182 -0
package/src/index.ts
ADDED
@@ -0,0 +1,182 @@
+import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
+import type { DeviceEventEmitterStatic } from 'react-native'
+import RNLlama from './NativeRNLlama'
+import type {
+  NativeContextParams,
+  NativeLlamaContext,
+  NativeCompletionParams,
+  NativeCompletionTokenProb,
+  NativeCompletionResult,
+  NativeTokenizeResult,
+  NativeEmbeddingResult,
+  NativeSessionLoadResult,
+} from './NativeRNLlama'
+import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
+
+export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
+
+const EVENT_ON_TOKEN = '@RNLlama_onToken'
+
+let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
+if (Platform.OS === 'ios') {
+  // @ts-ignore
+  EventEmitter = new NativeEventEmitter(RNLlama)
+}
+if (Platform.OS === 'android') {
+  EventEmitter = DeviceEventEmitter
+}
+
+export type TokenData = {
+  token: string
+  completion_probabilities?: Array<NativeCompletionTokenProb>
+}
+
+type TokenNativeEvent = {
+  contextId: number
+  tokenResult: TokenData
+}
+
+export type ContextParams = NativeContextParams
+
+export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion'>
+
+export type BenchResult = {
+  modelDesc: string
+  modelSize: number
+  modelNParams: number
+  ppAvg: number
+  ppStd: number
+  tgAvg: number
+  tgStd: number
+}
+
+export class LlamaContext {
+  id: number
+
+  gpu: boolean = false
+
+  reasonNoGPU: string = ''
+
+  model: Object = {}
+
+  constructor({
+    contextId,
+    gpu,
+    reasonNoGPU,
+    model,
+  }: NativeLlamaContext) {
+    this.id = contextId
+    this.gpu = gpu
+    this.reasonNoGPU = reasonNoGPU
+    this.model = model
+  }
+
+  /**
+   * Load cached prompt & completion state from a file.
+   */
+  async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
+    return RNLlama.loadSession(this.id, filepath)
+  }
+
+  /**
+   * Save current cached prompt & completion state to a file.
+   */
+  async saveSession(filepath: string, options?: { tokenSize: number }): Promise<number> {
+    return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
+  }
+
+  async completion(
+    params: CompletionParams,
+    callback?: (data: TokenData) => void,
+  ): Promise<NativeCompletionResult> {
+    let tokenListener: any = callback && EventEmitter.addListener(
+      EVENT_ON_TOKEN,
+      (evt: TokenNativeEvent) => {
+        const { contextId, tokenResult } = evt
+        if (contextId !== this.id) return
+        callback(tokenResult)
+      },
+    )
+    const promise = RNLlama.completion(this.id, {
+      ...params,
+      emit_partial_completion: !!callback,
+    })
+    return promise
+      .then((completionResult) => {
+        tokenListener?.remove()
+        tokenListener = null
+        return completionResult
+      })
+      .catch((err: any) => {
+        tokenListener?.remove()
+        tokenListener = null
+        throw err
+      })
+  }
+
+  stopCompletion(): Promise<void> {
+    return RNLlama.stopCompletion(this.id)
+  }
+
+  tokenize(text: string): Promise<NativeTokenizeResult> {
+    return RNLlama.tokenize(this.id, text)
+  }
+
+  detokenize(tokens: number[]): Promise<string> {
+    return RNLlama.detokenize(this.id, tokens)
+  }
+
+  embedding(text: string): Promise<NativeEmbeddingResult> {
+    return RNLlama.embedding(this.id, text)
+  }
+
+  async bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult> {
+    const result = await RNLlama.bench(this.id, pp, tg, pl, nr)
+    const [
+      modelDesc,
+      modelSize,
+      modelNParams,
+      ppAvg,
+      ppStd,
+      tgAvg,
+      tgStd,
+    ] = JSON.parse(result)
+    return {
+      modelDesc,
+      modelSize,
+      modelNParams,
+      ppAvg,
+      ppStd,
+      tgAvg,
+      tgStd,
+    }
+  }
+
+  async release(): Promise<void> {
+    return RNLlama.releaseContext(this.id)
+  }
+}
+
+export async function setContextLimit(limit: number): Promise<void> {
+  return RNLlama.setContextLimit(limit)
+}
+
+export async function initLlama({
+  model,
+  is_model_asset: isModelAsset,
+  ...rest
+}: ContextParams): Promise<LlamaContext> {
+  let path = model
+  if (path.startsWith('file://')) path = path.slice(7)
+  const { contextId, gpu, reasonNoGPU, model: modelDetails } =
+    await RNLlama.initContext({
+      model: path,
+      is_model_asset: !!isModelAsset,
+      ...rest,
+    })
+  return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails })
+}
+
+export async function releaseAllLlama(): Promise<void> {
+  return RNLlama.releaseAllContexts()
+}