cui-llama.rn 0.2.0

Files changed (76)
  1. package/LICENSE +20 -0
  2. package/README.md +330 -0
  3. package/android/build.gradle +107 -0
  4. package/android/gradle.properties +5 -0
  5. package/android/src/main/AndroidManifest.xml +4 -0
  6. package/android/src/main/CMakeLists.txt +69 -0
  7. package/android/src/main/java/com/rnllama/LlamaContext.java +353 -0
  8. package/android/src/main/java/com/rnllama/RNLlama.java +446 -0
  9. package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -0
  10. package/android/src/main/jni.cpp +635 -0
  11. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +94 -0
  12. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +95 -0
  13. package/cpp/README.md +4 -0
  14. package/cpp/common.cpp +3237 -0
  15. package/cpp/common.h +467 -0
  16. package/cpp/ggml-aarch64.c +2193 -0
  17. package/cpp/ggml-aarch64.h +39 -0
  18. package/cpp/ggml-alloc.c +1041 -0
  19. package/cpp/ggml-alloc.h +76 -0
  20. package/cpp/ggml-backend-impl.h +153 -0
  21. package/cpp/ggml-backend.c +2225 -0
  22. package/cpp/ggml-backend.h +236 -0
  23. package/cpp/ggml-common.h +1829 -0
  24. package/cpp/ggml-impl.h +655 -0
  25. package/cpp/ggml-metal.h +65 -0
  26. package/cpp/ggml-metal.m +3273 -0
  27. package/cpp/ggml-quants.c +15022 -0
  28. package/cpp/ggml-quants.h +132 -0
  29. package/cpp/ggml.c +22034 -0
  30. package/cpp/ggml.h +2444 -0
  31. package/cpp/grammar-parser.cpp +536 -0
  32. package/cpp/grammar-parser.h +29 -0
  33. package/cpp/json-schema-to-grammar.cpp +1045 -0
  34. package/cpp/json-schema-to-grammar.h +8 -0
  35. package/cpp/json.hpp +24766 -0
  36. package/cpp/llama.cpp +21789 -0
  37. package/cpp/llama.h +1201 -0
  38. package/cpp/log.h +737 -0
  39. package/cpp/rn-llama.hpp +630 -0
  40. package/cpp/sampling.cpp +460 -0
  41. package/cpp/sampling.h +160 -0
  42. package/cpp/sgemm.cpp +1027 -0
  43. package/cpp/sgemm.h +14 -0
  44. package/cpp/unicode-data.cpp +7032 -0
  45. package/cpp/unicode-data.h +20 -0
  46. package/cpp/unicode.cpp +812 -0
  47. package/cpp/unicode.h +64 -0
  48. package/ios/RNLlama.h +11 -0
  49. package/ios/RNLlama.mm +302 -0
  50. package/ios/RNLlama.xcodeproj/project.pbxproj +278 -0
  51. package/ios/RNLlamaContext.h +39 -0
  52. package/ios/RNLlamaContext.mm +426 -0
  53. package/jest/mock.js +169 -0
  54. package/lib/commonjs/NativeRNLlama.js +10 -0
  55. package/lib/commonjs/NativeRNLlama.js.map +1 -0
  56. package/lib/commonjs/grammar.js +574 -0
  57. package/lib/commonjs/grammar.js.map +1 -0
  58. package/lib/commonjs/index.js +151 -0
  59. package/lib/commonjs/index.js.map +1 -0
  60. package/lib/module/NativeRNLlama.js +3 -0
  61. package/lib/module/NativeRNLlama.js.map +1 -0
  62. package/lib/module/grammar.js +566 -0
  63. package/lib/module/grammar.js.map +1 -0
  64. package/lib/module/index.js +129 -0
  65. package/lib/module/index.js.map +1 -0
  66. package/lib/typescript/NativeRNLlama.d.ts +107 -0
  67. package/lib/typescript/NativeRNLlama.d.ts.map +1 -0
  68. package/lib/typescript/grammar.d.ts +38 -0
  69. package/lib/typescript/grammar.d.ts.map +1 -0
  70. package/lib/typescript/index.d.ts +46 -0
  71. package/lib/typescript/index.d.ts.map +1 -0
  72. package/llama-rn.podspec +56 -0
  73. package/package.json +230 -0
  74. package/src/NativeRNLlama.ts +132 -0
  75. package/src/grammar.ts +849 -0
  76. package/src/index.ts +182 -0
package/src/index.ts ADDED
@@ -0,0 +1,182 @@
import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
import type { DeviceEventEmitterStatic } from 'react-native'

import RNLlama from './NativeRNLlama'
import type {
  NativeContextParams,
  NativeLlamaContext,
  NativeCompletionParams,
  NativeCompletionTokenProb,
  NativeCompletionResult,
  NativeTokenizeResult,
  NativeEmbeddingResult,
  NativeSessionLoadResult,
} from './NativeRNLlama'
import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'

export { SchemaGrammarConverter, convertJsonSchemaToGrammar }

const EVENT_ON_TOKEN = '@RNLlama_onToken'

// iOS delivers native events through a NativeEventEmitter bound to the
// module; Android emits them on the global DeviceEventEmitter.
let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
if (Platform.OS === 'ios') {
  // @ts-ignore
  EventEmitter = new NativeEventEmitter(RNLlama)
}
if (Platform.OS === 'android') {
  EventEmitter = DeviceEventEmitter
}

export type TokenData = {
  token: string
  completion_probabilities?: Array<NativeCompletionTokenProb>
}

type TokenNativeEvent = {
  contextId: number
  tokenResult: TokenData
}

export type ContextParams = NativeContextParams

export type CompletionParams = Omit<
  NativeCompletionParams,
  'emit_partial_completion'
>

export type BenchResult = {
  modelDesc: string
  modelSize: number
  modelNParams: number
  ppAvg: number
  ppStd: number
  tgAvg: number
  tgStd: number
}

export class LlamaContext {
  id: number

  gpu: boolean = false

  reasonNoGPU: string = ''

  model: Object = {}

  constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
    this.id = contextId
    this.gpu = gpu
    this.reasonNoGPU = reasonNoGPU
    this.model = model
  }

  /**
   * Load cached prompt & completion state from a file.
   */
  async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
    return RNLlama.loadSession(this.id, filepath)
  }

  /**
   * Save current cached prompt & completion state to a file.
   */
  async saveSession(
    filepath: string,
    options?: { tokenSize: number },
  ): Promise<number> {
    return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
  }

  async completion(
    params: CompletionParams,
    callback?: (data: TokenData) => void,
  ): Promise<NativeCompletionResult> {
    // Subscribe to per-token events only when a callback is provided, and
    // ignore events emitted by other contexts.
    let tokenListener: any =
      callback &&
      EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
        const { contextId, tokenResult } = evt
        if (contextId !== this.id) return
        callback(tokenResult)
      })
    const promise = RNLlama.completion(this.id, {
      ...params,
      emit_partial_completion: !!callback,
    })
    // Remove the token listener whether the completion resolves or rejects.
    return promise
      .then((completionResult) => {
        tokenListener?.remove()
        tokenListener = null
        return completionResult
      })
      .catch((err: any) => {
        tokenListener?.remove()
        tokenListener = null
        throw err
      })
  }

  stopCompletion(): Promise<void> {
    return RNLlama.stopCompletion(this.id)
  }

  tokenize(text: string): Promise<NativeTokenizeResult> {
    return RNLlama.tokenize(this.id, text)
  }

  detokenize(tokens: number[]): Promise<string> {
    return RNLlama.detokenize(this.id, tokens)
  }

  embedding(text: string): Promise<NativeEmbeddingResult> {
    return RNLlama.embedding(this.id, text)
  }

  async bench(
    pp: number,
    tg: number,
    pl: number,
    nr: number,
  ): Promise<BenchResult> {
    // The native side returns the benchmark result as a JSON array string.
    const result = await RNLlama.bench(this.id, pp, tg, pl, nr)
    const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] =
      JSON.parse(result)
    return { modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd }
  }

  async release(): Promise<void> {
    return RNLlama.releaseContext(this.id)
  }
}

export async function setContextLimit(limit: number): Promise<void> {
  return RNLlama.setContextLimit(limit)
}

export async function initLlama({
  model,
  is_model_asset: isModelAsset,
  ...rest
}: ContextParams): Promise<LlamaContext> {
  // Strip the 'file://' URI scheme so the native layer receives a plain path.
  let path = model
  if (path.startsWith('file://')) path = path.slice(7)
  const { contextId, gpu, reasonNoGPU, model: modelDetails } =
    await RNLlama.initContext({
      model: path,
      is_model_asset: !!isModelAsset,
      ...rest,
    })
  return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails })
}

export async function releaseAllLlama(): Promise<void> {
  return RNLlama.releaseAllContexts()
}
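
For reference, a minimal usage sketch of the API above. The model path is a placeholder, and the completion fields (prompt, n_predict) are assumed to exist on NativeCompletionParams, which is defined in NativeRNLlama.ts and not displayed on this page:

import { initLlama, releaseAllLlama } from 'cui-llama.rn'

async function run(): Promise<void> {
  // Placeholder model path; initLlama strips the 'file://' prefix.
  const context = await initLlama({ model: 'file:///data/models/model.gguf' })
  console.log('GPU enabled:', context.gpu, context.reasonNoGPU)

  // Passing a callback sets emit_partial_completion, so each token is
  // streamed through the '@RNLlama_onToken' event as it is generated.
  // 'prompt' and 'n_predict' are assumed NativeCompletionParams fields.
  const result = await context.completion(
    { prompt: 'Hello', n_predict: 64 },
    (data) => console.log(data.token),
  )
  console.log('Final result:', result)

  await context.release() // or releaseAllLlama() to free every context
}

The same pattern extends to loadSession/saveSession for prompt caching and to bench for throughput measurement.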