cui-llama.rn 1.0.3 → 1.0.6
- package/README.md +35 -39
- package/android/src/main/CMakeLists.txt +12 -2
- package/android/src/main/java/com/rnllama/LlamaContext.java +29 -9
- package/android/src/main/java/com/rnllama/RNLlama.java +33 -1
- package/android/src/main/jni.cpp +62 -8
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +5 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +5 -0
- package/cpp/common.cpp +3237 -3231
- package/cpp/common.h +469 -468
- package/cpp/ggml-aarch64.c +2193 -2193
- package/cpp/ggml-aarch64.h +39 -39
- package/cpp/ggml-alloc.c +1036 -1042
- package/cpp/ggml-backend-impl.h +153 -153
- package/cpp/ggml-backend.c +2240 -2234
- package/cpp/ggml-backend.h +238 -238
- package/cpp/ggml-common.h +1833 -1829
- package/cpp/ggml-impl.h +755 -655
- package/cpp/ggml-metal.h +65 -65
- package/cpp/ggml-metal.m +3269 -3269
- package/cpp/ggml-quants.c +14872 -14860
- package/cpp/ggml-quants.h +132 -132
- package/cpp/ggml.c +22055 -22044
- package/cpp/ggml.h +2453 -2447
- package/cpp/llama-grammar.cpp +539 -0
- package/cpp/llama-grammar.h +39 -0
- package/cpp/llama-impl.h +26 -0
- package/cpp/llama-sampling.cpp +635 -0
- package/cpp/llama-sampling.h +56 -0
- package/cpp/llama-vocab.cpp +1721 -0
- package/cpp/llama-vocab.h +130 -0
- package/cpp/llama.cpp +19171 -21892
- package/cpp/llama.h +1240 -1217
- package/cpp/log.h +737 -737
- package/cpp/rn-llama.hpp +207 -29
- package/cpp/sampling.cpp +460 -460
- package/cpp/sgemm.cpp +1027 -1027
- package/cpp/sgemm.h +14 -14
- package/cpp/unicode.cpp +6 -0
- package/cpp/unicode.h +3 -0
- package/ios/RNLlama.mm +15 -6
- package/ios/RNLlamaContext.h +2 -8
- package/ios/RNLlamaContext.mm +41 -34
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js +37 -0
- package/lib/commonjs/chat.js.map +1 -0
- package/lib/commonjs/index.js +14 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +31 -0
- package/lib/module/chat.js.map +1 -0
- package/lib/module/index.js +14 -1
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +5 -1
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/chat.d.ts +10 -0
- package/lib/typescript/chat.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +9 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +10 -1
- package/src/chat.ts +44 -0
- package/src/index.ts +31 -4
package/src/NativeRNLlama.ts
CHANGED
@@ -19,7 +19,6 @@ export type NativeContextParams = {
 
   lora?: string // lora_adaptor
   lora_scaled?: number
-  lora_base?: string
 
   rope_freq_base?: number
   rope_freq_scale?: number
@@ -112,6 +111,11 @@ export type NativeSessionLoadResult = {
   prompt: string
 }
 
+export type NativeLlamaChatMessage = {
+  role: string
+  content: string
+}
+
 export interface Spec extends TurboModule {
   setContextLimit(limit: number): Promise<void>
   initContext(params: NativeContextParams): Promise<NativeLlamaContext>
@@ -132,6 +136,11 @@ export interface Spec extends TurboModule {
   stopCompletion(contextId: number): Promise<void>
   tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
   tokenizeSync(contextId: number, text: string): NativeTokenizeResult
+  getFormattedChat(
+    contextId: number,
+    messages: NativeLlamaChatMessage[],
+    chatTemplate?: string,
+  ): Promise<string>
   detokenize(contextId: number, tokens: number[]): Promise<string>
   embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>
   bench(
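The new `getFormattedChat` spec method renders an array of `NativeLlamaChatMessage` into a prompt string using the model's chat template, or an explicitly named one. A minimal sketch of calling it through the native module, assuming the default export of `NativeRNLlama.ts` is the TurboModule instance (as `index.ts` uses it); the context id and message values are illustrative:

```ts
import RNLlama from './NativeRNLlama'
import type { NativeLlamaChatMessage } from './NativeRNLlama'

const messages: NativeLlamaChatMessage[] = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Hello!' },
]

// contextId comes from a prior initContext() call; passing 'chatml' mirrors
// the fallback template the LlamaContext wrapper uses when the model's own
// chat template is not supported.
async function renderPrompt(contextId: number): Promise<string> {
  return RNLlama.getFormattedChat(contextId, messages, 'chatml')
}
```

In practice the `LlamaContext.getFormattedChat` wrapper added in `package/src/index.ts` below handles the template fallback automatically.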
package/src/chat.ts
ADDED
@@ -0,0 +1,44 @@
+import type { NativeLlamaChatMessage } from './NativeRNLlama'
+
+export type RNLlamaMessagePart = {
+  text?: string
+}
+
+export type RNLlamaOAICompatibleMessage = {
+  role: string
+  content?: string | RNLlamaMessagePart[] | any // any for check invalid content type
+}
+
+export function formatChat(
+  messages: RNLlamaOAICompatibleMessage[],
+): NativeLlamaChatMessage[] {
+  const chat: NativeLlamaChatMessage[] = []
+
+  messages.forEach((currMsg) => {
+    const role: string = currMsg.role || ''
+
+    let content: string = ''
+    if ('content' in currMsg) {
+      if (typeof currMsg.content === 'string') {
+        ;({ content } = currMsg)
+      } else if (Array.isArray(currMsg.content)) {
+        currMsg.content.forEach((part) => {
+          if ('text' in part) {
+            content += `${content ? '\n' : ''}${part.text}`
+          }
+        })
+      } else {
+        throw new TypeError(
+          "Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)",
+        )
+      }
+    } else {
+      throw new Error(
+        "Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)",
+      )
+    }
+
+    chat.push({ role, content })
+  })
+  return chat
+}
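`formatChat` normalizes OpenAI-compatible messages for the native layer: string `content` passes through unchanged, array content has its `text` parts joined with newlines, and any other content type throws. A small usage sketch with illustrative message values:

```ts
import { formatChat } from './chat'

// String content is kept as-is; text parts of an array are joined with '\n'.
const chat = formatChat([
  { role: 'system', content: 'You are a helpful assistant.' },
  {
    role: 'user',
    content: [{ text: 'First line.' }, { text: 'Second line.' }],
  },
])
// -> [
//   { role: 'system', content: 'You are a helpful assistant.' },
//   { role: 'user', content: 'First line.\nSecond line.' },
// ]
```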
package/src/index.ts
CHANGED
@@ -12,6 +12,8 @@ import type {
   NativeSessionLoadResult,
 } from './NativeRNLlama'
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
+import type { RNLlamaOAICompatibleMessage } from './chat'
+import { formatChat } from './chat'
 
 export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
 
@@ -40,8 +42,11 @@ export type ContextParams = NativeContextParams
 
 export type CompletionParams = Omit<
   NativeCompletionParams,
-  'emit_partial_completion'
->
+  'emit_partial_completion' | 'prompt'
+> & {
+  prompt?: string
+  messages?: RNLlamaOAICompatibleMessage[]
+}
 
 export type BenchResult = {
   modelDesc: string
@@ -60,7 +65,9 @@ export class LlamaContext {
 
   reasonNoGPU: string = ''
 
-  model:
+  model: {
+    isChatTemplateSupported?: boolean
+  } = {}
 
   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
     this.id = contextId
@@ -74,7 +81,7 @@ export class LlamaContext {
    */
   async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
     let path = filepath
-    if (
+    if (path.startsWith('file://')) path = path.slice(7)
     return RNLlama.loadSession(this.id, path)
   }
 
@@ -88,10 +95,27 @@ export class LlamaContext {
     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
   }
 
+  async getFormattedChat(
+    messages: RNLlamaOAICompatibleMessage[],
+  ): Promise<string> {
+    const chat = formatChat(messages)
+    return RNLlama.getFormattedChat(
+      this.id,
+      chat,
+      this.model?.isChatTemplateSupported ? undefined : 'chatml',
+    )
+  }
+
   async completion(
     params: CompletionParams,
     callback?: (data: TokenData) => void,
   ): Promise<NativeCompletionResult> {
+
+    let finalPrompt = params.prompt
+    if (params.messages) { // messages always win
+      finalPrompt = await this.getFormattedChat(params.messages)
+    }
+
     let tokenListener: any =
       callback &&
       EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
@@ -99,8 +123,11 @@ export class LlamaContext {
         if (contextId !== this.id) return
         callback(tokenResult)
       })
+
+    if (!finalPrompt) throw new Error('Prompt is required')
     const promise = RNLlama.completion(this.id, {
       ...params,
+      prompt: finalPrompt,
       emit_partial_completion: !!callback,
     })
     return promise
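Taken together, `completion()` now accepts either a raw `prompt` or OpenAI-style `messages`; when `messages` is present it is rendered via `getFormattedChat` and takes precedence over `prompt`. A hedged usage sketch, assuming the package keeps llama.rn's `initLlama` entry point; the model path and parameters are placeholders:

```ts
import { initLlama } from 'cui-llama.rn'

async function run() {
  // Placeholder model path; other fields follow NativeContextParams.
  const context = await initLlama({ model: '/path/to/model.gguf' })

  // messages take precedence over prompt and are rendered with the model's
  // chat template when supported, otherwise with the 'chatml' fallback.
  const result = await context.completion(
    {
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: 'Hello!' },
      ],
      n_predict: 64,
    },
    (data) => console.log(data.token), // streamed tokens
  )
  console.log(result.text)
}
```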