cui-llama.rn 1.0.3 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +35 -39
  2. package/android/src/main/CMakeLists.txt +12 -2
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +29 -9
  4. package/android/src/main/java/com/rnllama/RNLlama.java +33 -1
  5. package/android/src/main/jni.cpp +62 -8
  6. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +5 -0
  7. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +5 -0
  8. package/cpp/common.cpp +3237 -3231
  9. package/cpp/common.h +469 -468
  10. package/cpp/ggml-aarch64.c +2193 -2193
  11. package/cpp/ggml-aarch64.h +39 -39
  12. package/cpp/ggml-alloc.c +1036 -1042
  13. package/cpp/ggml-backend-impl.h +153 -153
  14. package/cpp/ggml-backend.c +2240 -2234
  15. package/cpp/ggml-backend.h +238 -238
  16. package/cpp/ggml-common.h +1833 -1829
  17. package/cpp/ggml-impl.h +755 -655
  18. package/cpp/ggml-metal.h +65 -65
  19. package/cpp/ggml-metal.m +3269 -3269
  20. package/cpp/ggml-quants.c +14872 -14860
  21. package/cpp/ggml-quants.h +132 -132
  22. package/cpp/ggml.c +22055 -22044
  23. package/cpp/ggml.h +2453 -2447
  24. package/cpp/llama-grammar.cpp +539 -0
  25. package/cpp/llama-grammar.h +39 -0
  26. package/cpp/llama-impl.h +26 -0
  27. package/cpp/llama-sampling.cpp +635 -0
  28. package/cpp/llama-sampling.h +56 -0
  29. package/cpp/llama-vocab.cpp +1721 -0
  30. package/cpp/llama-vocab.h +130 -0
  31. package/cpp/llama.cpp +19171 -21892
  32. package/cpp/llama.h +1240 -1217
  33. package/cpp/log.h +737 -737
  34. package/cpp/rn-llama.hpp +207 -29
  35. package/cpp/sampling.cpp +460 -460
  36. package/cpp/sgemm.cpp +1027 -1027
  37. package/cpp/sgemm.h +14 -14
  38. package/cpp/unicode.cpp +6 -0
  39. package/cpp/unicode.h +3 -0
  40. package/ios/RNLlama.mm +15 -6
  41. package/ios/RNLlamaContext.h +2 -8
  42. package/ios/RNLlamaContext.mm +41 -34
  43. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  44. package/lib/commonjs/chat.js +37 -0
  45. package/lib/commonjs/chat.js.map +1 -0
  46. package/lib/commonjs/index.js +14 -1
  47. package/lib/commonjs/index.js.map +1 -1
  48. package/lib/module/NativeRNLlama.js.map +1 -1
  49. package/lib/module/chat.js +31 -0
  50. package/lib/module/chat.js.map +1 -0
  51. package/lib/module/index.js +14 -1
  52. package/lib/module/index.js.map +1 -1
  53. package/lib/typescript/NativeRNLlama.d.ts +5 -1
  54. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  55. package/lib/typescript/chat.d.ts +10 -0
  56. package/lib/typescript/chat.d.ts.map +1 -0
  57. package/lib/typescript/index.d.ts +9 -2
  58. package/lib/typescript/index.d.ts.map +1 -1
  59. package/package.json +1 -1
  60. package/src/NativeRNLlama.ts +10 -1
  61. package/src/chat.ts +44 -0
  62. package/src/index.ts +31 -4
package/src/NativeRNLlama.ts CHANGED
@@ -19,7 +19,6 @@ export type NativeContextParams = {
 
   lora?: string // lora_adaptor
   lora_scaled?: number
-  lora_base?: string
 
   rope_freq_base?: number
   rope_freq_scale?: number
@@ -112,6 +111,11 @@ export type NativeSessionLoadResult = {
   prompt: string
 }
 
+export type NativeLlamaChatMessage = {
+  role: string
+  content: string
+}
+
 export interface Spec extends TurboModule {
   setContextLimit(limit: number): Promise<void>
   initContext(params: NativeContextParams): Promise<NativeLlamaContext>
@@ -132,6 +136,11 @@ export interface Spec extends TurboModule {
   stopCompletion(contextId: number): Promise<void>
   tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
   tokenizeSync(contextId: number, text: string): NativeTokenizeResult
+  getFormattedChat(
+    contextId: number,
+    messages: NativeLlamaChatMessage[],
+    chatTemplate?: string,
+  ): Promise<string>
   detokenize(contextId: number, tokens: number[]): Promise<string>
   embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>
   bench(
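The NativeRNLlama.ts changes drop the `lora_base` context parameter and add a `NativeLlamaChatMessage` type plus a `getFormattedChat` method on the Spec, exposing the native chat-template formatter to JavaScript. A minimal sketch of calling the new method directly; the context id, messages, and template name below are illustrative placeholders, not taken from the diff:

```ts
import RNLlama from './NativeRNLlama'
import type { NativeLlamaChatMessage } from './NativeRNLlama'

// Illustrative only: format a chat through the native module. The context id
// would come from initContext(); passing 'chatml' forces that template
// instead of the model's built-in one.
const messages: NativeLlamaChatMessage[] = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Hello!' },
]
const formatted: Promise<string> = RNLlama.getFormattedChat(1, messages, 'chatml')
```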
package/src/chat.ts ADDED
@@ -0,0 +1,44 @@
+import type { NativeLlamaChatMessage } from './NativeRNLlama'
+
+export type RNLlamaMessagePart = {
+  text?: string
+}
+
+export type RNLlamaOAICompatibleMessage = {
+  role: string
+  content?: string | RNLlamaMessagePart[] | any // any for check invalid content type
+}
+
+export function formatChat(
+  messages: RNLlamaOAICompatibleMessage[],
+): NativeLlamaChatMessage[] {
+  const chat: NativeLlamaChatMessage[] = []
+
+  messages.forEach((currMsg) => {
+    const role: string = currMsg.role || ''
+
+    let content: string = ''
+    if ('content' in currMsg) {
+      if (typeof currMsg.content === 'string') {
+        ;({ content } = currMsg)
+      } else if (Array.isArray(currMsg.content)) {
+        currMsg.content.forEach((part) => {
+          if ('text' in part) {
+            content += `${content ? '\n' : ''}${part.text}`
+          }
+        })
+      } else {
+        throw new TypeError(
+          "Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)",
+        )
+      }
+    } else {
+      throw new Error(
+        "Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)",
+      )
+    }
+
+    chat.push({ role, content })
+  })
+  return chat
+}
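The new formatChat helper normalizes OpenAI-style messages, whose content may be a plain string or an array of text parts, into the flat { role, content } pairs the native template code expects; text parts are joined with newlines, and a missing or unrecognized content shape throws. A small usage sketch with made-up sample messages:

```ts
import { formatChat } from './chat'

// Multi-part content is concatenated with newlines; string content passes through.
const chat = formatChat([
  { role: 'system', content: 'You are a helpful assistant.' },
  {
    role: 'user',
    content: [{ text: 'Summarize this file.' }, { text: 'Keep it short.' }],
  },
])
// chat[1].content === 'Summarize this file.\nKeep it short.'
```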
package/src/index.ts CHANGED
@@ -12,6 +12,8 @@ import type {
   NativeSessionLoadResult,
 } from './NativeRNLlama'
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
+import type { RNLlamaOAICompatibleMessage } from './chat'
+import { formatChat } from './chat'
 
 export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
 
@@ -40,8 +42,11 @@ export type ContextParams = NativeContextParams
 
 export type CompletionParams = Omit<
   NativeCompletionParams,
-  'emit_partial_completion'
->
+  'emit_partial_completion' | 'prompt'
+> & {
+  prompt?: string
+  messages?: RNLlamaOAICompatibleMessage[]
+}
 
 export type BenchResult = {
   modelDesc: string
@@ -60,7 +65,9 @@ export class LlamaContext {
 
   reasonNoGPU: string = ''
 
-  model: Object = {}
+  model: {
+    isChatTemplateSupported?: boolean
+  } = {}
 
   constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) {
     this.id = contextId
@@ -74,7 +81,7 @@ export class LlamaContext {
    */
   async loadSession(filepath: string): Promise<NativeSessionLoadResult> {
     let path = filepath
-    if (filepath.startsWith(`file://`)) path = path.slice(7)
+    if (path.startsWith('file://')) path = path.slice(7)
     return RNLlama.loadSession(this.id, path)
   }
 
@@ -88,10 +95,27 @@
     return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1)
   }
 
+  async getFormattedChat(
+    messages: RNLlamaOAICompatibleMessage[],
+  ): Promise<string> {
+    const chat = formatChat(messages)
+    return RNLlama.getFormattedChat(
+      this.id,
+      chat,
+      this.model?.isChatTemplateSupported ? undefined : 'chatml',
+    )
+  }
+
   async completion(
     params: CompletionParams,
     callback?: (data: TokenData) => void,
   ): Promise<NativeCompletionResult> {
+
+    let finalPrompt = params.prompt
+    if (params.messages) { // messages always win
+      finalPrompt = await this.getFormattedChat(params.messages)
+    }
+
     let tokenListener: any =
       callback &&
       EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => {
@@ -99,8 +123,11 @@
       if (contextId !== this.id) return
       callback(tokenResult)
     })
+
+    if (!finalPrompt) throw new Error('Prompt is required')
    const promise = RNLlama.completion(this.id, {
       ...params,
+      prompt: finalPrompt,
       emit_partial_completion: !!callback,
     })
     return promise
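Taken together, the index.ts changes let completion() accept either a raw prompt or an OpenAI-style messages array; when messages is present it takes precedence, is rendered through getFormattedChat (falling back to the 'chatml' template when the model's own template is unsupported), and is passed down to the native call as the prompt. A hedged usage sketch, assuming the package's existing initLlama helper and a placeholder model path:

```ts
import { initLlama } from 'cui-llama.rn'

async function run() {
  // Placeholder model path for illustration only.
  const context = await initLlama({ model: '/path/to/model.gguf' })

  // `messages` wins over `prompt` and is formatted with the model's chat
  // template (or 'chatml' when that template is unsupported).
  const result = await context.completion(
    {
      messages: [{ role: 'user', content: 'Hello!' }],
      n_predict: 64,
    },
    (data) => console.log(data.token), // streaming token callback
  )
  console.log(result.text)
}
```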