cactus-react-native 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +1 -1
  2. package/android/src/main/java/com/cactus/Cactus.java +35 -0
  3. package/android/src/main/java/com/cactus/LlamaContext.java +5 -0
  4. package/android/src/main/jni.cpp +46 -15
  5. package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
  11. package/android/src/newarch/java/com/cactus/CactusModule.java +5 -0
  12. package/android/src/oldarch/java/com/cactus/CactusModule.java +5 -0
  13. package/ios/Cactus.mm +21 -0
  14. package/ios/CactusContext.h +1 -0
  15. package/ios/CactusContext.mm +6 -2
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +0 -12
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  18. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -12
  19. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  20. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +0 -12
  21. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
  22. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -12
  23. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  24. package/lib/commonjs/NativeCactus.js +0 -1
  25. package/lib/commonjs/NativeCactus.js.map +1 -1
  26. package/lib/commonjs/chat.js +36 -0
  27. package/lib/commonjs/chat.js.map +1 -1
  28. package/lib/commonjs/index.js +0 -23
  29. package/lib/commonjs/index.js.map +1 -1
  30. package/lib/commonjs/lm.js +102 -57
  31. package/lib/commonjs/lm.js.map +1 -1
  32. package/lib/commonjs/tools.js +0 -7
  33. package/lib/commonjs/tools.js.map +1 -1
  34. package/lib/commonjs/tts.js +1 -4
  35. package/lib/commonjs/tts.js.map +1 -1
  36. package/lib/commonjs/vlm.js +100 -31
  37. package/lib/commonjs/vlm.js.map +1 -1
  38. package/lib/module/NativeCactus.js +0 -3
  39. package/lib/module/NativeCactus.js.map +1 -1
  40. package/lib/module/chat.js +34 -0
  41. package/lib/module/chat.js.map +1 -1
  42. package/lib/module/index.js +1 -10
  43. package/lib/module/index.js.map +1 -1
  44. package/lib/module/lm.js +102 -57
  45. package/lib/module/lm.js.map +1 -1
  46. package/lib/module/tools.js +0 -7
  47. package/lib/module/tools.js.map +1 -1
  48. package/lib/module/tts.js +1 -4
  49. package/lib/module/tts.js.map +1 -1
  50. package/lib/module/vlm.js +100 -31
  51. package/lib/module/vlm.js.map +1 -1
  52. package/lib/typescript/NativeCactus.d.ts +1 -142
  53. package/lib/typescript/NativeCactus.d.ts.map +1 -1
  54. package/lib/typescript/chat.d.ts +11 -0
  55. package/lib/typescript/chat.d.ts.map +1 -1
  56. package/lib/typescript/index.d.ts +2 -4
  57. package/lib/typescript/index.d.ts.map +1 -1
  58. package/lib/typescript/lm.d.ts +12 -6
  59. package/lib/typescript/lm.d.ts.map +1 -1
  60. package/lib/typescript/tools.d.ts.map +1 -1
  61. package/lib/typescript/tts.d.ts.map +1 -1
  62. package/lib/typescript/vlm.d.ts +10 -2
  63. package/lib/typescript/vlm.d.ts.map +1 -1
  64. package/package.json +1 -1
  65. package/src/NativeCactus.ts +6 -175
  66. package/src/chat.ts +46 -1
  67. package/src/index.ts +6 -17
  68. package/src/lm.ts +112 -58
  69. package/src/tools.ts +0 -5
  70. package/src/tts.ts +1 -4
  71. package/src/vlm.ts +113 -31
  72. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  73. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
  74. package/lib/commonjs/grammar.js +0 -560
  75. package/lib/commonjs/grammar.js.map +0 -1
  76. package/lib/module/grammar.js +0 -553
  77. package/lib/module/grammar.js.map +0 -1
  78. package/lib/typescript/grammar.d.ts +0 -37
  79. package/lib/typescript/grammar.d.ts.map +0 -1
  80. package/src/grammar.ts +0 -854
package/src/lm.ts CHANGED
@@ -1,4 +1,6 @@
1
1
  import { initLlama, LlamaContext } from './index'
2
+ // @ts-ignore
3
+ import { Platform } from 'react-native'
2
4
  import type {
3
5
  ContextParams,
4
6
  CompletionParams,
@@ -7,8 +9,10 @@ import type {
7
9
  EmbeddingParams,
8
10
  NativeEmbeddingResult,
9
11
  } from './index'
12
+
10
13
  import { Telemetry } from './telemetry'
11
14
  import { setCactusToken, getVertexAIEmbedding } from './remote'
15
+ import { ConversationHistoryManager } from './chat'
12
16
 
13
17
  interface CactusLMReturn {
14
18
  lm: CactusLM | null
@@ -16,10 +20,20 @@ interface CactusLMReturn {
16
20
  }
17
21
 
18
22
  export class CactusLM {
19
- private context: LlamaContext
23
+ protected context: LlamaContext
24
+ protected conversationHistoryManager: ConversationHistoryManager
25
+
26
+ // the initPromise enables a "async singleton" initialization pattern which
27
+ // protects against a race condition in the event of multiple init attempts
28
+ private static _initCache: Map<string, Promise<CactusLMReturn>> = new Map();
29
+
30
+ private static getCacheKey(params: ContextParams, cactusToken?: string, retryOptions?: { maxRetries?: number; delayMs?: number }): string {
31
+ return JSON.stringify({ params, cactusToken, retryOptions });
32
+ }
20
33
 
21
- private constructor(context: LlamaContext) {
34
+ protected constructor(context: LlamaContext) {
22
35
  this.context = context
36
+ this.conversationHistoryManager = new ConversationHistoryManager()
23
37
  }
24
38
 
25
39
  static async init(
@@ -28,72 +42,108 @@ export class CactusLM {
28
42
  cactusToken?: string,
29
43
  retryOptions?: { maxRetries?: number; delayMs?: number },
30
44
  ): Promise<CactusLMReturn> {
45
+
31
46
  if (cactusToken) {
32
47
  setCactusToken(cactusToken);
33
48
  }
34
49
 
35
- const maxRetries = retryOptions?.maxRetries ?? 3;
36
- const delayMs = retryOptions?.delayMs ?? 1000;
37
-
38
- const configs = [
39
- params,
40
- { ...params, n_gpu_layers: 0 }
41
- ];
42
-
43
- const sleep = (ms: number): Promise<void> => {
44
- return new Promise(resolve => {
45
- const start = Date.now();
46
- const wait = () => {
47
- if (Date.now() - start >= ms) {
48
- resolve();
49
- } else {
50
- Promise.resolve().then(wait);
51
- }
52
- };
53
- wait();
54
- });
55
- };
50
+ const key = CactusLM.getCacheKey(params, cactusToken, retryOptions);
51
+ if (CactusLM._initCache.has(key)) {
52
+ // concurrent initialization calls with the same params all get the same cached Promise
53
+ return CactusLM._initCache.get(key)!;
54
+ }
56
55
 
57
- for (const config of configs) {
58
- let lastError: Error | null = null;
59
-
60
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
61
- try {
62
- const context = await initLlama(config, onProgress);
63
- return { lm: new CactusLM(context), error: null };
64
- } catch (e) {
65
- lastError = e as Error;
66
- const isLastConfig = configs.indexOf(config) === configs.length - 1;
67
- const isLastAttempt = attempt === maxRetries;
68
-
69
- Telemetry.error(e as Error, {
70
- n_gpu_layers: config.n_gpu_layers ?? null,
71
- n_ctx: config.n_ctx ?? null,
72
- model: config.model ?? null,
73
- });
74
-
75
- if (!isLastAttempt) {
76
- const delay = delayMs * Math.pow(2, attempt - 1);
77
- await sleep(delay);
78
- } else if (!isLastConfig) {
79
- break;
56
+ const initPromise = (async () => {
57
+ const maxRetries = retryOptions?.maxRetries ?? 3;
58
+ const delayMs = retryOptions?.delayMs ?? 1000;
59
+
60
+ const configs = [
61
+ params,
62
+ { ...params, n_gpu_layers: 0 }
63
+ ];
64
+
65
+ const sleep = (ms: number): Promise<void> => {
66
+ return new Promise(resolve => {
67
+ const start = Date.now();
68
+ const wait = () => {
69
+ if (Date.now() - start >= ms) {
70
+ resolve();
71
+ } else {
72
+ Promise.resolve().then(wait);
73
+ }
74
+ };
75
+ wait();
76
+ });
77
+ };
78
+
79
+ for (const config of configs) {
80
+ let lastError: Error | null = null;
81
+
82
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
83
+ try {
84
+ const context = await initLlama(config, onProgress);
85
+ return { lm: new CactusLM(context), error: null };
86
+ } catch (e) {
87
+ lastError = e as Error;
88
+ const isLastConfig = configs.indexOf(config) === configs.length - 1;
89
+ const isLastAttempt = attempt === maxRetries;
90
+
91
+ Telemetry.error(e as Error, {
92
+ n_gpu_layers: config.n_gpu_layers ?? null,
93
+ n_ctx: config.n_ctx ?? null,
94
+ model: config.model ?? null,
95
+ });
96
+
97
+ if (!isLastAttempt) {
98
+ const delay = delayMs * Math.pow(2, attempt - 1);
99
+ await sleep(delay);
100
+ } else if (!isLastConfig) {
101
+ break;
102
+ }
80
103
  }
81
104
  }
105
+
106
+ if (configs.indexOf(config) === configs.length - 1 && lastError) {
107
+ return { lm: null, error: lastError };
108
+ }
82
109
  }
83
-
84
- if (configs.indexOf(config) === configs.length - 1 && lastError) {
85
- return { lm: null, error: lastError };
86
- }
110
+ return { lm: null, error: new Error('Failed to initialize CactusLM after all retries') };
111
+ })();
112
+
113
+ CactusLM._initCache.set(key, initPromise);
114
+
115
+ const result = await initPromise;
116
+ if (result.error) {
117
+ CactusLM._initCache.delete(key); // Reset on failure to allow retries
87
118
  }
88
- return { lm: null, error: new Error('Failed to initialize CactusLM after all retries') };
119
+ return result;
89
120
  }
90
121
 
91
- async completion(
122
+ completion = async (
92
123
  messages: CactusOAICompatibleMessage[],
93
124
  params: CompletionParams = {},
94
125
  callback?: (data: any) => void,
95
- ): Promise<NativeCompletionResult> {
96
- return await this.context.completion({ messages, ...params }, callback);
126
+ ): Promise<NativeCompletionResult> => {
127
+ const { newMessages, requiresReset } =
128
+ this.conversationHistoryManager.processNewMessages(messages);
129
+
130
+ if (requiresReset) {
131
+ this.context?.rewind();
132
+ this.conversationHistoryManager.reset();
133
+ }
134
+
135
+ if (newMessages.length === 0) {
136
+ console.warn('No messages to complete!');
137
+ }
138
+
139
+ const result = await this.context.completion({ messages: newMessages, ...params }, callback);
140
+
141
+ this.conversationHistoryManager.update(newMessages, {
142
+ role: 'assistant',
143
+ content: result.content,
144
+ });
145
+
146
+ return result;
97
147
  }
98
148
 
99
149
  async embedding(
@@ -136,23 +186,27 @@ export class CactusLM {
136
186
  return result;
137
187
  }
138
188
 
139
- private async _handleLocalEmbedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult> {
189
+ protected async _handleLocalEmbedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult> {
140
190
  return this.context.embedding(text, params);
141
191
  }
142
192
 
143
- private async _handleRemoteEmbedding(text: string): Promise<NativeEmbeddingResult> {
193
+ protected async _handleRemoteEmbedding(text: string): Promise<NativeEmbeddingResult> {
144
194
  const embeddingValues = await getVertexAIEmbedding(text);
145
195
  return {
146
196
  embedding: embeddingValues,
147
197
  };
148
198
  }
149
199
 
150
- async rewind(): Promise<void> {
151
- // @ts-ignore
200
+ rewind = async (): Promise<void> => {
152
201
  return this.context?.rewind()
153
202
  }
154
203
 
155
204
  async release(): Promise<void> {
156
205
  return this.context.release()
157
206
  }
207
+
208
+ async stopCompletion(): Promise<void> {
209
+ return await this.context.stopCompletion()
210
+ }
211
+
158
212
  }
package/src/tools.ts CHANGED
@@ -56,22 +56,18 @@ export class Tools {
56
56
 
57
57
  export async function parseAndExecuteTool(result: NativeCompletionResult, tools: Tools): Promise<{toolCalled: boolean, toolName?: string, toolInput?: any, toolOutput?: any}> {
58
58
  if (!result.tool_calls || result.tool_calls.length === 0) {
59
- // console.log('No tool calls found');
60
59
  return {toolCalled: false};
61
60
  }
62
61
 
63
62
  try {
64
63
  const toolCall = result.tool_calls[0];
65
64
  if (!toolCall) {
66
- // console.log('No tool call found');
67
65
  return {toolCalled: false};
68
66
  }
69
67
  const toolName = toolCall.function.name;
70
68
  const toolInput = JSON.parse(toolCall.function.arguments);
71
69
 
72
- // console.log('Calling tool:', toolName, toolInput);
73
70
  const toolOutput = await tools.execute(toolName, toolInput);
74
- // console.log('Tool called result:', toolOutput);
75
71
 
76
72
  return {
77
73
  toolCalled: true,
@@ -80,7 +76,6 @@ export async function parseAndExecuteTool(result: NativeCompletionResult, tools:
80
76
  toolOutput
81
77
  };
82
78
  } catch (error) {
83
- // console.error('Error parsing tool call:', error);
84
79
  return {toolCalled: false};
85
80
  }
86
81
  }
package/src/tts.ts CHANGED
@@ -31,10 +31,7 @@ export class CactusTTS {
31
31
  speakerJsonStr,
32
32
  textToSpeak,
33
33
  )
34
- // This part is simplified. In a real scenario, the tokens from
35
- // the main model would be generated and passed to decodeAudioTokens.
36
- // For now, we are assuming a direct path which may not be fully functional
37
- // without the main model's token output for TTS.
34
+ // To-DO: Fix
38
35
  const tokens = (await this.context.tokenize(formatted_prompt)).tokens
39
36
  return decodeAudioTokens(this.context.id, tokens)
40
37
  }
package/src/vlm.ts CHANGED
@@ -3,15 +3,15 @@ import {
3
3
  initMultimodal,
4
4
  multimodalCompletion,
5
5
  LlamaContext,
6
+ type ContextParams,
7
+ type CompletionParams,
8
+ type CactusOAICompatibleMessage,
9
+ type NativeCompletionResult,
6
10
  } from './index'
7
- import type {
8
- ContextParams,
9
- CompletionParams,
10
- CactusOAICompatibleMessage,
11
- NativeCompletionResult,
12
- } from './index'
11
+
13
12
  import { Telemetry } from './telemetry'
14
13
  import { setCactusToken, getTextCompletion, getVisionCompletion } from './remote'
14
+ import { ConversationHistoryManager } from './chat'
15
15
 
16
16
  interface CactusVLMReturn {
17
17
  vlm: CactusVLM | null
@@ -29,43 +29,101 @@ export type VLMCompletionParams = Omit<CompletionParams, 'prompt'> & {
29
29
 
30
30
  export class CactusVLM {
31
31
  private context: LlamaContext
32
-
32
+ protected conversationHistoryManager: ConversationHistoryManager
33
+
34
+ // see CactusLM for detailed docs
35
+ private static _initCache: Map<string, Promise<CactusVLMReturn>> = new Map();
36
+
37
+ private static getCacheKey(params: VLMContextParams, cactusToken?: string, retryOptions?: { maxRetries?: number; delayMs?: number }): string {
38
+ return JSON.stringify({ params, cactusToken, retryOptions });
39
+ }
40
+
33
41
  private constructor(context: LlamaContext) {
34
42
  this.context = context
43
+ this.conversationHistoryManager = new ConversationHistoryManager()
35
44
  }
36
45
 
37
46
  static async init(
38
47
  params: VLMContextParams,
39
48
  onProgress?: (progress: number) => void,
40
49
  cactusToken?: string,
50
+ retryOptions?: { maxRetries?: number; delayMs?: number },
41
51
  ): Promise<CactusVLMReturn> {
42
52
  if (cactusToken) {
43
53
  setCactusToken(cactusToken);
44
54
  }
45
55
 
46
- const configs = [
47
- params,
48
- { ...params, n_gpu_layers: 0 }
49
- ];
56
+ const key = CactusVLM.getCacheKey(params, cactusToken, retryOptions);
57
+ if (CactusVLM._initCache.has(key)) {
58
+ return CactusVLM._initCache.get(key)!;
59
+ }
50
60
 
51
- for (const config of configs) {
52
- try {
53
- const context = await initLlama(config, onProgress)
54
- await initMultimodal(context.id, params.mmproj, false)
55
- return {vlm: new CactusVLM(context), error: null}
56
- } catch (e) {
57
- Telemetry.error(e as Error, {
58
- n_gpu_layers: config.n_gpu_layers ?? null,
59
- n_ctx: config.n_ctx ?? null,
60
- model: config.model ?? null,
61
+ const initPromise = (async () => {
62
+ const maxRetries = retryOptions?.maxRetries ?? 3;
63
+ const delayMs = retryOptions?.delayMs ?? 1000;
64
+
65
+ const configs = [
66
+ params,
67
+ { ...params, n_gpu_layers: 0 }
68
+ ];
69
+
70
+ const sleep = (ms: number): Promise<void> => {
71
+ return new Promise(resolve => {
72
+ const start = Date.now();
73
+ const wait = () => {
74
+ if (Date.now() - start >= ms) {
75
+ resolve();
76
+ } else {
77
+ Promise.resolve().then(wait);
78
+ }
79
+ };
80
+ wait();
61
81
  });
62
- if (configs.indexOf(config) === configs.length - 1) {
63
- return {vlm: null, error: e as Error}
82
+ };
83
+
84
+ for (const config of configs) {
85
+ let lastError: Error | null = null;
86
+
87
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
88
+ try {
89
+ const context = await initLlama(config, onProgress)
90
+ await initMultimodal(context.id, params.mmproj, false)
91
+ return {vlm: new CactusVLM(context), error: null}
92
+ } catch (e) {
93
+ lastError = e as Error;
94
+ const isLastConfig = configs.indexOf(config) === configs.length - 1;
95
+ const isLastAttempt = attempt === maxRetries;
96
+
97
+ Telemetry.error(e as Error, {
98
+ n_gpu_layers: config.n_gpu_layers ?? null,
99
+ n_ctx: config.n_ctx ?? null,
100
+ model: config.model ?? null,
101
+ });
102
+
103
+ if (!isLastAttempt) {
104
+ const delay = delayMs * Math.pow(2, attempt - 1);
105
+ await sleep(delay);
106
+ } else if (!isLastConfig) {
107
+ break;
108
+ }
109
+ }
110
+ }
111
+
112
+ if (configs.indexOf(config) === configs.length - 1 && lastError) {
113
+ return {vlm: null, error: lastError}
64
114
  }
65
115
  }
66
- }
67
116
 
68
- return {vlm: null, error: new Error('Failed to initialize CactusVLM')}
117
+ return {vlm: null, error: new Error('Failed to initialize CactusVLM')}
118
+ })();
119
+
120
+ CactusVLM._initCache.set(key, initPromise);
121
+
122
+ const result = await initPromise;
123
+ if (result.error) {
124
+ CactusVLM._initCache.delete(key);
125
+ }
126
+ return result;
69
127
  }
70
128
 
71
129
  async completion(
@@ -111,26 +169,47 @@ export class CactusVLM {
111
169
  return result;
112
170
  }
113
171
 
114
- private async _handleLocalCompletion(
172
+ private _handleLocalCompletion = async(
115
173
  messages: CactusOAICompatibleMessage[],
116
174
  params: VLMCompletionParams,
117
175
  callback?: (data: any) => void,
118
- ): Promise<NativeCompletionResult> {
176
+ ): Promise<NativeCompletionResult> => {
177
+ const { newMessages, requiresReset } =
178
+ this.conversationHistoryManager.processNewMessages(messages);
179
+
180
+ if (requiresReset) {
181
+ this.context?.rewind();
182
+ this.conversationHistoryManager.reset();
183
+ }
184
+
185
+ if (newMessages.length === 0) {
186
+ console.warn('No messages to complete!');
187
+ }
188
+
189
+ let result: NativeCompletionResult;
190
+
119
191
  if (params.images && params.images.length > 0) {
120
- const formattedPrompt = await this.context.getFormattedChat(messages)
192
+ const formattedPrompt = await this.context.getFormattedChat(newMessages)
121
193
  const prompt =
122
194
  typeof formattedPrompt === 'string'
123
195
  ? formattedPrompt
124
196
  : formattedPrompt.prompt
125
- return await multimodalCompletion(
197
+ result = await multimodalCompletion(
126
198
  this.context.id,
127
199
  prompt,
128
200
  params.images,
129
201
  { ...params, prompt, emit_partial_completion: !!callback },
130
202
  )
131
203
  } else {
132
- return await this.context.completion({ messages, ...params }, callback)
204
+ result = await this.context.completion({ messages: newMessages, ...params }, callback)
133
205
  }
206
+
207
+ this.conversationHistoryManager.update(newMessages, {
208
+ role: 'assistant',
209
+ content: result.content || result.text,
210
+ });
211
+
212
+ return result;
134
213
  }
135
214
 
136
215
  private async _handleRemoteCompletion(
@@ -181,11 +260,14 @@ export class CactusVLM {
181
260
  }
182
261
 
183
262
  async rewind(): Promise<void> {
184
- // @ts-ignore
185
263
  return this.context?.rewind()
186
264
  }
187
265
 
188
266
  async release(): Promise<void> {
189
267
  return this.context.release()
190
268
  }
269
+
270
+ async stopCompletion(): Promise<void> {
271
+ return await this.context.stopCompletion()
272
+ }
191
273
  }