cactus-react-native 0.1.3 → 0.2.1

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (79)
  1. package/README.md +551 -720
  2. package/android/src/main/java/com/cactus/Cactus.java +41 -0
  3. package/android/src/main/java/com/cactus/LlamaContext.java +19 -0
  4. package/android/src/newarch/java/com/cactus/CactusModule.java +5 -0
  5. package/android/src/oldarch/java/com/cactus/CactusModule.java +5 -0
  6. package/ios/Cactus.mm +14 -0
  7. package/ios/CactusContext.h +1 -0
  8. package/ios/CactusContext.mm +18 -0
  9. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  10. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  11. package/lib/commonjs/NativeCactus.js +10 -0
  12. package/lib/commonjs/NativeCactus.js.map +1 -1
  13. package/lib/commonjs/chat.js +37 -0
  14. package/lib/commonjs/grammar.js +560 -0
  15. package/lib/commonjs/index.js +545 -0
  16. package/lib/commonjs/index.js.map +1 -1
  17. package/lib/commonjs/lm.js +106 -0
  18. package/lib/commonjs/lm.js.map +1 -1
  19. package/lib/commonjs/projectId.js +8 -0
  20. package/lib/commonjs/projectId.js.map +1 -0
  21. package/lib/commonjs/remote.js +153 -0
  22. package/lib/commonjs/remote.js.map +1 -0
  23. package/lib/commonjs/telemetry.js +103 -0
  24. package/lib/commonjs/telemetry.js.map +1 -0
  25. package/lib/commonjs/tools.js +79 -0
  26. package/lib/commonjs/tools.js.map +1 -0
  27. package/lib/commonjs/tts.js +32 -0
  28. package/lib/commonjs/tts.js.map +1 -1
  29. package/lib/commonjs/vlm.js +150 -0
  30. package/lib/commonjs/vlm.js.map +1 -0
  31. package/lib/module/NativeCactus.js +8 -0
  32. package/lib/module/NativeCactus.js.map +1 -1
  33. package/lib/module/chat.js +33 -0
  34. package/lib/module/grammar.js +553 -0
  35. package/lib/module/index.js +435 -0
  36. package/lib/module/index.js.map +1 -1
  37. package/lib/module/lm.js +101 -0
  38. package/lib/module/lm.js.map +1 -0
  39. package/lib/module/projectId.js +4 -0
  40. package/lib/module/projectId.js.map +1 -0
  41. package/lib/module/remote.js +144 -0
  42. package/lib/module/remote.js.map +1 -0
  43. package/lib/module/telemetry.js +98 -0
  44. package/lib/module/telemetry.js.map +1 -0
  45. package/lib/module/tools.js +73 -0
  46. package/lib/module/tools.js.map +1 -0
  47. package/lib/module/tts.js +27 -0
  48. package/lib/module/tts.js.map +1 -1
  49. package/lib/module/vlm.js +145 -0
  50. package/lib/module/vlm.js.map +1 -1
  51. package/lib/typescript/NativeCactus.d.ts +7 -0
  52. package/lib/typescript/NativeCactus.d.ts.map +1 -1
  53. package/lib/typescript/index.d.ts +3 -1
  54. package/lib/typescript/index.d.ts.map +1 -1
  55. package/lib/typescript/lm.d.ts +11 -34
  56. package/lib/typescript/lm.d.ts.map +1 -1
  57. package/lib/typescript/projectId.d.ts +2 -0
  58. package/lib/typescript/projectId.d.ts.map +1 -0
  59. package/lib/typescript/remote.d.ts +7 -0
  60. package/lib/typescript/remote.d.ts.map +1 -0
  61. package/lib/typescript/telemetry.d.ts +25 -0
  62. package/lib/typescript/telemetry.d.ts.map +1 -0
  63. package/lib/typescript/tools.d.ts +0 -3
  64. package/lib/typescript/tools.d.ts.map +1 -1
  65. package/lib/typescript/tts.d.ts.map +1 -1
  66. package/lib/typescript/vlm.d.ts +14 -34
  67. package/lib/typescript/vlm.d.ts.map +1 -1
  68. package/package.json +4 -4
  69. package/scripts/postInstall.js +33 -0
  70. package/src/NativeCactus.ts +7 -0
  71. package/src/index.ts +122 -46
  72. package/src/lm.ts +80 -5
  73. package/src/projectId.ts +1 -0
  74. package/src/remote.ts +175 -0
  75. package/src/telemetry.ts +138 -0
  76. package/src/tools.ts +17 -58
  77. package/src/vlm.ts +129 -8
  78. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  79. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
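Note: the headline API change in 0.2.x, visible throughout the README diff below, is that `CactusLM.init` and `CactusVLM.init` now resolve to a `{ lm, error }` / `{ vlm, error }` pair instead of the instance itself. A minimal migration sketch; the error's concrete type is not shown in this diff and is assumed here to be a plain `Error`:

```typescript
import { CactusLM } from 'cactus-react-native';

// 0.1.x style (old README): const lm = await CactusLM.init({ ... });
// 0.2.x style (new README): init resolves to { lm, error }.
async function initOrThrow(modelPath: string): Promise<CactusLM> {
  const { lm, error } = await CactusLM.init({
    model: modelPath,
    n_ctx: 2048,
    n_threads: 4,
  });
  if (error) throw error; // surface init failures the 0.1.x way
  return lm;
}
```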
package/README.md CHANGED
@@ -1,48 +1,49 @@
  # Cactus React Native

- A powerful React Native library for running Large Language Models (LLMs) and Vision Language Models (VLMs) directly on mobile devices, with full support for chat completions, multimodal inputs, embeddings, text-to-speech and advanced features.
+ Running LLMs, VLMs, and TTS models directly on mobile devices.

  ## Installation

- ```bash
- npm install cactus-react-native react-native-fs
- # or
- yarn add cactus-react-native react-native-fs
+ ```json
+ {
+   "dependencies": {
+     "cactus-react-native": "^0.2.0",
+     "react-native-fs": "^2.20.0"
+   }
+ }
  ```

- **Additional Setup:**
- - For iOS: `cd ios && npx pod-install` or `yarn pod-install`
- - For Android: Ensure your `minSdkVersion` is 24 or higher
-
- > **Important**: `react-native-fs` is required for file system access to download and manage model files locally.
+ **Setup:**
+ - iOS: `cd ios && npx pod-install`
+ - Android: Ensure `minSdkVersion` 24+

  ## Quick Start

- ### Basic Text Completion
-
  ```typescript
  import { CactusLM } from 'cactus-react-native';
+ import RNFS from 'react-native-fs';

- // Initialize a language model
- const lm = await CactusLM.init({
-   model: '/path/to/your/model.gguf',
+ const modelPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
+
+ const { lm, error } = await CactusLM.init({
+   model: modelPath,
    n_ctx: 2048,
    n_threads: 4,
  });

- // Generate text
- const messages = [{ role: 'user', content: 'Hello, how are you?' }];
- const params = { n_predict: 100, temperature: 0.7 };
+ if (error) throw error;

- const result = await lm.completion(messages, params);
+ const messages = [{ role: 'user', content: 'Hello!' }];
+ const result = await lm.completion(messages, { n_predict: 100 });
  console.log(result.text);
+ lm.release();
  ```

- ### Complete Chat App Example
+ ## Streaming Chat

  ```typescript
  import React, { useState, useEffect } from 'react';
- import { View, Text, TextInput, TouchableOpacity } from 'react-native';
+ import { View, Text, TextInput, TouchableOpacity, ScrollView, ActivityIndicator } from 'react-native';
  import { CactusLM } from 'cactus-react-native';
  import RNFS from 'react-native-fs';

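Note: the Quick Start above assumes `model.gguf` already exists in the documents directory. A download-if-missing guard in the style the README itself uses elsewhere (`RNFS.exists` / `RNFS.downloadFile`; the URL is the Qwen3-0.6B one from the streaming-chat hunk below):

```typescript
import RNFS from 'react-native-fs';

// Sketch: ensure the model file exists at an absolute local path before init.
async function ensureModel(): Promise<string> {
  const url = 'https://huggingface.co/Cactus-Compute/Qwen3-600m-Instruct-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf';
  const path = `${RNFS.DocumentDirectoryPath}/model.gguf`;
  if (!(await RNFS.exists(path))) {
    await RNFS.downloadFile({ fromUrl: url, toFile: path }).promise;
  }
  return path; // pass this absolute path to CactusLM.init
}
```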
@@ -51,110 +52,170 @@ interface Message {
    content: string;
  }

- export default function ChatApp() {
+ export default function ChatScreen() {
    const [lm, setLM] = useState<CactusLM | null>(null);
    const [messages, setMessages] = useState<Message[]>([]);
    const [input, setInput] = useState('');
-   const [loading, setLoading] = useState(true);
+   const [isLoading, setIsLoading] = useState(true);
+   const [isGenerating, setIsGenerating] = useState(false);

    useEffect(() => {
      initializeModel();
+     return () => {
+       lm?.release();
+     };
    }, []);

-   async function initializeModel() {
+   const initializeModel = async () => {
      try {
-       // Download model (example URL)
        const modelUrl = 'https://huggingface.co/Cactus-Compute/Qwen3-600m-Instruct-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf';
-       const modelPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
-
-       // Download if not exists
-       if (!(await RNFS.exists(modelPath))) {
-         await RNFS.downloadFile({
-           fromUrl: modelUrl,
-           toFile: modelPath,
-         }).promise;
-       }
+       const modelPath = await downloadModel(modelUrl, 'qwen-600m.gguf');

-       // Initialize language model
-       const cactusLM = await CactusLM.init({
+       const { lm: model, error } = await CactusLM.init({
          model: modelPath,
          n_ctx: 2048,
          n_threads: 4,
-         n_gpu_layers: 99, // Use GPU acceleration
+         n_gpu_layers: 99,
        });

-       setLM(cactusLM);
-       setLoading(false);
+       if (error) throw error;
+       setLM(model);
      } catch (error) {
        console.error('Failed to initialize model:', error);
+     } finally {
+       setIsLoading(false);
      }
-   }
+   };
+
+   const downloadModel = async (url: string, filename: string): Promise<string> => {
+     const path = `${RNFS.DocumentDirectoryPath}/${filename}`;
+
+     if (await RNFS.exists(path)) return path;
+
+     console.log('Downloading model...');
+     await RNFS.downloadFile({
+       fromUrl: url,
+       toFile: path,
+       progress: (res) => {
+         const progress = res.bytesWritten / res.contentLength;
+         console.log(`Download progress: ${(progress * 100).toFixed(1)}%`);
+       },
+     }).promise;
+
+     return path;
+   };

-   async function sendMessage() {
-     if (!lm || !input.trim()) return;
+   const sendMessage = async () => {
+     if (!lm || !input.trim() || isGenerating) return;

-     const userMessage: Message = { role: 'user', content: input };
+     const userMessage: Message = { role: 'user', content: input.trim() };
      const newMessages = [...messages, userMessage];
-     setMessages(newMessages);
+     setMessages([...newMessages, { role: 'assistant', content: '' }]);
      setInput('');
+     setIsGenerating(true);

      try {
-       const params = {
-         n_predict: 256,
+       let response = '';
+       await lm.completion(newMessages, {
+         n_predict: 200,
          temperature: 0.7,
          stop: ['</s>', '<|end|>'],
-       };
-
-       const result = await lm.completion(newMessages, params);
-
-       const assistantMessage: Message = {
-         role: 'assistant',
-         content: result.text
-       };
-       setMessages([...newMessages, assistantMessage]);
+       }, (token) => {
+         response += token.token;
+         setMessages(prev => [
+           ...prev.slice(0, -1),
+           { role: 'assistant', content: response }
+         ]);
+       });
      } catch (error) {
-       console.error('Completion failed:', error);
+       console.error('Generation failed:', error);
+       setMessages(prev => [
+         ...prev.slice(0, -1),
+         { role: 'assistant', content: 'Error generating response' }
+       ]);
+     } finally {
+       setIsGenerating(false);
      }
-   }
+   };

-   if (loading) {
+   if (isLoading) {
      return (
        <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
-         <Text>Loading model...</Text>
+         <ActivityIndicator size="large" />
+         <Text style={{ marginTop: 16 }}>Loading model...</Text>
        </View>
      );
    }

    return (
-     <View style={{ flex: 1, padding: 16 }}>
-       {/* Messages */}
-       <View style={{ flex: 1 }}>
+     <View style={{ flex: 1, backgroundColor: '#f5f5f5' }}>
+       <ScrollView style={{ flex: 1, padding: 16 }}>
          {messages.map((msg, index) => (
-           <Text key={index} style={{
-             backgroundColor: msg.role === 'user' ? '#007AFF' : '#f0f0f0',
-             color: msg.role === 'user' ? 'white' : 'black',
-             padding: 8,
-             margin: 4,
-             borderRadius: 8,
-           }}>
-             {msg.content}
-           </Text>
+           <View
+             key={index}
+             style={{
+               backgroundColor: msg.role === 'user' ? '#007AFF' : '#ffffff',
+               padding: 12,
+               marginVertical: 4,
+               borderRadius: 12,
+               alignSelf: msg.role === 'user' ? 'flex-end' : 'flex-start',
+               maxWidth: '80%',
+               shadowColor: '#000',
+               shadowOffset: { width: 0, height: 1 },
+               shadowOpacity: 0.2,
+               shadowRadius: 2,
+               elevation: 2,
+             }}
+           >
+             <Text style={{
+               color: msg.role === 'user' ? '#ffffff' : '#000000',
+               fontSize: 16,
+             }}>
+               {msg.content}
+             </Text>
+           </View>
          ))}
-       </View>
-
-       {/* Input */}
-       <View style={{ flexDirection: 'row' }}>
+       </ScrollView>
+
+       <View style={{
+         flexDirection: 'row',
+         padding: 16,
+         backgroundColor: '#ffffff',
+         borderTopWidth: 1,
+         borderTopColor: '#e0e0e0',
+       }}>
          <TextInput
-           style={{ flex: 1, borderWidth: 1, padding: 8, borderRadius: 4 }}
+           style={{
+             flex: 1,
+             borderWidth: 1,
+             borderColor: '#e0e0e0',
+             borderRadius: 20,
+             paddingHorizontal: 16,
+             paddingVertical: 10,
+             fontSize: 16,
+             backgroundColor: '#f8f8f8',
+           }}
            value={input}
            onChangeText={setInput}
            placeholder="Type a message..."
+           multiline
+           onSubmitEditing={sendMessage}
          />
-         <TouchableOpacity
+         <TouchableOpacity
            onPress={sendMessage}
-           style={{ backgroundColor: '#007AFF', padding: 8, borderRadius: 4, marginLeft: 8 }}
+           disabled={isGenerating || !input.trim()}
+           style={{
+             backgroundColor: isGenerating ? '#cccccc' : '#007AFF',
+             borderRadius: 20,
+             paddingHorizontal: 16,
+             paddingVertical: 10,
+             marginLeft: 8,
+             justifyContent: 'center',
+           }}
          >
-           <Text style={{ color: 'white' }}>Send</Text>
+           <Text style={{ color: '#ffffff', fontWeight: 'bold' }}>
+             {isGenerating ? '...' : 'Send'}
+           </Text>
          </TouchableOpacity>
        </View>
      </View>
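Note: in the streaming-chat component above, the `useEffect` cleanup closes over the initial `lm` state (still `null` on mount), so `lm?.release()` is a no-op on unmount. A ref-based sketch that avoids the stale closure; this is a sketch, not part of the package README:

```typescript
import { useEffect, useRef } from 'react';
import { CactusLM } from 'cactus-react-native';

export function useCactusLM(modelPath: string) {
  const lmRef = useRef<CactusLM | null>(null);

  useEffect(() => {
    let cancelled = false;

    (async () => {
      const { lm, error } = await CactusLM.init({ model: modelPath, n_ctx: 2048 });
      if (error) { console.error('init failed:', error); return; }
      if (cancelled) { lm.release(); return; } // unmounted while loading
      lmRef.current = lm;
    })();

    return () => {
      cancelled = true;
      lmRef.current?.release(); // reads the current instance, not a stale closure
      lmRef.current = null;
    };
  }, [modelPath]);

  return lmRef;
}
```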
@@ -162,797 +223,567 @@ export default function ChatApp() {
  }
  ```

- ## File Path Requirements
-
- **Critical**: Cactus requires **absolute local file paths**, not Metro bundler URLs or asset references.
-
- ### ❌ Won't Work
- ```typescript
- // Metro bundler URLs
- 'http://localhost:8081/assets/model.gguf'
-
- // React Native asset requires
- require('./assets/model.gguf')
-
- // Relative paths
- './models/model.gguf'
- ```
-
- ### ✅ Will Work
- ```typescript
- import RNFS from 'react-native-fs';
-
- // Absolute paths in app directories
- const modelPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
- const imagePath = `${RNFS.DocumentDirectoryPath}/image.jpg`;
-
- // Downloaded/copied files
- const downloadModel = async () => {
-   const modelUrl = 'https://example.com/model.gguf';
-   const localPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
-
-   await RNFS.downloadFile({
-     fromUrl: modelUrl,
-     toFile: localPath,
-   }).promise;
-
-   return localPath; // Use this path with Cactus
- };
- ```
-
- ### Image Assets
- For images, you need to copy them to local storage first:
-
- ```typescript
- // Copy bundled asset to local storage
- const copyAssetToLocal = async (assetName: string): Promise<string> => {
-   const assetPath = `${RNFS.MainBundlePath}/${assetName}`;
-   const localPath = `${RNFS.DocumentDirectoryPath}/${assetName}`;
-
-   if (!(await RNFS.exists(localPath))) {
-     await RNFS.copyFile(assetPath, localPath);
-   }
-
-   return localPath;
- };
-
- // Usage
- const imagePath = await copyAssetToLocal('demo.jpg');
- const params = { images: [imagePath], n_predict: 200 };
- const result = await vlm.completion(messages, params);
- ```
-
- ### External Images
- Download external images to local storage:
-
- ```typescript
- const downloadImage = async (imageUrl: string): Promise<string> => {
-   const localPath = `${RNFS.DocumentDirectoryPath}/temp_image.jpg`;
-
-   await RNFS.downloadFile({
-     fromUrl: imageUrl,
-     toFile: localPath,
-   }).promise;
-
-   return localPath;
- };
- ```
-
  ## Core APIs

- ### CactusLM (Language Model)
-
- For text-only language models:
+ ### CactusLM

  ```typescript
  import { CactusLM } from 'cactus-react-native';

- // Initialize
- const lm = await CactusLM.init({
+ const { lm, error } = await CactusLM.init({
    model: '/path/to/model.gguf',
-   n_ctx: 4096, // Context window size
-   n_batch: 512, // Batch size for processing
-   n_threads: 4, // Number of threads
-   n_gpu_layers: 99, // GPU layers (0 = CPU only)
+   n_ctx: 2048,
+   n_threads: 4,
+   n_gpu_layers: 99,
+   embedding: true,
  });

- // Text completion
- const messages = [
-   { role: 'system', content: 'You are a helpful assistant.' },
-   { role: 'user', content: 'What is the capital of France?' },
- ];
-
- const params = {
+ const messages = [{ role: 'user', content: 'What is AI?' }];
+ const result = await lm.completion(messages, {
    n_predict: 200,
    temperature: 0.7,
-   top_p: 0.9,
-   stop: ['</s>', '\n\n'],
- };
-
- const result = await lm.completion(messages, params);
-
- // Embeddings
- const embeddingResult = await lm.embedding('Your text here');
- console.log('Embedding vector:', embeddingResult.embedding);
+   stop: ['</s>'],
+ });

- // Cleanup
- await lm.rewind(); // Clear conversation
- await lm.release(); // Release resources
+ const embedding = await lm.embedding('Your text here');
+ await lm.rewind();
+ await lm.release();
  ```

- ### CactusVLM (Vision Language Model)
-
- For multimodal models that can process both text and images:
+ ### CactusVLM

  ```typescript
  import { CactusVLM } from 'cactus-react-native';

- // Initialize with multimodal projector
- const vlm = await CactusVLM.init({
+ const { vlm, error } = await CactusVLM.init({
    model: '/path/to/vision-model.gguf',
    mmproj: '/path/to/mmproj.gguf',
    n_ctx: 2048,
-   n_threads: 4,
-   n_gpu_layers: 99, // GPU for main model, CPU for projector
  });

- // Image + text completion
- const messages = [{ role: 'user', content: 'What do you see in this image?' }];
- const params = {
+ const messages = [{ role: 'user', content: 'Describe this image' }];
+ const result = await vlm.completion(messages, {
    images: ['/path/to/image.jpg'],
    n_predict: 200,
    temperature: 0.3,
- };
-
- const result = await vlm.completion(messages, params);
-
- // Text-only completion (same interface)
- const textMessages = [{ role: 'user', content: 'Tell me a joke' }];
- const textParams = { n_predict: 100 };
- const textResult = await vlm.completion(textMessages, textParams);
+ });

- // Cleanup
- await vlm.rewind();
  await vlm.release();
  ```

- ### CactusTTS (Text-to-Speech)
-
- For text-to-speech generation:
+ ### CactusTTS

  ```typescript
- import { CactusTTS } from 'cactus-react-native';
+ import { CactusTTS, initLlama } from 'cactus-react-native';

- // Initialize with vocoder
- const tts = await CactusTTS.init({
+ const context = await initLlama({
    model: '/path/to/tts-model.gguf',
-   vocoder: '/path/to/vocoder.gguf',
    n_ctx: 1024,
-   n_threads: 4,
  });

- // Generate speech
- const text = 'Hello, this is a test of text-to-speech functionality.';
- const params = {
-   voice_id: 0,
-   temperature: 0.7,
-   speed: 1.0,
- };
-
- const audioResult = await tts.generateSpeech(text, params);
- console.log('Audio data:', audioResult.audio_data);
+ const tts = await CactusTTS.init(context, '/path/to/vocoder.gguf');

- // Advanced token-based generation
- const tokens = await tts.getGuideTokens('Your text here');
- const audio = await tts.decodeTokens(tokens);
+ const audio = await tts.generate(
+   'Hello, this is text-to-speech',
+   '{"speaker_id": 0}'
+ );

- // Cleanup
  await tts.release();
  ```

- ## Text Completion
-
- ### Basic Completion
-
- ```typescript
- const lm = await CactusLM.init({
-   model: '/path/to/model.gguf',
-   n_ctx: 2048,
- });
-
- const messages = [
-   { role: 'user', content: 'Write a short poem about coding' }
- ];
-
- const params = {
-   n_predict: 200,
-   temperature: 0.8,
-   top_p: 0.9,
-   stop: ['</s>', '\n\n'],
- };
-
- const result = await lm.completion(messages, params);
-
- console.log(result.text);
- console.log(`Tokens: ${result.tokens_predicted}`);
- console.log(`Speed: ${result.timings.predicted_per_second.toFixed(2)} tokens/sec`);
- ```
-
- ### Streaming Completion
+ ## Advanced Usage

- ```typescript
- const result = await lm.completion(messages, params, (token) => {
-   // Called for each generated token
-   console.log('Token:', token.token);
-   updateUI(token.token);
- });
- ```
-
- ### Advanced Parameters
+ ### Model Manager

  ```typescript
- const params = {
-   // Generation control
-   n_predict: 256, // Max tokens to generate
-   temperature: 0.7, // Randomness (0.0 - 2.0)
-   top_p: 0.9, // Nucleus sampling
-   top_k: 40, // Top-k sampling
-   min_p: 0.05, // Minimum probability
+ class ModelManager {
+   private models = new Map<string, CactusLM | CactusVLM>();

-   // Repetition control
-   penalty_repeat: 1.1, // Repetition penalty
-   penalty_freq: 0.0, // Frequency penalty
-   penalty_present: 0.0, // Presence penalty
+   async loadLM(name: string, modelPath: string): Promise<CactusLM> {
+     if (this.models.has(name)) {
+       return this.models.get(name) as CactusLM;
+     }
+
+     const { lm, error } = await CactusLM.init({
+       model: modelPath,
+       n_ctx: 2048,
+     });
+
+     if (error) throw error;
+     this.models.set(name, lm);
+     return lm;
+   }

-   // Stop conditions
-   stop: ['</s>', '<|end|>', '\n\n'],
-   ignore_eos: false,
+   async loadVLM(name: string, modelPath: string, mmprojPath: string): Promise<CactusVLM> {
+     if (this.models.has(name)) {
+       return this.models.get(name) as CactusVLM;
+     }
+
+     const { vlm, error } = await CactusVLM.init({
+       model: modelPath,
+       mmproj: mmprojPath,
+     });
+
+     if (error) throw error;
+     this.models.set(name, vlm);
+     return vlm;
+   }

-   // Sampling methods
-   mirostat: 0, // Mirostat sampling (0=disabled)
-   mirostat_tau: 5.0, // Target entropy
-   mirostat_eta: 0.1, // Learning rate
+   async releaseModel(name: string): Promise<void> {
+     const model = this.models.get(name);
+     if (model) {
+       await model.release();
+       this.models.delete(name);
+     }
+   }

-   // Advanced
-   seed: -1, // Random seed (-1 = random)
-   n_probs: 0, // Return token probabilities
- };
- ```
-
- ## Multimodal (Vision)
-
- ### Setup Vision Model
-
- ```typescript
- import { CactusVLM } from 'cactus-react-native';
-
- const vlm = await CactusVLM.init({
-   model: '/path/to/vision-model.gguf',
-   mmproj: '/path/to/mmproj.gguf', // Multimodal projector
-   n_ctx: 4096,
- });
- ```
-
- ### Image Analysis
-
- ```typescript
- // Analyze single image
- const messages = [{ role: 'user', content: 'Describe this image in detail' }];
- const params = {
-   images: ['/path/to/image.jpg'],
-   n_predict: 200,
-   temperature: 0.3,
- };
-
- const result = await vlm.completion(messages, params);
- console.log(result.text);
- ```
-
- ### Multi-Image Analysis
-
- ```typescript
- const imagePaths = [
-   '/path/to/image1.jpg',
-   '/path/to/image2.jpg',
-   '/path/to/image3.jpg'
- ];
-
- const messages = [{ role: 'user', content: 'Compare these images and explain the differences' }];
- const params = {
-   images: imagePaths,
-   n_predict: 300,
-   temperature: 0.4,
- };
-
- const result = await vlm.completion(messages, params);
- ```
-
- ### Conversation with Images
-
- ```typescript
- const conversation = [
-   { role: 'user', content: 'What do you see in this image?' }
- ];
-
- const params = {
-   images: ['/path/to/image.jpg'],
-   n_predict: 256,
-   temperature: 0.3,
- };
-
- const result = await vlm.completion(conversation, params);
- ```
-
- ## Embeddings
-
- ### Text Embeddings
-
- ```typescript
- // Enable embeddings during initialization
- const lm = await CactusLM.init({
-   model: '/path/to/embedding-model.gguf',
-   embedding: true, // Enable embedding mode
-   n_ctx: 512, // Smaller context for embeddings
- });
-
- // Generate embeddings
- const text = 'Your text here';
- const result = await lm.embedding(text);
- console.log('Embedding vector:', result.embedding);
- console.log('Dimensions:', result.embedding.length);
- ```
-
- ### Batch Embeddings
-
- ```typescript
- const texts = [
-   'The quick brown fox',
-   'Machine learning is fascinating',
-   'React Native development'
- ];
-
- const embeddings = await Promise.all(
-   texts.map(text => lm.embedding(text))
- );
-
- // Calculate similarity
- function cosineSimilarity(a: number[], b: number[]): number {
-   const dotProduct = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
-   const magnitudeA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
-   const magnitudeB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
-   return dotProduct / (magnitudeA * magnitudeB);
+   async releaseAll(): Promise<void> {
+     await Promise.all(
+       Array.from(this.models.values()).map(model => model.release())
+     );
+     this.models.clear();
+   }
  }

- const similarity = cosineSimilarity(
-   embeddings[0].embedding,
-   embeddings[1].embedding
- );
+ const modelManager = new ModelManager();
  ```

- ## Text-to-Speech (TTS)
-
- Cactus supports text-to-speech through vocoder models, allowing you to generate speech from text.
-
- ### Setup TTS Model
+ ### File Management Hook

  ```typescript
- import { CactusTTS } from 'cactus-react-native';
-
- const tts = await CactusTTS.init({
-   model: '/path/to/text-model.gguf',
-   vocoder: '/path/to/vocoder-model.gguf',
-   n_ctx: 2048,
- });
- ```
+ import { useState, useCallback } from 'react';
+ import RNFS from 'react-native-fs';

- ### Basic Text-to-Speech
+ interface DownloadProgress {
+   progress: number;
+   isDownloading: boolean;
+   error: string | null;
+ }

- ```typescript
- const text = 'Hello, this is a test of text-to-speech functionality.';
- const params = {
-   voice_id: 0, // Speaker voice ID
-   temperature: 0.7, // Speech variation
-   speed: 1.0, // Speech speed
+ export const useModelDownload = () => {
+   const [downloads, setDownloads] = useState<Map<string, DownloadProgress>>(new Map());
+
+   const downloadModel = useCallback(async (url: string, filename: string): Promise<string> => {
+     const path = `${RNFS.DocumentDirectoryPath}/${filename}`;
+
+     if (await RNFS.exists(path)) {
+       const stats = await RNFS.stat(path);
+       if (stats.size > 0) return path;
+     }
+
+     setDownloads(prev => new Map(prev.set(filename, {
+       progress: 0,
+       isDownloading: true,
+       error: null,
+     })));
+
+     try {
+       await RNFS.downloadFile({
+         fromUrl: url,
+         toFile: path,
+         progress: (res) => {
+           const progress = res.bytesWritten / res.contentLength;
+           setDownloads(prev => new Map(prev.set(filename, {
+             progress,
+             isDownloading: true,
+             error: null,
+           })));
+         },
+       }).promise;
+
+       setDownloads(prev => new Map(prev.set(filename, {
+         progress: 1,
+         isDownloading: false,
+         error: null,
+       })));
+
+       return path;
+     } catch (error) {
+       setDownloads(prev => new Map(prev.set(filename, {
+         progress: 0,
+         isDownloading: false,
+         error: error.message,
+       })));
+       throw error;
+     }
+   }, []);
+
+   return { downloadModel, downloads };
  };
-
- const result = await tts.generateSpeech(text, params);
-
- console.log('Audio data:', result.audio_data);
- console.log('Sample rate:', result.sample_rate);
- console.log('Audio format:', result.format);
- ```
-
- ### Advanced TTS with Token Control
-
- ```typescript
- // Get guide tokens for precise control
- const tokensResult = await tts.getGuideTokens(
-   'This text will be converted to speech tokens.'
- );
-
- console.log('Guide tokens:', tokensResult.tokens);
- console.log('Token count:', tokensResult.tokens.length);
-
- // Decode tokens to audio
- const audioResult = await tts.decodeTokens(tokensResult.tokens);
-
- console.log('Decoded audio:', audioResult.audio_data);
- console.log('Duration:', audioResult.duration_seconds);
  ```

- ### Complete TTS Example
+ ### Vision Chat Component

  ```typescript
  import React, { useState, useEffect } from 'react';
- import { View, Text, TextInput, TouchableOpacity, Alert } from 'react-native';
- import { Audio } from 'expo-av';
+ import { View, Text, TouchableOpacity, Image, Alert } from 'react-native';
+ import { launchImageLibrary } from 'react-native-image-picker';
+ import { CactusVLM } from 'cactus-react-native';
  import RNFS from 'react-native-fs';
- import { CactusTTS } from 'cactus-react-native';

- export default function TTSDemo() {
-   const [tts, setTTS] = useState<CactusTTS | null>(null);
-   const [text, setText] = useState('Hello, this is a test of speech synthesis.');
-   const [isGenerating, setIsGenerating] = useState(false);
-   const [sound, setSound] = useState<Audio.Sound | null>(null);
+ export default function VisionChat() {
+   const [vlm, setVLM] = useState<CactusVLM | null>(null);
+   const [imagePath, setImagePath] = useState<string | null>(null);
+   const [response, setResponse] = useState('');
+   const [isLoading, setIsLoading] = useState(true);
+   const [isAnalyzing, setIsAnalyzing] = useState(false);

    useEffect(() => {
-     initializeTTS();
+     initializeVLM();
      return () => {
-       if (sound) {
-         sound.unloadAsync();
-       }
+       vlm?.release();
      };
    }, []);

-   async function initializeTTS() {
+   const initializeVLM = async () => {
      try {
-       // Download and initialize models
-       const modelPath = await downloadModel();
-       const vocoderPath = await downloadVocoder();
+       const modelUrl = 'https://huggingface.co/Cactus-Compute/SmolVLM2-500m-Instruct-GGUF/resolve/main/SmolVLM2-500M-Video-Instruct-Q8_0.gguf';
+       const mmprojUrl = 'https://huggingface.co/Cactus-Compute/SmolVLM2-500m-Instruct-GGUF/resolve/main/mmproj-SmolVLM2-500M-Video-Instruct-Q8_0.gguf';
+
+       const [modelPath, mmprojPath] = await Promise.all([
+         downloadFile(modelUrl, 'smolvlm-model.gguf'),
+         downloadFile(mmprojUrl, 'smolvlm-mmproj.gguf'),
+       ]);

-       const cactusTTS = await CactusTTS.init({
+       const { vlm: model, error } = await CactusVLM.init({
          model: modelPath,
-         vocoder: vocoderPath,
-         n_ctx: 1024,
-         n_threads: 4,
+         mmproj: mmprojPath,
+         n_ctx: 2048,
        });

-       setTTS(cactusTTS);
+       if (error) throw error;
+       setVLM(model);
      } catch (error) {
-       console.error('Failed to initialize TTS:', error);
-       Alert.alert('Error', 'Failed to initialize TTS');
+       console.error('Failed to initialize VLM:', error);
+       Alert.alert('Error', 'Failed to initialize vision model');
+     } finally {
+       setIsLoading(false);
      }
-   }
+   };
+
+   const downloadFile = async (url: string, filename: string): Promise<string> => {
+     const path = `${RNFS.DocumentDirectoryPath}/${filename}`;
+
+     if (await RNFS.exists(path)) return path;
+
+     await RNFS.downloadFile({ fromUrl: url, toFile: path }).promise;
+     return path;
+   };
+
+   const pickImage = () => {
+     launchImageLibrary(
+       {
+         mediaType: 'photo',
+         quality: 0.8,
+         includeBase64: false,
+       },
+       (response) => {
+         if (response.assets && response.assets[0]) {
+           setImagePath(response.assets[0].uri!);
+           setResponse('');
+         }
+       }
+     );
+   };

-   async function generateSpeech() {
-     if (!tts || !text.trim()) return;
+   const analyzeImage = async () => {
+     if (!vlm || !imagePath) return;

-     setIsGenerating(true);
+     setIsAnalyzing(true);
      try {
-       const params = {
-         voice_id: 0,
-         temperature: 0.7,
-         speed: 1.0,
-       };
-
-       const result = await tts.generateSpeech(text, params);
-
-       // Save audio to file
-       const audioPath = `${RNFS.DocumentDirectoryPath}/speech.wav`;
-       await RNFS.writeFile(audioPath, result.audio_data, 'base64');
-
-       // Play audio
-       const { sound: audioSound } = await Audio.Sound.createAsync({
-         uri: `file://${audioPath}`,
-       });
+       const messages = [{ role: 'user', content: 'Describe this image in detail' }];

-       setSound(audioSound);
-       await audioSound.playAsync();
+       let analysisResponse = '';
+       const result = await vlm.completion(messages, {
+         images: [imagePath],
+         n_predict: 300,
+         temperature: 0.3,
+       }, (token) => {
+         analysisResponse += token.token;
+         setResponse(analysisResponse);
+       });

-       console.log(`Generated speech: ${result.duration_seconds}s`);
+       setResponse(analysisResponse || result.text);
      } catch (error) {
-       console.error('Speech generation failed:', error);
-       Alert.alert('Error', 'Failed to generate speech');
+       console.error('Analysis failed:', error);
+       Alert.alert('Error', 'Failed to analyze image');
      } finally {
-       setIsGenerating(false);
+       setIsAnalyzing(false);
      }
-   }
+   };

-   // Helper functions for downloading models would go here...
+   if (isLoading) {
+     return (
+       <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
+         <Text>Loading vision model...</Text>
+       </View>
+     );
+   }

    return (
      <View style={{ flex: 1, padding: 16 }}>
-       <Text style={{ fontSize: 18, marginBottom: 16 }}>
-         Text-to-Speech Demo
+       <Text style={{ fontSize: 24, fontWeight: 'bold', marginBottom: 20 }}>
+         Vision Chat
        </Text>

-       <TextInput
-         style={{
-           borderWidth: 1,
-           borderColor: '#ddd',
-           borderRadius: 8,
-           padding: 12,
-           marginBottom: 16,
-           minHeight: 100,
-         }}
-         value={text}
-         onChangeText={setText}
-         placeholder="Enter text to convert to speech..."
-         multiline
-       />
+       {imagePath && (
+         <Image
+           source={{ uri: imagePath }}
+           style={{
+             width: '100%',
+             height: 200,
+             borderRadius: 8,
+             marginBottom: 16,
+           }}
+           resizeMode="contain"
+         />
+       )}

-       <TouchableOpacity
-         onPress={generateSpeech}
-         disabled={isGenerating || !tts}
-         style={{
-           backgroundColor: isGenerating ? '#ccc' : '#007AFF',
-           padding: 16,
-           borderRadius: 8,
-           alignItems: 'center',
-         }}
-       >
-         <Text style={{ color: 'white', fontSize: 16, fontWeight: 'bold' }}>
-           {isGenerating ? 'Generating...' : 'Generate Speech'}
+       <View style={{ flexDirection: 'row', marginBottom: 16 }}>
+         <TouchableOpacity
+           onPress={pickImage}
+           style={{
+             backgroundColor: '#007AFF',
+             padding: 12,
+             borderRadius: 8,
+             marginRight: 8,
+             flex: 1,
+           }}
+         >
+           <Text style={{ color: 'white', textAlign: 'center', fontWeight: 'bold' }}>
+             Pick Image
+           </Text>
+         </TouchableOpacity>
+
+         <TouchableOpacity
+           onPress={analyzeImage}
+           disabled={!imagePath || isAnalyzing}
+           style={{
+             backgroundColor: !imagePath || isAnalyzing ? '#cccccc' : '#34C759',
+             padding: 12,
+             borderRadius: 8,
+             flex: 1,
+           }}
+         >
+           <Text style={{ color: 'white', textAlign: 'center', fontWeight: 'bold' }}>
+             {isAnalyzing ? 'Analyzing...' : 'Analyze'}
+           </Text>
+         </TouchableOpacity>
+       </View>
+
+       <View style={{
+         flex: 1,
+         backgroundColor: '#f8f8f8',
+         borderRadius: 8,
+         padding: 16,
+       }}>
+         <Text style={{ fontSize: 16, lineHeight: 24 }}>
+           {response || 'Select an image and tap Analyze to get started'}
          </Text>
-       </TouchableOpacity>
+       </View>
      </View>
    );
  }
  ```

- ## Advanced Features
-
- ### Session Management
-
- For the low-level API, you can still access session management:
+ ### Cloud Fallback

  ```typescript
- import { initLlama } from 'cactus-react-native';
+ const { lm } = await CactusLM.init({
+   model: '/path/to/model.gguf',
+   n_ctx: 2048,
+ }, undefined, 'your_cactus_token');

- const context = await initLlama({ model: '/path/to/model.gguf' });
+ // Try local first, fallback to cloud if local fails
+ const embedding = await lm.embedding('text', undefined, 'localfirst');

- // Save session
- const tokensKept = await context.saveSession('/path/to/session.bin', {
-   tokenSize: 1024 // Number of tokens to keep
- });
+ // Vision models also support cloud fallback
+ const { vlm } = await CactusVLM.init({
+   model: '/path/to/model.gguf',
+   mmproj: '/path/to/mmproj.gguf',
+ }, undefined, 'your_cactus_token');

- // Load session
- const sessionInfo = await context.loadSession('/path/to/session.bin');
- console.log(`Loaded ${sessionInfo.tokens_loaded} tokens`);
+ const result = await vlm.completion(messages, {
+   images: ['/path/to/image.jpg'],
+   mode: 'localfirst',
+ });
  ```

- ### LoRA Adapters
+ ### Embeddings & Similarity

  ```typescript
- const context = await initLlama({ model: '/path/to/model.gguf' });
+ const { lm } = await CactusLM.init({
+   model: '/path/to/model.gguf',
+   embedding: true,
+ });

- // Apply LoRA adapters
- await context.applyLoraAdapters([
-   { path: '/path/to/lora1.gguf', scaled: 1.0 },
-   { path: '/path/to/lora2.gguf', scaled: 0.8 }
- ]);
+ const embedding1 = await lm.embedding('machine learning');
+ const embedding2 = await lm.embedding('artificial intelligence');

- // Get loaded adapters
- const adapters = await context.getLoadedLoraAdapters();
- console.log('Loaded adapters:', adapters);
+ function cosineSimilarity(a: number[], b: number[]): number {
+   const dotProduct = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
+   const magnitudeA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
+   const magnitudeB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
+   return dotProduct / (magnitudeA * magnitudeB);
+ }

- // Remove adapters
- await context.removeLoraAdapters();
+ const similarity = cosineSimilarity(embedding1.embedding, embedding2.embedding);
+ console.log('Similarity:', similarity);
  ```

- ### Structured Output (JSON)
-
- ```typescript
- const messages = [
-   { role: 'user', content: 'Extract information about this person: John Doe, 30 years old, software engineer from San Francisco' }
- ];
-
- const params = {
-   response_format: {
-     type: 'json_object',
-     schema: {
-       type: 'object',
-       properties: {
-         name: { type: 'string' },
-         age: { type: 'number' },
-         profession: { type: 'string' },
-         location: { type: 'string' }
-       },
-       required: ['name', 'age']
-     }
-   }
- };
-
- const result = await lm.completion(messages, params);
- const person = JSON.parse(result.text);
- console.log(person.name); // "John Doe"
- ```
+ ## Error Handling & Performance

- ### Performance Monitoring
+ ### Production Error Handling

  ```typescript
- const result = await lm.completion(messages, { n_predict: 100 });
-
- console.log('Performance metrics:');
- console.log(`Prompt tokens: ${result.timings.prompt_n}`);
- console.log(`Generated tokens: ${result.timings.predicted_n}`);
- console.log(`Prompt speed: ${result.timings.prompt_per_second.toFixed(2)} tokens/sec`);
- console.log(`Generation speed: ${result.timings.predicted_per_second.toFixed(2)} tokens/sec`);
- console.log(`Total time: ${(result.timings.prompt_ms + result.timings.predicted_ms).toFixed(0)}ms`);
- ```
-
- ## Best Practices
-
- ### Model Management
-
- ```typitten
- class ModelManager {
-   private models = new Map<string, CactusLM | CactusVLM | CactusTTS>();
-
-   async loadLM(name: string, modelPath: string): Promise<CactusLM> {
-     if (this.models.has(name)) {
-       return this.models.get(name)! as CactusLM;
+ async function safeModelInit(modelPath: string): Promise<CactusLM> {
+   const configs = [
+     { model: modelPath, n_ctx: 4096, n_gpu_layers: 99 },
+     { model: modelPath, n_ctx: 2048, n_gpu_layers: 99 },
+     { model: modelPath, n_ctx: 2048, n_gpu_layers: 0 },
+     { model: modelPath, n_ctx: 1024, n_gpu_layers: 0 },
+   ];
+
+   for (const config of configs) {
+     try {
+       const { lm, error } = await CactusLM.init(config);
+       if (error) throw error;
+       return lm;
+     } catch (error) {
+       console.warn('Config failed:', config, error.message);
+       if (configs.indexOf(config) === configs.length - 1) {
+         throw new Error(`All configurations failed. Last error: ${error.message}`);
+       }
      }
-
-     const lm = await CactusLM.init({ model: modelPath });
-     this.models.set(name, lm);
-     return lm;
-   }
-
-   async loadVLM(name: string, modelPath: string, mmprojPath: string): Promise<CactusVLM> {
-     if (this.models.has(name)) {
-       return this.models.get(name)! as CactusVLM;
-     }
-
-     const vlm = await CactusVLM.init({ model: modelPath, mmproj: mmprojPath });
-     this.models.set(name, vlm);
-     return vlm;
    }
+
+   throw new Error('Model initialization failed');
+ }

-   async unloadModel(name: string): Promise<void> {
-     const model = this.models.get(name);
-     if (model) {
-       await model.release();
-       this.models.delete(name);
+ async function safeCompletion(lm: CactusLM, messages: any[], retries = 3): Promise<any> {
+   for (let i = 0; i < retries; i++) {
+     try {
+       return await lm.completion(messages, { n_predict: 200 });
+     } catch (error) {
+       if (error.message.includes('Context is busy') && i < retries - 1) {
+         await new Promise(resolve => setTimeout(resolve, 1000));
+         continue;
+       }
+       throw error;
      }
    }
-
-   async unloadAll(): Promise<void> {
-     await Promise.all(
-       Array.from(this.models.values()).map(model => model.release())
-     );
-     this.models.clear();
-   }
  }
  ```

- ### Error Handling
+ ### Memory Management

  ```typescript
- async function safeCompletion(lm: CactusLM, messages: any[]) {
-   try {
-     const result = await lm.completion(messages, {
-       n_predict: 256,
-       temperature: 0.7,
-     });
-     return { success: true, data: result };
-   } catch (error) {
-     if (error.message.includes('Context is busy')) {
-       // Handle concurrent requests
-       await new Promise(resolve => setTimeout(resolve, 100));
-       return safeCompletion(lm, messages);
-     } else if (error.message.includes('Context not found')) {
-       // Handle context cleanup
-       throw new Error('Model context was released');
-     } else {
-       // Handle other errors
-       console.error('Completion failed:', error);
-       return { success: false, error: error.message };
+ import { AppState, AppStateStatus } from 'react-native';
+
+ class AppModelManager {
+   private modelManager = new ModelManager();
+
+   constructor() {
+     AppState.addEventListener('change', this.handleAppStateChange);
+   }
+
+   private handleAppStateChange = (nextAppState: AppStateStatus) => {
+     if (nextAppState === 'background') {
+       // Release non-essential models when app goes to background
+       this.modelManager.releaseAll();
+     }
+   };
+
+   async getModel(name: string, modelPath: string): Promise<CactusLM> {
+     try {
+       return await this.modelManager.loadLM(name, modelPath);
+     } catch (error) {
+       // Handle low memory by releasing other models
+       await this.modelManager.releaseAll();
+       return await this.modelManager.loadLM(name, modelPath);
      }
    }
  }
  ```

- ### Memory Management
+ ### Performance Optimization

  ```typescript
- // Monitor memory usage
- const checkMemory = () => {
-   if (Platform.OS === 'android') {
-     // Android-specific memory monitoring
-     console.log('Memory warning - consider releasing unused models');
-   }
+ // Optimize for device capabilities
+ const getOptimalConfig = () => {
+   const { OS } = Platform;
+   const isHighEndDevice = true; // Implement device detection logic
+
+   return {
+     n_ctx: isHighEndDevice ? 4096 : 2048,
+     n_gpu_layers: OS === 'ios' ? 99 : 0, // iOS generally has better GPU support
+     n_threads: isHighEndDevice ? 6 : 4,
+     n_batch: isHighEndDevice ? 512 : 256,
+   };
  };

- // Release models when app goes to background
- import { AppState } from 'react-native';
-
- AppState.addEventListener('change', (nextAppState) => {
-   if (nextAppState === 'background') {
-     // Release non-essential models
-     modelManager.unloadAll();
-   }
+ const config = getOptimalConfig();
+ const { lm } = await CactusLM.init({
+   model: modelPath,
+   ...config,
  });
  ```

  ## API Reference

- ### High-Level APIs
-
- `CactusLM.init(params: ContextParams): Promise<CactusLM>` - Initialize language model
- `CactusVLM.init(params: VLMContextParams): Promise<CactusVLM>` - Initialize vision language model
- `CactusTTS.init(params: TTSContextParams): Promise<CactusTTS>` - Initialize text-to-speech model
-
- ### CactusLM Methods
-
- `completion(messages: CactusOAICompatibleMessage[], params: CompletionParams, callback?: (token: TokenData) => void): Promise<NativeCompletionResult>`
- `embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>`
- `rewind(): Promise<void>` - Clear conversation history
- `release(): Promise<void>` - Release resources
-
- ### CactusVLM Methods
-
- `completion(messages: CactusOAICompatibleMessage[], params: VLMCompletionParams, callback?: (token: TokenData) => void): Promise<NativeCompletionResult>`
- `rewind(): Promise<void>` - Clear conversation history
- `release(): Promise<void>` - Release resources
+ ### CactusLM

- ### CactusTTS Methods
+ **init(params, onProgress?, cactusToken?)**
+ - `model: string` - Path to GGUF model file
+ - `n_ctx?: number` - Context size (default: 2048)
+ - `n_threads?: number` - CPU threads (default: 4)
+ - `n_gpu_layers?: number` - GPU layers (default: 99)
+ - `embedding?: boolean` - Enable embeddings (default: false)
+ - `n_batch?: number` - Batch size (default: 512)

- `generateSpeech(text: string, params: TTSSpeechParams): Promise<NativeAudioCompletionResult>`
- `getGuideTokens(text: string): Promise<NativeAudioTokensResult>`
- `decodeTokens(tokens: number[]): Promise<NativeAudioDecodeResult>`
- `release(): Promise<void>` - Release resources
+ **completion(messages, params?, callback?)**
+ - `messages: Array<{role: string, content: string}>` - Chat messages
+ - `n_predict?: number` - Max tokens (default: -1)
+ - `temperature?: number` - Randomness 0.0-2.0 (default: 0.8)
+ - `top_p?: number` - Nucleus sampling (default: 0.95)
+ - `top_k?: number` - Top-k sampling (default: 40)
+ - `stop?: string[]` - Stop sequences
+ - `callback?: (token) => void` - Streaming callback

- ### Low-Level Functions (Advanced)
+ **embedding(text, params?, mode?)**
+ - `text: string` - Text to embed
+ - `mode?: string` - 'local' | 'localfirst' | 'remotefirst' | 'remote'

- For advanced use cases, the original low-level API is still available:
+ ### CactusVLM

- `initLlama(params: ContextParams): Promise<LlamaContext>` - Initialize a model context
- `releaseAllLlama(): Promise<void>` - Release all contexts
- `setContextLimit(limit: number): Promise<void>` - Set maximum contexts
- `toggleNativeLog(enabled: boolean): Promise<void>` - Enable/disable native logging
+ **init(params, onProgress?, cactusToken?)**
+ - All CactusLM params plus:
+ - `mmproj: string` - Path to multimodal projector

- ## Troubleshooting
+ **completion(messages, params?, callback?)**
+ - All CactusLM completion params plus:
+ - `images?: string[]` - Array of image paths
+ - `mode?: string` - Cloud fallback mode

- ### Common Issues
+ ### Types

- **Model Loading Fails**
  ```typescript
- // Check file exists and is accessible
- if (!(await RNFS.exists(modelPath))) {
-   throw new Error('Model file not found');
+ interface CactusOAICompatibleMessage {
+   role: 'system' | 'user' | 'assistant';
+   content: string;
  }

- // Check file size
- const stats = await RNFS.stat(modelPath);
- console.log('Model size:', stats.size);
- ```
-
- **Out of Memory**
- ```typescript
- // Reduce context size
- const lm = await CactusLM.init({
-   model: '/path/to/model.gguf',
-   n_ctx: 1024, // Reduce from 4096
-   n_batch: 128, // Reduce batch size
- });
- ```
+ interface NativeCompletionResult {
+   text: string;
+   tokens_predicted: number;
+   tokens_evaluated: number;
+   timings: {
+     predicted_per_second: number;
+     prompt_per_second: number;
+   };
+ }

- **GPU Issues**
- ```typescript
- // Disable GPU if having issues
- const lm = await CactusLM.init({
-   model: '/path/to/model.gguf',
-   n_gpu_layers: 0, // Use CPU only
- });
+ interface NativeEmbeddingResult {
+   embedding: number[];
+ }
  ```
-
- ### Performance Tips
-
- 1. **Use appropriate context sizes** - Larger contexts use more memory
- 2. **Optimize batch sizes** - Balance between speed and memory
- 3. **Cache models** - Don't reload models unnecessarily
- 4. **Use GPU acceleration** - When available and stable
- 5. **Monitor memory usage** - Release models when not needed
-
- This documentation covers the essential usage patterns for cactus-react-native. For more examples, check the [example apps](../examples/) in the repository.
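Note: the new API Reference lists `init(params, onProgress?, cactusToken?)`, but no example in this diff passes the progress callback (the Cloud Fallback snippets pass `undefined` for it). A sketch of the full signature, assuming the callback receives a 0 to 1 progress fraction; that payload shape is not confirmed by this diff:

```typescript
import { CactusLM } from 'cactus-react-native';

const { lm, error } = await CactusLM.init(
  { model: '/path/to/model.gguf', n_ctx: 2048 },
  (progress: number) => console.log(`loading: ${(progress * 100).toFixed(0)}%`), // assumed payload shape
  'your_cactus_token',
);
if (error) throw error;
```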