cactus-react-native 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +872 -146
  2. package/android/src/main/CMakeLists.txt +1 -1
  3. package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
  11. package/ios/CMakeLists.txt +6 -6
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +12 -0
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  14. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +12 -0
  15. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  16. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +12 -0
  17. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
  18. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +12 -0
  19. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  20. package/lib/commonjs/index.js.map +1 -1
  21. package/lib/commonjs/lm.js.map +1 -0
  22. package/lib/commonjs/tts.js.map +1 -0
  23. package/lib/commonjs/vlm.js.map +0 -0
  24. package/lib/module/index.js.map +1 -1
  25. package/lib/module/lm.js.map +0 -0
  26. package/lib/module/tts.js.map +1 -0
  27. package/lib/module/vlm.js.map +1 -0
  28. package/lib/typescript/index.d.ts +5 -1
  29. package/lib/typescript/index.d.ts.map +1 -1
  30. package/lib/typescript/lm.d.ts +41 -0
  31. package/lib/typescript/lm.d.ts.map +1 -0
  32. package/lib/typescript/tts.d.ts +10 -0
  33. package/lib/typescript/tts.d.ts.map +1 -0
  34. package/lib/typescript/vlm.d.ts +44 -0
  35. package/lib/typescript/vlm.d.ts.map +1 -0
  36. package/package.json +2 -1
  37. package/src/index.ts +11 -1
  38. package/src/lm.ts +49 -0
  39. package/src/tts.ts +45 -0
  40. package/src/vlm.ts +70 -0
  41. package/lib/commonjs/NativeCactus.js +0 -10
  42. package/lib/commonjs/chat.js +0 -37
  43. package/lib/commonjs/grammar.js +0 -560
  44. package/lib/commonjs/index.js +0 -412
  45. package/lib/commonjs/tools.js +0 -118
  46. package/lib/commonjs/tools.js.map +0 -1
  47. package/lib/module/NativeCactus.js +0 -8
  48. package/lib/module/chat.js +0 -33
  49. package/lib/module/grammar.js +0 -553
  50. package/lib/module/index.js +0 -363
  51. package/lib/module/tools.js +0 -110
  52. package/lib/module/tools.js.map +0 -1
package/README.md CHANGED
@@ -1,232 +1,958 @@
- # Cactus for React Native
+ # Cactus React Native
 
- A lightweight, high-performance framework for running AI models on mobile devices with React Native.
+ A powerful React Native library for running Large Language Models (LLMs) and Vision Language Models (VLMs) directly on mobile devices, with full support for chat completions, multimodal inputs, embeddings, text-to-speech, and advanced features.
 
  ## Installation
 
  ```bash
- # Using npm
- npm install react-native-fs
- npm install cactus-react-native
+ npm install cactus-react-native react-native-fs
+ # or
+ yarn add cactus-react-native react-native-fs
+ ```
 
- # Using yarn
- yarn add react-native-fs
- yarn add cactus-react-native
+ **Additional Setup:**
+ - For iOS: `cd ios && npx pod-install` or `yarn pod-install`
+ - For Android: Ensure your `minSdkVersion` is 24 or higher
 
- # For iOS, install pods if not on Expo
- npx pod-install
- ```
+ > **Important**: `react-native-fs` is required for file system access to download and manage model files locally.
 
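+ As a quick sanity check that `react-native-fs` is linked correctly, you can log the directory model files will live in before downloading anything; a minimal sketch using only the documented `DocumentDirectoryPath` constant:
+
+ ```typescript
+ import RNFS from 'react-native-fs';
+
+ // If this logs a real device path, the native file-system module is installed and linked
+ console.log('Model storage directory:', RNFS.DocumentDirectoryPath);
+ ```
+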
- ## Basic Usage
+ ## Quick Start
 
- ### Initialize a Model
+ ### Basic Text Completion
 
  ```typescript
- import { initLlama, LlamaContext } from 'cactus-react-native';
+ import { CactusLM } from 'cactus-react-native';
 
- // Initialize the model
- const context = await initLlama({
-   model: 'models/llama-2-7b-chat.gguf', // Path to your model
-   n_ctx: 2048, // Context size
-   n_batch: 512, // Batch size for prompt processing
-   n_threads: 4 // Number of threads to use
+ // Initialize a language model
+ const lm = await CactusLM.init({
+   model: '/path/to/your/model.gguf',
+   n_ctx: 2048,
+   n_threads: 4,
  });
+
+ // Generate text
+ const messages = [{ role: 'user', content: 'Hello, how are you?' }];
+ const params = { n_predict: 100, temperature: 0.7 };
+
+ const result = await lm.completion(messages, params);
+ console.log(result.text);
+ ```
+
+ ### Complete Chat App Example
+
+ ```typescript
+ import React, { useState, useEffect } from 'react';
+ import { View, Text, TextInput, TouchableOpacity } from 'react-native';
+ import { CactusLM } from 'cactus-react-native';
+ import RNFS from 'react-native-fs';
+
+ interface Message {
+   role: 'user' | 'assistant';
+   content: string;
+ }
+
+ export default function ChatApp() {
+   const [lm, setLM] = useState<CactusLM | null>(null);
+   const [messages, setMessages] = useState<Message[]>([]);
+   const [input, setInput] = useState('');
+   const [loading, setLoading] = useState(true);
+
+   useEffect(() => {
+     initializeModel();
+   }, []);
+
+   async function initializeModel() {
+     try {
+       // Download model (example URL)
+       const modelUrl = 'https://huggingface.co/Cactus-Compute/Qwen3-600m-Instruct-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf';
+       const modelPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
+
+       // Download if not exists
+       if (!(await RNFS.exists(modelPath))) {
+         await RNFS.downloadFile({
+           fromUrl: modelUrl,
+           toFile: modelPath,
+         }).promise;
+       }
+
+       // Initialize language model
+       const cactusLM = await CactusLM.init({
+         model: modelPath,
+         n_ctx: 2048,
+         n_threads: 4,
+         n_gpu_layers: 99, // Use GPU acceleration
+       });
+
+       setLM(cactusLM);
+       setLoading(false);
+     } catch (error) {
+       console.error('Failed to initialize model:', error);
+     }
+   }
+
+   async function sendMessage() {
+     if (!lm || !input.trim()) return;
+
+     const userMessage: Message = { role: 'user', content: input };
+     const newMessages = [...messages, userMessage];
+     setMessages(newMessages);
+     setInput('');
+
+     try {
+       const params = {
+         n_predict: 256,
+         temperature: 0.7,
+         stop: ['</s>', '<|end|>'],
+       };
+
+       const result = await lm.completion(newMessages, params);
+
+       const assistantMessage: Message = {
+         role: 'assistant',
+         content: result.text,
+       };
+       setMessages([...newMessages, assistantMessage]);
+     } catch (error) {
+       console.error('Completion failed:', error);
+     }
+   }
+
+   if (loading) {
+     return (
+       <View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
+         <Text>Loading model...</Text>
+       </View>
+     );
+   }
+
+   return (
+     <View style={{ flex: 1, padding: 16 }}>
+       {/* Messages */}
+       <View style={{ flex: 1 }}>
+         {messages.map((msg, index) => (
+           <Text key={index} style={{
+             backgroundColor: msg.role === 'user' ? '#007AFF' : '#f0f0f0',
+             color: msg.role === 'user' ? 'white' : 'black',
+             padding: 8,
+             margin: 4,
+             borderRadius: 8,
+           }}>
+             {msg.content}
+           </Text>
+         ))}
+       </View>
+
+       {/* Input */}
+       <View style={{ flexDirection: 'row' }}>
+         <TextInput
+           style={{ flex: 1, borderWidth: 1, padding: 8, borderRadius: 4 }}
+           value={input}
+           onChangeText={setInput}
+           placeholder="Type a message..."
+         />
+         <TouchableOpacity
+           onPress={sendMessage}
+           style={{ backgroundColor: '#007AFF', padding: 8, borderRadius: 4, marginLeft: 8 }}
+         >
+           <Text style={{ color: 'white' }}>Send</Text>
+         </TouchableOpacity>
+       </View>
+     </View>
+   );
+ }
+ ```
+
+ ## File Path Requirements
+
+ **Critical**: Cactus requires **absolute local file paths**, not Metro bundler URLs or asset references.
+
+ ### ❌ Won't Work
+ ```typescript
+ // Metro bundler URLs
+ 'http://localhost:8081/assets/model.gguf'
+
+ // React Native asset requires
+ require('./assets/model.gguf')
+
+ // Relative paths
+ './models/model.gguf'
+ ```
+
+ ### ✅ Will Work
+ ```typescript
+ import RNFS from 'react-native-fs';
+
+ // Absolute paths in app directories
+ const modelPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
+ const imagePath = `${RNFS.DocumentDirectoryPath}/image.jpg`;
+
+ // Downloaded/copied files
+ const downloadModel = async () => {
+   const modelUrl = 'https://example.com/model.gguf';
+   const localPath = `${RNFS.DocumentDirectoryPath}/model.gguf`;
+
+   await RNFS.downloadFile({
+     fromUrl: modelUrl,
+     toFile: localPath,
+   }).promise;
+
+   return localPath; // Use this path with Cactus
+ };
+ ```
+
+ ### Image Assets
+ For images, you need to copy them to local storage first:
+
+ ```typescript
+ // Copy bundled asset to local storage
+ const copyAssetToLocal = async (assetName: string): Promise<string> => {
+   const assetPath = `${RNFS.MainBundlePath}/${assetName}`;
+   const localPath = `${RNFS.DocumentDirectoryPath}/${assetName}`;
+
+   if (!(await RNFS.exists(localPath))) {
+     await RNFS.copyFile(assetPath, localPath);
+   }
+
+   return localPath;
+ };
+
+ // Usage
+ const imagePath = await copyAssetToLocal('demo.jpg');
+ const params = { images: [imagePath], n_predict: 200 };
+ const result = await vlm.completion(messages, params);
  ```
 
- ### Text Completion
+ ### External Images
+ Download external images to local storage:
 
  ```typescript
- // Generate text completion
- const result = await context.completion({
-   prompt: "Explain quantum computing in simple terms",
+ const downloadImage = async (imageUrl: string): Promise<string> => {
+   const localPath = `${RNFS.DocumentDirectoryPath}/temp_image.jpg`;
+
+   await RNFS.downloadFile({
+     fromUrl: imageUrl,
+     toFile: localPath,
+   }).promise;
+
+   return localPath;
+ };
+ ```
+
+ ## Core APIs
+
+ ### CactusLM (Language Model)
+
+ For text-only language models:
+
+ ```typescript
+ import { CactusLM } from 'cactus-react-native';
+
+ // Initialize
+ const lm = await CactusLM.init({
+   model: '/path/to/model.gguf',
+   n_ctx: 4096, // Context window size
+   n_batch: 512, // Batch size for processing
+   n_threads: 4, // Number of threads
+   n_gpu_layers: 99, // GPU layers (0 = CPU only)
+ });
+
+ // Text completion
+ const messages = [
+   { role: 'system', content: 'You are a helpful assistant.' },
+   { role: 'user', content: 'What is the capital of France?' },
+ ];
+
+ const params = {
+   n_predict: 200,
    temperature: 0.7,
- top_k: 40,
- top_p: 0.95,
- n_predict: 512
- }, (token) => {
- // Process each token as it's generated
- console.log(token.token);
+   top_p: 0.9,
+   stop: ['</s>', '\n\n'],
+ };
+
+ const result = await lm.completion(messages, params);
+
+ // Embeddings
+ const embeddingResult = await lm.embedding('Your text here');
+ console.log('Embedding vector:', embeddingResult.embedding);
+
+ // Cleanup
+ await lm.rewind(); // Clear conversation
+ await lm.release(); // Release resources
+ ```
+
+ ### CactusVLM (Vision Language Model)
+
+ For multimodal models that can process both text and images:
+
+ ```typescript
+ import { CactusVLM } from 'cactus-react-native';
+
+ // Initialize with multimodal projector
+ const vlm = await CactusVLM.init({
+   model: '/path/to/vision-model.gguf',
+   mmproj: '/path/to/mmproj.gguf',
+   n_ctx: 2048,
+   n_threads: 4,
+   n_gpu_layers: 99, // GPU for main model, CPU for projector
  });
 
- // Clean up when done
- await context.release();
+ // Image + text completion
+ const messages = [{ role: 'user', content: 'What do you see in this image?' }];
+ const params = {
+   images: ['/path/to/image.jpg'],
+   n_predict: 200,
+   temperature: 0.3,
+ };
+
+ const result = await vlm.completion(messages, params);
+
+ // Text-only completion (same interface)
+ const textMessages = [{ role: 'user', content: 'Tell me a joke' }];
+ const textParams = { n_predict: 100 };
+ const textResult = await vlm.completion(textMessages, textParams);
+
+ // Cleanup
+ await vlm.rewind();
+ await vlm.release();
+ ```
+
+ ### CactusTTS (Text-to-Speech)
+
+ For text-to-speech generation:
+
+ ```typescript
+ import { CactusTTS } from 'cactus-react-native';
+
+ // Initialize with vocoder
+ const tts = await CactusTTS.init({
+   model: '/path/to/tts-model.gguf',
+   vocoder: '/path/to/vocoder.gguf',
+   n_ctx: 1024,
+   n_threads: 4,
+ });
+
+ // Generate speech
+ const text = 'Hello, this is a test of text-to-speech functionality.';
+ const params = {
+   voice_id: 0,
+   temperature: 0.7,
+   speed: 1.0,
+ };
+
+ const audioResult = await tts.generateSpeech(text, params);
+ console.log('Audio data:', audioResult.audio_data);
+
+ // Advanced token-based generation (decodeTokens takes the token array, per the API reference below)
+ const tokensResult = await tts.getGuideTokens('Your text here');
+ const audio = await tts.decodeTokens(tokensResult.tokens);
+
+ // Cleanup
+ await tts.release();
  ```
 
- ### Chat Completion
+ ## Text Completion
+
+ ### Basic Completion
 
  ```typescript
- // Chat messages following OpenAI format
+ const lm = await CactusLM.init({
+   model: '/path/to/model.gguf',
+   n_ctx: 2048,
+ });
+
  const messages = [
-   { role: "system", content: "You are a helpful assistant." },
-   { role: "user", content: "What is machine learning?" }
+   { role: 'user', content: 'Write a short poem about coding' }
  ];
 
- // Generate chat completion
- const result = await context.completion({
-   messages: messages,
-   temperature: 0.7,
-   top_k: 40,
-   top_p: 0.95,
-   n_predict: 512
- }, (token) => {
-   // Process each token
-   console.log(token.token);
+ const params = {
+   n_predict: 200,
+   temperature: 0.8,
+   top_p: 0.9,
+   stop: ['</s>', '\n\n'],
+ };
+
+ const result = await lm.completion(messages, params);
+
+ console.log(result.text);
+ console.log(`Tokens: ${result.tokens_predicted}`);
+ console.log(`Speed: ${result.timings.predicted_per_second.toFixed(2)} tokens/sec`);
+ ```
+
+ ### Streaming Completion
+
+ ```typescript
+ const result = await lm.completion(messages, params, (token) => {
+   // Called for each generated token
+   console.log('Token:', token.token);
+   updateUI(token.token);
  });
  ```
 
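+ If you want both streamed UI updates and the final result, the callback and the resolved value can be combined; a minimal sketch (`updateUI` above, and the buffer here, are illustrative placeholders):
+
+ ```typescript
+ let streamed = '';
+ const result = await lm.completion(messages, params, (token) => {
+   streamed += token.token; // accumulate partial output for display
+ });
+ // After the promise resolves, `streamed` should closely match `result.text`
+ console.log(streamed.length, result.text.length);
+ ```
+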
- ## Advanced Features
+ ### Advanced Parameters
 
- ### JSON Mode with Schema Validation
+ ```typescript
+ const params = {
+   // Generation control
+   n_predict: 256, // Max tokens to generate
+   temperature: 0.7, // Randomness (0.0 - 2.0)
+   top_p: 0.9, // Nucleus sampling
+   top_k: 40, // Top-k sampling
+   min_p: 0.05, // Minimum probability
+
+   // Repetition control
+   penalty_repeat: 1.1, // Repetition penalty
+   penalty_freq: 0.0, // Frequency penalty
+   penalty_present: 0.0, // Presence penalty
+
+   // Stop conditions
+   stop: ['</s>', '<|end|>', '\n\n'],
+   ignore_eos: false,
+
+   // Sampling methods
+   mirostat: 0, // Mirostat sampling (0 = disabled)
+   mirostat_tau: 5.0, // Target entropy
+   mirostat_eta: 0.1, // Learning rate
+
+   // Advanced
+   seed: -1, // Random seed (-1 = random)
+   n_probs: 0, // Return token probabilities
+ };
+ ```
+
+ ## Multimodal (Vision)
+
+ ### Setup Vision Model
 
  ```typescript
- // Define a JSON schema
- const schema = {
-   type: "object",
-   properties: {
-     name: { type: "string" },
-     age: { type: "number" },
-     hobbies: {
-       type: "array",
-       items: { type: "string" }
-     }
-   },
-   required: ["name", "age"]
+ import { CactusVLM } from 'cactus-react-native';
+
+ const vlm = await CactusVLM.init({
+   model: '/path/to/vision-model.gguf',
+   mmproj: '/path/to/mmproj.gguf', // Multimodal projector
+   n_ctx: 4096,
+ });
+ ```
+
+ ### Image Analysis
+
+ ```typescript
+ // Analyze single image
+ const messages = [{ role: 'user', content: 'Describe this image in detail' }];
+ const params = {
+   images: ['/path/to/image.jpg'],
+   n_predict: 200,
+   temperature: 0.3,
  };
 
- // Generate JSON-structured output
- const result = await context.completion({
-   prompt: "Generate a profile for a fictional person",
-   response_format: {
-     type: "json_schema",
-     json_schema: {
-       schema: schema,
-       strict: true
-     }
-   },
-   temperature: 0.7,
-   n_predict: 512
+ const result = await vlm.completion(messages, params);
+ console.log(result.text);
+ ```
+
+ ### Multi-Image Analysis
+
+ ```typescript
+ const imagePaths = [
+   '/path/to/image1.jpg',
+   '/path/to/image2.jpg',
+   '/path/to/image3.jpg'
+ ];
+
+ const messages = [{ role: 'user', content: 'Compare these images and explain the differences' }];
+ const params = {
+   images: imagePaths,
+   n_predict: 300,
+   temperature: 0.4,
+ };
+
+ const result = await vlm.completion(messages, params);
+ ```
+
+ ### Conversation with Images
+
+ ```typescript
+ const conversation = [
+   { role: 'user', content: 'What do you see in this image?' }
+ ];
+
+ const params = {
+   images: ['/path/to/image.jpg'],
+   n_predict: 256,
+   temperature: 0.3,
+ };
+
+ const result = await vlm.completion(conversation, params);
+ ```
+
+ ## Embeddings
+
+ ### Text Embeddings
+
+ ```typescript
+ // Enable embeddings during initialization
+ const lm = await CactusLM.init({
+   model: '/path/to/embedding-model.gguf',
+   embedding: true, // Enable embedding mode
+   n_ctx: 512, // Smaller context for embeddings
  });
 
- // The result will be valid JSON according to the schema
- const jsonData = JSON.parse(result.text);
+ // Generate embeddings
+ const text = 'Your text here';
+ const result = await lm.embedding(text);
+ console.log('Embedding vector:', result.embedding);
+ console.log('Dimensions:', result.embedding.length);
+ ```
+
+ ### Batch Embeddings
+
+ ```typescript
+ const texts = [
+   'The quick brown fox',
+   'Machine learning is fascinating',
+   'React Native development'
+ ];
+
+ const embeddings = await Promise.all(
+   texts.map(text => lm.embedding(text))
+ );
+
+ // Calculate similarity
+ function cosineSimilarity(a: number[], b: number[]): number {
+   const dotProduct = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
+   const magnitudeA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
+   const magnitudeB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
+   return dotProduct / (magnitudeA * magnitudeB);
+ }
+
+ const similarity = cosineSimilarity(
+   embeddings[0].embedding,
+   embeddings[1].embedding
+ );
  ```
 
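+ Building on the helper above, embeddings can power a simple semantic search; a minimal sketch reusing `cosineSimilarity` and the documented `lm.embedding` call (the ranking helper itself is illustrative):
+
+ ```typescript
+ async function rankBySimilarity(query: string, corpus: string[]) {
+   const queryEmbedding = (await lm.embedding(query)).embedding;
+   const scored = await Promise.all(
+     corpus.map(async (text) => ({
+       text,
+       score: cosineSimilarity(queryEmbedding, (await lm.embedding(text)).embedding),
+     }))
+   );
+   // Highest similarity first
+   return scored.sort((a, b) => b.score - a.score);
+ }
+ ```
+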
- ### Working with Embeddings
+ ## Text-to-Speech (TTS)
+
+ Cactus supports text-to-speech through vocoder models, allowing you to generate speech from text.
+
+ ### Setup TTS Model
 
  ```typescript
- // Generate embeddings for text
- const embedding = await context.embedding("This is a sample text", {
-   pooling_type: "mean" // Options: "none", "mean", "cls", "last", "rank"
+ import { CactusTTS } from 'cactus-react-native';
+
+ const tts = await CactusTTS.init({
+   model: '/path/to/text-model.gguf',
+   vocoder: '/path/to/vocoder-model.gguf',
+   n_ctx: 2048,
  });
+ ```
+
+ ### Basic Text-to-Speech
+
+ ```typescript
+ const text = 'Hello, this is a test of text-to-speech functionality.';
+ const params = {
+   voice_id: 0, // Speaker voice ID
+   temperature: 0.7, // Speech variation
+   speed: 1.0, // Speech speed
+ };
+
+ const result = await tts.generateSpeech(text, params);
+
+ console.log('Audio data:', result.audio_data);
+ console.log('Sample rate:', result.sample_rate);
+ console.log('Audio format:', result.format);
+ ```
+
+ ### Advanced TTS with Token Control
+
+ ```typescript
+ // Get guide tokens for precise control
+ const tokensResult = await tts.getGuideTokens(
+   'This text will be converted to speech tokens.'
+ );
+
+ console.log('Guide tokens:', tokensResult.tokens);
+ console.log('Token count:', tokensResult.tokens.length);
 
- console.log(`Embedding dimensions: ${embedding.embedding.length}`);
- // Use the embedding for similarity comparison, clustering, etc.
+ // Decode tokens to audio
+ const audioResult = await tts.decodeTokens(tokensResult.tokens);
+
+ console.log('Decoded audio:', audioResult.audio_data);
+ console.log('Duration:', audioResult.duration_seconds);
  ```
 
+ ### Complete TTS Example
+
+ ```typescript
+ import React, { useState, useEffect } from 'react';
+ import { View, Text, TextInput, TouchableOpacity, Alert } from 'react-native';
+ import { Audio } from 'expo-av';
+ import RNFS from 'react-native-fs';
+ import { CactusTTS } from 'cactus-react-native';
+
+ export default function TTSDemo() {
+   const [tts, setTTS] = useState<CactusTTS | null>(null);
+   const [text, setText] = useState('Hello, this is a test of speech synthesis.');
+   const [isGenerating, setIsGenerating] = useState(false);
+   const [sound, setSound] = useState<Audio.Sound | null>(null);
+
+   useEffect(() => {
+     initializeTTS();
+     return () => {
+       if (sound) {
+         sound.unloadAsync();
+       }
+     };
+   }, []);
+
+   async function initializeTTS() {
+     try {
+       // Download and initialize models
+       const modelPath = await downloadModel();
+       const vocoderPath = await downloadVocoder();
+
+       const cactusTTS = await CactusTTS.init({
+         model: modelPath,
+         vocoder: vocoderPath,
+         n_ctx: 1024,
+         n_threads: 4,
+       });
+
+       setTTS(cactusTTS);
+     } catch (error) {
+       console.error('Failed to initialize TTS:', error);
+       Alert.alert('Error', 'Failed to initialize TTS');
+     }
+   }
+
+   async function generateSpeech() {
+     if (!tts || !text.trim()) return;
+
+     setIsGenerating(true);
+     try {
+       const params = {
+         voice_id: 0,
+         temperature: 0.7,
+         speed: 1.0,
+       };
+
+       const result = await tts.generateSpeech(text, params);
+
+       // Save audio to file
+       const audioPath = `${RNFS.DocumentDirectoryPath}/speech.wav`;
+       await RNFS.writeFile(audioPath, result.audio_data, 'base64');
+
+       // Play audio
+       const { sound: audioSound } = await Audio.Sound.createAsync({
+         uri: `file://${audioPath}`,
+       });
+
+       setSound(audioSound);
+       await audioSound.playAsync();
+
+       console.log(`Generated speech: ${result.duration_seconds}s`);
+     } catch (error) {
+       console.error('Speech generation failed:', error);
+       Alert.alert('Error', 'Failed to generate speech');
+     } finally {
+       setIsGenerating(false);
+     }
+   }
+
+   // Helper functions for downloading models would go here...
+
+   return (
+     <View style={{ flex: 1, padding: 16 }}>
+       <Text style={{ fontSize: 18, marginBottom: 16 }}>
+         Text-to-Speech Demo
+       </Text>
+
+       <TextInput
+         style={{
+           borderWidth: 1,
+           borderColor: '#ddd',
+           borderRadius: 8,
+           padding: 12,
+           marginBottom: 16,
+           minHeight: 100,
+         }}
+         value={text}
+         onChangeText={setText}
+         placeholder="Enter text to convert to speech..."
+         multiline
+       />
+
+       <TouchableOpacity
+         onPress={generateSpeech}
+         disabled={isGenerating || !tts}
+         style={{
+           backgroundColor: isGenerating ? '#ccc' : '#007AFF',
+           padding: 16,
+           borderRadius: 8,
+           alignItems: 'center',
+         }}
+       >
+         <Text style={{ color: 'white', fontSize: 16, fontWeight: 'bold' }}>
+           {isGenerating ? 'Generating...' : 'Generate Speech'}
+         </Text>
+       </TouchableOpacity>
+     </View>
+   );
+ }
+ ```
+
+ ## Advanced Features
+
  ### Session Management
 
+ For the low-level API, you can still access session management:
+
  ```typescript
- // Save the current session state
- const tokenCount = await context.saveSession("session.bin", { tokenSize: 1024 });
- console.log(`Saved session with ${tokenCount} tokens`);
+ import { initLlama } from 'cactus-react-native';
+
+ const context = await initLlama({ model: '/path/to/model.gguf' });
 
- // Load a saved session
- const loadResult = await context.loadSession("session.bin");
- console.log(`Loaded session: ${loadResult.success}`);
+ // Save session
+ const tokensKept = await context.saveSession('/path/to/session.bin', {
+   tokenSize: 1024 // Number of tokens to keep
+ });
+
+ // Load session
+ const sessionInfo = await context.loadSession('/path/to/session.bin');
+ console.log(`Loaded ${sessionInfo.tokens_loaded} tokens`);
  ```
 
- ### Working with LoRA Adapters
+ ### LoRA Adapters
 
  ```typescript
- // Apply LoRA adapters to the model
+ const context = await initLlama({ model: '/path/to/model.gguf' });
+
+ // Apply LoRA adapters
  await context.applyLoraAdapters([
-   { path: "models/lora_adapter.bin", scaled: 0.8 }
+   { path: '/path/to/lora1.gguf', scaled: 1.0 },
+   { path: '/path/to/lora2.gguf', scaled: 0.8 }
  ]);
 
- // Get currently loaded adapters
- const loadedAdapters = await context.getLoadedLoraAdapters();
+ // Get loaded adapters
+ const adapters = await context.getLoadedLoraAdapters();
+ console.log('Loaded adapters:', adapters);
 
- // Remove all LoRA adapters
+ // Remove adapters
  await context.removeLoraAdapters();
  ```
 
- ### Model Benchmarking
+ ### Structured Output (JSON)
 
  ```typescript
- // Benchmark the model performance
- const benchResult = await context.bench(
-   32, // pp: prompt processing tests
-   32, // tg: token generation tests
-   512, // pl: prompt length
-   5 // nr: number of runs
- );
+ const messages = [
+   { role: 'user', content: 'Extract information about this person: John Doe, 30 years old, software engineer from San Francisco' }
+ ];
 
- console.log(`Average token generation speed: ${benchResult.tgAvg} tokens/sec`);
- console.log(`Model size: ${benchResult.modelSize} bytes`);
+ const params = {
+   response_format: {
+     type: 'json_object',
+     schema: {
+       type: 'object',
+       properties: {
+         name: { type: 'string' },
+         age: { type: 'number' },
+         profession: { type: 'string' },
+         location: { type: 'string' }
+       },
+       required: ['name', 'age']
+     }
+   }
+ };
+
+ const result = await lm.completion(messages, params);
+ const person = JSON.parse(result.text);
+ console.log(person.name); // "John Doe"
  ```
 
- ### Native Logging
+ ### Performance Monitoring
 
  ```typescript
- import { addNativeLogListener, toggleNativeLog } from 'cactus-react-native';
+ const result = await lm.completion(messages, { n_predict: 100 });
+
+ console.log('Performance metrics:');
+ console.log(`Prompt tokens: ${result.timings.prompt_n}`);
+ console.log(`Generated tokens: ${result.timings.predicted_n}`);
+ console.log(`Prompt speed: ${result.timings.prompt_per_second.toFixed(2)} tokens/sec`);
+ console.log(`Generation speed: ${result.timings.predicted_per_second.toFixed(2)} tokens/sec`);
+ console.log(`Total time: ${(result.timings.prompt_ms + result.timings.predicted_ms).toFixed(0)}ms`);
+ ```
 
- // Enable native logging
- await toggleNativeLog(true);
+ ## Best Practices
 
- // Add a listener for native logs
- const logListener = addNativeLogListener((level, text) => {
-   console.log(`[${level}] ${text}`);
- });
+ ### Model Management
+
+ ```typescript
+ class ModelManager {
+   private models = new Map<string, CactusLM | CactusVLM | CactusTTS>();
 
- // Remove the listener when no longer needed
- logListener.remove();
+   async loadLM(name: string, modelPath: string): Promise<CactusLM> {
+     if (this.models.has(name)) {
+       return this.models.get(name)! as CactusLM;
+     }
+
+     const lm = await CactusLM.init({ model: modelPath });
+     this.models.set(name, lm);
+     return lm;
+   }
+
+   async loadVLM(name: string, modelPath: string, mmprojPath: string): Promise<CactusVLM> {
+     if (this.models.has(name)) {
+       return this.models.get(name)! as CactusVLM;
+     }
+
+     const vlm = await CactusVLM.init({ model: modelPath, mmproj: mmprojPath });
+     this.models.set(name, vlm);
+     return vlm;
+   }
+
+   async unloadModel(name: string): Promise<void> {
+     const model = this.models.get(name);
+     if (model) {
+       await model.release();
+       this.models.delete(name);
+     }
+   }
+
+   async unloadAll(): Promise<void> {
+     await Promise.all(
+       Array.from(this.models.values()).map(model => model.release())
+     );
+     this.models.clear();
+   }
+ }
  ```
 
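+ Usage is then a matter of asking the manager for a model by name; a brief sketch (the `'chat'` key and model path are placeholders):
+
+ ```typescript
+ const modelManager = new ModelManager();
+ const lm = await modelManager.loadLM('chat', `${RNFS.DocumentDirectoryPath}/model.gguf`);
+ // ...use lm.completion(...) as usual, then free everything on teardown
+ await modelManager.unloadAll();
+ ```
+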
- ## Error Handling
+ ### Error Handling
 
  ```typescript
- try {
-   const context = await initLlama({
-     model: 'models/non-existent-model.gguf',
-     n_ctx: 2048,
-     n_threads: 4
-   });
- } catch (error) {
-   console.error('Failed to initialize model:', error);
+ async function safeCompletion(lm: CactusLM, messages: any[]) {
+   try {
+     const result = await lm.completion(messages, {
+       n_predict: 256,
+       temperature: 0.7,
+     });
+     return { success: true, data: result };
+   } catch (error: any) {
+     if (error.message.includes('Context is busy')) {
+       // Wait briefly, then retry the concurrent request
+       await new Promise(resolve => setTimeout(resolve, 100));
+       return safeCompletion(lm, messages);
+     } else if (error.message.includes('Context not found')) {
+       // Handle context cleanup
+       throw new Error('Model context was released');
+     } else {
+       // Handle other errors
+       console.error('Completion failed:', error);
+       return { success: false, error: error.message };
+     }
+   }
  }
  ```
199
- ## Best Practices
+ ### Memory Management
 
- 1. **Model Management**
-    - Store models in the app's document directory
-    - Consider model size when targeting specific devices
-    - Smaller models like SmolLM (135M) work well on most devices
+ ```typescript
+ import { AppState, Platform } from 'react-native';
+
+ // Monitor memory usage
+ const checkMemory = () => {
+   if (Platform.OS === 'android') {
+     // Android-specific memory monitoring
+     console.log('Memory warning - consider releasing unused models');
+   }
+ };
 
- 2. **Performance Optimization**
-    - Adjust `n_threads` based on the device's capabilities
-    - Use a smaller `n_ctx` for memory-constrained devices
-    - Consider INT8 or INT4 quantized models for better performance
+ // Release models when app goes to background
+ AppState.addEventListener('change', (nextAppState) => {
+   if (nextAppState === 'background') {
+     // Release non-essential models
+     modelManager.unloadAll();
+   }
+ });
+ ```
 
- 3. **Battery Efficiency**
-    - Release the model context when not in use
-    - Process inference in smaller batches
-    - Consider background processing for long generations
+
878
+ ## API Reference
879
+
880
+ ### High-Level APIs
881
+
882
+ - `CactusLM.init(params: ContextParams): Promise<CactusLM>` - Initialize language model
883
+ - `CactusVLM.init(params: VLMContextParams): Promise<CactusVLM>` - Initialize vision language model
884
+ - `CactusTTS.init(params: TTSContextParams): Promise<CactusTTS>` - Initialize text-to-speech model
885
+
886
+ ### CactusLM Methods
887
+
888
+ - `completion(messages: CactusOAICompatibleMessage[], params: CompletionParams, callback?: (token: TokenData) => void): Promise<NativeCompletionResult>`
889
+ - `embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>`
890
+ - `rewind(): Promise<void>` - Clear conversation history
891
+ - `release(): Promise<void>` - Release resources
892
+
893
+ ### CactusVLM Methods
894
+
895
+ - `completion(messages: CactusOAICompatibleMessage[], params: VLMCompletionParams, callback?: (token: TokenData) => void): Promise<NativeCompletionResult>`
896
+ - `rewind(): Promise<void>` - Clear conversation history
897
+ - `release(): Promise<void>` - Release resources
898
+
899
+ ### CactusTTS Methods
900
+
901
+ - `generateSpeech(text: string, params: TTSSpeechParams): Promise<NativeAudioCompletionResult>`
902
+ - `getGuideTokens(text: string): Promise<NativeAudioTokensResult>`
903
+ - `decodeTokens(tokens: number[]): Promise<NativeAudioDecodeResult>`
904
+ - `release(): Promise<void>` - Release resources
905
+
906
+ ### Low-Level Functions (Advanced)
907
+
908
+ For advanced use cases, the original low-level API is still available:
909
+
910
+ - `initLlama(params: ContextParams): Promise<LlamaContext>` - Initialize a model context
911
+ - `releaseAllLlama(): Promise<void>` - Release all contexts
912
+ - `setContextLimit(limit: number): Promise<void>` - Set maximum contexts
913
+ - `toggleNativeLog(enabled: boolean): Promise<void>` - Enable/disable native logging
215
914
 
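+ Putting those four functions together, assuming the signatures listed above:
+
+ ```typescript
+ import { initLlama, releaseAllLlama, setContextLimit, toggleNativeLog } from 'cactus-react-native';
+
+ await toggleNativeLog(true); // Surface native-layer logs while debugging
+ await setContextLimit(1); // Allow at most one live context
+
+ const context = await initLlama({ model: '/path/to/model.gguf' });
+ // ...use the context, then tear everything down
+ await releaseAllLlama();
+ ```
+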
- 4. **Memory Management**
-    - Always call `context.release()` when done with a model
-    - Use `releaseAllLlama()` when switching between multiple models
+ ## Troubleshooting
 
- ## Example App
+ ### Common Issues
 
- For a complete working example, check out the [React Native example app](https://github.com/cactus-compute/cactus/tree/main/examples/react-example) in the repository.
+ **Model Loading Fails**
+ ```typescript
+ // Check file exists and is accessible
+ if (!(await RNFS.exists(modelPath))) {
+   throw new Error('Model file not found');
+ }
+
+ // Check file size
+ const stats = await RNFS.stat(modelPath);
+ console.log('Model size:', stats.size);
+ ```
+
+ **Out of Memory**
+ ```typescript
+ // Reduce context size
+ const lm = await CactusLM.init({
+   model: '/path/to/model.gguf',
+   n_ctx: 1024, // Reduce from 4096
+   n_batch: 128, // Reduce batch size
+ });
+ ```
+
+ **GPU Issues**
+ ```typescript
+ // Disable GPU if having issues
+ const lm = await CactusLM.init({
+   model: '/path/to/model.gguf',
+   n_gpu_layers: 0, // Use CPU only
+ });
+ ```
 
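+ A common pattern is to attempt GPU initialization first and fall back to CPU automatically; a minimal sketch using only the documented `CactusLM.init` options:
+
+ ```typescript
+ import { CactusLM } from 'cactus-react-native';
+
+ async function initWithFallback(modelPath: string): Promise<CactusLM> {
+   try {
+     // Try GPU acceleration first
+     return await CactusLM.init({ model: modelPath, n_gpu_layers: 99 });
+   } catch (error) {
+     console.warn('GPU init failed, retrying on CPU:', error);
+     return await CactusLM.init({ model: modelPath, n_gpu_layers: 0 });
+   }
+ }
+ ```
+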
- This example demonstrates:
- - Loading and initializing models
- - Building a chat interface
- - Streaming responses
- - Proper resource management
+ ### Performance Tips
 
- ## License
+ 1. **Use appropriate context sizes** - Larger contexts use more memory
+ 2. **Optimize batch sizes** - Balance between speed and memory
+ 3. **Cache models** - Don't reload models unnecessarily (see the sketch after this list)
+ 4. **Use GPU acceleration** - When available and stable
+ 5. **Monitor memory usage** - Release models when not needed
 
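+ For tip 3, the simplest cache is a module-level promise, so concurrent callers share a single `init`; a minimal sketch (the model path is a placeholder):
+
+ ```typescript
+ import RNFS from 'react-native-fs';
+ import { CactusLM } from 'cactus-react-native';
+
+ let lmPromise: Promise<CactusLM> | null = null;
+
+ function getSharedLM(): Promise<CactusLM> {
+   // First caller triggers init; later callers reuse the same pending/ready instance
+   if (!lmPromise) {
+     lmPromise = CactusLM.init({ model: `${RNFS.DocumentDirectoryPath}/model.gguf` });
+   }
+   return lmPromise;
+ }
+ ```
+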
- This project is licensed under the Apache 2.0 License.
+ This documentation covers the essential usage patterns for cactus-react-native. For more examples, check the [example apps](../examples/) in the repository.