react-native-litert-lm 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/README.md +149 -31
  2. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +307 -61
  3. package/cpp/HybridLiteRTLM.cpp +85 -31
  4. package/cpp/HybridLiteRTLM.hpp +4 -0
  5. package/cpp/include/stb_image.h +7988 -0
  6. package/lib/hooks.d.ts +16 -0
  7. package/lib/hooks.js +114 -0
  8. package/lib/index.d.ts +27 -2
  9. package/lib/index.js +50 -6
  10. package/lib/modelFactory.d.ts +5 -0
  11. package/lib/modelFactory.js +42 -0
  12. package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
  13. package/lib/templates.d.ts +51 -0
  14. package/lib/templates.js +81 -0
  15. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -0
  16. package/nitrogen/generated/android/c++/JFunc_void_double.hpp +75 -0
  17. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +33 -1
  18. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +2 -0
  19. package/nitrogen/generated/android/c++/JLLMConfig.hpp +6 -1
  20. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_double.kt +80 -0
  21. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +13 -0
  22. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +5 -2
  23. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +2 -0
  24. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +2 -0
  25. package/nitrogen/generated/shared/c++/LLMConfig.hpp +7 -2
  26. package/package.json +1 -1
  27. package/src/hooks.ts +152 -0
  28. package/src/index.ts +41 -3
  29. package/src/modelFactory.ts +49 -0
  30. package/src/specs/LiteRTLM.nitro.ts +26 -0
  31. package/src/templates.ts +105 -0
package/README.md CHANGED
@@ -10,12 +10,12 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
10
10
  - 📦 **Bundled Tokenizer** - No separate tokenization library needed
11
11
  - 🔄 **Streaming Support** - Token-by-token generation callbacks
12
12
  - 📱 **Cross-Platform** - Android API 26+
13
- - 🚧 **Multimodal** - Image and audio input (Coming Soon)
13
+ - 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
14
14
  - 🧵 **Async API** - Non-blocking inference to prevent UI freezes
15
15
 
16
16
  ## Status
17
17
 
18
- > ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub repository](https://github.com/litert-community/react-native-litert-lm).
18
+ > ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub issues](https://github.com/hung-yueh/react-native-litert-lm/issues).
19
19
 
20
20
  ## Installation
21
21
 
@@ -54,13 +54,40 @@ cd android && ./gradlew clean
54
54
  cd ios && pod install # iOS coming soon
55
55
  ```
56
56
 
57
+ ## Example App
58
+
59
+ The repository includes a fully functional example app in the `example/` directory.
60
+
61
+ To run it:
62
+
63
+ 1. **Navigate to the example directory:**
64
+
65
+ ```bash
66
+ cd example
67
+ ```
68
+
69
+ 2. **Install dependencies:**
70
+
71
+ ```bash
72
+ npm install
73
+ ```
74
+
75
+ 3. **Run on Android:**
76
+ ```bash
77
+ npx expo run:android
78
+ ```
79
+
57
80
  ## Model Management
58
81
 
59
82
  LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
60
83
 
61
- ### Downloading Models
84
+ ### Automatic Downloading
85
+
86
+ The library supports automatic downloading when you pass a URL to `loadModel` or `useModel`.
87
+
88
+ ### Manual Downloading (Optional)
62
89
 
63
- We recommend using `rn-fetch-blob` or `expo-file-system` to download models.
90
+ If you prefer to manage downloads manually (e.g., using `rn-fetch-blob` or `expo-file-system`), you can download the file to a local path and pass that path to the library.
64
91
 
65
92
  ```typescript
66
93
  import { FileSystem } from "react-native-file-access";
@@ -80,18 +107,53 @@ async function downloadModel() {
80
107
 
81
108
  ## Usage
82
109
 
83
- ### Basic Generation
110
+ ### React Hook (Recommended)
111
+
112
+ The `useModel` hook manages the model lifecycle, including downloading, loading, and unloading.
113
+
114
+ ```typescript
115
+ import { useModel, GEMMA_3N_E2B_IT_INT4 } from "react-native-litert-lm";
116
+
117
+ function App() {
118
+ const {
119
+ model,
120
+ isReady,
121
+ downloadProgress,
122
+ load, // Manually trigger load
123
+ deleteModel // Delete model file
124
+ } = useModel(
125
+ GEMMA_3N_E2B_IT_INT4,
126
+ {
127
+ backend: "cpu",
128
+ autoLoad: true, // Default: true. Set false to load manually.
129
+ systemPrompt: "You are a helpful assistant."
130
+ }
131
+ );
132
+
133
+ if (!isReady) {
134
+ return <Text>Loading... {Math.round(downloadProgress * 100)}%</Text>;
135
+ }
136
+
137
+ const generate = async () => {
138
+ const response = await model.sendMessage("Hello!");
139
+ console.log(response);
140
+ };
141
+
142
+ return <Button title="Generate" onPress={generate} />;
143
+ }
144
+ ```
145
+
146
+ ### Manual Usage
84
147
 
85
148
  ```typescript
86
149
  import { createLLM } from "react-native-litert-lm";
87
150
 
88
151
  const llm = createLLM();
89
152
 
90
- // Load a Gemma 3n model (async)
91
- await llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
153
+ // Load a model from URL (auto-downloads) or local path
154
+ await llm.loadModel("https://example.com/model.litertlm", {
92
155
  backend: "gpu",
93
- temperature: 0.7,
94
- maxTokens: 512,
156
+ systemPrompt: "You are a helpful assistant.",
95
157
  });
96
158
 
97
159
  // Generate response (async)
@@ -114,19 +176,26 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
114
176
  ### Multimodal (Image/Audio)
115
177
 
116
178
  ```typescript
117
- // Image input (for vision models)
118
- // Note: Currently throws error on Android (Coming Soon)
119
- const response = await llm.sendMessageWithImage(
120
- "What's in this image?",
121
- "/path/to/image.jpg",
122
- );
123
-
124
- // Audio input (for audio models)
125
- // Note: Currently throws error on Android (Coming Soon)
126
- const transcription = await llm.sendMessageWithAudio(
127
- "Transcribe this audio",
128
- "/path/to/audio.wav",
129
- );
179
+ import { checkMultimodalSupport } from "react-native-litert-lm";
180
+
181
+ // Check platform support first
182
+ const error = checkMultimodalSupport();
183
+ if (error) {
184
+ console.warn(error); // iOS not yet supported
185
+ } else {
186
+ // Image input (for vision models like Gemma 3n)
187
+ // Images >1024px are automatically resized to prevent OOM
188
+ const response = await llm.sendMessageWithImage(
189
+ "What's in this image?",
190
+ "/path/to/image.jpg",
191
+ );
192
+
193
+ // Audio input (for audio models)
194
+ const transcription = await llm.sendMessageWithAudio(
195
+ "Transcribe this audio",
196
+ "/path/to/audio.wav",
197
+ );
198
+ }
130
199
  ```
131
200
 
132
201
  ### Check Performance
@@ -139,15 +208,18 @@ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
139
208
 
140
209
  ## Supported Models
141
210
 
142
- Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-community):
211
+ Download `.litertlm` models automatically using the exported constants or from [HuggingFace](https://huggingface.co/litert-community):
143
212
 
144
- | Model | Size | Min Device RAM | Use Case |
145
- | ------------- | ------ | -------------- | ------------------------- |
146
- | Gemma 3n E2B | ~3GB | 4GB+ | Efficient, fast responses |
147
- | Gemma 3n E4B | ~4GB | 8GB+ | Higher quality |
148
- | Gemma 3 1B | ~1GB | 4GB+ | Smallest, fastest |
149
- | Phi-4 Mini | ~2GB | 4GB+ | Microsoft's small LLM |
150
- | Qwen 2.5 1.5B | ~1.5GB | 4GB+ | Multilingual |
213
+ | Model Constant | Description | Size | Min Device RAM |
214
+ | :--------------------- | :------------------------------------- | :--- | :------------- |
215
+ | `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Instruction Tuned, Int4) | ~3GB | 4GB+ |
216
+
217
+ | Other Models | Size | Min Device RAM | Use Case |
218
+ | ------------- | ------ | -------------- | --------------------- |
219
+ | Gemma 3n E4B | ~4GB | 8GB+ | Higher quality |
220
+ | Gemma 3 1B | ~1GB | 4GB+ | Smallest, fastest |
221
+ | Phi-4 Mini | ~2GB | 4GB+ | Microsoft's small LLM |
222
+ | Qwen 2.5 1.5B | ~1.5GB | 4GB+ | Multilingual |
151
223
 
152
224
  ## API Reference
153
225
 
@@ -157,7 +229,8 @@ Creates a new LLM inference engine instance.
157
229
 
158
230
  ### `loadModel(path, config?): Promise<void>`
159
231
 
160
- - `path: string` - Absolute path to `.litertlm` file
232
+ - `path: string` - Absolute path to `.litertlm` file OR a public URL (http/https). If a URL is provided, the model will be downloaded automatically.
233
+ - `config.systemPrompt` - System prompt to guide model behavior (e.g., "You are a helpful assistant.")
161
234
  - `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
162
235
  - `config.temperature` - Sampling temperature (default: 0.7)
163
236
  - `config.topK` - Top-K sampling (default: 40)
@@ -203,6 +276,10 @@ Clear context and start fresh.
203
276
 
204
277
  Release all native resources.
205
278
 
279
+ ### `deleteModel(fileName): Promise<void>`
280
+
281
+ Deletes a model file from the app's internal storage and cleans up the engine instance.
282
+
206
283
  ### `getRecommendedBackend(): Backend`
207
284
 
208
285
  Returns the recommended backend for the current platform (usually `'gpu'`).
@@ -220,6 +297,47 @@ if (warning) {
220
297
  }
221
298
  ```
222
299
 
300
+ ### `checkMultimodalSupport(): string | undefined`
301
+
302
+ Returns an error message if multimodal (image/audio) is not supported on the current platform, or `undefined` if OK.
303
+
304
+ ```typescript
305
+ import { checkMultimodalSupport } from "react-native-litert-lm";
306
+
307
+ const error = checkMultimodalSupport();
308
+ if (error) {
309
+ console.warn(error); // iOS multimodal not yet supported
310
+ }
311
+ ```
312
+
313
+ ### Prompt Templates
314
+
315
+ For advanced use cases where you need to manually format prompts:
316
+
317
+ ```typescript
318
+ import {
319
+ applyGemmaTemplate,
320
+ applyPhiTemplate,
321
+ applyLlamaTemplate,
322
+ ChatMessage,
323
+ } from "react-native-litert-lm";
324
+
325
+ const history: ChatMessage[] = [
326
+ { role: "user", content: "Hello!" },
327
+ { role: "model", content: "Hi there!" },
328
+ { role: "user", content: "Tell me a joke" },
329
+ ];
330
+
331
+ // For Gemma models
332
+ const gemmaPrompt = applyGemmaTemplate(history, "You are a comedian.");
333
+
334
+ // For Phi models
335
+ const phiPrompt = applyPhiTemplate(history);
336
+
337
+ // For Llama models
338
+ const llamaPrompt = applyLlamaTemplate(history, "You are helpful.");
339
+ ```
340
+
223
341
  ## Requirements
224
342
 
225
343
  - React Native 0.76+
@@ -20,8 +20,11 @@ import com.margelo.nitro.dev.litert.litertlm.LLMConfig
20
20
  import com.margelo.nitro.dev.litert.litertlm.Message
21
21
  import com.margelo.nitro.dev.litert.litertlm.Role
22
22
  import com.margelo.nitro.core.Promise
23
+ import com.google.ai.edge.litertlm.Content
24
+
23
25
 
24
26
  // Alias to avoid confusion with our generated Message type
27
+ // Alias to avoid confusion
25
28
  typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
26
29
 
27
30
  /**
@@ -34,6 +37,20 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
34
37
 
35
38
  companion object {
36
39
  private const val TAG = "HybridLiteRTLM"
40
+ private val initLock = Any()
41
+
42
+ /**
43
+ * Initialize the native library.
44
+ * Must be called from Application.onCreate() to register the HybridObject.
45
+ */
46
+ fun initialize() {
47
+ try {
48
+ // Call generated internal OnLoad to load the library
49
+ LiteRTLMOnLoad.initializeNative()
50
+ } catch (e: Throwable) {
51
+ Log.e(TAG, "Failed to initialize LiteRTLM native library", e)
52
+ }
53
+ }
37
54
  }
38
55
 
39
56
  init {
@@ -43,6 +60,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
43
60
  // LiteRT-LM Engine and Conversation
44
61
  private var engine: Engine? = null
45
62
  private var conversation: Conversation? = null
63
+
64
+ @Volatile
65
+ private var isClosed = false
46
66
 
47
67
  // Conversation history for getHistory()
48
68
  private val history = mutableListOf<Message>()
@@ -72,64 +92,74 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
72
92
  // -------------------------------------------------------------------------
73
93
  override fun loadModel(modelPath: String, config: LLMConfig?): Promise<Unit> {
74
94
  return Promise.parallel {
75
- Log.i(TAG, "loadModel: $modelPath")
76
-
77
- // Clean up existing resources
78
- close()
79
-
80
- // Apply configuration
81
- config?.let { cfg ->
82
- cfg.backend?.let { backend = it }
83
- cfg.temperature?.let { temperature = it }
84
- cfg.topK?.let { topK = it.toInt() }
85
- cfg.topP?.let { topP = it }
86
- cfg.maxTokens?.let { maxTokens = it.toInt() }
87
- }
88
-
89
- try {
90
- // Map our Backend enum to LiteRT-LM Backend enum
91
- val lmBackend = when (backend) {
92
- Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
93
- Backend.NPU -> {
94
- Log.i(TAG, "NPU backend requested - requires hardware support")
95
- com.google.ai.edge.litertlm.Backend.NPU
96
- }
97
- else -> com.google.ai.edge.litertlm.Backend.CPU
95
+ // Serialize initialization to prevent OOM from concurrent loads
96
+ synchronized(initLock) {
97
+ if (isClosed) {
98
+ throw RuntimeException("Cannot load model: LiteRTLM instance is closed")
98
99
  }
99
100
 
100
- // Vision backend: hardcoded to GPU (required by Gemma 3n)
101
- val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
101
+ Log.i(TAG, "loadModel: $modelPath")
102
+
103
+ // Clean up existing resources
104
+ // We call internal cleanup that doesn't set isClosed
105
+ cleanupInternal()
106
+
107
+ // Apply configuration
108
+ config?.let { cfg ->
109
+ cfg.backend?.let { backend = it }
110
+ cfg.temperature?.let { temperature = it }
111
+ cfg.topK?.let { topK = it.toInt() }
112
+ cfg.topP?.let { topP = it }
113
+ cfg.maxTokens?.let { maxTokens = it.toInt() }
114
+ }
115
+
116
+ try {
117
+ // Map our Backend enum to LiteRT-LM Backend enum
118
+ val lmBackend = when (backend) {
119
+ Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
120
+ Backend.NPU -> {
121
+ Log.i(TAG, "NPU backend requested - requires hardware support")
122
+ com.google.ai.edge.litertlm.Backend.NPU
123
+ }
124
+ else -> com.google.ai.edge.litertlm.Backend.CPU
125
+ }
102
126
 
103
- // Audio backend: hardcoded to CPU (optimal for audio processing)
104
- val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
105
-
106
- Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
107
-
108
- // Get cache directory from application context
109
- val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
110
- Log.i(TAG, "Using cache directory: $cacheDirectory")
111
-
112
- // Create Engine configuration
113
- val engineConfig = EngineConfig(
114
- modelPath = modelPath,
115
- backend = lmBackend,
116
- visionBackend = lmVisionBackend,
117
- audioBackend = lmAudioBackend,
118
- maxNumTokens = maxTokens,
119
- cacheDir = cacheDirectory
120
- )
121
-
122
- // Initialize Engine
123
- engine = Engine(engineConfig).also { it.initialize() }
124
- Log.i(TAG, "Engine created and initialized successfully")
125
-
126
- // Create Conversation
127
- createNewConversation()
128
- Log.i(TAG, "Conversation created successfully")
129
-
130
- } catch (e: Exception) {
131
- Log.e(TAG, "Failed to load model: ${e.message}", e)
132
- throw RuntimeException("Failed to load model: ${e.message}", e)
127
+ // Vision backend: hardcoded to GPU (required by Gemma 3n)
128
+ val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
129
+
130
+ // Audio backend: hardcoded to CPU (optimal for audio processing)
131
+ val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
132
+
133
+ Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
134
+
135
+ // Get cache directory from application context
136
+ val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
137
+ Log.i(TAG, "Using cache directory: $cacheDirectory")
138
+
139
+ // Create Engine configuration
140
+ val engineConfig = EngineConfig(
141
+ modelPath = modelPath,
142
+ backend = lmBackend,
143
+ visionBackend = lmVisionBackend,
144
+ audioBackend = lmAudioBackend,
145
+ maxNumTokens = maxTokens,
146
+ cacheDir = cacheDirectory
147
+ )
148
+
149
+ if (isClosed) return@synchronized
150
+
151
+ // Initialize Engine
152
+ engine = Engine(engineConfig).also { it.initialize() }
153
+ Log.i(TAG, "Engine created and initialized successfully")
154
+
155
+ // Create Conversation
156
+ createNewConversation()
157
+ Log.i(TAG, "Conversation created successfully")
158
+
159
+ } catch (e: Exception) {
160
+ Log.e(TAG, "Failed to load model: ${e.message}", e)
161
+ throw RuntimeException("Failed to load model: ${e.message}", e)
162
+ }
133
163
  }
134
164
  }
135
165
  }
@@ -230,17 +260,215 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
230
260
  // -------------------------------------------------------------------------
231
261
  // Multimodal methods
232
262
  // -------------------------------------------------------------------------
263
+
264
+ /**
265
+ * Resize image if dimensions exceed maxDimension to prevent OOM.
266
+ * Gemma 3n's vision encoder is optimized for 512x512 or 1024x1024.
267
+ * Passing larger images can spike memory 500MB+.
268
+ */
269
+ private fun resizeImageIfNeeded(imagePath: String, maxDimension: Int = 1024): String {
270
+ val originalBitmap = android.graphics.BitmapFactory.decodeFile(imagePath)
271
+ ?: throw RuntimeException("Failed to decode image: $imagePath")
272
+
273
+ val width = originalBitmap.width
274
+ val height = originalBitmap.height
275
+
276
+ // If already within bounds, return original path
277
+ if (width <= maxDimension && height <= maxDimension) {
278
+ originalBitmap.recycle()
279
+ return imagePath
280
+ }
281
+
282
+ Log.i(TAG, "Resizing image from ${width}x${height} to fit ${maxDimension}px")
283
+
284
+ val scale = maxDimension.toFloat() / maxOf(width, height)
285
+ val newWidth = (width * scale).toInt()
286
+ val newHeight = (height * scale).toInt()
287
+
288
+ val resizedBitmap = android.graphics.Bitmap.createScaledBitmap(originalBitmap, newWidth, newHeight, true)
289
+ originalBitmap.recycle()
290
+
291
+ // Save to temp file
292
+ val cacheDir = LiteRTLMInitProvider.applicationContext?.cacheDir
293
+ ?: throw RuntimeException("Application context not available for image resizing")
294
+ val tempFile = java.io.File(cacheDir, "resized_${System.currentTimeMillis()}.jpg")
295
+ java.io.FileOutputStream(tempFile).use { out ->
296
+ resizedBitmap.compress(android.graphics.Bitmap.CompressFormat.JPEG, 90, out)
297
+ }
298
+ resizedBitmap.recycle()
299
+
300
+ Log.i(TAG, "Resized image saved to: ${tempFile.absolutePath} (${newWidth}x${newHeight})")
301
+ return tempFile.absolutePath
302
+ }
303
+
233
304
  override fun sendMessageWithImage(message: String, imagePath: String): Promise<String> {
234
305
  return Promise.parallel {
235
- // TODO: Implement image loading from path
236
- throw RuntimeException("Multimodal (Image) not yet implemented in this wrapper")
306
+ ensureLoaded()
307
+ Log.i(TAG, "sendMessageWithImage: $message, path=$imagePath")
308
+
309
+ // Resize image to prevent OOM on high-resolution photos
310
+ val processedImagePath = resizeImageIfNeeded(imagePath)
311
+
312
+ // Create multimodal message
313
+ // Use factory method Message.of passing a list of Content
314
+ val textContent = Content.Text(message)
315
+
316
+ val contentList = listOf(
317
+ textContent,
318
+ Content.ImageFile(processedImagePath)
319
+ )
320
+
321
+ val userMsg = LiteRTMessage.of(contentList)
322
+
323
+ // Add to history
324
+ history.add(Message(Role.USER, "$message [Image]"))
325
+
326
+ val responseMsg = conversation!!.sendMessage(userMsg)
327
+
328
+ val response = responseMsg.contents
329
+ .filterIsInstance<Content.Text>()
330
+ .joinToString("") { it.text }
331
+
332
+ history.add(Message(Role.MODEL, response))
333
+
334
+ response
335
+ }
336
+ }
337
+
338
+ override fun downloadModel(url: String, fileName: String, onProgress: ((Double) -> Unit)?): Promise<String> {
339
+ return Promise.parallel {
340
+ Log.i(TAG, "downloadModel: $url -> $fileName")
341
+
342
+ val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
343
+ val modelsDir = java.io.File(context.filesDir, "models")
344
+ if (!modelsDir.exists()) {
345
+ modelsDir.mkdirs()
346
+ }
347
+
348
+ val modelFile = java.io.File(modelsDir, fileName)
349
+ val tempFile = java.io.File(modelsDir, "$fileName.tmp")
350
+
351
+ // Check if file exists and has content
352
+ if (modelFile.exists() && modelFile.length() > 0) {
353
+ Log.i(TAG, "Model already exists at: ${modelFile.absolutePath}")
354
+ onProgress?.invoke(1.0)
355
+ return@parallel modelFile.absolutePath
356
+ }
357
+
358
+ Log.i(TAG, "Downloading model to temp file: ${tempFile.absolutePath}")
359
+ onProgress?.invoke(0.0)
360
+
361
+ try {
362
+ val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
363
+ connection.connectTimeout = 15000 // 15s
364
+ connection.readTimeout = 0 // Infinite for large files
365
+ connection.doInput = true
366
+ connection.connect()
367
+
368
+ if (connection.responseCode != java.net.HttpURLConnection.HTTP_OK) {
369
+ throw RuntimeException("Failed to download model: HTTP ${connection.responseCode}")
370
+ }
371
+
372
+ val contentLength = connection.contentLengthLong // Use long for large files
373
+ val input = connection.inputStream
374
+ val output = java.io.FileOutputStream(tempFile)
375
+
376
+ val buffer = ByteArray(8 * 1024)
377
+ var bytesRead: Int
378
+ var totalBytesRead = 0L
379
+ var lastProgressUpdate = 0L
380
+
381
+ while (input.read(buffer).also { bytesRead = it } != -1) {
382
+ output.write(buffer, 0, bytesRead)
383
+ totalBytesRead += bytesRead
384
+
385
+ if (contentLength > 0 && onProgress != null) {
386
+ val currentTime = System.currentTimeMillis()
387
+ // Update roughly every 100ms to avoid flooding JS bridge
388
+ if (currentTime - lastProgressUpdate > 100) {
389
+ val progress = totalBytesRead.toDouble() / contentLength.toDouble()
390
+ onProgress(progress)
391
+ lastProgressUpdate = currentTime
392
+ }
393
+ }
394
+ }
395
+
396
+ output.flush()
397
+ output.close()
398
+ input.close()
399
+ connection.disconnect()
400
+
401
+ // Atomic rename
402
+ if (tempFile.renameTo(modelFile)) {
403
+ Log.i(TAG, "Download complete and renamed to: ${modelFile.absolutePath}")
404
+ onProgress?.invoke(1.0)
405
+ return@parallel modelFile.absolutePath
406
+ } else {
407
+ throw RuntimeException("Failed to rename temp file to model file")
408
+ }
409
+
410
+ } catch (e: Exception) {
411
+ Log.e(TAG, "Download failed", e)
412
+ if (tempFile.exists()) {
413
+ tempFile.delete()
414
+ }
415
+ throw RuntimeException("Download failed: ${e.message}", e)
416
+ }
417
+ }
418
+ }
419
+
420
+ override fun deleteModel(fileName: String): Promise<Unit> {
421
+ return Promise.parallel {
422
+ Log.i(TAG, "deleteModel: $fileName")
423
+ val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
424
+ val modelsDir = java.io.File(context.filesDir, "models")
425
+ val modelFile = java.io.File(modelsDir, fileName)
426
+
427
+ if (modelFile.exists()) {
428
+ val deleted = modelFile.delete()
429
+ if (deleted) {
430
+ Log.i(TAG, "Deleted model: ${modelFile.absolutePath}")
431
+ // Ensure engine references are cleared if they point to this file
432
+ // We use cleanupInternal() which releases resources WITHOUT marking the instance as closed.
433
+ if (engine != null) {
434
+ Log.i(TAG, "Cleaning up engine after deleting model file.")
435
+ cleanupInternal()
436
+ }
437
+ } else {
438
+ Log.e(TAG, "Failed to delete model: ${modelFile.absolutePath}")
439
+ throw RuntimeException("Failed to delete model: ${modelFile.absolutePath}")
440
+ }
441
+ } else {
442
+ Log.w(TAG, "Model not found for deletion: ${modelFile.absolutePath}")
443
+ }
237
444
  }
238
445
  }
239
446
 
240
447
  override fun sendMessageWithAudio(message: String, audioPath: String): Promise<String> {
241
448
  return Promise.parallel {
242
- // TODO: Implement audio loading from path
243
- throw RuntimeException("Multimodal (Audio) not yet implemented in this wrapper")
449
+ ensureLoaded()
450
+ Log.i(TAG, "sendMessageWithAudio: $message, path=$audioPath")
451
+
452
+ // Load audio
453
+
454
+ val contentList = listOf(
455
+ Content.Text(message),
456
+ Content.AudioFile(audioPath)
457
+ )
458
+
459
+ val userMsg = LiteRTMessage.of(contentList)
460
+
461
+ history.add(Message(Role.USER, "$message [Audio]"))
462
+
463
+ val responseMsg = conversation!!.sendMessage(userMsg)
464
+
465
+ val response = responseMsg.contents
466
+ .filterIsInstance<Content.Text>()
467
+ .joinToString("") { it.text }
468
+
469
+ history.add(Message(Role.MODEL, response))
470
+
471
+ response
244
472
  }
245
473
  }
246
474
 
@@ -270,10 +498,26 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
270
498
 
271
499
  override fun close() {
272
500
  Log.d(TAG, "Closing resources")
501
+ isClosed = true
502
+ cleanupInternal()
503
+ }
504
+
505
+ private fun cleanupInternal() {
273
506
  try {
274
507
  conversation = null
275
- engine = null // Engine destructor should handle cleanup
276
- // In C++ we'd close explicitly, Kotlin GC helps but explicit close method is better if SDK has it
508
+ // Explicitly close engine if it supports it to free native memory immediately
509
+ // Assuming Engine implements AutoCloseable or has close()
510
+ if (engine is AutoCloseable) {
511
+ (engine as AutoCloseable).close()
512
+ } else {
513
+ // Try reflection or just null it if no close method
514
+ try {
515
+ engine?.javaClass?.getMethod("close")?.invoke(engine)
516
+ } catch (e: Exception) {
517
+ // Method not found, rely on GC
518
+ }
519
+ }
520
+ engine = null
277
521
  } catch (e: Exception) {
278
522
  Log.e(TAG, "Error closing resources", e)
279
523
  }
@@ -290,4 +534,6 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
290
534
  // Dispose old conversation if needed
291
535
  conversation = engine!!.createConversation()
292
536
  }
537
+
538
+
293
539
  }