react-native-litert-lm 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/README.md +147 -28
  2. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +254 -58
  3. package/cpp/HybridLiteRTLM.cpp +30 -9
  4. package/cpp/HybridLiteRTLM.hpp +4 -0
  5. package/lib/hooks.d.ts +16 -0
  6. package/lib/hooks.js +114 -0
  7. package/lib/index.d.ts +27 -2
  8. package/lib/index.js +50 -6
  9. package/lib/modelFactory.d.ts +5 -0
  10. package/lib/modelFactory.js +42 -0
  11. package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
  12. package/lib/templates.d.ts +51 -0
  13. package/lib/templates.js +81 -0
  14. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -0
  15. package/nitrogen/generated/android/c++/JFunc_void_double.hpp +75 -0
  16. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +33 -1
  17. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +2 -0
  18. package/nitrogen/generated/android/c++/JLLMConfig.hpp +6 -1
  19. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_double.kt +80 -0
  20. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +13 -0
  21. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +5 -2
  22. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +2 -0
  23. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +2 -0
  24. package/nitrogen/generated/shared/c++/LLMConfig.hpp +7 -2
  25. package/package.json +1 -1
  26. package/src/hooks.ts +152 -0
  27. package/src/index.ts +41 -3
  28. package/src/modelFactory.ts +49 -0
  29. package/src/specs/LiteRTLM.nitro.ts +26 -0
  30. package/src/templates.ts +105 -0
package/README.md CHANGED
@@ -54,13 +54,40 @@ cd android && ./gradlew clean
54
54
  cd ios && pod install # iOS coming soon
55
55
  ```
56
56
 
57
+ ## Example App
58
+
59
+ The repository includes a fully functional example app in the `example/` directory.
60
+
61
+ To run it:
62
+
63
+ 1. **Navigate to the example directory:**
64
+
65
+ ```bash
66
+ cd example
67
+ ```
68
+
69
+ 2. **Install dependencies:**
70
+
71
+ ```bash
72
+ npm install
73
+ ```
74
+
75
+ 3. **Run on Android:**
76
+ ```bash
77
+ npx expo run:android
78
+ ```
79
+
57
80
  ## Model Management
58
81
 
59
82
  LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
60
83
 
61
- ### Downloading Models
84
+ ### Automatic Downloading
85
+
86
+ The library supports automatic downloading when you pass a URL to `loadModel` or `useModel`.
87
+
88
+ ### Manual Downloading (Optional)
62
89
 
63
- We recommend using `rn-fetch-blob` or `expo-file-system` to download models.
90
+ If you prefer to manage downloads manually (e.g., using `rn-fetch-blob` or `expo-file-system`), you can download the file to a local path and pass that path to the library.
64
91
 
65
92
  ```typescript
66
93
  import { FileSystem } from "react-native-file-access";
@@ -80,18 +107,53 @@ async function downloadModel() {
80
107
 
81
108
  ## Usage
82
109
 
83
- ### Basic Generation
110
+ ### React Hook (Recommended)
111
+
112
+ The `useModel` hook manages the model lifecycle, including downloading, loading, and unloading.
113
+
114
+ ```typescript
115
+ import { useModel, GEMMA_3N_E2B_IT_INT4 } from "react-native-litert-lm";
116
+
117
+ function App() {
118
+ const {
119
+ model,
120
+ isReady,
121
+ downloadProgress,
122
+ load, // Manually trigger load
123
+ deleteModel // Delete model file
124
+ } = useModel(
125
+ GEMMA_3N_E2B_IT_INT4,
126
+ {
127
+ backend: "cpu",
128
+ autoLoad: true, // Default: true. Set false to load manually.
129
+ systemPrompt: "You are a helpful assistant."
130
+ }
131
+ );
132
+
133
+ if (!isReady) {
134
+ return <Text>Loading... {Math.round(downloadProgress * 100)}%</Text>;
135
+ }
136
+
137
+ const generate = async () => {
138
+ const response = await model.sendMessage("Hello!");
139
+ console.log(response);
140
+ };
141
+
142
+ return <Button title="Generate" onPress={generate} />;
143
+ }
144
+ ```
145
+
146
+ ### Manual Usage
84
147
 
85
148
  ```typescript
86
149
  import { createLLM } from "react-native-litert-lm";
87
150
 
88
151
  const llm = createLLM();
89
152
 
90
- // Load a Gemma 3n model (async)
91
- await llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
153
+ // Load a model from URL (auto-downloads) or local path
154
+ await llm.loadModel("https://example.com/model.litertlm", {
92
155
  backend: "gpu",
93
- temperature: 0.7,
94
- maxTokens: 512,
156
+ systemPrompt: "You are a helpful assistant.",
95
157
  });
96
158
 
97
159
  // Generate response (async)
@@ -114,18 +176,26 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
114
176
  ### Multimodal (Image/Audio)
115
177
 
116
178
  ```typescript
117
- // Image input (for vision models like Gemma 3n)
118
- // ⚠️ Ensure model is loaded with { maxTokens: 1024+ }
119
- const response = await llm.sendMessageWithImage(
120
- "What's in this image?",
121
- "/path/to/image.jpg",
122
- );
123
-
124
- // Audio input (for audio models)
125
- const transcription = await llm.sendMessageWithAudio(
126
- "Transcribe this audio",
127
- "/path/to/audio.wav",
128
- );
179
+ import { checkMultimodalSupport } from "react-native-litert-lm";
180
+
181
+ // Check platform support first
182
+ const error = checkMultimodalSupport();
183
+ if (error) {
184
+ console.warn(error); // iOS not yet supported
185
+ } else {
186
+ // Image input (for vision models like Gemma 3n)
187
+ // Images >1024px are automatically resized to prevent OOM
188
+ const response = await llm.sendMessageWithImage(
189
+ "What's in this image?",
190
+ "/path/to/image.jpg",
191
+ );
192
+
193
+ // Audio input (for audio models)
194
+ const transcription = await llm.sendMessageWithAudio(
195
+ "Transcribe this audio",
196
+ "/path/to/audio.wav",
197
+ );
198
+ }
129
199
  ```
130
200
 
131
201
  ### Check Performance
@@ -138,15 +208,18 @@ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
138
208
 
139
209
  ## Supported Models
140
210
 
141
- Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-community):
211
+ Download `.litertlm` models automatically using the exported model constants, or manually from [HuggingFace](https://huggingface.co/litert-community):
142
212
 
143
- | Model | Size | Min Device RAM | Use Case |
144
- | ------------- | ------ | -------------- | ------------------------- |
145
- | Gemma 3n E2B | ~3GB | 4GB+ | Efficient, fast responses |
146
- | Gemma 3n E4B | ~4GB | 8GB+ | Higher quality |
147
- | Gemma 3 1B | ~1GB | 4GB+ | Smallest, fastest |
148
- | Phi-4 Mini | ~2GB | 4GB+ | Microsoft's small LLM |
149
- | Qwen 2.5 1.5B | ~1.5GB | 4GB+ | Multilingual |
213
+ | Model Constant | Description | Size | Min Device RAM |
214
+ | :--------------------- | :------------------------------------- | :--- | :------------- |
215
+ | `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Instruction Tuned, Int4) | ~3GB | 4GB+ |
216
+
217
+ | Other Models | Size | Min Device RAM | Use Case |
218
+ | ------------- | ------ | -------------- | --------------------- |
219
+ | Gemma 3n E4B | ~4GB | 8GB+ | Higher quality |
220
+ | Gemma 3 1B | ~1GB | 4GB+ | Smallest, fastest |
221
+ | Phi-4 Mini | ~2GB | 4GB+ | Microsoft's small LLM |
222
+ | Qwen 2.5 1.5B | ~1.5GB | 4GB+ | Multilingual |
150
223
 
151
224
  ## API Reference
152
225
 
@@ -156,7 +229,8 @@ Creates a new LLM inference engine instance.
156
229
 
157
230
  ### `loadModel(path, config?): Promise<void>`
158
231
 
159
- - `path: string` - Absolute path to `.litertlm` file
232
+ - `path: string` - Absolute path to `.litertlm` file OR a public URL (http/https). If a URL is provided, the model will be downloaded automatically.
233
+ - `config.systemPrompt` - System prompt to guide model behavior (e.g., "You are a helpful assistant.")
160
234
  - `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
161
235
  - `config.temperature` - Sampling temperature (default: 0.7)
162
236
  - `config.topK` - Top-K sampling (default: 40)
@@ -202,6 +276,10 @@ Clear context and start fresh.
202
276
 
203
277
  Release all native resources.
204
278
 
279
+ ### `deleteModel(fileName): Promise<void>`
280
+
281
+ Deletes a model file from the app's internal storage and cleans up the engine instance.
282
+
205
283
  ### `getRecommendedBackend(): Backend`
206
284
 
207
285
  Returns the recommended backend for the current platform (usually `'gpu'`).
@@ -219,6 +297,47 @@ if (warning) {
219
297
  }
220
298
  ```
221
299
 
300
+ ### `checkMultimodalSupport(): string | undefined`
301
+
302
+ Returns an error message if multimodal input (image/audio) is not supported on the current platform, or `undefined` if it is supported.
303
+
304
+ ```typescript
305
+ import { checkMultimodalSupport } from "react-native-litert-lm";
306
+
307
+ const error = checkMultimodalSupport();
308
+ if (error) {
309
+ console.warn(error); // iOS multimodal not yet supported
310
+ }
311
+ ```
312
+
313
+ ### Prompt Templates
314
+
315
+ For advanced use cases where you need to manually format prompts:
316
+
317
+ ```typescript
318
+ import {
319
+ applyGemmaTemplate,
320
+ applyPhiTemplate,
321
+ applyLlamaTemplate,
322
+ ChatMessage,
323
+ } from "react-native-litert-lm";
324
+
325
+ const history: ChatMessage[] = [
326
+ { role: "user", content: "Hello!" },
327
+ { role: "model", content: "Hi there!" },
328
+ { role: "user", content: "Tell me a joke" },
329
+ ];
330
+
331
+ // For Gemma models
332
+ const gemmaPrompt = applyGemmaTemplate(history, "You are a comedian.");
333
+
334
+ // For Phi models
335
+ const phiPrompt = applyPhiTemplate(history);
336
+
337
+ // For Llama models
338
+ const llamaPrompt = applyLlamaTemplate(history, "You are helpful.");
339
+ ```
340
+
222
341
  ## Requirements
223
342
 
224
343
  - React Native 0.76+
@@ -37,6 +37,20 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
37
37
 
38
38
  companion object {
39
39
  private const val TAG = "HybridLiteRTLM"
40
+ private val initLock = Any()
41
+
42
+ /**
43
+ * Initialize the native library.
44
+ * Must be called from Application.onCreate() to register the HybridObject.
45
+ */
46
+ fun initialize() {
47
+ try {
48
+ // Call generated internal OnLoad to load the library
49
+ LiteRTLMOnLoad.initializeNative()
50
+ } catch (e: Throwable) {
51
+ Log.e(TAG, "Failed to initialize LiteRTLM native library", e)
52
+ }
53
+ }
40
54
  }
41
55
 
42
56
  init {
@@ -46,6 +60,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
46
60
  // LiteRT-LM Engine and Conversation
47
61
  private var engine: Engine? = null
48
62
  private var conversation: Conversation? = null
63
+
64
+ @Volatile
65
+ private var isClosed = false
49
66
 
50
67
  // Conversation history for getHistory()
51
68
  private val history = mutableListOf<Message>()
@@ -75,64 +92,74 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
75
92
  // -------------------------------------------------------------------------
76
93
  override fun loadModel(modelPath: String, config: LLMConfig?): Promise<Unit> {
77
94
  return Promise.parallel {
78
- Log.i(TAG, "loadModel: $modelPath")
79
-
80
- // Clean up existing resources
81
- close()
82
-
83
- // Apply configuration
84
- config?.let { cfg ->
85
- cfg.backend?.let { backend = it }
86
- cfg.temperature?.let { temperature = it }
87
- cfg.topK?.let { topK = it.toInt() }
88
- cfg.topP?.let { topP = it }
89
- cfg.maxTokens?.let { maxTokens = it.toInt() }
90
- }
91
-
92
- try {
93
- // Map our Backend enum to LiteRT-LM Backend enum
94
- val lmBackend = when (backend) {
95
- Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
96
- Backend.NPU -> {
97
- Log.i(TAG, "NPU backend requested - requires hardware support")
98
- com.google.ai.edge.litertlm.Backend.NPU
99
- }
100
- else -> com.google.ai.edge.litertlm.Backend.CPU
95
+ // Serialize initialization to prevent OOM from concurrent loads
96
+ synchronized(initLock) {
97
+ if (isClosed) {
98
+ throw RuntimeException("Cannot load model: LiteRTLM instance is closed")
101
99
  }
102
100
 
103
- // Vision backend: hardcoded to GPU (required by Gemma 3n)
104
- val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
101
+ Log.i(TAG, "loadModel: $modelPath")
102
+
103
+ // Clean up existing resources
104
+ // We call internal cleanup that doesn't set isClosed
105
+ cleanupInternal()
106
+
107
+ // Apply configuration
108
+ config?.let { cfg ->
109
+ cfg.backend?.let { backend = it }
110
+ cfg.temperature?.let { temperature = it }
111
+ cfg.topK?.let { topK = it.toInt() }
112
+ cfg.topP?.let { topP = it }
113
+ cfg.maxTokens?.let { maxTokens = it.toInt() }
114
+ }
115
+
116
+ try {
117
+ // Map our Backend enum to LiteRT-LM Backend enum
118
+ val lmBackend = when (backend) {
119
+ Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
120
+ Backend.NPU -> {
121
+ Log.i(TAG, "NPU backend requested - requires hardware support")
122
+ com.google.ai.edge.litertlm.Backend.NPU
123
+ }
124
+ else -> com.google.ai.edge.litertlm.Backend.CPU
125
+ }
105
126
 
106
- // Audio backend: hardcoded to CPU (optimal for audio processing)
107
- val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
108
-
109
- Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
110
-
111
- // Get cache directory from application context
112
- val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
113
- Log.i(TAG, "Using cache directory: $cacheDirectory")
114
-
115
- // Create Engine configuration
116
- val engineConfig = EngineConfig(
117
- modelPath = modelPath,
118
- backend = lmBackend,
119
- visionBackend = lmVisionBackend,
120
- audioBackend = lmAudioBackend,
121
- maxNumTokens = maxTokens,
122
- cacheDir = cacheDirectory
123
- )
124
-
125
- // Initialize Engine
126
- engine = Engine(engineConfig).also { it.initialize() }
127
- Log.i(TAG, "Engine created and initialized successfully")
128
-
129
- // Create Conversation
130
- createNewConversation()
131
- Log.i(TAG, "Conversation created successfully")
132
-
133
- } catch (e: Exception) {
134
- Log.e(TAG, "Failed to load model: ${e.message}", e)
135
- throw RuntimeException("Failed to load model: ${e.message}", e)
127
+ // Vision backend: hardcoded to GPU (required by Gemma 3n)
128
+ val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
129
+
130
+ // Audio backend: hardcoded to CPU (optimal for audio processing)
131
+ val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
132
+
133
+ Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
134
+
135
+ // Get cache directory from application context
136
+ val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
137
+ Log.i(TAG, "Using cache directory: $cacheDirectory")
138
+
139
+ // Create Engine configuration
140
+ val engineConfig = EngineConfig(
141
+ modelPath = modelPath,
142
+ backend = lmBackend,
143
+ visionBackend = lmVisionBackend,
144
+ audioBackend = lmAudioBackend,
145
+ maxNumTokens = maxTokens,
146
+ cacheDir = cacheDirectory
147
+ )
148
+
149
+ if (isClosed) return@synchronized
150
+
151
+ // Initialize Engine
152
+ engine = Engine(engineConfig).also { it.initialize() }
153
+ Log.i(TAG, "Engine created and initialized successfully")
154
+
155
+ // Create Conversation
156
+ createNewConversation()
157
+ Log.i(TAG, "Conversation created successfully")
158
+
159
+ } catch (e: Exception) {
160
+ Log.e(TAG, "Failed to load model: ${e.message}", e)
161
+ throw RuntimeException("Failed to load model: ${e.message}", e)
162
+ }
136
163
  }
137
164
  }
138
165
  }
@@ -233,18 +260,62 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
233
260
  // -------------------------------------------------------------------------
234
261
  // Multimodal methods
235
262
  // -------------------------------------------------------------------------
263
+
264
+ /**
265
+ * Resize image if dimensions exceed maxDimension to prevent OOM.
266
+ * Gemma 3n's vision encoder is optimized for 512x512 or 1024x1024.
267
+ * Passing larger images can spike memory 500MB+.
268
+ */
269
+ private fun resizeImageIfNeeded(imagePath: String, maxDimension: Int = 1024): String {
270
+ val originalBitmap = android.graphics.BitmapFactory.decodeFile(imagePath)
271
+ ?: throw RuntimeException("Failed to decode image: $imagePath")
272
+
273
+ val width = originalBitmap.width
274
+ val height = originalBitmap.height
275
+
276
+ // If already within bounds, return original path
277
+ if (width <= maxDimension && height <= maxDimension) {
278
+ originalBitmap.recycle()
279
+ return imagePath
280
+ }
281
+
282
+ Log.i(TAG, "Resizing image from ${width}x${height} to fit ${maxDimension}px")
283
+
284
+ val scale = maxDimension.toFloat() / maxOf(width, height)
285
+ val newWidth = (width * scale).toInt()
286
+ val newHeight = (height * scale).toInt()
287
+
288
+ val resizedBitmap = android.graphics.Bitmap.createScaledBitmap(originalBitmap, newWidth, newHeight, true)
289
+ originalBitmap.recycle()
290
+
291
+ // Save to temp file
292
+ val cacheDir = LiteRTLMInitProvider.applicationContext?.cacheDir
293
+ ?: throw RuntimeException("Application context not available for image resizing")
294
+ val tempFile = java.io.File(cacheDir, "resized_${System.currentTimeMillis()}.jpg")
295
+ java.io.FileOutputStream(tempFile).use { out ->
296
+ resizedBitmap.compress(android.graphics.Bitmap.CompressFormat.JPEG, 90, out)
297
+ }
298
+ resizedBitmap.recycle()
299
+
300
+ Log.i(TAG, "Resized image saved to: ${tempFile.absolutePath} (${newWidth}x${newHeight})")
301
+ return tempFile.absolutePath
302
+ }
303
+
236
304
  override fun sendMessageWithImage(message: String, imagePath: String): Promise<String> {
237
305
  return Promise.parallel {
238
306
  ensureLoaded()
239
307
  Log.i(TAG, "sendMessageWithImage: $message, path=$imagePath")
240
308
 
309
+ // Resize image to prevent OOM on high-resolution photos
310
+ val processedImagePath = resizeImageIfNeeded(imagePath)
311
+
241
312
  // Create multimodal message
242
313
  // Use factory method Message.of passing a list of Content
243
314
  val textContent = Content.Text(message)
244
315
 
245
316
  val contentList = listOf(
246
317
  textContent,
247
- Content.ImageFile(imagePath)
318
+ Content.ImageFile(processedImagePath)
248
319
  )
249
320
 
250
321
  val userMsg = LiteRTMessage.of(contentList)
@@ -264,6 +335,115 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
264
335
  }
265
336
  }
266
337
 
338
+ override fun downloadModel(url: String, fileName: String, onProgress: ((Double) -> Unit)?): Promise<String> {
339
+ return Promise.parallel {
340
+ Log.i(TAG, "downloadModel: $url -> $fileName")
341
+
342
+ val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
343
+ val modelsDir = java.io.File(context.filesDir, "models")
344
+ if (!modelsDir.exists()) {
345
+ modelsDir.mkdirs()
346
+ }
347
+
348
+ val modelFile = java.io.File(modelsDir, fileName)
349
+ val tempFile = java.io.File(modelsDir, "$fileName.tmp")
350
+
351
+ // Check if file exists and has content
352
+ if (modelFile.exists() && modelFile.length() > 0) {
353
+ Log.i(TAG, "Model already exists at: ${modelFile.absolutePath}")
354
+ onProgress?.invoke(1.0)
355
+ return@parallel modelFile.absolutePath
356
+ }
357
+
358
+ Log.i(TAG, "Downloading model to temp file: ${tempFile.absolutePath}")
359
+ onProgress?.invoke(0.0)
360
+
361
+ try {
362
+ val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
363
+ connection.connectTimeout = 15000 // 15s
364
+ connection.readTimeout = 0 // Infinite for large files
365
+ connection.doInput = true
366
+ connection.connect()
367
+
368
+ if (connection.responseCode != java.net.HttpURLConnection.HTTP_OK) {
369
+ throw RuntimeException("Failed to download model: HTTP ${connection.responseCode}")
370
+ }
371
+
372
+ val contentLength = connection.contentLengthLong // Use long for large files
373
+ val input = connection.inputStream
374
+ val output = java.io.FileOutputStream(tempFile)
375
+
376
+ val buffer = ByteArray(8 * 1024)
377
+ var bytesRead: Int
378
+ var totalBytesRead = 0L
379
+ var lastProgressUpdate = 0L
380
+
381
+ while (input.read(buffer).also { bytesRead = it } != -1) {
382
+ output.write(buffer, 0, bytesRead)
383
+ totalBytesRead += bytesRead
384
+
385
+ if (contentLength > 0 && onProgress != null) {
386
+ val currentTime = System.currentTimeMillis()
387
+ // Update roughly every 100ms to avoid flooding JS bridge
388
+ if (currentTime - lastProgressUpdate > 100) {
389
+ val progress = totalBytesRead.toDouble() / contentLength.toDouble()
390
+ onProgress(progress)
391
+ lastProgressUpdate = currentTime
392
+ }
393
+ }
394
+ }
395
+
396
+ output.flush()
397
+ output.close()
398
+ input.close()
399
+ connection.disconnect()
400
+
401
+ // Atomic rename
402
+ if (tempFile.renameTo(modelFile)) {
403
+ Log.i(TAG, "Download complete and renamed to: ${modelFile.absolutePath}")
404
+ onProgress?.invoke(1.0)
405
+ return@parallel modelFile.absolutePath
406
+ } else {
407
+ throw RuntimeException("Failed to rename temp file to model file")
408
+ }
409
+
410
+ } catch (e: Exception) {
411
+ Log.e(TAG, "Download failed", e)
412
+ if (tempFile.exists()) {
413
+ tempFile.delete()
414
+ }
415
+ throw RuntimeException("Download failed: ${e.message}", e)
416
+ }
417
+ }
418
+ }
419
+
420
+ override fun deleteModel(fileName: String): Promise<Unit> {
421
+ return Promise.parallel {
422
+ Log.i(TAG, "deleteModel: $fileName")
423
+ val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
424
+ val modelsDir = java.io.File(context.filesDir, "models")
425
+ val modelFile = java.io.File(modelsDir, fileName)
426
+
427
+ if (modelFile.exists()) {
428
+ val deleted = modelFile.delete()
429
+ if (deleted) {
430
+ Log.i(TAG, "Deleted model: ${modelFile.absolutePath}")
431
+ // Ensure engine references are cleared if they point to this file
432
+ // We use cleanupInternal() which releases resources WITHOUT marking the instance as closed.
433
+ if (engine != null) {
434
+ Log.i(TAG, "Cleaning up engine after deleting model file.")
435
+ cleanupInternal()
436
+ }
437
+ } else {
438
+ Log.e(TAG, "Failed to delete model: ${modelFile.absolutePath}")
439
+ throw RuntimeException("Failed to delete model: ${modelFile.absolutePath}")
440
+ }
441
+ } else {
442
+ Log.w(TAG, "Model not found for deletion: ${modelFile.absolutePath}")
443
+ }
444
+ }
445
+ }
446
+
267
447
  override fun sendMessageWithAudio(message: String, audioPath: String): Promise<String> {
268
448
  return Promise.parallel {
269
449
  ensureLoaded()
@@ -318,10 +498,26 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
318
498
 
319
499
  override fun close() {
320
500
  Log.d(TAG, "Closing resources")
501
+ isClosed = true
502
+ cleanupInternal()
503
+ }
504
+
505
+ private fun cleanupInternal() {
321
506
  try {
322
507
  conversation = null
323
- engine = null // Engine destructor should handle cleanup
324
- // In C++ we'd close explicitly, Kotlin GC helps but explicit close method is better if SDK has it
508
+ // Explicitly close engine if it supports it to free native memory immediately
509
+ // Assuming Engine implements AutoCloseable or has close()
510
+ if (engine is AutoCloseable) {
511
+ (engine as AutoCloseable).close()
512
+ } else {
513
+ // Try reflection or just null it if no close method
514
+ try {
515
+ engine?.javaClass?.getMethod("close")?.invoke(engine)
516
+ } catch (e: Exception) {
517
+ // Method not found, rely on GC
518
+ }
519
+ }
520
+ engine = null
325
521
  } catch (e: Exception) {
326
522
  Log.e(TAG, "Error closing resources", e)
327
523
  }
@@ -283,16 +283,34 @@ std::string HybridLiteRTLM::sendMessageWithImage(
283
283
  return response->content;
284
284
 
285
285
  #else
286
- // Stub: just process text with image path noted
287
- // Verify file exists at least
288
- std::ifstream f(imagePath.c_str());
289
- if (!f.good()) {
290
- // Don't crash, just log/stub
291
- }
292
- return sendMessage(message + " [Image: " + imagePath + "]");
286
+ // iOS: LiteRT-LM SDK not yet available, throw clear error
287
+ throw std::runtime_error(
288
+ "sendMessageWithImage is not supported on iOS. "
289
+ "LiteRT-LM iOS SDK is not yet available. "
290
+ "Please use text-only sendMessage() for now.");
291
+ #endif
292
+ }
293
+
293
294
  #endif
294
295
  }
295
296
 
297
+ //------------------------------------------------------------------------------
298
+ // downloadModel - Download model file from URL
299
+ //------------------------------------------------------------------------------
300
+ std::future<std::string> HybridLiteRTLM::downloadModel(
301
+ const std::string& url,
302
+ const std::string& fileName,
303
+ const std::optional<std::function<void(double)>>& onProgress) {
304
+
305
+ // Return a future that throws an exception
306
+ return std::async(std::launch::async, []() -> std::string {
307
+ throw std::runtime_error(
308
+ "downloadModel is not supported on iOS yet. "
309
+ "Please download the model manually using a separate library."
310
+ );
311
+ });
312
+ }
313
+
296
314
  //------------------------------------------------------------------------------
297
315
  // sendMessageWithAudio - Multimodal audio + text
298
316
  //------------------------------------------------------------------------------
@@ -349,8 +367,11 @@ std::string HybridLiteRTLM::sendMessageWithAudio(
349
367
  return response->content;
350
368
 
351
369
  #else
352
- // Stub: just process text with audio path noted
353
- return sendMessage(message + " [Audio: " + audioPath + "]");
370
+ // iOS: LiteRT-LM SDK not yet available, throw clear error
371
+ throw std::runtime_error(
372
+ "sendMessageWithAudio is not supported on iOS. "
373
+ "LiteRT-LM iOS SDK is not yet available. "
374
+ "Please use text-only sendMessage() for now.");
354
375
  #endif
355
376
  }
356
377
 
@@ -58,6 +58,10 @@ public:
58
58
 
59
59
  std::string sendMessageWithImage(const std::string& message,
60
60
  const std::string& imagePath) override;
61
+
62
+ std::future<std::string> downloadModel(const std::string& url,
63
+ const std::string& fileName,
64
+ const std::optional<std::function<void(double)>>& onProgress) override;
61
65
 
62
66
  std::string sendMessageWithAudio(const std::string& message,
63
67
  const std::string& audioPath) override;