npm - react-native-litert-lm - Versions diffs - 0.1.1 → 0.2.1 - Mend

react-native-litert-lm 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +149 -31
package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +307 -61
package/cpp/HybridLiteRTLM.cpp +85 -31
package/cpp/HybridLiteRTLM.hpp +4 -0
package/cpp/include/stb_image.h +7988 -0
package/lib/hooks.d.ts +16 -0
package/lib/hooks.js +114 -0
package/lib/index.d.ts +27 -2
package/lib/index.js +50 -6
package/lib/modelFactory.d.ts +5 -0
package/lib/modelFactory.js +42 -0
package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
package/lib/templates.d.ts +51 -0
package/lib/templates.js +81 -0
package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -0
package/nitrogen/generated/android/c++/JFunc_void_double.hpp +75 -0
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +33 -1
package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +2 -0
package/nitrogen/generated/android/c++/JLLMConfig.hpp +6 -1
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_double.kt +80 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +13 -0
package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +5 -2
package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +2 -0
package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +2 -0
package/nitrogen/generated/shared/c++/LLMConfig.hpp +7 -2
package/package.json +1 -1
package/src/hooks.ts +152 -0
package/src/index.ts +41 -3
package/src/modelFactory.ts +49 -0
package/src/specs/LiteRTLM.nitro.ts +26 -0
package/src/templates.ts +105 -0

package/README.md CHANGED Viewed

@@ -10,12 +10,12 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
 - 📦 **Bundled Tokenizer** - No separate tokenization library needed
 - 🔄 **Streaming Support** - Token-by-token generation callbacks
 - 📱 **Cross-Platform** - Android API 26+
-- 🚧 **Multimodal** - Image and audio input (Coming Soon)
+- 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
 - 🧵 **Async API** - Non-blocking inference to prevent UI freezes
 ## Status
-> ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub repository](https://github.com/litert-community/react-native-litert-lm).
+> ⚠️ **Early Preview**: This library is under active development. Android is functional on devices with enough RAM; the iOS implementation is pending the LiteRT-LM iOS release. Please report any problems via [GitHub Issues](https://github.com/hung-yueh/react-native-litert-lm/issues).
 ## Installation
@@ -54,13 +54,40 @@ cd android && ./gradlew clean
 cd ios && pod install  # iOS coming soon
 ```
+## Example App
+The repository includes a fully functional example app in the `example/` directory.
+To run it:
+1.  **Navigate to the example directory:**
+    ```bash
+    cd example
+    ```
+2.  **Install dependencies:**
+    ```bash
+    npm install
+    ```
+3.  **Run on Android:**
+    ```bash
+    npx expo run:android
+    ```
 ## Model Management
 LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
-### Downloading Models
+### Automatic Downloading
+The library supports automatic downloading when you pass a URL to `loadModel` or `useModel`.
+### Manual Downloading (Optional)
-We recommend using `rn-fetch-blob` or `expo-file-system` to download models.
+If you prefer to manage downloads manually (e.g., using `react-native-file-access` as in the example below, `rn-fetch-blob`, or `expo-file-system`), download the file to a local path and pass that path to `loadModel`.
 ```typescript
 import { FileSystem } from "react-native-file-access";
@@ -80,18 +107,53 @@ async function downloadModel() {
 ## Usage
-### Basic Generation
+### React Hook (Recommended)
+The `useModel` hook manages the model lifecycle, including downloading, loading, and unloading.
+```typescript
+import { useModel, GEMMA_3N_E2B_IT_INT4 } from "react-native-litert-lm";
+function App() {
+  const {
+    model,
+    isReady,
+    downloadProgress,
+    load,   // Manually trigger load
+    deleteModel // Delete model file
+  } = useModel(
+    GEMMA_3N_E2B_IT_INT4,
+    {
+      backend: "cpu",
+      autoLoad: true, // Default: true. Set false to load manually.
+      systemPrompt: "You are a helpful assistant."
+    }
+  );
+  if (!isReady) {
+    return <Text>Loading... {Math.round(downloadProgress * 100)}%</Text>;
+  }
+  const generate = async () => {
+    const response = await model.sendMessage("Hello!");
+    console.log(response);
+  };
+  return <Button title="Generate" onPress={generate} />;
+}
+```
+### Manual Usage
 ```typescript
 import { createLLM } from "react-native-litert-lm";
 const llm = createLLM();
-// Load a Gemma 3n model (async)
-await llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
+// Load a model from URL (auto-downloads) or local path
+await llm.loadModel("https://example.com/model.litertlm", {
   backend: "gpu",
-  temperature: 0.7,
-  maxTokens: 512,
+  systemPrompt: "You are a helpful assistant.",
 });
 // Generate response (async)
@@ -114,19 +176,26 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
 ### Multimodal (Image/Audio)
 ```typescript
-// Image input (for vision models)
-// Note: Currently throws error on Android (Coming Soon)
-const response = await llm.sendMessageWithImage(
-  "What's in this image?",
-  "/path/to/image.jpg",
-);
-// Audio input (for audio models)
-// Note: Currently throws error on Android (Coming Soon)
-const transcription = await llm.sendMessageWithAudio(
-  "Transcribe this audio",
-  "/path/to/audio.wav",
-);
+import { checkMultimodalSupport } from "react-native-litert-lm";
+// Check platform support first
+const error = checkMultimodalSupport();
+if (error) {
+  console.warn(error); // iOS not yet supported
+} else {
+  // Image input (for vision models like Gemma 3n)
+  // Images larger than 1024px on either side are automatically downscaled to prevent out-of-memory errors
+  const response = await llm.sendMessageWithImage(
+    "What's in this image?",
+    "/path/to/image.jpg",
+  );
+  // Audio input (for audio models)
+  const transcription = await llm.sendMessageWithAudio(
+    "Transcribe this audio",
+    "/path/to/audio.wav",
+  );
+}
 ```
 ### Check Performance
@@ -139,15 +208,18 @@ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
 ## Supported Models
-Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-community):
+Download `.litertlm` models automatically using the exported constants or from [HuggingFace](https://huggingface.co/litert-community):
-| Model         | Size   | Min Device RAM | Use Case                  |
-| ------------- | ------ | -------------- | ------------------------- |
-| Gemma 3n E2B  | ~3GB   | 4GB+           | Efficient, fast responses |
-| Gemma 3n E4B  | ~4GB   | 8GB+           | Higher quality            |
-| Gemma 3 1B    | ~1GB   | 4GB+           | Smallest, fastest         |
-| Phi-4 Mini    | ~2GB   | 4GB+           | Microsoft's small LLM     |
-| Qwen 2.5 1.5B | ~1.5GB | 4GB+           | Multilingual              |
+| Model Constant         | Description                            | Size | Min Device RAM |
+| :--------------------- | :------------------------------------- | :--- | :------------- |
+| `GEMMA_3N_E2B_IT_INT4` | Gemma 3n E2B (Instruction Tuned, Int4) | ~3GB | 4GB+           |
+| Other Models  | Size   | Min Device RAM | Use Case              |
+| ------------- | ------ | -------------- | --------------------- |
+| Gemma 3n E4B  | ~4GB   | 8GB+           | Higher quality        |
+| Gemma 3 1B    | ~1GB   | 4GB+           | Smallest, fastest     |
+| Phi-4 Mini    | ~2GB   | 4GB+           | Microsoft's small LLM |
+| Qwen 2.5 1.5B | ~1.5GB | 4GB+           | Multilingual          |
 ## API Reference
@@ -157,7 +229,8 @@ Creates a new LLM inference engine instance.
 ### `loadModel(path, config?): Promise<void>`
-- `path: string` - Absolute path to `.litertlm` file
+- `path: string` - Absolute path to `.litertlm` file OR a public URL (http/https). If a URL is provided, the model will be downloaded automatically.
+- `config.systemPrompt` - System prompt to guide model behavior (e.g., "You are a helpful assistant.")
 - `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
 - `config.temperature` - Sampling temperature (default: 0.7)
 - `config.topK` - Top-K sampling (default: 40)
@@ -203,6 +276,10 @@ Clear context and start fresh.
 Release all native resources.
+### `deleteModel(fileName): Promise<void>`
+Deletes a model file from the app's internal storage and cleans up the engine instance.
 ### `getRecommendedBackend(): Backend`
 Returns the recommended backend for the current platform (usually `'gpu'`).
@@ -220,6 +297,47 @@ if (warning) {
 }
 ```
+### `checkMultimodalSupport(): string | undefined`
+Returns an error message if multimodal (image/audio) is not supported on the current platform, or `undefined` if OK.
+```typescript
+import { checkMultimodalSupport } from "react-native-litert-lm";
+const error = checkMultimodalSupport();
+if (error) {
+  console.warn(error); // iOS multimodal not yet supported
+}
+```
+### Prompt Templates
+For advanced use cases where you need to manually format prompts:
+```typescript
+import {
+  applyGemmaTemplate,
+  applyPhiTemplate,
+  applyLlamaTemplate,
+  ChatMessage,
+} from "react-native-litert-lm";
+const history: ChatMessage[] = [
+  { role: "user", content: "Hello!" },
+  { role: "model", content: "Hi there!" },
+  { role: "user", content: "Tell me a joke" },
+];
+// For Gemma models
+const gemmaPrompt = applyGemmaTemplate(history, "You are a comedian.");
+// For Phi models
+const phiPrompt = applyPhiTemplate(history);
+// For Llama models
+const llamaPrompt = applyLlamaTemplate(history, "You are helpful.");
+```
 ## Requirements
 - React Native 0.76+

package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt CHANGED Viewed

@@ -20,8 +20,11 @@ import com.margelo.nitro.dev.litert.litertlm.LLMConfig
 import com.margelo.nitro.dev.litert.litertlm.Message
 import com.margelo.nitro.dev.litert.litertlm.Role
 import com.margelo.nitro.core.Promise
+import com.google.ai.edge.litertlm.Content
 // Alias to avoid confusion with our generated Message type
+// LiteRTMessage refers to the LiteRT-LM SDK's message class; plain Message is the Nitro-generated type imported above
 typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
 /**
@@ -34,6 +37,20 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
     companion object {
         private const val TAG = "HybridLiteRTLM"
+        private val initLock = Any()
+        /**
+         * Loads the LiteRTLM native library through the generated [LiteRTLMOnLoad] entry point.
+         *
+         * Intended to be invoked from `Application.onCreate()` so the HybridObject gets registered.
+         * Any failure is logged and swallowed; nothing is rethrown to the caller.
+         */
+        fun initialize() {
+            runCatching { LiteRTLMOnLoad.initializeNative() }
+                .onFailure { cause ->
+                    Log.e(TAG, "Failed to initialize LiteRTLM native library", cause)
+                }
+        }
     }
     init {
@@ -43,6 +60,9 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
     // LiteRT-LM Engine and Conversation
     private var engine: Engine? = null
     private var conversation: Conversation? = null
+    @Volatile
+    private var isClosed = false
     // Conversation history for getHistory()
     private val history = mutableListOf<Message>()
@@ -72,64 +92,74 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
     // -------------------------------------------------------------------------
     override fun loadModel(modelPath: String, config: LLMConfig?): Promise<Unit> {
         return Promise.parallel {
-            Log.i(TAG, "loadModel: $modelPath")
-            // Clean up existing resources
-            close()
-            // Apply configuration
-            config?.let { cfg ->
-                cfg.backend?.let { backend = it }
-                cfg.temperature?.let { temperature = it }
-                cfg.topK?.let { topK = it.toInt() }
-                cfg.topP?.let { topP = it }
-                cfg.maxTokens?.let { maxTokens = it.toInt() }
-            }
-            try {
-                // Map our Backend enum to LiteRT-LM Backend enum
-                val lmBackend = when (backend) {
-                    Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
-                    Backend.NPU -> {
-                        Log.i(TAG, "NPU backend requested - requires hardware support")
-                        com.google.ai.edge.litertlm.Backend.NPU
-                    }
-                    else -> com.google.ai.edge.litertlm.Backend.CPU
+            // Serialize initialization to prevent OOM from concurrent loads
+            synchronized(initLock) {
+                if (isClosed) {
+                    throw RuntimeException("Cannot load model: LiteRTLM instance is closed")
                 }
-                // Vision backend: hardcoded to GPU (required by Gemma 3n)
-                val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
+                Log.i(TAG, "loadModel: $modelPath")
+                // Clean up existing resources
+                // We call internal cleanup that doesn't set isClosed
+                cleanupInternal()
+                // Apply configuration
+                config?.let { cfg ->
+                    cfg.backend?.let { backend = it }
+                    cfg.temperature?.let { temperature = it }
+                    cfg.topK?.let { topK = it.toInt() }
+                    cfg.topP?.let { topP = it }
+                    cfg.maxTokens?.let { maxTokens = it.toInt() }
+                }
+                try {
+                    // Map our Backend enum to LiteRT-LM Backend enum
+                    val lmBackend = when (backend) {
+                        Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
+                        Backend.NPU -> {
+                            Log.i(TAG, "NPU backend requested - requires hardware support")
+                            com.google.ai.edge.litertlm.Backend.NPU
+                        }
+                        else -> com.google.ai.edge.litertlm.Backend.CPU
+                    }
-                // Audio backend: hardcoded to CPU (optimal for audio processing)
-                val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
-                Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
-                // Get cache directory from application context
-                val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
-                Log.i(TAG, "Using cache directory: $cacheDirectory")
-                // Create Engine configuration
-                val engineConfig = EngineConfig(
-                    modelPath = modelPath,
-                    backend = lmBackend,
-                    visionBackend = lmVisionBackend,
-                    audioBackend = lmAudioBackend,
-                    maxNumTokens = maxTokens,
-                    cacheDir = cacheDirectory
-                )
-                // Initialize Engine
-                engine = Engine(engineConfig).also { it.initialize() }
-                Log.i(TAG, "Engine created and initialized successfully")
-                // Create Conversation
-                createNewConversation()
-                Log.i(TAG, "Conversation created successfully")
-            } catch (e: Exception) {
-                Log.e(TAG, "Failed to load model: ${e.message}", e)
-                throw RuntimeException("Failed to load model: ${e.message}", e)
+                    // Vision backend: hardcoded to GPU (required by Gemma 3n)
+                    val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
+                    // Audio backend: hardcoded to CPU (optimal for audio processing)
+                    val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
+                    Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
+                    // Get cache directory from application context
+                    val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
+                    Log.i(TAG, "Using cache directory: $cacheDirectory")
+                    // Create Engine configuration
+                    val engineConfig = EngineConfig(
+                        modelPath = modelPath,
+                        backend = lmBackend,
+                        visionBackend = lmVisionBackend,
+                        audioBackend = lmAudioBackend,
+                        maxNumTokens = maxTokens,
+                        cacheDir = cacheDirectory
+                    )
+                    if (isClosed) return@synchronized
+                    // Initialize Engine
+                    engine = Engine(engineConfig).also { it.initialize() }
+                    Log.i(TAG, "Engine created and initialized successfully")
+                    // Create Conversation
+                    createNewConversation()
+                    Log.i(TAG, "Conversation created successfully")
+                } catch (e: Exception) {
+                    Log.e(TAG, "Failed to load model: ${e.message}", e)
+                    throw RuntimeException("Failed to load model: ${e.message}", e)
+                }
             }
         }
     }
@@ -230,17 +260,215 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
     // -------------------------------------------------------------------------
     // Multimodal methods
     // -------------------------------------------------------------------------
+    /**
+     * Downscale the image at [imagePath] so that neither side exceeds [maxDimension], to prevent OOM.
+     * Gemma 3n's vision encoder is optimized for 512x512 or 1024x1024.
+     * Passing larger images can spike memory 500MB+.
+     *
+     * Only the image header is read to obtain the dimensions, and oversized images are decoded
+     * with power-of-two subsampling before the final scale, so the full-resolution bitmap is
+     * never held in memory.
+     *
+     * @param imagePath path of the image file to inspect.
+     * @param maxDimension largest allowed width/height in pixels; must be positive.
+     * @return [imagePath] unchanged when the image already fits, otherwise the absolute path of a
+     *   JPEG (quality 90) written to the application cache directory. The caller owns that file.
+     * @throws RuntimeException if the image cannot be decoded, the application context is
+     *   unavailable, or the resized image cannot be written.
+     */
+    private fun resizeImageIfNeeded(imagePath: String, maxDimension: Int = 1024): String {
+        require(maxDimension > 0) { "maxDimension must be positive: $maxDimension" }
+
+        // Pass 1: read the header only. No pixel memory is allocated with inJustDecodeBounds.
+        val boundsOptions = android.graphics.BitmapFactory.Options().apply { inJustDecodeBounds = true }
+        android.graphics.BitmapFactory.decodeFile(imagePath, boundsOptions)
+        val width = boundsOptions.outWidth
+        val height = boundsOptions.outHeight
+        if (width <= 0 || height <= 0) {
+            throw RuntimeException("Failed to decode image: $imagePath")
+        }
+
+        // If already within bounds, return original path
+        if (width <= maxDimension && height <= maxDimension) {
+            return imagePath
+        }
+
+        // Fail before allocating any bitmap if there is nowhere to write the result.
+        val cacheDir = LiteRTLMInitProvider.applicationContext?.cacheDir
+            ?: throw RuntimeException("Application context not available for image resizing")
+
+        Log.i(TAG, "Resizing image from ${width}x${height} to fit ${maxDimension}px")
+
+        val longestSide = maxOf(width, height)
+        val scale = maxDimension.toFloat() / longestSide
+        // Extreme aspect ratios can truncate the short side to 0, which createScaledBitmap rejects.
+        val newWidth = (width * scale).toInt().coerceAtLeast(1)
+        val newHeight = (height * scale).toInt().coerceAtLeast(1)
+
+        // Largest power-of-two subsampling that still leaves the decoded image >= the target size,
+        // so the final scaling step only ever shrinks.
+        var sampleSize = 1
+        while (longestSide / (sampleSize * 2) >= maxDimension) {
+            sampleSize *= 2
+        }
+
+        // Pass 2: decode the subsampled pixels.
+        val decodeOptions = android.graphics.BitmapFactory.Options().apply { inSampleSize = sampleSize }
+        val sampledBitmap = android.graphics.BitmapFactory.decodeFile(imagePath, decodeOptions)
+            ?: throw RuntimeException("Failed to decode image: $imagePath")
+
+        try {
+            // createTempFile picks a unique name, so concurrent resizes cannot collide.
+            val tempFile = java.io.File.createTempFile("resized_", ".jpg", cacheDir)
+            try {
+                val resizedBitmap =
+                    android.graphics.Bitmap.createScaledBitmap(sampledBitmap, newWidth, newHeight, true)
+                try {
+                    val written = java.io.FileOutputStream(tempFile).use { out ->
+                        resizedBitmap.compress(android.graphics.Bitmap.CompressFormat.JPEG, 90, out)
+                    }
+                    if (!written) {
+                        throw RuntimeException("Failed to write resized image: ${tempFile.absolutePath}")
+                    }
+                } finally {
+                    // createScaledBitmap may hand back its input when no scaling was needed;
+                    // that instance is released by the outer finally.
+                    if (resizedBitmap !== sampledBitmap) {
+                        resizedBitmap.recycle()
+                    }
+                }
+            } catch (e: Exception) {
+                // Do not leave a partial file behind in the cache directory.
+                tempFile.delete()
+                throw e
+            }
+
+            Log.i(TAG, "Resized image saved to: ${tempFile.absolutePath} (${newWidth}x${newHeight})")
+            return tempFile.absolutePath
+        } finally {
+            sampledBitmap.recycle()
+        }
+    }
     override fun sendMessageWithImage(message: String, imagePath: String): Promise<String> {
         return Promise.parallel {
-             // TODO: Implement image loading from path
-            throw RuntimeException("Multimodal (Image) not yet implemented in this wrapper")
+            ensureLoaded()
+            Log.i(TAG, "sendMessageWithImage: $message, path=$imagePath")
+            // Resize image to prevent OOM on high-resolution photos
+            val processedImagePath = resizeImageIfNeeded(imagePath)
+            // Create multimodal message
+            // Use factory method Message.of passing a list of Content
+            val textContent = Content.Text(message)
+            val contentList = listOf(
+                textContent,
+                Content.ImageFile(processedImagePath)
+            )
+            val userMsg = LiteRTMessage.of(contentList)
+            // Add to history
+            history.add(Message(Role.USER, "$message [Image]"))
+            val responseMsg = conversation!!.sendMessage(userMsg)
+            val response = responseMsg.contents
+                .filterIsInstance<Content.Text>()
+                .joinToString("") { it.text }
+            history.add(Message(Role.MODEL, response))
+            response
+        }
+    }
+    /**
+     * Download a model from [url] into the `models` directory under the app's files directory.
+     *
+     * If a non-empty file named [fileName] already exists there, no network request is made.
+     * Otherwise the body is streamed to `<fileName>.tmp` and renamed into place only after the
+     * transfer completed, so a failed or partial download is never picked up as a valid model.
+     *
+     * @param url HTTP(S) location of the model file.
+     * @param fileName target name inside the models directory; names that resolve outside of that
+     *   directory (e.g. containing `../`) are rejected.
+     * @param onProgress optional callback receiving a fraction in `0.0..1.0`. Intermediate values
+     *   are only reported when the server announces a content length, at most about every 100 ms.
+     * @return a promise resolving to the absolute path of the model file.
+     * @throws RuntimeException (via the promise) if the context is unavailable, the file name is
+     *   invalid, the server does not answer HTTP 200, or the transfer fails or is incomplete.
+     */
+    override fun downloadModel(url: String, fileName: String, onProgress: ((Double) -> Unit)?): Promise<String> {
+        return Promise.parallel {
+            Log.i(TAG, "downloadModel: $url -> $fileName")
+
+            val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
+            val modelsDir = java.io.File(context.filesDir, "models")
+            // mkdirs() also returns false when the directory already exists, hence the second check.
+            if (!modelsDir.mkdirs() && !modelsDir.isDirectory) {
+                throw RuntimeException("Failed to create models directory: ${modelsDir.absolutePath}")
+            }
+
+            val modelFile = java.io.File(modelsDir, fileName)
+            // Reject names such as "../x" that would place the file outside the models directory.
+            val modelsRoot = modelsDir.canonicalPath + java.io.File.separator
+            if (!modelFile.canonicalPath.startsWith(modelsRoot)) {
+                throw RuntimeException("Invalid model file name: $fileName")
+            }
+            val tempFile = java.io.File(modelsDir, "$fileName.tmp")
+
+            // Check if file exists and has content
+            if (modelFile.exists() && modelFile.length() > 0) {
+                Log.i(TAG, "Model already exists at: ${modelFile.absolutePath}")
+                onProgress?.invoke(1.0)
+                return@parallel modelFile.absolutePath
+            }
+
+            Log.i(TAG, "Downloading model to temp file: ${tempFile.absolutePath}")
+            onProgress?.invoke(0.0)
+
+            try {
+                val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
+                try {
+                    connection.connectTimeout = 15000 // 15s
+                    // The read timeout applies to each blocking read, not to the whole transfer,
+                    // so a finite value is safe for large files and keeps a stalled connection
+                    // from hanging the promise forever.
+                    connection.readTimeout = 60000 // 60s
+                    connection.doInput = true
+                    connection.connect()
+
+                    if (connection.responseCode != java.net.HttpURLConnection.HTTP_OK) {
+                        throw RuntimeException("Failed to download model: HTTP ${connection.responseCode}")
+                    }
+
+                    val contentLength = connection.contentLengthLong // Use long for large files
+                    val buffer = ByteArray(8 * 1024)
+                    var totalBytesRead = 0L
+                    var lastProgressUpdate = 0L
+
+                    // use {} closes both streams even when reading or writing throws.
+                    connection.inputStream.use { input ->
+                        java.io.FileOutputStream(tempFile).use { output ->
+                            while (true) {
+                                val bytesRead = input.read(buffer)
+                                if (bytesRead == -1) break
+                                output.write(buffer, 0, bytesRead)
+                                totalBytesRead += bytesRead
+
+                                if (contentLength > 0 && onProgress != null) {
+                                    val currentTime = System.currentTimeMillis()
+                                    // Update roughly every 100ms to avoid flooding JS bridge
+                                    if (currentTime - lastProgressUpdate > 100) {
+                                        val progress = totalBytesRead.toDouble() / contentLength.toDouble()
+                                        onProgress(progress.coerceAtMost(1.0))
+                                        lastProgressUpdate = currentTime
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    // Never promote an empty or truncated file: it would be treated as a valid
+                    // cached model by every later call.
+                    if (totalBytesRead == 0L) {
+                        throw RuntimeException("Downloaded file is empty")
+                    }
+                    if (contentLength > 0 && totalBytesRead != contentLength) {
+                        throw RuntimeException("Incomplete download: received $totalBytesRead of $contentLength bytes")
+                    }
+                } finally {
+                    connection.disconnect()
+                }
+
+                // Atomic rename
+                if (tempFile.renameTo(modelFile)) {
+                    Log.i(TAG, "Download complete and renamed to: ${modelFile.absolutePath}")
+                    onProgress?.invoke(1.0)
+                    return@parallel modelFile.absolutePath
+                } else {
+                    throw RuntimeException("Failed to rename temp file to model file")
+                }
+            } catch (e: Exception) {
+                Log.e(TAG, "Download failed", e)
+                if (tempFile.exists()) {
+                    tempFile.delete()
+                }
+                throw RuntimeException("Download failed: ${e.message}", e)
+            }
+        }
+    }
+    override fun deleteModel(fileName: String): Promise<Unit> {
+        return Promise.parallel {
+            Log.i(TAG, "deleteModel: $fileName")
+            val context = LiteRTLMInitProvider.applicationContext ?: throw RuntimeException("Context not available")
+            val modelsDir = java.io.File(context.filesDir, "models")
+            val modelFile = java.io.File(modelsDir, fileName)
+            if (modelFile.exists()) {
+                val deleted = modelFile.delete()
+                if (deleted) {
+                    Log.i(TAG, "Deleted model: ${modelFile.absolutePath}")
+                    // Ensure engine references are cleared if they point to this file
+                    // We use cleanupInternal() which releases resources WITHOUT marking the instance as closed.
+                    if (engine != null) {
+                        Log.i(TAG, "Cleaning up engine after deleting model file.")
+                        cleanupInternal()
+                    }
+                } else {
+                    Log.e(TAG, "Failed to delete model: ${modelFile.absolutePath}")
+                    throw RuntimeException("Failed to delete model: ${modelFile.absolutePath}")
+                }
+            } else {
+                Log.w(TAG, "Model not found for deletion: ${modelFile.absolutePath}")
+            }
         }
     }
     override fun sendMessageWithAudio(message: String, audioPath: String): Promise<String> {
         return Promise.parallel {
-            // TODO: Implement audio loading from path
-            throw RuntimeException("Multimodal (Audio) not yet implemented in this wrapper")
+            ensureLoaded()
+            Log.i(TAG, "sendMessageWithAudio: $message, path=$audioPath")
+            // Load audio
+            val contentList = listOf(
+                Content.Text(message),
+                Content.AudioFile(audioPath)
+            )
+            val userMsg = LiteRTMessage.of(contentList)
+            history.add(Message(Role.USER, "$message [Audio]"))
+            val responseMsg = conversation!!.sendMessage(userMsg)
+            val response = responseMsg.contents
+                .filterIsInstance<Content.Text>()
+                .joinToString("") { it.text }
+            history.add(Message(Role.MODEL, response))
+            response
         }
     }
@@ -270,10 +498,26 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
     override fun close() {
         Log.d(TAG, "Closing resources")
+        isClosed = true
+        cleanupInternal()
+    }
+    private fun cleanupInternal() {
         try {
             conversation = null
-            engine = null // Engine destructor should handle cleanup
-            // In C++ we'd close explicitly, Kotlin GC helps but explicit close method is better if SDK has it
+            // Explicitly close engine if it supports it to free native memory immediately
+            // Assuming Engine implements AutoCloseable or has close()
+            if (engine is AutoCloseable) {
+                (engine as AutoCloseable).close()
+            } else {
+                 // Try reflection or just null it if no close method
+                try {
+                    engine?.javaClass?.getMethod("close")?.invoke(engine)
+                } catch (e: Exception) {
+                    // Method not found, rely on GC
+                }
+            }
+            engine = null
         } catch (e: Exception) {
             Log.e(TAG, "Error closing resources", e)
         }
@@ -290,4 +534,6 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
         // Dispose old conversation if needed
         conversation = engine!!.createConversation()
     }
 }