react-native-litert-lm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +259 -0
  3. package/android/CMakeLists.txt +32 -0
  4. package/android/build.gradle +88 -0
  5. package/android/src/main/AndroidManifest.xml +11 -0
  6. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +280 -0
  7. package/android/src/main/java/dev/litert/litertlm/LiteRTLMInitProvider.kt +43 -0
  8. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +26 -0
  9. package/cpp/HybridLiteRTLM.cpp +483 -0
  10. package/cpp/HybridLiteRTLM.hpp +120 -0
  11. package/cpp/cpp-adapter.cpp +13 -0
  12. package/cpp/include/README.md +34 -0
  13. package/lib/index.d.ts +82 -0
  14. package/lib/index.js +106 -0
  15. package/lib/specs/LiteRTLM.nitro.d.ts +165 -0
  16. package/lib/specs/LiteRTLM.nitro.js +2 -0
  17. package/nitrogen/generated/.gitattributes +1 -0
  18. package/nitrogen/generated/android/LiteRTLM+autolinking.cmake +81 -0
  19. package/nitrogen/generated/android/LiteRTLM+autolinking.gradle +27 -0
  20. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +46 -0
  21. package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +25 -0
  22. package/nitrogen/generated/android/c++/JBackend.hpp +61 -0
  23. package/nitrogen/generated/android/c++/JFunc_void_std__string_bool.hpp +76 -0
  24. package/nitrogen/generated/android/c++/JGenerationStats.hpp +77 -0
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +133 -0
  26. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +75 -0
  27. package/nitrogen/generated/android/c++/JLLMConfig.hpp +75 -0
  28. package/nitrogen/generated/android/c++/JMessage.hpp +63 -0
  29. package/nitrogen/generated/android/c++/JRole.hpp +61 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Backend.kt +24 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_std__string_bool.kt +80 -0
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +53 -0
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +98 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +50 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LiteRTLMOnLoad.kt +35 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +41 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Role.kt +24 -0
  38. package/nitrogen/generated/ios/LiteRTLM+autolinking.rb +60 -0
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +17 -0
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +27 -0
  41. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +38 -0
  42. package/nitrogen/generated/shared/c++/Backend.hpp +80 -0
  43. package/nitrogen/generated/shared/c++/GenerationStats.hpp +103 -0
  44. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +30 -0
  45. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +82 -0
  46. package/nitrogen/generated/shared/c++/LLMConfig.hpp +101 -0
  47. package/nitrogen/generated/shared/c++/Message.hpp +89 -0
  48. package/nitrogen/generated/shared/c++/Role.hpp +80 -0
  49. package/package.json +87 -0
  50. package/react-native-litert-lm.podspec +51 -0
  51. package/react-native.config.js +16 -0
  52. package/src/index.ts +125 -0
  53. package/src/specs/LiteRTLM.nitro.ts +187 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 hung-yueh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,259 @@
1
+ # react-native-litert-lm
2
+
3
+ High-performance LLM inference for React Native powered by [LiteRT-LM](https://github.com/google-ai-edge/LiteRT-LM) and [Nitro Modules](https://github.com/mrousavy/nitro). Optimized for **Gemma 3n** and other on-device language models.
4
+
5
+ ## Features
6
+
7
+ - 🚀 **Native Performance** - Kotlin (Android) / C++ (iOS) implementation via Nitro Modules
8
+ - 🧠 **Gemma 3n Ready** - First-class support for Gemma 3n E2B/E4B models
9
+ - ⚡ **GPU Acceleration** - GPU delegate (Android), Metal (iOS when available)
10
+ - 📦 **Bundled Tokenizer** - No separate tokenization library needed
11
+ - 🔄 **Streaming Support** - Token-by-token generation callbacks
12
+ - 📱 **Cross-Platform** - Android API 26+ (iOS coming soon)
13
+ - 🚧 **Multimodal** - Image and audio input (Coming Soon to Android)
14
+
15
+ ## Status
16
+
17
+ > ⚠️ **Early Preview**: This library is under active development. Android is functional on devices with enough RAM; the iOS implementation is pending the LiteRT-LM iOS release. Please report any issues on the [GitHub repository](https://github.com/litert-community/react-native-litert-lm).
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ npm install react-native-litert-lm react-native-nitro-modules
23
+ ```
24
+
25
+ ### Expo
26
+
27
+ Add to your `app.json`:
28
+
29
+ ```json
30
+ {
31
+ "expo": {
32
+ "plugins": ["react-native-litert-lm"],
33
+ "android": {
34
+ "minSdkVersion": 26
35
+ }
36
+ }
37
+ }
38
+ ```
39
+
40
+ Then create a development build:
41
+
42
+ ```bash
43
+ npx expo prebuild
44
+ npx expo run:android
45
+ ```
46
+
47
+ > **Note**: Only ARM devices are supported (physical devices or ARM emulators). x86_64 emulators are not supported.
48
+
49
+ ### Bare React Native
50
+
51
+ ```bash
52
+ cd android && ./gradlew clean
53
+ cd ../ios && pod install # iOS coming soon
54
+ ```
55
+
56
+ ## Model Management
57
+
58
+ LiteRT-LM models (like Gemma 3n) are large files (3GB+) and cannot be bundled directly into your app's binary. You must download them at runtime to a writable directory (e.g., `DocumentDirectory`).
59
+
60
+ ### Downloading Models
61
+
62
+ We recommend using `react-native-file-access` or `expo-file-system` to download models.
63
+
64
+ ```typescript
65
+ import { FileSystem } from "react-native-file-access";
66
+ // or import * as FileSystem from 'expo-file-system';
67
+
68
+ const MODEL_URL =
69
+ "https://huggingface.co/litert-community/gemma-3n-2b-it/resolve/main/model.litertlm";
70
+ const localPath = `${FileSystem.DocumentDirectoryPath}/gemma-3n.litertlm`;
71
+
72
+ async function downloadModel() {
73
+ if (await FileSystem.exists(localPath)) return localPath;
74
+
75
+ // Download logic here...
76
+ return localPath;
77
+ }
78
+ ```
79
+
80
+ ## Usage
81
+
82
+ ### Basic Generation
83
+
84
+ ```typescript
85
+ import { createLLM } from "react-native-litert-lm";
86
+
87
+ const llm = createLLM();
88
+
89
+ // Load a Gemma 3n model
90
+ llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
91
+ backend: "gpu",
92
+ temperature: 0.7,
93
+ maxTokens: 512,
94
+ });
95
+
96
+ // Generate response
97
+ const response = llm.sendMessage("What is the capital of France?");
98
+ console.log(response);
99
+
100
+ // Clean up
101
+ llm.close();
102
+ ```
103
+
104
+ ### Streaming Generation
105
+
106
+ ```typescript
107
+ llm.sendMessageAsync("Tell me a story", (token, done) => {
108
+ process.stdout.write(token);
109
+ if (done) console.log("\n--- Done ---");
110
+ });
111
+ ```
112
+
113
+ ### Multimodal (Image/Audio)
114
+
115
+ ```typescript
116
+ // Image input (for vision models)
117
+ const response = llm.sendMessageWithImage(
118
+ "What's in this image?",
119
+ "/path/to/image.jpg",
120
+ );
121
+
122
+ // Audio input (for audio models)
123
+ const transcription = llm.sendMessageWithAudio(
124
+ "Transcribe this audio",
125
+ "/path/to/audio.wav",
126
+ );
127
+ ```
128
+
129
+ ### Check Performance
130
+
131
+ ```typescript
132
+ const stats = llm.getStats();
133
+ console.log(`Generated ${stats.completionTokens} tokens`);
134
+ console.log(`Speed: ${stats.tokensPerSecond.toFixed(1)} tokens/sec`);
135
+ ```
136
+
137
+ ## Supported Models
138
+
139
+ Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-community):
140
+
141
+ | Model | Size | Min Device RAM | Use Case |
142
+ | ------------- | ------ | -------------- | ------------------------- |
143
+ | Gemma 3n E2B | ~3GB | 4GB+ | Efficient, fast responses |
144
+ | Gemma 3n E4B | ~4GB | 8GB+ | Higher quality |
145
+ | Gemma 3 1B | ~1GB | 4GB+ | Smallest, fastest |
146
+ | Phi-4 Mini | ~2GB | 4GB+ | Microsoft's small LLM |
147
+ | Qwen 2.5 1.5B | ~1.5GB | 4GB+ | Multilingual |
148
+
149
+ ## API Reference
150
+
151
+ ### `createLLM(): LiteRTLM`
152
+
153
+ Creates a new LLM inference engine instance.
154
+
155
+ ### `loadModel(path, config?)`
156
+
157
+ - `path: string` - Absolute path to `.litertlm` file
158
+ - `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
159
+ - `config.temperature` - Sampling temperature (default: 0.7)
160
+ - `config.topK` - Top-K sampling (default: 40)
161
+ - `config.maxTokens` - Max generation length (default: 1024)
162
+
163
+ > **Note**: Vision encoder is always set to GPU (required by Gemma 3n). Audio encoder is always set to CPU (optimal for audio).
164
+
165
+ #### Backend Options
166
+
167
+ | Backend | Description | Speed | Compatibility |
168
+ | ------- | ----------------- | ------- | ------------------------------------------ |
169
+ | `'cpu'` | CPU inference | Slowest | Always available; lower RAM requirement |
170
+ | `'gpu'` | GPU acceleration | Fast | Recommended default |
171
+ | `'npu'` | NPU/Neural Engine | Fastest | Requires supported hardware |
172
+
173
+ > ⚠️ **NPU Note**: NPU acceleration requires compatible hardware (Qualcomm Hexagon, MediaTek APU, etc.). If unavailable, LiteRT-LM automatically falls back to GPU.
174
+
175
+ ### `sendMessage(message): string`
176
+
177
+ Blocking generation. Returns complete response.
178
+
179
+ ### `sendMessageAsync(message, callback)`
180
+
181
+ Streaming generation. Callback receives `(token, isDone)`.
182
+
183
+ ### `sendMessageWithImage(message, imagePath): string`
184
+
185
+ Send a message with an image attachment (for vision models). Not yet implemented on Android: the call currently throws an error.
186
+
187
+ ### `sendMessageWithAudio(message, audioPath): string`
188
+
189
+ Send a message with an audio attachment (for audio models). Not yet implemented on Android: the call currently throws an error.
190
+
191
+ ### `getHistory(): Message[]`
192
+
193
+ Get conversation history.
194
+
195
+ ### `resetConversation()`
196
+
197
+ Clear context and start fresh.
198
+
199
+ ### `close()`
200
+
201
+ Release all native resources.
202
+
203
+ ### `getRecommendedBackend(): Backend`
204
+
205
+ Returns the recommended backend for the current platform (usually `'gpu'`).
206
+
207
+ ### `checkBackendSupport(backend): string | undefined`
208
+
209
+ Returns a warning message if the specified backend may have issues on the current platform, or `undefined` if OK.
210
+
211
+ ```typescript
212
+ import { checkBackendSupport } from "react-native-litert-lm";
213
+
214
+ const warning = checkBackendSupport("npu");
215
+ if (warning) {
216
+ console.warn(warning);
217
+ }
218
+ ```
219
+
220
+ ## Requirements
221
+
222
+ - React Native 0.76+
223
+ - react-native-nitro-modules 0.33.2+
224
+ - Android API 26+ (ARM64 only)
225
+ - **LiteRT-LM Android SDK**: `0.9.0-alpha01` (bundled automatically)
226
+ - iOS 15.0+ (coming soon)
227
+
228
+ ## Platform Support
229
+
230
+ | Platform | Status | Architecture |
231
+ | -------- | -------- | ------------ |
232
+ | Android | ✅ Ready | arm64-v8a |
233
+ | iOS | 🚧 Stub | - |
234
+
235
+ ## Architecture
236
+
237
+ This library uses a split implementation strategy to maximize performance and compatibility:
238
+
239
+ - **Android**: Uses **Kotlin** (`HybridLiteRTLM.kt`) to interface directly with the `litertlm-android` AAR.
240
+ - **iOS**: Uses **C++** (`HybridLiteRTLM.cpp`) which will interface with the LiteRT-LM C++ headers (once released).
241
+
242
+ > **Note for Contributors**: Changes made to the C++ implementation (`cpp/`) **do not** affect Android. You must apply feature changes to both the Kotlin and C++ implementations.
243
+
244
+ ## License
245
+
246
+ The code in this repository is licensed under the **[MIT License](LICENSE)**.
247
+
248
+ ### ⚠️ Important AI Model Disclaimer
249
+
250
+ This library acts as an execution engine for On-Device Large Language Models (LLMs). The AI models themselves are **not** distributed with this package and are **not** covered by the MIT license.
251
+
252
+ By downloading and running these models within your app, you agree to comply with their respective licenses and acceptable use policies:
253
+
254
+ - **Gemma (Google)**: [Gemma Terms of Use](https://ai.google.dev/gemma/terms)
255
+ - **Llama 3.2 (Meta)**: [Llama 3.2 Community License](https://www.llama.com/llama3_2/license/)
256
+ - **Qwen (Alibaba)**: [Apache 2.0 License](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct/blob/main/LICENSE)
257
+ - **Phi (Microsoft)**: [MIT License](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/blob/main/LICENSE)
258
+
259
+ _The author of `react-native-litert-lm` takes no responsibility for the outputs generated by these models or the applications built using them._
package/android/CMakeLists.txt ADDED
@@ -0,0 +1,32 @@
1
+ cmake_minimum_required(VERSION 3.18.0)
2
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
3
+
4
+ # Define the library name - must match what Nitrogen expects
5
+ project(LiteRTLM)
6
+
7
+ set(CMAKE_CXX_STANDARD 20)
8
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
9
+
10
+ # Define the shared library (main entry point)
11
+ add_library(
12
+ LiteRTLM
13
+ SHARED
14
+ ../cpp/cpp-adapter.cpp
15
+ # Additional sources are added by autolinking.cmake below
16
+ )
17
+
18
+ # Allow undefined symbols - they will be resolved at runtime when the app
19
+ # loads the NitroModules shared library. This is required because we're
20
+ # building a library that depends on NitroModules symbols which are only
21
+ # available at runtime.
22
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--allow-shlib-undefined")
23
+
24
+ # Include Nitrogen autolinking - this adds all generated sources and links
25
+ include(${CMAKE_SOURCE_DIR}/../nitrogen/generated/android/LiteRTLM+autolinking.cmake)
26
+
27
+ # Android system libraries
28
+ target_link_libraries(
29
+ LiteRTLM
30
+ android
31
+ log
32
+ )
package/android/build.gradle ADDED
@@ -0,0 +1,88 @@
1
+ // Module-level build.gradle for react-native-litert-lm
2
+ // Configures Android build with Kotlin HybridObject + C++ JNI glue
3
+
4
+ plugins {
5
+ id 'com.android.library'
6
+ id 'org.jetbrains.kotlin.android'
7
+ }
8
+
9
+ // Apply Nitrogen autolinking
10
+ apply from: '../nitrogen/generated/android/LiteRTLM+autolinking.gradle'
11
+
12
+ android {
13
+ namespace "dev.litert.litertlm"
14
+ compileSdk 35
15
+
16
+ defaultConfig {
17
+ minSdk 26 // LiteRT-LM requires API 26+
18
+
19
+ externalNativeBuild {
20
+ cmake {
21
+ cppFlags "-O2 -fexceptions -frtti -std=c++20"
22
+ arguments "-DANDROID_STL=c++_shared"
23
+ }
24
+ }
25
+
26
+ ndk {
27
+ abiFilters 'arm64-v8a'
28
+ }
29
+ }
30
+
31
+ buildFeatures {
32
+ prefab true
33
+ }
34
+
35
+ externalNativeBuild {
36
+ cmake {
37
+ path "CMakeLists.txt"
38
+ version "3.22.1"
39
+ }
40
+ }
41
+
42
+ compileOptions {
43
+ sourceCompatibility JavaVersion.VERSION_17
44
+ targetCompatibility JavaVersion.VERSION_17
45
+ }
46
+
47
+ kotlinOptions {
48
+ jvmTarget = '17'
49
+ }
50
+
51
+ sourceSets {
52
+ main {
53
+ java.srcDirs += [
54
+ 'src/main/java',
55
+ '../nitrogen/generated/android/kotlin'
56
+ ]
57
+ }
58
+ }
59
+
60
+ packaging {
61
+ jniLibs {
62
+ keepDebugSymbols.add("**/*.so")
63
+ }
64
+ }
65
+ }
66
+
67
+ repositories {
68
+ google()
69
+ mavenCentral()
70
+ }
71
+
72
+ dependencies {
73
+ // React Native
74
+ implementation 'com.facebook.react:react-android'
75
+
76
+ // Nitro Modules
77
+ implementation project(':react-native-nitro-modules')
78
+
79
+ // fbjni for HybridObject JNI bridge
80
+ implementation 'com.facebook.fbjni:fbjni:0.6.0'
81
+
82
+ // Kotlin coroutines for async operations
83
+ implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3'
84
+ implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
85
+
86
+ // LiteRT-LM Kotlin API
87
+ implementation 'com.google.ai.edge.litertlm:litertlm-android:0.9.0-alpha01'
88
+ }
package/android/src/main/AndroidManifest.xml ADDED
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <manifest xmlns:android="http://schemas.android.com/apk/res/android">
3
+ <application>
4
+ <!-- ContentProvider for initializing application context at startup -->
5
+ <provider
6
+ android:name="dev.litert.litertlm.LiteRTLMInitProvider"
7
+ android:authorities="${applicationId}.litertlm.init"
8
+ android:exported="false"
9
+ android:initOrder="100" />
10
+ </application>
11
+ </manifest>
package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt ADDED
@@ -0,0 +1,280 @@
1
+ ///
2
+ /// HybridLiteRTLM.kt
3
+ /// Kotlin implementation of LiteRTLM HybridObject using LiteRT-LM Android SDK.
4
+ ///
5
+
6
+ package com.margelo.nitro.dev.litert.litertlm
7
+
8
+ import android.util.Log
9
+ import androidx.annotation.Keep
10
+ import com.facebook.proguard.annotations.DoNotStrip
11
+ import dev.litert.litertlm.LiteRTLMInitProvider
12
+ import com.google.ai.edge.litertlm.Engine
13
+ import com.google.ai.edge.litertlm.Conversation
14
+ import com.google.ai.edge.litertlm.EngineConfig
15
+ import com.google.ai.edge.litertlm.ConversationConfig
16
+ import com.margelo.nitro.dev.litert.litertlm.Backend
17
+ import com.margelo.nitro.dev.litert.litertlm.GenerationStats
18
+ import com.margelo.nitro.dev.litert.litertlm.HybridLiteRTLMSpec
19
+ import com.margelo.nitro.dev.litert.litertlm.LLMConfig
20
+ import com.margelo.nitro.dev.litert.litertlm.Message
21
+ import com.margelo.nitro.dev.litert.litertlm.Role
22
+
23
+ // Alias to avoid confusion with our generated Message type
24
+ typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
25
+
26
+ /**
27
+ * Kotlin implementation of LiteRTLM using the LiteRT-LM Android SDK.
28
+ * This class bridges between React Native (via Nitro) and the Google LiteRT-LM Engine.
29
+ */
30
+ @DoNotStrip
31
+ @Keep
32
+ class HybridLiteRTLM : HybridLiteRTLMSpec() {
33
+
34
+ companion object {
35
+ private const val TAG = "HybridLiteRTLM"
36
+ }
37
+
38
+ // LiteRT-LM Engine and Conversation
39
+ private var engine: Engine? = null
40
+ private var conversation: Conversation? = null
41
+
42
+ // Conversation history for getHistory()
43
+ private val history = mutableListOf<Message>()
44
+
45
+ // Last generation stats
46
+ private var lastStats = GenerationStats(
47
+ promptTokens = 0.0,
48
+ completionTokens = 0.0,
49
+ totalTokens = 0.0,
50
+ timeToFirstToken = 0.0,
51
+ totalTime = 0.0,
52
+ tokensPerSecond = 0.0
53
+ )
54
+
55
+ // Configuration
56
+ private var backend: Backend = Backend.GPU
57
+ private var temperature: Double = 0.7
58
+ private var topK: Int = 40
59
+ private var topP: Double = 0.95
60
+ private var maxTokens: Int = 1024
61
+
62
+ override val memorySize: Long
63
+ get() = 10L * 1024L * 1024L // ~10MB estimate
64
+
65
+ // -------------------------------------------------------------------------
66
+ // loadModel - Initialize LiteRT-LM Engine and Conversation
67
+ // -------------------------------------------------------------------------
68
+ override fun loadModel(modelPath: String, config: LLMConfig?) {
69
+ Log.i(TAG, "loadModel: $modelPath")
70
+
71
+ // Clean up existing resources
72
+ close()
73
+
74
+ // Apply configuration
75
+ config?.let { cfg ->
76
+ cfg.backend?.let { backend = it }
77
+ cfg.temperature?.let { temperature = it }
78
+ cfg.topK?.let { topK = it.toInt() }
79
+ cfg.topP?.let { topP = it }
80
+ cfg.maxTokens?.let { maxTokens = it.toInt() }
81
+ }
82
+
83
+ try {
84
+ // Map our Backend enum to LiteRT-LM Backend enum
85
+ val lmBackend = when (backend) {
86
+ Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
87
+ Backend.NPU -> {
88
+ Log.i(TAG, "NPU backend requested - requires hardware support")
89
+ com.google.ai.edge.litertlm.Backend.NPU
90
+ }
91
+ else -> com.google.ai.edge.litertlm.Backend.CPU
92
+ }
93
+
94
+ // Vision backend: hardcoded to GPU (required by Gemma 3n)
95
+ val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
96
+
97
+ // Audio backend: hardcoded to CPU (optimal for audio processing)
98
+ val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
99
+
100
+ Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
101
+
102
+ // Get cache directory from application context
103
+ // LiteRT-LM needs this to store temporary compiled model files
104
+ val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
105
+ Log.i(TAG, "Using cache directory: $cacheDirectory")
106
+
107
+ // Create Engine configuration
108
+ val engineConfig = EngineConfig(
109
+ modelPath = modelPath,
110
+ backend = lmBackend,
111
+ visionBackend = lmVisionBackend,
112
+ audioBackend = lmAudioBackend,
113
+ maxNumTokens = maxTokens,
114
+ cacheDir = cacheDirectory
115
+ )
116
+
117
+ // Create Engine (heavyweight - loads model)
118
+ engine = Engine(engineConfig).also { it.initialize() }
119
+ Log.i(TAG, "Engine created and initialized successfully")
120
+
121
+ // Create Conversation (lightweight - holds KV cache)
122
+ createNewConversation()
123
+ Log.i(TAG, "Conversation created successfully")
124
+
125
+ } catch (e: Exception) {
126
+ Log.e(TAG, "Failed to load model: ${e.message}", e)
127
+ throw RuntimeException("Failed to load model: ${e.message}", e)
128
+ }
129
+ }
130
+
131
+ // -------------------------------------------------------------------------
132
+ // sendMessage - Blocking text inference
133
+ // -------------------------------------------------------------------------
134
+ override fun sendMessage(message: String): String {
135
+ ensureLoaded()
136
+
137
+ // Add user message to history
138
+ history.add(Message(Role.USER, message))
139
+
140
+ // Pre-process message (chat template)
141
+ Log.i(TAG, "sendMessage: $message")
142
+
143
+ // Blocking inference
144
+ // LiteRT-LM expects a Message object, not String
145
+ val userMsg = LiteRTMessage.of(message)
146
+ val responseMsg = conversation!!.sendMessage(userMsg)
147
+
148
+ // Extract text from response Message
149
+ val response = responseMsg.contents
150
+ .filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
151
+ .joinToString("") { it.text }
152
+
153
+ // Add model response to history
154
+ history.add(Message(Role.MODEL, response))
155
+
156
+ // Update stats (mock/approximate for now as SDK doesn't return full stats for sync call)
157
+ lastStats = GenerationStats(
158
+ promptTokens = message.length / 4.0,
159
+ completionTokens = response.length / 4.0,
160
+ totalTokens = (message.length + response.length) / 4.0,
161
+ timeToFirstToken = 0.0,
162
+ totalTime = 0.0,
163
+ tokensPerSecond = 0.0
164
+ )
165
+
166
+ return response
167
+ }
168
+
169
+ // -------------------------------------------------------------------------
170
+ // sendMessageAsync - Streaming inference
171
+ // -------------------------------------------------------------------------
172
+ override fun sendMessageAsync(message: String, onToken: (String, Boolean) -> Unit) {
173
+ ensureLoaded()
174
+
175
+ // Add user message to history
176
+ history.add(Message(Role.USER, message))
177
+ Log.d(TAG, "sendMessageAsync: $message")
178
+
179
+ val fullResponseBuilder = StringBuilder()
180
+
181
+ // Define callback
182
+ val listener = object : com.google.ai.edge.litertlm.MessageCallback {
183
+ override fun onMessage(responseMsg: LiteRTMessage) {
184
+ val chunk = responseMsg.contents
185
+ .filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
186
+ .joinToString("") { it.text }
187
+
188
+ onToken(chunk, false)
189
+
190
+ if (chunk.isNotEmpty()) {
191
+ fullResponseBuilder.append(chunk)
192
+ }
193
+ }
194
+
195
+ override fun onDone() {
196
+ onToken("", true)
197
+ val fullResponse = fullResponseBuilder.toString()
198
+ history.add(Message(Role.MODEL, fullResponse))
199
+ Log.d(TAG, "sendMessageAsync done. Length: ${fullResponse.length}")
200
+ }
201
+
202
+ override fun onError(t: Throwable) {
203
+ Log.e(TAG, "Async generation failed", t)
204
+ onToken("Error: ${t.message}", true)
205
+ }
206
+ }
207
+
208
+ try {
209
+ // Construct Message object
210
+ val userMsg = LiteRTMessage.of(message)
211
+
212
+ // LiteRT-LM async call - SDK handles threading
213
+ conversation!!.sendMessageAsync(userMsg, listener)
214
+
215
+ } catch (e: Exception) {
216
+ Log.e(TAG, "Failed into initiate async generation", e)
217
+ onToken("Error: ${e.message}", true)
218
+ }
219
+ }
220
+
221
+ // -------------------------------------------------------------------------
222
+ // Multimodal methods
223
+ // -------------------------------------------------------------------------
224
+ override fun sendMessageWithImage(message: String, imagePath: String): String {
225
+ // TODO: Implement image loading from path
226
+ throw RuntimeException("Multimodal (Image) not yet implemented in this wrapper")
227
+ }
228
+
229
+ override fun sendMessageWithAudio(message: String, audioPath: String): String {
230
+ // TODO: Implement audio loading from path
231
+ throw RuntimeException("Multimodal (Audio) not yet implemented in this wrapper")
232
+ }
233
+
234
+ // -------------------------------------------------------------------------
235
+ // Helpers
236
+ // -------------------------------------------------------------------------
237
+ override fun getHistory(): Array<Message> {
238
+ return history.toTypedArray()
239
+ }
240
+
241
+ override fun resetConversation() {
242
+ history.clear()
243
+ createNewConversation()
244
+ }
245
+
246
+ override fun isReady(): Boolean {
247
+ return isLoaded_
248
+ }
249
+
250
+ // Property backing field for isReady check
251
+ private val isLoaded_: Boolean
252
+ get() = engine != null
253
+
254
+ override fun getStats(): GenerationStats {
255
+ return lastStats
256
+ }
257
+
258
+ override fun close() {
259
+ Log.d(TAG, "Closing resources")
260
+ try {
261
+ conversation = null
262
+ engine = null // Engine destructor should handle cleanup
263
+ // In C++ we'd close explicitly, Kotlin GC helps but explicit close method is better if SDK has it
264
+ } catch (e: Exception) {
265
+ Log.e(TAG, "Error closing resources", e)
266
+ }
267
+ }
268
+
269
+ private fun ensureLoaded() {
270
+ if (engine == null) {
271
+ throw RuntimeException("LiteRTLM: No model loaded. Call loadModel() first.")
272
+ }
273
+ }
274
+
275
+ private fun createNewConversation() {
276
+ ensureLoaded()
277
+ // Dispose old conversation if needed
278
+ conversation = engine!!.createConversation()
279
+ }
280
+ }