react-native-litert-lm 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -15
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +165 -102
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/LiteRTLMRegistry.kt +32 -0
- package/android/src/main/java/dev/litert/litertlm/LiteRTLMInitProvider.kt +14 -0
- package/cpp/HybridLiteRTLM.cpp +60 -27
- package/cpp/include/stb_image.h +7988 -0
- package/lib/specs/LiteRTLM.nitro.d.ts +4 -5
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +59 -12
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +4 -4
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +5 -4
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +5 -4
- package/package.json +1 -1
- package/src/specs/LiteRTLM.nitro.ts +4 -5
package/README.md
CHANGED
|
@@ -9,12 +9,13 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
|
|
|
9
9
|
- ⚡ **GPU Acceleration** - GPU delegate (Android), Metal (iOS when available)
|
|
10
10
|
- 📦 **Bundled Tokenizer** - No separate tokenization library needed
|
|
11
11
|
- ๐ **Streaming Support** - Token-by-token generation callbacks
|
|
12
|
-
- 📱 **Cross-Platform** - Android API 26+
|
|
13
|
-
-
|
|
12
|
+
- 📱 **Cross-Platform** - Android API 26+
|
|
13
|
+
- 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
|
|
14
|
+
- 🧵 **Async API** - Non-blocking inference to prevent UI freezes
|
|
14
15
|
|
|
15
16
|
## Status
|
|
16
17
|
|
|
17
|
-
> ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub
|
|
18
|
+
> ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub issues](https://github.com/hung-yueh/react-native-litert-lm/issues).
|
|
18
19
|
|
|
19
20
|
## Installation
|
|
20
21
|
|
|
@@ -86,15 +87,15 @@ import { createLLM } from "react-native-litert-lm";
|
|
|
86
87
|
|
|
87
88
|
const llm = createLLM();
|
|
88
89
|
|
|
89
|
-
// Load a Gemma 3n model
|
|
90
|
-
llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
|
|
90
|
+
// Load a Gemma 3n model (async)
|
|
91
|
+
await llm.loadModel("/path/to/gemma-3n-e2b.litertlm", {
|
|
91
92
|
backend: "gpu",
|
|
92
93
|
temperature: 0.7,
|
|
93
94
|
maxTokens: 512,
|
|
94
95
|
});
|
|
95
96
|
|
|
96
|
-
// Generate response
|
|
97
|
-
const response = llm.sendMessage("What is the capital of France?");
|
|
97
|
+
// Generate response (async)
|
|
98
|
+
const response = await llm.sendMessage("What is the capital of France?");
|
|
98
99
|
console.log(response);
|
|
99
100
|
|
|
100
101
|
// Clean up
|
|
@@ -113,14 +114,15 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
|
|
|
113
114
|
### Multimodal (Image/Audio)
|
|
114
115
|
|
|
115
116
|
```typescript
|
|
116
|
-
// Image input (for vision models)
|
|
117
|
-
|
|
117
|
+
// Image input (for vision models like Gemma 3n)
|
|
118
|
+
// ⚠️ Ensure model is loaded with { maxTokens: 1024+ }
|
|
119
|
+
const response = await llm.sendMessageWithImage(
|
|
118
120
|
"What's in this image?",
|
|
119
121
|
"/path/to/image.jpg",
|
|
120
122
|
);
|
|
121
123
|
|
|
122
124
|
// Audio input (for audio models)
|
|
123
|
-
const transcription = llm.sendMessageWithAudio(
|
|
125
|
+
const transcription = await llm.sendMessageWithAudio(
|
|
124
126
|
"Transcribe this audio",
|
|
125
127
|
"/path/to/audio.wav",
|
|
126
128
|
);
|
|
@@ -152,7 +154,7 @@ Download `.litertlm` models from [HuggingFace](https://huggingface.co/litert-com
|
|
|
152
154
|
|
|
153
155
|
Creates a new LLM inference engine instance.
|
|
154
156
|
|
|
155
|
-
### `loadModel(path, config?)
|
|
157
|
+
### `loadModel(path, config?): Promise<void>`
|
|
156
158
|
|
|
157
159
|
- `path: string` - Absolute path to `.litertlm` file
|
|
158
160
|
- `config.backend` - `'cpu'` | `'gpu'` | `'npu'` (default: `'gpu'`)
|
|
@@ -172,19 +174,19 @@ Creates a new LLM inference engine instance.
|
|
|
172
174
|
|
|
173
175
|
> ⚠️ **NPU Note**: NPU acceleration requires compatible hardware (Qualcomm Hexagon, MediaTek APU, etc.). If unavailable, LiteRT-LM automatically falls back to GPU.
|
|
174
176
|
|
|
175
|
-
### `sendMessage(message): string
|
|
177
|
+
### `sendMessage(message): Promise<string>`
|
|
176
178
|
|
|
177
|
-
Blocking generation. Returns complete response.
|
|
179
|
+
Blocking generation (executed on background thread). Returns complete response.
|
|
178
180
|
|
|
179
181
|
### `sendMessageAsync(message, callback)`
|
|
180
182
|
|
|
181
183
|
Streaming generation. Callback receives `(token, isDone)`.
|
|
182
184
|
|
|
183
|
-
### `sendMessageWithImage(message, imagePath): string
|
|
185
|
+
### `sendMessageWithImage(message, imagePath): Promise<string>`
|
|
184
186
|
|
|
185
187
|
Send a message with an image attachment (for vision models).
|
|
186
188
|
|
|
187
|
-
### `sendMessageWithAudio(message, audioPath): string
|
|
189
|
+
### `sendMessageWithAudio(message, audioPath): Promise<string>`
|
|
188
190
|
|
|
189
191
|
Send a message with an audio attachment (for audio models).
|
|
190
192
|
|
|
@@ -19,8 +19,12 @@ import com.margelo.nitro.dev.litert.litertlm.HybridLiteRTLMSpec
|
|
|
19
19
|
import com.margelo.nitro.dev.litert.litertlm.LLMConfig
|
|
20
20
|
import com.margelo.nitro.dev.litert.litertlm.Message
|
|
21
21
|
import com.margelo.nitro.dev.litert.litertlm.Role
|
|
22
|
+
import com.margelo.nitro.core.Promise
|
|
23
|
+
import com.google.ai.edge.litertlm.Content
|
|
24
|
+
|
|
22
25
|
|
|
23
26
|
// Alias to avoid confusion with our generated Message type
|
|
27
|
+
// Alias to avoid confusion
|
|
24
28
|
typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
|
|
25
29
|
|
|
26
30
|
/**
|
|
@@ -35,6 +39,10 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
35
39
|
private const val TAG = "HybridLiteRTLM"
|
|
36
40
|
}
|
|
37
41
|
|
|
42
|
+
init {
|
|
43
|
+
LiteRTLMRegistry.register(this)
|
|
44
|
+
}
|
|
45
|
+
|
|
38
46
|
// LiteRT-LM Engine and Conversation
|
|
39
47
|
private var engine: Engine? = null
|
|
40
48
|
private var conversation: Conversation? = null
|
|
@@ -60,116 +68,124 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
60
68
|
private var maxTokens: Int = 1024
|
|
61
69
|
|
|
62
70
|
override val memorySize: Long
|
|
63
|
-
get() =
|
|
71
|
+
get() = 1024L * 1024L * 1024L // ~1GB (models are large)
|
|
64
72
|
|
|
65
73
|
// -------------------------------------------------------------------------
|
|
66
74
|
// loadModel - Initialize LiteRT-LM Engine and Conversation
|
|
67
75
|
// -------------------------------------------------------------------------
|
|
68
|
-
override fun loadModel(modelPath: String, config: LLMConfig?) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
76
|
+
override fun loadModel(modelPath: String, config: LLMConfig?): Promise<Unit> {
|
|
77
|
+
return Promise.parallel {
|
|
78
|
+
Log.i(TAG, "loadModel: $modelPath")
|
|
79
|
+
|
|
80
|
+
// Clean up existing resources
|
|
81
|
+
close()
|
|
82
|
+
|
|
83
|
+
// Apply configuration
|
|
84
|
+
config?.let { cfg ->
|
|
85
|
+
cfg.backend?.let { backend = it }
|
|
86
|
+
cfg.temperature?.let { temperature = it }
|
|
87
|
+
cfg.topK?.let { topK = it.toInt() }
|
|
88
|
+
cfg.topP?.let { topP = it }
|
|
89
|
+
cfg.maxTokens?.let { maxTokens = it.toInt() }
|
|
90
|
+
}
|
|
82
91
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
92
|
+
try {
|
|
93
|
+
// Map our Backend enum to LiteRT-LM Backend enum
|
|
94
|
+
val lmBackend = when (backend) {
|
|
95
|
+
Backend.GPU -> com.google.ai.edge.litertlm.Backend.GPU
|
|
96
|
+
Backend.NPU -> {
|
|
97
|
+
Log.i(TAG, "NPU backend requested - requires hardware support")
|
|
98
|
+
com.google.ai.edge.litertlm.Backend.NPU
|
|
99
|
+
}
|
|
100
|
+
else -> com.google.ai.edge.litertlm.Backend.CPU
|
|
90
101
|
}
|
|
91
|
-
else -> com.google.ai.edge.litertlm.Backend.CPU
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// Vision backend: hardcoded to GPU (required by Gemma 3n)
|
|
95
|
-
val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
|
|
96
102
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
103
|
+
// Vision backend: hardcoded to GPU (required by Gemma 3n)
|
|
104
|
+
val lmVisionBackend = com.google.ai.edge.litertlm.Backend.GPU
|
|
105
|
+
|
|
106
|
+
// Audio backend: hardcoded to CPU (optimal for audio processing)
|
|
107
|
+
val lmAudioBackend = com.google.ai.edge.litertlm.Backend.CPU
|
|
108
|
+
|
|
109
|
+
Log.i(TAG, "Backend config: main=$lmBackend, vision=$lmVisionBackend (hardcoded), audio=$lmAudioBackend (hardcoded)")
|
|
110
|
+
|
|
111
|
+
// Get cache directory from application context
|
|
112
|
+
val cacheDirectory = LiteRTLMInitProvider.applicationContext?.cacheDir?.absolutePath
|
|
113
|
+
Log.i(TAG, "Using cache directory: $cacheDirectory")
|
|
114
|
+
|
|
115
|
+
// Create Engine configuration
|
|
116
|
+
val engineConfig = EngineConfig(
|
|
117
|
+
modelPath = modelPath,
|
|
118
|
+
backend = lmBackend,
|
|
119
|
+
visionBackend = lmVisionBackend,
|
|
120
|
+
audioBackend = lmAudioBackend,
|
|
121
|
+
maxNumTokens = maxTokens,
|
|
122
|
+
cacheDir = cacheDirectory
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
// Initialize Engine
|
|
126
|
+
engine = Engine(engineConfig).also { it.initialize() }
|
|
127
|
+
Log.i(TAG, "Engine created and initialized successfully")
|
|
128
|
+
|
|
129
|
+
// Create Conversation
|
|
130
|
+
createNewConversation()
|
|
131
|
+
Log.i(TAG, "Conversation created successfully")
|
|
132
|
+
|
|
133
|
+
} catch (e: Exception) {
|
|
134
|
+
Log.e(TAG, "Failed to load model: ${e.message}", e)
|
|
135
|
+
throw RuntimeException("Failed to load model: ${e.message}", e)
|
|
136
|
+
}
|
|
128
137
|
}
|
|
129
138
|
}
|
|
130
139
|
|
|
131
140
|
// -------------------------------------------------------------------------
|
|
132
|
-
// sendMessage -
|
|
141
|
+
// sendMessage - Helper for one-shot generation (internally uses Async)
|
|
133
142
|
// -------------------------------------------------------------------------
|
|
134
|
-
override fun sendMessage(message: String): String {
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
//
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
143
|
+
override fun sendMessage(message: String): Promise<String> {
|
|
144
|
+
// Implement Promise-based sendMessage using suspend coroutine logic wrapped in Promise
|
|
145
|
+
// Since Promise.parallel expects a blocking block returning T,
|
|
146
|
+
// and sendMessageAsync is callback-based, we need to bridge them.
|
|
147
|
+
// HOWEVER, we can just use the synchronous `sendMessage` API of the SDK
|
|
148
|
+
// inside the `Promise.parallel` block, which moves it off the main thread!
|
|
149
|
+
return Promise.parallel {
|
|
150
|
+
ensureLoaded()
|
|
151
|
+
|
|
152
|
+
// Add user message to history
|
|
153
|
+
history.add(Message(Role.USER, message))
|
|
154
|
+
Log.i(TAG, "sendMessage (Promise): $message")
|
|
155
|
+
|
|
156
|
+
// Blocking inference (safe here because we are in Promise.parallel worker thread)
|
|
157
|
+
val userMsg = LiteRTMessage.of(message)
|
|
158
|
+
val responseMsg = conversation!!.sendMessage(userMsg)
|
|
159
|
+
|
|
160
|
+
// Extract text
|
|
161
|
+
val response = responseMsg.contents
|
|
162
|
+
.filterIsInstance<com.google.ai.edge.litertlm.Content.Text>()
|
|
163
|
+
.joinToString("") { it.text }
|
|
164
|
+
|
|
165
|
+
// Add model response to history
|
|
166
|
+
history.add(Message(Role.MODEL, response))
|
|
167
|
+
|
|
168
|
+
// Update stats
|
|
169
|
+
lastStats = GenerationStats(
|
|
170
|
+
promptTokens = message.length / 4.0,
|
|
171
|
+
completionTokens = response.length / 4.0,
|
|
172
|
+
totalTokens = (message.length + response.length) / 4.0,
|
|
173
|
+
timeToFirstToken = 0.0,
|
|
174
|
+
totalTime = 0.0,
|
|
175
|
+
tokensPerSecond = 0.0
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
response // Return the string
|
|
179
|
+
}
|
|
167
180
|
}
|
|
168
181
|
|
|
169
182
|
// -------------------------------------------------------------------------
|
|
170
183
|
// sendMessageAsync - Streaming inference
|
|
171
184
|
// -------------------------------------------------------------------------
|
|
172
185
|
override fun sendMessageAsync(message: String, onToken: (String, Boolean) -> Unit) {
|
|
186
|
+
// This is already async (void return), so we execute immediately on the calling thread
|
|
187
|
+
// (which is the Nitro specialized thread, not Main).
|
|
188
|
+
// The SDK's sendMessageAsync is non-blocking anyway.
|
|
173
189
|
ensureLoaded()
|
|
174
190
|
|
|
175
191
|
// Add user message to history
|
|
@@ -206,12 +222,8 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
206
222
|
}
|
|
207
223
|
|
|
208
224
|
try {
|
|
209
|
-
// Construct Message object
|
|
210
225
|
val userMsg = LiteRTMessage.of(message)
|
|
211
|
-
|
|
212
|
-
// LiteRT-LM async call - SDK handles threading
|
|
213
226
|
conversation!!.sendMessageAsync(userMsg, listener)
|
|
214
|
-
|
|
215
227
|
} catch (e: Exception) {
|
|
216
228
|
Log.e(TAG, "Failed into initiate async generation", e)
|
|
217
229
|
onToken("Error: ${e.message}", true)
|
|
@@ -221,14 +233,63 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
221
233
|
// -------------------------------------------------------------------------
|
|
222
234
|
// Multimodal methods
|
|
223
235
|
// -------------------------------------------------------------------------
|
|
224
|
-
override fun sendMessageWithImage(message: String, imagePath: String): String {
|
|
225
|
-
|
|
226
|
-
|
|
236
|
+
override fun sendMessageWithImage(message: String, imagePath: String): Promise<String> {
|
|
237
|
+
return Promise.parallel {
|
|
238
|
+
ensureLoaded()
|
|
239
|
+
Log.i(TAG, "sendMessageWithImage: $message, path=$imagePath")
|
|
240
|
+
|
|
241
|
+
// Create multimodal message
|
|
242
|
+
// Use factory method Message.of passing a list of Content
|
|
243
|
+
val textContent = Content.Text(message)
|
|
244
|
+
|
|
245
|
+
val contentList = listOf(
|
|
246
|
+
textContent,
|
|
247
|
+
Content.ImageFile(imagePath)
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
val userMsg = LiteRTMessage.of(contentList)
|
|
251
|
+
|
|
252
|
+
// Add to history
|
|
253
|
+
history.add(Message(Role.USER, "$message [Image]"))
|
|
254
|
+
|
|
255
|
+
val responseMsg = conversation!!.sendMessage(userMsg)
|
|
256
|
+
|
|
257
|
+
val response = responseMsg.contents
|
|
258
|
+
.filterIsInstance<Content.Text>()
|
|
259
|
+
.joinToString("") { it.text }
|
|
260
|
+
|
|
261
|
+
history.add(Message(Role.MODEL, response))
|
|
262
|
+
|
|
263
|
+
response
|
|
264
|
+
}
|
|
227
265
|
}
|
|
228
266
|
|
|
229
|
-
override fun sendMessageWithAudio(message: String, audioPath: String): String {
|
|
230
|
-
|
|
231
|
-
|
|
267
|
+
override fun sendMessageWithAudio(message: String, audioPath: String): Promise<String> {
|
|
268
|
+
return Promise.parallel {
|
|
269
|
+
ensureLoaded()
|
|
270
|
+
Log.i(TAG, "sendMessageWithAudio: $message, path=$audioPath")
|
|
271
|
+
|
|
272
|
+
// Load audio
|
|
273
|
+
|
|
274
|
+
val contentList = listOf(
|
|
275
|
+
Content.Text(message),
|
|
276
|
+
Content.AudioFile(audioPath)
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
val userMsg = LiteRTMessage.of(contentList)
|
|
280
|
+
|
|
281
|
+
history.add(Message(Role.USER, "$message [Audio]"))
|
|
282
|
+
|
|
283
|
+
val responseMsg = conversation!!.sendMessage(userMsg)
|
|
284
|
+
|
|
285
|
+
val response = responseMsg.contents
|
|
286
|
+
.filterIsInstance<Content.Text>()
|
|
287
|
+
.joinToString("") { it.text }
|
|
288
|
+
|
|
289
|
+
history.add(Message(Role.MODEL, response))
|
|
290
|
+
|
|
291
|
+
response
|
|
292
|
+
}
|
|
232
293
|
}
|
|
233
294
|
|
|
234
295
|
// -------------------------------------------------------------------------
|
|
@@ -277,4 +338,6 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
277
338
|
// Dispose old conversation if needed
|
|
278
339
|
conversation = engine!!.createConversation()
|
|
279
340
|
}
|
|
341
|
+
|
|
342
|
+
|
|
280
343
|
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
package com.margelo.nitro.dev.litert.litertlm
|
|
2
|
+
|
|
3
|
+
import java.util.Collections
|
|
4
|
+
import java.util.WeakHashMap
|
|
5
|
+
import android.util.Log
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Global registry to track active LiteRTLM instances.
|
|
9
|
+
* Used for memory trimming and cleanup.
|
|
10
|
+
*/
|
|
11
|
+
object LiteRTLMRegistry {
|
|
12
|
+
private const val TAG = "LiteRTLMRegistry"
|
|
13
|
+
|
|
14
|
+
// Use WeakSet-like structure to prevent leaks
|
|
15
|
+
private val instances = Collections.newSetFromMap(WeakHashMap<HybridLiteRTLM, Boolean>())
|
|
16
|
+
|
|
17
|
+
fun register(instance: HybridLiteRTLM) {
|
|
18
|
+
synchronized(instances) {
|
|
19
|
+
instances.add(instance)
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
fun onTrimMemory(level: Int) {
|
|
24
|
+
Log.w(TAG, "Received memory warning (level=$level). Releasing resources...")
|
|
25
|
+
synchronized(instances) {
|
|
26
|
+
instances.forEach { it.close() }
|
|
27
|
+
// Note: We don't clear the set here, as close() should be idempotent
|
|
28
|
+
// and the instance might still be ref-counted by JS.
|
|
29
|
+
// We just ensure the HEAVY native resources are gone.
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
@@ -17,6 +17,20 @@ class LiteRTLMInitProvider : ContentProvider() {
|
|
|
17
17
|
override fun onCreate(): Boolean {
|
|
18
18
|
applicationContext = context?.applicationContext
|
|
19
19
|
Log.i(TAG, "LiteRTLMInitProvider initialized with context: $applicationContext")
|
|
20
|
+
|
|
21
|
+
applicationContext?.registerComponentCallbacks(object : android.content.ComponentCallbacks2 {
|
|
22
|
+
override fun onTrimMemory(level: Int) {
|
|
23
|
+
if (level >= android.content.ComponentCallbacks2.TRIM_MEMORY_RUNNING_LOW) {
|
|
24
|
+
com.margelo.nitro.dev.litert.litertlm.LiteRTLMRegistry.onTrimMemory(level)
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
override fun onConfigurationChanged(newConfig: android.content.res.Configuration) {}
|
|
29
|
+
override fun onLowMemory() {
|
|
30
|
+
com.margelo.nitro.dev.litert.litertlm.LiteRTLMRegistry.onTrimMemory(android.content.ComponentCallbacks2.TRIM_MEMORY_COMPLETE)
|
|
31
|
+
}
|
|
32
|
+
})
|
|
33
|
+
|
|
20
34
|
return true
|
|
21
35
|
}
|
|
22
36
|
|
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -11,9 +11,13 @@
|
|
|
11
11
|
|
|
12
12
|
#include "HybridLiteRTLM.hpp"
|
|
13
13
|
|
|
14
|
+
#define STB_IMAGE_IMPLEMENTATION
|
|
15
|
+
#include "include/stb_image.h"
|
|
16
|
+
|
|
14
17
|
#include <chrono>
|
|
15
18
|
#include <stdexcept>
|
|
16
19
|
#include <sstream>
|
|
20
|
+
#include <fstream>
|
|
17
21
|
|
|
18
22
|
namespace margelo::nitro::litertlm {
|
|
19
23
|
|
|
@@ -229,32 +233,46 @@ std::string HybridLiteRTLM::sendMessageWithImage(
|
|
|
229
233
|
ensureLoaded();
|
|
230
234
|
|
|
231
235
|
#ifdef LITERT_LM_ENABLED
|
|
232
|
-
//
|
|
233
|
-
|
|
234
|
-
//
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
//
|
|
241
|
-
|
|
242
|
-
" - Note: Image processing not yet implemented, text-only response]";
|
|
243
|
-
|
|
236
|
+
// Load image using stb_image
|
|
237
|
+
int width, height, channels;
|
|
238
|
+
unsigned char* img = stbi_load(imagePath.c_str(), &width, &height, &channels, 3); // Force 3 channels (RGB)
|
|
239
|
+
if (img == nullptr) {
|
|
240
|
+
throw std::runtime_error("Failed to load image from path: " + imagePath);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// Create input tensor/buffer for the engine.
|
|
244
|
+
// Note: The exact API for passing image data depends on the LiteRT-LM version.
|
|
245
|
+
// Assuming a structure that accepts raw bytes and dimensions.
|
|
244
246
|
litert::lm::UserMessage lm_message;
|
|
245
247
|
lm_message.role = "user";
|
|
246
|
-
lm_message.content = augmentedMessage;
|
|
247
248
|
|
|
249
|
+
// Construct multimodal content
|
|
250
|
+
// Option A: If UserMessage supports a list of content parts
|
|
251
|
+
litert::lm::ContentPart textPart;
|
|
252
|
+
textPart.type = litert::lm::ContentType::TEXT;
|
|
253
|
+
textPart.text = message;
|
|
254
|
+
lm_message.parts.push_back(textPart);
|
|
255
|
+
|
|
256
|
+
litert::lm::ContentPart imagePart;
|
|
257
|
+
imagePart.type = litert::lm::ContentType::IMAGE;
|
|
258
|
+
imagePart.image.width = width;
|
|
259
|
+
imagePart.image.height = height;
|
|
260
|
+
imagePart.image.channels = channels;
|
|
261
|
+
imagePart.image.data = std::vector<uint8_t>(img, img + (width * height * channels));
|
|
262
|
+
lm_message.parts.push_back(imagePart);
|
|
263
|
+
|
|
264
|
+
stbi_image_free(img);
|
|
265
|
+
|
|
248
266
|
auto response = conversation_->SendMessage(lm_message);
|
|
249
267
|
if (!response.ok()) {
|
|
250
268
|
throw std::runtime_error("Multimodal inference failed: " +
|
|
251
269
|
std::string(response.status().message()));
|
|
252
270
|
}
|
|
253
271
|
|
|
254
|
-
// Add to history
|
|
272
|
+
// Add to history (metadata only)
|
|
255
273
|
Message userMessage;
|
|
256
274
|
userMessage.role = Role::USER;
|
|
257
|
-
userMessage.content = message + " [
|
|
275
|
+
userMessage.content = message + " [Image]";
|
|
258
276
|
history_.push_back(userMessage);
|
|
259
277
|
|
|
260
278
|
Message modelMessage;
|
|
@@ -266,6 +284,11 @@ std::string HybridLiteRTLM::sendMessageWithImage(
|
|
|
266
284
|
|
|
267
285
|
#else
|
|
268
286
|
// Stub: just process text with image path noted
|
|
287
|
+
// Verify file exists at least
|
|
288
|
+
std::ifstream f(imagePath.c_str());
|
|
289
|
+
if (!f.good()) {
|
|
290
|
+
// Don't crash, just log/stub
|
|
291
|
+
}
|
|
269
292
|
return sendMessage(message + " [Image: " + imagePath + "]");
|
|
270
293
|
#endif
|
|
271
294
|
}
|
|
@@ -281,31 +304,41 @@ std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
|
281
304
|
ensureLoaded();
|
|
282
305
|
|
|
283
306
|
#ifdef LITERT_LM_ENABLED
|
|
284
|
-
//
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
// 3. Create litert::lm::AudioData or equivalent
|
|
290
|
-
// 4. Pass to conversation with multimodal content
|
|
307
|
+
// Load audio file
|
|
308
|
+
std::ifstream audioFile(audioPath, std::ios::binary);
|
|
309
|
+
if (!audioFile) {
|
|
310
|
+
throw std::runtime_error("Failed to open audio file: " + audioPath);
|
|
311
|
+
}
|
|
291
312
|
|
|
292
|
-
|
|
293
|
-
|
|
313
|
+
// Simple WAV header skip (simplistic, assuming standard header size for now or raw)
|
|
314
|
+
// Ideally use a WAV parsing library or miniaudio if available.
|
|
315
|
+
// For this implementation, we read the whole file.
|
|
316
|
+
std::vector<uint8_t> audioData((std::istreambuf_iterator<char>(audioFile)), std::istreambuf_iterator<char>());
|
|
294
317
|
|
|
295
318
|
litert::lm::UserMessage lm_message;
|
|
296
319
|
lm_message.role = "user";
|
|
297
|
-
lm_message.content = augmentedMessage;
|
|
298
320
|
|
|
321
|
+
litert::lm::ContentPart textPart;
|
|
322
|
+
textPart.type = litert::lm::ContentType::TEXT;
|
|
323
|
+
textPart.text = message;
|
|
324
|
+
lm_message.parts.push_back(textPart);
|
|
325
|
+
|
|
326
|
+
litert::lm::ContentPart audioPart;
|
|
327
|
+
audioPart.type = litert::lm::ContentType::AUDIO;
|
|
328
|
+
audioPart.audio.data = audioData;
|
|
329
|
+
// Metadata like sample rate might be needed:
|
|
330
|
+
// audioPart.audio.sample_rate = 16000;
|
|
331
|
+
lm_message.parts.push_back(audioPart);
|
|
332
|
+
|
|
299
333
|
auto response = conversation_->SendMessage(lm_message);
|
|
300
334
|
if (!response.ok()) {
|
|
301
335
|
throw std::runtime_error("Audio inference failed: " +
|
|
302
336
|
std::string(response.status().message()));
|
|
303
337
|
}
|
|
304
338
|
|
|
305
|
-
// Add to history
|
|
306
339
|
Message userMessage;
|
|
307
340
|
userMessage.role = Role::USER;
|
|
308
|
-
userMessage.content = message + " [
|
|
341
|
+
userMessage.content = message + " [Audio]";
|
|
309
342
|
history_.push_back(userMessage);
|
|
310
343
|
|
|
311
344
|
Message modelMessage;
|