react-native-litert-lm 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -10,12 +10,12 @@ High-performance LLM inference for React Native powered by [LiteRT-LM](https://g
|
|
|
10
10
|
- 📦 **Bundled Tokenizer** - No separate tokenization library needed
|
|
11
11
|
- ๐ **Streaming Support** - Token-by-token generation callbacks
|
|
12
12
|
- 📱 **Cross-Platform** - Android API 26+
|
|
13
|
-
-
|
|
13
|
+
- 🖼️ **Multimodal** - Image and audio input support (Android Beta, iOS coming soon)
|
|
14
14
|
- 🧵 **Async API** - Non-blocking inference to prevent UI freezes
|
|
15
15
|
|
|
16
16
|
## Status
|
|
17
17
|
|
|
18
|
-
> ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub
|
|
18
|
+
> ⚠️ **Early Preview**: This library is under active development. Android is functional with enough RAM, iOS implementation pending LiteRT-LM iOS release. Please report any issues on the [GitHub issues](https://github.com/hung-yueh/react-native-litert-lm/issues).
|
|
19
19
|
|
|
20
20
|
## Installation
|
|
21
21
|
|
|
@@ -114,15 +114,14 @@ llm.sendMessageAsync("Tell me a story", (token, done) => {
|
|
|
114
114
|
### Multimodal (Image/Audio)
|
|
115
115
|
|
|
116
116
|
```typescript
|
|
117
|
-
// Image input (for vision models)
|
|
118
|
-
//
|
|
117
|
+
// Image input (for vision models like Gemma 3n)
|
|
118
|
+
// ⚠️ Ensure model is loaded with { maxTokens: 1024+ }
|
|
119
119
|
const response = await llm.sendMessageWithImage(
|
|
120
120
|
"What's in this image?",
|
|
121
121
|
"/path/to/image.jpg",
|
|
122
122
|
);
|
|
123
123
|
|
|
124
124
|
// Audio input (for audio models)
|
|
125
|
-
// Note: Currently throws error on Android (Coming Soon)
|
|
126
125
|
const transcription = await llm.sendMessageWithAudio(
|
|
127
126
|
"Transcribe this audio",
|
|
128
127
|
"/path/to/audio.wav",
|
|
@@ -20,8 +20,11 @@ import com.margelo.nitro.dev.litert.litertlm.LLMConfig
|
|
|
20
20
|
import com.margelo.nitro.dev.litert.litertlm.Message
|
|
21
21
|
import com.margelo.nitro.dev.litert.litertlm.Role
|
|
22
22
|
import com.margelo.nitro.core.Promise
|
|
23
|
+
import com.google.ai.edge.litertlm.Content
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
// Alias to avoid confusion with our generated Message type
|
|
27
|
+
// Alias to avoid confusion
|
|
25
28
|
typealias LiteRTMessage = com.google.ai.edge.litertlm.Message
|
|
26
29
|
|
|
27
30
|
/**
|
|
@@ -232,15 +235,60 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
232
235
|
// -------------------------------------------------------------------------
|
|
233
236
|
override fun sendMessageWithImage(message: String, imagePath: String): Promise<String> {
|
|
234
237
|
return Promise.parallel {
|
|
235
|
-
|
|
236
|
-
|
|
238
|
+
ensureLoaded()
|
|
239
|
+
Log.i(TAG, "sendMessageWithImage: $message, path=$imagePath")
|
|
240
|
+
|
|
241
|
+
// Create multimodal message
|
|
242
|
+
// Use factory method Message.of passing a list of Content
|
|
243
|
+
val textContent = Content.Text(message)
|
|
244
|
+
|
|
245
|
+
val contentList = listOf(
|
|
246
|
+
textContent,
|
|
247
|
+
Content.ImageFile(imagePath)
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
val userMsg = LiteRTMessage.of(contentList)
|
|
251
|
+
|
|
252
|
+
// Add to history
|
|
253
|
+
history.add(Message(Role.USER, "$message [Image]"))
|
|
254
|
+
|
|
255
|
+
val responseMsg = conversation!!.sendMessage(userMsg)
|
|
256
|
+
|
|
257
|
+
val response = responseMsg.contents
|
|
258
|
+
.filterIsInstance<Content.Text>()
|
|
259
|
+
.joinToString("") { it.text }
|
|
260
|
+
|
|
261
|
+
history.add(Message(Role.MODEL, response))
|
|
262
|
+
|
|
263
|
+
response
|
|
237
264
|
}
|
|
238
265
|
}
|
|
239
266
|
|
|
240
267
|
override fun sendMessageWithAudio(message: String, audioPath: String): Promise<String> {
|
|
241
268
|
return Promise.parallel {
|
|
242
|
-
|
|
243
|
-
|
|
269
|
+
ensureLoaded()
|
|
270
|
+
Log.i(TAG, "sendMessageWithAudio: $message, path=$audioPath")
|
|
271
|
+
|
|
272
|
+
// Load audio
|
|
273
|
+
|
|
274
|
+
val contentList = listOf(
|
|
275
|
+
Content.Text(message),
|
|
276
|
+
Content.AudioFile(audioPath)
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
val userMsg = LiteRTMessage.of(contentList)
|
|
280
|
+
|
|
281
|
+
history.add(Message(Role.USER, "$message [Audio]"))
|
|
282
|
+
|
|
283
|
+
val responseMsg = conversation!!.sendMessage(userMsg)
|
|
284
|
+
|
|
285
|
+
val response = responseMsg.contents
|
|
286
|
+
.filterIsInstance<Content.Text>()
|
|
287
|
+
.joinToString("") { it.text }
|
|
288
|
+
|
|
289
|
+
history.add(Message(Role.MODEL, response))
|
|
290
|
+
|
|
291
|
+
response
|
|
244
292
|
}
|
|
245
293
|
}
|
|
246
294
|
|
|
@@ -290,4 +338,6 @@ class HybridLiteRTLM : HybridLiteRTLMSpec() {
|
|
|
290
338
|
// Dispose old conversation if needed
|
|
291
339
|
conversation = engine!!.createConversation()
|
|
292
340
|
}
|
|
341
|
+
|
|
342
|
+
|
|
293
343
|
}
|
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -11,9 +11,13 @@
|
|
|
11
11
|
|
|
12
12
|
#include "HybridLiteRTLM.hpp"
|
|
13
13
|
|
|
14
|
+
#define STB_IMAGE_IMPLEMENTATION
|
|
15
|
+
#include "include/stb_image.h"
|
|
16
|
+
|
|
14
17
|
#include <chrono>
|
|
15
18
|
#include <stdexcept>
|
|
16
19
|
#include <sstream>
|
|
20
|
+
#include <fstream>
|
|
17
21
|
|
|
18
22
|
namespace margelo::nitro::litertlm {
|
|
19
23
|
|
|
@@ -229,32 +233,46 @@ std::string HybridLiteRTLM::sendMessageWithImage(
|
|
|
229
233
|
ensureLoaded();
|
|
230
234
|
|
|
231
235
|
#ifdef LITERT_LM_ENABLED
|
|
232
|
-
//
|
|
233
|
-
|
|
234
|
-
//
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
//
|
|
241
|
-
|
|
242
|
-
" - Note: Image processing not yet implemented, text-only response]";
|
|
243
|
-
|
|
236
|
+
// Load image using stb_image
|
|
237
|
+
int width, height, channels;
|
|
238
|
+
unsigned char* img = stbi_load(imagePath.c_str(), &width, &height, &channels, 3); // Force 3 channels (RGB)
|
|
239
|
+
if (img == nullptr) {
|
|
240
|
+
throw std::runtime_error("Failed to load image from path: " + imagePath);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// Create input tensor/buffer for the engine.
|
|
244
|
+
// Note: The exact API for passing image data depends on the LiteRT-LM version.
|
|
245
|
+
// Assuming a structure that accepts raw bytes and dimensions.
|
|
244
246
|
litert::lm::UserMessage lm_message;
|
|
245
247
|
lm_message.role = "user";
|
|
246
|
-
lm_message.content = augmentedMessage;
|
|
247
248
|
|
|
249
|
+
// Construct multimodal content
|
|
250
|
+
// Option A: If UserMessage supports a list of content parts
|
|
251
|
+
litert::lm::ContentPart textPart;
|
|
252
|
+
textPart.type = litert::lm::ContentType::TEXT;
|
|
253
|
+
textPart.text = message;
|
|
254
|
+
lm_message.parts.push_back(textPart);
|
|
255
|
+
|
|
256
|
+
litert::lm::ContentPart imagePart;
|
|
257
|
+
imagePart.type = litert::lm::ContentType::IMAGE;
|
|
258
|
+
imagePart.image.width = width;
|
|
259
|
+
imagePart.image.height = height;
|
|
260
|
+
imagePart.image.channels = channels;
|
|
261
|
+
imagePart.image.data = std::vector<uint8_t>(img, img + (width * height * channels));
|
|
262
|
+
lm_message.parts.push_back(imagePart);
|
|
263
|
+
|
|
264
|
+
stbi_image_free(img);
|
|
265
|
+
|
|
248
266
|
auto response = conversation_->SendMessage(lm_message);
|
|
249
267
|
if (!response.ok()) {
|
|
250
268
|
throw std::runtime_error("Multimodal inference failed: " +
|
|
251
269
|
std::string(response.status().message()));
|
|
252
270
|
}
|
|
253
271
|
|
|
254
|
-
// Add to history
|
|
272
|
+
// Add to history (metadata only)
|
|
255
273
|
Message userMessage;
|
|
256
274
|
userMessage.role = Role::USER;
|
|
257
|
-
userMessage.content = message + " [
|
|
275
|
+
userMessage.content = message + " [Image]";
|
|
258
276
|
history_.push_back(userMessage);
|
|
259
277
|
|
|
260
278
|
Message modelMessage;
|
|
@@ -266,6 +284,11 @@ std::string HybridLiteRTLM::sendMessageWithImage(
|
|
|
266
284
|
|
|
267
285
|
#else
|
|
268
286
|
// Stub: just process text with image path noted
|
|
287
|
+
// Verify file exists at least
|
|
288
|
+
std::ifstream f(imagePath.c_str());
|
|
289
|
+
if (!f.good()) {
|
|
290
|
+
// Don't crash, just log/stub
|
|
291
|
+
}
|
|
269
292
|
return sendMessage(message + " [Image: " + imagePath + "]");
|
|
270
293
|
#endif
|
|
271
294
|
}
|
|
@@ -281,31 +304,41 @@ std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
|
281
304
|
ensureLoaded();
|
|
282
305
|
|
|
283
306
|
#ifdef LITERT_LM_ENABLED
|
|
284
|
-
//
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
// 3. Create litert::lm::AudioData or equivalent
|
|
290
|
-
// 4. Pass to conversation with multimodal content
|
|
307
|
+
// Load audio file
|
|
308
|
+
std::ifstream audioFile(audioPath, std::ios::binary);
|
|
309
|
+
if (!audioFile) {
|
|
310
|
+
throw std::runtime_error("Failed to open audio file: " + audioPath);
|
|
311
|
+
}
|
|
291
312
|
|
|
292
|
-
|
|
293
|
-
|
|
313
|
+
// Simple WAV header skip (simplistic, assuming standard header size for now or raw)
|
|
314
|
+
// Ideally use a WAV parsing library or miniaudio if available.
|
|
315
|
+
// For this implementation, we read the whole file.
|
|
316
|
+
std::vector<uint8_t> audioData((std::istreambuf_iterator<char>(audioFile)), std::istreambuf_iterator<char>());
|
|
294
317
|
|
|
295
318
|
litert::lm::UserMessage lm_message;
|
|
296
319
|
lm_message.role = "user";
|
|
297
|
-
lm_message.content = augmentedMessage;
|
|
298
320
|
|
|
321
|
+
litert::lm::ContentPart textPart;
|
|
322
|
+
textPart.type = litert::lm::ContentType::TEXT;
|
|
323
|
+
textPart.text = message;
|
|
324
|
+
lm_message.parts.push_back(textPart);
|
|
325
|
+
|
|
326
|
+
litert::lm::ContentPart audioPart;
|
|
327
|
+
audioPart.type = litert::lm::ContentType::AUDIO;
|
|
328
|
+
audioPart.audio.data = audioData;
|
|
329
|
+
// Metadata like sample rate might be needed:
|
|
330
|
+
// audioPart.audio.sample_rate = 16000;
|
|
331
|
+
lm_message.parts.push_back(audioPart);
|
|
332
|
+
|
|
299
333
|
auto response = conversation_->SendMessage(lm_message);
|
|
300
334
|
if (!response.ok()) {
|
|
301
335
|
throw std::runtime_error("Audio inference failed: " +
|
|
302
336
|
std::string(response.status().message()));
|
|
303
337
|
}
|
|
304
338
|
|
|
305
|
-
// Add to history
|
|
306
339
|
Message userMessage;
|
|
307
340
|
userMessage.role = Role::USER;
|
|
308
|
-
userMessage.content = message + " [
|
|
341
|
+
userMessage.content = message + " [Audio]";
|
|
309
342
|
history_.push_back(userMessage);
|
|
310
343
|
|
|
311
344
|
Message modelMessage;
|