react-native-litert-lm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +259 -0
- package/android/CMakeLists.txt +32 -0
- package/android/build.gradle +88 -0
- package/android/src/main/AndroidManifest.xml +11 -0
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +280 -0
- package/android/src/main/java/dev/litert/litertlm/LiteRTLMInitProvider.kt +43 -0
- package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +26 -0
- package/cpp/HybridLiteRTLM.cpp +483 -0
- package/cpp/HybridLiteRTLM.hpp +120 -0
- package/cpp/cpp-adapter.cpp +13 -0
- package/cpp/include/README.md +34 -0
- package/lib/index.d.ts +82 -0
- package/lib/index.js +106 -0
- package/lib/specs/LiteRTLM.nitro.d.ts +165 -0
- package/lib/specs/LiteRTLM.nitro.js +2 -0
- package/nitrogen/generated/.gitattributes +1 -0
- package/nitrogen/generated/android/LiteRTLM+autolinking.cmake +81 -0
- package/nitrogen/generated/android/LiteRTLM+autolinking.gradle +27 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +46 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +25 -0
- package/nitrogen/generated/android/c++/JBackend.hpp +61 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__string_bool.hpp +76 -0
- package/nitrogen/generated/android/c++/JGenerationStats.hpp +77 -0
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +133 -0
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +75 -0
- package/nitrogen/generated/android/c++/JLLMConfig.hpp +75 -0
- package/nitrogen/generated/android/c++/JMessage.hpp +63 -0
- package/nitrogen/generated/android/c++/JRole.hpp +61 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Backend.kt +24 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_std__string_bool.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +53 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +98 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +50 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LiteRTLMOnLoad.kt +35 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +41 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Role.kt +24 -0
- package/nitrogen/generated/ios/LiteRTLM+autolinking.rb +60 -0
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +17 -0
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +27 -0
- package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +38 -0
- package/nitrogen/generated/shared/c++/Backend.hpp +80 -0
- package/nitrogen/generated/shared/c++/GenerationStats.hpp +103 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +30 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +82 -0
- package/nitrogen/generated/shared/c++/LLMConfig.hpp +101 -0
- package/nitrogen/generated/shared/c++/Message.hpp +89 -0
- package/nitrogen/generated/shared/c++/Role.hpp +80 -0
- package/package.json +87 -0
- package/react-native-litert-lm.podspec +51 -0
- package/react-native.config.js +16 -0
- package/src/index.ts +125 -0
- package/src/specs/LiteRTLM.nitro.ts +187 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
package dev.litert.litertlm
|
|
2
|
+
|
|
3
|
+
import android.content.ContentProvider
|
|
4
|
+
import android.content.ContentValues
|
|
5
|
+
import android.content.Context
|
|
6
|
+
import android.database.Cursor
|
|
7
|
+
import android.net.Uri
|
|
8
|
+
import android.util.Log
|
|
9
|
+
|
|
10
|
+
/**
 * Inert [ContentProvider] used only to capture the application [Context].
 *
 * [onCreate] stores `context?.applicationContext` in the companion's
 * [applicationContext] property. Every data-access override is a no-op:
 * lookups return `null` and update/delete report zero affected rows.
 */
class LiteRTLMInitProvider : ContentProvider() {

    /**
     * Records the application context (which may still be `null` if the provider
     * has no context) and logs it.
     *
     * @return always `true`.
     */
    override fun onCreate(): Boolean {
        applicationContext = context?.applicationContext
        Log.i(TAG, "LiteRTLMInitProvider initialized with context: $applicationContext")
        return true
    }

    /** No data is served: always returns `null`. */
    override fun query(
        uri: Uri,
        projection: Array<out String>?,
        selection: String?,
        selectionArgs: Array<out String>?,
        sortOrder: String?,
    ): Cursor? {
        return null
    }

    /** No MIME types are reported: always returns `null`. */
    override fun getType(uri: Uri): String? {
        return null
    }

    /** Inserts are ignored: always returns `null`. */
    override fun insert(uri: Uri, values: ContentValues?): Uri? {
        return null
    }

    /** Deletes are ignored: always reports `0` rows. */
    override fun delete(uri: Uri, selection: String?, selectionArgs: Array<out String>?): Int {
        return 0
    }

    /** Updates are ignored: always reports `0` rows. */
    override fun update(
        uri: Uri,
        values: ContentValues?,
        selection: String?,
        selectionArgs: Array<out String>?,
    ): Int {
        return 0
    }

    companion object {
        private const val TAG = "LiteRTLMInitProvider"

        /** Context stored by [onCreate]; `null` until [onCreate] has stored one. */
        var applicationContext: Context? = null
            private set
    }
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
package dev.litert.litertlm
|
|
2
|
+
|
|
3
|
+
import com.facebook.react.TurboReactPackage
|
|
4
|
+
import com.facebook.react.bridge.NativeModule
|
|
5
|
+
import com.facebook.react.bridge.ReactApplicationContext
|
|
6
|
+
import com.facebook.react.module.model.ReactModuleInfo
|
|
7
|
+
import com.facebook.react.module.model.ReactModuleInfoProvider
|
|
8
|
+
import com.margelo.nitro.core.HybridObject
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import com.margelo.nitro.dev.litert.litertlm.LiteRTLMOnLoad
|
|
12
|
+
|
|
13
|
+
/**
 * React Native package entry point for LiteRT-LM.
 *
 * Constructing the package calls [LiteRTLMOnLoad.initializeNative]. The package
 * itself exposes nothing through the [TurboReactPackage] lookups: [getModule]
 * always yields `null` and the module-info provider yields an empty map.
 */
class LiteRTLMPackage : TurboReactPackage() {

    init {
        LiteRTLMOnLoad.initializeNative()
    }

    /** No module is provided by name; always `null`. */
    override fun getModule(name: String, reactContext: ReactApplicationContext): NativeModule? = null

    /** Provider that reports an empty module-info map. */
    override fun getReactModuleInfoProvider(): ReactModuleInfoProvider =
        ReactModuleInfoProvider { emptyMap<String, ReactModuleInfo>() }
}
|
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
//
|
|
2
|
+
// HybridLiteRTLM.cpp
|
|
3
|
+
// react-native-litert-lm
|
|
4
|
+
//
|
|
5
|
+
// High-performance LLM inference using LiteRT-LM.
|
|
6
|
+
//
|
|
7
|
+
// NOTE: This C++ implementation is used for iOS ONLY.
|
|
8
|
+
// Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
|
|
9
|
+
// Do not assume changes here will affect Android.
|
|
10
|
+
//
|
|
11
|
+
|
|
12
|
+
#include "HybridLiteRTLM.hpp"
|
|
13
|
+
|
|
14
|
+
#include <chrono>
|
|
15
|
+
#include <stdexcept>
|
|
16
|
+
#include <sstream>
|
|
17
|
+
|
|
18
|
+
namespace margelo::nitro::litertlm {
|
|
19
|
+
|
|
20
|
+
//------------------------------------------------------------------------------
|
|
21
|
+
// Helper: Format user prompt (applies chat template if needed)
|
|
22
|
+
//------------------------------------------------------------------------------
|
|
23
|
+
// Returns the text to hand to the engine for a user message.
//
// This is currently an identity transform: the input is copied and returned
// unchanged. Per the original author's note, chat templating is expected to be
// handled by the LiteRT-LM Conversation class, so no template is applied here.
std::string HybridLiteRTLM::formatUserPrompt(const std::string& message) const {
  std::string prompt(message);
  return prompt;
}
|
|
29
|
+
|
|
30
|
+
//------------------------------------------------------------------------------
|
|
31
|
+
// Helper: Create a new Conversation from existing Engine
|
|
32
|
+
//------------------------------------------------------------------------------
|
|
33
|
+
// Builds a new litert::lm::Conversation on top of engine_ and stores it in
// conversation_.
//
// Throws std::runtime_error when engine_ is null, or when either LiteRT-LM
// factory call reports a non-OK status (the status message is appended).
// When LITERT_LM_ENABLED is not defined the function body is empty.
void HybridLiteRTLM::createNewConversation() {
#ifdef LITERT_LM_ENABLED
  if (engine_ == nullptr) {
    throw std::runtime_error("Cannot create conversation: engine not initialized");
  }

  auto configOr = litert::lm::ConversationConfig::CreateDefault(*engine_);
  if (!configOr.ok()) {
    const std::string reason(configOr.status().message());
    throw std::runtime_error("Failed to create conversation config: " + reason);
  }

  auto conversationOr = litert::lm::Conversation::Create(*engine_, *configOr);
  if (!conversationOr.ok()) {
    const std::string reason(conversationOr.status().message());
    throw std::runtime_error("Failed to create conversation: " + reason);
  }

  // conversation_ is only replaced once creation has fully succeeded.
  conversation_ = std::move(*conversationOr);
#endif
}
|
|
53
|
+
|
|
54
|
+
//------------------------------------------------------------------------------
|
|
55
|
+
// loadModel - Initialize Engine and Conversation
|
|
56
|
+
//------------------------------------------------------------------------------
|
|
57
|
+
void HybridLiteRTLM::loadModel(
|
|
58
|
+
const std::string& modelPath,
|
|
59
|
+
const std::optional<LLMConfig>& config) {
|
|
60
|
+
|
|
61
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
62
|
+
|
|
63
|
+
// Clean up existing resources
|
|
64
|
+
if (isLoaded_) {
|
|
65
|
+
isLoaded_ = false;
|
|
66
|
+
history_.clear();
|
|
67
|
+
#ifdef LITERT_LM_ENABLED
|
|
68
|
+
conversation_.reset();
|
|
69
|
+
engine_.reset();
|
|
70
|
+
#endif
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Apply configuration
|
|
74
|
+
if (config.has_value()) {
|
|
75
|
+
if (config->backend.has_value()) {
|
|
76
|
+
backend_ = config->backend.value();
|
|
77
|
+
}
|
|
78
|
+
if (config->visionBackend.has_value()) {
|
|
79
|
+
visionBackend_ = config->visionBackend.value();
|
|
80
|
+
}
|
|
81
|
+
if (config->audioBackend.has_value()) {
|
|
82
|
+
audioBackend_ = config->audioBackend.value();
|
|
83
|
+
}
|
|
84
|
+
if (config->temperature.has_value()) {
|
|
85
|
+
temperature_ = config->temperature.value();
|
|
86
|
+
}
|
|
87
|
+
if (config->topK.has_value()) {
|
|
88
|
+
topK_ = config->topK.value();
|
|
89
|
+
}
|
|
90
|
+
if (config->topP.has_value()) {
|
|
91
|
+
topP_ = config->topP.value();
|
|
92
|
+
}
|
|
93
|
+
if (config->maxTokens.has_value()) {
|
|
94
|
+
maxTokens_ = config->maxTokens.value();
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
#ifdef LITERT_LM_ENABLED
|
|
99
|
+
// 1. Create ModelAssets from model path
|
|
100
|
+
auto model_assets = litert::lm::ModelAssets::Create(modelPath);
|
|
101
|
+
if (!model_assets.ok()) {
|
|
102
|
+
throw std::runtime_error("Failed to load model assets: " +
|
|
103
|
+
std::string(model_assets.status().message()));
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// 2. Map our Backend enum to LiteRT-LM Backend enum
|
|
107
|
+
auto engine_backend = (backend_ == Backend::GPU)
|
|
108
|
+
? litert::lm::Backend::GPU
|
|
109
|
+
: litert::lm::Backend::CPU;
|
|
110
|
+
auto vision_backend = (visionBackend_ == Backend::GPU)
|
|
111
|
+
? litert::lm::Backend::GPU
|
|
112
|
+
: litert::lm::Backend::CPU;
|
|
113
|
+
auto audio_backend = (audioBackend_ == Backend::GPU)
|
|
114
|
+
? litert::lm::Backend::GPU
|
|
115
|
+
: litert::lm::Backend::CPU;
|
|
116
|
+
|
|
117
|
+
// 3. Create EngineSettings with all backends
|
|
118
|
+
auto engine_settings = litert::lm::EngineSettings::CreateDefault(
|
|
119
|
+
*model_assets,
|
|
120
|
+
engine_backend,
|
|
121
|
+
vision_backend,
|
|
122
|
+
audio_backend
|
|
123
|
+
);
|
|
124
|
+
|
|
125
|
+
// 4. Create the Engine (heavyweight - loads model weights)
|
|
126
|
+
auto engine = litert::lm::Engine::CreateEngine(engine_settings);
|
|
127
|
+
if (!engine.ok()) {
|
|
128
|
+
throw std::runtime_error("Failed to create engine: " +
|
|
129
|
+
std::string(engine.status().message()));
|
|
130
|
+
}
|
|
131
|
+
engine_ = std::move(*engine);
|
|
132
|
+
|
|
133
|
+
// 5. Create the Conversation (lightweight - holds KV cache)
|
|
134
|
+
createNewConversation();
|
|
135
|
+
|
|
136
|
+
#endif // LITERT_LM_ENABLED
|
|
137
|
+
|
|
138
|
+
isLoaded_ = true;
|
|
139
|
+
history_.clear();
|
|
140
|
+
|
|
141
|
+
// Reset stats
|
|
142
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
//------------------------------------------------------------------------------
|
|
146
|
+
// sendMessage - Blocking text inference
|
|
147
|
+
//------------------------------------------------------------------------------
|
|
148
|
+
std::string HybridLiteRTLM::sendMessage(const std::string& message) {
|
|
149
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
150
|
+
ensureLoaded();
|
|
151
|
+
|
|
152
|
+
auto startTime = std::chrono::high_resolution_clock::now();
|
|
153
|
+
|
|
154
|
+
// Add user message to history
|
|
155
|
+
Message userMessage;
|
|
156
|
+
userMessage.role = Role::USER;
|
|
157
|
+
userMessage.content = message;
|
|
158
|
+
history_.push_back(userMessage);
|
|
159
|
+
|
|
160
|
+
std::string responseText;
|
|
161
|
+
|
|
162
|
+
#ifdef LITERT_LM_ENABLED
|
|
163
|
+
// Build the message struct for LiteRT-LM
|
|
164
|
+
// The Conversation API expects a structured input
|
|
165
|
+
litert::lm::UserMessage lm_message;
|
|
166
|
+
lm_message.role = "user";
|
|
167
|
+
lm_message.content = message;
|
|
168
|
+
|
|
169
|
+
auto response = conversation_->SendMessage(lm_message);
|
|
170
|
+
if (!response.ok()) {
|
|
171
|
+
// Remove the user message we just added since inference failed
|
|
172
|
+
history_.pop_back();
|
|
173
|
+
throw std::runtime_error("Inference failed: " +
|
|
174
|
+
std::string(response.status().message()));
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
responseText = response->content;
|
|
178
|
+
|
|
179
|
+
// Update stats from response if available
|
|
180
|
+
if (response->stats.has_value()) {
|
|
181
|
+
const auto& stats = response->stats.value();
|
|
182
|
+
lastStats_.promptTokens = static_cast<double>(stats.prompt_tokens);
|
|
183
|
+
lastStats_.completionTokens = static_cast<double>(stats.completion_tokens);
|
|
184
|
+
lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
|
|
185
|
+
lastStats_.timeToFirstToken = stats.time_to_first_token_ms;
|
|
186
|
+
lastStats_.totalTime = stats.total_time_ms;
|
|
187
|
+
lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
|
|
188
|
+
? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
|
|
189
|
+
: 0.0;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
#else
|
|
193
|
+
// Stub response when LiteRT-LM is not available
|
|
194
|
+
responseText = "[LiteRT-LM Stub] Model response placeholder. "
|
|
195
|
+
"Real inference will be available when LiteRT-LM libraries are integrated. "
|
|
196
|
+
"You said: " + message;
|
|
197
|
+
|
|
198
|
+
auto endTime = std::chrono::high_resolution_clock::now();
|
|
199
|
+
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
|
|
200
|
+
|
|
201
|
+
// Estimate stats for stub
|
|
202
|
+
lastStats_.promptTokens = static_cast<double>(message.length() / 4);
|
|
203
|
+
lastStats_.completionTokens = static_cast<double>(responseText.length() / 4);
|
|
204
|
+
lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
|
|
205
|
+
lastStats_.totalTime = static_cast<double>(duration);
|
|
206
|
+
lastStats_.timeToFirstToken = lastStats_.totalTime / 2;
|
|
207
|
+
lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
|
|
208
|
+
? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
|
|
209
|
+
: 0;
|
|
210
|
+
#endif
|
|
211
|
+
|
|
212
|
+
// Add model response to history
|
|
213
|
+
Message modelMessage;
|
|
214
|
+
modelMessage.role = Role::MODEL;
|
|
215
|
+
modelMessage.content = responseText;
|
|
216
|
+
history_.push_back(modelMessage);
|
|
217
|
+
|
|
218
|
+
return responseText;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
//------------------------------------------------------------------------------
|
|
222
|
+
// sendMessageWithImage - Multimodal image + text
|
|
223
|
+
//------------------------------------------------------------------------------
|
|
224
|
+
std::string HybridLiteRTLM::sendMessageWithImage(
|
|
225
|
+
const std::string& message,
|
|
226
|
+
const std::string& imagePath) {
|
|
227
|
+
|
|
228
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
229
|
+
ensureLoaded();
|
|
230
|
+
|
|
231
|
+
#ifdef LITERT_LM_ENABLED
|
|
232
|
+
// TODO: Load image file into raw pixel buffer
|
|
233
|
+
// The Engine expects raw RGBA/RGB data, not a file path.
|
|
234
|
+
// Implementation should:
|
|
235
|
+
// 1. Read image file (using stb_image.h or Android Bitmap JNI)
|
|
236
|
+
// 2. Decode to raw pixel buffer (std::vector<uint8_t>)
|
|
237
|
+
// 3. Create litert::lm::ImageData or equivalent tensor
|
|
238
|
+
// 4. Pass to conversation_->SendMessage with multimodal content
|
|
239
|
+
|
|
240
|
+
// For now, fall back to text-only with a note about the image
|
|
241
|
+
std::string augmentedMessage = message + " [Image attached: " + imagePath +
|
|
242
|
+
" - Note: Image processing not yet implemented, text-only response]";
|
|
243
|
+
|
|
244
|
+
litert::lm::UserMessage lm_message;
|
|
245
|
+
lm_message.role = "user";
|
|
246
|
+
lm_message.content = augmentedMessage;
|
|
247
|
+
|
|
248
|
+
auto response = conversation_->SendMessage(lm_message);
|
|
249
|
+
if (!response.ok()) {
|
|
250
|
+
throw std::runtime_error("Multimodal inference failed: " +
|
|
251
|
+
std::string(response.status().message()));
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// Add to history
|
|
255
|
+
Message userMessage;
|
|
256
|
+
userMessage.role = Role::USER;
|
|
257
|
+
userMessage.content = message + " [with image]";
|
|
258
|
+
history_.push_back(userMessage);
|
|
259
|
+
|
|
260
|
+
Message modelMessage;
|
|
261
|
+
modelMessage.role = Role::MODEL;
|
|
262
|
+
modelMessage.content = response->content;
|
|
263
|
+
history_.push_back(modelMessage);
|
|
264
|
+
|
|
265
|
+
return response->content;
|
|
266
|
+
|
|
267
|
+
#else
|
|
268
|
+
// Stub: just process text with image path noted
|
|
269
|
+
return sendMessage(message + " [Image: " + imagePath + "]");
|
|
270
|
+
#endif
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
//------------------------------------------------------------------------------
|
|
274
|
+
// sendMessageWithAudio - Multimodal audio + text
|
|
275
|
+
//------------------------------------------------------------------------------
|
|
276
|
+
std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
277
|
+
const std::string& message,
|
|
278
|
+
const std::string& audioPath) {
|
|
279
|
+
|
|
280
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
281
|
+
ensureLoaded();
|
|
282
|
+
|
|
283
|
+
#ifdef LITERT_LM_ENABLED
|
|
284
|
+
// TODO: Load audio file into raw sample buffer
|
|
285
|
+
// Similar to image - Engine expects raw audio samples, not file path.
|
|
286
|
+
// Implementation should:
|
|
287
|
+
// 1. Read WAV file header and samples
|
|
288
|
+
// 2. Convert to expected format (likely 16kHz mono float32)
|
|
289
|
+
// 3. Create litert::lm::AudioData or equivalent
|
|
290
|
+
// 4. Pass to conversation with multimodal content
|
|
291
|
+
|
|
292
|
+
std::string augmentedMessage = message + " [Audio attached: " + audioPath +
|
|
293
|
+
" - Note: Audio processing not yet implemented, text-only response]";
|
|
294
|
+
|
|
295
|
+
litert::lm::UserMessage lm_message;
|
|
296
|
+
lm_message.role = "user";
|
|
297
|
+
lm_message.content = augmentedMessage;
|
|
298
|
+
|
|
299
|
+
auto response = conversation_->SendMessage(lm_message);
|
|
300
|
+
if (!response.ok()) {
|
|
301
|
+
throw std::runtime_error("Audio inference failed: " +
|
|
302
|
+
std::string(response.status().message()));
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
// Add to history
|
|
306
|
+
Message userMessage;
|
|
307
|
+
userMessage.role = Role::USER;
|
|
308
|
+
userMessage.content = message + " [with audio]";
|
|
309
|
+
history_.push_back(userMessage);
|
|
310
|
+
|
|
311
|
+
Message modelMessage;
|
|
312
|
+
modelMessage.role = Role::MODEL;
|
|
313
|
+
modelMessage.content = response->content;
|
|
314
|
+
history_.push_back(modelMessage);
|
|
315
|
+
|
|
316
|
+
return response->content;
|
|
317
|
+
|
|
318
|
+
#else
|
|
319
|
+
// Stub: just process text with audio path noted
|
|
320
|
+
return sendMessage(message + " [Audio: " + audioPath + "]");
|
|
321
|
+
#endif
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
//------------------------------------------------------------------------------
|
|
325
|
+
// sendMessageAsync - Streaming token generation
|
|
326
|
+
//------------------------------------------------------------------------------
|
|
327
|
+
void HybridLiteRTLM::sendMessageAsync(
|
|
328
|
+
const std::string& message,
|
|
329
|
+
const std::function<void(std::string, bool)>& onToken) {
|
|
330
|
+
|
|
331
|
+
// Note: We don't hold the lock during the entire async operation
|
|
332
|
+
// to avoid blocking other operations. The callback may be invoked
|
|
333
|
+
// from a different thread depending on LiteRT-LM's implementation.
|
|
334
|
+
|
|
335
|
+
{
|
|
336
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
337
|
+
ensureLoaded();
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
#ifdef LITERT_LM_ENABLED
|
|
341
|
+
// Add user message to history before starting
|
|
342
|
+
{
|
|
343
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
344
|
+
Message userMessage;
|
|
345
|
+
userMessage.role = Role::USER;
|
|
346
|
+
userMessage.content = message;
|
|
347
|
+
history_.push_back(userMessage);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
litert::lm::UserMessage lm_message;
|
|
351
|
+
lm_message.role = "user";
|
|
352
|
+
lm_message.content = message;
|
|
353
|
+
|
|
354
|
+
std::string fullResponse;
|
|
355
|
+
|
|
356
|
+
// The callback needs to be carefully managed for thread safety
|
|
357
|
+
auto status = conversation_->SendMessageAsync(
|
|
358
|
+
lm_message,
|
|
359
|
+
[this, &onToken, &fullResponse](const std::string& token, bool isDone) {
|
|
360
|
+
fullResponse += token;
|
|
361
|
+
|
|
362
|
+
// Invoke the JS callback (Nitro handles thread marshalling)
|
|
363
|
+
onToken(token, isDone);
|
|
364
|
+
|
|
365
|
+
if (isDone) {
|
|
366
|
+
// Add complete response to history
|
|
367
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
368
|
+
Message modelMessage;
|
|
369
|
+
modelMessage.role = Role::MODEL;
|
|
370
|
+
modelMessage.content = fullResponse;
|
|
371
|
+
history_.push_back(modelMessage);
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
);
|
|
375
|
+
|
|
376
|
+
if (!status.ok()) {
|
|
377
|
+
// Remove user message since inference failed
|
|
378
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
379
|
+
if (!history_.empty()) {
|
|
380
|
+
history_.pop_back();
|
|
381
|
+
}
|
|
382
|
+
throw std::runtime_error("Async inference failed: " +
|
|
383
|
+
std::string(status.message()));
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
#else
|
|
387
|
+
// Stub: Simulate streaming by calling sendMessage and splitting response
|
|
388
|
+
std::string fullResponse;
|
|
389
|
+
{
|
|
390
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
391
|
+
|
|
392
|
+
// Add user message
|
|
393
|
+
Message userMessage;
|
|
394
|
+
userMessage.role = Role::USER;
|
|
395
|
+
userMessage.content = message;
|
|
396
|
+
history_.push_back(userMessage);
|
|
397
|
+
|
|
398
|
+
fullResponse = "[LiteRT-LM Stub] Streaming response placeholder. You said: " + message;
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
// Simulate token-by-token streaming
|
|
402
|
+
std::string currentWord;
|
|
403
|
+
for (size_t i = 0; i < fullResponse.length(); i++) {
|
|
404
|
+
char c = fullResponse[i];
|
|
405
|
+
currentWord += c;
|
|
406
|
+
|
|
407
|
+
if (c == ' ' || c == '\n' || i == fullResponse.length() - 1) {
|
|
408
|
+
bool isDone = (i == fullResponse.length() - 1);
|
|
409
|
+
onToken(currentWord, isDone);
|
|
410
|
+
currentWord.clear();
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
// Add model response to history
|
|
415
|
+
{
|
|
416
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
417
|
+
Message modelMessage;
|
|
418
|
+
modelMessage.role = Role::MODEL;
|
|
419
|
+
modelMessage.content = fullResponse;
|
|
420
|
+
history_.push_back(modelMessage);
|
|
421
|
+
}
|
|
422
|
+
#endif
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
//------------------------------------------------------------------------------
|
|
426
|
+
// getHistory - Return conversation history
|
|
427
|
+
//------------------------------------------------------------------------------
|
|
428
|
+
std::vector<Message> HybridLiteRTLM::getHistory() {
|
|
429
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
430
|
+
return history_;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
//------------------------------------------------------------------------------
|
|
434
|
+
// resetConversation - Clear KV cache, keep engine
|
|
435
|
+
//------------------------------------------------------------------------------
|
|
436
|
+
void HybridLiteRTLM::resetConversation() {
|
|
437
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
438
|
+
|
|
439
|
+
#ifdef LITERT_LM_ENABLED
|
|
440
|
+
// Destroy old conversation and create a new one
|
|
441
|
+
// This clears the KV cache but keeps the (expensive) Engine loaded
|
|
442
|
+
if (engine_) {
|
|
443
|
+
conversation_.reset();
|
|
444
|
+
createNewConversation();
|
|
445
|
+
}
|
|
446
|
+
#endif
|
|
447
|
+
|
|
448
|
+
history_.clear();
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
//------------------------------------------------------------------------------
|
|
452
|
+
// isReady - Check if model is loaded
|
|
453
|
+
//------------------------------------------------------------------------------
|
|
454
|
+
bool HybridLiteRTLM::isReady() {
|
|
455
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
456
|
+
return isLoaded_;
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
//------------------------------------------------------------------------------
|
|
460
|
+
// getStats - Return last generation statistics
|
|
461
|
+
//------------------------------------------------------------------------------
|
|
462
|
+
// Returns a copy of the statistics recorded for the last generation, read
// under mutex_.
GenerationStats HybridLiteRTLM::getStats() {
  std::lock_guard<std::mutex> guard(mutex_);
  GenerationStats snapshot = lastStats_;
  return snapshot;
}
|
|
466
|
+
|
|
467
|
+
//------------------------------------------------------------------------------
|
|
468
|
+
// close - Release all native resources
|
|
469
|
+
//------------------------------------------------------------------------------
|
|
470
|
+
void HybridLiteRTLM::close() {
|
|
471
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
472
|
+
|
|
473
|
+
#ifdef LITERT_LM_ENABLED
|
|
474
|
+
// Release in reverse order of creation
|
|
475
|
+
conversation_.reset();
|
|
476
|
+
engine_.reset();
|
|
477
|
+
#endif
|
|
478
|
+
|
|
479
|
+
isLoaded_ = false;
|
|
480
|
+
history_.clear();
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
} // namespace margelo::nitro::litertlm
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
//
|
|
2
|
+
// HybridLiteRTLM.hpp
|
|
3
|
+
// react-native-litert-lm
|
|
4
|
+
//
|
|
5
|
+
// High-performance LLM inference using LiteRT-LM.
|
|
6
|
+
// Supports Gemma 3n and other .litertlm models.
|
|
7
|
+
//
|
|
8
|
+
// NOTE: This C++ implementation is used for iOS ONLY.
|
|
9
|
+
// Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
|
|
10
|
+
// Do not assume changes here will affect Android.
|
|
11
|
+
//
|
|
12
|
+
|
|
13
|
+
#pragma once
|
|
14
|
+
|
|
15
|
+
#include "../nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp"
|
|
16
|
+
|
|
17
|
+
// LiteRT-LM headers (conditionally included when available via Prefab/CMake)
|
|
18
|
+
#ifdef LITERT_LM_ENABLED
|
|
19
|
+
#include "litert/lm/engine.h"
|
|
20
|
+
#include "litert/lm/conversation.h"
|
|
21
|
+
#include "litert/lm/types.h"
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
#include <string>
|
|
25
|
+
#include <optional>
|
|
26
|
+
#include <vector>
|
|
27
|
+
#include <memory>
|
|
28
|
+
#include <mutex>
|
|
29
|
+
#include <functional>
|
|
30
|
+
|
|
31
|
+
namespace margelo::nitro::litertlm {
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* HybridLiteRTLM: React Native bindings for LiteRT-LM.
|
|
35
|
+
*
|
|
36
|
+
* Wraps LiteRT-LM's Engine and Conversation classes to provide
|
|
37
|
+
* high-level LLM inference with GPU acceleration.
|
|
38
|
+
*/
|
|
39
|
+
class HybridLiteRTLM : public HybridLiteRTLMSpec {
|
|
40
|
+
public:
|
|
41
|
+
HybridLiteRTLM() : HybridObject(TAG) {}
|
|
42
|
+
|
|
43
|
+
~HybridLiteRTLM() override {
|
|
44
|
+
close();
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Prevent copying
|
|
48
|
+
HybridLiteRTLM(const HybridLiteRTLM&) = delete;
|
|
49
|
+
HybridLiteRTLM& operator=(const HybridLiteRTLM&) = delete;
|
|
50
|
+
|
|
51
|
+
public:
|
|
52
|
+
// HybridLiteRTLMSpec interface implementation
|
|
53
|
+
|
|
54
|
+
void loadModel(const std::string& modelPath,
|
|
55
|
+
const std::optional<LLMConfig>& config) override;
|
|
56
|
+
|
|
57
|
+
std::string sendMessage(const std::string& message) override;
|
|
58
|
+
|
|
59
|
+
std::string sendMessageWithImage(const std::string& message,
|
|
60
|
+
const std::string& imagePath) override;
|
|
61
|
+
|
|
62
|
+
std::string sendMessageWithAudio(const std::string& message,
|
|
63
|
+
const std::string& audioPath) override;
|
|
64
|
+
|
|
65
|
+
void sendMessageAsync(
|
|
66
|
+
const std::string& message,
|
|
67
|
+
const std::function<void(std::string, bool)>& onToken
|
|
68
|
+
) override;
|
|
69
|
+
|
|
70
|
+
std::vector<Message> getHistory() override;
|
|
71
|
+
|
|
72
|
+
void resetConversation() override;
|
|
73
|
+
|
|
74
|
+
bool isReady() override;
|
|
75
|
+
|
|
76
|
+
GenerationStats getStats() override;
|
|
77
|
+
|
|
78
|
+
void close() override;
|
|
79
|
+
|
|
80
|
+
private:
|
|
81
|
+
// LiteRT-LM resources (conditionally available on Android with Prefab)
|
|
82
|
+
#ifdef LITERT_LM_ENABLED
|
|
83
|
+
std::unique_ptr<litert::lm::Engine> engine_;
|
|
84
|
+
std::unique_ptr<litert::lm::Conversation> conversation_;
|
|
85
|
+
#endif
|
|
86
|
+
|
|
87
|
+
// State
|
|
88
|
+
bool isLoaded_ = false;
|
|
89
|
+
std::vector<Message> history_;
|
|
90
|
+
GenerationStats lastStats_{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
91
|
+
|
|
92
|
+
// Thread safety
|
|
93
|
+
mutable std::mutex mutex_;
|
|
94
|
+
|
|
95
|
+
// Configuration - backends
|
|
96
|
+
Backend backend_ = Backend::GPU;
|
|
97
|
+
Backend visionBackend_ = Backend::GPU; // Gemma 3n requires GPU for vision
|
|
98
|
+
Backend audioBackend_ = Backend::CPU; // Audio typically CPU
|
|
99
|
+
|
|
100
|
+
// Configuration - sampling parameters
|
|
101
|
+
double temperature_ = 0.7;
|
|
102
|
+
double topK_ = 40.0;
|
|
103
|
+
double topP_ = 0.95;
|
|
104
|
+
double maxTokens_ = 1024.0;
|
|
105
|
+
|
|
106
|
+
// Helper to ensure model is loaded
|
|
107
|
+
void ensureLoaded() const {
|
|
108
|
+
if (!isLoaded_) {
|
|
109
|
+
throw std::runtime_error("LiteRTLM: No model loaded. Call loadModel() first.");
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Helper to format a message for the engine (apply chat template if needed)
|
|
114
|
+
std::string formatUserPrompt(const std::string& message) const;
|
|
115
|
+
|
|
116
|
+
// Helper to create a new conversation from existing engine
|
|
117
|
+
void createNewConversation();
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
} // namespace margelo::nitro::litertlm
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
///
|
|
2
|
+
/// cpp-adapter.cpp
|
|
3
|
+
/// JNI Entry Point - Required by Nitrogen to register Kotlin HybridObjects
|
|
4
|
+
///
|
|
5
|
+
|
|
6
|
+
#include <jni.h>
|
|
7
|
+
#include "LiteRTLMOnLoad.hpp"
|
|
8
|
+
|
|
9
|
+
// JNI_OnLoad is called when the native library is loaded via System.loadLibrary()
|
|
10
|
+
// This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects
|
|
11
|
+
// Library load hook: hands the JavaVM to the Nitrogen-generated initializer and
// returns whatever that initializer returns. The second JNI argument is unused.
JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* /* reserved */) {
  const auto result = margelo::nitro::litertlm::initialize(vm);
  return result;
}
|