react-native-litert-lm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/LICENSE +21 -0
  2. package/README.md +259 -0
  3. package/android/CMakeLists.txt +32 -0
  4. package/android/build.gradle +88 -0
  5. package/android/src/main/AndroidManifest.xml +11 -0
  6. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +280 -0
  7. package/android/src/main/java/dev/litert/litertlm/LiteRTLMInitProvider.kt +43 -0
  8. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +26 -0
  9. package/cpp/HybridLiteRTLM.cpp +483 -0
  10. package/cpp/HybridLiteRTLM.hpp +120 -0
  11. package/cpp/cpp-adapter.cpp +13 -0
  12. package/cpp/include/README.md +34 -0
  13. package/lib/index.d.ts +82 -0
  14. package/lib/index.js +106 -0
  15. package/lib/specs/LiteRTLM.nitro.d.ts +165 -0
  16. package/lib/specs/LiteRTLM.nitro.js +2 -0
  17. package/nitrogen/generated/.gitattributes +1 -0
  18. package/nitrogen/generated/android/LiteRTLM+autolinking.cmake +81 -0
  19. package/nitrogen/generated/android/LiteRTLM+autolinking.gradle +27 -0
  20. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +46 -0
  21. package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +25 -0
  22. package/nitrogen/generated/android/c++/JBackend.hpp +61 -0
  23. package/nitrogen/generated/android/c++/JFunc_void_std__string_bool.hpp +76 -0
  24. package/nitrogen/generated/android/c++/JGenerationStats.hpp +77 -0
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +133 -0
  26. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +75 -0
  27. package/nitrogen/generated/android/c++/JLLMConfig.hpp +75 -0
  28. package/nitrogen/generated/android/c++/JMessage.hpp +63 -0
  29. package/nitrogen/generated/android/c++/JRole.hpp +61 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Backend.kt +24 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Func_void_std__string_bool.kt +80 -0
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +53 -0
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +98 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +50 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LiteRTLMOnLoad.kt +35 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +41 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Role.kt +24 -0
  38. package/nitrogen/generated/ios/LiteRTLM+autolinking.rb +60 -0
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +17 -0
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +27 -0
  41. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +38 -0
  42. package/nitrogen/generated/shared/c++/Backend.hpp +80 -0
  43. package/nitrogen/generated/shared/c++/GenerationStats.hpp +103 -0
  44. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +30 -0
  45. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +82 -0
  46. package/nitrogen/generated/shared/c++/LLMConfig.hpp +101 -0
  47. package/nitrogen/generated/shared/c++/Message.hpp +89 -0
  48. package/nitrogen/generated/shared/c++/Role.hpp +80 -0
  49. package/package.json +87 -0
  50. package/react-native-litert-lm.podspec +51 -0
  51. package/react-native.config.js +16 -0
  52. package/src/index.ts +125 -0
  53. package/src/specs/LiteRTLM.nitro.ts +187 -0
@@ -0,0 +1,43 @@
1
+ package dev.litert.litertlm
2
+
3
+ import android.content.ContentProvider
4
+ import android.content.ContentValues
5
+ import android.content.Context
6
+ import android.database.Cursor
7
+ import android.net.Uri
8
+ import android.util.Log
9
+
/**
 * Content provider used purely as an initialization hook: [onCreate] captures the
 * application [Context] and publishes it through [applicationContext].
 *
 * It exposes no data. Every query/insert/update/delete operation is a no-op.
 */
class LiteRTLMInitProvider : ContentProvider() {

    /** Stores the application context (when available), logs it, and always returns `true`. */
    override fun onCreate(): Boolean {
        val appContext = context?.applicationContext
        applicationContext = appContext
        Log.i(TAG, "LiteRTLMInitProvider initialized with context: $appContext")
        return true
    }

    /** No rows are served; always returns `null`. */
    override fun query(
        uri: Uri,
        projection: Array<out String>?,
        selection: String?,
        selectionArgs: Array<out String>?,
        sortOrder: String?
    ): Cursor? {
        return null
    }

    /** No MIME types are reported; always returns `null`. */
    override fun getType(uri: Uri): String? {
        return null
    }

    /** Inserts are ignored; always returns `null`. */
    override fun insert(uri: Uri, values: ContentValues?): Uri? {
        return null
    }

    /** Deletes are ignored; always reports zero affected rows. */
    override fun delete(uri: Uri, selection: String?, selectionArgs: Array<out String>?): Int {
        return 0
    }

    /** Updates are ignored; always reports zero affected rows. */
    override fun update(
        uri: Uri,
        values: ContentValues?,
        selection: String?,
        selectionArgs: Array<out String>?
    ): Int {
        return 0
    }

    companion object {
        private const val TAG = "LiteRTLMInitProvider"

        /** Application context captured by [onCreate]; `null` until it has been set. */
        var applicationContext: Context? = null
            private set
    }
}
@@ -0,0 +1,26 @@
1
+ package dev.litert.litertlm
2
+
3
+ import com.facebook.react.TurboReactPackage
4
+ import com.facebook.react.bridge.NativeModule
5
+ import com.facebook.react.bridge.ReactApplicationContext
6
+ import com.facebook.react.module.model.ReactModuleInfo
7
+ import com.facebook.react.module.model.ReactModuleInfoProvider
8
+ import com.margelo.nitro.core.HybridObject
9
+
10
+
11
+ import com.margelo.nitro.dev.litert.litertlm.LiteRTLMOnLoad
12
+
/**
 * React Native package entry for LiteRT-LM.
 *
 * Constructing the package calls [LiteRTLMOnLoad.initializeNative]. The package itself
 * registers no modules: module lookup always yields `null` and the module-info map is empty.
 */
class LiteRTLMPackage : TurboReactPackage() {
    init {
        LiteRTLMOnLoad.initializeNative()
    }

    /** This package provides no native modules of its own. */
    override fun getModule(name: String, reactContext: ReactApplicationContext): NativeModule? = null

    /** Returns a provider that reports an empty module-info map. */
    override fun getReactModuleInfoProvider(): ReactModuleInfoProvider =
        ReactModuleInfoProvider { emptyMap<String, ReactModuleInfo>() }
}
@@ -0,0 +1,483 @@
1
+ //
2
+ // HybridLiteRTLM.cpp
3
+ // react-native-litert-lm
4
+ //
5
+ // High-performance LLM inference using LiteRT-LM.
6
+ //
7
+ // NOTE: This C++ implementation is used for iOS ONLY.
8
+ // Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
9
+ // Do not assume changes here will affect Android.
10
+ //
11
+
12
+ #include "HybridLiteRTLM.hpp"
13
+
14
+ #include <chrono>
15
+ #include <stdexcept>
16
+ #include <sstream>
17
+
18
+ namespace margelo::nitro::litertlm {
19
+
20
+ //------------------------------------------------------------------------------
21
+ // Helper: Format user prompt (applies chat template if needed)
22
+ //------------------------------------------------------------------------------
23
+ std::string HybridLiteRTLM::formatUserPrompt(const std::string& message) const {
24
+ // The LiteRT-LM Conversation class handles chat templates internally,
25
+ // so we just return the message as-is. If we were using Session directly,
26
+ // we'd apply the Gemma/Phi template here.
27
+ return message;
28
+ }
29
+
30
+ //------------------------------------------------------------------------------
31
+ // Helper: Create a new Conversation from existing Engine
32
+ //------------------------------------------------------------------------------
33
+ void HybridLiteRTLM::createNewConversation() {
34
+ #ifdef LITERT_LM_ENABLED
35
+ if (!engine_) {
36
+ throw std::runtime_error("Cannot create conversation: engine not initialized");
37
+ }
38
+
39
+ auto conversation_config = litert::lm::ConversationConfig::CreateDefault(*engine_);
40
+ if (!conversation_config.ok()) {
41
+ throw std::runtime_error("Failed to create conversation config: " +
42
+ std::string(conversation_config.status().message()));
43
+ }
44
+
45
+ auto conversation = litert::lm::Conversation::Create(*engine_, *conversation_config);
46
+ if (!conversation.ok()) {
47
+ throw std::runtime_error("Failed to create conversation: " +
48
+ std::string(conversation.status().message()));
49
+ }
50
+ conversation_ = std::move(*conversation);
51
+ #endif
52
+ }
53
+
54
+ //------------------------------------------------------------------------------
55
+ // loadModel - Initialize Engine and Conversation
56
+ //------------------------------------------------------------------------------
57
+ void HybridLiteRTLM::loadModel(
58
+ const std::string& modelPath,
59
+ const std::optional<LLMConfig>& config) {
60
+
61
+ std::lock_guard<std::mutex> lock(mutex_);
62
+
63
+ // Clean up existing resources
64
+ if (isLoaded_) {
65
+ isLoaded_ = false;
66
+ history_.clear();
67
+ #ifdef LITERT_LM_ENABLED
68
+ conversation_.reset();
69
+ engine_.reset();
70
+ #endif
71
+ }
72
+
73
+ // Apply configuration
74
+ if (config.has_value()) {
75
+ if (config->backend.has_value()) {
76
+ backend_ = config->backend.value();
77
+ }
78
+ if (config->visionBackend.has_value()) {
79
+ visionBackend_ = config->visionBackend.value();
80
+ }
81
+ if (config->audioBackend.has_value()) {
82
+ audioBackend_ = config->audioBackend.value();
83
+ }
84
+ if (config->temperature.has_value()) {
85
+ temperature_ = config->temperature.value();
86
+ }
87
+ if (config->topK.has_value()) {
88
+ topK_ = config->topK.value();
89
+ }
90
+ if (config->topP.has_value()) {
91
+ topP_ = config->topP.value();
92
+ }
93
+ if (config->maxTokens.has_value()) {
94
+ maxTokens_ = config->maxTokens.value();
95
+ }
96
+ }
97
+
98
+ #ifdef LITERT_LM_ENABLED
99
+ // 1. Create ModelAssets from model path
100
+ auto model_assets = litert::lm::ModelAssets::Create(modelPath);
101
+ if (!model_assets.ok()) {
102
+ throw std::runtime_error("Failed to load model assets: " +
103
+ std::string(model_assets.status().message()));
104
+ }
105
+
106
+ // 2. Map our Backend enum to LiteRT-LM Backend enum
107
+ auto engine_backend = (backend_ == Backend::GPU)
108
+ ? litert::lm::Backend::GPU
109
+ : litert::lm::Backend::CPU;
110
+ auto vision_backend = (visionBackend_ == Backend::GPU)
111
+ ? litert::lm::Backend::GPU
112
+ : litert::lm::Backend::CPU;
113
+ auto audio_backend = (audioBackend_ == Backend::GPU)
114
+ ? litert::lm::Backend::GPU
115
+ : litert::lm::Backend::CPU;
116
+
117
+ // 3. Create EngineSettings with all backends
118
+ auto engine_settings = litert::lm::EngineSettings::CreateDefault(
119
+ *model_assets,
120
+ engine_backend,
121
+ vision_backend,
122
+ audio_backend
123
+ );
124
+
125
+ // 4. Create the Engine (heavyweight - loads model weights)
126
+ auto engine = litert::lm::Engine::CreateEngine(engine_settings);
127
+ if (!engine.ok()) {
128
+ throw std::runtime_error("Failed to create engine: " +
129
+ std::string(engine.status().message()));
130
+ }
131
+ engine_ = std::move(*engine);
132
+
133
+ // 5. Create the Conversation (lightweight - holds KV cache)
134
+ createNewConversation();
135
+
136
+ #endif // LITERT_LM_ENABLED
137
+
138
+ isLoaded_ = true;
139
+ history_.clear();
140
+
141
+ // Reset stats
142
+ lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
143
+ }
144
+
145
+ //------------------------------------------------------------------------------
146
+ // sendMessage - Blocking text inference
147
+ //------------------------------------------------------------------------------
148
+ std::string HybridLiteRTLM::sendMessage(const std::string& message) {
149
+ std::lock_guard<std::mutex> lock(mutex_);
150
+ ensureLoaded();
151
+
152
+ auto startTime = std::chrono::high_resolution_clock::now();
153
+
154
+ // Add user message to history
155
+ Message userMessage;
156
+ userMessage.role = Role::USER;
157
+ userMessage.content = message;
158
+ history_.push_back(userMessage);
159
+
160
+ std::string responseText;
161
+
162
+ #ifdef LITERT_LM_ENABLED
163
+ // Build the message struct for LiteRT-LM
164
+ // The Conversation API expects a structured input
165
+ litert::lm::UserMessage lm_message;
166
+ lm_message.role = "user";
167
+ lm_message.content = message;
168
+
169
+ auto response = conversation_->SendMessage(lm_message);
170
+ if (!response.ok()) {
171
+ // Remove the user message we just added since inference failed
172
+ history_.pop_back();
173
+ throw std::runtime_error("Inference failed: " +
174
+ std::string(response.status().message()));
175
+ }
176
+
177
+ responseText = response->content;
178
+
179
+ // Update stats from response if available
180
+ if (response->stats.has_value()) {
181
+ const auto& stats = response->stats.value();
182
+ lastStats_.promptTokens = static_cast<double>(stats.prompt_tokens);
183
+ lastStats_.completionTokens = static_cast<double>(stats.completion_tokens);
184
+ lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
185
+ lastStats_.timeToFirstToken = stats.time_to_first_token_ms;
186
+ lastStats_.totalTime = stats.total_time_ms;
187
+ lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
188
+ ? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
189
+ : 0.0;
190
+ }
191
+
192
+ #else
193
+ // Stub response when LiteRT-LM is not available
194
+ responseText = "[LiteRT-LM Stub] Model response placeholder. "
195
+ "Real inference will be available when LiteRT-LM libraries are integrated. "
196
+ "You said: " + message;
197
+
198
+ auto endTime = std::chrono::high_resolution_clock::now();
199
+ auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
200
+
201
+ // Estimate stats for stub
202
+ lastStats_.promptTokens = static_cast<double>(message.length() / 4);
203
+ lastStats_.completionTokens = static_cast<double>(responseText.length() / 4);
204
+ lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
205
+ lastStats_.totalTime = static_cast<double>(duration);
206
+ lastStats_.timeToFirstToken = lastStats_.totalTime / 2;
207
+ lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
208
+ ? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
209
+ : 0;
210
+ #endif
211
+
212
+ // Add model response to history
213
+ Message modelMessage;
214
+ modelMessage.role = Role::MODEL;
215
+ modelMessage.content = responseText;
216
+ history_.push_back(modelMessage);
217
+
218
+ return responseText;
219
+ }
220
+
221
+ //------------------------------------------------------------------------------
222
+ // sendMessageWithImage - Multimodal image + text
223
+ //------------------------------------------------------------------------------
224
+ std::string HybridLiteRTLM::sendMessageWithImage(
225
+ const std::string& message,
226
+ const std::string& imagePath) {
227
+
228
+ std::lock_guard<std::mutex> lock(mutex_);
229
+ ensureLoaded();
230
+
231
+ #ifdef LITERT_LM_ENABLED
232
+ // TODO: Load image file into raw pixel buffer
233
+ // The Engine expects raw RGBA/RGB data, not a file path.
234
+ // Implementation should:
235
+ // 1. Read image file (using stb_image.h or Android Bitmap JNI)
236
+ // 2. Decode to raw pixel buffer (std::vector<uint8_t>)
237
+ // 3. Create litert::lm::ImageData or equivalent tensor
238
+ // 4. Pass to conversation_->SendMessage with multimodal content
239
+
240
+ // For now, fall back to text-only with a note about the image
241
+ std::string augmentedMessage = message + " [Image attached: " + imagePath +
242
+ " - Note: Image processing not yet implemented, text-only response]";
243
+
244
+ litert::lm::UserMessage lm_message;
245
+ lm_message.role = "user";
246
+ lm_message.content = augmentedMessage;
247
+
248
+ auto response = conversation_->SendMessage(lm_message);
249
+ if (!response.ok()) {
250
+ throw std::runtime_error("Multimodal inference failed: " +
251
+ std::string(response.status().message()));
252
+ }
253
+
254
+ // Add to history
255
+ Message userMessage;
256
+ userMessage.role = Role::USER;
257
+ userMessage.content = message + " [with image]";
258
+ history_.push_back(userMessage);
259
+
260
+ Message modelMessage;
261
+ modelMessage.role = Role::MODEL;
262
+ modelMessage.content = response->content;
263
+ history_.push_back(modelMessage);
264
+
265
+ return response->content;
266
+
267
+ #else
268
+ // Stub: just process text with image path noted
269
+ return sendMessage(message + " [Image: " + imagePath + "]");
270
+ #endif
271
+ }
272
+
273
+ //------------------------------------------------------------------------------
274
+ // sendMessageWithAudio - Multimodal audio + text
275
+ //------------------------------------------------------------------------------
276
+ std::string HybridLiteRTLM::sendMessageWithAudio(
277
+ const std::string& message,
278
+ const std::string& audioPath) {
279
+
280
+ std::lock_guard<std::mutex> lock(mutex_);
281
+ ensureLoaded();
282
+
283
+ #ifdef LITERT_LM_ENABLED
284
+ // TODO: Load audio file into raw sample buffer
285
+ // Similar to image - Engine expects raw audio samples, not file path.
286
+ // Implementation should:
287
+ // 1. Read WAV file header and samples
288
+ // 2. Convert to expected format (likely 16kHz mono float32)
289
+ // 3. Create litert::lm::AudioData or equivalent
290
+ // 4. Pass to conversation with multimodal content
291
+
292
+ std::string augmentedMessage = message + " [Audio attached: " + audioPath +
293
+ " - Note: Audio processing not yet implemented, text-only response]";
294
+
295
+ litert::lm::UserMessage lm_message;
296
+ lm_message.role = "user";
297
+ lm_message.content = augmentedMessage;
298
+
299
+ auto response = conversation_->SendMessage(lm_message);
300
+ if (!response.ok()) {
301
+ throw std::runtime_error("Audio inference failed: " +
302
+ std::string(response.status().message()));
303
+ }
304
+
305
+ // Add to history
306
+ Message userMessage;
307
+ userMessage.role = Role::USER;
308
+ userMessage.content = message + " [with audio]";
309
+ history_.push_back(userMessage);
310
+
311
+ Message modelMessage;
312
+ modelMessage.role = Role::MODEL;
313
+ modelMessage.content = response->content;
314
+ history_.push_back(modelMessage);
315
+
316
+ return response->content;
317
+
318
+ #else
319
+ // Stub: just process text with audio path noted
320
+ return sendMessage(message + " [Audio: " + audioPath + "]");
321
+ #endif
322
+ }
323
+
324
+ //------------------------------------------------------------------------------
325
+ // sendMessageAsync - Streaming token generation
326
+ //------------------------------------------------------------------------------
327
+ void HybridLiteRTLM::sendMessageAsync(
328
+ const std::string& message,
329
+ const std::function<void(std::string, bool)>& onToken) {
330
+
331
+ // Note: We don't hold the lock during the entire async operation
332
+ // to avoid blocking other operations. The callback may be invoked
333
+ // from a different thread depending on LiteRT-LM's implementation.
334
+
335
+ {
336
+ std::lock_guard<std::mutex> lock(mutex_);
337
+ ensureLoaded();
338
+ }
339
+
340
+ #ifdef LITERT_LM_ENABLED
341
+ // Add user message to history before starting
342
+ {
343
+ std::lock_guard<std::mutex> lock(mutex_);
344
+ Message userMessage;
345
+ userMessage.role = Role::USER;
346
+ userMessage.content = message;
347
+ history_.push_back(userMessage);
348
+ }
349
+
350
+ litert::lm::UserMessage lm_message;
351
+ lm_message.role = "user";
352
+ lm_message.content = message;
353
+
354
+ std::string fullResponse;
355
+
356
+ // The callback needs to be carefully managed for thread safety
357
+ auto status = conversation_->SendMessageAsync(
358
+ lm_message,
359
+ [this, &onToken, &fullResponse](const std::string& token, bool isDone) {
360
+ fullResponse += token;
361
+
362
+ // Invoke the JS callback (Nitro handles thread marshalling)
363
+ onToken(token, isDone);
364
+
365
+ if (isDone) {
366
+ // Add complete response to history
367
+ std::lock_guard<std::mutex> lock(mutex_);
368
+ Message modelMessage;
369
+ modelMessage.role = Role::MODEL;
370
+ modelMessage.content = fullResponse;
371
+ history_.push_back(modelMessage);
372
+ }
373
+ }
374
+ );
375
+
376
+ if (!status.ok()) {
377
+ // Remove user message since inference failed
378
+ std::lock_guard<std::mutex> lock(mutex_);
379
+ if (!history_.empty()) {
380
+ history_.pop_back();
381
+ }
382
+ throw std::runtime_error("Async inference failed: " +
383
+ std::string(status.message()));
384
+ }
385
+
386
+ #else
387
+ // Stub: Simulate streaming by calling sendMessage and splitting response
388
+ std::string fullResponse;
389
+ {
390
+ std::lock_guard<std::mutex> lock(mutex_);
391
+
392
+ // Add user message
393
+ Message userMessage;
394
+ userMessage.role = Role::USER;
395
+ userMessage.content = message;
396
+ history_.push_back(userMessage);
397
+
398
+ fullResponse = "[LiteRT-LM Stub] Streaming response placeholder. You said: " + message;
399
+ }
400
+
401
+ // Simulate token-by-token streaming
402
+ std::string currentWord;
403
+ for (size_t i = 0; i < fullResponse.length(); i++) {
404
+ char c = fullResponse[i];
405
+ currentWord += c;
406
+
407
+ if (c == ' ' || c == '\n' || i == fullResponse.length() - 1) {
408
+ bool isDone = (i == fullResponse.length() - 1);
409
+ onToken(currentWord, isDone);
410
+ currentWord.clear();
411
+ }
412
+ }
413
+
414
+ // Add model response to history
415
+ {
416
+ std::lock_guard<std::mutex> lock(mutex_);
417
+ Message modelMessage;
418
+ modelMessage.role = Role::MODEL;
419
+ modelMessage.content = fullResponse;
420
+ history_.push_back(modelMessage);
421
+ }
422
+ #endif
423
+ }
424
+
425
+ //------------------------------------------------------------------------------
426
+ // getHistory - Return conversation history
427
+ //------------------------------------------------------------------------------
428
+ std::vector<Message> HybridLiteRTLM::getHistory() {
429
+ std::lock_guard<std::mutex> lock(mutex_);
430
+ return history_;
431
+ }
432
+
433
+ //------------------------------------------------------------------------------
434
+ // resetConversation - Clear KV cache, keep engine
435
+ //------------------------------------------------------------------------------
436
+ void HybridLiteRTLM::resetConversation() {
437
+ std::lock_guard<std::mutex> lock(mutex_);
438
+
439
+ #ifdef LITERT_LM_ENABLED
440
+ // Destroy old conversation and create a new one
441
+ // This clears the KV cache but keeps the (expensive) Engine loaded
442
+ if (engine_) {
443
+ conversation_.reset();
444
+ createNewConversation();
445
+ }
446
+ #endif
447
+
448
+ history_.clear();
449
+ }
450
+
451
+ //------------------------------------------------------------------------------
452
+ // isReady - Check if model is loaded
453
+ //------------------------------------------------------------------------------
454
+ bool HybridLiteRTLM::isReady() {
455
+ std::lock_guard<std::mutex> lock(mutex_);
456
+ return isLoaded_;
457
+ }
458
+
459
+ //------------------------------------------------------------------------------
460
+ // getStats - Return last generation statistics
461
+ //------------------------------------------------------------------------------
462
+ GenerationStats HybridLiteRTLM::getStats() {
463
+ std::lock_guard<std::mutex> lock(mutex_);
464
+ return lastStats_;
465
+ }
466
+
467
+ //------------------------------------------------------------------------------
468
+ // close - Release all native resources
469
+ //------------------------------------------------------------------------------
470
+ void HybridLiteRTLM::close() {
471
+ std::lock_guard<std::mutex> lock(mutex_);
472
+
473
+ #ifdef LITERT_LM_ENABLED
474
+ // Release in reverse order of creation
475
+ conversation_.reset();
476
+ engine_.reset();
477
+ #endif
478
+
479
+ isLoaded_ = false;
480
+ history_.clear();
481
+ }
482
+
483
+ } // namespace margelo::nitro::litertlm
@@ -0,0 +1,120 @@
1
+ //
2
+ // HybridLiteRTLM.hpp
3
+ // react-native-litert-lm
4
+ //
5
+ // High-performance LLM inference using LiteRT-LM.
6
+ // Supports Gemma 3n and other .litertlm models.
7
+ //
8
+ // NOTE: This C++ implementation is used for iOS ONLY.
9
+ // Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
10
+ // Do not assume changes here will affect Android.
11
+ //
12
+
13
+ #pragma once
14
+
15
+ #include "../nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp"
16
+
17
+ // LiteRT-LM headers (conditionally included when available via Prefab/CMake)
18
+ #ifdef LITERT_LM_ENABLED
19
+ #include "litert/lm/engine.h"
20
+ #include "litert/lm/conversation.h"
21
+ #include "litert/lm/types.h"
22
+ #endif
23
+
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>
30
+
31
+ namespace margelo::nitro::litertlm {
32
+
33
+ /**
34
+ * HybridLiteRTLM: React Native bindings for LiteRT-LM.
35
+ *
36
+ * Wraps LiteRT-LM's Engine and Conversation classes to provide
37
+ * high-level LLM inference with GPU acceleration.
38
+ */
39
+ class HybridLiteRTLM : public HybridLiteRTLMSpec {
40
+ public:
41
+ HybridLiteRTLM() : HybridObject(TAG) {}
42
+
43
+ ~HybridLiteRTLM() override {
44
+ close();
45
+ }
46
+
47
+ // Prevent copying
48
+ HybridLiteRTLM(const HybridLiteRTLM&) = delete;
49
+ HybridLiteRTLM& operator=(const HybridLiteRTLM&) = delete;
50
+
51
+ public:
52
+ // HybridLiteRTLMSpec interface implementation
53
+
54
+ void loadModel(const std::string& modelPath,
55
+ const std::optional<LLMConfig>& config) override;
56
+
57
+ std::string sendMessage(const std::string& message) override;
58
+
59
+ std::string sendMessageWithImage(const std::string& message,
60
+ const std::string& imagePath) override;
61
+
62
+ std::string sendMessageWithAudio(const std::string& message,
63
+ const std::string& audioPath) override;
64
+
65
+ void sendMessageAsync(
66
+ const std::string& message,
67
+ const std::function<void(std::string, bool)>& onToken
68
+ ) override;
69
+
70
+ std::vector<Message> getHistory() override;
71
+
72
+ void resetConversation() override;
73
+
74
+ bool isReady() override;
75
+
76
+ GenerationStats getStats() override;
77
+
78
+ void close() override;
79
+
80
+ private:
81
+ // LiteRT-LM resources (conditionally available on Android with Prefab)
82
+ #ifdef LITERT_LM_ENABLED
83
+ std::unique_ptr<litert::lm::Engine> engine_;
84
+ std::unique_ptr<litert::lm::Conversation> conversation_;
85
+ #endif
86
+
87
+ // State
88
+ bool isLoaded_ = false;
89
+ std::vector<Message> history_;
90
+ GenerationStats lastStats_{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
91
+
92
+ // Thread safety
93
+ mutable std::mutex mutex_;
94
+
95
+ // Configuration - backends
96
+ Backend backend_ = Backend::GPU;
97
+ Backend visionBackend_ = Backend::GPU; // Gemma 3n requires GPU for vision
98
+ Backend audioBackend_ = Backend::CPU; // Audio typically CPU
99
+
100
+ // Configuration - sampling parameters
101
+ double temperature_ = 0.7;
102
+ double topK_ = 40.0;
103
+ double topP_ = 0.95;
104
+ double maxTokens_ = 1024.0;
105
+
106
+ // Helper to ensure model is loaded
107
+ void ensureLoaded() const {
108
+ if (!isLoaded_) {
109
+ throw std::runtime_error("LiteRTLM: No model loaded. Call loadModel() first.");
110
+ }
111
+ }
112
+
113
+ // Helper to format a message for the engine (apply chat template if needed)
114
+ std::string formatUserPrompt(const std::string& message) const;
115
+
116
+ // Helper to create a new conversation from existing engine
117
+ void createNewConversation();
118
+ };
119
+
120
+ } // namespace margelo::nitro::litertlm
@@ -0,0 +1,13 @@
1
+ ///
2
+ /// cpp-adapter.cpp
3
+ /// JNI Entry Point - Required by Nitrogen to register Kotlin HybridObjects
4
+ ///
5
+
6
+ #include <jni.h>
7
+ #include "LiteRTLMOnLoad.hpp"
8
+
9
+ // JNI_OnLoad is called when the native library is loaded via System.loadLibrary()
10
+ // This is where we initialize the Nitrogen bridge and register all Kotlin HybridObjects
11
+ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
12
+ return margelo::nitro::litertlm::initialize(vm);
13
+ }