react-native-litert-lm 0.3.7 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +153 -135
  2. package/android/build.gradle +12 -0
  3. package/android/src/main/AndroidManifest.xml +8 -0
  4. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +276 -62
  5. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
  6. package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
  7. package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +105 -0
  8. package/ios/HybridLiteRTLM.swift +1344 -0
  9. package/ios/Tests/HybridLiteRTLMTests.swift +113 -0
  10. package/lib/__mocks__/react-native-nitro-modules.d.ts +65 -0
  11. package/lib/__mocks__/react-native-nitro-modules.js +60 -0
  12. package/lib/__tests__/hooks.test.d.ts +1 -0
  13. package/lib/__tests__/hooks.test.js +124 -0
  14. package/lib/__tests__/memoryTracker.test.d.ts +1 -0
  15. package/lib/__tests__/memoryTracker.test.js +74 -0
  16. package/lib/__tests__/modelFactory.test.d.ts +1 -0
  17. package/lib/__tests__/modelFactory.test.js +68 -0
  18. package/lib/hooks.js +27 -3
  19. package/lib/index.d.ts +6 -2
  20. package/lib/index.js +8 -8
  21. package/lib/modelFactory.js +82 -63
  22. package/lib/specs/LiteRTLM.nitro.d.ts +87 -2
  23. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +2 -2
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +94 -9
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +5 -1
  26. package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
  27. package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
  28. package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
  29. package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +28 -2
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
  38. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
  41. package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
  42. package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
  43. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
  44. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +240 -0
  45. package/nitrogen/generated/ios/swift/Backend.swift +44 -0
  46. package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
  47. package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
  48. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  49. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
  50. package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
  51. package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
  52. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +71 -0
  53. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +431 -0
  54. package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
  55. package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
  56. package/nitrogen/generated/ios/swift/Message.swift +34 -0
  57. package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
  58. package/nitrogen/generated/ios/swift/PartType.swift +44 -0
  59. package/nitrogen/generated/ios/swift/Role.swift +44 -0
  60. package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
  61. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +4 -0
  62. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +9 -2
  63. package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
  64. package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
  65. package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
  66. package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
  67. package/package.json +22 -11
  68. package/react-native-litert-lm.podspec +17 -19
  69. package/scripts/download-ios-frameworks.sh +17 -50
  70. package/scripts/framework-source.js +46 -0
  71. package/scripts/postinstall.js +40 -18
  72. package/src/__mocks__/react-native-nitro-modules.ts +58 -0
  73. package/src/__tests__/hooks.test.ts +153 -0
  74. package/src/__tests__/memoryTracker.test.ts +87 -0
  75. package/src/__tests__/modelFactory.test.ts +96 -0
  76. package/src/hooks.ts +29 -7
  77. package/src/index.ts +7 -10
  78. package/src/modelFactory.ts +104 -80
  79. package/src/specs/LiteRTLM.nitro.ts +106 -2
  80. package/cpp/HybridLiteRTLM.cpp +0 -939
  81. package/cpp/HybridLiteRTLM.hpp +0 -169
  82. package/cpp/IOSDownloadHelper.h +0 -24
  83. package/ios/IOSDownloadHelper.mm +0 -129
  84. package/scripts/build-ios-engine.sh +0 -302
  85. package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
  86. package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
  87. package/scripts/stubs/llguidance_stubs.c +0 -101
  88. package/src/templates.ts +0 -105
@@ -1,939 +0,0 @@
1
- //
2
- // HybridLiteRTLM.cpp
3
- // react-native-litert-lm
4
- //
5
- // High-performance LLM inference using LiteRT-LM C API.
6
- //
7
- // NOTE: This C++ implementation is used for iOS ONLY.
8
- // Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
9
- // Do not assume changes here will affect Android.
10
- //
11
-
12
- #include "HybridLiteRTLM.hpp"
13
-
14
-
15
-
16
-
17
- #include <NitroModules/Promise.hpp>
18
- #include <chrono>
19
- #include <stdexcept>
20
- #include <sstream>
21
- #include <sys/stat.h>
22
- #include <cstdio>
23
-
24
- #ifdef __APPLE__
25
- #include "IOSDownloadHelper.h"
26
- #include <os/proc.h>
27
- #endif
28
- #include <fstream>
29
- #include <thread>
30
- #include <regex>
31
- #include <pthread.h>
32
- #include <functional>
33
-
34
- namespace margelo::nitro::litertlm {
35
-
36
- // =============================================================================
37
- // Thread Helper — LiteRT engine operations need >512KB stack (XNNPack, Metal)
38
- // =============================================================================
39
-
40
/**
 * Runs `work` synchronously on a freshly created pthread whose stack is
 * `stackSize` bytes, and blocks the caller until it has finished.
 *
 * Per the section note in this file, LiteRT engine operations need more than
 * the 512KB default stack (XNNPack, Metal), hence the 8 MiB default.
 *
 * @param work       Callable to execute on the new thread.
 * @param stackSize  Requested stack size in bytes.
 * @throws std::runtime_error if the thread attributes cannot be prepared, the
 *         stack size is rejected, or the thread cannot be created.
 * @throws whatever `work` throws; the exception is captured on the worker
 *         thread and rethrown on the calling thread after the join.
 */
static void runOnLargeStack(std::function<void()> work, size_t stackSize = 8 * 1024 * 1024) {
  struct Context {
    std::function<void()> fn;
    std::exception_ptr exception;
  };
  // Lives on the caller's stack; safe because we join before returning.
  Context ctx{std::move(work), nullptr};

  pthread_attr_t attr;
  int rc = pthread_attr_init(&attr);
  if (rc != 0) {
    throw std::runtime_error("Failed to initialise thread attributes (error: " + std::to_string(rc) + ")");
  }

  // Do not ignore a rejected stack size: silently falling back to the default
  // stack would defeat the whole purpose of this helper.
  rc = pthread_attr_setstacksize(&attr, stackSize);
  if (rc != 0) {
    pthread_attr_destroy(&attr);
    throw std::runtime_error("Failed to set thread stack size to " + std::to_string(stackSize) +
                             " bytes (error: " + std::to_string(rc) + ")");
  }

  pthread_t thread;
  rc = pthread_create(&thread, &attr, [](void* arg) -> void* {
    auto* c = static_cast<Context*>(arg);
    try {
      c->fn();
    } catch (...) {
      // Exceptions must not escape a thread entry point; hand them to the caller.
      c->exception = std::current_exception();
    }
    return nullptr;
  }, &ctx);
  pthread_attr_destroy(&attr);
  if (rc != 0) {
    throw std::runtime_error("Failed to create large-stack thread (errno: " + std::to_string(rc) + ")");
  }
  pthread_join(thread, nullptr);

  if (ctx.exception) {
    std::rethrow_exception(ctx.exception);
  }
}
71
-
72
- // =============================================================================
73
- // JSON Helpers
74
- // =============================================================================
75
-
76
- std::string HybridLiteRTLM::escapeJson(const std::string& input) {
77
- std::string output;
78
- output.reserve(input.size() + 16);
79
- for (char c : input) {
80
- switch (c) {
81
- case '"': output += "\\\""; break;
82
- case '\\': output += "\\\\"; break;
83
- case '\n': output += "\\n"; break;
84
- case '\r': output += "\\r"; break;
85
- case '\t': output += "\\t"; break;
86
- case '\b': output += "\\b"; break;
87
- case '\f': output += "\\f"; break;
88
- default: output += c; break;
89
- }
90
- }
91
- return output;
92
- }
93
-
94
- std::string HybridLiteRTLM::buildTextMessageJson(const std::string& text) {
95
- return "{\"role\":\"user\",\"content\":\"" + escapeJson(text) + "\"}";
96
- }
97
-
98
- std::string HybridLiteRTLM::buildImageMessageJson(const std::string& text, const std::string& imagePath) {
99
- return "{\"role\":\"user\",\"content\":["
100
- "{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
101
- "{\"type\":\"image\",\"path\":\"" + escapeJson(imagePath) + "\"}"
102
- "]}";
103
- }
104
-
105
- std::string HybridLiteRTLM::buildAudioMessageJson(const std::string& text, const std::string& audioPath) {
106
- return "{\"role\":\"user\",\"content\":["
107
- "{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
108
- "{\"type\":\"audio\",\"path\":\"" + escapeJson(audioPath) + "\"}"
109
- "]}";
110
- }
111
-
112
/**
 * Gemma / LiteRT-LM control tokens that the iOS C API includes in raw output.
 * The Android Kotlin SDK strips these automatically.
 *
 * Order matters: code that walks this table removes tokens in array order, so
 * the longer "<start_of_turn>model" / "<start_of_turn>user" forms must come
 * before the bare "<start_of_turn>" to be removed as a whole.
 *
 * The table is read-only; both the array and the pointers are const.
 */
static constexpr const char* const kControlTokens[] = {
  "<end_of_turn>",
  "<start_of_turn>model",
  "<start_of_turn>user",
  "<start_of_turn>",
  "<eos>",
};
123
-
124
- /**
125
- * Strip control tokens from model output, preserving whitespace.
126
- * Streaming tokens like " the", " is" have meaningful leading spaces
127
- * that must not be trimmed.
128
- */
129
- static std::string stripControlTokens(const std::string& text) {
130
- std::string result = text;
131
- for (auto* tok : kControlTokens) {
132
- std::string t(tok);
133
- size_t pos;
134
- while ((pos = result.find(t)) != std::string::npos) {
135
- result.erase(pos, t.length());
136
- }
137
- }
138
- return result;
139
- }
140
-
141
- /**
142
- * Determine how many characters from the start of `text` are safe to emit.
143
- * If the tail of `text` could be the beginning of a control token (split
144
- * across chunk boundaries), those characters are withheld until the next
145
- * chunk confirms whether it's a real token or normal content.
146
- */
147
- static size_t safeEmitLength(const std::string& text) {
148
- // Find the last '<' — it could be the start of a partial control token
149
- size_t lastAngle = text.rfind('<');
150
- if (lastAngle == std::string::npos) {
151
- return text.length(); // No '<' found, safe to emit all
152
- }
153
-
154
- std::string suffix = text.substr(lastAngle);
155
- // Check if this suffix is a prefix of any control token
156
- for (auto* tok : kControlTokens) {
157
- std::string t(tok);
158
- if (suffix.length() < t.length() && t.compare(0, suffix.length(), suffix) == 0) {
159
- // This suffix could be the start of a control token — hold it back
160
- return lastAngle;
161
- }
162
- }
163
-
164
- // The '<' doesn't match any control token prefix, safe to emit all
165
- return text.length();
166
- }
167
-
168
/**
 * Returns `text` without leading and trailing spaces, tabs, newlines and
 * carriage returns. A string consisting only of those characters yields "".
 */
static std::string trimWhitespace(const std::string& text) {
  const char* const blanks = " \t\n\r";
  const size_t first = text.find_first_not_of(blanks);
  if (first == std::string::npos) {
    return std::string();
  }
  const size_t last = text.find_last_not_of(blanks);
  return std::string(text, first, last - first + 1);
}
175
-
176
- std::string HybridLiteRTLM::extractTextFromResponse(const std::string& jsonResponse) {
177
- // The C API response JSON is structured as:
178
- // {"role":"model","content":[{"type":"text","text":"..."}]}
179
- // or:
180
- // {"role":"model","content":"..."}
181
- //
182
- // We use simple string extraction to avoid a JSON library dependency.
183
-
184
- // Try array format first: find "text":"..." after "type":"text"
185
- std::string textMarker = "\"text\":\"";
186
- size_t pos = jsonResponse.find("\"type\":\"text\"");
187
- if (pos != std::string::npos) {
188
- pos = jsonResponse.find(textMarker, pos);
189
- if (pos != std::string::npos) {
190
- pos += textMarker.length();
191
- std::string result;
192
- result.reserve(jsonResponse.size() - pos);
193
- for (size_t i = pos; i < jsonResponse.size(); i++) {
194
- if (jsonResponse[i] == '\\' && i + 1 < jsonResponse.size()) {
195
- char next = jsonResponse[i + 1];
196
- if (next == '"') { result += '"'; i++; }
197
- else if (next == '\\') { result += '\\'; i++; }
198
- else if (next == 'n') { result += '\n'; i++; }
199
- else if (next == 'r') { result += '\r'; i++; }
200
- else if (next == 't') { result += '\t'; i++; }
201
- else { result += jsonResponse[i]; }
202
- } else if (jsonResponse[i] == '"') {
203
- break; // End of the text value
204
- } else {
205
- result += jsonResponse[i];
206
- }
207
- }
208
- return stripControlTokens(result);
209
- }
210
- }
211
-
212
- // Try simple string format: "content":"..."
213
- std::string contentMarker = "\"content\":\"";
214
- pos = jsonResponse.find(contentMarker);
215
- if (pos != std::string::npos) {
216
- pos += contentMarker.length();
217
- std::string result;
218
- for (size_t i = pos; i < jsonResponse.size(); i++) {
219
- if (jsonResponse[i] == '\\' && i + 1 < jsonResponse.size()) {
220
- char next = jsonResponse[i + 1];
221
- if (next == '"') { result += '"'; i++; }
222
- else if (next == '\\') { result += '\\'; i++; }
223
- else if (next == 'n') { result += '\n'; i++; }
224
- else { result += jsonResponse[i]; }
225
- } else if (jsonResponse[i] == '"') {
226
- break;
227
- } else {
228
- result += jsonResponse[i];
229
- }
230
- }
231
- return stripControlTokens(result);
232
- }
233
-
234
- // Fallback: return full response (still strip control tokens)
235
- return stripControlTokens(jsonResponse);
236
- }
237
-
238
- // =============================================================================
239
- // Conversation Management
240
- // =============================================================================
241
-
242
- void HybridLiteRTLM::createNewConversation() {
243
- #ifdef __APPLE__
244
- if (!engine_) {
245
- throw std::runtime_error("Cannot create conversation: engine not initialized");
246
- }
247
-
248
- // Clean up previous conversation
249
- if (conversation_) {
250
- litert_lm_conversation_delete(conversation_);
251
- conversation_ = nullptr;
252
- }
253
- if (conv_config_) {
254
- litert_lm_conversation_config_delete(conv_config_);
255
- conv_config_ = nullptr;
256
- }
257
-
258
- // Build system message JSON if provided
259
- std::string systemMsgJson;
260
- const char* systemMsgPtr = nullptr;
261
- if (!systemPrompt_.empty()) {
262
- systemMsgJson = "{\"role\":\"system\",\"content\":\"" + escapeJson(systemPrompt_) + "\"}";
263
- systemMsgPtr = systemMsgJson.c_str();
264
- }
265
-
266
- // Create conversation config with session config
267
- conv_config_ = litert_lm_conversation_config_create(
268
- engine_,
269
- session_config_, // may be nullptr for defaults
270
- systemMsgPtr, // system message
271
- nullptr, // tools (not used yet)
272
- nullptr, // messages history
273
- false // constrained decoding
274
- );
275
- if (!conv_config_) {
276
- throw std::runtime_error("Failed to create conversation config");
277
- }
278
-
279
- // Create conversation
280
- conversation_ = litert_lm_conversation_create(engine_, conv_config_);
281
- if (!conversation_) {
282
- litert_lm_conversation_config_delete(conv_config_);
283
- conv_config_ = nullptr;
284
- throw std::runtime_error("Failed to create conversation");
285
- }
286
- #endif
287
- }
288
-
289
- // =============================================================================
290
- // loadModel
291
- // =============================================================================
292
-
293
- std::shared_ptr<Promise<void>> HybridLiteRTLM::loadModel(
294
- const std::string& modelPath,
295
- const std::optional<LLMConfig>& config) {
296
- return Promise<void>::async([this, modelPath, config]() {
297
- runOnLargeStack([&]() {
298
- loadModelInternal(modelPath, config);
299
- });
300
- });
301
- }
302
-
303
- void HybridLiteRTLM::loadModelInternal(
304
- const std::string& modelPath,
305
- const std::optional<LLMConfig>& config) {
306
-
307
- std::lock_guard<std::mutex> lock(mutex_);
308
-
309
- if (isLoaded_) {
310
- close();
311
- }
312
-
313
- if (config.has_value()) {
314
- if (config->backend.has_value()) {
315
- backend_ = config->backend.value();
316
- }
317
- if (config->temperature.has_value()) {
318
- temperature_ = config->temperature.value();
319
- }
320
- if (config->topK.has_value()) {
321
- topK_ = config->topK.value();
322
- }
323
- if (config->topP.has_value()) {
324
- topP_ = config->topP.value();
325
- }
326
- if (config->maxTokens.has_value()) {
327
- maxTokens_ = config->maxTokens.value();
328
- }
329
- if (config->systemPrompt.has_value()) {
330
- systemPrompt_ = config->systemPrompt.value();
331
- }
332
- }
333
-
334
- #ifdef __APPLE__
335
- // Set log verbosity: 2=WARNING (production), 0=INFO (debug)
336
- litert_lm_set_min_log_level(2);
337
-
338
- auto backendStr = [](Backend b) -> const char* {
339
- switch (b) {
340
- case Backend::GPU: return "gpu";
341
- case Backend::NPU: return "gpu"; // NPU not available on iOS, use GPU
342
- default: return "cpu";
343
- }
344
- };
345
-
346
- auto tryCreateEngine = [&](const char* backend, const char* visionBackend) -> bool {
347
- auto* settings = litert_lm_engine_settings_create(
348
- modelPath.c_str(),
349
- backend,
350
- visionBackend,
351
- "cpu" // audio executor: iOS XCFramework lacks compiled audio ops (INTERNAL ERROR at Invoke)
352
- );
353
- if (!settings) {
354
- return false;
355
- }
356
-
357
- litert_lm_engine_settings_set_max_num_tokens(settings, static_cast<int>(maxTokens_));
358
- litert_lm_engine_settings_enable_benchmark(settings);
359
-
360
- // Set cache directory to the same directory as the model file
361
- std::string cacheDir = modelPath.substr(0, modelPath.find_last_of('/'));
362
- litert_lm_engine_settings_set_cache_dir(settings, cacheDir.c_str());
363
-
364
- engine_ = litert_lm_engine_create(settings);
365
- litert_lm_engine_settings_delete(settings);
366
-
367
- return engine_ != nullptr;
368
- };
369
-
370
- // Try requested backend first (e.g. gpu/gpu)
371
- const char* primaryBackend = backendStr(backend_);
372
- if (!tryCreateEngine(primaryBackend, primaryBackend)) {
373
- // Fallback chain for when the primary backend fails:
374
- bool fallbackOk = false;
375
- if (backend_ != Backend::CPU) {
376
- // 1) Try CPU main + GPU vision (model's vision encoder often requires GPU)
377
- fallbackOk = tryCreateEngine("cpu", "gpu");
378
- // 2) Try CPU main + CPU vision
379
- if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", "cpu");
380
- }
381
- // 3) Try CPU main + no vision (nullptr skips vision executor entirely)
382
- if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", nullptr);
383
- if (fallbackOk) {
384
- backend_ = Backend::CPU;
385
- }
386
- }
387
-
388
- if (!engine_) {
389
- // Collect diagnostic info
390
- std::string diag = " | Diagnostics: ";
391
- struct stat st;
392
- if (stat(modelPath.c_str(), &st) == 0) {
393
- diag += "File size: " + std::to_string(st.st_size) + " bytes";
394
- } else {
395
- diag += "Failed to stat file (errno: " + std::to_string(errno) + ")";
396
- }
397
-
398
- FILE* f = fopen(modelPath.c_str(), "rb");
399
- if (f) {
400
- diag += ", Readable: YES";
401
- fclose(f);
402
- } else {
403
- diag += ", Readable: NO (errno: " + std::to_string(errno) + ")";
404
- }
405
-
406
-
407
- throw std::runtime_error(
408
- "Failed to create LiteRT-LM engine. Tried backend '" +
409
- std::string(primaryBackend) + "' and CPU fallback. Model path: " + modelPath + diag);
410
- }
411
-
412
- session_config_ = litert_lm_session_config_create();
413
- if (session_config_) {
414
- litert_lm_session_config_set_max_output_tokens(session_config_, static_cast<int>(maxTokens_));
415
-
416
- LiteRtLmSamplerParams sampler{};
417
- sampler.type = kTopP;
418
- sampler.top_k = static_cast<int32_t>(topK_);
419
- sampler.top_p = static_cast<float>(topP_);
420
- sampler.temperature = static_cast<float>(temperature_);
421
- sampler.seed = 0;
422
- litert_lm_session_config_set_sampler_params(session_config_, &sampler);
423
- }
424
-
425
- createNewConversation();
426
- #endif
427
-
428
- isLoaded_ = true;
429
- history_.clear();
430
- lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
431
- }
432
-
433
- // =============================================================================
434
- // sendMessage — Blocking text inference
435
- // =============================================================================
436
-
437
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessage(const std::string& message) {
438
- return Promise<std::string>::async([this, message]() -> std::string {
439
- std::string result;
440
- runOnLargeStack([&]() {
441
- result = sendMessageInternal(message);
442
- });
443
- return result;
444
- });
445
- }
446
-
447
- std::string HybridLiteRTLM::sendMessageInternal(const std::string& message) {
448
- std::lock_guard<std::mutex> lock(mutex_);
449
- ensureLoaded();
450
-
451
- auto startTime = std::chrono::steady_clock::now();
452
- std::string result;
453
-
454
- #ifdef __APPLE__
455
- std::string msgJson = buildTextMessageJson(message);
456
-
457
- auto* response = litert_lm_conversation_send_message(
458
- conversation_, msgJson.c_str(), nullptr);
459
-
460
- if (!response) {
461
- throw std::runtime_error("LiteRT-LM: sendMessage failed");
462
- }
463
-
464
- const char* responseStr = litert_lm_json_response_get_string(response);
465
- if (responseStr) {
466
- result = trimWhitespace(extractTextFromResponse(std::string(responseStr)));
467
- }
468
- litert_lm_json_response_delete(response);
469
-
470
- auto* benchInfo = litert_lm_conversation_get_benchmark_info(conversation_);
471
- if (benchInfo) {
472
- int numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo);
473
- if (numDecodeTurns > 0) {
474
- int lastIdx = numDecodeTurns - 1;
475
- lastStats_.tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx);
476
- lastStats_.completionTokens = static_cast<double>(
477
- litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx));
478
- }
479
- lastStats_.timeToFirstToken = litert_lm_benchmark_info_get_time_to_first_token(benchInfo);
480
- litert_lm_benchmark_info_delete(benchInfo);
481
- }
482
- #else
483
- // Non-Apple stub
484
- result = "[iOS only] LiteRT-LM inference not available on this platform.";
485
- #endif
486
-
487
- auto endTime = std::chrono::steady_clock::now();
488
- double latencyMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
489
- lastStats_.totalTime = latencyMs / 1000.0;
490
-
491
- // Update history
492
- history_.push_back(Message{Role::USER, message});
493
- history_.push_back(Message{Role::MODEL, result});
494
-
495
- return result;
496
- }
497
-
498
- // =============================================================================
499
- // sendMessageAsync — Streaming text inference
500
- // =============================================================================
501
-
502
- void HybridLiteRTLM::streamCallbackFn(void* callback_data, const char* chunk,
503
- bool is_final, const char* error_msg) {
504
- auto* ctx = static_cast<StreamContext*>(callback_data);
505
-
506
- if (error_msg) {
507
- // Error occurred — notify JS and clean up
508
- ctx->onToken(std::string("Error: ") + error_msg, true);
509
- delete ctx;
510
- return;
511
- }
512
-
513
- if (is_final) {
514
- // Calculate stats
515
- auto endTime = std::chrono::steady_clock::now();
516
- double durationMs = std::chrono::duration<double, std::milli>(endTime - ctx->startTime).count();
517
-
518
- if (ctx->lastStats && ctx->tokenCount > 0) {
519
- ctx->lastStats->completionTokens = static_cast<double>(ctx->tokenCount);
520
- ctx->lastStats->totalTime = durationMs / 1000.0;
521
- ctx->lastStats->tokensPerSecond = (ctx->tokenCount / durationMs) * 1000.0;
522
- }
523
-
524
- // Final flush: do one last clean of the full accumulated response
525
- // to emit any text that was withheld by safeEmitLength.
526
- std::string cleaned = stripControlTokens(ctx->rawResponse);
527
- size_t start = cleaned.find_first_not_of(" \t\n\r");
528
- if (start != std::string::npos) {
529
- cleaned = cleaned.substr(start);
530
- // Strip echoed user message
531
- if (!ctx->userMessage.empty() && cleaned.find(ctx->userMessage) == 0) {
532
- cleaned = cleaned.substr(ctx->userMessage.length());
533
- size_t nextStart = cleaned.find_first_not_of(" \t\n\r");
534
- cleaned = (nextStart != std::string::npos) ? cleaned.substr(nextStart) : "";
535
- }
536
- // Emit any remaining text not yet sent
537
- if (cleaned.length() > ctx->lastEmittedLength) {
538
- std::string remaining = cleaned.substr(ctx->lastEmittedLength);
539
- ctx->onToken(remaining, false);
540
- }
541
- ctx->fullResponse = cleaned;
542
- }
543
-
544
- // Update history (thread-safe)
545
- {
546
- std::lock_guard<std::mutex> lock(*ctx->historyMutex);
547
- ctx->history->push_back(Message{Role::USER, ctx->userMessage});
548
- ctx->history->push_back(Message{Role::MODEL, ctx->fullResponse});
549
- }
550
-
551
- ctx->onToken("", true);
552
- delete ctx;
553
- return;
554
- }
555
-
556
- if (chunk) {
557
- std::string token(chunk);
558
-
559
- // The C API may return JSON-wrapped responses (e.g.
560
- // {"role":"model","content":[{"type":"text","text":"Hi"}]})
561
- // instead of raw text tokens. Detect and extract text content.
562
- std::string raw;
563
- if (token.size() > 2 && token[0] == '{' && token.find("\"role\"") != std::string::npos) {
564
- raw = HybridLiteRTLM::extractTextFromResponse(token);
565
- } else {
566
- raw = token;
567
- }
568
-
569
- // Accumulate raw text, then strip control tokens from the FULL buffer.
570
- // This correctly handles tokens split across chunk boundaries (e.g.
571
- // chunk1="<end_of_tu" chunk2="rn>Hello").
572
- ctx->rawResponse += raw;
573
- std::string cleaned = stripControlTokens(ctx->rawResponse);
574
-
575
- // Trim leading whitespace from the overall response
576
- size_t start = cleaned.find_first_not_of(" \t\n\r");
577
- if (start == std::string::npos) {
578
- // Still only whitespace/control tokens — nothing to emit yet
579
- return;
580
- }
581
- cleaned = cleaned.substr(start);
582
-
583
- // The C API may echo back the user's message before the model response.
584
- // Strip the echoed user message prefix if present.
585
- if (!ctx->userMessage.empty()) {
586
- size_t userPos = cleaned.find(ctx->userMessage);
587
- if (userPos == 0) {
588
- cleaned = cleaned.substr(ctx->userMessage.length());
589
- // Trim any whitespace after the stripped user message
590
- size_t nextStart = cleaned.find_first_not_of(" \t\n\r");
591
- if (nextStart == std::string::npos) {
592
- return; // Only user message so far, nothing to emit
593
- }
594
- cleaned = cleaned.substr(nextStart);
595
- }
596
- }
597
-
598
- // Only emit text that is "safe" — withhold any trailing characters
599
- // that could be the start of a control token split across chunks.
600
- size_t safe = safeEmitLength(cleaned);
601
- if (safe > ctx->lastEmittedLength) {
602
- std::string newText = cleaned.substr(ctx->lastEmittedLength, safe - ctx->lastEmittedLength);
603
- ctx->fullResponse = cleaned.substr(0, safe);
604
- ctx->lastEmittedLength = safe;
605
- ctx->tokenCount++;
606
- ctx->onToken(newText, false);
607
- }
608
- }
609
- }
610
-
611
- void HybridLiteRTLM::sendMessageAsync(
612
- const std::string& message,
613
- const std::function<void(const std::string&, bool)>& onToken) {
614
-
615
- // Copy values for the background thread (avoid use-after-free)
616
- auto onTokenCopy = onToken;
617
- auto messageCopy = message;
618
-
619
- // Capture shared state safely — use unique_ptr to prevent leaks
620
- auto ctxOwner = std::make_unique<StreamContext>();
621
- ctxOwner->onToken = std::move(onTokenCopy);
622
- ctxOwner->rawResponse = "";
623
- ctxOwner->fullResponse = "";
624
- ctxOwner->lastEmittedLength = 0;
625
- ctxOwner->history = &history_;
626
- ctxOwner->historyMutex = &mutex_;
627
- ctxOwner->userMessage = messageCopy;
628
- ctxOwner->lastStats = &lastStats_;
629
- ctxOwner->startTime = std::chrono::steady_clock::now();
630
- ctxOwner->tokenCount = 0;
631
-
632
- #ifdef __APPLE__
633
- ensureLoaded();
634
-
635
- std::string msgJson = buildTextMessageJson(messageCopy);
636
-
637
- // Release ownership — the C callback now owns the context via raw pointer.
638
- // streamCallbackFn will delete it when done or on error.
639
- StreamContext* ctx = ctxOwner.release();
640
-
641
- // Wrap the initial engine call in runOnLargeStack for consistency
642
- // with all other engine entry points (XNNPack needs >512KB stack).
643
- runOnLargeStack([&]() {
644
- int result = litert_lm_conversation_send_message_stream(
645
- conversation_, msgJson.c_str(), nullptr,
646
- streamCallbackFn, ctx);
647
-
648
- if (result != 0) {
649
- delete ctx;
650
- throw std::runtime_error("LiteRT-LM: Failed to start streaming inference");
651
- }
652
- });
653
- #else
654
- // Non-Apple stub
655
- ctxOwner->onToken("[iOS only] Streaming not available on this platform.", true);
656
- // ctxOwner auto-deleted by unique_ptr
657
- #endif
658
- }
659
-
660
- // =============================================================================
661
- // sendMessageWithImage — Multimodal (vision)
662
- // =============================================================================
663
-
664
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithImage(
665
- const std::string& message,
666
- const std::string& imagePath) {
667
- return Promise<std::string>::async([this, message, imagePath]() -> std::string {
668
- std::string result;
669
- runOnLargeStack([&]() {
670
- result = sendMessageWithImageInternal(message, imagePath);
671
- });
672
- return result;
673
- });
674
- }
675
-
676
- std::string HybridLiteRTLM::sendMessageWithImageInternal(
677
- const std::string& message,
678
- const std::string& imagePath) {
679
-
680
- std::lock_guard<std::mutex> lock(mutex_);
681
- ensureLoaded();
682
-
683
- auto startTime = std::chrono::steady_clock::now();
684
- std::string result;
685
-
686
- #ifdef __APPLE__
687
- // Verify image exists
688
- std::ifstream imageFile(imagePath);
689
- if (!imageFile.good()) {
690
- throw std::runtime_error("Image file not found: " + imagePath);
691
- }
692
- imageFile.close();
693
-
694
- // Build multimodal message JSON — the C API handles image preprocessing
695
- std::string msgJson = buildImageMessageJson(message, imagePath);
696
-
697
- auto* response = litert_lm_conversation_send_message(
698
- conversation_, msgJson.c_str(), nullptr);
699
-
700
- if (!response) {
701
- throw std::runtime_error("LiteRT-LM: sendMessageWithImage failed");
702
- }
703
-
704
- const char* responseStr = litert_lm_json_response_get_string(response);
705
- if (responseStr) {
706
- result = trimWhitespace(extractTextFromResponse(std::string(responseStr)));
707
- }
708
- litert_lm_json_response_delete(response);
709
- #else
710
- result = "[iOS only] Vision inference not available on this platform.";
711
- #endif
712
-
713
- auto endTime = std::chrono::steady_clock::now();
714
- lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
715
-
716
- history_.push_back(Message{Role::USER, message + " [image: " + imagePath + "]"});
717
- history_.push_back(Message{Role::MODEL, result});
718
-
719
- return result;
720
- }
721
-
722
- // =============================================================================
723
- // sendMessageWithAudio — Multimodal (audio)
724
- // =============================================================================
725
-
726
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithAudio(
727
- const std::string& message,
728
- const std::string& audioPath) {
729
- return Promise<std::string>::async([this, message, audioPath]() -> std::string {
730
- std::string result;
731
- runOnLargeStack([&]() {
732
- result = sendMessageWithAudioInternal(message, audioPath);
733
- });
734
- return result;
735
- });
736
- }
737
-
738
- std::string HybridLiteRTLM::sendMessageWithAudioInternal(
739
- const std::string& message,
740
- const std::string& audioPath) {
741
-
742
- std::lock_guard<std::mutex> lock(mutex_);
743
- ensureLoaded();
744
-
745
- auto startTime = std::chrono::steady_clock::now();
746
- std::string result;
747
-
748
- #ifdef __APPLE__
749
- std::ifstream audioFile(audioPath);
750
- if (!audioFile.good()) {
751
- throw std::runtime_error("Audio file not found: " + audioPath);
752
- }
753
- audioFile.close();
754
-
755
- std::string msgJson = buildAudioMessageJson(message, audioPath);
756
-
757
- auto* response = litert_lm_conversation_send_message(
758
- conversation_, msgJson.c_str(), nullptr);
759
-
760
- if (!response) {
761
- throw std::runtime_error("LiteRT-LM: sendMessageWithAudio failed");
762
- }
763
-
764
- const char* responseStr = litert_lm_json_response_get_string(response);
765
- if (responseStr) {
766
- result = trimWhitespace(extractTextFromResponse(std::string(responseStr)));
767
- }
768
- litert_lm_json_response_delete(response);
769
- #else
770
- result = "[iOS only] Audio inference not available on this platform.";
771
- #endif
772
-
773
- auto endTime = std::chrono::steady_clock::now();
774
- lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
775
-
776
- history_.push_back(Message{Role::USER, message + " [audio: " + audioPath + "]"});
777
- history_.push_back(Message{Role::MODEL, result});
778
-
779
- return result;
780
- }
781
-
782
- // =============================================================================
783
- // downloadModel — Download model from URL
784
- // =============================================================================
785
-
786
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::downloadModel(
787
- const std::string& url,
788
- const std::string& fileName,
789
- const std::optional<std::function<void(double)>>& onProgress) {
790
- return Promise<std::string>::async([url, fileName, onProgress]() -> std::string {
791
- #ifdef __APPLE__
792
- return litert_lm::downloadModelFile(url, fileName, onProgress);
793
- #else
794
- // Non-Apple platforms: not supported from C++ (Android uses Kotlin)
795
- throw std::runtime_error("Download not available on this platform. Use the Kotlin implementation.");
796
- #endif
797
- });
798
- }
799
-
800
- std::shared_ptr<Promise<void>> HybridLiteRTLM::deleteModel(const std::string& fileName) {
801
- return Promise<void>::async([fileName]() {
802
- std::string path;
803
- #ifdef __APPLE__
804
- // Match the path used by IOSDownloadHelper: ~/Library/Caches/litert_models/
805
- const char* home = getenv("HOME");
806
- if (home) {
807
- path = std::string(home) + "/Library/Caches/litert_models/" + fileName;
808
- }
809
- #else
810
- path = "/tmp/" + fileName;
811
- #endif
812
- if (!path.empty()) {
813
- std::remove(path.c_str());
814
- }
815
- });
816
- }
817
-
818
- // =============================================================================
819
- // getHistory
820
- // =============================================================================
821
-
822
- std::vector<Message> HybridLiteRTLM::getHistory() {
823
- std::lock_guard<std::mutex> lock(mutex_);
824
- return history_;
825
- }
826
-
827
- // =============================================================================
828
- // resetConversation
829
- // =============================================================================
830
-
831
- void HybridLiteRTLM::resetConversation() {
832
- std::lock_guard<std::mutex> lock(mutex_);
833
-
834
- history_.clear();
835
- lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
836
-
837
- #ifdef __APPLE__
838
- if (isLoaded_ && engine_) {
839
- createNewConversation();
840
- }
841
- #endif
842
- }
843
-
844
- // =============================================================================
845
- // isReady
846
- // =============================================================================
847
-
848
- bool HybridLiteRTLM::isReady() {
849
- std::lock_guard<std::mutex> lock(mutex_);
850
- return isLoaded_;
851
- }
852
-
853
- // =============================================================================
854
- // getStats
855
- // =============================================================================
856
-
857
- GenerationStats HybridLiteRTLM::getStats() {
858
- std::lock_guard<std::mutex> lock(mutex_);
859
- return lastStats_;
860
- }
861
-
862
- // =============================================================================
863
- // getMemoryUsage — Uses Mach APIs for iOS process memory
864
- // =============================================================================
865
-
866
- MemoryUsage HybridLiteRTLM::getMemoryUsage() {
867
- double nativeHeapBytes = 0;
868
- double residentBytes = 0;
869
- double availableBytes = 0;
870
- bool isLowMemory = false;
871
-
872
- #ifdef __APPLE__
873
- // Get app process memory (resident set size)
874
- struct mach_task_basic_info info;
875
- mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
876
-
877
- kern_return_t kr = task_info(mach_task_self(),
878
- MACH_TASK_BASIC_INFO,
879
- (task_info_t)&info,
880
- &count);
881
-
882
- if (kr == KERN_SUCCESS) {
883
- residentBytes = static_cast<double>(info.resident_size);
884
- // On iOS, mach_task_basic_info doesn't separate heap from RSS.
885
- // Use resident_size_max as a proxy for peak native allocation.
886
- nativeHeapBytes = static_cast<double>(info.resident_size);
887
- }
888
-
889
- // Use os_proc_available_memory() (iOS 13+) for accurate Jetsam headroom.
890
- // This reports how much memory the process can still allocate before
891
- // the system kills it — far more accurate than total_physical - process_rss.
892
- availableBytes = static_cast<double>(os_proc_available_memory());
893
-
894
- // Low memory threshold (~200MB available)
895
- isLowMemory = availableBytes < 200.0 * 1024.0 * 1024.0;
896
- #endif
897
-
898
- return MemoryUsage{
899
- nativeHeapBytes, // nativeHeapBytes (RSS as proxy on iOS)
900
- residentBytes, // residentBytes
901
- availableBytes, // availableMemoryBytes
902
- isLowMemory // isLowMemory
903
- };
904
- }
905
-
906
- // =============================================================================
907
- // close — Clean up all LiteRT-LM resources
908
- // =============================================================================
909
-
910
- void HybridLiteRTLM::close() {
911
- // Note: Don't lock here if called from destructor (mutex may be destroyed)
912
- // The caller (loadModel, destructor) should handle locking.
913
-
914
- isLoaded_ = false;
915
- history_.clear();
916
-
917
- #ifdef __APPLE__
918
- if (conversation_) {
919
- litert_lm_conversation_delete(conversation_);
920
- conversation_ = nullptr;
921
- }
922
- if (conv_config_) {
923
- litert_lm_conversation_config_delete(conv_config_);
924
- conv_config_ = nullptr;
925
- }
926
- if (session_config_) {
927
- litert_lm_session_config_delete(session_config_);
928
- session_config_ = nullptr;
929
- }
930
- if (engine_) {
931
- litert_lm_engine_delete(engine_);
932
- engine_ = nullptr;
933
- }
934
- #endif
935
-
936
- lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
937
- }
938
-
939
- } // namespace margelo::nitro::litertlm