react-native-litert-lm 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +207 -158
  2. package/android/build.gradle +12 -0
  3. package/android/src/main/AndroidManifest.xml +5 -0
  4. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +316 -63
  5. package/android/src/main/java/dev/litert/litertlm/LiteRTLMPackage.kt +19 -2
  6. package/android/src/test/java/com/margelo/nitro/core/Promise.kt +46 -0
  7. package/android/src/test/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMTest.kt +83 -0
  8. package/cpp/include/README.md +9 -11
  9. package/ios/HybridLiteRTLM.swift +1058 -0
  10. package/ios/Tests/HybridLiteRTLMTests.swift +67 -0
  11. package/lib/__mocks__/react-native-nitro-modules.d.ts +61 -0
  12. package/lib/__mocks__/react-native-nitro-modules.js +50 -0
  13. package/lib/__tests__/hooks.test.d.ts +1 -0
  14. package/lib/__tests__/hooks.test.js +124 -0
  15. package/lib/__tests__/memoryTracker.test.d.ts +1 -0
  16. package/lib/__tests__/memoryTracker.test.js +74 -0
  17. package/lib/__tests__/modelFactory.test.d.ts +1 -0
  18. package/lib/__tests__/modelFactory.test.js +52 -0
  19. package/lib/hooks.js +1 -1
  20. package/lib/index.d.ts +2 -4
  21. package/lib/index.js +12 -7
  22. package/lib/modelFactory.js +62 -63
  23. package/lib/specs/LiteRTLM.nitro.d.ts +71 -2
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +62 -7
  25. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +3 -1
  26. package/nitrogen/generated/android/c++/JLLMConfig.hpp +40 -3
  27. package/nitrogen/generated/android/c++/JMultimodalPart.hpp +74 -0
  28. package/nitrogen/generated/android/c++/JPartType.hpp +61 -0
  29. package/nitrogen/generated/android/c++/JToolDefinition.hpp +65 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/GenerationStats.kt +23 -0
  31. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +10 -2
  32. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/LLMConfig.kt +46 -3
  33. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +19 -0
  34. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/Message.kt +15 -0
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MultimodalPart.kt +66 -0
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/PartType.kt +24 -0
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/ToolDefinition.kt +61 -0
  38. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.cpp +57 -1
  39. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Bridge.hpp +414 -3
  40. package/nitrogen/generated/ios/LiteRTLM-Swift-Cxx-Umbrella.hpp +41 -3
  41. package/nitrogen/generated/ios/LiteRTLMAutolinking.mm +4 -6
  42. package/nitrogen/generated/ios/LiteRTLMAutolinking.swift +10 -0
  43. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.cpp +11 -0
  44. package/nitrogen/generated/ios/c++/HybridLiteRTLMSpecSwift.hpp +224 -0
  45. package/nitrogen/generated/ios/swift/Backend.swift +44 -0
  46. package/nitrogen/generated/ios/swift/Func_void.swift +46 -0
  47. package/nitrogen/generated/ios/swift/Func_void_double.swift +46 -0
  48. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  49. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +46 -0
  50. package/nitrogen/generated/ios/swift/Func_void_std__string_bool.swift +46 -0
  51. package/nitrogen/generated/ios/swift/GenerationStats.swift +54 -0
  52. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec.swift +69 -0
  53. package/nitrogen/generated/ios/swift/HybridLiteRTLMSpec_cxx.swift +383 -0
  54. package/nitrogen/generated/ios/swift/LLMConfig.swift +203 -0
  55. package/nitrogen/generated/ios/swift/MemoryUsage.swift +44 -0
  56. package/nitrogen/generated/ios/swift/Message.swift +34 -0
  57. package/nitrogen/generated/ios/swift/MultimodalPart.swift +83 -0
  58. package/nitrogen/generated/ios/swift/PartType.swift +44 -0
  59. package/nitrogen/generated/ios/swift/Role.swift +44 -0
  60. package/nitrogen/generated/ios/swift/ToolDefinition.swift +39 -0
  61. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +2 -0
  62. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +7 -2
  63. package/nitrogen/generated/shared/c++/LLMConfig.hpp +22 -2
  64. package/nitrogen/generated/shared/c++/MultimodalPart.hpp +99 -0
  65. package/nitrogen/generated/shared/c++/PartType.hpp +80 -0
  66. package/nitrogen/generated/shared/c++/ToolDefinition.hpp +91 -0
  67. package/package.json +16 -8
  68. package/react-native-litert-lm.podspec +15 -19
  69. package/scripts/download-ios-frameworks.sh +14 -48
  70. package/scripts/postinstall.js +1 -2
  71. package/src/__mocks__/react-native-nitro-modules.ts +48 -0
  72. package/src/__tests__/hooks.test.ts +153 -0
  73. package/src/__tests__/memoryTracker.test.ts +87 -0
  74. package/src/__tests__/modelFactory.test.ts +68 -0
  75. package/src/hooks.ts +1 -1
  76. package/src/index.ts +12 -9
  77. package/src/modelFactory.ts +82 -80
  78. package/src/specs/LiteRTLM.nitro.ts +80 -2
  79. package/cpp/HybridLiteRTLM.cpp +0 -838
  80. package/cpp/HybridLiteRTLM.hpp +0 -167
  81. package/cpp/IOSDownloadHelper.h +0 -24
  82. package/ios/IOSDownloadHelper.mm +0 -129
  83. package/scripts/build-ios-engine.sh +0 -302
  84. package/scripts/stubs/cxx_bridge_stubs.cc +0 -224
  85. package/scripts/stubs/gemma_model_constraint_provider.cc +0 -46
  86. package/scripts/stubs/llguidance_stubs.c +0 -101
  87. package/src/templates.ts +0 -105
@@ -1,838 +0,0 @@
1
- //
2
- // HybridLiteRTLM.cpp
3
- // react-native-litert-lm
4
- //
5
- // High-performance LLM inference using LiteRT-LM C API.
6
- //
7
- // NOTE: This C++ implementation is used for iOS ONLY.
8
- // Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
9
- // Do not assume changes here will affect Android.
10
- //
11
-
12
- #include "HybridLiteRTLM.hpp"
13
-
14
-
15
-
16
-
17
- #include <NitroModules/Promise.hpp>
18
- #include <chrono>
19
- #include <stdexcept>
20
- #include <sstream>
21
- #include <sys/stat.h>
22
- #include <cstdio>
23
-
24
- #ifdef __APPLE__
25
- #include "IOSDownloadHelper.h"
26
- #include <os/proc.h>
27
- #endif
28
- #include <fstream>
29
- #include <thread>
30
- #include <regex>
31
- #include <pthread.h>
32
- #include <functional>
33
-
34
- namespace margelo::nitro::litertlm {
35
-
36
- // =============================================================================
37
- // Thread Helper — LiteRT engine operations need >512KB stack (XNNPack, Metal)
38
- // =============================================================================
39
-
40
/**
 * Runs `work` synchronously on a dedicated pthread created with a stack of
 * `stackSize` bytes, blocking the caller until the work has finished.
 *
 * An exception thrown by `work` is captured on the worker thread and rethrown
 * on the calling thread once the worker has been joined.
 *
 * @param work      Callable to execute on the large-stack thread.
 * @param stackSize Requested stack size in bytes (defaults to 8 MiB).
 * @throws std::runtime_error if the thread attributes cannot be configured
 *         (for example, the platform rejects `stackSize`) or the thread
 *         cannot be created.
 */
static void runOnLargeStack(std::function<void()> work, size_t stackSize = 8 * 1024 * 1024) {
  struct Context {
    std::function<void()> fn;
    std::exception_ptr exception;
  };
  Context ctx{std::move(work), nullptr};

  pthread_attr_t attr;
  int rc = pthread_attr_init(&attr);
  if (rc != 0) {
    throw std::runtime_error("Failed to initialize thread attributes (error: " + std::to_string(rc) + ")");
  }

  // A rejected stack size must not be ignored: the thread would otherwise be
  // created with the default stack, which defeats the purpose of this helper.
  rc = pthread_attr_setstacksize(&attr, stackSize);
  if (rc != 0) {
    pthread_attr_destroy(&attr);
    throw std::runtime_error("Failed to set thread stack size to " + std::to_string(stackSize) +
                             " bytes (error: " + std::to_string(rc) + ")");
  }

  pthread_t thread;
  rc = pthread_create(&thread, &attr, [](void* arg) -> void* {
    auto* c = static_cast<Context*>(arg);
    try {
      c->fn();
    } catch (...) {
      // Exceptions must not escape a thread entry point; hand them to the joiner.
      c->exception = std::current_exception();
    }
    return nullptr;
  }, &ctx);
  pthread_attr_destroy(&attr);
  if (rc != 0) {
    throw std::runtime_error("Failed to create large-stack thread (errno: " + std::to_string(rc) + ")");
  }

  // `ctx` lives on this stack frame, so the join must happen before returning.
  pthread_join(thread, nullptr);

  if (ctx.exception) {
    std::rethrow_exception(ctx.exception);
  }
}
71
-
72
- // =============================================================================
73
- // JSON Helpers
74
- // =============================================================================
75
-
76
- std::string HybridLiteRTLM::escapeJson(const std::string& input) {
77
- std::string output;
78
- output.reserve(input.size() + 16);
79
- for (char c : input) {
80
- switch (c) {
81
- case '"': output += "\\\""; break;
82
- case '\\': output += "\\\\"; break;
83
- case '\n': output += "\\n"; break;
84
- case '\r': output += "\\r"; break;
85
- case '\t': output += "\\t"; break;
86
- case '\b': output += "\\b"; break;
87
- case '\f': output += "\\f"; break;
88
- default: output += c; break;
89
- }
90
- }
91
- return output;
92
- }
93
-
94
- std::string HybridLiteRTLM::buildTextMessageJson(const std::string& text) {
95
- return "{\"role\":\"user\",\"content\":\"" + escapeJson(text) + "\"}";
96
- }
97
-
98
- std::string HybridLiteRTLM::buildImageMessageJson(const std::string& text, const std::string& imagePath) {
99
- return "{\"role\":\"user\",\"content\":["
100
- "{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
101
- "{\"type\":\"image\",\"path\":\"" + escapeJson(imagePath) + "\"}"
102
- "]}";
103
- }
104
-
105
- std::string HybridLiteRTLM::buildAudioMessageJson(const std::string& text, const std::string& audioPath) {
106
- return "{\"role\":\"user\",\"content\":["
107
- "{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
108
- "{\"type\":\"audio\",\"path\":\"" + escapeJson(audioPath) + "\"}"
109
- "]}";
110
- }
111
-
112
/**
 * Removes Gemma / LiteRT-LM turn and stop markers from `text`, then trims
 * leading and trailing spaces, tabs, carriage returns and newlines.
 *
 * Markers are removed in list order, and each one is searched again from the
 * start after every removal, so a marker formed by joining the text on either
 * side of a removed one is removed as well.
 *
 * @param text Raw model output.
 * @return The cleaned text; empty if nothing but whitespace remains.
 */
static std::string stripControlTokens(const std::string& text) {
  static const std::string kMarkers[] = {
      "<end_of_turn>",
      "<start_of_turn>model",
      "<start_of_turn>user",
      "<start_of_turn>",
      "<eos>",
  };
  static const char* const kWhitespace = " \t\n\r";

  std::string cleaned = text;
  for (const std::string& marker : kMarkers) {
    for (size_t at = cleaned.find(marker); at != std::string::npos; at = cleaned.find(marker)) {
      cleaned.erase(at, marker.size());
    }
  }

  const size_t first = cleaned.find_first_not_of(kWhitespace);
  if (first == std::string::npos) {
    return std::string();
  }
  const size_t last = cleaned.find_last_not_of(kWhitespace);
  return cleaned.substr(first, last - first + 1);
}
139
-
140
- std::string HybridLiteRTLM::extractTextFromResponse(const std::string& jsonResponse) {
141
- // The C API response JSON is structured as:
142
- // {"role":"model","content":[{"type":"text","text":"..."}]}
143
- // or:
144
- // {"role":"model","content":"..."}
145
- //
146
- // We use simple string extraction to avoid a JSON library dependency.
147
-
148
- // Try array format first: find "text":"..." after "type":"text"
149
- std::string textMarker = "\"text\":\"";
150
- size_t pos = jsonResponse.find("\"type\":\"text\"");
151
- if (pos != std::string::npos) {
152
- pos = jsonResponse.find(textMarker, pos);
153
- if (pos != std::string::npos) {
154
- pos += textMarker.length();
155
- std::string result;
156
- result.reserve(jsonResponse.size() - pos);
157
- for (size_t i = pos; i < jsonResponse.size(); i++) {
158
- if (jsonResponse[i] == '\\' && i + 1 < jsonResponse.size()) {
159
- char next = jsonResponse[i + 1];
160
- if (next == '"') { result += '"'; i++; }
161
- else if (next == '\\') { result += '\\'; i++; }
162
- else if (next == 'n') { result += '\n'; i++; }
163
- else if (next == 'r') { result += '\r'; i++; }
164
- else if (next == 't') { result += '\t'; i++; }
165
- else { result += jsonResponse[i]; }
166
- } else if (jsonResponse[i] == '"') {
167
- break; // End of the text value
168
- } else {
169
- result += jsonResponse[i];
170
- }
171
- }
172
- return stripControlTokens(result);
173
- }
174
- }
175
-
176
- // Try simple string format: "content":"..."
177
- std::string contentMarker = "\"content\":\"";
178
- pos = jsonResponse.find(contentMarker);
179
- if (pos != std::string::npos) {
180
- pos += contentMarker.length();
181
- std::string result;
182
- for (size_t i = pos; i < jsonResponse.size(); i++) {
183
- if (jsonResponse[i] == '\\' && i + 1 < jsonResponse.size()) {
184
- char next = jsonResponse[i + 1];
185
- if (next == '"') { result += '"'; i++; }
186
- else if (next == '\\') { result += '\\'; i++; }
187
- else if (next == 'n') { result += '\n'; i++; }
188
- else { result += jsonResponse[i]; }
189
- } else if (jsonResponse[i] == '"') {
190
- break;
191
- } else {
192
- result += jsonResponse[i];
193
- }
194
- }
195
- return stripControlTokens(result);
196
- }
197
-
198
- // Fallback: return full response (still strip control tokens)
199
- return stripControlTokens(jsonResponse);
200
- }
201
-
202
- // =============================================================================
203
- // Conversation Management
204
- // =============================================================================
205
-
206
- void HybridLiteRTLM::createNewConversation() {
207
- #ifdef __APPLE__
208
- if (!engine_) {
209
- throw std::runtime_error("Cannot create conversation: engine not initialized");
210
- }
211
-
212
- // Clean up previous conversation
213
- if (conversation_) {
214
- litert_lm_conversation_delete(conversation_);
215
- conversation_ = nullptr;
216
- }
217
- if (conv_config_) {
218
- litert_lm_conversation_config_delete(conv_config_);
219
- conv_config_ = nullptr;
220
- }
221
-
222
- // Build system message JSON if provided
223
- std::string systemMsgJson;
224
- const char* systemMsgPtr = nullptr;
225
- if (!systemPrompt_.empty()) {
226
- systemMsgJson = "{\"role\":\"system\",\"content\":\"" + escapeJson(systemPrompt_) + "\"}";
227
- systemMsgPtr = systemMsgJson.c_str();
228
- }
229
-
230
- // Create conversation config with session config
231
- conv_config_ = litert_lm_conversation_config_create(
232
- engine_,
233
- session_config_, // may be nullptr for defaults
234
- systemMsgPtr, // system message
235
- nullptr, // tools (not used yet)
236
- nullptr, // messages history
237
- false // constrained decoding
238
- );
239
- if (!conv_config_) {
240
- throw std::runtime_error("Failed to create conversation config");
241
- }
242
-
243
- // Create conversation
244
- conversation_ = litert_lm_conversation_create(engine_, conv_config_);
245
- if (!conversation_) {
246
- litert_lm_conversation_config_delete(conv_config_);
247
- conv_config_ = nullptr;
248
- throw std::runtime_error("Failed to create conversation");
249
- }
250
- #endif
251
- }
252
-
253
- // =============================================================================
254
- // loadModel
255
- // =============================================================================
256
-
257
- std::shared_ptr<Promise<void>> HybridLiteRTLM::loadModel(
258
- const std::string& modelPath,
259
- const std::optional<LLMConfig>& config) {
260
- return Promise<void>::async([this, modelPath, config]() {
261
- runOnLargeStack([&]() {
262
- loadModelInternal(modelPath, config);
263
- });
264
- });
265
- }
266
-
267
- void HybridLiteRTLM::loadModelInternal(
268
- const std::string& modelPath,
269
- const std::optional<LLMConfig>& config) {
270
-
271
- std::lock_guard<std::mutex> lock(mutex_);
272
-
273
- if (isLoaded_) {
274
- close();
275
- }
276
-
277
- if (config.has_value()) {
278
- if (config->backend.has_value()) {
279
- backend_ = config->backend.value();
280
- }
281
- if (config->temperature.has_value()) {
282
- temperature_ = config->temperature.value();
283
- }
284
- if (config->topK.has_value()) {
285
- topK_ = config->topK.value();
286
- }
287
- if (config->topP.has_value()) {
288
- topP_ = config->topP.value();
289
- }
290
- if (config->maxTokens.has_value()) {
291
- maxTokens_ = config->maxTokens.value();
292
- }
293
- if (config->systemPrompt.has_value()) {
294
- systemPrompt_ = config->systemPrompt.value();
295
- }
296
- }
297
-
298
- #ifdef __APPLE__
299
- // Set log verbosity: 2=WARNING (production), 0=INFO (debug)
300
- litert_lm_set_min_log_level(2);
301
-
302
- auto backendStr = [](Backend b) -> const char* {
303
- switch (b) {
304
- case Backend::GPU: return "gpu";
305
- case Backend::NPU: return "gpu"; // NPU not available on iOS, use GPU
306
- default: return "cpu";
307
- }
308
- };
309
-
310
- auto tryCreateEngine = [&](const char* backend, const char* visionBackend) -> bool {
311
- auto* settings = litert_lm_engine_settings_create(
312
- modelPath.c_str(),
313
- backend,
314
- visionBackend,
315
- "cpu" // audio executor: iOS XCFramework lacks compiled audio ops (INTERNAL ERROR at Invoke)
316
- );
317
- if (!settings) {
318
- return false;
319
- }
320
-
321
- litert_lm_engine_settings_set_max_num_tokens(settings, static_cast<int>(maxTokens_));
322
- litert_lm_engine_settings_enable_benchmark(settings);
323
-
324
- // Set cache directory to the same directory as the model file
325
- std::string cacheDir = modelPath.substr(0, modelPath.find_last_of('/'));
326
- litert_lm_engine_settings_set_cache_dir(settings, cacheDir.c_str());
327
-
328
- engine_ = litert_lm_engine_create(settings);
329
- litert_lm_engine_settings_delete(settings);
330
-
331
- return engine_ != nullptr;
332
- };
333
-
334
- // Try requested backend first (e.g. gpu/gpu)
335
- const char* primaryBackend = backendStr(backend_);
336
- if (!tryCreateEngine(primaryBackend, primaryBackend)) {
337
- // Fallback chain for when the primary backend fails:
338
- bool fallbackOk = false;
339
- if (backend_ != Backend::CPU) {
340
- // 1) Try CPU main + GPU vision (model's vision encoder often requires GPU)
341
- fallbackOk = tryCreateEngine("cpu", "gpu");
342
- // 2) Try CPU main + CPU vision
343
- if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", "cpu");
344
- }
345
- // 3) Try CPU main + no vision (nullptr skips vision executor entirely)
346
- if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", nullptr);
347
- if (fallbackOk) {
348
- backend_ = Backend::CPU;
349
- }
350
- }
351
-
352
- if (!engine_) {
353
- // Collect diagnostic info
354
- std::string diag = " | Diagnostics: ";
355
- struct stat st;
356
- if (stat(modelPath.c_str(), &st) == 0) {
357
- diag += "File size: " + std::to_string(st.st_size) + " bytes";
358
- } else {
359
- diag += "Failed to stat file (errno: " + std::to_string(errno) + ")";
360
- }
361
-
362
- FILE* f = fopen(modelPath.c_str(), "rb");
363
- if (f) {
364
- diag += ", Readable: YES";
365
- fclose(f);
366
- } else {
367
- diag += ", Readable: NO (errno: " + std::to_string(errno) + ")";
368
- }
369
-
370
-
371
- throw std::runtime_error(
372
- "Failed to create LiteRT-LM engine. Tried backend '" +
373
- std::string(primaryBackend) + "' and CPU fallback. Model path: " + modelPath + diag);
374
- }
375
-
376
- session_config_ = litert_lm_session_config_create();
377
- if (session_config_) {
378
- litert_lm_session_config_set_max_output_tokens(session_config_, static_cast<int>(maxTokens_));
379
-
380
- LiteRtLmSamplerParams sampler{};
381
- sampler.type = kTopP;
382
- sampler.top_k = static_cast<int32_t>(topK_);
383
- sampler.top_p = static_cast<float>(topP_);
384
- sampler.temperature = static_cast<float>(temperature_);
385
- sampler.seed = 0;
386
- litert_lm_session_config_set_sampler_params(session_config_, &sampler);
387
- }
388
-
389
- createNewConversation();
390
- #endif
391
-
392
- isLoaded_ = true;
393
- history_.clear();
394
- lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
395
- }
396
-
397
- // =============================================================================
398
- // sendMessage — Blocking text inference
399
- // =============================================================================
400
-
401
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessage(const std::string& message) {
402
- return Promise<std::string>::async([this, message]() -> std::string {
403
- std::string result;
404
- runOnLargeStack([&]() {
405
- result = sendMessageInternal(message);
406
- });
407
- return result;
408
- });
409
- }
410
-
411
- std::string HybridLiteRTLM::sendMessageInternal(const std::string& message) {
412
- std::lock_guard<std::mutex> lock(mutex_);
413
- ensureLoaded();
414
-
415
- auto startTime = std::chrono::steady_clock::now();
416
- std::string result;
417
-
418
- #ifdef __APPLE__
419
- std::string msgJson = buildTextMessageJson(message);
420
-
421
- auto* response = litert_lm_conversation_send_message(
422
- conversation_, msgJson.c_str(), nullptr);
423
-
424
- if (!response) {
425
- throw std::runtime_error("LiteRT-LM: sendMessage failed");
426
- }
427
-
428
- const char* responseStr = litert_lm_json_response_get_string(response);
429
- if (responseStr) {
430
- result = extractTextFromResponse(std::string(responseStr));
431
- }
432
- litert_lm_json_response_delete(response);
433
-
434
- auto* benchInfo = litert_lm_conversation_get_benchmark_info(conversation_);
435
- if (benchInfo) {
436
- int numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo);
437
- if (numDecodeTurns > 0) {
438
- int lastIdx = numDecodeTurns - 1;
439
- lastStats_.tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx);
440
- lastStats_.completionTokens = static_cast<double>(
441
- litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx));
442
- }
443
- lastStats_.timeToFirstToken = litert_lm_benchmark_info_get_time_to_first_token(benchInfo);
444
- litert_lm_benchmark_info_delete(benchInfo);
445
- }
446
- #else
447
- // Non-Apple stub
448
- result = "[iOS only] LiteRT-LM inference not available on this platform.";
449
- #endif
450
-
451
- auto endTime = std::chrono::steady_clock::now();
452
- double latencyMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
453
- lastStats_.totalTime = latencyMs / 1000.0;
454
-
455
- // Update history
456
- history_.push_back(Message{Role::USER, message});
457
- history_.push_back(Message{Role::MODEL, result});
458
-
459
- return result;
460
- }
461
-
462
- // =============================================================================
463
- // sendMessageAsync — Streaming text inference
464
- // =============================================================================
465
-
466
- void HybridLiteRTLM::streamCallbackFn(void* callback_data, const char* chunk,
467
- bool is_final, const char* error_msg) {
468
- auto* ctx = static_cast<StreamContext*>(callback_data);
469
-
470
- if (error_msg) {
471
- // Error occurred — notify JS and clean up
472
- ctx->onToken(std::string("Error: ") + error_msg, true);
473
- delete ctx;
474
- return;
475
- }
476
-
477
- if (is_final) {
478
- // Calculate stats
479
- auto endTime = std::chrono::steady_clock::now();
480
- double durationMs = std::chrono::duration<double, std::milli>(endTime - ctx->startTime).count();
481
-
482
- if (ctx->lastStats && ctx->tokenCount > 0) {
483
- ctx->lastStats->completionTokens = static_cast<double>(ctx->tokenCount);
484
- ctx->lastStats->totalTime = durationMs / 1000.0;
485
- ctx->lastStats->tokensPerSecond = (ctx->tokenCount / durationMs) * 1000.0;
486
- }
487
-
488
- // Update history (thread-safe)
489
- {
490
- std::lock_guard<std::mutex> lock(*ctx->historyMutex);
491
- ctx->history->push_back(Message{Role::USER, ctx->userMessage});
492
- ctx->history->push_back(Message{Role::MODEL, ctx->fullResponse});
493
- }
494
-
495
- ctx->onToken("", true);
496
- delete ctx;
497
- return;
498
- }
499
-
500
- if (chunk) {
501
- std::string token(chunk);
502
- // Filter out Gemma control tokens from streamed chunks
503
- std::string cleaned = stripControlTokens(token);
504
- ctx->fullResponse += cleaned;
505
- ctx->tokenCount++;
506
- if (!cleaned.empty()) {
507
- ctx->onToken(cleaned, false);
508
- }
509
- }
510
- }
511
-
512
- void HybridLiteRTLM::sendMessageAsync(
513
- const std::string& message,
514
- const std::function<void(const std::string&, bool)>& onToken) {
515
-
516
- // Copy values for the background thread (avoid use-after-free)
517
- auto onTokenCopy = onToken;
518
- auto messageCopy = message;
519
-
520
- // Capture shared state safely — use unique_ptr to prevent leaks
521
- auto ctxOwner = std::make_unique<StreamContext>();
522
- ctxOwner->onToken = std::move(onTokenCopy);
523
- ctxOwner->fullResponse = "";
524
- ctxOwner->history = &history_;
525
- ctxOwner->historyMutex = &mutex_;
526
- ctxOwner->userMessage = messageCopy;
527
- ctxOwner->lastStats = &lastStats_;
528
- ctxOwner->startTime = std::chrono::steady_clock::now();
529
- ctxOwner->tokenCount = 0;
530
-
531
- #ifdef __APPLE__
532
- ensureLoaded();
533
-
534
- std::string msgJson = buildTextMessageJson(messageCopy);
535
-
536
- // Release ownership — the C callback now owns the context via raw pointer.
537
- // streamCallbackFn will delete it when done or on error.
538
- StreamContext* ctx = ctxOwner.release();
539
-
540
- // Wrap the initial engine call in runOnLargeStack for consistency
541
- // with all other engine entry points (XNNPack needs >512KB stack).
542
- runOnLargeStack([&]() {
543
- int result = litert_lm_conversation_send_message_stream(
544
- conversation_, msgJson.c_str(), nullptr,
545
- streamCallbackFn, ctx);
546
-
547
- if (result != 0) {
548
- delete ctx;
549
- throw std::runtime_error("LiteRT-LM: Failed to start streaming inference");
550
- }
551
- });
552
- #else
553
- // Non-Apple stub
554
- ctxOwner->onToken("[iOS only] Streaming not available on this platform.", true);
555
- // ctxOwner auto-deleted by unique_ptr
556
- #endif
557
- }
558
-
559
- // =============================================================================
560
- // sendMessageWithImage — Multimodal (vision)
561
- // =============================================================================
562
-
563
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithImage(
564
- const std::string& message,
565
- const std::string& imagePath) {
566
- return Promise<std::string>::async([this, message, imagePath]() -> std::string {
567
- std::string result;
568
- runOnLargeStack([&]() {
569
- result = sendMessageWithImageInternal(message, imagePath);
570
- });
571
- return result;
572
- });
573
- }
574
-
575
- std::string HybridLiteRTLM::sendMessageWithImageInternal(
576
- const std::string& message,
577
- const std::string& imagePath) {
578
-
579
- std::lock_guard<std::mutex> lock(mutex_);
580
- ensureLoaded();
581
-
582
- auto startTime = std::chrono::steady_clock::now();
583
- std::string result;
584
-
585
- #ifdef __APPLE__
586
- // Verify image exists
587
- std::ifstream imageFile(imagePath);
588
- if (!imageFile.good()) {
589
- throw std::runtime_error("Image file not found: " + imagePath);
590
- }
591
- imageFile.close();
592
-
593
- // Build multimodal message JSON — the C API handles image preprocessing
594
- std::string msgJson = buildImageMessageJson(message, imagePath);
595
-
596
- auto* response = litert_lm_conversation_send_message(
597
- conversation_, msgJson.c_str(), nullptr);
598
-
599
- if (!response) {
600
- throw std::runtime_error("LiteRT-LM: sendMessageWithImage failed");
601
- }
602
-
603
- const char* responseStr = litert_lm_json_response_get_string(response);
604
- if (responseStr) {
605
- result = extractTextFromResponse(std::string(responseStr));
606
- }
607
- litert_lm_json_response_delete(response);
608
- #else
609
- result = "[iOS only] Vision inference not available on this platform.";
610
- #endif
611
-
612
- auto endTime = std::chrono::steady_clock::now();
613
- lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
614
-
615
- history_.push_back(Message{Role::USER, message + " [image: " + imagePath + "]"});
616
- history_.push_back(Message{Role::MODEL, result});
617
-
618
- return result;
619
- }
620
-
621
- // =============================================================================
622
- // sendMessageWithAudio — Multimodal (audio)
623
- // =============================================================================
624
-
625
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithAudio(
626
- const std::string& message,
627
- const std::string& audioPath) {
628
- return Promise<std::string>::async([this, message, audioPath]() -> std::string {
629
- std::string result;
630
- runOnLargeStack([&]() {
631
- result = sendMessageWithAudioInternal(message, audioPath);
632
- });
633
- return result;
634
- });
635
- }
636
-
637
- std::string HybridLiteRTLM::sendMessageWithAudioInternal(
638
- const std::string& message,
639
- const std::string& audioPath) {
640
-
641
- std::lock_guard<std::mutex> lock(mutex_);
642
- ensureLoaded();
643
-
644
- auto startTime = std::chrono::steady_clock::now();
645
- std::string result;
646
-
647
- #ifdef __APPLE__
648
- std::ifstream audioFile(audioPath);
649
- if (!audioFile.good()) {
650
- throw std::runtime_error("Audio file not found: " + audioPath);
651
- }
652
- audioFile.close();
653
-
654
- std::string msgJson = buildAudioMessageJson(message, audioPath);
655
-
656
- auto* response = litert_lm_conversation_send_message(
657
- conversation_, msgJson.c_str(), nullptr);
658
-
659
- if (!response) {
660
- throw std::runtime_error("LiteRT-LM: sendMessageWithAudio failed");
661
- }
662
-
663
- const char* responseStr = litert_lm_json_response_get_string(response);
664
- if (responseStr) {
665
- result = extractTextFromResponse(std::string(responseStr));
666
- }
667
- litert_lm_json_response_delete(response);
668
- #else
669
- result = "[iOS only] Audio inference not available on this platform.";
670
- #endif
671
-
672
- auto endTime = std::chrono::steady_clock::now();
673
- lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
674
-
675
- history_.push_back(Message{Role::USER, message + " [audio: " + audioPath + "]"});
676
- history_.push_back(Message{Role::MODEL, result});
677
-
678
- return result;
679
- }
680
-
681
- // =============================================================================
682
- // downloadModel — Download model from URL
683
- // =============================================================================
684
-
685
- std::shared_ptr<Promise<std::string>> HybridLiteRTLM::downloadModel(
686
- const std::string& url,
687
- const std::string& fileName,
688
- const std::optional<std::function<void(double)>>& onProgress) {
689
- return Promise<std::string>::async([url, fileName, onProgress]() -> std::string {
690
- #ifdef __APPLE__
691
- return litert_lm::downloadModelFile(url, fileName, onProgress);
692
- #else
693
- // Non-Apple platforms: not supported from C++ (Android uses Kotlin)
694
- throw std::runtime_error("Download not available on this platform. Use the Kotlin implementation.");
695
- #endif
696
- });
697
- }
698
-
699
- std::shared_ptr<Promise<void>> HybridLiteRTLM::deleteModel(const std::string& fileName) {
700
- return Promise<void>::async([fileName]() {
701
- std::string path;
702
- #ifdef __APPLE__
703
- // Match the path used by IOSDownloadHelper: ~/Library/Caches/litert_models/
704
- const char* home = getenv("HOME");
705
- if (home) {
706
- path = std::string(home) + "/Library/Caches/litert_models/" + fileName;
707
- }
708
- #else
709
- path = "/tmp/" + fileName;
710
- #endif
711
- if (!path.empty()) {
712
- std::remove(path.c_str());
713
- }
714
- });
715
- }
716
-
717
- // =============================================================================
718
- // getHistory
719
- // =============================================================================
720
-
721
- std::vector<Message> HybridLiteRTLM::getHistory() {
722
- std::lock_guard<std::mutex> lock(mutex_);
723
- return history_;
724
- }
725
-
726
- // =============================================================================
727
- // resetConversation
728
- // =============================================================================
729
-
730
- void HybridLiteRTLM::resetConversation() {
731
- std::lock_guard<std::mutex> lock(mutex_);
732
-
733
- history_.clear();
734
- lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
735
-
736
- #ifdef __APPLE__
737
- if (isLoaded_ && engine_) {
738
- createNewConversation();
739
- }
740
- #endif
741
- }
742
-
743
- // =============================================================================
744
- // isReady
745
- // =============================================================================
746
-
747
- bool HybridLiteRTLM::isReady() {
748
- std::lock_guard<std::mutex> lock(mutex_);
749
- return isLoaded_;
750
- }
751
-
752
- // =============================================================================
753
- // getStats
754
- // =============================================================================
755
-
756
- GenerationStats HybridLiteRTLM::getStats() {
757
- std::lock_guard<std::mutex> lock(mutex_);
758
- return lastStats_;
759
- }
760
-
761
- // =============================================================================
762
- // getMemoryUsage — Uses Mach APIs for iOS process memory
763
- // =============================================================================
764
-
765
- MemoryUsage HybridLiteRTLM::getMemoryUsage() {
766
- double nativeHeapBytes = 0;
767
- double residentBytes = 0;
768
- double availableBytes = 0;
769
- bool isLowMemory = false;
770
-
771
- #ifdef __APPLE__
772
- // Get app process memory (resident set size)
773
- struct mach_task_basic_info info;
774
- mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
775
-
776
- kern_return_t kr = task_info(mach_task_self(),
777
- MACH_TASK_BASIC_INFO,
778
- (task_info_t)&info,
779
- &count);
780
-
781
- if (kr == KERN_SUCCESS) {
782
- residentBytes = static_cast<double>(info.resident_size);
783
- // On iOS, mach_task_basic_info doesn't separate heap from RSS.
784
- // Use resident_size_max as a proxy for peak native allocation.
785
- nativeHeapBytes = static_cast<double>(info.resident_size);
786
- }
787
-
788
- // Use os_proc_available_memory() (iOS 13+) for accurate Jetsam headroom.
789
- // This reports how much memory the process can still allocate before
790
- // the system kills it — far more accurate than total_physical - process_rss.
791
- availableBytes = static_cast<double>(os_proc_available_memory());
792
-
793
- // Low memory threshold (~200MB available)
794
- isLowMemory = availableBytes < 200.0 * 1024.0 * 1024.0;
795
- #endif
796
-
797
- return MemoryUsage{
798
- nativeHeapBytes, // nativeHeapBytes (RSS as proxy on iOS)
799
- residentBytes, // residentBytes
800
- availableBytes, // availableMemoryBytes
801
- isLowMemory // isLowMemory
802
- };
803
- }
804
-
805
- // =============================================================================
806
- // close — Clean up all LiteRT-LM resources
807
- // =============================================================================
808
-
809
- void HybridLiteRTLM::close() {
810
- // Note: Don't lock here if called from destructor (mutex may be destroyed)
811
- // The caller (loadModel, destructor) should handle locking.
812
-
813
- isLoaded_ = false;
814
- history_.clear();
815
-
816
- #ifdef __APPLE__
817
- if (conversation_) {
818
- litert_lm_conversation_delete(conversation_);
819
- conversation_ = nullptr;
820
- }
821
- if (conv_config_) {
822
- litert_lm_conversation_config_delete(conv_config_);
823
- conv_config_ = nullptr;
824
- }
825
- if (session_config_) {
826
- litert_lm_session_config_delete(session_config_);
827
- session_config_ = nullptr;
828
- }
829
- if (engine_) {
830
- litert_lm_engine_delete(engine_);
831
- engine_ = nullptr;
832
- }
833
- #endif
834
-
835
- lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
836
- }
837
-
838
- } // namespace margelo::nitro::litertlm