react-native-litert-lm 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +269 -186
  2. package/android/build.gradle +1 -1
  3. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +93 -37
  4. package/app.plugin.js +33 -0
  5. package/cpp/HybridLiteRTLM.cpp +604 -450
  6. package/cpp/HybridLiteRTLM.hpp +54 -23
  7. package/cpp/IOSDownloadHelper.h +24 -0
  8. package/cpp/cpp-adapter.cpp +2 -2
  9. package/cpp/include/litert_lm_engine.h +509 -0
  10. package/ios/IOSDownloadHelper.mm +129 -0
  11. package/ios/LiteRTLMAutolinking.mm +30 -0
  12. package/lib/hooks.d.ts +9 -4
  13. package/lib/hooks.js +34 -20
  14. package/lib/index.d.ts +1 -0
  15. package/lib/index.js +2 -5
  16. package/lib/memoryTracker.d.ts +1 -1
  17. package/lib/memoryTracker.js +1 -1
  18. package/lib/modelFactory.d.ts +11 -5
  19. package/lib/modelFactory.js +9 -4
  20. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +11 -4
  21. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +31 -37
  22. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +19 -22
  23. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +15 -18
  24. package/package.json +12 -5
  25. package/react-native-litert-lm.podspec +20 -7
  26. package/scripts/build-ios-engine.sh +302 -0
  27. package/scripts/download-ios-frameworks.sh +72 -0
  28. package/scripts/postinstall.js +116 -0
  29. package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
  30. package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
  31. package/scripts/stubs/llguidance_stubs.c +101 -0
  32. package/src/hooks.ts +62 -39
  33. package/src/index.ts +4 -7
  34. package/src/memoryTracker.ts +1 -1
  35. package/src/modelFactory.ts +30 -5
@@ -2,7 +2,7 @@
2
2
  // HybridLiteRTLM.cpp
3
3
  // react-native-litert-lm
4
4
  //
5
- // High-performance LLM inference using LiteRT-LM.
5
+ // High-performance LLM inference using LiteRT-LM C API.
6
6
  //
7
7
  // NOTE: This C++ implementation is used for iOS ONLY.
8
8
  // Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
@@ -11,80 +11,204 @@
11
11
 
12
12
  #include "HybridLiteRTLM.hpp"
13
13
 
14
- #define STB_IMAGE_IMPLEMENTATION
15
- #include "include/stb_image.h"
16
14
 
15
+
16
+
17
+ #include <NitroModules/Promise.hpp>
17
18
  #include <chrono>
18
19
  #include <stdexcept>
19
20
  #include <sstream>
21
+ #include <sys/stat.h>
22
+ #include <cstdio>
23
+
24
+ #ifdef __APPLE__
25
+ #include "IOSDownloadHelper.h"
26
+ #endif
20
27
  #include <fstream>
28
+ #include <thread>
29
+ #include <regex>
21
30
 
22
31
  namespace margelo::nitro::litertlm {
23
32
 
24
- //------------------------------------------------------------------------------
25
- // Helper: Format user prompt (applies chat template if needed)
26
- //------------------------------------------------------------------------------
27
- std::string HybridLiteRTLM::formatUserPrompt(const std::string& message) const {
28
- // The LiteRT-LM Conversation class handles chat templates internally,
29
- // so we just return the message as-is. If we were using Session directly,
30
- // we'd apply the Gemma/Phi template here.
31
- return message;
33
+ // =============================================================================
34
+ // JSON Helpers
35
+ // =============================================================================
36
+
37
+ std::string HybridLiteRTLM::escapeJson(const std::string& input) {
38
+ std::string output;
39
+ output.reserve(input.size() + 16);
40
+ for (char c : input) {
41
+ switch (c) {
42
+ case '"': output += "\\\""; break;
43
+ case '\\': output += "\\\\"; break;
44
+ case '\n': output += "\\n"; break;
45
+ case '\r': output += "\\r"; break;
46
+ case '\t': output += "\\t"; break;
47
+ case '\b': output += "\\b"; break;
48
+ case '\f': output += "\\f"; break;
49
+ default: output += c; break;
50
+ }
51
+ }
52
+ return output;
53
+ }
54
+
55
+ std::string HybridLiteRTLM::buildTextMessageJson(const std::string& text) {
56
+ return "{\"role\":\"user\",\"content\":\"" + escapeJson(text) + "\"}";
57
+ }
58
+
59
+ std::string HybridLiteRTLM::buildImageMessageJson(const std::string& text, const std::string& imagePath) {
60
+ return "{\"role\":\"user\",\"content\":["
61
+ "{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
62
+ "{\"type\":\"image\",\"path\":\"" + escapeJson(imagePath) + "\"}"
63
+ "]}";
64
+ }
65
+
66
+ std::string HybridLiteRTLM::buildAudioMessageJson(const std::string& text, const std::string& audioPath) {
67
+ return "{\"role\":\"user\",\"content\":["
68
+ "{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
69
+ "{\"type\":\"audio\",\"path\":\"" + escapeJson(audioPath) + "\"}"
70
+ "]}";
71
+ }
72
+
73
+ std::string HybridLiteRTLM::extractTextFromResponse(const std::string& jsonResponse) {
74
+ // The C API response JSON is structured as:
75
+ // {"role":"model","content":[{"type":"text","text":"..."}]}
76
+ // or:
77
+ // {"role":"model","content":"..."}
78
+ //
79
+ // We use simple string extraction to avoid a JSON library dependency.
80
+
81
+ // Try array format first: find "text":"..." after "type":"text"
82
+ std::string textMarker = "\"text\":\"";
83
+ size_t pos = jsonResponse.find("\"type\":\"text\"");
84
+ if (pos != std::string::npos) {
85
+ pos = jsonResponse.find(textMarker, pos);
86
+ if (pos != std::string::npos) {
87
+ pos += textMarker.length();
88
+ std::string result;
89
+ result.reserve(jsonResponse.size() - pos);
90
+ for (size_t i = pos; i < jsonResponse.size(); i++) {
91
+ if (jsonResponse[i] == '\\' && i + 1 < jsonResponse.size()) {
92
+ char next = jsonResponse[i + 1];
93
+ if (next == '"') { result += '"'; i++; }
94
+ else if (next == '\\') { result += '\\'; i++; }
95
+ else if (next == 'n') { result += '\n'; i++; }
96
+ else if (next == 'r') { result += '\r'; i++; }
97
+ else if (next == 't') { result += '\t'; i++; }
98
+ else { result += jsonResponse[i]; }
99
+ } else if (jsonResponse[i] == '"') {
100
+ break; // End of the text value
101
+ } else {
102
+ result += jsonResponse[i];
103
+ }
104
+ }
105
+ return result;
106
+ }
107
+ }
108
+
109
+ // Try simple string format: "content":"..."
110
+ std::string contentMarker = "\"content\":\"";
111
+ pos = jsonResponse.find(contentMarker);
112
+ if (pos != std::string::npos) {
113
+ pos += contentMarker.length();
114
+ std::string result;
115
+ for (size_t i = pos; i < jsonResponse.size(); i++) {
116
+ if (jsonResponse[i] == '\\' && i + 1 < jsonResponse.size()) {
117
+ char next = jsonResponse[i + 1];
118
+ if (next == '"') { result += '"'; i++; }
119
+ else if (next == '\\') { result += '\\'; i++; }
120
+ else if (next == 'n') { result += '\n'; i++; }
121
+ else { result += jsonResponse[i]; }
122
+ } else if (jsonResponse[i] == '"') {
123
+ break;
124
+ } else {
125
+ result += jsonResponse[i];
126
+ }
127
+ }
128
+ return result;
129
+ }
130
+
131
+ // Fallback: return full response
132
+ return jsonResponse;
32
133
  }
33
134
 
34
- //------------------------------------------------------------------------------
35
- // Helper: Create a new Conversation from existing Engine
36
- //------------------------------------------------------------------------------
135
+ // =============================================================================
136
+ // Conversation Management
137
+ // =============================================================================
138
+
37
139
  void HybridLiteRTLM::createNewConversation() {
38
- #ifdef LITERT_LM_ENABLED
140
+ #ifdef __APPLE__
39
141
  if (!engine_) {
40
142
  throw std::runtime_error("Cannot create conversation: engine not initialized");
41
143
  }
42
144
 
43
- auto conversation_config = litert::lm::ConversationConfig::CreateDefault(*engine_);
44
- if (!conversation_config.ok()) {
45
- throw std::runtime_error("Failed to create conversation config: " +
46
- std::string(conversation_config.status().message()));
145
+ // Clean up previous conversation
146
+ if (conversation_) {
147
+ litert_lm_conversation_delete(conversation_);
148
+ conversation_ = nullptr;
149
+ }
150
+ if (conv_config_) {
151
+ litert_lm_conversation_config_delete(conv_config_);
152
+ conv_config_ = nullptr;
47
153
  }
48
154
 
49
- auto conversation = litert::lm::Conversation::Create(*engine_, *conversation_config);
50
- if (!conversation.ok()) {
51
- throw std::runtime_error("Failed to create conversation: " +
52
- std::string(conversation.status().message()));
155
+ // Build system message JSON if provided
156
+ std::string systemMsgJson;
157
+ const char* systemMsgPtr = nullptr;
158
+ if (!systemPrompt_.empty()) {
159
+ systemMsgJson = "{\"role\":\"system\",\"content\":\"" + escapeJson(systemPrompt_) + "\"}";
160
+ systemMsgPtr = systemMsgJson.c_str();
161
+ }
162
+
163
+ // Create conversation config with session config
164
+ conv_config_ = litert_lm_conversation_config_create(
165
+ engine_,
166
+ session_config_, // may be nullptr for defaults
167
+ systemMsgPtr, // system message
168
+ nullptr, // tools (not used yet)
169
+ nullptr, // messages history
170
+ false // constrained decoding
171
+ );
172
+ if (!conv_config_) {
173
+ throw std::runtime_error("Failed to create conversation config");
174
+ }
175
+
176
+ // Create conversation
177
+ conversation_ = litert_lm_conversation_create(engine_, conv_config_);
178
+ if (!conversation_) {
179
+ litert_lm_conversation_config_delete(conv_config_);
180
+ conv_config_ = nullptr;
181
+ throw std::runtime_error("Failed to create conversation");
53
182
  }
54
- conversation_ = std::move(*conversation);
55
183
  #endif
56
184
  }
57
185
 
58
- //------------------------------------------------------------------------------
59
- // loadModel - Initialize Engine and Conversation
60
- //------------------------------------------------------------------------------
61
- void HybridLiteRTLM::loadModel(
186
+ // =============================================================================
187
+ // loadModel
188
+ // =============================================================================
189
+
190
+ std::shared_ptr<Promise<void>> HybridLiteRTLM::loadModel(
191
+ const std::string& modelPath,
192
+ const std::optional<LLMConfig>& config) {
193
+ return Promise<void>::async([this, modelPath, config]() {
194
+ loadModelInternal(modelPath, config);
195
+ });
196
+ }
197
+
198
+ void HybridLiteRTLM::loadModelInternal(
62
199
  const std::string& modelPath,
63
200
  const std::optional<LLMConfig>& config) {
64
201
 
65
202
  std::lock_guard<std::mutex> lock(mutex_);
66
203
 
67
- // Clean up existing resources
68
204
  if (isLoaded_) {
69
- isLoaded_ = false;
70
- history_.clear();
71
- #ifdef LITERT_LM_ENABLED
72
- conversation_.reset();
73
- engine_.reset();
74
- #endif
205
+ close();
75
206
  }
76
207
 
77
- // Apply configuration
78
208
  if (config.has_value()) {
79
209
  if (config->backend.has_value()) {
80
210
  backend_ = config->backend.value();
81
211
  }
82
- if (config->visionBackend.has_value()) {
83
- visionBackend_ = config->visionBackend.value();
84
- }
85
- if (config->audioBackend.has_value()) {
86
- audioBackend_ = config->audioBackend.value();
87
- }
88
212
  if (config->temperature.has_value()) {
89
213
  temperature_ = config->temperature.value();
90
214
  }
@@ -97,520 +221,550 @@ void HybridLiteRTLM::loadModel(
97
221
  if (config->maxTokens.has_value()) {
98
222
  maxTokens_ = config->maxTokens.value();
99
223
  }
224
+ if (config->systemPrompt.has_value()) {
225
+ systemPrompt_ = config->systemPrompt.value();
226
+ }
100
227
  }
101
228
 
102
- #ifdef LITERT_LM_ENABLED
103
- // 1. Create ModelAssets from model path
104
- auto model_assets = litert::lm::ModelAssets::Create(modelPath);
105
- if (!model_assets.ok()) {
106
- throw std::runtime_error("Failed to load model assets: " +
107
- std::string(model_assets.status().message()));
108
- }
109
-
110
- // 2. Map our Backend enum to LiteRT-LM Backend enum
111
- auto engine_backend = (backend_ == Backend::GPU)
112
- ? litert::lm::Backend::GPU
113
- : litert::lm::Backend::CPU;
114
- auto vision_backend = (visionBackend_ == Backend::GPU)
115
- ? litert::lm::Backend::GPU
116
- : litert::lm::Backend::CPU;
117
- auto audio_backend = (audioBackend_ == Backend::GPU)
118
- ? litert::lm::Backend::GPU
119
- : litert::lm::Backend::CPU;
120
-
121
- // 3. Create EngineSettings with all backends
122
- auto engine_settings = litert::lm::EngineSettings::CreateDefault(
123
- *model_assets,
124
- engine_backend,
125
- vision_backend,
126
- audio_backend
127
- );
128
-
129
- // 4. Create the Engine (heavyweight - loads model weights)
130
- auto engine = litert::lm::Engine::CreateEngine(engine_settings);
131
- if (!engine.ok()) {
132
- throw std::runtime_error("Failed to create engine: " +
133
- std::string(engine.status().message()));
229
+ #ifdef __APPLE__
230
+ // Set log verbosity: 2=WARNING (production), 0=INFO (debug)
231
+ litert_lm_set_min_log_level(2);
232
+
233
+ auto backendStr = [](Backend b) -> const char* {
234
+ switch (b) {
235
+ case Backend::GPU: return "gpu";
236
+ case Backend::NPU: return "gpu"; // NPU not available on iOS, use GPU
237
+ default: return "cpu";
238
+ }
239
+ };
240
+
241
+ auto tryCreateEngine = [&](const char* backend, const char* visionBackend) -> bool {
242
+ auto* settings = litert_lm_engine_settings_create(
243
+ modelPath.c_str(),
244
+ backend,
245
+ visionBackend,
246
+ nullptr // audio executor not supported on iOS yet
247
+ );
248
+ if (!settings) {
249
+ return false;
250
+ }
251
+
252
+ litert_lm_engine_settings_set_max_num_tokens(settings, static_cast<int>(maxTokens_));
253
+ litert_lm_engine_settings_enable_benchmark(settings);
254
+
255
+ // Set cache directory to the same directory as the model file
256
+ std::string cacheDir = modelPath.substr(0, modelPath.find_last_of('/'));
257
+ litert_lm_engine_settings_set_cache_dir(settings, cacheDir.c_str());
258
+
259
+ engine_ = litert_lm_engine_create(settings);
260
+ litert_lm_engine_settings_delete(settings);
261
+
262
+ return engine_ != nullptr;
263
+ };
264
+
265
+ // Try requested backend first (e.g. gpu/gpu)
266
+ const char* primaryBackend = backendStr(backend_);
267
+ if (!tryCreateEngine(primaryBackend, primaryBackend)) {
268
+ // Fallback chain for when the primary backend fails:
269
+ bool fallbackOk = false;
270
+ if (backend_ != Backend::CPU) {
271
+ // 1) Try CPU main + GPU vision (model's vision encoder often requires GPU)
272
+ fallbackOk = tryCreateEngine("cpu", "gpu");
273
+ // 2) Try CPU main + CPU vision
274
+ if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", "cpu");
275
+ }
276
+ // 3) Try CPU main + no vision (nullptr skips vision executor entirely)
277
+ if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", nullptr);
278
+ if (fallbackOk) {
279
+ backend_ = Backend::CPU;
280
+ }
134
281
  }
135
- engine_ = std::move(*engine);
282
+
283
+ if (!engine_) {
284
+ // Collect diagnostic info
285
+ std::string diag = " | Diagnostics: ";
286
+ struct stat st;
287
+ if (stat(modelPath.c_str(), &st) == 0) {
288
+ diag += "File size: " + std::to_string(st.st_size) + " bytes";
289
+ } else {
290
+ diag += "Failed to stat file (errno: " + std::to_string(errno) + ")";
291
+ }
292
+
293
+ FILE* f = fopen(modelPath.c_str(), "rb");
294
+ if (f) {
295
+ diag += ", Readable: YES";
296
+ fclose(f);
297
+ } else {
298
+ diag += ", Readable: NO (errno: " + std::to_string(errno) + ")";
299
+ }
300
+
301
+ // Get the native error from the C API
302
+ const char* nativeErr = litert_lm_get_last_error();
303
+ if (nativeErr && nativeErr[0] != '\0') {
304
+ diag += " | Native error: " + std::string(nativeErr);
305
+ }
136
306
 
137
- // 5. Create the Conversation (lightweight - holds KV cache)
138
- createNewConversation();
307
+ throw std::runtime_error(
308
+ "Failed to create LiteRT-LM engine. Tried backend '" +
309
+ std::string(primaryBackend) + "' and CPU fallback. Model path: " + modelPath + diag);
310
+ }
139
311
 
140
- #endif // LITERT_LM_ENABLED
312
+ session_config_ = litert_lm_session_config_create();
313
+ if (session_config_) {
314
+ litert_lm_session_config_set_max_output_tokens(session_config_, static_cast<int>(maxTokens_));
315
+
316
+ LiteRtLmSamplerParams sampler{};
317
+ sampler.type = kTopP;
318
+ sampler.top_k = static_cast<int32_t>(topK_);
319
+ sampler.top_p = static_cast<float>(topP_);
320
+ sampler.temperature = static_cast<float>(temperature_);
321
+ sampler.seed = 0;
322
+ litert_lm_session_config_set_sampler_params(session_config_, &sampler);
323
+ }
324
+
325
+ createNewConversation();
326
+ #endif
141
327
 
142
328
  isLoaded_ = true;
143
329
  history_.clear();
144
-
145
- // Reset stats
146
330
  lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
147
331
  }
148
332
 
149
- //------------------------------------------------------------------------------
150
- // sendMessage - Blocking text inference
151
- //------------------------------------------------------------------------------
152
- std::string HybridLiteRTLM::sendMessage(const std::string& message) {
333
+ // =============================================================================
334
+ // sendMessage Blocking text inference
335
+ // =============================================================================
336
+
337
+ std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessage(const std::string& message) {
338
+ return Promise<std::string>::async([this, message]() -> std::string {
339
+ return sendMessageInternal(message);
340
+ });
341
+ }
342
+
343
+ std::string HybridLiteRTLM::sendMessageInternal(const std::string& message) {
153
344
  std::lock_guard<std::mutex> lock(mutex_);
154
345
  ensureLoaded();
155
346
 
156
- auto startTime = std::chrono::high_resolution_clock::now();
157
-
158
- // Add user message to history
159
- Message userMessage;
160
- userMessage.role = Role::USER;
161
- userMessage.content = message;
162
- history_.push_back(userMessage);
347
+ auto startTime = std::chrono::steady_clock::now();
348
+ std::string result;
163
349
 
164
- std::string responseText;
350
+ #ifdef __APPLE__
351
+ std::string msgJson = buildTextMessageJson(message);
165
352
 
166
- #ifdef LITERT_LM_ENABLED
167
- // Build the message struct for LiteRT-LM
168
- // The Conversation API expects a structured input
169
- litert::lm::UserMessage lm_message;
170
- lm_message.role = "user";
171
- lm_message.content = message;
353
+ auto* response = litert_lm_conversation_send_message(
354
+ conversation_, msgJson.c_str(), nullptr);
172
355
 
173
- auto response = conversation_->SendMessage(lm_message);
174
- if (!response.ok()) {
175
- // Remove the user message we just added since inference failed
176
- history_.pop_back();
177
- throw std::runtime_error("Inference failed: " +
178
- std::string(response.status().message()));
356
+ if (!response) {
357
+ throw std::runtime_error("LiteRT-LM: sendMessage failed");
179
358
  }
180
359
 
181
- responseText = response->content;
182
-
183
- // Update stats from response if available
184
- if (response->stats.has_value()) {
185
- const auto& stats = response->stats.value();
186
- lastStats_.promptTokens = static_cast<double>(stats.prompt_tokens);
187
- lastStats_.completionTokens = static_cast<double>(stats.completion_tokens);
188
- lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
189
- lastStats_.timeToFirstToken = stats.time_to_first_token_ms;
190
- lastStats_.totalTime = stats.total_time_ms;
191
- lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
192
- ? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
193
- : 0.0;
360
+ const char* responseStr = litert_lm_json_response_get_string(response);
361
+ if (responseStr) {
362
+ result = extractTextFromResponse(std::string(responseStr));
363
+ }
364
+ litert_lm_json_response_delete(response);
365
+
366
+ auto* benchInfo = litert_lm_conversation_get_benchmark_info(conversation_);
367
+ if (benchInfo) {
368
+ int numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo);
369
+ if (numDecodeTurns > 0) {
370
+ int lastIdx = numDecodeTurns - 1;
371
+ lastStats_.tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx);
372
+ lastStats_.completionTokens = static_cast<double>(
373
+ litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx));
374
+ }
375
+ lastStats_.timeToFirstToken = litert_lm_benchmark_info_get_time_to_first_token(benchInfo);
376
+ litert_lm_benchmark_info_delete(benchInfo);
194
377
  }
195
-
196
378
  #else
197
- // Stub response when LiteRT-LM is not available
198
- responseText = "[LiteRT-LM Stub] Model response placeholder. "
199
- "Real inference will be available when LiteRT-LM libraries are integrated. "
200
- "You said: " + message;
201
-
202
- auto endTime = std::chrono::high_resolution_clock::now();
203
- auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
204
-
205
- // Estimate stats for stub
206
- lastStats_.promptTokens = static_cast<double>(message.length() / 4);
207
- lastStats_.completionTokens = static_cast<double>(responseText.length() / 4);
208
- lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
209
- lastStats_.totalTime = static_cast<double>(duration);
210
- lastStats_.timeToFirstToken = lastStats_.totalTime / 2;
211
- lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
212
- ? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
213
- : 0;
379
+ // Non-Apple stub
380
+ result = "[iOS only] LiteRT-LM inference not available on this platform.";
214
381
  #endif
215
382
 
216
- // Add model response to history
217
- Message modelMessage;
218
- modelMessage.role = Role::MODEL;
219
- modelMessage.content = responseText;
220
- history_.push_back(modelMessage);
383
+ auto endTime = std::chrono::steady_clock::now();
384
+ double latencyMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
385
+ lastStats_.totalTime = latencyMs / 1000.0;
386
+
387
+ // Update history
388
+ history_.push_back(Message{Role::USER, message});
389
+ history_.push_back(Message{Role::MODEL, result});
390
+
391
+ return result;
392
+ }
393
+
394
+ // =============================================================================
395
+ // sendMessageAsync — Streaming text inference
396
+ // =============================================================================
397
+
398
+ void HybridLiteRTLM::streamCallbackFn(void* callback_data, const char* chunk,
399
+ bool is_final, const char* error_msg) {
400
+ auto* ctx = static_cast<StreamContext*>(callback_data);
401
+
402
+ if (error_msg) {
403
+ // Error occurred — notify JS and clean up
404
+ ctx->onToken(std::string("Error: ") + error_msg, true);
405
+ delete ctx;
406
+ return;
407
+ }
408
+
409
+ if (is_final) {
410
+ // Calculate stats
411
+ auto endTime = std::chrono::steady_clock::now();
412
+ double durationMs = std::chrono::duration<double, std::milli>(endTime - ctx->startTime).count();
413
+
414
+ if (ctx->lastStats && ctx->tokenCount > 0) {
415
+ ctx->lastStats->completionTokens = static_cast<double>(ctx->tokenCount);
416
+ ctx->lastStats->totalTime = durationMs / 1000.0;
417
+ ctx->lastStats->tokensPerSecond = (ctx->tokenCount / durationMs) * 1000.0;
418
+ }
419
+
420
+ // Update history (thread-safe)
421
+ {
422
+ std::lock_guard<std::mutex> lock(*ctx->historyMutex);
423
+ ctx->history->push_back(Message{Role::USER, ctx->userMessage});
424
+ ctx->history->push_back(Message{Role::MODEL, ctx->fullResponse});
425
+ }
426
+
427
+ ctx->onToken("", true);
428
+ delete ctx;
429
+ return;
430
+ }
221
431
 
222
- return responseText;
432
+ if (chunk) {
433
+ std::string token(chunk);
434
+ ctx->fullResponse += token;
435
+ ctx->tokenCount++;
436
+ ctx->onToken(token, false);
437
+ }
223
438
  }
224
439
 
225
- //------------------------------------------------------------------------------
226
- // sendMessageWithImage - Multimodal image + text
227
- //------------------------------------------------------------------------------
228
- std::string HybridLiteRTLM::sendMessageWithImage(
440
+ void HybridLiteRTLM::sendMessageAsync(
229
441
  const std::string& message,
230
- const std::string& imagePath) {
442
+ const std::function<void(const std::string&, bool)>& onToken) {
443
+
444
+ // Copy values for the background thread (avoid use-after-free)
445
+ auto onTokenCopy = onToken;
446
+ auto messageCopy = message;
447
+
448
+ // Capture shared state safely
449
+ auto* ctx = new StreamContext();
450
+ ctx->onToken = std::move(onTokenCopy);
451
+ ctx->fullResponse = "";
452
+ ctx->history = &history_;
453
+ ctx->historyMutex = &mutex_;
454
+ ctx->userMessage = messageCopy;
455
+ ctx->lastStats = &lastStats_;
456
+ ctx->startTime = std::chrono::steady_clock::now();
457
+ ctx->tokenCount = 0;
231
458
 
232
- std::lock_guard<std::mutex> lock(mutex_);
459
+ #ifdef __APPLE__
233
460
  ensureLoaded();
234
461
 
235
- #ifdef LITERT_LM_ENABLED
236
- // Load image using stb_image
237
- int width, height, channels;
238
- unsigned char* img = stbi_load(imagePath.c_str(), &width, &height, &channels, 3); // Force 3 channels (RGB)
239
- if (img == nullptr) {
240
- throw std::runtime_error("Failed to load image from path: " + imagePath);
462
+ std::string msgJson = buildTextMessageJson(messageCopy);
463
+
464
+ int result = litert_lm_conversation_send_message_stream(
465
+ conversation_, msgJson.c_str(), nullptr,
466
+ streamCallbackFn, ctx);
467
+
468
+ if (result != 0) {
469
+ delete ctx;
470
+ throw std::runtime_error("LiteRT-LM: Failed to start streaming inference");
241
471
  }
472
+ #else
473
+ // Non-Apple stub
474
+ ctx->onToken("[iOS only] Streaming not available on this platform.", true);
475
+ delete ctx;
476
+ #endif
477
+ }
242
478
 
243
- // Create input tensor/buffer for the engine.
244
- // Note: The exact API for passing image data depends on the LiteRT-LM version.
245
- // Assuming a structure that accepts raw bytes and dimensions.
246
- litert::lm::UserMessage lm_message;
247
- lm_message.role = "user";
248
-
249
- // Construct multimodal content
250
- // Option A: If UserMessage supports a list of content parts
251
- litert::lm::ContentPart textPart;
252
- textPart.type = litert::lm::ContentType::TEXT;
253
- textPart.text = message;
254
- lm_message.parts.push_back(textPart);
479
+ // =============================================================================
480
+ // sendMessageWithImage Multimodal (vision)
481
+ // =============================================================================
255
482
 
256
- litert::lm::ContentPart imagePart;
257
- imagePart.type = litert::lm::ContentType::IMAGE;
258
- imagePart.image.width = width;
259
- imagePart.image.height = height;
260
- imagePart.image.channels = channels;
261
- imagePart.image.data = std::vector<uint8_t>(img, img + (width * height * channels));
262
- lm_message.parts.push_back(imagePart);
263
-
264
- stbi_image_free(img);
483
+ std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithImage(
484
+ const std::string& message,
485
+ const std::string& imagePath) {
486
+ return Promise<std::string>::async([this, message, imagePath]() -> std::string {
487
+ return sendMessageWithImageInternal(message, imagePath);
488
+ });
489
+ }
265
490
 
266
- auto response = conversation_->SendMessage(lm_message);
267
- if (!response.ok()) {
268
- throw std::runtime_error("Multimodal inference failed: " +
269
- std::string(response.status().message()));
491
+ std::string HybridLiteRTLM::sendMessageWithImageInternal(
492
+ const std::string& message,
493
+ const std::string& imagePath) {
494
+
495
+ std::lock_guard<std::mutex> lock(mutex_);
496
+ ensureLoaded();
497
+
498
+ auto startTime = std::chrono::steady_clock::now();
499
+ std::string result;
500
+
501
+ #ifdef __APPLE__
502
+ // Verify image exists
503
+ std::ifstream imageFile(imagePath);
504
+ if (!imageFile.good()) {
505
+ throw std::runtime_error("Image file not found: " + imagePath);
270
506
  }
507
+ imageFile.close();
271
508
 
272
- // Add to history (metadata only)
273
- Message userMessage;
274
- userMessage.role = Role::USER;
275
- userMessage.content = message + " [Image]";
276
- history_.push_back(userMessage);
509
+ // Build multimodal message JSON — the C API handles image preprocessing
510
+ std::string msgJson = buildImageMessageJson(message, imagePath);
277
511
 
278
- Message modelMessage;
279
- modelMessage.role = Role::MODEL;
280
- modelMessage.content = response->content;
281
- history_.push_back(modelMessage);
512
+ auto* response = litert_lm_conversation_send_message(
513
+ conversation_, msgJson.c_str(), nullptr);
282
514
 
283
- return response->content;
515
+ if (!response) {
516
+ std::string errMsg = "LiteRT-LM: sendMessageWithImage failed";
517
+ const char* nativeErr = litert_lm_get_last_error();
518
+ if (nativeErr && nativeErr[0] != '\0') {
519
+ errMsg += ": " + std::string(nativeErr);
520
+ }
521
+ throw std::runtime_error(errMsg);
522
+ }
284
523
 
524
+ const char* responseStr = litert_lm_json_response_get_string(response);
525
+ if (responseStr) {
526
+ result = extractTextFromResponse(std::string(responseStr));
527
+ }
528
+ litert_lm_json_response_delete(response);
285
529
  #else
286
- // iOS: LiteRT-LM SDK not yet available, throw clear error
287
- throw std::runtime_error(
288
- "sendMessageWithImage is not supported on iOS. "
289
- "LiteRT-LM iOS SDK is not yet available. "
290
- "Please use text-only sendMessage() for now.");
530
+ result = "[iOS only] Vision inference not available on this platform.";
291
531
  #endif
532
+
533
+ auto endTime = std::chrono::steady_clock::now();
534
+ lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
535
+
536
+ history_.push_back(Message{Role::USER, message + " [image: " + imagePath + "]"});
537
+ history_.push_back(Message{Role::MODEL, result});
538
+
539
+ return result;
292
540
  }
293
541
 
294
- #endif
295
- }
542
+ // =============================================================================
543
+ // sendMessageWithAudio — Multimodal (audio)
544
+ // =============================================================================
296
545
 
297
- //------------------------------------------------------------------------------
298
- // downloadModel - Download model file from URL
299
- //------------------------------------------------------------------------------
300
- std::future<std::string> HybridLiteRTLM::downloadModel(
301
- const std::string& url,
302
- const std::string& fileName,
303
- const std::optional<std::function<void(double)>>& onProgress) {
304
-
305
- // Return a future that throws an exception
306
- return std::async(std::launch::async, []() -> std::string {
307
- throw std::runtime_error(
308
- "downloadModel is not supported on iOS yet. "
309
- "Please download the model manually using a separate library."
310
- );
546
+ std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithAudio(
547
+ const std::string& message,
548
+ const std::string& audioPath) {
549
+ return Promise<std::string>::async([this, message, audioPath]() -> std::string {
550
+ return sendMessageWithAudioInternal(message, audioPath);
311
551
  });
312
552
  }
313
553
 
314
- //------------------------------------------------------------------------------
315
- // sendMessageWithAudio - Multimodal audio + text
316
- //------------------------------------------------------------------------------
317
- std::string HybridLiteRTLM::sendMessageWithAudio(
554
+ std::string HybridLiteRTLM::sendMessageWithAudioInternal(
318
555
  const std::string& message,
319
556
  const std::string& audioPath) {
320
557
 
321
558
  std::lock_guard<std::mutex> lock(mutex_);
322
559
  ensureLoaded();
323
560
 
324
- #ifdef LITERT_LM_ENABLED
325
- // Load audio file
326
- std::ifstream audioFile(audioPath, std::ios::binary);
327
- if (!audioFile) {
328
- throw std::runtime_error("Failed to open audio file: " + audioPath);
561
+ auto startTime = std::chrono::steady_clock::now();
562
+ std::string result;
563
+
564
+ #ifdef __APPLE__
565
+ std::ifstream audioFile(audioPath);
566
+ if (!audioFile.good()) {
567
+ throw std::runtime_error("Audio file not found: " + audioPath);
329
568
  }
569
+ audioFile.close();
330
570
 
331
- // Simple WAV header skip (simplistic, assuming standard header size for now or raw)
332
- // Ideally use a WAV parsing library or miniaudio if available.
333
- // For this implementation, we read the whole file.
334
- std::vector<uint8_t> audioData((std::istreambuf_iterator<char>(audioFile)), std::istreambuf_iterator<char>());
571
+ std::string msgJson = buildAudioMessageJson(message, audioPath);
335
572
 
336
- litert::lm::UserMessage lm_message;
337
- lm_message.role = "user";
573
+ auto* response = litert_lm_conversation_send_message(
574
+ conversation_, msgJson.c_str(), nullptr);
338
575
 
339
- litert::lm::ContentPart textPart;
340
- textPart.type = litert::lm::ContentType::TEXT;
341
- textPart.text = message;
342
- lm_message.parts.push_back(textPart);
343
-
344
- litert::lm::ContentPart audioPart;
345
- audioPart.type = litert::lm::ContentType::AUDIO;
346
- audioPart.audio.data = audioData;
347
- // Metadata like sample rate might be needed:
348
- // audioPart.audio.sample_rate = 16000;
349
- lm_message.parts.push_back(audioPart);
350
-
351
- auto response = conversation_->SendMessage(lm_message);
352
- if (!response.ok()) {
353
- throw std::runtime_error("Audio inference failed: " +
354
- std::string(response.status().message()));
576
+ if (!response) {
577
+ throw std::runtime_error("LiteRT-LM: sendMessageWithAudio failed");
355
578
  }
356
579
 
357
- Message userMessage;
358
- userMessage.role = Role::USER;
359
- userMessage.content = message + " [Audio]";
360
- history_.push_back(userMessage);
580
+ const char* responseStr = litert_lm_json_response_get_string(response);
581
+ if (responseStr) {
582
+ result = extractTextFromResponse(std::string(responseStr));
583
+ }
584
+ litert_lm_json_response_delete(response);
585
+ #else
586
+ result = "[iOS only] Audio inference not available on this platform.";
587
+ #endif
361
588
 
362
- Message modelMessage;
363
- modelMessage.role = Role::MODEL;
364
- modelMessage.content = response->content;
365
- history_.push_back(modelMessage);
589
+ auto endTime = std::chrono::steady_clock::now();
590
+ lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
366
591
 
367
- return response->content;
592
+ history_.push_back(Message{Role::USER, message + " [audio: " + audioPath + "]"});
593
+ history_.push_back(Message{Role::MODEL, result});
368
594
 
595
+ return result;
596
+ }
597
+
598
+ // =============================================================================
599
+ // downloadModel — Download model from URL
600
+ // =============================================================================
601
+
602
+ std::shared_ptr<Promise<std::string>> HybridLiteRTLM::downloadModel(
603
+ const std::string& url,
604
+ const std::string& fileName,
605
+ const std::optional<std::function<void(double)>>& onProgress) {
606
+ return Promise<std::string>::async([url, fileName, onProgress]() -> std::string {
607
+ #ifdef __APPLE__
608
+ return litert_lm::downloadModelFile(url, fileName, onProgress);
369
609
  #else
370
- // iOS: LiteRT-LM SDK not yet available, throw clear error
371
- throw std::runtime_error(
372
- "sendMessageWithAudio is not supported on iOS. "
373
- "LiteRT-LM iOS SDK is not yet available. "
374
- "Please use text-only sendMessage() for now.");
610
+ std::string destPath = "/tmp/" + fileName;
611
+ std::string curlCmd = "curl -L -o \"" + destPath + "\" \"" + url + "\"";
612
+ int result = system(curlCmd.c_str());
613
+ if (result != 0) {
614
+ throw std::runtime_error("Failed to download model from: " + url);
615
+ }
616
+ if (onProgress.has_value()) {
617
+ onProgress.value()(1.0);
618
+ }
619
+ return destPath;
375
620
  #endif
621
+ });
376
622
  }
377
623
 
378
- //------------------------------------------------------------------------------
379
- // sendMessageAsync - Streaming token generation
380
- //------------------------------------------------------------------------------
381
- void HybridLiteRTLM::sendMessageAsync(
382
- const std::string& message,
383
- const std::function<void(std::string, bool)>& onToken) {
384
-
385
- // Note: We don't hold the lock during the entire async operation
386
- // to avoid blocking other operations. The callback may be invoked
387
- // from a different thread depending on LiteRT-LM's implementation.
388
-
389
- {
390
- std::lock_guard<std::mutex> lock(mutex_);
391
- ensureLoaded();
392
- }
393
-
394
- #ifdef LITERT_LM_ENABLED
395
- // Add user message to history before starting
396
- {
397
- std::lock_guard<std::mutex> lock(mutex_);
398
- Message userMessage;
399
- userMessage.role = Role::USER;
400
- userMessage.content = message;
401
- history_.push_back(userMessage);
402
- }
403
-
404
- litert::lm::UserMessage lm_message;
405
- lm_message.role = "user";
406
- lm_message.content = message;
407
-
408
- std::string fullResponse;
409
-
410
- // The callback needs to be carefully managed for thread safety
411
- auto status = conversation_->SendMessageAsync(
412
- lm_message,
413
- [this, &onToken, &fullResponse](const std::string& token, bool isDone) {
414
- fullResponse += token;
415
-
416
- // Invoke the JS callback (Nitro handles thread marshalling)
417
- onToken(token, isDone);
418
-
419
- if (isDone) {
420
- // Add complete response to history
421
- std::lock_guard<std::mutex> lock(mutex_);
422
- Message modelMessage;
423
- modelMessage.role = Role::MODEL;
424
- modelMessage.content = fullResponse;
425
- history_.push_back(modelMessage);
426
- }
427
- }
428
- );
429
-
430
- if (!status.ok()) {
431
- // Remove user message since inference failed
432
- std::lock_guard<std::mutex> lock(mutex_);
433
- if (!history_.empty()) {
434
- history_.pop_back();
624
+ std::shared_ptr<Promise<void>> HybridLiteRTLM::deleteModel(const std::string& fileName) {
625
+ return Promise<void>::async([fileName]() {
626
+ std::string path;
627
+ #ifdef __APPLE__
628
+ // Match the path used by IOSDownloadHelper: ~/Library/Caches/litert_models/
629
+ const char* home = getenv("HOME");
630
+ if (home) {
631
+ path = std::string(home) + "/Library/Caches/litert_models/" + fileName;
435
632
  }
436
- throw std::runtime_error("Async inference failed: " +
437
- std::string(status.message()));
438
- }
439
-
440
633
  #else
441
- // Stub: Simulate streaming by calling sendMessage and splitting response
442
- std::string fullResponse;
443
- {
444
- std::lock_guard<std::mutex> lock(mutex_);
445
-
446
- // Add user message
447
- Message userMessage;
448
- userMessage.role = Role::USER;
449
- userMessage.content = message;
450
- history_.push_back(userMessage);
451
-
452
- fullResponse = "[LiteRT-LM Stub] Streaming response placeholder. You said: " + message;
453
- }
454
-
455
- // Simulate token-by-token streaming
456
- std::string currentWord;
457
- for (size_t i = 0; i < fullResponse.length(); i++) {
458
- char c = fullResponse[i];
459
- currentWord += c;
460
-
461
- if (c == ' ' || c == '\n' || i == fullResponse.length() - 1) {
462
- bool isDone = (i == fullResponse.length() - 1);
463
- onToken(currentWord, isDone);
464
- currentWord.clear();
465
- }
466
- }
467
-
468
- // Add model response to history
469
- {
470
- std::lock_guard<std::mutex> lock(mutex_);
471
- Message modelMessage;
472
- modelMessage.role = Role::MODEL;
473
- modelMessage.content = fullResponse;
474
- history_.push_back(modelMessage);
475
- }
634
+ path = "/tmp/" + fileName;
476
635
  #endif
636
+ if (!path.empty()) {
637
+ std::remove(path.c_str());
638
+ }
639
+ });
477
640
  }
478
641
 
479
- //------------------------------------------------------------------------------
480
- // getHistory - Return conversation history
481
- //------------------------------------------------------------------------------
642
+ // =============================================================================
643
+ // getHistory
644
+ // =============================================================================
645
+
482
646
  std::vector<Message> HybridLiteRTLM::getHistory() {
483
647
  std::lock_guard<std::mutex> lock(mutex_);
484
648
  return history_;
485
649
  }
486
650
 
487
- //------------------------------------------------------------------------------
488
- // resetConversation - Clear KV cache, keep engine
489
- //------------------------------------------------------------------------------
651
+ // =============================================================================
652
+ // resetConversation
653
+ // =============================================================================
654
+
490
655
  void HybridLiteRTLM::resetConversation() {
491
656
  std::lock_guard<std::mutex> lock(mutex_);
492
657
 
493
- #ifdef LITERT_LM_ENABLED
494
- // Destroy old conversation and create a new one
495
- // This clears the KV cache but keeps the (expensive) Engine loaded
496
- if (engine_) {
497
- conversation_.reset();
658
+ history_.clear();
659
+ lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
660
+
661
+ #ifdef __APPLE__
662
+ if (isLoaded_ && engine_) {
498
663
  createNewConversation();
499
664
  }
500
665
  #endif
501
-
502
- history_.clear();
503
666
  }
504
667
 
505
- //------------------------------------------------------------------------------
506
- // isReady - Check if model is loaded
507
- //------------------------------------------------------------------------------
668
+ // =============================================================================
669
+ // isReady
670
+ // =============================================================================
671
+
508
672
  bool HybridLiteRTLM::isReady() {
509
673
  std::lock_guard<std::mutex> lock(mutex_);
510
674
  return isLoaded_;
511
675
  }
512
676
 
513
- //------------------------------------------------------------------------------
514
- // getStats - Return last generation statistics
515
- //------------------------------------------------------------------------------
677
+ // =============================================================================
678
+ // getStats
679
+ // =============================================================================
680
+
516
681
  GenerationStats HybridLiteRTLM::getStats() {
517
682
  std::lock_guard<std::mutex> lock(mutex_);
518
683
  return lastStats_;
519
684
  }
520
685
 
521
- //------------------------------------------------------------------------------
522
- // getMemoryUsage - Return real memory usage from OS
523
- //------------------------------------------------------------------------------
686
+ // =============================================================================
687
+ // getMemoryUsage — Uses Mach APIs for iOS process memory
688
+ // =============================================================================
689
+
524
690
  MemoryUsage HybridLiteRTLM::getMemoryUsage() {
525
- double nativeHeapBytes = 0;
526
- double residentBytes = 0;
527
- double availableMemoryBytes = 0;
691
+ double usedMemoryBytes = 0;
692
+ double totalMemoryBytes = 0;
693
+ double availableBytes = 0;
528
694
  bool isLowMemory = false;
529
-
695
+
530
696
  #ifdef __APPLE__
531
- // Get process memory info via Mach APIs
532
- struct mach_task_basic_info taskInfo;
533
- mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
534
- if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
535
- (task_info_t)&taskInfo, &infoCount) == KERN_SUCCESS) {
536
- residentBytes = static_cast<double>(taskInfo.resident_size);
537
- }
538
-
539
- // Get system-wide memory pressure
540
- vm_statistics64_data_t vmStats;
541
- mach_msg_type_number_t vmCount = HOST_VM_INFO64_COUNT;
542
- if (host_statistics64(mach_host_self(), HOST_VM_INFO64,
543
- (host_info64_t)&vmStats, &vmCount) == KERN_SUCCESS) {
544
- vm_size_t pageSize;
545
- host_page_size(mach_host_self(), &pageSize);
546
- availableMemoryBytes = static_cast<double>(vmStats.free_count) * pageSize;
547
- // Consider low memory if free pages < 10% of total active+inactive+free
548
- uint64_t totalPages = vmStats.active_count + vmStats.inactive_count + vmStats.free_count;
549
- isLowMemory = (totalPages > 0) &&
550
- (static_cast<double>(vmStats.free_count) / totalPages < 0.1);
551
- }
552
-
553
- // malloc_size is per-allocation; use resident_size as native heap proxy
554
- nativeHeapBytes = residentBytes;
555
- #endif
556
-
557
- #ifdef __ANDROID__
558
- // Parse /proc/self/status for VmRSS (resident set size)
559
- std::ifstream statusFile("/proc/self/status");
560
- if (statusFile.is_open()) {
561
- std::string line;
562
- while (std::getline(statusFile, line)) {
563
- if (line.rfind("VmRSS:", 0) == 0) {
564
- // Format: "VmRSS: 123456 kB"
565
- std::istringstream iss(line.substr(6));
566
- double kbValue = 0;
567
- iss >> kbValue;
568
- residentBytes = kbValue * 1024.0;
569
- break;
570
- }
571
- }
697
+ // Get app process memory (resident set size)
698
+ struct mach_task_basic_info info;
699
+ mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
700
+
701
+ kern_return_t kr = task_info(mach_task_self(),
702
+ MACH_TASK_BASIC_INFO,
703
+ (task_info_t)&info,
704
+ &count);
705
+
706
+ if (kr == KERN_SUCCESS) {
707
+ usedMemoryBytes = static_cast<double>(info.resident_size);
572
708
  }
573
-
574
- // Use mallinfo for native heap
575
- struct mallinfo mi = mallinfo();
576
- nativeHeapBytes = static_cast<double>(mi.uordblks); // total allocated space
577
-
578
- // Parse /proc/meminfo for available memory
579
- std::ifstream memFile("/proc/meminfo");
580
- if (memFile.is_open()) {
581
- std::string line;
582
- while (std::getline(memFile, line)) {
583
- if (line.rfind("MemAvailable:", 0) == 0) {
584
- std::istringstream iss(line.substr(13));
585
- double kbValue = 0;
586
- iss >> kbValue;
587
- availableMemoryBytes = kbValue * 1024.0;
588
- break;
589
- }
590
- }
709
+
710
+ // Get total physical memory
711
+ mach_port_t host_port = mach_host_self();
712
+ struct host_basic_info hostInfo;
713
+ mach_msg_type_number_t hostCount = HOST_BASIC_INFO_COUNT;
714
+
715
+ kr = host_info(host_port, HOST_BASIC_INFO,
716
+ (host_info_t)&hostInfo, &hostCount);
717
+
718
+ if (kr == KERN_SUCCESS) {
719
+ totalMemoryBytes = static_cast<double>(hostInfo.max_mem);
591
720
  }
592
-
593
- // Consider low if available < 256MB
594
- isLowMemory = availableMemoryBytes > 0 && availableMemoryBytes < 256.0 * 1024 * 1024;
721
+
722
+ availableBytes = totalMemoryBytes - usedMemoryBytes;
723
+ if (availableBytes < 0) availableBytes = 0;
724
+
725
+ // Low memory threshold (~200MB available)
726
+ isLowMemory = (totalMemoryBytes > 0) && (availableBytes < 200.0 * 1024.0 * 1024.0);
595
727
  #endif
596
-
597
- return MemoryUsage{nativeHeapBytes, residentBytes, availableMemoryBytes, isLowMemory};
728
+
729
+ return MemoryUsage{
730
+ usedMemoryBytes, // nativeHeapBytes
731
+ usedMemoryBytes, // residentBytes
732
+ availableBytes, // availableMemoryBytes
733
+ isLowMemory // isLowMemory
734
+ };
598
735
  }
599
736
 
600
- //------------------------------------------------------------------------------
601
- // close - Release all native resources
602
- //------------------------------------------------------------------------------
737
+ // =============================================================================
738
+ // close — Clean up all LiteRT-LM resources
739
+ // =============================================================================
740
+
603
741
  void HybridLiteRTLM::close() {
604
- std::lock_guard<std::mutex> lock(mutex_);
605
-
606
- #ifdef LITERT_LM_ENABLED
607
- // Release in reverse order of creation
608
- conversation_.reset();
609
- engine_.reset();
610
- #endif
742
+ // Note: Don't lock here if called from destructor (mutex may be destroyed)
743
+ // The caller (loadModel, destructor) should handle locking.
611
744
 
612
745
  isLoaded_ = false;
613
746
  history_.clear();
747
+
748
+ #ifdef __APPLE__
749
+ if (conversation_) {
750
+ litert_lm_conversation_delete(conversation_);
751
+ conversation_ = nullptr;
752
+ }
753
+ if (conv_config_) {
754
+ litert_lm_conversation_config_delete(conv_config_);
755
+ conv_config_ = nullptr;
756
+ }
757
+ if (session_config_) {
758
+ litert_lm_session_config_delete(session_config_);
759
+ session_config_ = nullptr;
760
+ }
761
+ if (engine_) {
762
+ litert_lm_engine_delete(engine_);
763
+ engine_ = nullptr;
764
+ }
765
+ #endif
766
+
767
+ lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
614
768
  }
615
769
 
616
770
  } // namespace margelo::nitro::litertlm