react-native-litert-lm 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +270 -186
- package/android/build.gradle +1 -1
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +93 -37
- package/app.plugin.js +33 -0
- package/cpp/HybridLiteRTLM.cpp +571 -451
- package/cpp/HybridLiteRTLM.hpp +54 -23
- package/cpp/IOSDownloadHelper.h +24 -0
- package/cpp/cpp-adapter.cpp +2 -2
- package/cpp/include/litert_lm_engine.h +502 -0
- package/ios/IOSDownloadHelper.mm +129 -0
- package/ios/LiteRTLMAutolinking.mm +30 -0
- package/lib/hooks.d.ts +9 -4
- package/lib/hooks.js +34 -20
- package/lib/index.d.ts +1 -0
- package/lib/index.js +2 -5
- package/lib/memoryTracker.d.ts +1 -1
- package/lib/memoryTracker.js +1 -1
- package/lib/modelFactory.d.ts +11 -5
- package/lib/modelFactory.js +9 -4
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +11 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +31 -37
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +19 -22
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +15 -18
- package/package.json +12 -5
- package/react-native-litert-lm.podspec +20 -7
- package/scripts/build-ios-engine.sh +283 -0
- package/scripts/download-ios-frameworks.sh +72 -0
- package/scripts/postinstall.js +116 -0
- package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
- package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
- package/scripts/stubs/llguidance_stubs.c +101 -0
- package/src/hooks.ts +62 -39
- package/src/index.ts +4 -7
- package/src/memoryTracker.ts +1 -1
- package/src/modelFactory.ts +30 -5
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// HybridLiteRTLM.cpp
|
|
3
3
|
// react-native-litert-lm
|
|
4
4
|
//
|
|
5
|
-
// High-performance LLM inference using LiteRT-LM.
|
|
5
|
+
// High-performance LLM inference using LiteRT-LM C API.
|
|
6
6
|
//
|
|
7
7
|
// NOTE: This C++ implementation is used for iOS ONLY.
|
|
8
8
|
// Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
|
|
@@ -11,80 +11,202 @@
|
|
|
11
11
|
|
|
12
12
|
#include "HybridLiteRTLM.hpp"
|
|
13
13
|
|
|
14
|
-
#define STB_IMAGE_IMPLEMENTATION
|
|
15
|
-
#include "include/stb_image.h"
|
|
16
14
|
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
#include <NitroModules/Promise.hpp>
|
|
17
18
|
#include <chrono>
|
|
18
19
|
#include <stdexcept>
|
|
19
20
|
#include <sstream>
|
|
21
|
+
|
|
22
|
+
#ifdef __APPLE__
|
|
23
|
+
#include "IOSDownloadHelper.h"
|
|
24
|
+
#endif
|
|
20
25
|
#include <fstream>
|
|
26
|
+
#include <thread>
|
|
27
|
+
#include <regex>
|
|
21
28
|
|
|
22
29
|
namespace margelo::nitro::litertlm {
|
|
23
30
|
|
|
24
|
-
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
31
|
+
// =============================================================================
|
|
32
|
+
// JSON Helpers
|
|
33
|
+
// =============================================================================
|
|
34
|
+
|
|
35
|
+
// Escapes `input` so it can be embedded between double quotes in a JSON
// document.
//
// Quotes, backslashes and the control characters with short escapes
// (\n, \r, \t, \b, \f) use their two-character form. Every other control
// character below 0x20 is written as \u00XX: JSON forbids raw control
// characters inside strings, so passing them through unchanged would yield a
// document that a conforming parser rejects. Bytes >= 0x20 (including UTF-8
// multi-byte sequences) are copied through untouched.
std::string HybridLiteRTLM::escapeJson(const std::string& input) {
  static const char kHexDigits[] = "0123456789abcdef";

  std::string output;
  output.reserve(input.size() + 16);
  for (const char c : input) {
    switch (c) {
      case '"':  output += "\\\""; break;
      case '\\': output += "\\\\"; break;
      case '\n': output += "\\n";  break;
      case '\r': output += "\\r";  break;
      case '\t': output += "\\t";  break;
      case '\b': output += "\\b";  break;
      case '\f': output += "\\f";  break;
      default: {
        // Compare as unsigned so bytes >= 0x80 are never mistaken for
        // control characters on platforms where `char` is signed.
        const unsigned char byte = static_cast<unsigned char>(c);
        if (byte < 0x20) {
          output += "\\u00";
          output += kHexDigits[byte >> 4];
          output += kHexDigits[byte & 0x0F];
        } else {
          output += c;
        }
        break;
      }
    }
  }
  return output;
}
|
|
52
|
+
|
|
53
|
+
std::string HybridLiteRTLM::buildTextMessageJson(const std::string& text) {
|
|
54
|
+
return "{\"role\":\"user\",\"content\":\"" + escapeJson(text) + "\"}";
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
std::string HybridLiteRTLM::buildImageMessageJson(const std::string& text, const std::string& imagePath) {
|
|
58
|
+
return "{\"role\":\"user\",\"content\":["
|
|
59
|
+
"{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
|
|
60
|
+
"{\"type\":\"image\",\"path\":\"" + escapeJson(imagePath) + "\"}"
|
|
61
|
+
"]}";
|
|
32
62
|
}
|
|
33
63
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
64
|
+
std::string HybridLiteRTLM::buildAudioMessageJson(const std::string& text, const std::string& audioPath) {
|
|
65
|
+
return "{\"role\":\"user\",\"content\":["
|
|
66
|
+
"{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
|
|
67
|
+
"{\"type\":\"audio\",\"path\":\"" + escapeJson(audioPath) + "\"}"
|
|
68
|
+
"]}";
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Pulls the generated text out of a response JSON string.
//
// Two shapes are recognised:
//   {"role":"model","content":[{"type":"text","text":"..."}]}
//   {"role":"model","content":"..."}
// If neither marker is found the whole response is returned unchanged.
//
// This is deliberately a small string scanner rather than a JSON parser, to
// avoid a JSON library dependency. Both shapes share one decoder, so every
// JSON escape (\" \\ \/ \b \f \n \r \t and \uXXXX, including surrogate pairs)
// is handled the same way on both paths.
std::string HybridLiteRTLM::extractTextFromResponse(const std::string& jsonResponse) {
  // Appends `codePoint` to `out` as UTF-8.
  const auto appendUtf8 = [](std::string& out, char32_t codePoint) {
    if (codePoint < 0x80) {
      out += static_cast<char>(codePoint);
    } else if (codePoint < 0x800) {
      out += static_cast<char>(0xC0 | (codePoint >> 6));
      out += static_cast<char>(0x80 | (codePoint & 0x3F));
    } else if (codePoint < 0x10000) {
      out += static_cast<char>(0xE0 | (codePoint >> 12));
      out += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
      out += static_cast<char>(0x80 | (codePoint & 0x3F));
    } else {
      out += static_cast<char>(0xF0 | (codePoint >> 18));
      out += static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
      out += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
      out += static_cast<char>(0x80 | (codePoint & 0x3F));
    }
  };

  // Reads exactly four hex digits starting at `at`; false if absent/invalid.
  const auto parseHex4 = [&jsonResponse](size_t at, char32_t& value) -> bool {
    if (at + 4 > jsonResponse.size()) {
      return false;
    }
    char32_t parsed = 0;
    for (size_t k = 0; k < 4; ++k) {
      const char h = jsonResponse[at + k];
      char32_t digit = 0;
      if (h >= '0' && h <= '9') {
        digit = static_cast<char32_t>(h - '0');
      } else if (h >= 'a' && h <= 'f') {
        digit = static_cast<char32_t>(10 + (h - 'a'));
      } else if (h >= 'A' && h <= 'F') {
        digit = static_cast<char32_t>(10 + (h - 'A'));
      } else {
        return false;
      }
      parsed = (parsed << 4) | digit;
    }
    value = parsed;
    return true;
  };

  // Decodes the JSON string whose first character (just past the opening
  // quote) is at `start`, stopping at the first unescaped quote or at the
  // end of the input.
  const auto decodeStringAt = [&](size_t start) -> std::string {
    std::string text;
    text.reserve(jsonResponse.size() - start);
    for (size_t i = start; i < jsonResponse.size(); ++i) {
      const char c = jsonResponse[i];
      if (c == '"') {
        break;  // End of the string value
      }
      if (c != '\\' || i + 1 >= jsonResponse.size()) {
        text += c;
        continue;
      }
      const char next = jsonResponse[i + 1];
      switch (next) {
        case '"':  text += '"';  ++i; break;
        case '\\': text += '\\'; ++i; break;
        case '/':  text += '/';  ++i; break;
        case 'b':  text += '\b'; ++i; break;
        case 'f':  text += '\f'; ++i; break;
        case 'n':  text += '\n'; ++i; break;
        case 'r':  text += '\r'; ++i; break;
        case 't':  text += '\t'; ++i; break;
        case 'u': {
          char32_t codePoint = 0;
          if (!parseHex4(i + 2, codePoint)) {
            text += c;  // Malformed escape: keep the backslash literally.
            break;
          }
          i += 5;  // Now on the last hex digit of \uXXXX.
          if (codePoint >= 0xD800 && codePoint <= 0xDBFF) {
            // High surrogate: must be followed by a \uDC00-\uDFFF low half.
            char32_t low = 0;
            if (i + 2 < jsonResponse.size() && jsonResponse[i + 1] == '\\' &&
                jsonResponse[i + 2] == 'u' && parseHex4(i + 3, low) &&
                low >= 0xDC00 && low <= 0xDFFF) {
              codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
              i += 6;
            } else {
              codePoint = 0xFFFD;  // Unpaired surrogate -> replacement char.
            }
          } else if (codePoint >= 0xDC00 && codePoint <= 0xDFFF) {
            codePoint = 0xFFFD;  // Lone low surrogate.
          }
          appendUtf8(text, codePoint);
          break;
        }
        default:
          // Unknown escape: keep the backslash; the following character is
          // copied by the next loop iteration.
          text += c;
          break;
      }
    }
    return text;
  };

  // Try array format first: find "text":"..." after "type":"text"
  const std::string textMarker = "\"text\":\"";
  size_t pos = jsonResponse.find("\"type\":\"text\"");
  if (pos != std::string::npos) {
    pos = jsonResponse.find(textMarker, pos);
    if (pos != std::string::npos) {
      return decodeStringAt(pos + textMarker.length());
    }
  }

  // Try simple string format: "content":"..."
  const std::string contentMarker = "\"content\":\"";
  pos = jsonResponse.find(contentMarker);
  if (pos != std::string::npos) {
    return decodeStringAt(pos + contentMarker.length());
  }

  // Fallback: return full response
  return jsonResponse;
}
|
|
132
|
+
|
|
133
|
+
// =============================================================================
|
|
134
|
+
// Conversation Management
|
|
135
|
+
// =============================================================================
|
|
136
|
+
|
|
37
137
|
// Replaces the current conversation with a fresh one on the loaded engine.
//
// Any existing conversation and its config are deleted first. A system
// message is attached only when a system prompt has been configured.
// Throws std::runtime_error if the engine is missing or if either the config
// or the conversation cannot be created.
void HybridLiteRTLM::createNewConversation() {
#ifdef __APPLE__
  if (!engine_) {
    throw std::runtime_error("Cannot create conversation: engine not initialized");
  }

  // Release whatever the previous conversation owned.
  if (conversation_ != nullptr) {
    litert_lm_conversation_delete(conversation_);
    conversation_ = nullptr;
  }
  if (conv_config_ != nullptr) {
    litert_lm_conversation_config_delete(conv_config_);
    conv_config_ = nullptr;
  }

  // The system message is optional; a null pointer means "none".
  const bool hasSystemPrompt = !systemPrompt_.empty();
  std::string systemMessage;
  if (hasSystemPrompt) {
    systemMessage = "{\"role\":\"system\",\"content\":\"" + escapeJson(systemPrompt_) + "\"}";
  }
  const char* systemMessageArg = hasSystemPrompt ? systemMessage.c_str() : nullptr;

  conv_config_ = litert_lm_conversation_config_create(
      engine_,
      session_config_,   // may be nullptr for defaults
      systemMessageArg,  // system message
      nullptr,           // tools (not used yet)
      nullptr,           // messages history
      false);            // constrained decoding
  if (conv_config_ == nullptr) {
    throw std::runtime_error("Failed to create conversation config");
  }

  conversation_ = litert_lm_conversation_create(engine_, conv_config_);
  if (conversation_ != nullptr) {
    return;
  }

  // Creation failed: do not keep a config without a conversation.
  litert_lm_conversation_config_delete(conv_config_);
  conv_config_ = nullptr;
  throw std::runtime_error("Failed to create conversation");
#endif
}
|
|
57
183
|
|
|
58
|
-
|
|
59
|
-
// loadModel
|
|
60
|
-
|
|
61
|
-
|
|
184
|
+
// =============================================================================
|
|
185
|
+
// loadModel
|
|
186
|
+
// =============================================================================
|
|
187
|
+
|
|
188
|
+
std::shared_ptr<Promise<void>> HybridLiteRTLM::loadModel(
|
|
189
|
+
const std::string& modelPath,
|
|
190
|
+
const std::optional<LLMConfig>& config) {
|
|
191
|
+
return Promise<void>::async([this, modelPath, config]() {
|
|
192
|
+
loadModelInternal(modelPath, config);
|
|
193
|
+
});
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
void HybridLiteRTLM::loadModelInternal(
|
|
62
197
|
const std::string& modelPath,
|
|
63
198
|
const std::optional<LLMConfig>& config) {
|
|
64
199
|
|
|
65
200
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
66
201
|
|
|
67
|
-
// Clean up existing resources
|
|
68
202
|
if (isLoaded_) {
|
|
69
|
-
|
|
70
|
-
history_.clear();
|
|
71
|
-
#ifdef LITERT_LM_ENABLED
|
|
72
|
-
conversation_.reset();
|
|
73
|
-
engine_.reset();
|
|
74
|
-
#endif
|
|
203
|
+
close();
|
|
75
204
|
}
|
|
76
205
|
|
|
77
|
-
// Apply configuration
|
|
78
206
|
if (config.has_value()) {
|
|
79
207
|
if (config->backend.has_value()) {
|
|
80
208
|
backend_ = config->backend.value();
|
|
81
209
|
}
|
|
82
|
-
if (config->visionBackend.has_value()) {
|
|
83
|
-
visionBackend_ = config->visionBackend.value();
|
|
84
|
-
}
|
|
85
|
-
if (config->audioBackend.has_value()) {
|
|
86
|
-
audioBackend_ = config->audioBackend.value();
|
|
87
|
-
}
|
|
88
210
|
if (config->temperature.has_value()) {
|
|
89
211
|
temperature_ = config->temperature.value();
|
|
90
212
|
}
|
|
@@ -97,520 +219,518 @@ void HybridLiteRTLM::loadModel(
|
|
|
97
219
|
if (config->maxTokens.has_value()) {
|
|
98
220
|
maxTokens_ = config->maxTokens.value();
|
|
99
221
|
}
|
|
222
|
+
if (config->systemPrompt.has_value()) {
|
|
223
|
+
systemPrompt_ = config->systemPrompt.value();
|
|
224
|
+
}
|
|
100
225
|
}
|
|
101
226
|
|
|
102
|
-
#ifdef
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
auto
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
227
|
+
#ifdef __APPLE__
|
|
228
|
+
// Set log verbosity: 2=WARNING (production), 0=INFO (debug)
|
|
229
|
+
litert_lm_set_min_log_level(2);
|
|
230
|
+
|
|
231
|
+
auto backendStr = [](Backend b) -> const char* {
|
|
232
|
+
switch (b) {
|
|
233
|
+
case Backend::GPU: return "gpu";
|
|
234
|
+
case Backend::NPU: return "gpu"; // NPU not available on iOS, use GPU
|
|
235
|
+
default: return "cpu";
|
|
236
|
+
}
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
auto tryCreateEngine = [&](const char* backend, const char* visionBackend) -> bool {
|
|
240
|
+
auto* settings = litert_lm_engine_settings_create(
|
|
241
|
+
modelPath.c_str(),
|
|
242
|
+
backend,
|
|
243
|
+
visionBackend,
|
|
244
|
+
"cpu" // audio always on CPU
|
|
245
|
+
);
|
|
246
|
+
if (!settings) {
|
|
247
|
+
return false;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
litert_lm_engine_settings_set_max_num_tokens(settings, static_cast<int>(maxTokens_));
|
|
251
|
+
litert_lm_engine_settings_enable_benchmark(settings);
|
|
252
|
+
|
|
253
|
+
engine_ = litert_lm_engine_create(settings);
|
|
254
|
+
litert_lm_engine_settings_delete(settings);
|
|
255
|
+
|
|
256
|
+
return engine_ != nullptr;
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
// Try requested backend first (e.g. gpu/gpu)
|
|
260
|
+
const char* primaryBackend = backendStr(backend_);
|
|
261
|
+
if (!tryCreateEngine(primaryBackend, primaryBackend)) {
|
|
262
|
+
// Fallback chain for when the primary backend fails:
|
|
263
|
+
bool fallbackOk = false;
|
|
264
|
+
if (backend_ != Backend::CPU) {
|
|
265
|
+
// 1) Try CPU main + GPU vision (model's vision encoder often requires GPU)
|
|
266
|
+
fallbackOk = tryCreateEngine("cpu", "gpu");
|
|
267
|
+
// 2) Try CPU main + CPU vision
|
|
268
|
+
if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", "cpu");
|
|
269
|
+
}
|
|
270
|
+
// 3) Try CPU main + no vision (nullptr skips vision executor entirely)
|
|
271
|
+
if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", nullptr);
|
|
272
|
+
if (fallbackOk) {
|
|
273
|
+
backend_ = Backend::CPU;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
if (!engine_) {
|
|
278
|
+
throw std::runtime_error(
|
|
279
|
+
"Failed to create LiteRT-LM engine. Tried backend '" +
|
|
280
|
+
std::string(primaryBackend) + "' and CPU fallback. Model path: " + modelPath);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
session_config_ = litert_lm_session_config_create();
|
|
284
|
+
if (session_config_) {
|
|
285
|
+
litert_lm_session_config_set_max_output_tokens(session_config_, static_cast<int>(maxTokens_));
|
|
286
|
+
|
|
287
|
+
LiteRtLmSamplerParams sampler{};
|
|
288
|
+
sampler.type = kTopP;
|
|
289
|
+
sampler.top_k = static_cast<int32_t>(topK_);
|
|
290
|
+
sampler.top_p = static_cast<float>(topP_);
|
|
291
|
+
sampler.temperature = static_cast<float>(temperature_);
|
|
292
|
+
sampler.seed = 0;
|
|
293
|
+
litert_lm_session_config_set_sampler_params(session_config_, &sampler);
|
|
134
294
|
}
|
|
135
|
-
engine_ = std::move(*engine);
|
|
136
|
-
|
|
137
|
-
// 5. Create the Conversation (lightweight - holds KV cache)
|
|
138
|
-
createNewConversation();
|
|
139
295
|
|
|
140
|
-
|
|
296
|
+
createNewConversation();
|
|
297
|
+
#endif
|
|
141
298
|
|
|
142
299
|
isLoaded_ = true;
|
|
143
300
|
history_.clear();
|
|
144
|
-
|
|
145
|
-
// Reset stats
|
|
146
301
|
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
147
302
|
}
|
|
148
303
|
|
|
149
|
-
|
|
150
|
-
// sendMessage
|
|
151
|
-
|
|
152
|
-
|
|
304
|
+
// =============================================================================
|
|
305
|
+
// sendMessage — Blocking text inference
|
|
306
|
+
// =============================================================================
|
|
307
|
+
|
|
308
|
+
// Promise-returning entry point for text inference: hands the work to
// Promise<std::string>::async, which runs sendMessageInternal with a copy of
// the message and resolves with its return value.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessage(const std::string& message) {
  // The message is captured by value because the task outlives this call.
  auto task = [this, message]() -> std::string {
    return this->sendMessageInternal(message);
  };
  return Promise<std::string>::async(std::move(task));
}
|
|
313
|
+
|
|
314
|
+
std::string HybridLiteRTLM::sendMessageInternal(const std::string& message) {
|
|
153
315
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
154
316
|
ensureLoaded();
|
|
155
317
|
|
|
156
|
-
auto startTime = std::chrono::
|
|
318
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
319
|
+
std::string result;
|
|
157
320
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
userMessage.role = Role::USER;
|
|
161
|
-
userMessage.content = message;
|
|
162
|
-
history_.push_back(userMessage);
|
|
163
|
-
|
|
164
|
-
std::string responseText;
|
|
321
|
+
#ifdef __APPLE__
|
|
322
|
+
std::string msgJson = buildTextMessageJson(message);
|
|
165
323
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
// The Conversation API expects a structured input
|
|
169
|
-
litert::lm::UserMessage lm_message;
|
|
170
|
-
lm_message.role = "user";
|
|
171
|
-
lm_message.content = message;
|
|
324
|
+
auto* response = litert_lm_conversation_send_message(
|
|
325
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
172
326
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
// Remove the user message we just added since inference failed
|
|
176
|
-
history_.pop_back();
|
|
177
|
-
throw std::runtime_error("Inference failed: " +
|
|
178
|
-
std::string(response.status().message()));
|
|
327
|
+
if (!response) {
|
|
328
|
+
throw std::runtime_error("LiteRT-LM: sendMessage failed");
|
|
179
329
|
}
|
|
180
330
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
331
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
332
|
+
if (responseStr) {
|
|
333
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
334
|
+
}
|
|
335
|
+
litert_lm_json_response_delete(response);
|
|
336
|
+
|
|
337
|
+
auto* benchInfo = litert_lm_conversation_get_benchmark_info(conversation_);
|
|
338
|
+
if (benchInfo) {
|
|
339
|
+
int numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo);
|
|
340
|
+
if (numDecodeTurns > 0) {
|
|
341
|
+
int lastIdx = numDecodeTurns - 1;
|
|
342
|
+
lastStats_.tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx);
|
|
343
|
+
lastStats_.completionTokens = static_cast<double>(
|
|
344
|
+
litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx));
|
|
345
|
+
}
|
|
346
|
+
lastStats_.timeToFirstToken = litert_lm_benchmark_info_get_time_to_first_token(benchInfo);
|
|
347
|
+
litert_lm_benchmark_info_delete(benchInfo);
|
|
194
348
|
}
|
|
195
|
-
|
|
196
349
|
#else
|
|
197
|
-
//
|
|
198
|
-
|
|
199
|
-
"Real inference will be available when LiteRT-LM libraries are integrated. "
|
|
200
|
-
"You said: " + message;
|
|
201
|
-
|
|
202
|
-
auto endTime = std::chrono::high_resolution_clock::now();
|
|
203
|
-
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
|
|
204
|
-
|
|
205
|
-
// Estimate stats for stub
|
|
206
|
-
lastStats_.promptTokens = static_cast<double>(message.length() / 4);
|
|
207
|
-
lastStats_.completionTokens = static_cast<double>(responseText.length() / 4);
|
|
208
|
-
lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
|
|
209
|
-
lastStats_.totalTime = static_cast<double>(duration);
|
|
210
|
-
lastStats_.timeToFirstToken = lastStats_.totalTime / 2;
|
|
211
|
-
lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
|
|
212
|
-
? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
|
|
213
|
-
: 0;
|
|
350
|
+
// Non-Apple stub
|
|
351
|
+
result = "[iOS only] LiteRT-LM inference not available on this platform.";
|
|
214
352
|
#endif
|
|
215
353
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
354
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
355
|
+
double latencyMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
|
|
356
|
+
lastStats_.totalTime = latencyMs / 1000.0;
|
|
357
|
+
|
|
358
|
+
// Update history
|
|
359
|
+
history_.push_back(Message{Role::USER, message});
|
|
360
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
221
361
|
|
|
222
|
-
return
|
|
362
|
+
return result;
|
|
223
363
|
}
|
|
224
364
|
|
|
225
|
-
|
|
226
|
-
//
|
|
227
|
-
|
|
228
|
-
|
|
365
|
+
// =============================================================================
|
|
366
|
+
// sendMessageAsync — Streaming text inference
|
|
367
|
+
// =============================================================================
|
|
368
|
+
|
|
369
|
+
void HybridLiteRTLM::streamCallbackFn(void* callback_data, const char* chunk,
|
|
370
|
+
bool is_final, const char* error_msg) {
|
|
371
|
+
auto* ctx = static_cast<StreamContext*>(callback_data);
|
|
372
|
+
|
|
373
|
+
if (error_msg) {
|
|
374
|
+
// Error occurred — notify JS and clean up
|
|
375
|
+
ctx->onToken(std::string("Error: ") + error_msg, true);
|
|
376
|
+
delete ctx;
|
|
377
|
+
return;
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
if (is_final) {
|
|
381
|
+
// Calculate stats
|
|
382
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
383
|
+
double durationMs = std::chrono::duration<double, std::milli>(endTime - ctx->startTime).count();
|
|
384
|
+
|
|
385
|
+
if (ctx->lastStats && ctx->tokenCount > 0) {
|
|
386
|
+
ctx->lastStats->completionTokens = static_cast<double>(ctx->tokenCount);
|
|
387
|
+
ctx->lastStats->totalTime = durationMs / 1000.0;
|
|
388
|
+
ctx->lastStats->tokensPerSecond = (ctx->tokenCount / durationMs) * 1000.0;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// Update history (thread-safe)
|
|
392
|
+
{
|
|
393
|
+
std::lock_guard<std::mutex> lock(*ctx->historyMutex);
|
|
394
|
+
ctx->history->push_back(Message{Role::USER, ctx->userMessage});
|
|
395
|
+
ctx->history->push_back(Message{Role::MODEL, ctx->fullResponse});
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
ctx->onToken("", true);
|
|
399
|
+
delete ctx;
|
|
400
|
+
return;
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
if (chunk) {
|
|
404
|
+
std::string token(chunk);
|
|
405
|
+
ctx->fullResponse += token;
|
|
406
|
+
ctx->tokenCount++;
|
|
407
|
+
ctx->onToken(token, false);
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
void HybridLiteRTLM::sendMessageAsync(
|
|
229
412
|
const std::string& message,
|
|
230
|
-
const std::string
|
|
413
|
+
const std::function<void(const std::string&, bool)>& onToken) {
|
|
414
|
+
|
|
415
|
+
// Copy values for the background thread (avoid use-after-free)
|
|
416
|
+
auto onTokenCopy = onToken;
|
|
417
|
+
auto messageCopy = message;
|
|
418
|
+
|
|
419
|
+
// Capture shared state safely
|
|
420
|
+
auto* ctx = new StreamContext();
|
|
421
|
+
ctx->onToken = std::move(onTokenCopy);
|
|
422
|
+
ctx->fullResponse = "";
|
|
423
|
+
ctx->history = &history_;
|
|
424
|
+
ctx->historyMutex = &mutex_;
|
|
425
|
+
ctx->userMessage = messageCopy;
|
|
426
|
+
ctx->lastStats = &lastStats_;
|
|
427
|
+
ctx->startTime = std::chrono::steady_clock::now();
|
|
428
|
+
ctx->tokenCount = 0;
|
|
231
429
|
|
|
232
|
-
|
|
430
|
+
#ifdef __APPLE__
|
|
233
431
|
ensureLoaded();
|
|
234
432
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
int
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
433
|
+
std::string msgJson = buildTextMessageJson(messageCopy);
|
|
434
|
+
|
|
435
|
+
int result = litert_lm_conversation_send_message_stream(
|
|
436
|
+
conversation_, msgJson.c_str(), nullptr,
|
|
437
|
+
streamCallbackFn, ctx);
|
|
438
|
+
|
|
439
|
+
if (result != 0) {
|
|
440
|
+
delete ctx;
|
|
441
|
+
throw std::runtime_error("LiteRT-LM: Failed to start streaming inference");
|
|
241
442
|
}
|
|
443
|
+
#else
|
|
444
|
+
// Non-Apple stub
|
|
445
|
+
ctx->onToken("[iOS only] Streaming not available on this platform.", true);
|
|
446
|
+
delete ctx;
|
|
447
|
+
#endif
|
|
448
|
+
}
|
|
242
449
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
litert::lm::UserMessage lm_message;
|
|
247
|
-
lm_message.role = "user";
|
|
248
|
-
|
|
249
|
-
// Construct multimodal content
|
|
250
|
-
// Option A: If UserMessage supports a list of content parts
|
|
251
|
-
litert::lm::ContentPart textPart;
|
|
252
|
-
textPart.type = litert::lm::ContentType::TEXT;
|
|
253
|
-
textPart.text = message;
|
|
254
|
-
lm_message.parts.push_back(textPart);
|
|
450
|
+
// =============================================================================
|
|
451
|
+
// sendMessageWithImage — Multimodal (vision)
|
|
452
|
+
// =============================================================================
|
|
255
453
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
stbi_image_free(img);
|
|
454
|
+
// Promise-returning entry point for vision inference: hands the work to
// Promise<std::string>::async, which runs sendMessageWithImageInternal with
// copies of the arguments and resolves with its return value.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithImage(
    const std::string& message,
    const std::string& imagePath) {
  // Arguments are captured by value because the task outlives this call.
  auto task = [this, message, imagePath]() -> std::string {
    return this->sendMessageWithImageInternal(message, imagePath);
  };
  return Promise<std::string>::async(std::move(task));
}
|
|
265
461
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
462
|
+
std::string HybridLiteRTLM::sendMessageWithImageInternal(
|
|
463
|
+
const std::string& message,
|
|
464
|
+
const std::string& imagePath) {
|
|
465
|
+
|
|
466
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
467
|
+
ensureLoaded();
|
|
468
|
+
|
|
469
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
470
|
+
std::string result;
|
|
471
|
+
|
|
472
|
+
#ifdef __APPLE__
|
|
473
|
+
// Verify image exists
|
|
474
|
+
std::ifstream imageFile(imagePath);
|
|
475
|
+
if (!imageFile.good()) {
|
|
476
|
+
throw std::runtime_error("Image file not found: " + imagePath);
|
|
270
477
|
}
|
|
478
|
+
imageFile.close();
|
|
271
479
|
|
|
272
|
-
//
|
|
273
|
-
|
|
274
|
-
userMessage.role = Role::USER;
|
|
275
|
-
userMessage.content = message + " [Image]";
|
|
276
|
-
history_.push_back(userMessage);
|
|
480
|
+
// Build multimodal message JSON — the C API handles image preprocessing
|
|
481
|
+
std::string msgJson = buildImageMessageJson(message, imagePath);
|
|
277
482
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
modelMessage.content = response->content;
|
|
281
|
-
history_.push_back(modelMessage);
|
|
483
|
+
auto* response = litert_lm_conversation_send_message(
|
|
484
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
282
485
|
|
|
283
|
-
|
|
486
|
+
if (!response) {
|
|
487
|
+
throw std::runtime_error("LiteRT-LM: sendMessageWithImage failed");
|
|
488
|
+
}
|
|
284
489
|
|
|
490
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
491
|
+
if (responseStr) {
|
|
492
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
493
|
+
}
|
|
494
|
+
litert_lm_json_response_delete(response);
|
|
285
495
|
#else
|
|
286
|
-
|
|
287
|
-
throw std::runtime_error(
|
|
288
|
-
"sendMessageWithImage is not supported on iOS. "
|
|
289
|
-
"LiteRT-LM iOS SDK is not yet available. "
|
|
290
|
-
"Please use text-only sendMessage() for now.");
|
|
496
|
+
result = "[iOS only] Vision inference not available on this platform.";
|
|
291
497
|
#endif
|
|
498
|
+
|
|
499
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
500
|
+
lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
|
|
501
|
+
|
|
502
|
+
history_.push_back(Message{Role::USER, message + " [image: " + imagePath + "]"});
|
|
503
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
504
|
+
|
|
505
|
+
return result;
|
|
292
506
|
}
|
|
293
507
|
|
|
294
|
-
|
|
295
|
-
|
|
508
|
+
// =============================================================================
|
|
509
|
+
// sendMessageWithAudio — Multimodal (audio)
|
|
510
|
+
// =============================================================================
|
|
296
511
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
std::
|
|
301
|
-
|
|
302
|
-
const std::string& fileName,
|
|
303
|
-
const std::optional<std::function<void(double)>>& onProgress) {
|
|
304
|
-
|
|
305
|
-
// Return a future that throws an exception
|
|
306
|
-
return std::async(std::launch::async, []() -> std::string {
|
|
307
|
-
throw std::runtime_error(
|
|
308
|
-
"downloadModel is not supported on iOS yet. "
|
|
309
|
-
"Please download the model manually using a separate library."
|
|
310
|
-
);
|
|
512
|
+
// Promise-returning entry point for audio inference: hands the work to
// Promise<std::string>::async, which runs sendMessageWithAudioInternal with
// copies of the arguments and resolves with its return value.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithAudio(
    const std::string& message,
    const std::string& audioPath) {
  // Arguments are captured by value because the task outlives this call.
  auto task = [this, message, audioPath]() -> std::string {
    return this->sendMessageWithAudioInternal(message, audioPath);
  };
  return Promise<std::string>::async(std::move(task));
}
|
|
313
519
|
|
|
314
|
-
|
|
315
|
-
// sendMessageWithAudio - Multimodal audio + text
|
|
316
|
-
//------------------------------------------------------------------------------
|
|
317
|
-
std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
520
|
+
std::string HybridLiteRTLM::sendMessageWithAudioInternal(
|
|
318
521
|
const std::string& message,
|
|
319
522
|
const std::string& audioPath) {
|
|
320
523
|
|
|
321
524
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
322
525
|
ensureLoaded();
|
|
323
526
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
527
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
528
|
+
std::string result;
|
|
529
|
+
|
|
530
|
+
#ifdef __APPLE__
|
|
531
|
+
std::ifstream audioFile(audioPath);
|
|
532
|
+
if (!audioFile.good()) {
|
|
533
|
+
throw std::runtime_error("Audio file not found: " + audioPath);
|
|
329
534
|
}
|
|
535
|
+
audioFile.close();
|
|
330
536
|
|
|
331
|
-
|
|
332
|
-
// Ideally use a WAV parsing library or miniaudio if available.
|
|
333
|
-
// For this implementation, we read the whole file.
|
|
334
|
-
std::vector<uint8_t> audioData((std::istreambuf_iterator<char>(audioFile)), std::istreambuf_iterator<char>());
|
|
537
|
+
std::string msgJson = buildAudioMessageJson(message, audioPath);
|
|
335
538
|
|
|
336
|
-
|
|
337
|
-
|
|
539
|
+
auto* response = litert_lm_conversation_send_message(
|
|
540
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
338
541
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
textPart.text = message;
|
|
342
|
-
lm_message.parts.push_back(textPart);
|
|
343
|
-
|
|
344
|
-
litert::lm::ContentPart audioPart;
|
|
345
|
-
audioPart.type = litert::lm::ContentType::AUDIO;
|
|
346
|
-
audioPart.audio.data = audioData;
|
|
347
|
-
// Metadata like sample rate might be needed:
|
|
348
|
-
// audioPart.audio.sample_rate = 16000;
|
|
349
|
-
lm_message.parts.push_back(audioPart);
|
|
350
|
-
|
|
351
|
-
auto response = conversation_->SendMessage(lm_message);
|
|
352
|
-
if (!response.ok()) {
|
|
353
|
-
throw std::runtime_error("Audio inference failed: " +
|
|
354
|
-
std::string(response.status().message()));
|
|
542
|
+
if (!response) {
|
|
543
|
+
throw std::runtime_error("LiteRT-LM: sendMessageWithAudio failed");
|
|
355
544
|
}
|
|
356
545
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
546
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
547
|
+
if (responseStr) {
|
|
548
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
549
|
+
}
|
|
550
|
+
litert_lm_json_response_delete(response);
|
|
551
|
+
#else
|
|
552
|
+
result = "[iOS only] Audio inference not available on this platform.";
|
|
553
|
+
#endif
|
|
361
554
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
modelMessage.content = response->content;
|
|
365
|
-
history_.push_back(modelMessage);
|
|
555
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
556
|
+
lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
|
|
366
557
|
|
|
367
|
-
|
|
558
|
+
history_.push_back(Message{Role::USER, message + " [audio: " + audioPath + "]"});
|
|
559
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
368
560
|
|
|
561
|
+
return result;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
// =============================================================================
|
|
565
|
+
// downloadModel — Download model from URL
|
|
566
|
+
// =============================================================================
|
|
567
|
+
|
|
568
|
+
/**
 * Downloads a model file asynchronously and resolves with its local path.
 *
 * On Apple platforms the work is delegated to litert_lm::downloadModelFile.
 * On other platforms the file is fetched with the `curl` command-line tool
 * into /tmp/<fileName>; onProgress (when provided) is invoked once with 1.0
 * after curl exits successfully.
 *
 * @param url        Source URL of the model.
 * @param fileName   Destination file name, relative to the download directory.
 *                   Must be non-empty and must not contain a ".." component.
 * @param onProgress Optional progress callback.
 * @return Promise resolving to the path of the downloaded file.
 * @throws std::runtime_error (from inside the async task) when fileName is
 *         invalid or when curl exits with a non-zero status.
 */
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::downloadModel(
    const std::string& url,
    const std::string& fileName,
    const std::optional<std::function<void(double)>>& onProgress) {
  return Promise<std::string>::async([url, fileName, onProgress]() -> std::string {
    // A ".." component would let the caller write outside the download
    // directory, so scan every '/'-separated component for it.
    const auto hasParentComponent = [](const std::string& name) -> bool {
      std::size_t begin = 0;
      while (begin <= name.size()) {
        const std::size_t slash = name.find('/', begin);
        const std::size_t end = (slash == std::string::npos) ? name.size() : slash;
        if (name.compare(begin, end - begin, "..") == 0) {
          return true;
        }
        if (slash == std::string::npos) {
          break;
        }
        begin = slash + 1;
      }
      return false;
    };
    if (fileName.empty() || hasParentComponent(fileName)) {
      throw std::runtime_error("Invalid model file name: " + fileName);
    }

#ifdef __APPLE__
    return litert_lm::downloadModelFile(url, fileName, onProgress);
#else
    // The command is interpreted by a shell, so every caller-supplied value is
    // wrapped in single quotes (with embedded quotes escaped as '\'').
    // Without this, a URL or file name containing `"`, `;`, `$(...)` etc.
    // would be executed as shell syntax.
    const auto shellQuote = [](const std::string& raw) -> std::string {
      std::string quoted = "'";
      for (const char ch : raw) {
        if (ch == '\'') {
          quoted += "'\\''";
        } else {
          quoted += ch;
        }
      }
      quoted += '\'';
      return quoted;
    };

    const std::string destPath = "/tmp/" + fileName;
    // --fail: make HTTP errors (4xx/5xx) produce a non-zero exit status instead
    //         of saving the error page as the model file.
    // --    : end of options, so a URL starting with '-' is not parsed as a flag.
    const std::string curlCmd =
        "curl --fail -L -o " + shellQuote(destPath) + " -- " + shellQuote(url);

    const int exitStatus = system(curlCmd.c_str());
    if (exitStatus != 0) {
      throw std::runtime_error("Failed to download model from: " + url);
    }
    // curl gives no incremental progress here; report completion only.
    if (onProgress.has_value()) {
      onProgress.value()(1.0);
    }
    return destPath;
#endif
  });
}
|
|
377
589
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
const
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
// to avoid blocking other operations. The callback may be invoked
|
|
387
|
-
// from a different thread depending on LiteRT-LM's implementation.
|
|
388
|
-
|
|
389
|
-
{
|
|
390
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
391
|
-
ensureLoaded();
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
#ifdef LITERT_LM_ENABLED
|
|
395
|
-
// Add user message to history before starting
|
|
396
|
-
{
|
|
397
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
398
|
-
Message userMessage;
|
|
399
|
-
userMessage.role = Role::USER;
|
|
400
|
-
userMessage.content = message;
|
|
401
|
-
history_.push_back(userMessage);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
litert::lm::UserMessage lm_message;
|
|
405
|
-
lm_message.role = "user";
|
|
406
|
-
lm_message.content = message;
|
|
407
|
-
|
|
408
|
-
std::string fullResponse;
|
|
409
|
-
|
|
410
|
-
// The callback needs to be carefully managed for thread safety
|
|
411
|
-
auto status = conversation_->SendMessageAsync(
|
|
412
|
-
lm_message,
|
|
413
|
-
[this, &onToken, &fullResponse](const std::string& token, bool isDone) {
|
|
414
|
-
fullResponse += token;
|
|
415
|
-
|
|
416
|
-
// Invoke the JS callback (Nitro handles thread marshalling)
|
|
417
|
-
onToken(token, isDone);
|
|
418
|
-
|
|
419
|
-
if (isDone) {
|
|
420
|
-
// Add complete response to history
|
|
421
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
422
|
-
Message modelMessage;
|
|
423
|
-
modelMessage.role = Role::MODEL;
|
|
424
|
-
modelMessage.content = fullResponse;
|
|
425
|
-
history_.push_back(modelMessage);
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
);
|
|
429
|
-
|
|
430
|
-
if (!status.ok()) {
|
|
431
|
-
// Remove user message since inference failed
|
|
432
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
433
|
-
if (!history_.empty()) {
|
|
434
|
-
history_.pop_back();
|
|
590
|
+
std::shared_ptr<Promise<void>> HybridLiteRTLM::deleteModel(const std::string& fileName) {
|
|
591
|
+
return Promise<void>::async([fileName]() {
|
|
592
|
+
std::string path;
|
|
593
|
+
#ifdef __APPLE__
|
|
594
|
+
// Match the path used by IOSDownloadHelper: ~/Library/Caches/litert_models/
|
|
595
|
+
const char* home = getenv("HOME");
|
|
596
|
+
if (home) {
|
|
597
|
+
path = std::string(home) + "/Library/Caches/litert_models/" + fileName;
|
|
435
598
|
}
|
|
436
|
-
throw std::runtime_error("Async inference failed: " +
|
|
437
|
-
std::string(status.message()));
|
|
438
|
-
}
|
|
439
|
-
|
|
440
599
|
#else
|
|
441
|
-
|
|
442
|
-
std::string fullResponse;
|
|
443
|
-
{
|
|
444
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
445
|
-
|
|
446
|
-
// Add user message
|
|
447
|
-
Message userMessage;
|
|
448
|
-
userMessage.role = Role::USER;
|
|
449
|
-
userMessage.content = message;
|
|
450
|
-
history_.push_back(userMessage);
|
|
451
|
-
|
|
452
|
-
fullResponse = "[LiteRT-LM Stub] Streaming response placeholder. You said: " + message;
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
// Simulate token-by-token streaming
|
|
456
|
-
std::string currentWord;
|
|
457
|
-
for (size_t i = 0; i < fullResponse.length(); i++) {
|
|
458
|
-
char c = fullResponse[i];
|
|
459
|
-
currentWord += c;
|
|
460
|
-
|
|
461
|
-
if (c == ' ' || c == '\n' || i == fullResponse.length() - 1) {
|
|
462
|
-
bool isDone = (i == fullResponse.length() - 1);
|
|
463
|
-
onToken(currentWord, isDone);
|
|
464
|
-
currentWord.clear();
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
// Add model response to history
|
|
469
|
-
{
|
|
470
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
471
|
-
Message modelMessage;
|
|
472
|
-
modelMessage.role = Role::MODEL;
|
|
473
|
-
modelMessage.content = fullResponse;
|
|
474
|
-
history_.push_back(modelMessage);
|
|
475
|
-
}
|
|
600
|
+
path = "/tmp/" + fileName;
|
|
476
601
|
#endif
|
|
602
|
+
if (!path.empty()) {
|
|
603
|
+
std::remove(path.c_str());
|
|
604
|
+
}
|
|
605
|
+
});
|
|
477
606
|
}
|
|
478
607
|
|
|
479
|
-
|
|
480
|
-
// getHistory
|
|
481
|
-
|
|
608
|
+
// =============================================================================
|
|
609
|
+
// getHistory
|
|
610
|
+
// =============================================================================
|
|
611
|
+
|
|
482
612
|
std::vector<Message> HybridLiteRTLM::getHistory() {
|
|
483
613
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
484
614
|
return history_;
|
|
485
615
|
}
|
|
486
616
|
|
|
487
|
-
|
|
488
|
-
// resetConversation
|
|
489
|
-
|
|
617
|
+
// =============================================================================
|
|
618
|
+
// resetConversation
|
|
619
|
+
// =============================================================================
|
|
620
|
+
|
|
490
621
|
void HybridLiteRTLM::resetConversation() {
|
|
491
622
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
492
623
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
624
|
+
history_.clear();
|
|
625
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
626
|
+
|
|
627
|
+
#ifdef __APPLE__
|
|
628
|
+
if (isLoaded_ && engine_) {
|
|
498
629
|
createNewConversation();
|
|
499
630
|
}
|
|
500
631
|
#endif
|
|
501
|
-
|
|
502
|
-
history_.clear();
|
|
503
632
|
}
|
|
504
633
|
|
|
505
|
-
|
|
506
|
-
// isReady
|
|
507
|
-
|
|
634
|
+
// =============================================================================
|
|
635
|
+
// isReady
|
|
636
|
+
// =============================================================================
|
|
637
|
+
|
|
508
638
|
bool HybridLiteRTLM::isReady() {
|
|
509
639
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
510
640
|
return isLoaded_;
|
|
511
641
|
}
|
|
512
642
|
|
|
513
|
-
|
|
514
|
-
// getStats
|
|
515
|
-
|
|
643
|
+
// =============================================================================
|
|
644
|
+
// getStats
|
|
645
|
+
// =============================================================================
|
|
646
|
+
|
|
516
647
|
/// Returns a copy of the most recently stored generation stats, taken while
/// holding mutex_.
GenerationStats HybridLiteRTLM::getStats() {
  const std::lock_guard<std::mutex> guard(mutex_);
  GenerationStats snapshot = lastStats_;
  return snapshot;
}
|
|
520
651
|
|
|
521
|
-
|
|
522
|
-
// getMemoryUsage
|
|
523
|
-
|
|
652
|
+
// =============================================================================
|
|
653
|
+
// getMemoryUsage — Uses Mach APIs for iOS process memory
|
|
654
|
+
// =============================================================================
|
|
655
|
+
|
|
524
656
|
/**
 * Reports process and device memory figures.
 *
 * On Apple platforms:
 *  - used memory is this task's resident size (MACH_TASK_BASIC_INFO);
 *  - total memory is the host's max_mem (HOST_BASIC_INFO);
 *  - available memory is computed as total minus used, clamped at zero;
 *  - isLowMemory is set when that figure drops below 200 MiB.
 * On other platforms every numeric field is 0 and isLowMemory is false.
 *
 * NOTE(review): "available" is total physical memory minus this process's
 * resident size, so it does not account for memory held by other processes.
 *
 * @return MemoryUsage{nativeHeapBytes, residentBytes, availableMemoryBytes,
 *         isLowMemory}; the first two fields carry the same resident-size value.
 */
MemoryUsage HybridLiteRTLM::getMemoryUsage() {
  double usedMemoryBytes = 0;
  double totalMemoryBytes = 0;
  double availableBytes = 0;
  bool isLowMemory = false;

#ifdef __APPLE__
  constexpr double kLowMemoryThresholdBytes = 200.0 * 1024.0 * 1024.0;

  // Get app process memory (resident set size)
  struct mach_task_basic_info info;
  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

  kern_return_t kr = task_info(mach_task_self(),
                               MACH_TASK_BASIC_INFO,
                               reinterpret_cast<task_info_t>(&info),
                               &count);

  if (kr == KERN_SUCCESS) {
    usedMemoryBytes = static_cast<double>(info.resident_size);
  }

  // Get total physical memory
  const mach_port_t host_port = mach_host_self();
  struct host_basic_info hostInfo;
  mach_msg_type_number_t hostCount = HOST_BASIC_INFO_COUNT;

  kr = host_info(host_port, HOST_BASIC_INFO,
                 reinterpret_cast<host_info_t>(&hostInfo), &hostCount);

  // mach_host_self() hands back a new send right on every call; release it so
  // repeated polling does not accumulate port references.
  mach_port_deallocate(mach_task_self(), host_port);

  if (kr == KERN_SUCCESS) {
    totalMemoryBytes = static_cast<double>(hostInfo.max_mem);
  }

  availableBytes = totalMemoryBytes - usedMemoryBytes;
  if (availableBytes < 0) availableBytes = 0;

  // Only flag low memory when the total is known; otherwise availableBytes is
  // 0 simply because host_info failed.
  isLowMemory = (totalMemoryBytes > 0) && (availableBytes < kLowMemoryThresholdBytes);
#endif

  return MemoryUsage{
    usedMemoryBytes,   // nativeHeapBytes
    usedMemoryBytes,   // residentBytes
    availableBytes,    // availableMemoryBytes
    isLowMemory        // isLowMemory
  };
}
|
|
599
702
|
|
|
600
|
-
|
|
601
|
-
// close
|
|
602
|
-
|
|
703
|
+
// =============================================================================
|
|
704
|
+
// close — Clean up all LiteRT-LM resources
|
|
705
|
+
// =============================================================================
|
|
706
|
+
|
|
603
707
|
void HybridLiteRTLM::close() {
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
#ifdef LITERT_LM_ENABLED
|
|
607
|
-
// Release in reverse order of creation
|
|
608
|
-
conversation_.reset();
|
|
609
|
-
engine_.reset();
|
|
610
|
-
#endif
|
|
708
|
+
// Note: Don't lock here if called from destructor (mutex may be destroyed)
|
|
709
|
+
// The caller (loadModel, destructor) should handle locking.
|
|
611
710
|
|
|
612
711
|
isLoaded_ = false;
|
|
613
712
|
history_.clear();
|
|
713
|
+
|
|
714
|
+
#ifdef __APPLE__
|
|
715
|
+
if (conversation_) {
|
|
716
|
+
litert_lm_conversation_delete(conversation_);
|
|
717
|
+
conversation_ = nullptr;
|
|
718
|
+
}
|
|
719
|
+
if (conv_config_) {
|
|
720
|
+
litert_lm_conversation_config_delete(conv_config_);
|
|
721
|
+
conv_config_ = nullptr;
|
|
722
|
+
}
|
|
723
|
+
if (session_config_) {
|
|
724
|
+
litert_lm_session_config_delete(session_config_);
|
|
725
|
+
session_config_ = nullptr;
|
|
726
|
+
}
|
|
727
|
+
if (engine_) {
|
|
728
|
+
litert_lm_engine_delete(engine_);
|
|
729
|
+
engine_ = nullptr;
|
|
730
|
+
}
|
|
731
|
+
#endif
|
|
732
|
+
|
|
733
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
614
734
|
}
|
|
615
735
|
|
|
616
736
|
} // namespace margelo::nitro::litertlm
|