react-native-litert-lm 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +269 -186
- package/android/build.gradle +1 -1
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +93 -37
- package/app.plugin.js +33 -0
- package/cpp/HybridLiteRTLM.cpp +604 -450
- package/cpp/HybridLiteRTLM.hpp +54 -23
- package/cpp/IOSDownloadHelper.h +24 -0
- package/cpp/cpp-adapter.cpp +2 -2
- package/cpp/include/litert_lm_engine.h +509 -0
- package/ios/IOSDownloadHelper.mm +129 -0
- package/ios/LiteRTLMAutolinking.mm +30 -0
- package/lib/hooks.d.ts +9 -4
- package/lib/hooks.js +34 -20
- package/lib/index.d.ts +1 -0
- package/lib/index.js +2 -5
- package/lib/memoryTracker.d.ts +1 -1
- package/lib/memoryTracker.js +1 -1
- package/lib/modelFactory.d.ts +11 -5
- package/lib/modelFactory.js +9 -4
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +11 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +31 -37
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +19 -22
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +15 -18
- package/package.json +12 -5
- package/react-native-litert-lm.podspec +20 -7
- package/scripts/build-ios-engine.sh +302 -0
- package/scripts/download-ios-frameworks.sh +72 -0
- package/scripts/postinstall.js +116 -0
- package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
- package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
- package/scripts/stubs/llguidance_stubs.c +101 -0
- package/src/hooks.ts +62 -39
- package/src/index.ts +4 -7
- package/src/memoryTracker.ts +1 -1
- package/src/modelFactory.ts +30 -5
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// HybridLiteRTLM.cpp
|
|
3
3
|
// react-native-litert-lm
|
|
4
4
|
//
|
|
5
|
-
// High-performance LLM inference using LiteRT-LM.
|
|
5
|
+
// High-performance LLM inference using LiteRT-LM C API.
|
|
6
6
|
//
|
|
7
7
|
// NOTE: This C++ implementation is used for iOS ONLY.
|
|
8
8
|
// Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
|
|
@@ -11,80 +11,204 @@
|
|
|
11
11
|
|
|
12
12
|
#include "HybridLiteRTLM.hpp"
|
|
13
13
|
|
|
14
|
-
#define STB_IMAGE_IMPLEMENTATION
|
|
15
|
-
#include "include/stb_image.h"
|
|
16
14
|
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
#include <NitroModules/Promise.hpp>
|
|
17
18
|
#include <chrono>
|
|
18
19
|
#include <stdexcept>
|
|
19
20
|
#include <sstream>
|
|
21
|
+
#include <sys/stat.h>
|
|
22
|
+
#include <cstdio>
|
|
23
|
+
|
|
24
|
+
#ifdef __APPLE__
|
|
25
|
+
#include "IOSDownloadHelper.h"
|
|
26
|
+
#endif
|
|
20
27
|
#include <fstream>
|
|
28
|
+
#include <thread>
|
|
29
|
+
#include <regex>
|
|
21
30
|
|
|
22
31
|
namespace margelo::nitro::litertlm {
|
|
23
32
|
|
|
24
|
-
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
33
|
+
// =============================================================================
|
|
34
|
+
// JSON Helpers
|
|
35
|
+
// =============================================================================
|
|
36
|
+
|
|
37
|
+
// Escapes `input` so it can be placed between double quotes in a JSON document.
//
// Quotes, backslashes and the control characters that have a short form
// (\n, \r, \t, \b, \f) use their two-character escapes. Every other byte below
// 0x20 is written as \u00XX, because JSON does not allow raw control characters
// inside a string. All remaining bytes (including UTF-8 multi-byte sequences)
// are copied through unchanged.
//
// @param input Raw text to escape.
// @return The escaped text, without surrounding quotes.
std::string HybridLiteRTLM::escapeJson(const std::string& input) {
  static constexpr char kHexDigits[] = "0123456789abcdef";

  std::string output;
  output.reserve(input.size() + 16);
  for (const char c : input) {
    switch (c) {
      case '"':  output += "\\\""; break;
      case '\\': output += "\\\\"; break;
      case '\n': output += "\\n";  break;
      case '\r': output += "\\r";  break;
      case '\t': output += "\\t";  break;
      case '\b': output += "\\b";  break;
      case '\f': output += "\\f";  break;
      default: {
        // `char` may be signed, so compare as an unsigned byte.
        const auto byte = static_cast<unsigned char>(c);
        if (byte < 0x20) {
          output += "\\u00";
          output += kHexDigits[byte >> 4];
          output += kHexDigits[byte & 0x0F];
        } else {
          output += c;
        }
        break;
      }
    }
  }
  return output;
}
|
|
54
|
+
|
|
55
|
+
std::string HybridLiteRTLM::buildTextMessageJson(const std::string& text) {
|
|
56
|
+
return "{\"role\":\"user\",\"content\":\"" + escapeJson(text) + "\"}";
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
std::string HybridLiteRTLM::buildImageMessageJson(const std::string& text, const std::string& imagePath) {
|
|
60
|
+
return "{\"role\":\"user\",\"content\":["
|
|
61
|
+
"{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
|
|
62
|
+
"{\"type\":\"image\",\"path\":\"" + escapeJson(imagePath) + "\"}"
|
|
63
|
+
"]}";
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
std::string HybridLiteRTLM::buildAudioMessageJson(const std::string& text, const std::string& audioPath) {
|
|
67
|
+
return "{\"role\":\"user\",\"content\":["
|
|
68
|
+
"{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
|
|
69
|
+
"{\"type\":\"audio\",\"path\":\"" + escapeJson(audioPath) + "\"}"
|
|
70
|
+
"]}";
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
namespace {

// Appends the UTF-8 encoding of the Unicode code point `cp` to `out`.
void appendUtf8CodePoint(std::string& out, char32_t cp) {
  if (cp < 0x80) {
    out += static_cast<char>(cp);
  } else if (cp < 0x800) {
    out += static_cast<char>(0xC0 | (cp >> 6));
    out += static_cast<char>(0x80 | (cp & 0x3F));
  } else if (cp < 0x10000) {
    out += static_cast<char>(0xE0 | (cp >> 12));
    out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    out += static_cast<char>(0x80 | (cp & 0x3F));
  } else {
    out += static_cast<char>(0xF0 | (cp >> 18));
    out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
    out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    out += static_cast<char>(0x80 | (cp & 0x3F));
  }
}

// Reads four hex digits starting at json[pos] into `value`.
// Returns false (leaving `value` unspecified) if fewer than four characters
// remain or any of them is not a hex digit.
bool readHex4(const std::string& json, size_t pos, char32_t& value) {
  if (pos + 4 > json.size()) {
    return false;
  }
  value = 0;
  for (size_t k = 0; k < 4; ++k) {
    const char c = json[pos + k];
    char32_t digit = 0;
    if (c >= '0' && c <= '9') {
      digit = static_cast<char32_t>(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      digit = static_cast<char32_t>(c - 'a' + 10);
    } else if (c >= 'A' && c <= 'F') {
      digit = static_cast<char32_t>(c - 'A' + 10);
    } else {
      return false;
    }
    value = (value << 4) | digit;
  }
  return true;
}

// Decodes the body of a JSON string that begins at json[start] (the character
// right after the opening quote) and runs up to the first unescaped quote, or
// to the end of the input if no closing quote exists.
//
// Recognised escapes: \" \\ \/ \n \r \t \b \f and \uXXXX (with surrogate pairs
// combined; an unpaired surrogate becomes U+FFFD). For any other escape the
// backslash is kept literally and the following character is processed normally.
std::string decodeJsonStringBody(const std::string& json, size_t start) {
  std::string result;
  result.reserve(json.size() - start);

  for (size_t i = start; i < json.size(); ++i) {
    const char c = json[i];
    if (c == '"') {
      break;  // End of the string value
    }
    if (c != '\\' || i + 1 >= json.size()) {
      result += c;
      continue;
    }

    const char next = json[i + 1];
    switch (next) {
      case '"':  result += '"';  ++i; break;
      case '\\': result += '\\'; ++i; break;
      case '/':  result += '/';  ++i; break;
      case 'n':  result += '\n'; ++i; break;
      case 'r':  result += '\r'; ++i; break;
      case 't':  result += '\t'; ++i; break;
      case 'b':  result += '\b'; ++i; break;
      case 'f':  result += '\f'; ++i; break;
      case 'u': {
        char32_t cp = 0;
        if (!readHex4(json, i + 2, cp)) {
          result += c;  // Malformed \u escape: keep the backslash as-is
          break;
        }
        size_t consumed = 5;  // 'u' plus four hex digits
        if (cp >= 0xD800 && cp <= 0xDBFF) {
          // High surrogate: must be followed by "\uDC00".."\uDFFF".
          char32_t low = 0;
          if (i + 7 < json.size() && json[i + 6] == '\\' && json[i + 7] == 'u' &&
              readHex4(json, i + 8, low) && low >= 0xDC00 && low <= 0xDFFF) {
            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
            consumed = 11;  // "uXXXX" + "\uXXXX"
          } else {
            cp = 0xFFFD;
          }
        } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
          cp = 0xFFFD;  // Low surrogate without a preceding high surrogate
        }
        appendUtf8CodePoint(result, cp);
        i += consumed;
        break;
      }
      default:
        result += c;  // Unknown escape: keep the backslash
        break;
    }
  }
  return result;
}

}  // namespace

// Extracts the model's text from a response JSON string.
//
// Two shapes are recognised:
//   {"role":"model","content":[{"type":"text","text":"..."}]}
//   {"role":"model","content":"..."}
//
// Simple substring matching is used instead of a JSON parser to avoid a JSON
// library dependency. The markers are matched exactly, so a response with
// whitespace between key, colon and value matches neither shape.
//
// @param jsonResponse Raw response JSON.
// @return The decoded text of the first text part, or of the "content" string;
//         if neither marker is found, `jsonResponse` is returned unchanged.
std::string HybridLiteRTLM::extractTextFromResponse(const std::string& jsonResponse) {
  // Array format: locate "type":"text", then the "text":"..." that follows it.
  const std::string textMarker = "\"text\":\"";
  size_t pos = jsonResponse.find("\"type\":\"text\"");
  if (pos != std::string::npos) {
    pos = jsonResponse.find(textMarker, pos);
    if (pos != std::string::npos) {
      return decodeJsonStringBody(jsonResponse, pos + textMarker.length());
    }
  }

  // Plain string format: "content":"..."
  const std::string contentMarker = "\"content\":\"";
  pos = jsonResponse.find(contentMarker);
  if (pos != std::string::npos) {
    return decodeJsonStringBody(jsonResponse, pos + contentMarker.length());
  }

  // Fallback: return full response
  return jsonResponse;
}
|
|
33
134
|
|
|
34
|
-
|
|
35
|
-
//
|
|
36
|
-
|
|
135
|
+
// =============================================================================
|
|
136
|
+
// Conversation Management
|
|
137
|
+
// =============================================================================
|
|
138
|
+
|
|
37
139
|
// Discards the current conversation (if any) and creates a new one on the
// loaded engine, seeded with the configured system prompt.
//
// The body is compiled only when __APPLE__ is defined; elsewhere this is a no-op.
//
// @throws std::runtime_error if the engine is not initialized, or if creating
//         the conversation config or the conversation fails.
void HybridLiteRTLM::createNewConversation() {
#ifdef __APPLE__
  if (!engine_) {
    throw std::runtime_error("Cannot create conversation: engine not initialized");
  }

  // Release whatever the previous conversation left behind.
  if (conversation_) {
    litert_lm_conversation_delete(conversation_);
    conversation_ = nullptr;
  }
  if (conv_config_) {
    litert_lm_conversation_config_delete(conv_config_);
    conv_config_ = nullptr;
  }

  // The system message is optional; a null pointer is passed when no prompt is set.
  const bool hasSystemPrompt = !systemPrompt_.empty();
  std::string systemJson;
  if (hasSystemPrompt) {
    systemJson = "{\"role\":\"system\",\"content\":\"" + escapeJson(systemPrompt_) + "\"}";
  }
  const char* systemJsonPtr = hasSystemPrompt ? systemJson.c_str() : nullptr;

  conv_config_ = litert_lm_conversation_config_create(
      engine_,
      session_config_,  // may be nullptr for defaults
      systemJsonPtr,    // system message
      nullptr,          // tools (not used yet)
      nullptr,          // messages history
      false             // constrained decoding
  );
  if (!conv_config_) {
    throw std::runtime_error("Failed to create conversation config");
  }

  conversation_ = litert_lm_conversation_create(engine_, conv_config_);
  if (conversation_) {
    return;
  }

  // Conversation creation failed: do not keep a config without a conversation.
  litert_lm_conversation_config_delete(conv_config_);
  conv_config_ = nullptr;
  throw std::runtime_error("Failed to create conversation");
#endif
}
|
|
57
185
|
|
|
58
|
-
|
|
59
|
-
// loadModel
|
|
60
|
-
|
|
61
|
-
|
|
186
|
+
// =============================================================================
|
|
187
|
+
// loadModel
|
|
188
|
+
// =============================================================================
|
|
189
|
+
|
|
190
|
+
std::shared_ptr<Promise<void>> HybridLiteRTLM::loadModel(
|
|
191
|
+
const std::string& modelPath,
|
|
192
|
+
const std::optional<LLMConfig>& config) {
|
|
193
|
+
return Promise<void>::async([this, modelPath, config]() {
|
|
194
|
+
loadModelInternal(modelPath, config);
|
|
195
|
+
});
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
void HybridLiteRTLM::loadModelInternal(
|
|
62
199
|
const std::string& modelPath,
|
|
63
200
|
const std::optional<LLMConfig>& config) {
|
|
64
201
|
|
|
65
202
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
66
203
|
|
|
67
|
-
// Clean up existing resources
|
|
68
204
|
if (isLoaded_) {
|
|
69
|
-
|
|
70
|
-
history_.clear();
|
|
71
|
-
#ifdef LITERT_LM_ENABLED
|
|
72
|
-
conversation_.reset();
|
|
73
|
-
engine_.reset();
|
|
74
|
-
#endif
|
|
205
|
+
close();
|
|
75
206
|
}
|
|
76
207
|
|
|
77
|
-
// Apply configuration
|
|
78
208
|
if (config.has_value()) {
|
|
79
209
|
if (config->backend.has_value()) {
|
|
80
210
|
backend_ = config->backend.value();
|
|
81
211
|
}
|
|
82
|
-
if (config->visionBackend.has_value()) {
|
|
83
|
-
visionBackend_ = config->visionBackend.value();
|
|
84
|
-
}
|
|
85
|
-
if (config->audioBackend.has_value()) {
|
|
86
|
-
audioBackend_ = config->audioBackend.value();
|
|
87
|
-
}
|
|
88
212
|
if (config->temperature.has_value()) {
|
|
89
213
|
temperature_ = config->temperature.value();
|
|
90
214
|
}
|
|
@@ -97,520 +221,550 @@ void HybridLiteRTLM::loadModel(
|
|
|
97
221
|
if (config->maxTokens.has_value()) {
|
|
98
222
|
maxTokens_ = config->maxTokens.value();
|
|
99
223
|
}
|
|
224
|
+
if (config->systemPrompt.has_value()) {
|
|
225
|
+
systemPrompt_ = config->systemPrompt.value();
|
|
226
|
+
}
|
|
100
227
|
}
|
|
101
228
|
|
|
102
|
-
#ifdef
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
auto
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
229
|
+
#ifdef __APPLE__
|
|
230
|
+
// Set log verbosity: 2=WARNING (production), 0=INFO (debug)
|
|
231
|
+
litert_lm_set_min_log_level(2);
|
|
232
|
+
|
|
233
|
+
auto backendStr = [](Backend b) -> const char* {
|
|
234
|
+
switch (b) {
|
|
235
|
+
case Backend::GPU: return "gpu";
|
|
236
|
+
case Backend::NPU: return "gpu"; // NPU not available on iOS, use GPU
|
|
237
|
+
default: return "cpu";
|
|
238
|
+
}
|
|
239
|
+
};
|
|
240
|
+
|
|
241
|
+
auto tryCreateEngine = [&](const char* backend, const char* visionBackend) -> bool {
|
|
242
|
+
auto* settings = litert_lm_engine_settings_create(
|
|
243
|
+
modelPath.c_str(),
|
|
244
|
+
backend,
|
|
245
|
+
visionBackend,
|
|
246
|
+
nullptr // audio executor not supported on iOS yet
|
|
247
|
+
);
|
|
248
|
+
if (!settings) {
|
|
249
|
+
return false;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
litert_lm_engine_settings_set_max_num_tokens(settings, static_cast<int>(maxTokens_));
|
|
253
|
+
litert_lm_engine_settings_enable_benchmark(settings);
|
|
254
|
+
|
|
255
|
+
// Set cache directory to the same directory as the model file
|
|
256
|
+
std::string cacheDir = modelPath.substr(0, modelPath.find_last_of('/'));
|
|
257
|
+
litert_lm_engine_settings_set_cache_dir(settings, cacheDir.c_str());
|
|
258
|
+
|
|
259
|
+
engine_ = litert_lm_engine_create(settings);
|
|
260
|
+
litert_lm_engine_settings_delete(settings);
|
|
261
|
+
|
|
262
|
+
return engine_ != nullptr;
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
// Try requested backend first (e.g. gpu/gpu)
|
|
266
|
+
const char* primaryBackend = backendStr(backend_);
|
|
267
|
+
if (!tryCreateEngine(primaryBackend, primaryBackend)) {
|
|
268
|
+
// Fallback chain for when the primary backend fails:
|
|
269
|
+
bool fallbackOk = false;
|
|
270
|
+
if (backend_ != Backend::CPU) {
|
|
271
|
+
// 1) Try CPU main + GPU vision (model's vision encoder often requires GPU)
|
|
272
|
+
fallbackOk = tryCreateEngine("cpu", "gpu");
|
|
273
|
+
// 2) Try CPU main + CPU vision
|
|
274
|
+
if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", "cpu");
|
|
275
|
+
}
|
|
276
|
+
// 3) Try CPU main + no vision (nullptr skips vision executor entirely)
|
|
277
|
+
if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", nullptr);
|
|
278
|
+
if (fallbackOk) {
|
|
279
|
+
backend_ = Backend::CPU;
|
|
280
|
+
}
|
|
134
281
|
}
|
|
135
|
-
|
|
282
|
+
|
|
283
|
+
if (!engine_) {
|
|
284
|
+
// Collect diagnostic info
|
|
285
|
+
std::string diag = " | Diagnostics: ";
|
|
286
|
+
struct stat st;
|
|
287
|
+
if (stat(modelPath.c_str(), &st) == 0) {
|
|
288
|
+
diag += "File size: " + std::to_string(st.st_size) + " bytes";
|
|
289
|
+
} else {
|
|
290
|
+
diag += "Failed to stat file (errno: " + std::to_string(errno) + ")";
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
FILE* f = fopen(modelPath.c_str(), "rb");
|
|
294
|
+
if (f) {
|
|
295
|
+
diag += ", Readable: YES";
|
|
296
|
+
fclose(f);
|
|
297
|
+
} else {
|
|
298
|
+
diag += ", Readable: NO (errno: " + std::to_string(errno) + ")";
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
// Get the native error from the C API
|
|
302
|
+
const char* nativeErr = litert_lm_get_last_error();
|
|
303
|
+
if (nativeErr && nativeErr[0] != '\0') {
|
|
304
|
+
diag += " | Native error: " + std::string(nativeErr);
|
|
305
|
+
}
|
|
136
306
|
|
|
137
|
-
|
|
138
|
-
|
|
307
|
+
throw std::runtime_error(
|
|
308
|
+
"Failed to create LiteRT-LM engine. Tried backend '" +
|
|
309
|
+
std::string(primaryBackend) + "' and CPU fallback. Model path: " + modelPath + diag);
|
|
310
|
+
}
|
|
139
311
|
|
|
140
|
-
|
|
312
|
+
session_config_ = litert_lm_session_config_create();
|
|
313
|
+
if (session_config_) {
|
|
314
|
+
litert_lm_session_config_set_max_output_tokens(session_config_, static_cast<int>(maxTokens_));
|
|
315
|
+
|
|
316
|
+
LiteRtLmSamplerParams sampler{};
|
|
317
|
+
sampler.type = kTopP;
|
|
318
|
+
sampler.top_k = static_cast<int32_t>(topK_);
|
|
319
|
+
sampler.top_p = static_cast<float>(topP_);
|
|
320
|
+
sampler.temperature = static_cast<float>(temperature_);
|
|
321
|
+
sampler.seed = 0;
|
|
322
|
+
litert_lm_session_config_set_sampler_params(session_config_, &sampler);
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
createNewConversation();
|
|
326
|
+
#endif
|
|
141
327
|
|
|
142
328
|
isLoaded_ = true;
|
|
143
329
|
history_.clear();
|
|
144
|
-
|
|
145
|
-
// Reset stats
|
|
146
330
|
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
147
331
|
}
|
|
148
332
|
|
|
149
|
-
|
|
150
|
-
// sendMessage
|
|
151
|
-
|
|
152
|
-
|
|
333
|
+
// =============================================================================
|
|
334
|
+
// sendMessage — Blocking text inference
|
|
335
|
+
// =============================================================================
|
|
336
|
+
|
|
337
|
+
// Sends a text message and returns a promise for the model's reply.
//
// The work is delegated to sendMessageInternal() through
// Promise<std::string>::async; `message` is captured by value.
//
// @param message User message text.
// @return The promise created by Promise<std::string>::async for the reply.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessage(const std::string& message) {
  auto inferenceTask = [this, message]() -> std::string {
    return sendMessageInternal(message);
  };
  return Promise<std::string>::async(inferenceTask);
}
|
|
342
|
+
|
|
343
|
+
std::string HybridLiteRTLM::sendMessageInternal(const std::string& message) {
|
|
153
344
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
154
345
|
ensureLoaded();
|
|
155
346
|
|
|
156
|
-
auto startTime = std::chrono::
|
|
157
|
-
|
|
158
|
-
// Add user message to history
|
|
159
|
-
Message userMessage;
|
|
160
|
-
userMessage.role = Role::USER;
|
|
161
|
-
userMessage.content = message;
|
|
162
|
-
history_.push_back(userMessage);
|
|
347
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
348
|
+
std::string result;
|
|
163
349
|
|
|
164
|
-
|
|
350
|
+
#ifdef __APPLE__
|
|
351
|
+
std::string msgJson = buildTextMessageJson(message);
|
|
165
352
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
// The Conversation API expects a structured input
|
|
169
|
-
litert::lm::UserMessage lm_message;
|
|
170
|
-
lm_message.role = "user";
|
|
171
|
-
lm_message.content = message;
|
|
353
|
+
auto* response = litert_lm_conversation_send_message(
|
|
354
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
172
355
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
// Remove the user message we just added since inference failed
|
|
176
|
-
history_.pop_back();
|
|
177
|
-
throw std::runtime_error("Inference failed: " +
|
|
178
|
-
std::string(response.status().message()));
|
|
356
|
+
if (!response) {
|
|
357
|
+
throw std::runtime_error("LiteRT-LM: sendMessage failed");
|
|
179
358
|
}
|
|
180
359
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
360
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
361
|
+
if (responseStr) {
|
|
362
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
363
|
+
}
|
|
364
|
+
litert_lm_json_response_delete(response);
|
|
365
|
+
|
|
366
|
+
auto* benchInfo = litert_lm_conversation_get_benchmark_info(conversation_);
|
|
367
|
+
if (benchInfo) {
|
|
368
|
+
int numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo);
|
|
369
|
+
if (numDecodeTurns > 0) {
|
|
370
|
+
int lastIdx = numDecodeTurns - 1;
|
|
371
|
+
lastStats_.tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx);
|
|
372
|
+
lastStats_.completionTokens = static_cast<double>(
|
|
373
|
+
litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx));
|
|
374
|
+
}
|
|
375
|
+
lastStats_.timeToFirstToken = litert_lm_benchmark_info_get_time_to_first_token(benchInfo);
|
|
376
|
+
litert_lm_benchmark_info_delete(benchInfo);
|
|
194
377
|
}
|
|
195
|
-
|
|
196
378
|
#else
|
|
197
|
-
//
|
|
198
|
-
|
|
199
|
-
"Real inference will be available when LiteRT-LM libraries are integrated. "
|
|
200
|
-
"You said: " + message;
|
|
201
|
-
|
|
202
|
-
auto endTime = std::chrono::high_resolution_clock::now();
|
|
203
|
-
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
|
|
204
|
-
|
|
205
|
-
// Estimate stats for stub
|
|
206
|
-
lastStats_.promptTokens = static_cast<double>(message.length() / 4);
|
|
207
|
-
lastStats_.completionTokens = static_cast<double>(responseText.length() / 4);
|
|
208
|
-
lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
|
|
209
|
-
lastStats_.totalTime = static_cast<double>(duration);
|
|
210
|
-
lastStats_.timeToFirstToken = lastStats_.totalTime / 2;
|
|
211
|
-
lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
|
|
212
|
-
? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
|
|
213
|
-
: 0;
|
|
379
|
+
// Non-Apple stub
|
|
380
|
+
result = "[iOS only] LiteRT-LM inference not available on this platform.";
|
|
214
381
|
#endif
|
|
215
382
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
383
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
384
|
+
double latencyMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
|
|
385
|
+
lastStats_.totalTime = latencyMs / 1000.0;
|
|
386
|
+
|
|
387
|
+
// Update history
|
|
388
|
+
history_.push_back(Message{Role::USER, message});
|
|
389
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
390
|
+
|
|
391
|
+
return result;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
// =============================================================================
|
|
395
|
+
// sendMessageAsync — Streaming text inference
|
|
396
|
+
// =============================================================================
|
|
397
|
+
|
|
398
|
+
void HybridLiteRTLM::streamCallbackFn(void* callback_data, const char* chunk,
|
|
399
|
+
bool is_final, const char* error_msg) {
|
|
400
|
+
auto* ctx = static_cast<StreamContext*>(callback_data);
|
|
401
|
+
|
|
402
|
+
if (error_msg) {
|
|
403
|
+
// Error occurred — notify JS and clean up
|
|
404
|
+
ctx->onToken(std::string("Error: ") + error_msg, true);
|
|
405
|
+
delete ctx;
|
|
406
|
+
return;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
if (is_final) {
|
|
410
|
+
// Calculate stats
|
|
411
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
412
|
+
double durationMs = std::chrono::duration<double, std::milli>(endTime - ctx->startTime).count();
|
|
413
|
+
|
|
414
|
+
if (ctx->lastStats && ctx->tokenCount > 0) {
|
|
415
|
+
ctx->lastStats->completionTokens = static_cast<double>(ctx->tokenCount);
|
|
416
|
+
ctx->lastStats->totalTime = durationMs / 1000.0;
|
|
417
|
+
ctx->lastStats->tokensPerSecond = (ctx->tokenCount / durationMs) * 1000.0;
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
// Update history (thread-safe)
|
|
421
|
+
{
|
|
422
|
+
std::lock_guard<std::mutex> lock(*ctx->historyMutex);
|
|
423
|
+
ctx->history->push_back(Message{Role::USER, ctx->userMessage});
|
|
424
|
+
ctx->history->push_back(Message{Role::MODEL, ctx->fullResponse});
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
ctx->onToken("", true);
|
|
428
|
+
delete ctx;
|
|
429
|
+
return;
|
|
430
|
+
}
|
|
221
431
|
|
|
222
|
-
|
|
432
|
+
if (chunk) {
|
|
433
|
+
std::string token(chunk);
|
|
434
|
+
ctx->fullResponse += token;
|
|
435
|
+
ctx->tokenCount++;
|
|
436
|
+
ctx->onToken(token, false);
|
|
437
|
+
}
|
|
223
438
|
}
|
|
224
439
|
|
|
225
|
-
|
|
226
|
-
// sendMessageWithImage - Multimodal image + text
|
|
227
|
-
//------------------------------------------------------------------------------
|
|
228
|
-
std::string HybridLiteRTLM::sendMessageWithImage(
|
|
440
|
+
void HybridLiteRTLM::sendMessageAsync(
|
|
229
441
|
const std::string& message,
|
|
230
|
-
const std::string
|
|
442
|
+
const std::function<void(const std::string&, bool)>& onToken) {
|
|
443
|
+
|
|
444
|
+
// Copy values for the background thread (avoid use-after-free)
|
|
445
|
+
auto onTokenCopy = onToken;
|
|
446
|
+
auto messageCopy = message;
|
|
447
|
+
|
|
448
|
+
// Capture shared state safely
|
|
449
|
+
auto* ctx = new StreamContext();
|
|
450
|
+
ctx->onToken = std::move(onTokenCopy);
|
|
451
|
+
ctx->fullResponse = "";
|
|
452
|
+
ctx->history = &history_;
|
|
453
|
+
ctx->historyMutex = &mutex_;
|
|
454
|
+
ctx->userMessage = messageCopy;
|
|
455
|
+
ctx->lastStats = &lastStats_;
|
|
456
|
+
ctx->startTime = std::chrono::steady_clock::now();
|
|
457
|
+
ctx->tokenCount = 0;
|
|
231
458
|
|
|
232
|
-
|
|
459
|
+
#ifdef __APPLE__
|
|
233
460
|
ensureLoaded();
|
|
234
461
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
int
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
462
|
+
std::string msgJson = buildTextMessageJson(messageCopy);
|
|
463
|
+
|
|
464
|
+
int result = litert_lm_conversation_send_message_stream(
|
|
465
|
+
conversation_, msgJson.c_str(), nullptr,
|
|
466
|
+
streamCallbackFn, ctx);
|
|
467
|
+
|
|
468
|
+
if (result != 0) {
|
|
469
|
+
delete ctx;
|
|
470
|
+
throw std::runtime_error("LiteRT-LM: Failed to start streaming inference");
|
|
241
471
|
}
|
|
472
|
+
#else
|
|
473
|
+
// Non-Apple stub
|
|
474
|
+
ctx->onToken("[iOS only] Streaming not available on this platform.", true);
|
|
475
|
+
delete ctx;
|
|
476
|
+
#endif
|
|
477
|
+
}
|
|
242
478
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
litert::lm::UserMessage lm_message;
|
|
247
|
-
lm_message.role = "user";
|
|
248
|
-
|
|
249
|
-
// Construct multimodal content
|
|
250
|
-
// Option A: If UserMessage supports a list of content parts
|
|
251
|
-
litert::lm::ContentPart textPart;
|
|
252
|
-
textPart.type = litert::lm::ContentType::TEXT;
|
|
253
|
-
textPart.text = message;
|
|
254
|
-
lm_message.parts.push_back(textPart);
|
|
479
|
+
// =============================================================================
|
|
480
|
+
// sendMessageWithImage — Multimodal (vision)
|
|
481
|
+
// =============================================================================
|
|
255
482
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
stbi_image_free(img);
|
|
483
|
+
// Sends a text message together with an image and returns a promise for the
// model's reply.
//
// The work is delegated to sendMessageWithImageInternal() through
// Promise<std::string>::async; both arguments are captured by value.
//
// @param message   Prompt text.
// @param imagePath Path of the image file.
// @return The promise created by Promise<std::string>::async for the reply.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithImage(
    const std::string& message,
    const std::string& imagePath) {
  auto visionTask = [this, message, imagePath]() -> std::string {
    return sendMessageWithImageInternal(message, imagePath);
  };
  return Promise<std::string>::async(visionTask);
}
|
|
265
490
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
491
|
+
std::string HybridLiteRTLM::sendMessageWithImageInternal(
|
|
492
|
+
const std::string& message,
|
|
493
|
+
const std::string& imagePath) {
|
|
494
|
+
|
|
495
|
+
std::lock_guard<std::mutex> lock(mutex_);
|
|
496
|
+
ensureLoaded();
|
|
497
|
+
|
|
498
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
499
|
+
std::string result;
|
|
500
|
+
|
|
501
|
+
#ifdef __APPLE__
|
|
502
|
+
// Verify image exists
|
|
503
|
+
std::ifstream imageFile(imagePath);
|
|
504
|
+
if (!imageFile.good()) {
|
|
505
|
+
throw std::runtime_error("Image file not found: " + imagePath);
|
|
270
506
|
}
|
|
507
|
+
imageFile.close();
|
|
271
508
|
|
|
272
|
-
//
|
|
273
|
-
|
|
274
|
-
userMessage.role = Role::USER;
|
|
275
|
-
userMessage.content = message + " [Image]";
|
|
276
|
-
history_.push_back(userMessage);
|
|
509
|
+
// Build multimodal message JSON — the C API handles image preprocessing
|
|
510
|
+
std::string msgJson = buildImageMessageJson(message, imagePath);
|
|
277
511
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
modelMessage.content = response->content;
|
|
281
|
-
history_.push_back(modelMessage);
|
|
512
|
+
auto* response = litert_lm_conversation_send_message(
|
|
513
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
282
514
|
|
|
283
|
-
|
|
515
|
+
if (!response) {
|
|
516
|
+
std::string errMsg = "LiteRT-LM: sendMessageWithImage failed";
|
|
517
|
+
const char* nativeErr = litert_lm_get_last_error();
|
|
518
|
+
if (nativeErr && nativeErr[0] != '\0') {
|
|
519
|
+
errMsg += ": " + std::string(nativeErr);
|
|
520
|
+
}
|
|
521
|
+
throw std::runtime_error(errMsg);
|
|
522
|
+
}
|
|
284
523
|
|
|
524
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
525
|
+
if (responseStr) {
|
|
526
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
527
|
+
}
|
|
528
|
+
litert_lm_json_response_delete(response);
|
|
285
529
|
#else
|
|
286
|
-
|
|
287
|
-
throw std::runtime_error(
|
|
288
|
-
"sendMessageWithImage is not supported on iOS. "
|
|
289
|
-
"LiteRT-LM iOS SDK is not yet available. "
|
|
290
|
-
"Please use text-only sendMessage() for now.");
|
|
530
|
+
result = "[iOS only] Vision inference not available on this platform.";
|
|
291
531
|
#endif
|
|
532
|
+
|
|
533
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
534
|
+
lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
|
|
535
|
+
|
|
536
|
+
history_.push_back(Message{Role::USER, message + " [image: " + imagePath + "]"});
|
|
537
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
538
|
+
|
|
539
|
+
return result;
|
|
292
540
|
}
|
|
293
541
|
|
|
294
|
-
|
|
295
|
-
|
|
542
|
+
// =============================================================================
|
|
543
|
+
// sendMessageWithAudio — Multimodal (audio)
|
|
544
|
+
// =============================================================================
|
|
296
545
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
std::
|
|
301
|
-
|
|
302
|
-
const std::string& fileName,
|
|
303
|
-
const std::optional<std::function<void(double)>>& onProgress) {
|
|
304
|
-
|
|
305
|
-
// Return a future that throws an exception
|
|
306
|
-
return std::async(std::launch::async, []() -> std::string {
|
|
307
|
-
throw std::runtime_error(
|
|
308
|
-
"downloadModel is not supported on iOS yet. "
|
|
309
|
-
"Please download the model manually using a separate library."
|
|
310
|
-
);
|
|
546
|
+
// Sends a text message together with an audio file and returns a promise for
// the model's reply.
//
// The work is delegated to sendMessageWithAudioInternal() through
// Promise<std::string>::async; both arguments are captured by value.
//
// @param message   Prompt text.
// @param audioPath Path of the audio file.
// @return The promise created by Promise<std::string>::async for the reply.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithAudio(
    const std::string& message,
    const std::string& audioPath) {
  auto audioTask = [this, message, audioPath]() -> std::string {
    return sendMessageWithAudioInternal(message, audioPath);
  };
  return Promise<std::string>::async(audioTask);
}
|
|
313
553
|
|
|
314
|
-
|
|
315
|
-
// sendMessageWithAudio - Multimodal audio + text
|
|
316
|
-
//------------------------------------------------------------------------------
|
|
317
|
-
std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
554
|
+
std::string HybridLiteRTLM::sendMessageWithAudioInternal(
|
|
318
555
|
const std::string& message,
|
|
319
556
|
const std::string& audioPath) {
|
|
320
557
|
|
|
321
558
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
322
559
|
ensureLoaded();
|
|
323
560
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
561
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
562
|
+
std::string result;
|
|
563
|
+
|
|
564
|
+
#ifdef __APPLE__
|
|
565
|
+
std::ifstream audioFile(audioPath);
|
|
566
|
+
if (!audioFile.good()) {
|
|
567
|
+
throw std::runtime_error("Audio file not found: " + audioPath);
|
|
329
568
|
}
|
|
569
|
+
audioFile.close();
|
|
330
570
|
|
|
331
|
-
|
|
332
|
-
// Ideally use a WAV parsing library or miniaudio if available.
|
|
333
|
-
// For this implementation, we read the whole file.
|
|
334
|
-
std::vector<uint8_t> audioData((std::istreambuf_iterator<char>(audioFile)), std::istreambuf_iterator<char>());
|
|
571
|
+
std::string msgJson = buildAudioMessageJson(message, audioPath);
|
|
335
572
|
|
|
336
|
-
|
|
337
|
-
|
|
573
|
+
auto* response = litert_lm_conversation_send_message(
|
|
574
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
338
575
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
textPart.text = message;
|
|
342
|
-
lm_message.parts.push_back(textPart);
|
|
343
|
-
|
|
344
|
-
litert::lm::ContentPart audioPart;
|
|
345
|
-
audioPart.type = litert::lm::ContentType::AUDIO;
|
|
346
|
-
audioPart.audio.data = audioData;
|
|
347
|
-
// Metadata like sample rate might be needed:
|
|
348
|
-
// audioPart.audio.sample_rate = 16000;
|
|
349
|
-
lm_message.parts.push_back(audioPart);
|
|
350
|
-
|
|
351
|
-
auto response = conversation_->SendMessage(lm_message);
|
|
352
|
-
if (!response.ok()) {
|
|
353
|
-
throw std::runtime_error("Audio inference failed: " +
|
|
354
|
-
std::string(response.status().message()));
|
|
576
|
+
if (!response) {
|
|
577
|
+
throw std::runtime_error("LiteRT-LM: sendMessageWithAudio failed");
|
|
355
578
|
}
|
|
356
579
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
580
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
581
|
+
if (responseStr) {
|
|
582
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
583
|
+
}
|
|
584
|
+
litert_lm_json_response_delete(response);
|
|
585
|
+
#else
|
|
586
|
+
result = "[iOS only] Audio inference not available on this platform.";
|
|
587
|
+
#endif
|
|
361
588
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
modelMessage.content = response->content;
|
|
365
|
-
history_.push_back(modelMessage);
|
|
589
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
590
|
+
lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
|
|
366
591
|
|
|
367
|
-
|
|
592
|
+
history_.push_back(Message{Role::USER, message + " [audio: " + audioPath + "]"});
|
|
593
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
368
594
|
|
|
595
|
+
return result;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
// =============================================================================
|
|
599
|
+
// downloadModel — Download model from URL
|
|
600
|
+
// =============================================================================
|
|
601
|
+
|
|
602
|
+
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::downloadModel(
|
|
603
|
+
const std::string& url,
|
|
604
|
+
const std::string& fileName,
|
|
605
|
+
const std::optional<std::function<void(double)>>& onProgress) {
|
|
606
|
+
return Promise<std::string>::async([url, fileName, onProgress]() -> std::string {
|
|
607
|
+
#ifdef __APPLE__
|
|
608
|
+
return litert_lm::downloadModelFile(url, fileName, onProgress);
|
|
369
609
|
#else
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
"
|
|
610
|
+
std::string destPath = "/tmp/" + fileName;
|
|
611
|
+
std::string curlCmd = "curl -L -o \"" + destPath + "\" \"" + url + "\"";
|
|
612
|
+
int result = system(curlCmd.c_str());
|
|
613
|
+
if (result != 0) {
|
|
614
|
+
throw std::runtime_error("Failed to download model from: " + url);
|
|
615
|
+
}
|
|
616
|
+
if (onProgress.has_value()) {
|
|
617
|
+
onProgress.value()(1.0);
|
|
618
|
+
}
|
|
619
|
+
return destPath;
|
|
375
620
|
#endif
|
|
621
|
+
});
|
|
376
622
|
}
|
|
377
623
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
const
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
// to avoid blocking other operations. The callback may be invoked
|
|
387
|
-
// from a different thread depending on LiteRT-LM's implementation.
|
|
388
|
-
|
|
389
|
-
{
|
|
390
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
391
|
-
ensureLoaded();
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
#ifdef LITERT_LM_ENABLED
|
|
395
|
-
// Add user message to history before starting
|
|
396
|
-
{
|
|
397
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
398
|
-
Message userMessage;
|
|
399
|
-
userMessage.role = Role::USER;
|
|
400
|
-
userMessage.content = message;
|
|
401
|
-
history_.push_back(userMessage);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
litert::lm::UserMessage lm_message;
|
|
405
|
-
lm_message.role = "user";
|
|
406
|
-
lm_message.content = message;
|
|
407
|
-
|
|
408
|
-
std::string fullResponse;
|
|
409
|
-
|
|
410
|
-
// The callback needs to be carefully managed for thread safety
|
|
411
|
-
auto status = conversation_->SendMessageAsync(
|
|
412
|
-
lm_message,
|
|
413
|
-
[this, &onToken, &fullResponse](const std::string& token, bool isDone) {
|
|
414
|
-
fullResponse += token;
|
|
415
|
-
|
|
416
|
-
// Invoke the JS callback (Nitro handles thread marshalling)
|
|
417
|
-
onToken(token, isDone);
|
|
418
|
-
|
|
419
|
-
if (isDone) {
|
|
420
|
-
// Add complete response to history
|
|
421
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
422
|
-
Message modelMessage;
|
|
423
|
-
modelMessage.role = Role::MODEL;
|
|
424
|
-
modelMessage.content = fullResponse;
|
|
425
|
-
history_.push_back(modelMessage);
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
);
|
|
429
|
-
|
|
430
|
-
if (!status.ok()) {
|
|
431
|
-
// Remove user message since inference failed
|
|
432
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
433
|
-
if (!history_.empty()) {
|
|
434
|
-
history_.pop_back();
|
|
624
|
+
std::shared_ptr<Promise<void>> HybridLiteRTLM::deleteModel(const std::string& fileName) {
|
|
625
|
+
return Promise<void>::async([fileName]() {
|
|
626
|
+
std::string path;
|
|
627
|
+
#ifdef __APPLE__
|
|
628
|
+
// Match the path used by IOSDownloadHelper: ~/Library/Caches/litert_models/
|
|
629
|
+
const char* home = getenv("HOME");
|
|
630
|
+
if (home) {
|
|
631
|
+
path = std::string(home) + "/Library/Caches/litert_models/" + fileName;
|
|
435
632
|
}
|
|
436
|
-
throw std::runtime_error("Async inference failed: " +
|
|
437
|
-
std::string(status.message()));
|
|
438
|
-
}
|
|
439
|
-
|
|
440
633
|
#else
|
|
441
|
-
|
|
442
|
-
std::string fullResponse;
|
|
443
|
-
{
|
|
444
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
445
|
-
|
|
446
|
-
// Add user message
|
|
447
|
-
Message userMessage;
|
|
448
|
-
userMessage.role = Role::USER;
|
|
449
|
-
userMessage.content = message;
|
|
450
|
-
history_.push_back(userMessage);
|
|
451
|
-
|
|
452
|
-
fullResponse = "[LiteRT-LM Stub] Streaming response placeholder. You said: " + message;
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
// Simulate token-by-token streaming
|
|
456
|
-
std::string currentWord;
|
|
457
|
-
for (size_t i = 0; i < fullResponse.length(); i++) {
|
|
458
|
-
char c = fullResponse[i];
|
|
459
|
-
currentWord += c;
|
|
460
|
-
|
|
461
|
-
if (c == ' ' || c == '\n' || i == fullResponse.length() - 1) {
|
|
462
|
-
bool isDone = (i == fullResponse.length() - 1);
|
|
463
|
-
onToken(currentWord, isDone);
|
|
464
|
-
currentWord.clear();
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
// Add model response to history
|
|
469
|
-
{
|
|
470
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
471
|
-
Message modelMessage;
|
|
472
|
-
modelMessage.role = Role::MODEL;
|
|
473
|
-
modelMessage.content = fullResponse;
|
|
474
|
-
history_.push_back(modelMessage);
|
|
475
|
-
}
|
|
634
|
+
path = "/tmp/" + fileName;
|
|
476
635
|
#endif
|
|
636
|
+
if (!path.empty()) {
|
|
637
|
+
std::remove(path.c_str());
|
|
638
|
+
}
|
|
639
|
+
});
|
|
477
640
|
}
|
|
478
641
|
|
|
479
|
-
|
|
480
|
-
// getHistory
|
|
481
|
-
|
|
642
|
+
// =============================================================================
|
|
643
|
+
// getHistory
|
|
644
|
+
// =============================================================================
|
|
645
|
+
|
|
482
646
|
std::vector<Message> HybridLiteRTLM::getHistory() {
|
|
483
647
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
484
648
|
return history_;
|
|
485
649
|
}
|
|
486
650
|
|
|
487
|
-
|
|
488
|
-
// resetConversation
|
|
489
|
-
|
|
651
|
+
// =============================================================================
|
|
652
|
+
// resetConversation
|
|
653
|
+
// =============================================================================
|
|
654
|
+
|
|
490
655
|
void HybridLiteRTLM::resetConversation() {
|
|
491
656
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
492
657
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
658
|
+
history_.clear();
|
|
659
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
660
|
+
|
|
661
|
+
#ifdef __APPLE__
|
|
662
|
+
if (isLoaded_ && engine_) {
|
|
498
663
|
createNewConversation();
|
|
499
664
|
}
|
|
500
665
|
#endif
|
|
501
|
-
|
|
502
|
-
history_.clear();
|
|
503
666
|
}
|
|
504
667
|
|
|
505
|
-
|
|
506
|
-
// isReady
|
|
507
|
-
|
|
668
|
+
// =============================================================================
|
|
669
|
+
// isReady
|
|
670
|
+
// =============================================================================
|
|
671
|
+
|
|
508
672
|
bool HybridLiteRTLM::isReady() {
|
|
509
673
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
510
674
|
return isLoaded_;
|
|
511
675
|
}
|
|
512
676
|
|
|
513
|
-
|
|
514
|
-
// getStats
|
|
515
|
-
|
|
677
|
+
// =============================================================================
|
|
678
|
+
// getStats
|
|
679
|
+
// =============================================================================
|
|
680
|
+
|
|
516
681
|
GenerationStats HybridLiteRTLM::getStats() {
|
|
517
682
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
518
683
|
return lastStats_;
|
|
519
684
|
}
|
|
520
685
|
|
|
521
|
-
|
|
522
|
-
// getMemoryUsage
|
|
523
|
-
|
|
686
|
+
// =============================================================================
|
|
687
|
+
// getMemoryUsage — Uses Mach APIs for iOS process memory
|
|
688
|
+
// =============================================================================
|
|
689
|
+
|
|
524
690
|
MemoryUsage HybridLiteRTLM::getMemoryUsage() {
|
|
525
|
-
double
|
|
526
|
-
double
|
|
527
|
-
double
|
|
691
|
+
double usedMemoryBytes = 0;
|
|
692
|
+
double totalMemoryBytes = 0;
|
|
693
|
+
double availableBytes = 0;
|
|
528
694
|
bool isLowMemory = false;
|
|
529
|
-
|
|
695
|
+
|
|
530
696
|
#ifdef __APPLE__
|
|
531
|
-
// Get process memory
|
|
532
|
-
struct mach_task_basic_info
|
|
533
|
-
mach_msg_type_number_t
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
if (host_statistics64(mach_host_self(), HOST_VM_INFO64,
|
|
543
|
-
(host_info64_t)&vmStats, &vmCount) == KERN_SUCCESS) {
|
|
544
|
-
vm_size_t pageSize;
|
|
545
|
-
host_page_size(mach_host_self(), &pageSize);
|
|
546
|
-
availableMemoryBytes = static_cast<double>(vmStats.free_count) * pageSize;
|
|
547
|
-
// Consider low memory if free pages < 10% of total active+inactive+free
|
|
548
|
-
uint64_t totalPages = vmStats.active_count + vmStats.inactive_count + vmStats.free_count;
|
|
549
|
-
isLowMemory = (totalPages > 0) &&
|
|
550
|
-
(static_cast<double>(vmStats.free_count) / totalPages < 0.1);
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
// malloc_size is per-allocation; use resident_size as native heap proxy
|
|
554
|
-
nativeHeapBytes = residentBytes;
|
|
555
|
-
#endif
|
|
556
|
-
|
|
557
|
-
#ifdef __ANDROID__
|
|
558
|
-
// Parse /proc/self/status for VmRSS (resident set size)
|
|
559
|
-
std::ifstream statusFile("/proc/self/status");
|
|
560
|
-
if (statusFile.is_open()) {
|
|
561
|
-
std::string line;
|
|
562
|
-
while (std::getline(statusFile, line)) {
|
|
563
|
-
if (line.rfind("VmRSS:", 0) == 0) {
|
|
564
|
-
// Format: "VmRSS: 123456 kB"
|
|
565
|
-
std::istringstream iss(line.substr(6));
|
|
566
|
-
double kbValue = 0;
|
|
567
|
-
iss >> kbValue;
|
|
568
|
-
residentBytes = kbValue * 1024.0;
|
|
569
|
-
break;
|
|
570
|
-
}
|
|
571
|
-
}
|
|
697
|
+
// Get app process memory (resident set size)
|
|
698
|
+
struct mach_task_basic_info info;
|
|
699
|
+
mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
|
|
700
|
+
|
|
701
|
+
kern_return_t kr = task_info(mach_task_self(),
|
|
702
|
+
MACH_TASK_BASIC_INFO,
|
|
703
|
+
(task_info_t)&info,
|
|
704
|
+
&count);
|
|
705
|
+
|
|
706
|
+
if (kr == KERN_SUCCESS) {
|
|
707
|
+
usedMemoryBytes = static_cast<double>(info.resident_size);
|
|
572
708
|
}
|
|
573
|
-
|
|
574
|
-
//
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
std::istringstream iss(line.substr(13));
|
|
585
|
-
double kbValue = 0;
|
|
586
|
-
iss >> kbValue;
|
|
587
|
-
availableMemoryBytes = kbValue * 1024.0;
|
|
588
|
-
break;
|
|
589
|
-
}
|
|
590
|
-
}
|
|
709
|
+
|
|
710
|
+
// Get total physical memory
|
|
711
|
+
mach_port_t host_port = mach_host_self();
|
|
712
|
+
struct host_basic_info hostInfo;
|
|
713
|
+
mach_msg_type_number_t hostCount = HOST_BASIC_INFO_COUNT;
|
|
714
|
+
|
|
715
|
+
kr = host_info(host_port, HOST_BASIC_INFO,
|
|
716
|
+
(host_info_t)&hostInfo, &hostCount);
|
|
717
|
+
|
|
718
|
+
if (kr == KERN_SUCCESS) {
|
|
719
|
+
totalMemoryBytes = static_cast<double>(hostInfo.max_mem);
|
|
591
720
|
}
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
721
|
+
|
|
722
|
+
availableBytes = totalMemoryBytes - usedMemoryBytes;
|
|
723
|
+
if (availableBytes < 0) availableBytes = 0;
|
|
724
|
+
|
|
725
|
+
// Low memory threshold (~200MB available)
|
|
726
|
+
isLowMemory = (totalMemoryBytes > 0) && (availableBytes < 200.0 * 1024.0 * 1024.0);
|
|
595
727
|
#endif
|
|
596
|
-
|
|
597
|
-
return MemoryUsage{
|
|
728
|
+
|
|
729
|
+
return MemoryUsage{
|
|
730
|
+
usedMemoryBytes, // nativeHeapBytes
|
|
731
|
+
usedMemoryBytes, // residentBytes
|
|
732
|
+
availableBytes, // availableMemoryBytes
|
|
733
|
+
isLowMemory // isLowMemory
|
|
734
|
+
};
|
|
598
735
|
}
|
|
599
736
|
|
|
600
|
-
|
|
601
|
-
// close
|
|
602
|
-
|
|
737
|
+
// =============================================================================
|
|
738
|
+
// close — Clean up all LiteRT-LM resources
|
|
739
|
+
// =============================================================================
|
|
740
|
+
|
|
603
741
|
void HybridLiteRTLM::close() {
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
#ifdef LITERT_LM_ENABLED
|
|
607
|
-
// Release in reverse order of creation
|
|
608
|
-
conversation_.reset();
|
|
609
|
-
engine_.reset();
|
|
610
|
-
#endif
|
|
742
|
+
// Note: Don't lock here if called from destructor (mutex may be destroyed)
|
|
743
|
+
// The caller (loadModel, destructor) should handle locking.
|
|
611
744
|
|
|
612
745
|
isLoaded_ = false;
|
|
613
746
|
history_.clear();
|
|
747
|
+
|
|
748
|
+
#ifdef __APPLE__
|
|
749
|
+
if (conversation_) {
|
|
750
|
+
litert_lm_conversation_delete(conversation_);
|
|
751
|
+
conversation_ = nullptr;
|
|
752
|
+
}
|
|
753
|
+
if (conv_config_) {
|
|
754
|
+
litert_lm_conversation_config_delete(conv_config_);
|
|
755
|
+
conv_config_ = nullptr;
|
|
756
|
+
}
|
|
757
|
+
if (session_config_) {
|
|
758
|
+
litert_lm_session_config_delete(session_config_);
|
|
759
|
+
session_config_ = nullptr;
|
|
760
|
+
}
|
|
761
|
+
if (engine_) {
|
|
762
|
+
litert_lm_engine_delete(engine_);
|
|
763
|
+
engine_ = nullptr;
|
|
764
|
+
}
|
|
765
|
+
#endif
|
|
766
|
+
|
|
767
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
614
768
|
}
|
|
615
769
|
|
|
616
770
|
} // namespace margelo::nitro::litertlm
|