react-native-litert-lm 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +331 -150
- package/android/build.gradle +1 -1
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +140 -37
- package/app.plugin.js +33 -0
- package/cpp/HybridLiteRTLM.cpp +577 -378
- package/cpp/HybridLiteRTLM.hpp +66 -23
- package/cpp/IOSDownloadHelper.h +24 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/cpp/include/litert_lm_engine.h +502 -0
- package/ios/IOSDownloadHelper.mm +129 -0
- package/ios/LiteRTLMAutolinking.mm +30 -0
- package/lib/hooks.d.ts +33 -3
- package/lib/hooks.js +54 -23
- package/lib/index.d.ts +4 -1
- package/lib/index.js +6 -6
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +21 -2
- package/lib/modelFactory.js +78 -11
- package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +28 -18
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +39 -36
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +20 -22
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +19 -18
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +12 -5
- package/react-native-litert-lm.podspec +20 -7
- package/scripts/build-ios-engine.sh +283 -0
- package/scripts/download-ios-frameworks.sh +72 -0
- package/scripts/postinstall.js +116 -0
- package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
- package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
- package/scripts/stubs/llguidance_stubs.c +101 -0
- package/src/hooks.ts +107 -41
- package/src/index.ts +13 -6
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +107 -11
- package/src/specs/LiteRTLM.nitro.ts +21 -0
package/cpp/HybridLiteRTLM.cpp
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// HybridLiteRTLM.cpp
|
|
3
3
|
// react-native-litert-lm
|
|
4
4
|
//
|
|
5
|
-
// High-performance LLM inference using LiteRT-LM.
|
|
5
|
+
// High-performance LLM inference using LiteRT-LM C API.
|
|
6
6
|
//
|
|
7
7
|
// NOTE: This C++ implementation is used for iOS ONLY.
|
|
8
8
|
// Android uses the Kotlin implementation in `android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt`.
|
|
@@ -11,80 +11,202 @@
|
|
|
11
11
|
|
|
12
12
|
#include "HybridLiteRTLM.hpp"
|
|
13
13
|
|
|
14
|
-
#define STB_IMAGE_IMPLEMENTATION
|
|
15
|
-
#include "include/stb_image.h"
|
|
16
14
|
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
#include <NitroModules/Promise.hpp>
|
|
17
18
|
#include <chrono>
|
|
18
19
|
#include <stdexcept>
|
|
19
20
|
#include <sstream>
|
|
21
|
+
|
|
22
|
+
#ifdef __APPLE__
|
|
23
|
+
#include "IOSDownloadHelper.h"
|
|
24
|
+
#endif
|
|
20
25
|
#include <fstream>
|
|
26
|
+
#include <thread>
|
|
27
|
+
#include <regex>
|
|
21
28
|
|
|
22
29
|
namespace margelo::nitro::litertlm {
|
|
23
30
|
|
|
24
|
-
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
31
|
+
// =============================================================================
|
|
32
|
+
// JSON Helpers
|
|
33
|
+
// =============================================================================
|
|
34
|
+
|
|
35
|
+
/// Escapes `input` so it can be embedded between the quotes of a JSON string
/// literal.
///
/// Quote, backslash and the short-form control characters use their two-character
/// escapes. Every other control character below U+0020 is written as `\u00XX`,
/// because JSON forbids raw control characters inside strings. All remaining
/// bytes (including multi-byte UTF-8 sequences) are copied through unchanged.
///
/// @param input Raw text to escape.
/// @return The escaped text, without surrounding quotes.
std::string HybridLiteRTLM::escapeJson(const std::string& input) {
  static constexpr char kHexDigits[] = "0123456789abcdef";

  std::string output;
  output.reserve(input.size() + 16);
  for (const char c : input) {
    switch (c) {
      case '"':  output += "\\\""; break;
      case '\\': output += "\\\\"; break;
      case '\n': output += "\\n"; break;
      case '\r': output += "\\r"; break;
      case '\t': output += "\\t"; break;
      case '\b': output += "\\b"; break;
      case '\f': output += "\\f"; break;
      default: {
        // Compare as unsigned so bytes >= 0x80 are never mistaken for
        // control characters when `char` is signed.
        const auto byte = static_cast<unsigned char>(c);
        if (byte < 0x20) {
          output += "\\u00";
          output += kHexDigits[byte >> 4];
          output += kHexDigits[byte & 0x0F];
        } else {
          output += c;
        }
        break;
      }
    }
  }
  return output;
}
|
|
52
|
+
|
|
53
|
+
std::string HybridLiteRTLM::buildTextMessageJson(const std::string& text) {
|
|
54
|
+
return "{\"role\":\"user\",\"content\":\"" + escapeJson(text) + "\"}";
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
std::string HybridLiteRTLM::buildImageMessageJson(const std::string& text, const std::string& imagePath) {
|
|
58
|
+
return "{\"role\":\"user\",\"content\":["
|
|
59
|
+
"{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
|
|
60
|
+
"{\"type\":\"image\",\"path\":\"" + escapeJson(imagePath) + "\"}"
|
|
61
|
+
"]}";
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
std::string HybridLiteRTLM::buildAudioMessageJson(const std::string& text, const std::string& audioPath) {
|
|
65
|
+
return "{\"role\":\"user\",\"content\":["
|
|
66
|
+
"{\"type\":\"text\",\"text\":\"" + escapeJson(text) + "\"},"
|
|
67
|
+
"{\"type\":\"audio\",\"path\":\"" + escapeJson(audioPath) + "\"}"
|
|
68
|
+
"]}";
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
namespace {

/// Parses exactly four hexadecimal digits of `s` starting at `pos`.
/// Returns false (leaving `value` untouched) when fewer than four characters
/// remain or any of them is not a hex digit.
bool readHex4(const std::string& s, size_t pos, unsigned int& value) {
  if (pos + 4 > s.size()) {
    return false;
  }
  unsigned int parsed = 0;
  for (size_t k = 0; k < 4; ++k) {
    const char h = s[pos + k];
    unsigned int digit = 0;
    if (h >= '0' && h <= '9') {
      digit = static_cast<unsigned int>(h - '0');
    } else if (h >= 'a' && h <= 'f') {
      digit = static_cast<unsigned int>(h - 'a') + 10u;
    } else if (h >= 'A' && h <= 'F') {
      digit = static_cast<unsigned int>(h - 'A') + 10u;
    } else {
      return false;
    }
    parsed = (parsed << 4) | digit;
  }
  value = parsed;
  return true;
}

/// Appends the UTF-8 encoding of Unicode code point `cp` to `out`.
void appendUtf8(std::string& out, unsigned int cp) {
  if (cp < 0x80) {
    out += static_cast<char>(cp);
  } else if (cp < 0x800) {
    out += static_cast<char>(0xC0 | (cp >> 6));
    out += static_cast<char>(0x80 | (cp & 0x3F));
  } else if (cp < 0x10000) {
    out += static_cast<char>(0xE0 | (cp >> 12));
    out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    out += static_cast<char>(0x80 | (cp & 0x3F));
  } else {
    out += static_cast<char>(0xF0 | (cp >> 18));
    out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
    out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    out += static_cast<char>(0x80 | (cp & 0x3F));
  }
}

/// Decodes the body of a JSON string literal that begins at `start` (the
/// character right after the opening quote) and stops at the first unescaped
/// `"` or at the end of `json`.
///
/// Recognised escapes: `\" \\ \/ \b \f \n \r \t` and `\uXXXX` (surrogate pairs
/// are combined; an unpaired surrogate becomes U+FFFD). For any other escape
/// the backslash is kept verbatim and scanning continues with the next
/// character.
std::string decodeJsonStringBody(const std::string& json, size_t start) {
  std::string text;
  text.reserve(json.size() - start);

  size_t i = start;
  while (i < json.size()) {
    const char c = json[i];
    if (c == '"') {
      break;  // closing quote of the string value
    }
    if (c != '\\' || i + 1 >= json.size()) {
      text += c;
      ++i;
      continue;
    }

    switch (json[i + 1]) {
      case '"':  text += '"';  i += 2; break;
      case '\\': text += '\\'; i += 2; break;
      case '/':  text += '/';  i += 2; break;
      case 'b':  text += '\b'; i += 2; break;
      case 'f':  text += '\f'; i += 2; break;
      case 'n':  text += '\n'; i += 2; break;
      case 'r':  text += '\r'; i += 2; break;
      case 't':  text += '\t'; i += 2; break;
      case 'u': {
        unsigned int cp = 0;
        if (!readHex4(json, i + 2, cp)) {
          // Malformed \u escape: keep the backslash and carry on.
          text += c;
          ++i;
          break;
        }
        i += 6;
        if (cp >= 0xD800 && cp <= 0xDBFF) {
          // High surrogate: only valid when followed by a \uDC00-\uDFFF escape.
          unsigned int low = 0;
          const bool hasLow = i + 1 < json.size() && json[i] == '\\' &&
                              json[i + 1] == 'u' && readHex4(json, i + 2, low) &&
                              low >= 0xDC00 && low <= 0xDFFF;
          if (hasLow) {
            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
            i += 6;
          } else {
            cp = 0xFFFD;
          }
        } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
          cp = 0xFFFD;  // low surrogate without a preceding high surrogate
        }
        appendUtf8(text, cp);
        break;
      }
      default:
        // Unknown escape: keep the backslash, the next character is then
        // handled as ordinary text on the following iteration.
        text += c;
        ++i;
        break;
    }
  }
  return text;
}

}  // namespace

/// Extracts the model's text from a response JSON string returned by the C API.
///
/// Two shapes are recognised, using plain substring search so that no JSON
/// library is required:
///   - `{"role":"model","content":[{"type":"text","text":"..."}]}`
///   - `{"role":"model","content":"..."}`
///
/// The array form is tried first: the first `"text":"` that follows the first
/// `"type":"text"` is decoded. Otherwise the first `"content":"` is decoded.
/// The search relies on the compact (no whitespace) key/value spelling and on
/// `"type"` preceding `"text"` inside a part.
///
/// @param jsonResponse Raw response JSON.
/// @return The decoded text, or `jsonResponse` unchanged when neither shape
///         matches.
std::string HybridLiteRTLM::extractTextFromResponse(const std::string& jsonResponse) {
  const std::string typeMarker = "\"type\":\"text\"";
  const std::string textMarker = "\"text\":\"";
  const std::string contentMarker = "\"content\":\"";

  // Array format: find "text":"..." after "type":"text".
  const size_t typePos = jsonResponse.find(typeMarker);
  if (typePos != std::string::npos) {
    const size_t textPos = jsonResponse.find(textMarker, typePos);
    if (textPos != std::string::npos) {
      return decodeJsonStringBody(jsonResponse, textPos + textMarker.length());
    }
  }

  // Simple string format: "content":"...".
  const size_t contentPos = jsonResponse.find(contentMarker);
  if (contentPos != std::string::npos) {
    return decodeJsonStringBody(jsonResponse, contentPos + contentMarker.length());
  }

  // Fallback: hand back the full response.
  return jsonResponse;
}
|
|
33
132
|
|
|
34
|
-
|
|
35
|
-
//
|
|
36
|
-
|
|
133
|
+
// =============================================================================
|
|
134
|
+
// Conversation Management
|
|
135
|
+
// =============================================================================
|
|
136
|
+
|
|
37
137
|
/// Replaces the current conversation (and its config) with a fresh one created
/// from `engine_`, `session_config_` and, when non-empty, `systemPrompt_`.
///
/// The body is compiled only when `__APPLE__` is defined; elsewhere this is a
/// no-op.
///
/// @throws std::runtime_error if the engine is not initialized, or if the
///         conversation config or the conversation cannot be created.
void HybridLiteRTLM::createNewConversation() {
#ifdef __APPLE__
  if (!engine_) {
    throw std::runtime_error("Cannot create conversation: engine not initialized");
  }

  // Release whatever the previous conversation still owns.
  if (conversation_) {
    litert_lm_conversation_delete(conversation_);
    conversation_ = nullptr;
  }
  if (conv_config_) {
    litert_lm_conversation_config_delete(conv_config_);
    conv_config_ = nullptr;
  }

  // Serialize the system prompt only when one is configured; otherwise the
  // C API receives nullptr for the system message.
  std::string systemMessage;
  if (!systemPrompt_.empty()) {
    systemMessage = "{\"role\":\"system\",\"content\":\"";
    systemMessage += escapeJson(systemPrompt_);
    systemMessage += "\"}";
  }
  const char* systemMessageArg = systemMessage.empty() ? nullptr : systemMessage.c_str();

  conv_config_ = litert_lm_conversation_config_create(
      engine_,
      session_config_,   // may be nullptr for defaults
      systemMessageArg,  // system message
      nullptr,           // tools (not used yet)
      nullptr,           // messages history
      false);            // constrained decoding
  if (!conv_config_) {
    throw std::runtime_error("Failed to create conversation config");
  }

  conversation_ = litert_lm_conversation_create(engine_, conv_config_);
  if (conversation_) {
    return;
  }

  // Conversation creation failed: do not keep a config without a conversation.
  litert_lm_conversation_config_delete(conv_config_);
  conv_config_ = nullptr;
  throw std::runtime_error("Failed to create conversation");
#endif
}
|
|
57
183
|
|
|
58
|
-
|
|
59
|
-
// loadModel
|
|
60
|
-
|
|
61
|
-
|
|
184
|
+
// =============================================================================
|
|
185
|
+
// loadModel
|
|
186
|
+
// =============================================================================
|
|
187
|
+
|
|
188
|
+
std::shared_ptr<Promise<void>> HybridLiteRTLM::loadModel(
|
|
189
|
+
const std::string& modelPath,
|
|
190
|
+
const std::optional<LLMConfig>& config) {
|
|
191
|
+
return Promise<void>::async([this, modelPath, config]() {
|
|
192
|
+
loadModelInternal(modelPath, config);
|
|
193
|
+
});
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
void HybridLiteRTLM::loadModelInternal(
|
|
62
197
|
const std::string& modelPath,
|
|
63
198
|
const std::optional<LLMConfig>& config) {
|
|
64
199
|
|
|
65
200
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
66
201
|
|
|
67
|
-
// Clean up existing resources
|
|
68
202
|
if (isLoaded_) {
|
|
69
|
-
|
|
70
|
-
history_.clear();
|
|
71
|
-
#ifdef LITERT_LM_ENABLED
|
|
72
|
-
conversation_.reset();
|
|
73
|
-
engine_.reset();
|
|
74
|
-
#endif
|
|
203
|
+
close();
|
|
75
204
|
}
|
|
76
205
|
|
|
77
|
-
// Apply configuration
|
|
78
206
|
if (config.has_value()) {
|
|
79
207
|
if (config->backend.has_value()) {
|
|
80
208
|
backend_ = config->backend.value();
|
|
81
209
|
}
|
|
82
|
-
if (config->visionBackend.has_value()) {
|
|
83
|
-
visionBackend_ = config->visionBackend.value();
|
|
84
|
-
}
|
|
85
|
-
if (config->audioBackend.has_value()) {
|
|
86
|
-
audioBackend_ = config->audioBackend.value();
|
|
87
|
-
}
|
|
88
210
|
if (config->temperature.has_value()) {
|
|
89
211
|
temperature_ = config->temperature.value();
|
|
90
212
|
}
|
|
@@ -97,441 +219,518 @@ void HybridLiteRTLM::loadModel(
|
|
|
97
219
|
if (config->maxTokens.has_value()) {
|
|
98
220
|
maxTokens_ = config->maxTokens.value();
|
|
99
221
|
}
|
|
222
|
+
if (config->systemPrompt.has_value()) {
|
|
223
|
+
systemPrompt_ = config->systemPrompt.value();
|
|
224
|
+
}
|
|
100
225
|
}
|
|
101
226
|
|
|
102
|
-
#ifdef
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
if (!model_assets.ok()) {
|
|
106
|
-
throw std::runtime_error("Failed to load model assets: " +
|
|
107
|
-
std::string(model_assets.status().message()));
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
// 2. Map our Backend enum to LiteRT-LM Backend enum
|
|
111
|
-
auto engine_backend = (backend_ == Backend::GPU)
|
|
112
|
-
? litert::lm::Backend::GPU
|
|
113
|
-
: litert::lm::Backend::CPU;
|
|
114
|
-
auto vision_backend = (visionBackend_ == Backend::GPU)
|
|
115
|
-
? litert::lm::Backend::GPU
|
|
116
|
-
: litert::lm::Backend::CPU;
|
|
117
|
-
auto audio_backend = (audioBackend_ == Backend::GPU)
|
|
118
|
-
? litert::lm::Backend::GPU
|
|
119
|
-
: litert::lm::Backend::CPU;
|
|
120
|
-
|
|
121
|
-
// 3. Create EngineSettings with all backends
|
|
122
|
-
auto engine_settings = litert::lm::EngineSettings::CreateDefault(
|
|
123
|
-
*model_assets,
|
|
124
|
-
engine_backend,
|
|
125
|
-
vision_backend,
|
|
126
|
-
audio_backend
|
|
127
|
-
);
|
|
227
|
+
#ifdef __APPLE__
|
|
228
|
+
// Set log verbosity: 2=WARNING (production), 0=INFO (debug)
|
|
229
|
+
litert_lm_set_min_log_level(2);
|
|
128
230
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
231
|
+
auto backendStr = [](Backend b) -> const char* {
|
|
232
|
+
switch (b) {
|
|
233
|
+
case Backend::GPU: return "gpu";
|
|
234
|
+
case Backend::NPU: return "gpu"; // NPU not available on iOS, use GPU
|
|
235
|
+
default: return "cpu";
|
|
236
|
+
}
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
auto tryCreateEngine = [&](const char* backend, const char* visionBackend) -> bool {
|
|
240
|
+
auto* settings = litert_lm_engine_settings_create(
|
|
241
|
+
modelPath.c_str(),
|
|
242
|
+
backend,
|
|
243
|
+
visionBackend,
|
|
244
|
+
"cpu" // audio always on CPU
|
|
245
|
+
);
|
|
246
|
+
if (!settings) {
|
|
247
|
+
return false;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
litert_lm_engine_settings_set_max_num_tokens(settings, static_cast<int>(maxTokens_));
|
|
251
|
+
litert_lm_engine_settings_enable_benchmark(settings);
|
|
252
|
+
|
|
253
|
+
engine_ = litert_lm_engine_create(settings);
|
|
254
|
+
litert_lm_engine_settings_delete(settings);
|
|
255
|
+
|
|
256
|
+
return engine_ != nullptr;
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
// Try requested backend first (e.g. gpu/gpu)
|
|
260
|
+
const char* primaryBackend = backendStr(backend_);
|
|
261
|
+
if (!tryCreateEngine(primaryBackend, primaryBackend)) {
|
|
262
|
+
// Fallback chain for when the primary backend fails:
|
|
263
|
+
bool fallbackOk = false;
|
|
264
|
+
if (backend_ != Backend::CPU) {
|
|
265
|
+
// 1) Try CPU main + GPU vision (model's vision encoder often requires GPU)
|
|
266
|
+
fallbackOk = tryCreateEngine("cpu", "gpu");
|
|
267
|
+
// 2) Try CPU main + CPU vision
|
|
268
|
+
if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", "cpu");
|
|
269
|
+
}
|
|
270
|
+
// 3) Try CPU main + no vision (nullptr skips vision executor entirely)
|
|
271
|
+
if (!fallbackOk) fallbackOk = tryCreateEngine("cpu", nullptr);
|
|
272
|
+
if (fallbackOk) {
|
|
273
|
+
backend_ = Backend::CPU;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
if (!engine_) {
|
|
278
|
+
throw std::runtime_error(
|
|
279
|
+
"Failed to create LiteRT-LM engine. Tried backend '" +
|
|
280
|
+
std::string(primaryBackend) + "' and CPU fallback. Model path: " + modelPath);
|
|
134
281
|
}
|
|
135
|
-
engine_ = std::move(*engine);
|
|
136
|
-
|
|
137
|
-
// 5. Create the Conversation (lightweight - holds KV cache)
|
|
138
|
-
createNewConversation();
|
|
139
282
|
|
|
140
|
-
|
|
283
|
+
session_config_ = litert_lm_session_config_create();
|
|
284
|
+
if (session_config_) {
|
|
285
|
+
litert_lm_session_config_set_max_output_tokens(session_config_, static_cast<int>(maxTokens_));
|
|
286
|
+
|
|
287
|
+
LiteRtLmSamplerParams sampler{};
|
|
288
|
+
sampler.type = kTopP;
|
|
289
|
+
sampler.top_k = static_cast<int32_t>(topK_);
|
|
290
|
+
sampler.top_p = static_cast<float>(topP_);
|
|
291
|
+
sampler.temperature = static_cast<float>(temperature_);
|
|
292
|
+
sampler.seed = 0;
|
|
293
|
+
litert_lm_session_config_set_sampler_params(session_config_, &sampler);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
createNewConversation();
|
|
297
|
+
#endif
|
|
141
298
|
|
|
142
299
|
isLoaded_ = true;
|
|
143
300
|
history_.clear();
|
|
144
|
-
|
|
145
|
-
// Reset stats
|
|
146
301
|
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
147
302
|
}
|
|
148
303
|
|
|
149
|
-
|
|
150
|
-
// sendMessage
|
|
151
|
-
|
|
152
|
-
|
|
304
|
+
// =============================================================================
|
|
305
|
+
// sendMessage — Blocking text inference
|
|
306
|
+
// =============================================================================
|
|
307
|
+
|
|
308
|
+
/// Promise-returning entry point for blocking text inference.
///
/// Wraps sendMessageInternal() in `Promise<std::string>::async`. `message` is
/// copied into the task; `this` is captured by pointer, so the object must
/// stay alive until the task has run.
///
/// @param message User message text.
/// @return A promise for the model's reply text.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessage(const std::string& message) {
  auto inferenceTask = [this, message]() -> std::string {
    return sendMessageInternal(message);
  };
  return Promise<std::string>::async(std::move(inferenceTask));
}
|
|
313
|
+
|
|
314
|
+
std::string HybridLiteRTLM::sendMessageInternal(const std::string& message) {
|
|
153
315
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
154
316
|
ensureLoaded();
|
|
155
317
|
|
|
156
|
-
auto startTime = std::chrono::
|
|
318
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
319
|
+
std::string result;
|
|
157
320
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
321
|
+
#ifdef __APPLE__
|
|
322
|
+
std::string msgJson = buildTextMessageJson(message);
|
|
323
|
+
|
|
324
|
+
auto* response = litert_lm_conversation_send_message(
|
|
325
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
326
|
+
|
|
327
|
+
if (!response) {
|
|
328
|
+
throw std::runtime_error("LiteRT-LM: sendMessage failed");
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
332
|
+
if (responseStr) {
|
|
333
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
334
|
+
}
|
|
335
|
+
litert_lm_json_response_delete(response);
|
|
336
|
+
|
|
337
|
+
auto* benchInfo = litert_lm_conversation_get_benchmark_info(conversation_);
|
|
338
|
+
if (benchInfo) {
|
|
339
|
+
int numDecodeTurns = litert_lm_benchmark_info_get_num_decode_turns(benchInfo);
|
|
340
|
+
if (numDecodeTurns > 0) {
|
|
341
|
+
int lastIdx = numDecodeTurns - 1;
|
|
342
|
+
lastStats_.tokensPerSecond = litert_lm_benchmark_info_get_decode_tokens_per_sec_at(benchInfo, lastIdx);
|
|
343
|
+
lastStats_.completionTokens = static_cast<double>(
|
|
344
|
+
litert_lm_benchmark_info_get_decode_token_count_at(benchInfo, lastIdx));
|
|
345
|
+
}
|
|
346
|
+
lastStats_.timeToFirstToken = litert_lm_benchmark_info_get_time_to_first_token(benchInfo);
|
|
347
|
+
litert_lm_benchmark_info_delete(benchInfo);
|
|
348
|
+
}
|
|
349
|
+
#else
|
|
350
|
+
// Non-Apple stub
|
|
351
|
+
result = "[iOS only] LiteRT-LM inference not available on this platform.";
|
|
352
|
+
#endif
|
|
163
353
|
|
|
164
|
-
std::
|
|
354
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
355
|
+
double latencyMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
|
|
356
|
+
lastStats_.totalTime = latencyMs / 1000.0;
|
|
165
357
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
litert::lm::UserMessage lm_message;
|
|
170
|
-
lm_message.role = "user";
|
|
171
|
-
lm_message.content = message;
|
|
358
|
+
// Update history
|
|
359
|
+
history_.push_back(Message{Role::USER, message});
|
|
360
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
172
361
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
362
|
+
return result;
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
// =============================================================================
|
|
366
|
+
// sendMessageAsync — Streaming text inference
|
|
367
|
+
// =============================================================================
|
|
368
|
+
|
|
369
|
+
void HybridLiteRTLM::streamCallbackFn(void* callback_data, const char* chunk,
|
|
370
|
+
bool is_final, const char* error_msg) {
|
|
371
|
+
auto* ctx = static_cast<StreamContext*>(callback_data);
|
|
372
|
+
|
|
373
|
+
if (error_msg) {
|
|
374
|
+
// Error occurred — notify JS and clean up
|
|
375
|
+
ctx->onToken(std::string("Error: ") + error_msg, true);
|
|
376
|
+
delete ctx;
|
|
377
|
+
return;
|
|
179
378
|
}
|
|
180
379
|
|
|
181
|
-
|
|
380
|
+
if (is_final) {
|
|
381
|
+
// Calculate stats
|
|
382
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
383
|
+
double durationMs = std::chrono::duration<double, std::milli>(endTime - ctx->startTime).count();
|
|
384
|
+
|
|
385
|
+
if (ctx->lastStats && ctx->tokenCount > 0) {
|
|
386
|
+
ctx->lastStats->completionTokens = static_cast<double>(ctx->tokenCount);
|
|
387
|
+
ctx->lastStats->totalTime = durationMs / 1000.0;
|
|
388
|
+
ctx->lastStats->tokensPerSecond = (ctx->tokenCount / durationMs) * 1000.0;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// Update history (thread-safe)
|
|
392
|
+
{
|
|
393
|
+
std::lock_guard<std::mutex> lock(*ctx->historyMutex);
|
|
394
|
+
ctx->history->push_back(Message{Role::USER, ctx->userMessage});
|
|
395
|
+
ctx->history->push_back(Message{Role::MODEL, ctx->fullResponse});
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
ctx->onToken("", true);
|
|
399
|
+
delete ctx;
|
|
400
|
+
return;
|
|
401
|
+
}
|
|
182
402
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
|
|
189
|
-
lastStats_.timeToFirstToken = stats.time_to_first_token_ms;
|
|
190
|
-
lastStats_.totalTime = stats.total_time_ms;
|
|
191
|
-
lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
|
|
192
|
-
? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
|
|
193
|
-
: 0.0;
|
|
403
|
+
if (chunk) {
|
|
404
|
+
std::string token(chunk);
|
|
405
|
+
ctx->fullResponse += token;
|
|
406
|
+
ctx->tokenCount++;
|
|
407
|
+
ctx->onToken(token, false);
|
|
194
408
|
}
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
void HybridLiteRTLM::sendMessageAsync(
|
|
412
|
+
const std::string& message,
|
|
413
|
+
const std::function<void(const std::string&, bool)>& onToken) {
|
|
414
|
+
|
|
415
|
+
// Copy values for the background thread (avoid use-after-free)
|
|
416
|
+
auto onTokenCopy = onToken;
|
|
417
|
+
auto messageCopy = message;
|
|
418
|
+
|
|
419
|
+
// Capture shared state safely
|
|
420
|
+
auto* ctx = new StreamContext();
|
|
421
|
+
ctx->onToken = std::move(onTokenCopy);
|
|
422
|
+
ctx->fullResponse = "";
|
|
423
|
+
ctx->history = &history_;
|
|
424
|
+
ctx->historyMutex = &mutex_;
|
|
425
|
+
ctx->userMessage = messageCopy;
|
|
426
|
+
ctx->lastStats = &lastStats_;
|
|
427
|
+
ctx->startTime = std::chrono::steady_clock::now();
|
|
428
|
+
ctx->tokenCount = 0;
|
|
429
|
+
|
|
430
|
+
#ifdef __APPLE__
|
|
431
|
+
ensureLoaded();
|
|
195
432
|
|
|
196
|
-
|
|
197
|
-
// Stub response when LiteRT-LM is not available
|
|
198
|
-
responseText = "[LiteRT-LM Stub] Model response placeholder. "
|
|
199
|
-
"Real inference will be available when LiteRT-LM libraries are integrated. "
|
|
200
|
-
"You said: " + message;
|
|
201
|
-
|
|
202
|
-
auto endTime = std::chrono::high_resolution_clock::now();
|
|
203
|
-
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
|
|
204
|
-
|
|
205
|
-
// Estimate stats for stub
|
|
206
|
-
lastStats_.promptTokens = static_cast<double>(message.length() / 4);
|
|
207
|
-
lastStats_.completionTokens = static_cast<double>(responseText.length() / 4);
|
|
208
|
-
lastStats_.totalTokens = lastStats_.promptTokens + lastStats_.completionTokens;
|
|
209
|
-
lastStats_.totalTime = static_cast<double>(duration);
|
|
210
|
-
lastStats_.timeToFirstToken = lastStats_.totalTime / 2;
|
|
211
|
-
lastStats_.tokensPerSecond = (lastStats_.totalTime > 0)
|
|
212
|
-
? lastStats_.completionTokens / (lastStats_.totalTime / 1000.0)
|
|
213
|
-
: 0;
|
|
214
|
-
#endif
|
|
433
|
+
std::string msgJson = buildTextMessageJson(messageCopy);
|
|
215
434
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
modelMessage.content = responseText;
|
|
220
|
-
history_.push_back(modelMessage);
|
|
435
|
+
int result = litert_lm_conversation_send_message_stream(
|
|
436
|
+
conversation_, msgJson.c_str(), nullptr,
|
|
437
|
+
streamCallbackFn, ctx);
|
|
221
438
|
|
|
222
|
-
|
|
439
|
+
if (result != 0) {
|
|
440
|
+
delete ctx;
|
|
441
|
+
throw std::runtime_error("LiteRT-LM: Failed to start streaming inference");
|
|
442
|
+
}
|
|
443
|
+
#else
|
|
444
|
+
// Non-Apple stub
|
|
445
|
+
ctx->onToken("[iOS only] Streaming not available on this platform.", true);
|
|
446
|
+
delete ctx;
|
|
447
|
+
#endif
|
|
223
448
|
}
|
|
224
449
|
|
|
225
|
-
|
|
226
|
-
// sendMessageWithImage
|
|
227
|
-
|
|
228
|
-
|
|
450
|
+
// =============================================================================
|
|
451
|
+
// sendMessageWithImage — Multimodal (vision)
|
|
452
|
+
// =============================================================================
|
|
453
|
+
|
|
454
|
+
/// Promise-returning entry point for text + image inference.
///
/// Wraps sendMessageWithImageInternal() in `Promise<std::string>::async`.
/// Both arguments are copied into the task; `this` is captured by pointer, so
/// the object must stay alive until the task has run.
///
/// @param message   User message text.
/// @param imagePath Path of the image to attach.
/// @return A promise for the model's reply text.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithImage(
    const std::string& message,
    const std::string& imagePath) {
  auto visionTask = [this, message, imagePath]() -> std::string {
    return sendMessageWithImageInternal(message, imagePath);
  };
  return Promise<std::string>::async(std::move(visionTask));
}
|
|
461
|
+
|
|
462
|
+
std::string HybridLiteRTLM::sendMessageWithImageInternal(
|
|
229
463
|
const std::string& message,
|
|
230
464
|
const std::string& imagePath) {
|
|
231
465
|
|
|
232
466
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
233
467
|
ensureLoaded();
|
|
234
468
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
int width, height, channels;
|
|
238
|
-
unsigned char* img = stbi_load(imagePath.c_str(), &width, &height, &channels, 3); // Force 3 channels (RGB)
|
|
239
|
-
if (img == nullptr) {
|
|
240
|
-
throw std::runtime_error("Failed to load image from path: " + imagePath);
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
// Create input tensor/buffer for the engine.
|
|
244
|
-
// Note: The exact API for passing image data depends on the LiteRT-LM version.
|
|
245
|
-
// Assuming a structure that accepts raw bytes and dimensions.
|
|
246
|
-
litert::lm::UserMessage lm_message;
|
|
247
|
-
lm_message.role = "user";
|
|
248
|
-
|
|
249
|
-
// Construct multimodal content
|
|
250
|
-
// Option A: If UserMessage supports a list of content parts
|
|
251
|
-
litert::lm::ContentPart textPart;
|
|
252
|
-
textPart.type = litert::lm::ContentType::TEXT;
|
|
253
|
-
textPart.text = message;
|
|
254
|
-
lm_message.parts.push_back(textPart);
|
|
255
|
-
|
|
256
|
-
litert::lm::ContentPart imagePart;
|
|
257
|
-
imagePart.type = litert::lm::ContentType::IMAGE;
|
|
258
|
-
imagePart.image.width = width;
|
|
259
|
-
imagePart.image.height = height;
|
|
260
|
-
imagePart.image.channels = channels;
|
|
261
|
-
imagePart.image.data = std::vector<uint8_t>(img, img + (width * height * channels));
|
|
262
|
-
lm_message.parts.push_back(imagePart);
|
|
469
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
470
|
+
std::string result;
|
|
263
471
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
if (!
|
|
268
|
-
throw std::runtime_error("
|
|
269
|
-
std::string(response.status().message()));
|
|
472
|
+
#ifdef __APPLE__
|
|
473
|
+
// Verify image exists
|
|
474
|
+
std::ifstream imageFile(imagePath);
|
|
475
|
+
if (!imageFile.good()) {
|
|
476
|
+
throw std::runtime_error("Image file not found: " + imagePath);
|
|
270
477
|
}
|
|
478
|
+
imageFile.close();
|
|
271
479
|
|
|
272
|
-
//
|
|
273
|
-
|
|
274
|
-
userMessage.role = Role::USER;
|
|
275
|
-
userMessage.content = message + " [Image]";
|
|
276
|
-
history_.push_back(userMessage);
|
|
480
|
+
// Build multimodal message JSON — the C API handles image preprocessing
|
|
481
|
+
std::string msgJson = buildImageMessageJson(message, imagePath);
|
|
277
482
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
modelMessage.content = response->content;
|
|
281
|
-
history_.push_back(modelMessage);
|
|
483
|
+
auto* response = litert_lm_conversation_send_message(
|
|
484
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
282
485
|
|
|
283
|
-
|
|
486
|
+
if (!response) {
|
|
487
|
+
throw std::runtime_error("LiteRT-LM: sendMessageWithImage failed");
|
|
488
|
+
}
|
|
284
489
|
|
|
490
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
491
|
+
if (responseStr) {
|
|
492
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
493
|
+
}
|
|
494
|
+
litert_lm_json_response_delete(response);
|
|
285
495
|
#else
|
|
286
|
-
|
|
287
|
-
throw std::runtime_error(
|
|
288
|
-
"sendMessageWithImage is not supported on iOS. "
|
|
289
|
-
"LiteRT-LM iOS SDK is not yet available. "
|
|
290
|
-
"Please use text-only sendMessage() for now.");
|
|
496
|
+
result = "[iOS only] Vision inference not available on this platform.";
|
|
291
497
|
#endif
|
|
498
|
+
|
|
499
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
500
|
+
lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
|
|
501
|
+
|
|
502
|
+
history_.push_back(Message{Role::USER, message + " [image: " + imagePath + "]"});
|
|
503
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
504
|
+
|
|
505
|
+
return result;
|
|
292
506
|
}
|
|
293
507
|
|
|
294
|
-
|
|
295
|
-
|
|
508
|
+
// =============================================================================
|
|
509
|
+
// sendMessageWithAudio — Multimodal (audio)
|
|
510
|
+
// =============================================================================
|
|
296
511
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
std::
|
|
301
|
-
|
|
302
|
-
const std::string& fileName,
|
|
303
|
-
const std::optional<std::function<void(double)>>& onProgress) {
|
|
304
|
-
|
|
305
|
-
// Return a future that throws an exception
|
|
306
|
-
return std::async(std::launch::async, []() -> std::string {
|
|
307
|
-
throw std::runtime_error(
|
|
308
|
-
"downloadModel is not supported on iOS yet. "
|
|
309
|
-
"Please download the model manually using a separate library."
|
|
310
|
-
);
|
|
512
|
+
/// Promise-returning entry point for text + audio inference.
///
/// Wraps sendMessageWithAudioInternal() in `Promise<std::string>::async`.
/// Both arguments are copied into the task; `this` is captured by pointer, so
/// the object must stay alive until the task has run.
///
/// @param message   User message text.
/// @param audioPath Path of the audio file to attach.
/// @return A promise for the model's reply text.
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::sendMessageWithAudio(
    const std::string& message,
    const std::string& audioPath) {
  auto audioTask = [this, message, audioPath]() -> std::string {
    return sendMessageWithAudioInternal(message, audioPath);
  };
  return Promise<std::string>::async(std::move(audioTask));
}
|
|
313
519
|
|
|
314
|
-
|
|
315
|
-
// sendMessageWithAudio - Multimodal audio + text
|
|
316
|
-
//------------------------------------------------------------------------------
|
|
317
|
-
std::string HybridLiteRTLM::sendMessageWithAudio(
|
|
520
|
+
std::string HybridLiteRTLM::sendMessageWithAudioInternal(
|
|
318
521
|
const std::string& message,
|
|
319
522
|
const std::string& audioPath) {
|
|
320
523
|
|
|
321
524
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
322
525
|
ensureLoaded();
|
|
323
526
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
527
|
+
auto startTime = std::chrono::steady_clock::now();
|
|
528
|
+
std::string result;
|
|
529
|
+
|
|
530
|
+
#ifdef __APPLE__
|
|
531
|
+
std::ifstream audioFile(audioPath);
|
|
532
|
+
if (!audioFile.good()) {
|
|
533
|
+
throw std::runtime_error("Audio file not found: " + audioPath);
|
|
329
534
|
}
|
|
535
|
+
audioFile.close();
|
|
330
536
|
|
|
331
|
-
|
|
332
|
-
// Ideally use a WAV parsing library or miniaudio if available.
|
|
333
|
-
// For this implementation, we read the whole file.
|
|
334
|
-
std::vector<uint8_t> audioData((std::istreambuf_iterator<char>(audioFile)), std::istreambuf_iterator<char>());
|
|
537
|
+
std::string msgJson = buildAudioMessageJson(message, audioPath);
|
|
335
538
|
|
|
336
|
-
|
|
337
|
-
|
|
539
|
+
auto* response = litert_lm_conversation_send_message(
|
|
540
|
+
conversation_, msgJson.c_str(), nullptr);
|
|
338
541
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
textPart.text = message;
|
|
342
|
-
lm_message.parts.push_back(textPart);
|
|
343
|
-
|
|
344
|
-
litert::lm::ContentPart audioPart;
|
|
345
|
-
audioPart.type = litert::lm::ContentType::AUDIO;
|
|
346
|
-
audioPart.audio.data = audioData;
|
|
347
|
-
// Metadata like sample rate might be needed:
|
|
348
|
-
// audioPart.audio.sample_rate = 16000;
|
|
349
|
-
lm_message.parts.push_back(audioPart);
|
|
350
|
-
|
|
351
|
-
auto response = conversation_->SendMessage(lm_message);
|
|
352
|
-
if (!response.ok()) {
|
|
353
|
-
throw std::runtime_error("Audio inference failed: " +
|
|
354
|
-
std::string(response.status().message()));
|
|
542
|
+
if (!response) {
|
|
543
|
+
throw std::runtime_error("LiteRT-LM: sendMessageWithAudio failed");
|
|
355
544
|
}
|
|
356
545
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
546
|
+
const char* responseStr = litert_lm_json_response_get_string(response);
|
|
547
|
+
if (responseStr) {
|
|
548
|
+
result = extractTextFromResponse(std::string(responseStr));
|
|
549
|
+
}
|
|
550
|
+
litert_lm_json_response_delete(response);
|
|
551
|
+
#else
|
|
552
|
+
result = "[iOS only] Audio inference not available on this platform.";
|
|
553
|
+
#endif
|
|
361
554
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
modelMessage.content = response->content;
|
|
365
|
-
history_.push_back(modelMessage);
|
|
555
|
+
auto endTime = std::chrono::steady_clock::now();
|
|
556
|
+
lastStats_.totalTime = std::chrono::duration<double>(endTime - startTime).count();
|
|
366
557
|
|
|
367
|
-
|
|
558
|
+
history_.push_back(Message{Role::USER, message + " [audio: " + audioPath + "]"});
|
|
559
|
+
history_.push_back(Message{Role::MODEL, result});
|
|
368
560
|
|
|
561
|
+
return result;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
// =============================================================================
|
|
565
|
+
// downloadModel — Download model from URL
|
|
566
|
+
// =============================================================================
|
|
567
|
+
|
|
568
|
+
/**
 * Downloads a model file asynchronously and resolves with its local path.
 *
 * On Apple builds the work is delegated to litert_lm::downloadModelFile().
 * On other builds the file is fetched into /tmp/<fileName> by running the
 * `curl` command-line tool, and onProgress (if provided) is called once with
 * 1.0 after a successful download.
 *
 * @param url        Source URL of the model.
 * @param fileName   Destination file name (relative to the model directory).
 * @param onProgress Optional progress callback.
 * @return Promise resolving to the path of the downloaded file.
 *
 * Non-Apple branch errors (thrown inside the task):
 * @throws std::runtime_error if fileName is empty or contains a ".." path
 *         component, if url is empty or starts with '-', or if curl exits
 *         with a non-zero status.
 */
std::shared_ptr<Promise<std::string>> HybridLiteRTLM::downloadModel(
    const std::string& url,
    const std::string& fileName,
    const std::optional<std::function<void(double)>>& onProgress) {
  return Promise<std::string>::async([url, fileName, onProgress]() -> std::string {
#ifdef __APPLE__
    return litert_lm::downloadModelFile(url, fileName, onProgress);
#else
    // True when any '/'-separated component of `name` is exactly "..".
    const auto hasParentComponent = [](const std::string& name) {
      std::string::size_type start = 0;
      while (start <= name.size()) {
        const auto slash = name.find('/', start);
        const auto length =
            (slash == std::string::npos ? name.size() : slash) - start;
        if (name.compare(start, length, "..") == 0) {
          return true;
        }
        if (slash == std::string::npos) {
          break;
        }
        start = slash + 1;
      }
      return false;
    };

    // Wraps `arg` in single quotes for /bin/sh; embedded single quotes are
    // emitted as '\'' so the shell always sees exactly one literal word.
    const auto shellQuote = [](const std::string& arg) {
      std::string quoted = "'";
      for (const char c : arg) {
        if (c == '\'') {
          quoted += "'\\''";
        } else {
          quoted += c;
        }
      }
      quoted += '\'';
      return quoted;
    };

    // The destination must stay inside /tmp.
    if (fileName.empty() || hasParentComponent(fileName)) {
      throw std::runtime_error("Invalid model file name: " + fileName);
    }
    // A leading '-' would be parsed by curl as an option rather than a URL.
    if (url.empty() || url.front() == '-') {
      throw std::runtime_error("Invalid model URL: " + url);
    }

    std::string destPath = "/tmp/" + fileName;
    // --fail makes curl exit non-zero on HTTP errors (e.g. 404) instead of
    // saving the error page as if it were the model.
    std::string curlCmd =
        "curl -L --fail -o " + shellQuote(destPath) + " " + shellQuote(url);
    int result = system(curlCmd.c_str());
    if (result != 0) {
      throw std::runtime_error("Failed to download model from: " + url);
    }
    if (onProgress.has_value()) {
      // No incremental progress is available from this path; report completion.
      onProgress.value()(1.0);
    }
    return destPath;
#endif
  });
}
|
|
377
589
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
const
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
// to avoid blocking other operations. The callback may be invoked
|
|
387
|
-
// from a different thread depending on LiteRT-LM's implementation.
|
|
388
|
-
|
|
389
|
-
{
|
|
390
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
391
|
-
ensureLoaded();
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
#ifdef LITERT_LM_ENABLED
|
|
395
|
-
// Add user message to history before starting
|
|
396
|
-
{
|
|
397
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
398
|
-
Message userMessage;
|
|
399
|
-
userMessage.role = Role::USER;
|
|
400
|
-
userMessage.content = message;
|
|
401
|
-
history_.push_back(userMessage);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
litert::lm::UserMessage lm_message;
|
|
405
|
-
lm_message.role = "user";
|
|
406
|
-
lm_message.content = message;
|
|
407
|
-
|
|
408
|
-
std::string fullResponse;
|
|
409
|
-
|
|
410
|
-
// The callback needs to be carefully managed for thread safety
|
|
411
|
-
auto status = conversation_->SendMessageAsync(
|
|
412
|
-
lm_message,
|
|
413
|
-
[this, &onToken, &fullResponse](const std::string& token, bool isDone) {
|
|
414
|
-
fullResponse += token;
|
|
415
|
-
|
|
416
|
-
// Invoke the JS callback (Nitro handles thread marshalling)
|
|
417
|
-
onToken(token, isDone);
|
|
418
|
-
|
|
419
|
-
if (isDone) {
|
|
420
|
-
// Add complete response to history
|
|
421
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
422
|
-
Message modelMessage;
|
|
423
|
-
modelMessage.role = Role::MODEL;
|
|
424
|
-
modelMessage.content = fullResponse;
|
|
425
|
-
history_.push_back(modelMessage);
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
);
|
|
429
|
-
|
|
430
|
-
if (!status.ok()) {
|
|
431
|
-
// Remove user message since inference failed
|
|
432
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
433
|
-
if (!history_.empty()) {
|
|
434
|
-
history_.pop_back();
|
|
590
|
+
std::shared_ptr<Promise<void>> HybridLiteRTLM::deleteModel(const std::string& fileName) {
|
|
591
|
+
return Promise<void>::async([fileName]() {
|
|
592
|
+
std::string path;
|
|
593
|
+
#ifdef __APPLE__
|
|
594
|
+
// Match the path used by IOSDownloadHelper: ~/Library/Caches/litert_models/
|
|
595
|
+
const char* home = getenv("HOME");
|
|
596
|
+
if (home) {
|
|
597
|
+
path = std::string(home) + "/Library/Caches/litert_models/" + fileName;
|
|
435
598
|
}
|
|
436
|
-
throw std::runtime_error("Async inference failed: " +
|
|
437
|
-
std::string(status.message()));
|
|
438
|
-
}
|
|
439
|
-
|
|
440
599
|
#else
|
|
441
|
-
|
|
442
|
-
std::string fullResponse;
|
|
443
|
-
{
|
|
444
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
445
|
-
|
|
446
|
-
// Add user message
|
|
447
|
-
Message userMessage;
|
|
448
|
-
userMessage.role = Role::USER;
|
|
449
|
-
userMessage.content = message;
|
|
450
|
-
history_.push_back(userMessage);
|
|
451
|
-
|
|
452
|
-
fullResponse = "[LiteRT-LM Stub] Streaming response placeholder. You said: " + message;
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
// Simulate token-by-token streaming
|
|
456
|
-
std::string currentWord;
|
|
457
|
-
for (size_t i = 0; i < fullResponse.length(); i++) {
|
|
458
|
-
char c = fullResponse[i];
|
|
459
|
-
currentWord += c;
|
|
460
|
-
|
|
461
|
-
if (c == ' ' || c == '\n' || i == fullResponse.length() - 1) {
|
|
462
|
-
bool isDone = (i == fullResponse.length() - 1);
|
|
463
|
-
onToken(currentWord, isDone);
|
|
464
|
-
currentWord.clear();
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
// Add model response to history
|
|
469
|
-
{
|
|
470
|
-
std::lock_guard<std::mutex> lock(mutex_);
|
|
471
|
-
Message modelMessage;
|
|
472
|
-
modelMessage.role = Role::MODEL;
|
|
473
|
-
modelMessage.content = fullResponse;
|
|
474
|
-
history_.push_back(modelMessage);
|
|
475
|
-
}
|
|
600
|
+
path = "/tmp/" + fileName;
|
|
476
601
|
#endif
|
|
602
|
+
if (!path.empty()) {
|
|
603
|
+
std::remove(path.c_str());
|
|
604
|
+
}
|
|
605
|
+
});
|
|
477
606
|
}
|
|
478
607
|
|
|
479
|
-
|
|
480
|
-
// getHistory
|
|
481
|
-
|
|
608
|
+
// =============================================================================
|
|
609
|
+
// getHistory
|
|
610
|
+
// =============================================================================
|
|
611
|
+
|
|
482
612
|
std::vector<Message> HybridLiteRTLM::getHistory() {
|
|
483
613
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
484
614
|
return history_;
|
|
485
615
|
}
|
|
486
616
|
|
|
487
|
-
|
|
488
|
-
// resetConversation
|
|
489
|
-
|
|
617
|
+
// =============================================================================
|
|
618
|
+
// resetConversation
|
|
619
|
+
// =============================================================================
|
|
620
|
+
|
|
490
621
|
void HybridLiteRTLM::resetConversation() {
|
|
491
622
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
492
623
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
624
|
+
history_.clear();
|
|
625
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
626
|
+
|
|
627
|
+
#ifdef __APPLE__
|
|
628
|
+
if (isLoaded_ && engine_) {
|
|
498
629
|
createNewConversation();
|
|
499
630
|
}
|
|
500
631
|
#endif
|
|
501
|
-
|
|
502
|
-
history_.clear();
|
|
503
632
|
}
|
|
504
633
|
|
|
505
|
-
|
|
506
|
-
// isReady
|
|
507
|
-
|
|
634
|
+
// =============================================================================
|
|
635
|
+
// isReady
|
|
636
|
+
// =============================================================================
|
|
637
|
+
|
|
508
638
|
bool HybridLiteRTLM::isReady() {
|
|
509
639
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
510
640
|
return isLoaded_;
|
|
511
641
|
}
|
|
512
642
|
|
|
513
|
-
|
|
514
|
-
// getStats
|
|
515
|
-
|
|
643
|
+
// =============================================================================
|
|
644
|
+
// getStats
|
|
645
|
+
// =============================================================================
|
|
646
|
+
|
|
516
647
|
/**
 * Returns a copy of the most recently recorded generation statistics.
 *
 * @return A copy of lastStats_, taken while holding mutex_.
 */
GenerationStats HybridLiteRTLM::getStats() {
  const std::lock_guard<std::mutex> guard(mutex_);
  GenerationStats snapshot = lastStats_;
  return snapshot;
}
|
|
520
651
|
|
|
521
|
-
|
|
522
|
-
//
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
652
|
+
// =============================================================================
|
|
653
|
+
// getMemoryUsage — Uses Mach APIs for iOS process memory
|
|
654
|
+
// =============================================================================
|
|
655
|
+
|
|
656
|
+
/**
 * Reports process memory usage.
 *
 * On Apple builds:
 *  - the process resident size is read with task_info(MACH_TASK_BASIC_INFO);
 *  - total physical memory is read with host_info(HOST_BASIC_INFO);
 *  - "available" memory is computed as total physical memory minus this
 *    process's resident size (clamped at 0). It is an approximation and does
 *    not account for memory used by other processes;
 *  - isLowMemory is set when total memory is known and that figure is below
 *    200 MiB.
 * If either Mach call fails, the corresponding value stays 0. On other builds
 * every field is 0 / false.
 *
 * @return MemoryUsage{nativeHeapBytes, residentBytes, availableMemoryBytes,
 *         isLowMemory}; nativeHeapBytes and residentBytes both carry the
 *         resident size.
 */
MemoryUsage HybridLiteRTLM::getMemoryUsage() {
  double usedMemoryBytes = 0;
  double totalMemoryBytes = 0;
  double availableBytes = 0;
  bool isLowMemory = false;

#ifdef __APPLE__
  // Get app process memory (resident set size).
  struct mach_task_basic_info taskInfo{};
  mach_msg_type_number_t taskInfoCount = MACH_TASK_BASIC_INFO_COUNT;

  const kern_return_t taskStatus = task_info(mach_task_self(),
                                             MACH_TASK_BASIC_INFO,
                                             reinterpret_cast<task_info_t>(&taskInfo),
                                             &taskInfoCount);
  if (taskStatus == KERN_SUCCESS) {
    usedMemoryBytes = static_cast<double>(taskInfo.resident_size);
  }

  // Get total physical memory.
  const mach_port_t hostPort = mach_host_self();
  struct host_basic_info hostInfo{};
  mach_msg_type_number_t hostInfoCount = HOST_BASIC_INFO_COUNT;

  const kern_return_t hostStatus = host_info(hostPort,
                                             HOST_BASIC_INFO,
                                             reinterpret_cast<host_info_t>(&hostInfo),
                                             &hostInfoCount);
  // mach_host_self() hands out a new send right on every call; release it so
  // repeated polling does not leak port references.
  mach_port_deallocate(mach_task_self(), hostPort);

  if (hostStatus == KERN_SUCCESS) {
    totalMemoryBytes = static_cast<double>(hostInfo.max_mem);
  }

  availableBytes = totalMemoryBytes - usedMemoryBytes;
  if (availableBytes < 0) availableBytes = 0;

  // Low memory threshold (~200MB available).
  constexpr double kLowMemoryThresholdBytes = 200.0 * 1024.0 * 1024.0;
  isLowMemory = (totalMemoryBytes > 0) && (availableBytes < kLowMemoryThresholdBytes);
#endif

  return MemoryUsage{
      usedMemoryBytes,  // nativeHeapBytes
      usedMemoryBytes,  // residentBytes
      availableBytes,   // availableMemoryBytes
      isLowMemory       // isLowMemory
  };
}
|
|
702
|
+
|
|
703
|
+
// =============================================================================
|
|
704
|
+
// close — Clean up all LiteRT-LM resources
|
|
705
|
+
// =============================================================================
|
|
706
|
+
|
|
707
|
+
void HybridLiteRTLM::close() {
|
|
708
|
+
// Note: Don't lock here if called from destructor (mutex may be destroyed)
|
|
709
|
+
// The caller (loadModel, destructor) should handle locking.
|
|
710
|
+
|
|
533
711
|
isLoaded_ = false;
|
|
534
712
|
history_.clear();
|
|
713
|
+
|
|
714
|
+
#ifdef __APPLE__
|
|
715
|
+
if (conversation_) {
|
|
716
|
+
litert_lm_conversation_delete(conversation_);
|
|
717
|
+
conversation_ = nullptr;
|
|
718
|
+
}
|
|
719
|
+
if (conv_config_) {
|
|
720
|
+
litert_lm_conversation_config_delete(conv_config_);
|
|
721
|
+
conv_config_ = nullptr;
|
|
722
|
+
}
|
|
723
|
+
if (session_config_) {
|
|
724
|
+
litert_lm_session_config_delete(session_config_);
|
|
725
|
+
session_config_ = nullptr;
|
|
726
|
+
}
|
|
727
|
+
if (engine_) {
|
|
728
|
+
litert_lm_engine_delete(engine_);
|
|
729
|
+
engine_ = nullptr;
|
|
730
|
+
}
|
|
731
|
+
#endif
|
|
732
|
+
|
|
733
|
+
lastStats_ = GenerationStats{0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
|
535
734
|
}
|
|
536
735
|
|
|
537
736
|
} // namespace margelo::nitro::litertlm
|