@runanywhere/llamacpp 0.16.0
- package/RunAnywhereLlama.podspec +131 -0
- package/android/CMakeLists.txt +105 -0
- package/android/build.gradle +288 -0
- package/android/src/main/AndroidManifest.xml +3 -0
- package/android/src/main/cpp/cpp-adapter.cpp +14 -0
- package/android/src/main/java/com/margelo/nitro/runanywhere/llama/RunAnywhereLlamaPackage.kt +35 -0
- package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp_jni.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librunanywhere_llamacpp.so +0 -0
- package/cpp/HybridRunAnywhereLlama.cpp +346 -0
- package/cpp/HybridRunAnywhereLlama.hpp +107 -0
- package/cpp/bridges/LLMBridge.cpp +209 -0
- package/cpp/bridges/LLMBridge.hpp +109 -0
- package/cpp/bridges/StructuredOutputBridge.cpp +151 -0
- package/cpp/bridges/StructuredOutputBridge.hpp +66 -0
- package/cpp/rac_llm_llamacpp.h +34 -0
- package/ios/.testlocal +0 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/Info.plist +44 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Info.plist +11 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Info.plist +11 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
- package/ios/LlamaCPPBackend.podspec +127 -0
- package/nitro.json +16 -0
- package/nitrogen/generated/.gitattributes +1 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/runanywhere/llama/runanywherellamaOnLoad.kt +35 -0
- package/nitrogen/generated/android/runanywherellama+autolinking.cmake +81 -0
- package/nitrogen/generated/android/runanywherellama+autolinking.gradle +27 -0
- package/nitrogen/generated/android/runanywherellamaOnLoad.cpp +44 -0
- package/nitrogen/generated/android/runanywherellamaOnLoad.hpp +25 -0
- package/nitrogen/generated/ios/RunAnywhereLlama+autolinking.rb +60 -0
- package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.cpp +17 -0
- package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.hpp +27 -0
- package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Umbrella.hpp +38 -0
- package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.mm +35 -0
- package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.swift +12 -0
- package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.cpp +33 -0
- package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.hpp +77 -0
- package/package.json +60 -0
- package/react-native.config.js +14 -0
- package/src/LlamaCPP.ts +206 -0
- package/src/LlamaCppProvider.ts +120 -0
- package/src/index.ts +59 -0
- package/src/native/NativeRunAnywhereLlama.ts +58 -0
- package/src/native/index.ts +11 -0
- package/src/specs/RunAnywhereLlama.nitro.ts +160 -0
@@ -0,0 +1,346 @@
+/**
+ * HybridRunAnywhereLlama.cpp
+ *
+ * Nitrogen HybridObject implementation for RunAnywhere Llama backend.
+ *
+ * Llama-specific implementation for text generation using LlamaCPP.
+ *
+ * NOTE: LlamaCPP backend is REQUIRED and always linked via the build system.
+ */
+
+#include "HybridRunAnywhereLlama.hpp"
+
+// Llama bridges
+#include "bridges/LLMBridge.hpp"
+#include "bridges/StructuredOutputBridge.hpp"
+
+// Backend registration header - always available
+extern "C" {
+#include "rac_llm_llamacpp.h"
+}
+
+// Unified logging via rac_logger.h
+#include "rac_logger.h"
+
+#include <sstream>
+#include <chrono>
+#include <vector>
+#include <stdexcept>
+
+// Log category for this module
+#define LOG_CATEGORY "LLM.LlamaCpp"
+
+namespace margelo::nitro::runanywhere::llama {
+
+using namespace ::runanywhere::bridges;
+
+// ============================================================================
+// JSON Utilities
+// ============================================================================
+
+namespace {
+
+int extractIntValue(const std::string& json, const std::string& key, int defaultValue) {
+  std::string searchKey = "\"" + key + "\":";
+  size_t pos = json.find(searchKey);
+  if (pos == std::string::npos) return defaultValue;
+  pos += searchKey.length();
+  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++;
+  if (pos >= json.size()) return defaultValue;
+  return std::stoi(json.substr(pos));
+}
+
+float extractFloatValue(const std::string& json, const std::string& key, float defaultValue) {
+  std::string searchKey = "\"" + key + "\":";
+  size_t pos = json.find(searchKey);
+  if (pos == std::string::npos) return defaultValue;
+  pos += searchKey.length();
+  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++;
+  if (pos >= json.size()) return defaultValue;
+  return std::stof(json.substr(pos));
+}
+
+std::string extractStringValue(const std::string& json, const std::string& key, const std::string& defaultValue = "") {
+  std::string searchKey = "\"" + key + "\":\"";
+  size_t pos = json.find(searchKey);
+  if (pos == std::string::npos) return defaultValue;
+  pos += searchKey.length();
+  size_t endPos = json.find("\"", pos);
+  if (endPos == std::string::npos) return defaultValue;
+  return json.substr(pos, endPos - pos);
+}
+
+std::string buildJsonObject(const std::vector<std::pair<std::string, std::string>>& keyValues) {
+  std::string result = "{";
+  for (size_t i = 0; i < keyValues.size(); i++) {
+    if (i > 0) result += ",";
+    result += "\"" + keyValues[i].first + "\":" + keyValues[i].second;
+  }
+  result += "}";
+  return result;
+}
+
+std::string jsonString(const std::string& value) {
+  std::string escaped = "\"";
+  for (char c : value) {
+    if (c == '"') escaped += "\\\"";
+    else if (c == '\\') escaped += "\\\\";
+    else if (c == '\n') escaped += "\\n";
+    else if (c == '\r') escaped += "\\r";
+    else if (c == '\t') escaped += "\\t";
+    else escaped += c;
+  }
+  escaped += "\"";
+  return escaped;
+}
+
+} // anonymous namespace
+
+// ============================================================================
+// Constructor / Destructor
+// ============================================================================
+
+HybridRunAnywhereLlama::HybridRunAnywhereLlama() : HybridObject(TAG) {
+  RAC_LOG_DEBUG(LOG_CATEGORY, "HybridRunAnywhereLlama constructor - Llama backend module");
+}
+
+HybridRunAnywhereLlama::~HybridRunAnywhereLlama() {
+  RAC_LOG_DEBUG(LOG_CATEGORY, "HybridRunAnywhereLlama destructor");
+  LLMBridge::shared().destroy();
+}
+
+// ============================================================================
+// Backend Registration
+// ============================================================================
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::registerBackend() {
+  return Promise<bool>::async([this]() {
+    RAC_LOG_DEBUG(LOG_CATEGORY, "Registering LlamaCPP backend with C++ registry");
+
+    rac_result_t result = rac_backend_llamacpp_register();
+    // RAC_SUCCESS (0) or RAC_ERROR_MODULE_ALREADY_REGISTERED (-4) are both OK
+    if (result == RAC_SUCCESS || result == -4) {
+      RAC_LOG_INFO(LOG_CATEGORY, "LlamaCPP backend registered successfully");
+      isRegistered_ = true;
+      return true;
+    } else {
+      RAC_LOG_ERROR(LOG_CATEGORY, "LlamaCPP registration failed with code: %d", result);
+      setLastError("LlamaCPP registration failed with error: " + std::to_string(result));
+      throw std::runtime_error("LlamaCPP registration failed with error: " + std::to_string(result));
+    }
+  });
+}
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::unregisterBackend() {
+  return Promise<bool>::async([this]() {
+    RAC_LOG_DEBUG(LOG_CATEGORY, "Unregistering LlamaCPP backend");
+
+    rac_result_t result = rac_backend_llamacpp_unregister();
+    isRegistered_ = false;
+    if (result != RAC_SUCCESS) {
+      RAC_LOG_ERROR(LOG_CATEGORY, "LlamaCPP unregistration failed with code: %d", result);
+      throw std::runtime_error("LlamaCPP unregistration failed with error: " + std::to_string(result));
+    }
+    return true;
+  });
+}
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::isBackendRegistered() {
+  return Promise<bool>::async([this]() {
+    return isRegistered_;
+  });
+}
+
+// ============================================================================
+// Model Loading
+// ============================================================================
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::loadModel(
+    const std::string& path,
+    const std::optional<std::string>& modelId,
+    const std::optional<std::string>& modelName,
+    const std::optional<std::string>& configJson) {
+  return Promise<bool>::async([this, path, modelId, modelName, configJson]() {
+    std::lock_guard<std::mutex> lock(modelMutex_);
+
+    RAC_LOG_INFO(LOG_CATEGORY, "Loading Llama model: %s", path.c_str());
+
+    std::string id = modelId.value_or("");
+    std::string name = modelName.value_or("");
+
+    // Call with the 3-arg signature (path, modelId, modelName)
+    // LLMBridge::loadModel will throw on error
+    auto result = LLMBridge::shared().loadModel(path, id, name);
+    if (result != 0) {
+      std::string error = "Failed to load Llama model: " + path + " (error: " + std::to_string(result) + ")";
+      setLastError(error);
+      throw std::runtime_error(error);
+    }
+    return true;
+  });
+}
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::isModelLoaded() {
+  return Promise<bool>::async([]() {
+    return LLMBridge::shared().isLoaded();
+  });
+}
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::unloadModel() {
+  return Promise<bool>::async([this]() {
+    std::lock_guard<std::mutex> lock(modelMutex_);
+    auto result = LLMBridge::shared().unload();
+    return result == 0;
+  });
+}
+
+std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::getModelInfo() {
+  return Promise<std::string>::async([]() {
+    if (!LLMBridge::shared().isLoaded()) {
+      return std::string("{}");
+    }
+    return buildJsonObject({
+      {"loaded", "true"},
+      {"backend", jsonString("llamacpp")}
+    });
+  });
+}
+
+// ============================================================================
+// Text Generation
+// ============================================================================
+
+std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::generate(
+    const std::string& prompt,
+    const std::optional<std::string>& optionsJson) {
+  return Promise<std::string>::async([this, prompt, optionsJson]() {
+    if (!LLMBridge::shared().isLoaded()) {
+      setLastError("Model not loaded");
+      throw std::runtime_error("LLMBridge: Model not loaded. Call loadModel() first.");
+    }
+
+    LLMOptions options;
+    if (optionsJson.has_value()) {
+      options.maxTokens = extractIntValue(*optionsJson, "max_tokens", 512);
+      options.temperature = extractFloatValue(*optionsJson, "temperature", 0.7f);
+      options.topP = extractFloatValue(*optionsJson, "top_p", 0.9f);
+      options.topK = extractIntValue(*optionsJson, "top_k", 40);
+    }
+
+    RAC_LOG_DEBUG(LOG_CATEGORY, "Generating with prompt: %.50s...", prompt.c_str());
+
+    auto startTime = std::chrono::high_resolution_clock::now();
+    // LLMBridge::generate will throw on error
+    auto result = LLMBridge::shared().generate(prompt, options);
+    auto endTime = std::chrono::high_resolution_clock::now();
+    auto durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+        endTime - startTime).count();
+
+    return buildJsonObject({
+      {"text", jsonString(result.text)},
+      {"tokensUsed", std::to_string(result.tokenCount)},
+      {"latencyMs", std::to_string(durationMs)},
+      {"cancelled", result.cancelled ? "true" : "false"}
+    });
+  });
+}
+
+std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::generateStream(
+    const std::string& prompt,
+    const std::string& optionsJson,
+    const std::function<void(const std::string&, bool)>& callback) {
+  return Promise<std::string>::async([this, prompt, optionsJson, callback]() {
+    if (!LLMBridge::shared().isLoaded()) {
+      setLastError("Model not loaded");
+      throw std::runtime_error("LLMBridge: Model not loaded. Call loadModel() first.");
+    }
+
+    LLMOptions options;
+    options.maxTokens = extractIntValue(optionsJson, "max_tokens", 512);
+    options.temperature = extractFloatValue(optionsJson, "temperature", 0.7f);
+
+    std::string fullResponse;
+    std::string streamError;
+
+    LLMStreamCallbacks streamCallbacks;
+    streamCallbacks.onToken = [&callback, &fullResponse](const std::string& token) -> bool {
+      fullResponse += token;
+      if (callback) {
+        callback(token, false);
+      }
+      return true;
+    };
+    streamCallbacks.onComplete = [&callback](const std::string&, int, double) {
+      if (callback) {
+        callback("", true);
+      }
+    };
+    streamCallbacks.onError = [this, &streamError](int code, const std::string& message) {
+      setLastError(message);
+      streamError = message;
+    };
+
+    LLMBridge::shared().generateStream(prompt, options, streamCallbacks);
+
+    if (!streamError.empty()) {
+      throw std::runtime_error("LLMBridge: Stream generation failed: " + streamError);
+    }
+
+    return fullResponse;
+  });
+}
+
+std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::cancelGeneration() {
+  return Promise<bool>::async([]() {
+    LLMBridge::shared().cancel();
+    return true;
+  });
+}
+
+// ============================================================================
+// Structured Output
+// ============================================================================
+
+std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::generateStructured(
+    const std::string& prompt,
+    const std::string& schema,
+    const std::optional<std::string>& optionsJson) {
+  return Promise<std::string>::async([this, prompt, schema, optionsJson]() {
+    auto result = StructuredOutputBridge::shared().generate(
+        prompt, schema, optionsJson.value_or("")
+    );
+
+    if (result.success) {
+      return result.json;
+    } else {
+      setLastError(result.error);
+      return buildJsonObject({{"error", jsonString(result.error)}});
+    }
+  });
+}
+
+// ============================================================================
+// Utilities
+// ============================================================================
+
+std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::getLastError() {
+  return Promise<std::string>::async([this]() { return lastError_; });
+}
+
+std::shared_ptr<Promise<double>> HybridRunAnywhereLlama::getMemoryUsage() {
+  return Promise<double>::async([]() {
+    // TODO: Get memory usage from LlamaCPP
+    return 0.0;
+  });
+}
+
+// ============================================================================
+// Helper Methods
+// ============================================================================
+
+void HybridRunAnywhereLlama::setLastError(const std::string& error) {
+  lastError_ = error;
+  RAC_LOG_ERROR(LOG_CATEGORY, "Error: %s", error.c_str());
+}
+
+} // namespace margelo::nitro::runanywhere::llama
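
The options JSON consumed by generate() and generateStream() above is parsed with the hand-rolled helpers rather than a JSON library, so the accepted format is narrow: each key's closing quote must be followed immediately by its colon (whitespace is skipped only after the colon). The standalone sketch below is not part of the package; it copies extractIntValue from the file above to illustrate the expected option keys ("max_tokens", "temperature", "top_p", "top_k") and the defaults (512, 0.7, 0.9, 40) that apply when a key is absent.

#include <iostream>
#include <string>

// Copy of the extractIntValue helper from HybridRunAnywhereLlama.cpp, so this
// sketch compiles on its own.
static int extractIntValue(const std::string& json, const std::string& key, int defaultValue) {
  std::string searchKey = "\"" + key + "\":";
  size_t pos = json.find(searchKey);
  if (pos == std::string::npos) return defaultValue;
  pos += searchKey.length();
  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++;
  if (pos >= json.size()) return defaultValue;
  return std::stoi(json.substr(pos));
}

int main() {
  // Shape accepted by generate(prompt, optionsJson): colon directly after the key.
  std::string optionsJson = R"({"max_tokens":256,"temperature":0.2,"top_p":0.95,"top_k":40})";

  std::cout << extractIntValue(optionsJson, "max_tokens", 512) << "\n"; // 256
  std::cout << extractIntValue(optionsJson, "top_k", 40) << "\n";       // 40
  std::cout << extractIntValue("{}", "max_tokens", 512) << "\n";        // missing key: falls back to 512
  return 0;
}

generate() resolves with a JSON string of the same hand-built form, for example {"text":"...","tokensUsed":42,"latencyMs":873,"cancelled":false} (values illustrative).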
@@ -0,0 +1,107 @@
+/**
+ * HybridRunAnywhereLlama.hpp
+ *
+ * Nitrogen HybridObject implementation for RunAnywhere Llama backend.
+ * This single C++ file works on both iOS and Android.
+ *
+ * Llama-specific implementation:
+ * - Backend Registration
+ * - Model Loading/Unloading
+ * - Text Generation (streaming and non-streaming)
+ * - Structured Output (JSON schema generation)
+ *
+ * Matches Swift SDK: LlamaCPPRuntime/LlamaCPP.swift
+ *
+ * The HybridRunAnywhereLlamaSpec base class is auto-generated by Nitrogen
+ * from src/specs/RunAnywhereLlama.nitro.ts
+ */
+
+#pragma once
+
+// Include the generated spec header (created by nitrogen)
+#if __has_include(<NitroModules/HybridObject.hpp>)
+#include "HybridRunAnywhereLlamaSpec.hpp"
+#else
+// Fallback include path during development
+#include "../nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.hpp"
+#endif
+
+#include <mutex>
+#include <string>
+
+namespace margelo::nitro::runanywhere::llama {
+
+/**
+ * HybridRunAnywhereLlama - Llama backend native implementation
+ *
+ * Implements the RunAnywhereLlama interface defined in RunAnywhereLlama.nitro.ts
+ * Delegates to LLMBridge and StructuredOutputBridge for actual inference.
+ */
+class HybridRunAnywhereLlama : public HybridRunAnywhereLlamaSpec {
+public:
+  HybridRunAnywhereLlama();
+  ~HybridRunAnywhereLlama();
+
+  // ============================================================================
+  // Backend Registration
+  // ============================================================================
+
+  std::shared_ptr<Promise<bool>> registerBackend() override;
+  std::shared_ptr<Promise<bool>> unregisterBackend() override;
+  std::shared_ptr<Promise<bool>> isBackendRegistered() override;
+
+  // ============================================================================
+  // Model Loading
+  // ============================================================================
+
+  std::shared_ptr<Promise<bool>> loadModel(
+      const std::string& path,
+      const std::optional<std::string>& modelId,
+      const std::optional<std::string>& modelName,
+      const std::optional<std::string>& configJson) override;
+  std::shared_ptr<Promise<bool>> isModelLoaded() override;
+  std::shared_ptr<Promise<bool>> unloadModel() override;
+  std::shared_ptr<Promise<std::string>> getModelInfo() override;
+
+  // ============================================================================
+  // Text Generation
+  // ============================================================================
+
+  std::shared_ptr<Promise<std::string>> generate(
+      const std::string& prompt,
+      const std::optional<std::string>& optionsJson) override;
+  std::shared_ptr<Promise<std::string>> generateStream(
+      const std::string& prompt,
+      const std::string& optionsJson,
+      const std::function<void(const std::string&, bool)>& callback) override;
+  std::shared_ptr<Promise<bool>> cancelGeneration() override;
+
+  // ============================================================================
+  // Structured Output
+  // ============================================================================
+
+  std::shared_ptr<Promise<std::string>> generateStructured(
+      const std::string& prompt,
+      const std::string& schema,
+      const std::optional<std::string>& optionsJson) override;
+
+  // ============================================================================
+  // Utilities
+  // ============================================================================
+
+  std::shared_ptr<Promise<std::string>> getLastError() override;
+  std::shared_ptr<Promise<double>> getMemoryUsage() override;
+
+private:
+  // Thread safety
+  std::mutex modelMutex_;
+
+  // State tracking
+  std::string lastError_;
+  bool isRegistered_ = false;
+
+  // Helper methods
+  void setLastError(const std::string& error);
+};
+
+} // namespace margelo::nitro::runanywhere::llama
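
generateStructured() declared above delegates to StructuredOutputBridge, whose implementation is listed in the manifest but not shown here. The following is a minimal sketch of that delegation, under the assumptions that a model has already been loaded through loadModel() and that the bridge header is on the include path; the prompt and schema strings are illustrative only.

#include "bridges/StructuredOutputBridge.hpp"
#include <iostream>
#include <string>

int main() {
  using ::runanywhere::bridges::StructuredOutputBridge;

  // Illustrative prompt and JSON schema; the schema string is passed through as-is.
  const std::string prompt = "Extract the city and country from: 'I live in Paris, France.'";
  const std::string schema =
      R"({"type":"object","properties":{"city":{"type":"string"},"country":{"type":"string"}}})";

  // Same call HybridRunAnywhereLlama::generateStructured() makes; an empty
  // string stands in for the optional options JSON.
  auto result = StructuredOutputBridge::shared().generate(prompt, schema, "");
  if (result.success) {
    std::cout << result.json << "\n";   // schema-conforming JSON from the model
  } else {
    std::cerr << result.error << "\n";  // the hybrid object stores this via setLastError()
  }
  return 0;
}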
@@ -0,0 +1,209 @@
+/**
+ * @file LLMBridge.cpp
+ * @brief LLM capability bridge implementation
+ *
+ * NOTE: RACommons and LlamaCPP backend are REQUIRED and always linked via the build system.
+ */
+
+#include "LLMBridge.hpp"
+#include <stdexcept>
+
+namespace runanywhere {
+namespace bridges {
+
+LLMBridge& LLMBridge::shared() {
+  static LLMBridge instance;
+  return instance;
+}
+
+LLMBridge::LLMBridge() = default;
+
+LLMBridge::~LLMBridge() {
+  destroy();
+}
+
+bool LLMBridge::isLoaded() const {
+  if (handle_) {
+    return rac_llm_component_is_loaded(handle_) == RAC_TRUE;
+  }
+  return false;
+}
+
+std::string LLMBridge::currentModelId() const {
+  return loadedModelId_;
+}
+
+rac_result_t LLMBridge::loadModel(const std::string& modelPath,
+                                  const std::string& modelId,
+                                  const std::string& modelName) {
+  // Create component if needed
+  if (!handle_) {
+    rac_result_t result = rac_llm_component_create(&handle_);
+    if (result != RAC_SUCCESS) {
+      throw std::runtime_error("LLMBridge: Failed to create LLM component. Error: " + std::to_string(result));
+    }
+  }
+
+  // Use modelPath as modelId if not provided
+  std::string effectiveModelId = modelId.empty() ? modelPath : modelId;
+  std::string effectiveModelName = modelName.empty() ? effectiveModelId : modelName;
+
+  // Unload existing model if different
+  if (isLoaded() && loadedModelId_ != effectiveModelId) {
+    rac_llm_component_unload(handle_);
+  }
+
+  // Load new model with correct 4-arg signature
+  // rac_llm_component_load_model(handle, model_path, model_id, model_name)
+  rac_result_t result = rac_llm_component_load_model(
+      handle_,
+      modelPath.c_str(),
+      effectiveModelId.c_str(),
+      effectiveModelName.c_str()
+  );
+  if (result == RAC_SUCCESS) {
+    loadedModelId_ = effectiveModelId;
+  } else {
+    throw std::runtime_error("LLMBridge: Failed to load LLM model '" + effectiveModelId + "'. Error: " + std::to_string(result));
+  }
+  return result;
+}
+
+rac_result_t LLMBridge::unload() {
+  if (handle_) {
+    rac_result_t result = rac_llm_component_unload(handle_);
+    if (result == RAC_SUCCESS) {
+      loadedModelId_.clear();
+    } else {
+      throw std::runtime_error("LLMBridge: Failed to unload LLM model. Error: " + std::to_string(result));
+    }
+    return result;
+  }
+  loadedModelId_.clear();
+  return RAC_SUCCESS;
+}
+
+void LLMBridge::cleanup() {
+  if (handle_) {
+    rac_llm_component_cleanup(handle_);
+  }
+  loadedModelId_.clear();
+}
+
+void LLMBridge::cancel() {
+  cancellationRequested_ = true;
+  if (handle_) {
+    rac_llm_component_cancel(handle_);
+  }
+}
+
+void LLMBridge::destroy() {
+  if (handle_) {
+    rac_llm_component_destroy(handle_);
+    handle_ = nullptr;
+  }
+  loadedModelId_.clear();
+}
+
+LLMResult LLMBridge::generate(const std::string& prompt, const LLMOptions& options) {
+  LLMResult result;
+  cancellationRequested_ = false;
+
+  if (!handle_ || !isLoaded()) {
+    throw std::runtime_error("LLMBridge: LLM model not loaded. Call loadModel() first.");
+  }
+
+  rac_llm_options_t racOptions = {};
+  racOptions.max_tokens = options.maxTokens;
+  racOptions.temperature = static_cast<float>(options.temperature);
+  racOptions.top_p = static_cast<float>(options.topP);
+  // NOTE: top_k is not available in rac_llm_options_t, only top_p
+
+  rac_llm_result_t racResult = {};
+  rac_result_t status = rac_llm_component_generate(handle_, prompt.c_str(),
+                                                    &racOptions, &racResult);
+
+  if (status == RAC_SUCCESS) {
+    if (racResult.text) {
+      result.text = racResult.text;
+    }
+    result.tokenCount = racResult.completion_tokens;
+    result.durationMs = static_cast<double>(racResult.total_time_ms);
+  } else {
+    throw std::runtime_error("LLMBridge: Text generation failed with error code: " + std::to_string(status));
+  }
+
+  result.cancelled = cancellationRequested_;
+  return result;
+}
+
+void LLMBridge::generateStream(const std::string& prompt, const LLMOptions& options,
+                               const LLMStreamCallbacks& callbacks) {
+  cancellationRequested_ = false;
+
+  if (!handle_ || !isLoaded()) {
+    if (callbacks.onError) {
+      callbacks.onError(-4, "LLM model not loaded. Call loadModel() first.");
+    }
+    return;
+  }
+
+  rac_llm_options_t racOptions = {};
+  racOptions.max_tokens = options.maxTokens;
+  racOptions.temperature = static_cast<float>(options.temperature);
+  racOptions.top_p = static_cast<float>(options.topP);
+  // NOTE: top_k is not available in rac_llm_options_t, only top_p
+
+  // Stream context for callbacks
+  struct StreamContext {
+    const LLMStreamCallbacks* callbacks;
+    bool* cancellationRequested;
+    std::string accumulatedText;
+  };
+
+  StreamContext ctx = { &callbacks, &cancellationRequested_, "" };
+
+  auto tokenCallback = [](const char* token, void* user_data) -> rac_bool_t {
+    auto* ctx = static_cast<StreamContext*>(user_data);
+    if (*ctx->cancellationRequested) {
+      return RAC_FALSE;
+    }
+    if (ctx->callbacks->onToken && token) {
+      ctx->accumulatedText += token;
+      return ctx->callbacks->onToken(token) ? RAC_TRUE : RAC_FALSE;
+    }
+    return RAC_TRUE;
+  };
+
+  auto completeCallback = [](const rac_llm_result_t* result, void* user_data) {
+    auto* ctx = static_cast<StreamContext*>(user_data);
+    if (ctx->callbacks->onComplete) {
+      ctx->callbacks->onComplete(
+        ctx->accumulatedText,
+        result ? result->completion_tokens : 0,
+        result ? static_cast<double>(result->total_time_ms) : 0.0
+      );
+    }
+  };
+
+  auto errorCallback = [](rac_result_t error_code, const char* error_message,
+                          void* user_data) {
+    auto* ctx = static_cast<StreamContext*>(user_data);
+    if (ctx->callbacks->onError) {
+      ctx->callbacks->onError(error_code, error_message ? error_message : "Unknown error");
+    }
+  };
+
+  rac_llm_component_generate_stream(handle_, prompt.c_str(), &racOptions,
+                                    tokenCallback, completeCallback, errorCallback, &ctx);
+}
+
+rac_lifecycle_state_t LLMBridge::getState() const {
+  if (handle_) {
+    return rac_llm_component_get_state(handle_);
+  }
+  return RAC_LIFECYCLE_STATE_IDLE;
+}
+
+} // namespace bridges
+} // namespace runanywhere
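
Taken together with HybridRunAnywhereLlama.cpp above, the bridge is driven in a fixed sequence: register the backend, load a model, generate (optionally streaming), then unload. The sketch below shows that sequence under the assumptions that the package headers and the prebuilt rac_* libraries are available at build time; the model path, model id, model name, and prompt are placeholders.

#include "bridges/LLMBridge.hpp"

extern "C" {
#include "rac_llm_llamacpp.h"
}

#include <iostream>

int main() {
  using namespace ::runanywhere::bridges;

  // registerBackend() above treats RAC_SUCCESS and "already registered" (-4) as OK.
  rac_backend_llamacpp_register();

  auto& llm = LLMBridge::shared();

  // Throws std::runtime_error on failure (see loadModel() above).
  llm.loadModel("/path/to/model.gguf", "demo-model", "Demo Model");

  LLMOptions options;
  options.maxTokens = 128;
  options.temperature = 0.7f;
  options.topP = 0.9f;  // top_k is not forwarded to rac_llm_options_t

  LLMStreamCallbacks callbacks;
  callbacks.onToken = [](const std::string& token) -> bool {
    std::cout << token << std::flush;
    return true;  // returning false stops generation
  };
  callbacks.onComplete = [](const std::string& fullText, int tokens, double ms) {
    std::cout << "\n[" << tokens << " tokens, " << ms << " ms]\n";
  };
  callbacks.onError = [](int code, const std::string& message) {
    // generateStream() reports failures only through this callback; it does not throw.
    std::cerr << "error " << code << ": " << message << "\n";
  };

  llm.generateStream("Write a haiku about alpacas.", options, callbacks);

  llm.unload();
  rac_backend_llamacpp_unregister();
  return 0;
}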