@runanywhere/llamacpp 0.16.0

This diff reflects the changes between publicly released versions of this package as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (50)
  1. package/RunAnywhereLlama.podspec +131 -0
  2. package/android/CMakeLists.txt +105 -0
  3. package/android/build.gradle +288 -0
  4. package/android/src/main/AndroidManifest.xml +3 -0
  5. package/android/src/main/cpp/cpp-adapter.cpp +14 -0
  6. package/android/src/main/java/com/margelo/nitro/runanywhere/llama/RunAnywhereLlamaPackage.kt +35 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp_jni.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librunanywhere_llamacpp.so +0 -0
  10. package/cpp/HybridRunAnywhereLlama.cpp +346 -0
  11. package/cpp/HybridRunAnywhereLlama.hpp +107 -0
  12. package/cpp/bridges/LLMBridge.cpp +209 -0
  13. package/cpp/bridges/LLMBridge.hpp +109 -0
  14. package/cpp/bridges/StructuredOutputBridge.cpp +151 -0
  15. package/cpp/bridges/StructuredOutputBridge.hpp +66 -0
  16. package/cpp/rac_llm_llamacpp.h +34 -0
  17. package/ios/.testlocal +0 -0
  18. package/ios/Frameworks/RABackendLLAMACPP.xcframework/Info.plist +44 -0
  19. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
  20. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Info.plist +11 -0
  21. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
  22. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
  23. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
  24. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Info.plist +11 -0
  25. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
  26. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
  27. package/ios/LlamaCPPBackend.podspec +127 -0
  28. package/nitro.json +16 -0
  29. package/nitrogen/generated/.gitattributes +1 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/runanywhere/llama/runanywherellamaOnLoad.kt +35 -0
  31. package/nitrogen/generated/android/runanywherellama+autolinking.cmake +81 -0
  32. package/nitrogen/generated/android/runanywherellama+autolinking.gradle +27 -0
  33. package/nitrogen/generated/android/runanywherellamaOnLoad.cpp +44 -0
  34. package/nitrogen/generated/android/runanywherellamaOnLoad.hpp +25 -0
  35. package/nitrogen/generated/ios/RunAnywhereLlama+autolinking.rb +60 -0
  36. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.cpp +17 -0
  37. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.hpp +27 -0
  38. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Umbrella.hpp +38 -0
  39. package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.mm +35 -0
  40. package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.swift +12 -0
  41. package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.cpp +33 -0
  42. package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.hpp +77 -0
  43. package/package.json +60 -0
  44. package/react-native.config.js +14 -0
  45. package/src/LlamaCPP.ts +206 -0
  46. package/src/LlamaCppProvider.ts +120 -0
  47. package/src/index.ts +59 -0
  48. package/src/native/NativeRunAnywhereLlama.ts +58 -0
  49. package/src/native/index.ts +11 -0
  50. package/src/specs/RunAnywhereLlama.nitro.ts +160 -0

package/cpp/HybridRunAnywhereLlama.cpp
@@ -0,0 +1,346 @@
+ /**
+ * HybridRunAnywhereLlama.cpp
+ *
+ * Nitrogen HybridObject implementation for RunAnywhere Llama backend.
+ *
+ * Llama-specific implementation for text generation using LlamaCPP.
+ *
+ * NOTE: LlamaCPP backend is REQUIRED and always linked via the build system.
+ */
+
+ #include "HybridRunAnywhereLlama.hpp"
+
+ // Llama bridges
+ #include "bridges/LLMBridge.hpp"
+ #include "bridges/StructuredOutputBridge.hpp"
+
+ // Backend registration header - always available
+ extern "C" {
+ #include "rac_llm_llamacpp.h"
+ }
+
+ // Unified logging via rac_logger.h
+ #include "rac_logger.h"
+
+ #include <sstream>
+ #include <chrono>
+ #include <vector>
+ #include <stdexcept>
+
+ // Log category for this module
+ #define LOG_CATEGORY "LLM.LlamaCpp"
+
+ namespace margelo::nitro::runanywhere::llama {
+
+ using namespace ::runanywhere::bridges;
+
+ // ============================================================================
+ // JSON Utilities
+ // ============================================================================
+
+ namespace {
+
+ int extractIntValue(const std::string& json, const std::string& key, int defaultValue) {
+   std::string searchKey = "\"" + key + "\":";
+   size_t pos = json.find(searchKey);
+   if (pos == std::string::npos) return defaultValue;
+   pos += searchKey.length();
+   while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++;
+   if (pos >= json.size()) return defaultValue;
+   return std::stoi(json.substr(pos));
+ }
+
+ float extractFloatValue(const std::string& json, const std::string& key, float defaultValue) {
+   std::string searchKey = "\"" + key + "\":";
+   size_t pos = json.find(searchKey);
+   if (pos == std::string::npos) return defaultValue;
+   pos += searchKey.length();
+   while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) pos++;
+   if (pos >= json.size()) return defaultValue;
+   return std::stof(json.substr(pos));
+ }
+
+ std::string extractStringValue(const std::string& json, const std::string& key, const std::string& defaultValue = "") {
+   std::string searchKey = "\"" + key + "\":\"";
+   size_t pos = json.find(searchKey);
+   if (pos == std::string::npos) return defaultValue;
+   pos += searchKey.length();
+   size_t endPos = json.find("\"", pos);
+   if (endPos == std::string::npos) return defaultValue;
+   return json.substr(pos, endPos - pos);
+ }
+
+ std::string buildJsonObject(const std::vector<std::pair<std::string, std::string>>& keyValues) {
+   std::string result = "{";
+   for (size_t i = 0; i < keyValues.size(); i++) {
+     if (i > 0) result += ",";
+     result += "\"" + keyValues[i].first + "\":" + keyValues[i].second;
+   }
+   result += "}";
+   return result;
+ }
+
+ std::string jsonString(const std::string& value) {
+   std::string escaped = "\"";
+   for (char c : value) {
+     if (c == '"') escaped += "\\\"";
+     else if (c == '\\') escaped += "\\\\";
+     else if (c == '\n') escaped += "\\n";
+     else if (c == '\r') escaped += "\\r";
+     else if (c == '\t') escaped += "\\t";
+     else escaped += c;
+   }
+   escaped += "\"";
+   return escaped;
+ }
+
+ } // anonymous namespace
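
The helpers above are a deliberately minimal, flat-JSON reader and writer rather than a full parser. The snippet below is an illustrative sketch (not part of the package diff) of the inputs they accept and where they fall back to defaults; it assumes it sits in the same translation unit as the anonymous namespace above.

// Typical flat options string handled by the extractors:
const std::string opts = R"({"max_tokens":256,"temperature":0.8,"top_p":0.95})";
int   maxTokens = extractIntValue(opts, "max_tokens", 512);      // -> 256
float temp      = extractFloatValue(opts, "temperature", 0.7f);  // -> 0.8f
int   topK      = extractIntValue(opts, "top_k", 40);            // key absent -> default 40
// Limitations of the flat reader:
//   - keys are located by plain substring search, so a key inside a nested
//     object is matched as if it were top-level
//   - extractStringValue only matches "key":"value" with no space after the colon
//   - non-numeric values (e.g. "max_tokens":"many") make std::stoi/std::stof throw
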
+
+ // ============================================================================
+ // Constructor / Destructor
+ // ============================================================================
+
+ HybridRunAnywhereLlama::HybridRunAnywhereLlama() : HybridObject(TAG) {
+   RAC_LOG_DEBUG(LOG_CATEGORY, "HybridRunAnywhereLlama constructor - Llama backend module");
+ }
+
+ HybridRunAnywhereLlama::~HybridRunAnywhereLlama() {
+   RAC_LOG_DEBUG(LOG_CATEGORY, "HybridRunAnywhereLlama destructor");
+   LLMBridge::shared().destroy();
+ }
+
+ // ============================================================================
+ // Backend Registration
+ // ============================================================================
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::registerBackend() {
+   return Promise<bool>::async([this]() {
+     RAC_LOG_DEBUG(LOG_CATEGORY, "Registering LlamaCPP backend with C++ registry");
+
+     rac_result_t result = rac_backend_llamacpp_register();
+     // RAC_SUCCESS (0) or RAC_ERROR_MODULE_ALREADY_REGISTERED (-4) are both OK
+     if (result == RAC_SUCCESS || result == -4) {
+       RAC_LOG_INFO(LOG_CATEGORY, "LlamaCPP backend registered successfully");
+       isRegistered_ = true;
+       return true;
+     } else {
+       RAC_LOG_ERROR(LOG_CATEGORY, "LlamaCPP registration failed with code: %d", result);
+       setLastError("LlamaCPP registration failed with error: " + std::to_string(result));
+       throw std::runtime_error("LlamaCPP registration failed with error: " + std::to_string(result));
+     }
+   });
+ }
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::unregisterBackend() {
+   return Promise<bool>::async([this]() {
+     RAC_LOG_DEBUG(LOG_CATEGORY, "Unregistering LlamaCPP backend");
+
+     rac_result_t result = rac_backend_llamacpp_unregister();
+     isRegistered_ = false;
+     if (result != RAC_SUCCESS) {
+       RAC_LOG_ERROR(LOG_CATEGORY, "LlamaCPP unregistration failed with code: %d", result);
+       throw std::runtime_error("LlamaCPP unregistration failed with error: " + std::to_string(result));
+     }
+     return true;
+   });
+ }
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::isBackendRegistered() {
+   return Promise<bool>::async([this]() {
+     return isRegistered_;
+   });
+ }
+
+ // ============================================================================
+ // Model Loading
+ // ============================================================================
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::loadModel(
+     const std::string& path,
+     const std::optional<std::string>& modelId,
+     const std::optional<std::string>& modelName,
+     const std::optional<std::string>& configJson) {
+   return Promise<bool>::async([this, path, modelId, modelName, configJson]() {
+     std::lock_guard<std::mutex> lock(modelMutex_);
+
+     RAC_LOG_INFO(LOG_CATEGORY, "Loading Llama model: %s", path.c_str());
+
+     std::string id = modelId.value_or("");
+     std::string name = modelName.value_or("");
+
+     // Call with correct 4-arg signature (path, modelId, modelName)
+     // LLMBridge::loadModel will throw on error
+     auto result = LLMBridge::shared().loadModel(path, id, name);
+     if (result != 0) {
+       std::string error = "Failed to load Llama model: " + path + " (error: " + std::to_string(result) + ")";
+       setLastError(error);
+       throw std::runtime_error(error);
+     }
+     return true;
+   });
+ }
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::isModelLoaded() {
+   return Promise<bool>::async([]() {
+     return LLMBridge::shared().isLoaded();
+   });
+ }
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::unloadModel() {
+   return Promise<bool>::async([this]() {
+     std::lock_guard<std::mutex> lock(modelMutex_);
+     auto result = LLMBridge::shared().unload();
+     return result == 0;
+   });
+ }
+
+ std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::getModelInfo() {
+   return Promise<std::string>::async([]() {
+     if (!LLMBridge::shared().isLoaded()) {
+       return std::string("{}");
+     }
+     return buildJsonObject({
+       {"loaded", "true"},
+       {"backend", jsonString("llamacpp")}
+     });
+   });
+ }
+
+ // ============================================================================
+ // Text Generation
+ // ============================================================================
+
+ std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::generate(
+     const std::string& prompt,
+     const std::optional<std::string>& optionsJson) {
+   return Promise<std::string>::async([this, prompt, optionsJson]() {
+     if (!LLMBridge::shared().isLoaded()) {
+       setLastError("Model not loaded");
+       throw std::runtime_error("LLMBridge: Model not loaded. Call loadModel() first.");
+     }
+
+     LLMOptions options;
+     if (optionsJson.has_value()) {
+       options.maxTokens = extractIntValue(*optionsJson, "max_tokens", 512);
+       options.temperature = extractFloatValue(*optionsJson, "temperature", 0.7f);
+       options.topP = extractFloatValue(*optionsJson, "top_p", 0.9f);
+       options.topK = extractIntValue(*optionsJson, "top_k", 40);
+     }
+
+     RAC_LOG_DEBUG(LOG_CATEGORY, "Generating with prompt: %.50s...", prompt.c_str());
+
+     auto startTime = std::chrono::high_resolution_clock::now();
+     // LLMBridge::generate will throw on error
+     auto result = LLMBridge::shared().generate(prompt, options);
+     auto endTime = std::chrono::high_resolution_clock::now();
+     auto durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+         endTime - startTime).count();
+
+     return buildJsonObject({
+       {"text", jsonString(result.text)},
+       {"tokensUsed", std::to_string(result.tokenCount)},
+       {"latencyMs", std::to_string(durationMs)},
+       {"cancelled", result.cancelled ? "true" : "false"}
+     });
+   });
+ }
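
For reference, both the options and the result of generate() are plain JSON strings. The sketch below (editorial, not part of the package diff) spells out the contract implied by the parsing and buildJsonObject calls above; note that top_k is parsed here but, per the note in LLMBridge.cpp further down, it is not forwarded to rac_llm_options_t.

// Options JSON accepted by generate(); every key is optional, and the defaults
// are the ones passed to extractIntValue/extractFloatValue above.
const std::string optionsJson =
    R"({"max_tokens":256,"temperature":0.7,"top_p":0.9,"top_k":40})";
// Resolved value: a JSON string assembled with buildJsonObject/jsonString, e.g.
//   {"text":"...generated text...","tokensUsed":128,"latencyMs":950,"cancelled":false}
// Failures (no model loaded, backend error) surface as a rejected promise: the
// async lambda throws std::runtime_error instead of encoding errors in the payload.
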
+
+ std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::generateStream(
+     const std::string& prompt,
+     const std::string& optionsJson,
+     const std::function<void(const std::string&, bool)>& callback) {
+   return Promise<std::string>::async([this, prompt, optionsJson, callback]() {
+     if (!LLMBridge::shared().isLoaded()) {
+       setLastError("Model not loaded");
+       throw std::runtime_error("LLMBridge: Model not loaded. Call loadModel() first.");
+     }
+
+     LLMOptions options;
+     options.maxTokens = extractIntValue(optionsJson, "max_tokens", 512);
+     options.temperature = extractFloatValue(optionsJson, "temperature", 0.7f);
+
+     std::string fullResponse;
+     std::string streamError;
+
+     LLMStreamCallbacks streamCallbacks;
+     streamCallbacks.onToken = [&callback, &fullResponse](const std::string& token) -> bool {
+       fullResponse += token;
+       if (callback) {
+         callback(token, false);
+       }
+       return true;
+     };
+     streamCallbacks.onComplete = [&callback](const std::string&, int, double) {
+       if (callback) {
+         callback("", true);
+       }
+     };
+     streamCallbacks.onError = [this, &streamError](int code, const std::string& message) {
+       setLastError(message);
+       streamError = message;
+     };
+
+     LLMBridge::shared().generateStream(prompt, options, streamCallbacks);
+
+     if (!streamError.empty()) {
+       throw std::runtime_error("LLMBridge: Stream generation failed: " + streamError);
+     }
+
+     return fullResponse;
+   });
+ }
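
The JS-facing callback passed to generateStream() carries both token chunks and the end-of-stream signal. A minimal sketch of a conforming consumer follows (editorial, not part of the package diff).

// Each generated token arrives as callback(token, /*isComplete=*/false);
// completion is signalled once as callback("", true). The returned promise
// additionally resolves with the full accumulated text, or rejects if the
// bridge reported a stream error.
std::function<void(const std::string&, bool)> onChunk =
    [](const std::string& token, bool isComplete) {
      if (isComplete) {
        // stream finished; the promise resolves with the concatenated response
      } else {
        // append `token` to the UI or to a local buffer
      }
    };
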
+
+ std::shared_ptr<Promise<bool>> HybridRunAnywhereLlama::cancelGeneration() {
+   return Promise<bool>::async([]() {
+     LLMBridge::shared().cancel();
+     return true;
+   });
+ }
+
+ // ============================================================================
+ // Structured Output
+ // ============================================================================
+
+ std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::generateStructured(
+     const std::string& prompt,
+     const std::string& schema,
+     const std::optional<std::string>& optionsJson) {
+   return Promise<std::string>::async([this, prompt, schema, optionsJson]() {
+     auto result = StructuredOutputBridge::shared().generate(
+         prompt, schema, optionsJson.value_or("")
+     );
+
+     if (result.success) {
+       return result.json;
+     } else {
+       setLastError(result.error);
+       return buildJsonObject({{"error", jsonString(result.error)}});
+     }
+   });
+ }
+
+ // ============================================================================
+ // Utilities
+ // ============================================================================
+
+ std::shared_ptr<Promise<std::string>> HybridRunAnywhereLlama::getLastError() {
+   return Promise<std::string>::async([this]() { return lastError_; });
+ }
+
+ std::shared_ptr<Promise<double>> HybridRunAnywhereLlama::getMemoryUsage() {
+   return Promise<double>::async([]() {
+     // TODO: Get memory usage from LlamaCPP
+     return 0.0;
+   });
+ }
+
+ // ============================================================================
+ // Helper Methods
+ // ============================================================================
+
+ void HybridRunAnywhereLlama::setLastError(const std::string& error) {
+   lastError_ = error;
+   RAC_LOG_ERROR(LOG_CATEGORY, "Error: %s", error.c_str());
+ }
+
+ } // namespace margelo::nitro::runanywhere::llama

package/cpp/HybridRunAnywhereLlama.hpp
@@ -0,0 +1,107 @@
+ /**
+ * HybridRunAnywhereLlama.hpp
+ *
+ * Nitrogen HybridObject implementation for RunAnywhere Llama backend.
+ * This single C++ file works on both iOS and Android.
+ *
+ * Llama-specific implementation:
+ * - Backend Registration
+ * - Model Loading/Unloading
+ * - Text Generation (streaming and non-streaming)
+ * - Structured Output (JSON schema generation)
+ *
+ * Matches Swift SDK: LlamaCPPRuntime/LlamaCPP.swift
+ *
+ * The HybridRunAnywhereLlamaSpec base class is auto-generated by Nitrogen
+ * from src/specs/RunAnywhereLlama.nitro.ts
+ */
+
+ #pragma once
+
+ // Include the generated spec header (created by nitrogen)
+ #if __has_include(<NitroModules/HybridObject.hpp>)
+ #include "HybridRunAnywhereLlamaSpec.hpp"
+ #else
+ // Fallback include path during development
+ #include "../nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.hpp"
+ #endif
+
+ #include <mutex>
+ #include <string>
+
+ namespace margelo::nitro::runanywhere::llama {
+
+ /**
+ * HybridRunAnywhereLlama - Llama backend native implementation
+ *
+ * Implements the RunAnywhereLlama interface defined in RunAnywhereLlama.nitro.ts
+ * Delegates to LLMBridge and StructuredOutputBridge for actual inference.
+ */
+ class HybridRunAnywhereLlama : public HybridRunAnywhereLlamaSpec {
+ public:
+   HybridRunAnywhereLlama();
+   ~HybridRunAnywhereLlama();
+
+   // ==========================================================================
+   // Backend Registration
+   // ==========================================================================
+
+   std::shared_ptr<Promise<bool>> registerBackend() override;
+   std::shared_ptr<Promise<bool>> unregisterBackend() override;
+   std::shared_ptr<Promise<bool>> isBackendRegistered() override;
+
+   // ==========================================================================
+   // Model Loading
+   // ==========================================================================
+
+   std::shared_ptr<Promise<bool>> loadModel(
+       const std::string& path,
+       const std::optional<std::string>& modelId,
+       const std::optional<std::string>& modelName,
+       const std::optional<std::string>& configJson) override;
+   std::shared_ptr<Promise<bool>> isModelLoaded() override;
+   std::shared_ptr<Promise<bool>> unloadModel() override;
+   std::shared_ptr<Promise<std::string>> getModelInfo() override;
+
+   // ==========================================================================
+   // Text Generation
+   // ==========================================================================
+
+   std::shared_ptr<Promise<std::string>> generate(
+       const std::string& prompt,
+       const std::optional<std::string>& optionsJson) override;
+   std::shared_ptr<Promise<std::string>> generateStream(
+       const std::string& prompt,
+       const std::string& optionsJson,
+       const std::function<void(const std::string&, bool)>& callback) override;
+   std::shared_ptr<Promise<bool>> cancelGeneration() override;
+
+   // ==========================================================================
+   // Structured Output
+   // ==========================================================================
+
+   std::shared_ptr<Promise<std::string>> generateStructured(
+       const std::string& prompt,
+       const std::string& schema,
+       const std::optional<std::string>& optionsJson) override;
+
+   // ==========================================================================
+   // Utilities
+   // ==========================================================================
+
+   std::shared_ptr<Promise<std::string>> getLastError() override;
+   std::shared_ptr<Promise<double>> getMemoryUsage() override;
+
+ private:
+   // Thread safety
+   std::mutex modelMutex_;
+
+   // State tracking
+   std::string lastError_;
+   bool isRegistered_ = false;
+
+   // Helper methods
+   void setLastError(const std::string& error);
+ };
+
+ } // namespace margelo::nitro::runanywhere::llama
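
Taken together with the implementation above, the intended call order is register, then load, then generate. The sketch below is illustrative only and not part of the package diff: in a real app these methods are reached through the Nitrogen-generated JS bindings, each returned Promise must settle before the next call, and the model path shown is hypothetical.

auto llama = std::make_shared<HybridRunAnywhereLlama>();
auto registered = llama->registerBackend();                       // register LlamaCPP with the C++ registry
auto loaded     = llama->loadModel("/data/models/example.gguf",   // hypothetical GGUF path on device storage
                                   std::nullopt, std::nullopt, std::nullopt);
auto reply      = llama->generate("Write a haiku about on-device inference.",
                                  std::nullopt);                  // resolves with the JSON payload described earlier
auto unloaded   = llama->unloadModel();
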

package/cpp/bridges/LLMBridge.cpp
@@ -0,0 +1,209 @@
+ /**
+ * @file LLMBridge.cpp
+ * @brief LLM capability bridge implementation
+ *
+ * NOTE: RACommons and LlamaCPP backend are REQUIRED and always linked via the build system.
+ */
+
+ #include "LLMBridge.hpp"
+ #include <stdexcept>
+
+ namespace runanywhere {
+ namespace bridges {
+
+ LLMBridge& LLMBridge::shared() {
+   static LLMBridge instance;
+   return instance;
+ }
+
+ LLMBridge::LLMBridge() = default;
+
+ LLMBridge::~LLMBridge() {
+   destroy();
+ }
+
+ bool LLMBridge::isLoaded() const {
+   if (handle_) {
+     return rac_llm_component_is_loaded(handle_) == RAC_TRUE;
+   }
+   return false;
+ }
+
+ std::string LLMBridge::currentModelId() const {
+   return loadedModelId_;
+ }
+
+ rac_result_t LLMBridge::loadModel(const std::string& modelPath,
+                                   const std::string& modelId,
+                                   const std::string& modelName) {
+   // Create component if needed
+   if (!handle_) {
+     rac_result_t result = rac_llm_component_create(&handle_);
+     if (result != RAC_SUCCESS) {
+       throw std::runtime_error("LLMBridge: Failed to create LLM component. Error: " + std::to_string(result));
+     }
+   }
+
+   // Use modelPath as modelId if not provided
+   std::string effectiveModelId = modelId.empty() ? modelPath : modelId;
+   std::string effectiveModelName = modelName.empty() ? effectiveModelId : modelName;
+
+   // Unload existing model if different
+   if (isLoaded() && loadedModelId_ != effectiveModelId) {
+     rac_llm_component_unload(handle_);
+   }
+
+   // Load new model with correct 4-arg signature
+   // rac_llm_component_load_model(handle, model_path, model_id, model_name)
+   rac_result_t result = rac_llm_component_load_model(
+     handle_,
+     modelPath.c_str(),
+     effectiveModelId.c_str(),
+     effectiveModelName.c_str()
+   );
+   if (result == RAC_SUCCESS) {
+     loadedModelId_ = effectiveModelId;
+   } else {
+     throw std::runtime_error("LLMBridge: Failed to load LLM model '" + effectiveModelId + "'. Error: " + std::to_string(result));
+   }
+   return result;
+ }
+
+ rac_result_t LLMBridge::unload() {
+   if (handle_) {
+     rac_result_t result = rac_llm_component_unload(handle_);
+     if (result == RAC_SUCCESS) {
+       loadedModelId_.clear();
+     } else {
+       throw std::runtime_error("LLMBridge: Failed to unload LLM model. Error: " + std::to_string(result));
+     }
+     return result;
+   }
+   loadedModelId_.clear();
+   return RAC_SUCCESS;
+ }
+
+ void LLMBridge::cleanup() {
+   if (handle_) {
+     rac_llm_component_cleanup(handle_);
+   }
+   loadedModelId_.clear();
+ }
+
+ void LLMBridge::cancel() {
+   cancellationRequested_ = true;
+   if (handle_) {
+     rac_llm_component_cancel(handle_);
+   }
+ }
+
+ void LLMBridge::destroy() {
+   if (handle_) {
+     rac_llm_component_destroy(handle_);
+     handle_ = nullptr;
+   }
+   loadedModelId_.clear();
+ }
+
+ LLMResult LLMBridge::generate(const std::string& prompt, const LLMOptions& options) {
+   LLMResult result;
+   cancellationRequested_ = false;
+
+   if (!handle_ || !isLoaded()) {
+     throw std::runtime_error("LLMBridge: LLM model not loaded. Call loadModel() first.");
+   }
+
+   rac_llm_options_t racOptions = {};
+   racOptions.max_tokens = options.maxTokens;
+   racOptions.temperature = static_cast<float>(options.temperature);
+   racOptions.top_p = static_cast<float>(options.topP);
+   // NOTE: top_k is not available in rac_llm_options_t, only top_p
+
+   rac_llm_result_t racResult = {};
+   rac_result_t status = rac_llm_component_generate(handle_, prompt.c_str(),
+                                                    &racOptions, &racResult);
+
+   if (status == RAC_SUCCESS) {
+     if (racResult.text) {
+       result.text = racResult.text;
+     }
+     result.tokenCount = racResult.completion_tokens;
+     result.durationMs = static_cast<double>(racResult.total_time_ms);
+   } else {
+     throw std::runtime_error("LLMBridge: Text generation failed with error code: " + std::to_string(status));
+   }
+
+   result.cancelled = cancellationRequested_;
+   return result;
+ }
+
+ void LLMBridge::generateStream(const std::string& prompt, const LLMOptions& options,
+                                const LLMStreamCallbacks& callbacks) {
+   cancellationRequested_ = false;
+
+   if (!handle_ || !isLoaded()) {
+     if (callbacks.onError) {
+       callbacks.onError(-4, "LLM model not loaded. Call loadModel() first.");
+     }
+     return;
+   }
+
+   rac_llm_options_t racOptions = {};
+   racOptions.max_tokens = options.maxTokens;
+   racOptions.temperature = static_cast<float>(options.temperature);
+   racOptions.top_p = static_cast<float>(options.topP);
+   // NOTE: top_k is not available in rac_llm_options_t, only top_p
+
+   // Stream context for callbacks
+   struct StreamContext {
+     const LLMStreamCallbacks* callbacks;
+     bool* cancellationRequested;
+     std::string accumulatedText;
+   };
+
+   StreamContext ctx = { &callbacks, &cancellationRequested_, "" };
+
+   auto tokenCallback = [](const char* token, void* user_data) -> rac_bool_t {
+     auto* ctx = static_cast<StreamContext*>(user_data);
+     if (*ctx->cancellationRequested) {
+       return RAC_FALSE;
+     }
+     if (ctx->callbacks->onToken && token) {
+       ctx->accumulatedText += token;
+       return ctx->callbacks->onToken(token) ? RAC_TRUE : RAC_FALSE;
+     }
+     return RAC_TRUE;
+   };
+
+   auto completeCallback = [](const rac_llm_result_t* result, void* user_data) {
+     auto* ctx = static_cast<StreamContext*>(user_data);
+     if (ctx->callbacks->onComplete) {
+       ctx->callbacks->onComplete(
+         ctx->accumulatedText,
+         result ? result->completion_tokens : 0,
+         result ? static_cast<double>(result->total_time_ms) : 0.0
+       );
+     }
+   };
+
+   auto errorCallback = [](rac_result_t error_code, const char* error_message,
+                           void* user_data) {
+     auto* ctx = static_cast<StreamContext*>(user_data);
+     if (ctx->callbacks->onError) {
+       ctx->callbacks->onError(error_code, error_message ? error_message : "Unknown error");
+     }
+   };
+
+   rac_llm_component_generate_stream(handle_, prompt.c_str(), &racOptions,
+                                     tokenCallback, completeCallback, errorCallback, &ctx);
+ }
+
+ rac_lifecycle_state_t LLMBridge::getState() const {
+   if (handle_) {
+     return rac_llm_component_get_state(handle_);
+   }
+   return RAC_LIFECYCLE_STATE_IDLE;
+ }
+
+ } // namespace bridges
+ } // namespace runanywhere
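
LLMBridge::generateStream() above bridges C++ std::function callbacks onto a C API that only accepts function pointers plus a void* user_data. The pattern is worth naming: a context struct travels through user_data, and captureless lambdas (which convert to plain function pointers) act as trampolines. Below is a self-contained sketch of the same pattern, independent of the rac_* API and not part of the package diff; c_stream_api is a stand-in for the real C entry point.

#include <functional>
#include <iostream>
#include <string>

// C-style callback shape: function pointer + opaque user data.
using token_cb = void (*)(const char* token, void* user_data);

// Stand-in for a C streaming API (here it just emits two tokens synchronously).
void c_stream_api(token_cb cb, void* user_data) {
  cb("Hello, ", user_data);
  cb("world!", user_data);
}

struct StreamCtx {
  std::function<void(const std::string&)> onToken;
};

void stream(const std::function<void(const std::string&)>& onToken) {
  StreamCtx ctx{onToken};
  // Captureless lambda: implicitly convertible to token_cb. All state flows
  // through user_data, mirroring the StreamContext used in LLMBridge above.
  auto trampoline = [](const char* token, void* user_data) {
    auto* c = static_cast<StreamCtx*>(user_data);
    if (token && c->onToken) c->onToken(token);
  };
  c_stream_api(trampoline, &ctx);  // ctx must outlive the (synchronous) call
}

int main() {
  stream([](const std::string& t) { std::cout << t; });
  std::cout << "\n";
  return 0;
}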