@runanywhere/llamacpp 0.17.7 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -16
- package/android/CMakeLists.txt +6 -2
- package/android/build.gradle +76 -10
- package/android/src/main/include/rac/backends/rac_llm_llamacpp.h +218 -0
- package/android/src/main/include/rac/backends/rac_stt_onnx.h +99 -0
- package/android/src/main/include/rac/backends/rac_stt_whispercpp.h +153 -0
- package/android/src/main/include/rac/backends/rac_tts_onnx.h +71 -0
- package/android/src/main/include/rac/backends/rac_vad_onnx.h +84 -0
- package/android/src/main/include/rac/core/capabilities/rac_lifecycle.h +290 -0
- package/android/src/main/include/rac/core/rac_analytics_events.h +610 -0
- package/android/src/main/include/rac/core/rac_audio_utils.h +88 -0
- package/android/src/main/include/rac/core/rac_component_types.h +160 -0
- package/android/src/main/include/rac/core/rac_core.h +331 -0
- package/android/src/main/include/rac/core/rac_error.h +469 -0
- package/android/src/main/include/rac/core/rac_events.h +334 -0
- package/android/src/main/include/rac/core/rac_logger.h +416 -0
- package/android/src/main/include/rac/core/rac_platform_adapter.h +340 -0
- package/android/src/main/include/rac/core/rac_sdk_state.h +292 -0
- package/android/src/main/include/rac/core/rac_structured_error.h +594 -0
- package/android/src/main/include/rac/core/rac_types.h +264 -0
- package/android/src/main/include/rac/features/llm/rac_llm.h +17 -0
- package/android/src/main/include/rac/features/llm/rac_llm_analytics.h +188 -0
- package/android/src/main/include/rac/features/llm/rac_llm_component.h +228 -0
- package/android/src/main/include/rac/features/llm/rac_llm_events.h +215 -0
- package/android/src/main/include/rac/features/llm/rac_llm_metrics.h +402 -0
- package/android/src/main/include/rac/features/llm/rac_llm_service.h +163 -0
- package/android/src/main/include/rac/features/llm/rac_llm_structured_output.h +141 -0
- package/android/src/main/include/rac/features/llm/rac_llm_types.h +384 -0
- package/android/src/main/include/rac/features/platform/rac_llm_platform.h +204 -0
- package/android/src/main/include/rac/features/platform/rac_tts_platform.h +197 -0
- package/android/src/main/include/rac/features/stt/rac_stt.h +17 -0
- package/android/src/main/include/rac/features/stt/rac_stt_analytics.h +204 -0
- package/android/src/main/include/rac/features/stt/rac_stt_component.h +162 -0
- package/android/src/main/include/rac/features/stt/rac_stt_events.h +62 -0
- package/android/src/main/include/rac/features/stt/rac_stt_service.h +154 -0
- package/android/src/main/include/rac/features/stt/rac_stt_types.h +389 -0
- package/android/src/main/include/rac/features/tts/rac_tts.h +17 -0
- package/android/src/main/include/rac/features/tts/rac_tts_analytics.h +181 -0
- package/android/src/main/include/rac/features/tts/rac_tts_component.h +158 -0
- package/android/src/main/include/rac/features/tts/rac_tts_events.h +54 -0
- package/android/src/main/include/rac/features/tts/rac_tts_service.h +162 -0
- package/android/src/main/include/rac/features/tts/rac_tts_types.h +374 -0
- package/android/src/main/include/rac/features/vad/rac_vad.h +17 -0
- package/android/src/main/include/rac/features/vad/rac_vad_analytics.h +236 -0
- package/android/src/main/include/rac/features/vad/rac_vad_component.h +185 -0
- package/android/src/main/include/rac/features/vad/rac_vad_energy.h +443 -0
- package/android/src/main/include/rac/features/vad/rac_vad_events.h +76 -0
- package/android/src/main/include/rac/features/vad/rac_vad_service.h +167 -0
- package/android/src/main/include/rac/features/vad/rac_vad_types.h +244 -0
- package/android/src/main/include/rac/features/voice_agent/rac_voice_agent.h +612 -0
- package/android/src/main/include/rac/infrastructure/device/rac_device_manager.h +176 -0
- package/android/src/main/include/rac/infrastructure/download/rac_download.h +418 -0
- package/android/src/main/include/rac/infrastructure/events/rac_events.h +177 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_assignment.h +169 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_paths.h +258 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_registry.h +357 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_strategy.h +374 -0
- package/android/src/main/include/rac/infrastructure/model_management/rac_model_types.h +613 -0
- package/android/src/main/include/rac/infrastructure/network/rac_api_types.h +335 -0
- package/android/src/main/include/rac/infrastructure/network/rac_auth_manager.h +252 -0
- package/android/src/main/include/rac/infrastructure/network/rac_dev_config.h +85 -0
- package/android/src/main/include/rac/infrastructure/network/rac_endpoints.h +102 -0
- package/android/src/main/include/rac/infrastructure/network/rac_environment.h +220 -0
- package/android/src/main/include/rac/infrastructure/network/rac_http_client.h +233 -0
- package/android/src/main/include/rac/infrastructure/storage/rac_storage_analyzer.h +286 -0
- package/android/src/main/include/rac/infrastructure/telemetry/rac_telemetry_manager.h +206 -0
- package/android/src/main/include/rac/infrastructure/telemetry/rac_telemetry_types.h +234 -0
- package/android/src/main/jniLibs/arm64-v8a/libomp.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp_jni.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librac_commons.so +0 -0
- package/android/src/main/jniLibs/x86_64/libomp.so +0 -0
- package/android/src/main/jniLibs/x86_64/librac_backend_llamacpp.so +0 -0
- package/android/src/main/jniLibs/x86_64/librac_backend_llamacpp_jni.so +0 -0
- package/android/src/main/jniLibs/x86_64/librac_commons.so +0 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
- package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
- package/package.json +7 -4
package/README.md
CHANGED
|
@@ -10,6 +10,8 @@ LlamaCPP backend for the RunAnywhere React Native SDK. Provides on-device LLM te
|
|
|
10
10
|
|
|
11
11
|
- **Text Generation** — Generate text responses from prompts
|
|
12
12
|
- **Streaming** — Real-time token-by-token output
|
|
13
|
+
- **Tool Calling** — Let models invoke registered tools during generation
|
|
14
|
+
- **Structured Output** — Generate type-safe JSON responses
|
|
13
15
|
- **GGUF Support** — Run any GGUF-format model (Llama, Mistral, Qwen, SmolLM, etc.)
|
|
14
16
|
- **Metal GPU Acceleration** — 3-5x faster inference on Apple Silicon (iOS)
|
|
15
17
|
- **CPU Inference** — Works on all devices without GPU requirements
|
|
@@ -20,7 +22,7 @@ LlamaCPP backend for the RunAnywhere React Native SDK. Provides on-device LLM te
|
|
|
20
22
|
## Requirements
|
|
21
23
|
|
|
22
24
|
- `@runanywhere/core` (peer dependency)
|
|
23
|
-
- React Native 0.
|
|
25
|
+
- React Native 0.74+
|
|
24
26
|
- iOS 15.1+ / Android API 24+
|
|
25
27
|
|
|
26
28
|
---
|
|
@@ -246,6 +248,51 @@ const result = await streamResult.result;
|
|
|
246
248
|
console.log('\nSpeed:', result.performanceMetrics.tokensPerSecond, 'tok/s');
|
|
247
249
|
```
|
|
248
250
|
|
|
251
|
+
#### Tool Calling
|
|
252
|
+
|
|
253
|
+
Register tools and let the LLM call them during generation. Tool-call parsing and prompt formatting are handled entirely in C++ for consistency across platforms.
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import { RunAnywhere } from '@runanywhere/core';
|
|
257
|
+
import { LlamaCPP } from '@runanywhere/llamacpp';
|
|
258
|
+
|
|
259
|
+
// Register a tool
|
|
260
|
+
RunAnywhere.registerTool(
|
|
261
|
+
{
|
|
262
|
+
name: 'calculate',
|
|
263
|
+
description: 'Perform a math calculation',
|
|
264
|
+
parameters: [
|
|
265
|
+
{ name: 'expression', type: 'string', description: 'Math expression', required: true },
|
|
266
|
+
],
|
|
267
|
+
},
|
|
268
|
+
async (args) => {
|
|
269
|
+
const result = eval(args.expression as string); // simplified example: eval() executes model-supplied text as code; use a real expression parser in production
|
|
270
|
+
return { result };
|
|
271
|
+
}
|
|
272
|
+
);
|
|
273
|
+
|
|
274
|
+
// Generate with tools
|
|
275
|
+
const result = await RunAnywhere.generateWithTools(
|
|
276
|
+
'What is 42 * 17?',
|
|
277
|
+
{
|
|
278
|
+
autoExecute: true,
|
|
279
|
+
maxToolCalls: 3,
|
|
280
|
+
temperature: 0.7,
|
|
281
|
+
format: 'default', // 'default' for most models, 'lfm2' for Liquid AI models
|
|
282
|
+
}
|
|
283
|
+
);
|
|
284
|
+
console.log(result.text); // "42 * 17 = 714"
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
**Supported tool calling formats:**
|
|
288
|
+
|
|
289
|
+
| Format | Tag Pattern | Models |
|
|
290
|
+
|--------|-------------|--------|
|
|
291
|
+
| `default` | `<tool_call>{"tool":"name","arguments":{}}</tool_call>` | Llama, Qwen, Mistral, SmolLM, most GGUF models |
|
|
292
|
+
| `lfm2` | `<\|tool_call_start\|>[func(arg="val")]<\|tool_call_end\|>` | Liquid AI LFM2-Tool models |
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
249
296
|
#### Model Management
|
|
250
297
|
|
|
251
298
|
```typescript
|
|
@@ -270,27 +317,38 @@ Any GGUF-format model works with this backend. Recommended models:
|
|
|
270
317
|
|
|
271
318
|
### Small Models (< 1GB RAM)
|
|
272
319
|
|
|
273
|
-
| Model | Size | Memory | Description |
|
|
274
|
-
|
|
275
|
-
| SmolLM2 360M Q8_0 | ~400MB | 500MB | Fast, lightweight |
|
|
276
|
-
| Qwen 2.5 0.5B Q6_K | ~500MB | 600MB | Multilingual |
|
|
277
|
-
| LFM2 350M Q4_K_M | ~200MB | 250MB | Ultra-compact |
|
|
320
|
+
| Model | Size | Memory | Tool Calling | Description |
|
|
321
|
+
|-------|------|--------|:------------:|-------------|
|
|
322
|
+
| SmolLM2 360M Q8_0 | ~400MB | 500MB | - | Fast, lightweight |
|
|
323
|
+
| Qwen 2.5 0.5B Q6_K | ~500MB | 600MB | Yes | Multilingual |
|
|
324
|
+
| LFM2 350M Q4_K_M | ~200MB | 250MB | Yes (lfm2) | Ultra-compact, Liquid AI |
|
|
278
325
|
|
|
279
326
|
### Medium Models (1-3GB RAM)
|
|
280
327
|
|
|
281
|
-
| Model | Size | Memory | Description |
|
|
282
|
-
|
|
283
|
-
| Phi-3 Mini Q4_K_M | ~2GB | 2.5GB | Microsoft |
|
|
284
|
-
| Gemma 2B Q4_K_M | ~1.5GB | 2GB | Google |
|
|
285
|
-
|
|
|
328
|
+
| Model | Size | Memory | Tool Calling | Description |
|
|
329
|
+
|-------|------|--------|:------------:|-------------|
|
|
330
|
+
| Phi-3 Mini Q4_K_M | ~2GB | 2.5GB | - | Microsoft |
|
|
331
|
+
| Gemma 2B Q4_K_M | ~1.5GB | 2GB | - | Google |
|
|
332
|
+
| LFM2 1.2B Q4_K_M | ~800MB | 1GB | Yes (lfm2) | Liquid AI tool-calling |
|
|
333
|
+
| Qwen 2.5 1.5B Instruct Q4_K_M | ~1GB | 1.5GB | Yes | Alibaba, multilingual |
|
|
334
|
+
| TinyLlama 1.1B Q4_K_M | ~700MB | 1GB | - | Fast chat |
|
|
286
335
|
|
|
287
336
|
### Large Models (4GB+ RAM)
|
|
288
337
|
|
|
289
|
-
| Model | Size | Memory | Description |
|
|
290
|
-
|
|
291
|
-
| Llama 2
|
|
292
|
-
| Mistral 7B Q4_K_M | ~4GB | 5GB | Mistral AI |
|
|
293
|
-
|
|
|
338
|
+
| Model | Size | Memory | Tool Calling | Description |
|
|
339
|
+
|-------|------|--------|:------------:|-------------|
|
|
340
|
+
| Llama 3.2 3B Instruct Q4_K_M | ~2GB | 3GB | Yes | Meta latest |
|
|
341
|
+
| Mistral 7B Instruct Q4_K_M | ~4GB | 5GB | Yes | Mistral AI |
|
|
342
|
+
| Qwen 2.5 7B Instruct Q4_K_M | ~4GB | 5GB | Yes | Alibaba |
|
|
343
|
+
| Llama 2 7B Chat Q4_K_M | ~4GB | 5GB | - | Meta |
|
|
344
|
+
|
|
345
|
+
### Tool Calling Model Selection Guide
|
|
346
|
+
|
|
347
|
+
- **Best for tool calling (small):** LFM2-350M-Tool (use `format: 'lfm2'`) or Qwen 2.5 0.5B
|
|
348
|
+
- **Best for tool calling (medium):** LFM2-1.2B-Tool or Qwen 2.5 1.5B Instruct
|
|
349
|
+
- **Best for tool calling (large):** Mistral 7B Instruct or Qwen 2.5 7B Instruct
|
|
350
|
+
- **Instruct-tuned models** generally perform better at following tool calling instructions
|
|
351
|
+
- Use `format: 'lfm2'` only with Liquid AI LFM2-Tool models; all others use `format: 'default'`
|
|
294
352
|
|
|
295
353
|
---
|
|
296
354
|
|
package/android/CMakeLists.txt
CHANGED
|
@@ -19,8 +19,12 @@ set(JNILIB_DIR ${CMAKE_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI})
|
|
|
19
19
|
# Downloaded via Gradle downloadNativeLibs task
|
|
20
20
|
# =============================================================================
|
|
21
21
|
if(NOT EXISTS "${JNILIB_DIR}/librac_backend_llamacpp.so")
|
|
22
|
-
message(
|
|
23
|
-
|
|
22
|
+
message(WARNING "[RunAnywhereLlama] RABackendLlamaCPP not found for ${ANDROID_ABI} at ${JNILIB_DIR}/librac_backend_llamacpp.so\n"
|
|
23
|
+
"This ABI will not be functional. To fix, run: ./gradlew :runanywhere_llamacpp:downloadNativeLibs\n"
|
|
24
|
+
"Or set reactNativeArchitectures=arm64-v8a in gradle.properties to skip this ABI.")
|
|
25
|
+
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/stub.cpp" "// Stub for missing ABI ${ANDROID_ABI}")
|
|
26
|
+
add_library(${PACKAGE_NAME} SHARED "${CMAKE_CURRENT_BINARY_DIR}/stub.cpp")
|
|
27
|
+
return()
|
|
24
28
|
endif()
|
|
25
29
|
|
|
26
30
|
add_library(rac_backend_llamacpp SHARED IMPORTED)
|
package/android/build.gradle
CHANGED
|
@@ -38,9 +38,13 @@ def getExtOrDefault(name) {
|
|
|
38
38
|
return rootProject.ext.has(name) ? rootProject.ext.get(name) : project.properties['RunAnywhereLlama_' + name]
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
//
|
|
41
|
+
// Supported ABIs - arm64-v8a for physical devices, x86_64 for emulators
|
|
42
|
+
// Can be overridden via gradle.properties: reactNativeArchitectures=arm64-v8a
|
|
42
43
|
def reactNativeArchitectures() {
|
|
43
|
-
|
|
44
|
+
def value = rootProject.hasProperty("reactNativeArchitectures")
|
|
45
|
+
? rootProject.property("reactNativeArchitectures")
|
|
46
|
+
: null
|
|
47
|
+
return value ? value.split(",").collect { it.trim() } : ["arm64-v8a", "x86_64"]
|
|
44
48
|
}
|
|
45
49
|
|
|
46
50
|
apply plugin: 'com.android.library'
|
|
@@ -109,7 +113,7 @@ android {
|
|
|
109
113
|
targetSdkVersion getExtOrIntegerDefault('targetSdkVersion')
|
|
110
114
|
|
|
111
115
|
ndk {
|
|
112
|
-
abiFilters
|
|
116
|
+
abiFilters(*reactNativeArchitectures())
|
|
113
117
|
}
|
|
114
118
|
|
|
115
119
|
externalNativeBuild {
|
|
@@ -118,7 +122,7 @@ android {
|
|
|
118
122
|
arguments "-DANDROID_STL=c++_shared",
|
|
119
123
|
// Fix NitroModules prefab path - use app's build directory
|
|
120
124
|
"-DREACT_NATIVE_NITRO_BUILD_DIR=${rootProject.buildDir}"
|
|
121
|
-
abiFilters
|
|
125
|
+
abiFilters(*reactNativeArchitectures())
|
|
122
126
|
}
|
|
123
127
|
}
|
|
124
128
|
}
|
|
@@ -132,7 +136,12 @@ android {
|
|
|
132
136
|
packagingOptions {
|
|
133
137
|
excludes = [
|
|
134
138
|
"META-INF",
|
|
135
|
-
"META-INF/**"
|
|
139
|
+
"META-INF/**",
|
|
140
|
+
// Exclude librac_commons.so from this module's packaging.
|
|
141
|
+
// The core package (@runanywhere/core) is the single authoritative source
|
|
142
|
+
// for librac_commons.so. If this module also packages it, Gradle's native
|
|
143
|
+
// lib merge may pick a stale version, causing UnsatisfiedLinkError crashes.
|
|
144
|
+
"**/librac_commons.so"
|
|
136
145
|
]
|
|
137
146
|
pickFirsts = [
|
|
138
147
|
"**/libc++_shared.so",
|
|
@@ -202,11 +211,64 @@ task downloadNativeLibs {
|
|
|
202
211
|
return
|
|
203
212
|
}
|
|
204
213
|
|
|
205
|
-
// Check if libs are already bundled (npm install case)
|
|
206
|
-
def
|
|
207
|
-
def
|
|
208
|
-
|
|
209
|
-
|
|
214
|
+
// Check if libs are already bundled for ALL requested ABIs (npm install case)
|
|
215
|
+
def requestedAbis = reactNativeArchitectures()
|
|
216
|
+
def allAbisBundled = requestedAbis.every { abi ->
|
|
217
|
+
def abiDir = file("${jniLibsDir}/${abi}")
|
|
218
|
+
def libs = abiDir.exists() ? abiDir.listFiles()?.findAll { it.name.endsWith(".so") } : []
|
|
219
|
+
return libs?.size() > 0
|
|
220
|
+
}
|
|
221
|
+
if (allAbisBundled) {
|
|
222
|
+
logger.lifecycle("[RunAnywhereLlama] ✅ Using bundled native libraries from npm package for ABIs: ${requestedAbis.join(', ')}")
|
|
223
|
+
return
|
|
224
|
+
}
|
|
225
|
+
// Check if at least arm64-v8a is bundled (partial bundle - need to download missing ABIs)
|
|
226
|
+
def arm64Dir = file("${jniLibsDir}/arm64-v8a")
|
|
227
|
+
def arm64Bundled = arm64Dir.exists() && arm64Dir.listFiles()?.any { it.name.endsWith(".so") }
|
|
228
|
+
if (arm64Bundled) {
|
|
229
|
+
def missingAbis = requestedAbis.findAll { abi ->
|
|
230
|
+
def abiDir = file("${jniLibsDir}/${abi}")
|
|
231
|
+
def libs = abiDir.exists() ? abiDir.listFiles()?.findAll { it.name.endsWith(".so") } : []
|
|
232
|
+
return !libs || libs.size() == 0
|
|
233
|
+
}
|
|
234
|
+
if (missingAbis.size() > 0) {
|
|
235
|
+
logger.lifecycle("[RunAnywhereLlama] ⚠️ Bundled libs found for arm64-v8a but missing for: ${missingAbis.join(', ')}")
|
|
236
|
+
logger.lifecycle("[RunAnywhereLlama] Attempting to download missing ABIs from GitHub releases...")
|
|
237
|
+
try {
|
|
238
|
+
def llamacppUrl = "https://github.com/${githubOrg}/${coreRepo}/releases/download/core-v${coreVersion}/RABackendLlamaCPP-android-v${coreVersion}.zip"
|
|
239
|
+
def tempZip = file("${downloadedLibsDir}/RABackendLlamaCPP-supplement.zip")
|
|
240
|
+
downloadedLibsDir.mkdirs()
|
|
241
|
+
new URL(llamacppUrl).withInputStream { input ->
|
|
242
|
+
tempZip.withOutputStream { output -> output << input }
|
|
243
|
+
}
|
|
244
|
+
copy {
|
|
245
|
+
from zipTree(tempZip)
|
|
246
|
+
into jniLibsDir
|
|
247
|
+
exclude "**/libc++_shared.so"
|
|
248
|
+
// Exclude librac_commons.so - the core package (@runanywhere/core) is the
|
|
249
|
+
// authoritative source. Including it here risks a stale version winning the
|
|
250
|
+
// Gradle native lib merge, causing UnsatisfiedLinkError crashes at runtime.
|
|
251
|
+
exclude "**/librac_commons.so"
|
|
252
|
+
eachFile { fileCopyDetails ->
|
|
253
|
+
def pathString = fileCopyDetails.relativePath.pathString
|
|
254
|
+
def match = pathString =~ /.*\/(arm64-v8a|armeabi-v7a|x86|x86_64)\/(.+\.so)$/
|
|
255
|
+
if (match) {
|
|
256
|
+
def abi = match[0][1]
|
|
257
|
+
def filename = match[0][2]
|
|
258
|
+
fileCopyDetails.relativePath = new RelativePath(true, abi, filename)
|
|
259
|
+
} else if (!pathString.endsWith(".so")) {
|
|
260
|
+
fileCopyDetails.exclude()
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
includeEmptyDirs = false
|
|
264
|
+
}
|
|
265
|
+
tempZip.delete()
|
|
266
|
+
logger.lifecycle("[RunAnywhereLlama] ✅ Downloaded missing ABIs successfully")
|
|
267
|
+
} catch (Exception e) {
|
|
268
|
+
logger.warn("[RunAnywhereLlama] ⚠️ Could not download missing ABIs: ${e.message}")
|
|
269
|
+
logger.warn("[RunAnywhereLlama] Building with available ABIs only (arm64-v8a)")
|
|
270
|
+
}
|
|
271
|
+
}
|
|
210
272
|
return
|
|
211
273
|
}
|
|
212
274
|
|
|
@@ -249,6 +311,10 @@ task downloadNativeLibs {
|
|
|
249
311
|
// IMPORTANT: Exclude libc++_shared.so - React Native provides its own
|
|
250
312
|
// Using a different version causes ABI compatibility issues
|
|
251
313
|
exclude "**/libc++_shared.so"
|
|
314
|
+
// Exclude librac_commons.so - the core package (@runanywhere/core) is the
|
|
315
|
+
// authoritative source. Including it here risks a stale version winning the
|
|
316
|
+
// Gradle native lib merge, causing UnsatisfiedLinkError crashes at runtime.
|
|
317
|
+
exclude "**/librac_commons.so"
|
|
252
318
|
eachFile { fileCopyDetails ->
|
|
253
319
|
def pathString = fileCopyDetails.relativePath.pathString
|
|
254
320
|
// Handle RABackendLlamaCPP-android-vX.Y.Z/llamacpp/ABI/*.so structure
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file rac_llm_llamacpp.h
|
|
3
|
+
* @brief RunAnywhere Core - LlamaCPP Backend RAC API
|
|
4
|
+
*
|
|
5
|
+
* Direct RAC API export from runanywhere-core's LlamaCPP backend.
|
|
6
|
+
* This header defines the public C API for LLM inference using llama.cpp.
|
|
7
|
+
*
|
|
8
|
+
* Mirrors Swift's LlamaCPPService implementation pattern.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#ifndef RAC_LLM_LLAMACPP_H
|
|
12
|
+
#define RAC_LLM_LLAMACPP_H
|
|
13
|
+
|
|
14
|
+
#include "rac/core/rac_error.h"
|
|
15
|
+
#include "rac/core/rac_types.h"
|
|
16
|
+
#include "rac/features/llm/rac_llm.h"
|
|
17
|
+
|
|
18
|
+
#ifdef __cplusplus
|
|
19
|
+
extern "C" {
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
// =============================================================================
|
|
23
|
+
// EXPORT MACRO
|
|
24
|
+
// =============================================================================
|
|
25
|
+
|
|
26
|
+
#if defined(RAC_LLAMACPP_BUILDING)
|
|
27
|
+
#if defined(_WIN32)
|
|
28
|
+
#define RAC_LLAMACPP_API __declspec(dllexport)
|
|
29
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
|
30
|
+
#define RAC_LLAMACPP_API __attribute__((visibility("default")))
|
|
31
|
+
#else
|
|
32
|
+
#define RAC_LLAMACPP_API
|
|
33
|
+
#endif
|
|
34
|
+
#else
|
|
35
|
+
#define RAC_LLAMACPP_API
|
|
36
|
+
#endif
|
|
37
|
+
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// CONFIGURATION - Mirrors Swift's LlamaCPPGenerationConfig
|
|
40
|
+
// =============================================================================
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* LlamaCPP-specific configuration.
|
|
44
|
+
*
|
|
45
|
+
* Mirrors Swift's LlamaCPPGenerationConfig.
|
|
46
|
+
*/
|
|
47
|
+
typedef struct rac_llm_llamacpp_config {
|
|
48
|
+
/** Context size (0 = auto-detect from model) */
|
|
49
|
+
int32_t context_size;
|
|
50
|
+
|
|
51
|
+
/** Number of threads (0 = auto-detect) */
|
|
52
|
+
int32_t num_threads;
|
|
53
|
+
|
|
54
|
+
/** Number of layers to offload to GPU (Metal on iOS/macOS); -1 = all layers */
|
|
55
|
+
int32_t gpu_layers;
|
|
56
|
+
|
|
57
|
+
/** Batch size for prompt processing */
|
|
58
|
+
int32_t batch_size;
|
|
59
|
+
} rac_llm_llamacpp_config_t;
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Default LlamaCPP configuration.
|
|
63
|
+
*/
|
|
64
|
+
static const rac_llm_llamacpp_config_t RAC_LLM_LLAMACPP_CONFIG_DEFAULT = {
|
|
65
|
+
.context_size = 0, // Auto-detect
|
|
66
|
+
.num_threads = 0, // Auto-detect
|
|
67
|
+
.gpu_layers = -1, // All layers on GPU
|
|
68
|
+
.batch_size = 512};
|
|
69
|
+
|
|
70
|
+
// =============================================================================
|
|
71
|
+
// LLAMACPP-SPECIFIC API
|
|
72
|
+
// =============================================================================
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Creates a LlamaCPP LLM service.
|
|
76
|
+
*
|
|
77
|
+
* Mirrors Swift's LlamaCPPService.initialize(modelPath:)
|
|
78
|
+
*
|
|
79
|
+
* @param model_path Path to the GGUF model file
|
|
80
|
+
* @param config LlamaCPP-specific configuration (can be NULL for defaults)
|
|
81
|
+
* @param out_handle Output: Handle to the created service
|
|
82
|
+
* @return RAC_SUCCESS or error code
|
|
83
|
+
*/
|
|
84
|
+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_create(const char* model_path,
|
|
85
|
+
const rac_llm_llamacpp_config_t* config,
|
|
86
|
+
rac_handle_t* out_handle);
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Loads a GGUF model into an existing service.
|
|
90
|
+
*
|
|
91
|
+
* Mirrors Swift's LlamaCPPService.loadModel(path:config:)
|
|
92
|
+
*
|
|
93
|
+
* @param handle Service handle
|
|
94
|
+
* @param model_path Path to the GGUF model file
|
|
95
|
+
* @param config LlamaCPP configuration (can be NULL)
|
|
96
|
+
* @return RAC_SUCCESS or error code
|
|
97
|
+
*/
|
|
98
|
+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_load_model(rac_handle_t handle,
|
|
99
|
+
const char* model_path,
|
|
100
|
+
const rac_llm_llamacpp_config_t* config);
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Unloads the current model.
|
|
104
|
+
*
|
|
105
|
+
* Mirrors Swift's LlamaCPPService.unloadModel()
|
|
106
|
+
*
|
|
107
|
+
* @param handle Service handle
|
|
108
|
+
* @return RAC_SUCCESS or error code
|
|
109
|
+
*/
|
|
110
|
+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_unload_model(rac_handle_t handle);
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Checks if a model is loaded.
|
|
114
|
+
*
|
|
115
|
+
* Mirrors Swift's LlamaCPPService.isModelLoaded
|
|
116
|
+
*
|
|
117
|
+
* @param handle Service handle
|
|
118
|
+
* @return RAC_TRUE if model is loaded, RAC_FALSE otherwise
|
|
119
|
+
*/
|
|
120
|
+
RAC_LLAMACPP_API rac_bool_t rac_llm_llamacpp_is_model_loaded(rac_handle_t handle);
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Generates text completion.
|
|
124
|
+
*
|
|
125
|
+
* Mirrors Swift's LlamaCPPService.generate(prompt:config:)
|
|
126
|
+
*
|
|
127
|
+
* @param handle Service handle
|
|
128
|
+
* @param prompt Input prompt text
|
|
129
|
+
* @param options Generation options (can be NULL for defaults)
|
|
130
|
+
* @param out_result Output: Generation result (caller must free text with rac_free)
|
|
131
|
+
* @return RAC_SUCCESS or error code
|
|
132
|
+
*/
|
|
133
|
+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_generate(rac_handle_t handle, const char* prompt,
|
|
134
|
+
const rac_llm_options_t* options,
|
|
135
|
+
rac_llm_result_t* out_result);
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Streaming text generation callback.
|
|
139
|
+
*
|
|
140
|
+
* Mirrors Swift's streaming callback pattern.
|
|
141
|
+
*
|
|
142
|
+
* @param token Generated token string
|
|
143
|
+
* @param is_final Whether this is the final token
|
|
144
|
+
* @param user_data User-provided context
|
|
145
|
+
* @return RAC_TRUE to continue, RAC_FALSE to stop
|
|
146
|
+
*/
|
|
147
|
+
typedef rac_bool_t (*rac_llm_llamacpp_stream_callback_fn)(const char* token, rac_bool_t is_final,
|
|
148
|
+
void* user_data);
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Generates text with streaming callback.
|
|
152
|
+
*
|
|
153
|
+
* Mirrors Swift's LlamaCPPService.generateStream(prompt:config:)
|
|
154
|
+
*
|
|
155
|
+
* @param handle Service handle
|
|
156
|
+
* @param prompt Input prompt text
|
|
157
|
+
* @param options Generation options
|
|
158
|
+
* @param callback Callback for each token
|
|
159
|
+
* @param user_data User context passed to callback
|
|
160
|
+
* @return RAC_SUCCESS or error code
|
|
161
|
+
*/
|
|
162
|
+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_generate_stream(
|
|
163
|
+
rac_handle_t handle, const char* prompt, const rac_llm_options_t* options,
|
|
164
|
+
rac_llm_llamacpp_stream_callback_fn callback, void* user_data);
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Cancels ongoing generation.
|
|
168
|
+
*
|
|
169
|
+
* Mirrors Swift's LlamaCPPService.cancel()
|
|
170
|
+
*
|
|
171
|
+
* @param handle Service handle
|
|
172
|
+
*/
|
|
173
|
+
RAC_LLAMACPP_API void rac_llm_llamacpp_cancel(rac_handle_t handle);
|
|
174
|
+
|
|
175
|
+
/**
|
|
176
|
+
* Gets model information as JSON.
|
|
177
|
+
*
|
|
178
|
+
* @param handle Service handle
|
|
179
|
+
* @param out_json Output: JSON string (caller must free with rac_free)
|
|
180
|
+
* @return RAC_SUCCESS or error code
|
|
181
|
+
*/
|
|
182
|
+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_get_model_info(rac_handle_t handle, char** out_json);
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Destroys a LlamaCPP LLM service.
|
|
186
|
+
*
|
|
187
|
+
* @param handle Service handle to destroy
|
|
188
|
+
*/
|
|
189
|
+
RAC_LLAMACPP_API void rac_llm_llamacpp_destroy(rac_handle_t handle);
|
|
190
|
+
|
|
191
|
+
// =============================================================================
|
|
192
|
+
// BACKEND REGISTRATION
|
|
193
|
+
// =============================================================================
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Registers the LlamaCPP backend with the commons module and service registries.
|
|
197
|
+
*
|
|
198
|
+
* Should be called once during SDK initialization.
|
|
199
|
+
* This registers:
|
|
200
|
+
* - Module: "llamacpp" with TEXT_GENERATION capability
|
|
201
|
+
* - Service provider: LlamaCPP LLM provider
|
|
202
|
+
*
|
|
203
|
+
* @return RAC_SUCCESS or error code
|
|
204
|
+
*/
|
|
205
|
+
RAC_LLAMACPP_API rac_result_t rac_backend_llamacpp_register(void);
|
|
206
|
+
|
|
207
|
+
/**
|
|
208
|
+
* Unregisters the LlamaCPP backend.
|
|
209
|
+
*
|
|
210
|
+
* @return RAC_SUCCESS or error code
|
|
211
|
+
*/
|
|
212
|
+
RAC_LLAMACPP_API rac_result_t rac_backend_llamacpp_unregister(void);
|
|
213
|
+
|
|
214
|
+
#ifdef __cplusplus
|
|
215
|
+
}
|
|
216
|
+
#endif
|
|
217
|
+
|
|
218
|
+
#endif /* RAC_LLM_LLAMACPP_H */
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file rac_stt_onnx.h
|
|
3
|
+
* @brief RunAnywhere Core - ONNX Backend RAC API for STT
|
|
4
|
+
*
|
|
5
|
+
* Direct RAC API export from runanywhere-core's ONNX STT backend.
|
|
6
|
+
* Mirrors Swift's ONNXSTTService implementation pattern.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#ifndef RAC_STT_ONNX_H
|
|
10
|
+
#define RAC_STT_ONNX_H
|
|
11
|
+
|
|
12
|
+
#include "rac/core/rac_error.h"
|
|
13
|
+
#include "rac/core/rac_types.h"
|
|
14
|
+
#include "rac/features/stt/rac_stt.h"
|
|
15
|
+
|
|
16
|
+
#ifdef __cplusplus
|
|
17
|
+
extern "C" {
|
|
18
|
+
#endif
|
|
19
|
+
|
|
20
|
+
// =============================================================================
|
|
21
|
+
// EXPORT MACRO
|
|
22
|
+
// =============================================================================
|
|
23
|
+
|
|
24
|
+
#if defined(RAC_ONNX_BUILDING)
|
|
25
|
+
#if defined(_WIN32)
|
|
26
|
+
#define RAC_ONNX_API __declspec(dllexport)
|
|
27
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
|
28
|
+
#define RAC_ONNX_API __attribute__((visibility("default")))
|
|
29
|
+
#else
|
|
30
|
+
#define RAC_ONNX_API
|
|
31
|
+
#endif
|
|
32
|
+
#else
|
|
33
|
+
#define RAC_ONNX_API
|
|
34
|
+
#endif
|
|
35
|
+
|
|
36
|
+
// =============================================================================
|
|
37
|
+
// CONFIGURATION
|
|
38
|
+
// =============================================================================
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* ONNX STT model types.
|
|
42
|
+
*/
|
|
43
|
+
typedef enum rac_stt_onnx_model_type {
|
|
44
|
+
RAC_STT_ONNX_MODEL_WHISPER = 0,
|
|
45
|
+
RAC_STT_ONNX_MODEL_ZIPFORMER = 1,
|
|
46
|
+
RAC_STT_ONNX_MODEL_PARAFORMER = 2,
|
|
47
|
+
RAC_STT_ONNX_MODEL_AUTO = 99
|
|
48
|
+
} rac_stt_onnx_model_type_t;
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* ONNX STT configuration.
|
|
52
|
+
*/
|
|
53
|
+
typedef struct rac_stt_onnx_config {
|
|
54
|
+
rac_stt_onnx_model_type_t model_type;
|
|
55
|
+
int32_t num_threads;
|
|
56
|
+
rac_bool_t use_coreml;
|
|
57
|
+
} rac_stt_onnx_config_t;
|
|
58
|
+
|
|
59
|
+
static const rac_stt_onnx_config_t RAC_STT_ONNX_CONFIG_DEFAULT = {
|
|
60
|
+
.model_type = RAC_STT_ONNX_MODEL_AUTO, .num_threads = 0, .use_coreml = RAC_TRUE};
|
|
61
|
+
|
|
62
|
+
// =============================================================================
|
|
63
|
+
// ONNX STT API
|
|
64
|
+
// =============================================================================
|
|
65
|
+
|
|
66
|
+
RAC_ONNX_API rac_result_t rac_stt_onnx_create(const char* model_path,
|
|
67
|
+
const rac_stt_onnx_config_t* config,
|
|
68
|
+
rac_handle_t* out_handle);
|
|
69
|
+
|
|
70
|
+
RAC_ONNX_API rac_result_t rac_stt_onnx_transcribe(rac_handle_t handle, const float* audio_samples,
|
|
71
|
+
size_t num_samples,
|
|
72
|
+
const rac_stt_options_t* options,
|
|
73
|
+
rac_stt_result_t* out_result);
|
|
74
|
+
|
|
75
|
+
RAC_ONNX_API rac_bool_t rac_stt_onnx_supports_streaming(rac_handle_t handle);
|
|
76
|
+
|
|
77
|
+
RAC_ONNX_API rac_result_t rac_stt_onnx_create_stream(rac_handle_t handle, rac_handle_t* out_stream);
|
|
78
|
+
|
|
79
|
+
RAC_ONNX_API rac_result_t rac_stt_onnx_feed_audio(rac_handle_t handle, rac_handle_t stream,
|
|
80
|
+
const float* audio_samples, size_t num_samples);
|
|
81
|
+
|
|
82
|
+
RAC_ONNX_API rac_bool_t rac_stt_onnx_stream_is_ready(rac_handle_t handle, rac_handle_t stream);
|
|
83
|
+
|
|
84
|
+
RAC_ONNX_API rac_result_t rac_stt_onnx_decode_stream(rac_handle_t handle, rac_handle_t stream,
|
|
85
|
+
char** out_text);
|
|
86
|
+
|
|
87
|
+
RAC_ONNX_API void rac_stt_onnx_input_finished(rac_handle_t handle, rac_handle_t stream);
|
|
88
|
+
|
|
89
|
+
RAC_ONNX_API rac_bool_t rac_stt_onnx_is_endpoint(rac_handle_t handle, rac_handle_t stream);
|
|
90
|
+
|
|
91
|
+
RAC_ONNX_API void rac_stt_onnx_destroy_stream(rac_handle_t handle, rac_handle_t stream);
|
|
92
|
+
|
|
93
|
+
RAC_ONNX_API void rac_stt_onnx_destroy(rac_handle_t handle);
|
|
94
|
+
|
|
95
|
+
#ifdef __cplusplus
|
|
96
|
+
}
|
|
97
|
+
#endif
|
|
98
|
+
|
|
99
|
+
#endif /* RAC_STT_ONNX_H */
|