@runanywhere/llamacpp 0.16.0

Files changed (50)
  1. package/RunAnywhereLlama.podspec +131 -0
  2. package/android/CMakeLists.txt +105 -0
  3. package/android/build.gradle +288 -0
  4. package/android/src/main/AndroidManifest.xml +3 -0
  5. package/android/src/main/cpp/cpp-adapter.cpp +14 -0
  6. package/android/src/main/java/com/margelo/nitro/runanywhere/llama/RunAnywhereLlamaPackage.kt +35 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librac_backend_llamacpp_jni.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librunanywhere_llamacpp.so +0 -0
  10. package/cpp/HybridRunAnywhereLlama.cpp +346 -0
  11. package/cpp/HybridRunAnywhereLlama.hpp +107 -0
  12. package/cpp/bridges/LLMBridge.cpp +209 -0
  13. package/cpp/bridges/LLMBridge.hpp +109 -0
  14. package/cpp/bridges/StructuredOutputBridge.cpp +151 -0
  15. package/cpp/bridges/StructuredOutputBridge.hpp +66 -0
  16. package/cpp/rac_llm_llamacpp.h +34 -0
  17. package/ios/.testlocal +0 -0
  18. package/ios/Frameworks/RABackendLLAMACPP.xcframework/Info.plist +44 -0
  19. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
  20. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Info.plist +11 -0
  21. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
  22. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
  23. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Headers/RABackendLLAMACPP.h +2 -0
  24. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Info.plist +11 -0
  25. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/Modules/module.modulemap +5 -0
  26. package/ios/Frameworks/RABackendLLAMACPP.xcframework/ios-arm64_x86_64-simulator/RABackendLLAMACPP.framework/RABackendLLAMACPP +0 -0
  27. package/ios/LlamaCPPBackend.podspec +127 -0
  28. package/nitro.json +16 -0
  29. package/nitrogen/generated/.gitattributes +1 -0
  30. package/nitrogen/generated/android/kotlin/com/margelo/nitro/runanywhere/llama/runanywherellamaOnLoad.kt +35 -0
  31. package/nitrogen/generated/android/runanywherellama+autolinking.cmake +81 -0
  32. package/nitrogen/generated/android/runanywherellama+autolinking.gradle +27 -0
  33. package/nitrogen/generated/android/runanywherellamaOnLoad.cpp +44 -0
  34. package/nitrogen/generated/android/runanywherellamaOnLoad.hpp +25 -0
  35. package/nitrogen/generated/ios/RunAnywhereLlama+autolinking.rb +60 -0
  36. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.cpp +17 -0
  37. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Bridge.hpp +27 -0
  38. package/nitrogen/generated/ios/RunAnywhereLlama-Swift-Cxx-Umbrella.hpp +38 -0
  39. package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.mm +35 -0
  40. package/nitrogen/generated/ios/RunAnywhereLlamaAutolinking.swift +12 -0
  41. package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.cpp +33 -0
  42. package/nitrogen/generated/shared/c++/HybridRunAnywhereLlamaSpec.hpp +77 -0
  43. package/package.json +60 -0
  44. package/react-native.config.js +14 -0
  45. package/src/LlamaCPP.ts +206 -0
  46. package/src/LlamaCppProvider.ts +120 -0
  47. package/src/index.ts +59 -0
  48. package/src/native/NativeRunAnywhereLlama.ts +58 -0
  49. package/src/native/index.ts +11 -0
  50. package/src/specs/RunAnywhereLlama.nitro.ts +160 -0
package/src/LlamaCPP.ts ADDED
@@ -0,0 +1,206 @@
+ /**
+  * @runanywhere/llamacpp - LlamaCPP Module
+  *
+  * LlamaCPP module wrapper for RunAnywhere React Native SDK.
+  * Provides public API for module registration and model declaration.
+  *
+  * This mirrors the Swift SDK's LlamaCPP module pattern:
+  * - LlamaCPP.register() - Register the module with ServiceRegistry
+  * - LlamaCPP.addModel() - Declare a model for this module
+  *
+  * Reference: sdk/runanywhere-swift/Sources/LlamaCPPRuntime/LlamaCPPServiceProvider.swift
+  */
+
+ import { LlamaCppProvider } from './LlamaCppProvider';
+ import {
+   ModelRegistry,
+   FileSystem,
+   LLMFramework,
+   ModelCategory,
+   ModelFormat,
+   ConfigurationSource,
+   SDKLogger,
+   type ModelInfo,
+ } from '@runanywhere/core';
+
+ // SDKLogger instance for this module
+ const log = new SDKLogger('LLM.LlamaCpp');
+
+ /**
+  * Model registration options for LlamaCPP models
+  *
+  * Matches iOS: LlamaCPP.addModel() parameter structure
+  */
+ export interface LlamaCPPModelOptions {
+   /** Unique model ID. If not provided, generated from URL filename */
+   id?: string;
+   /** Display name for the model */
+   name: string;
+   /** Download URL for the model */
+   url: string;
+   /** Model category (defaults to Language for LLM models) */
+   modality?: ModelCategory;
+   /** Memory requirement in bytes */
+   memoryRequirement?: number;
+   /** Whether model supports reasoning/thinking tokens */
+   supportsThinking?: boolean;
+ }
+
+ /**
+  * LlamaCPP Module
+  *
+  * Public API for registering LlamaCPP module and declaring GGUF models.
+  * This provides the same developer experience as the iOS SDK.
+  *
+  * ## Usage
+  *
+  * ```typescript
+  * import { LlamaCPP } from '@runanywhere/llamacpp';
+  *
+  * // Register module
+  * LlamaCPP.register();
+  *
+  * // Add models
+  * LlamaCPP.addModel({
+  *   id: 'smollm2-360m-q8_0',
+  *   name: 'SmolLM2 360M Q8_0',
+  *   url: 'https://huggingface.co/prithivMLmods/SmolLM2-360M-GGUF/resolve/main/SmolLM2-360M.Q8_0.gguf',
+  *   memoryRequirement: 500_000_000
+  * });
+  * ```
+  *
+  * Matches iOS: public enum LlamaCPP: RunAnywhereModule
+  */
+ export const LlamaCPP = {
+   /**
+    * Module metadata
+    * Matches iOS: static let moduleId, moduleName, inferenceFramework
+    */
+   moduleId: 'llamacpp',
+   moduleName: 'LlamaCPP',
+   inferenceFramework: LLMFramework.LlamaCpp,
+   capabilities: ['llm'] as const,
+   defaultPriority: 100,
+
+   /**
+    * Register LlamaCPP module with the SDK
+    *
+    * This registers the LlamaCPP provider with ServiceRegistry,
+    * enabling it to handle GGUF models.
+    *
+    * Matches iOS: static func register(priority: Int = defaultPriority)
+    *
+    * @example
+    * ```typescript
+    * LlamaCPP.register();
+    * ```
+    */
+   register(): void {
+     log.debug('Registering LlamaCPP module');
+     LlamaCppProvider.register();
+     log.info('LlamaCPP module registered');
+   },
+
+   /**
+    * Add a model to this module
+    *
+    * Registers a GGUF model with the ModelRegistry.
+    * The model will use LlamaCPP framework automatically.
+    *
+    * Matches iOS: static func addModel(id:name:url:modality:memoryRequirement:supportsThinking:)
+    *
+    * @param options - Model registration options
+    * @returns Promise resolving to the created ModelInfo
+    *
+    * @example
+    * ```typescript
+    * await LlamaCPP.addModel({
+    *   id: 'llama-2-7b-chat-q4_k_m',
+    *   name: 'Llama 2 7B Chat Q4_K_M',
+    *   url: 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf',
+    *   memoryRequirement: 4_000_000_000
+    * });
+    * ```
+    */
+   async addModel(options: LlamaCPPModelOptions): Promise<ModelInfo> {
+     // Generate stable ID from URL if not provided
+     const modelId = options.id ?? this._generateModelId(options.url);
+
+     // Determine modality (default to Language for LLM)
+     const category = options.modality ?? ModelCategory.Language;
+
+     // Infer format from URL
+     const format = options.url.toLowerCase().includes('.gguf')
+       ? ModelFormat.GGUF
+       : ModelFormat.GGML;
+
+     const now = new Date().toISOString();
+
+     // Check if model already exists on disk (persistence across sessions)
+     let isDownloaded = false;
+     let localPath: string | undefined;
+
+     if (FileSystem.isAvailable()) {
+       try {
+         const exists = await FileSystem.modelExists(modelId, 'LlamaCpp');
+         if (exists) {
+           localPath = await FileSystem.getModelPath(modelId, 'LlamaCpp');
+           isDownloaded = true;
+           log.debug(`Model ${modelId} found on disk: ${localPath}`);
+         }
+       } catch (error) {
+         // Ignore errors checking for existing model
+         log.debug(`Could not check for existing model ${modelId}: ${error}`);
+       }
+     }
+
+     const modelInfo: ModelInfo = {
+       id: modelId,
+       name: options.name,
+       category,
+       format,
+       downloadURL: options.url,
+       localPath,
+       downloadSize: undefined,
+       memoryRequired: options.memoryRequirement,
+       compatibleFrameworks: [LLMFramework.LlamaCpp],
+       preferredFramework: LLMFramework.LlamaCpp,
+       supportsThinking: options.supportsThinking ?? false,
+       metadata: { tags: [] },
+       source: ConfigurationSource.Local,
+       createdAt: now,
+       updatedAt: now,
+       syncPending: false,
+       usageCount: 0,
+       isDownloaded,
+       isAvailable: true,
+     };
+
+     // Register with ModelRegistry and wait for completion
+     await ModelRegistry.registerModel(modelInfo);
+
+     log.info(`Added model: ${modelId} (${options.name})`, {
+       modelId,
+       isDownloaded,
+     });
+
+     return modelInfo;
+   },
+
+   /**
+    * Generate a stable model ID from URL
+    * @internal
+    */
+   _generateModelId(url: string): string {
+     try {
+       const urlObj = new URL(url);
+       const pathname = urlObj.pathname;
+       const filename = pathname.split('/').pop() ?? 'model';
+       // Remove common extensions
+       return filename.replace(/\.(gguf|ggml|bin)$/i, '');
+     } catch {
+       // Fallback for invalid URLs
+       return `model-${Date.now()}`;
+     }
+   },
+ };
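A note on the ID and format derivation above: when no `id` is supplied, `addModel` takes the URL's filename and strips a `.gguf`/`.ggml`/`.bin` extension, and the format is inferred from whether `.gguf` appears in the URL. A minimal sketch of that behavior, with an illustrative (not official) URL:

```typescript
// Sketch only: mirrors the ID/format derivation in LlamaCPP.addModel above.
// The URL is illustrative, not a real model link.
const url = 'https://example.com/models/SmolLM2-360M.Q8_0.gguf';

const filename = new URL(url).pathname.split('/').pop() ?? 'model';
const modelId = filename.replace(/\.(gguf|ggml|bin)$/i, ''); // 'SmolLM2-360M.Q8_0'
const isGguf = url.toLowerCase().includes('.gguf');          // true -> ModelFormat.GGUF

console.log({ modelId, isGguf });
```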
package/src/LlamaCppProvider.ts ADDED
@@ -0,0 +1,120 @@
+ /**
+  * @runanywhere/llamacpp - LlamaCPP Provider
+  *
+  * LlamaCPP module registration for React Native SDK.
+  * Thin wrapper that triggers C++ backend registration.
+  *
+  * Reference: sdk/runanywhere-swift/Sources/LlamaCPPRuntime/LlamaCPP.swift
+  */
+
+ import { requireNativeLlamaModule, isNativeLlamaModuleAvailable } from './native/NativeRunAnywhereLlama';
+ import { SDKLogger } from '@runanywhere/core';
+
+ // SDKLogger instance for this module
+ const log = new SDKLogger('LLM.LlamaCppProvider');
+
+ /**
+  * LlamaCPP Module
+  *
+  * Provides LLM capabilities using llama.cpp with GGUF models.
+  * The actual service is provided by the C++ backend.
+  *
+  * ## Registration
+  *
+  * ```typescript
+  * import { LlamaCppProvider } from '@runanywhere/llamacpp';
+  *
+  * // Register the backend
+  * await LlamaCppProvider.register();
+  * ```
+  */
+ export class LlamaCppProvider {
+   static readonly moduleId = 'llamacpp';
+   static readonly moduleName = 'LlamaCPP';
+   static readonly version = '2.0.0';
+
+   private static isRegistered = false;
+
+   /**
+    * Register LlamaCPP backend with the C++ service registry.
+    * Calls rac_backend_llamacpp_register() to register the
+    * LlamaCPP service provider with the C++ commons layer.
+    * Safe to call multiple times - subsequent calls are no-ops.
+    * @returns Promise<boolean> true if registered successfully
+    */
+   static async register(): Promise<boolean> {
+     if (this.isRegistered) {
+       log.debug('LlamaCPP already registered, returning');
+       return true;
+     }
+
+     if (!isNativeLlamaModuleAvailable()) {
+       log.warning('LlamaCPP native module not available');
+       return false;
+     }
+
+     log.debug('Registering LlamaCPP backend with C++ registry');
+
+     try {
+       const native = requireNativeLlamaModule();
+       // Call the native registration method from the Llama module
+       const success = await native.registerBackend();
+       if (success) {
+         this.isRegistered = true;
+         log.info('LlamaCPP backend registered successfully');
+       }
+       return success;
+     } catch (error) {
+       const msg = error instanceof Error ? error.message : String(error);
+       log.warning(`LlamaCPP registration failed: ${msg}`);
+       return false;
+     }
+   }
+
+   /**
+    * Unregister the LlamaCPP backend from C++ registry.
+    * @returns Promise<boolean> true if unregistered successfully
+    */
+   static async unregister(): Promise<boolean> {
+     if (!this.isRegistered) {
+       return true;
+     }
+
+     if (!isNativeLlamaModuleAvailable()) {
+       return false;
+     }
+
+     try {
+       const native = requireNativeLlamaModule();
+       const success = await native.unregisterBackend();
+       if (success) {
+         this.isRegistered = false;
+         log.debug('LlamaCPP backend unregistered');
+       }
+       return success;
+     } catch (error) {
+       log.error(`LlamaCPP unregistration failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+
+   /**
+    * Check if LlamaCPP can handle a given model
+    */
+   static canHandle(modelId: string | null | undefined): boolean {
+     if (!modelId) {
+       return false;
+     }
+     const lowercased = modelId.toLowerCase();
+     return lowercased.includes('gguf') || lowercased.endsWith('.gguf');
+   }
+ }
+
+ /**
+  * Auto-register when module is imported
+  */
+ export function autoRegister(): void {
+   LlamaCppProvider.register().catch(() => {
+     // Silently handle registration failure during auto-registration
+   });
+ }
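Since `register()` resolves to `false` rather than throwing when the native module is missing or registration fails, callers that need a hard failure have to check the boolean themselves. A short sketch of that pattern (the error message wording is ours, not from the package):

```typescript
import { LlamaCppProvider } from '@runanywhere/llamacpp';

// Register the backend and fail fast if the native module is unavailable.
const registered = await LlamaCppProvider.register();
if (!registered) {
  throw new Error('LlamaCPP backend could not be registered (native module missing?)');
}

// canHandle() is a simple heuristic over the model identifier string.
LlamaCppProvider.canHandle('smollm2-360m.gguf'); // true
LlamaCppProvider.canHandle('whisper-base.bin');  // false
```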
package/src/index.ts ADDED
@@ -0,0 +1,59 @@
+ /**
+  * @runanywhere/llamacpp - LlamaCPP Backend for RunAnywhere React Native SDK
+  *
+  * This package provides the LlamaCPP backend for on-device LLM inference.
+  * It supports GGUF models and provides the same API as the iOS SDK.
+  *
+  * ## Usage
+  *
+  * ```typescript
+  * import { RunAnywhere } from '@runanywhere/core';
+  * import { LlamaCPP, LlamaCppProvider } from '@runanywhere/llamacpp';
+  *
+  * // Initialize core SDK
+  * await RunAnywhere.initialize({ apiKey: 'your-key' });
+  *
+  * // Register LlamaCPP backend (calls native rac_backend_llamacpp_register)
+  * await LlamaCppProvider.register();
+  *
+  * // Add a model
+  * LlamaCPP.addModel({
+  *   id: 'smollm2-360m-q8_0',
+  *   name: 'SmolLM2 360M Q8_0',
+  *   url: 'https://huggingface.co/.../SmolLM2-360M.Q8_0.gguf',
+  *   memoryRequirement: 500_000_000
+  * });
+  *
+  * // Download and use
+  * await RunAnywhere.downloadModel('smollm2-360m-q8_0');
+  * await RunAnywhere.loadModel('smollm2-360m-q8_0');
+  * const result = await RunAnywhere.generate('Hello, world!');
+  * ```
+  *
+  * @packageDocumentation
+  */
+
+ // =============================================================================
+ // Main API
+ // =============================================================================
+
+ export { LlamaCPP, type LlamaCPPModelOptions } from './LlamaCPP';
+ export { LlamaCppProvider, autoRegister } from './LlamaCppProvider';
+
+ // =============================================================================
+ // Native Module
+ // =============================================================================
+
+ export {
+   NativeRunAnywhereLlama,
+   getNativeLlamaModule,
+   requireNativeLlamaModule,
+   isNativeLlamaModuleAvailable,
+ } from './native/NativeRunAnywhereLlama';
+ export type { NativeRunAnywhereLlamaModule } from './native/NativeRunAnywhereLlama';
+
+ // =============================================================================
+ // Nitrogen Spec Types
+ // =============================================================================
+
+ export type { RunAnywhereLlama } from './specs/RunAnywhereLlama.nitro';
package/src/native/NativeRunAnywhereLlama.ts ADDED
@@ -0,0 +1,58 @@
+ /**
+  * NativeRunAnywhereLlama.ts
+  *
+  * Exports the native RunAnywhereLlama Hybrid Object from Nitro Modules.
+  * This module provides Llama-based text generation capabilities.
+  */
+
+ import { NitroModules } from 'react-native-nitro-modules';
+ import type { RunAnywhereLlama } from '../specs/RunAnywhereLlama.nitro';
+
+ /**
+  * The native RunAnywhereLlama module type
+  */
+ export type NativeRunAnywhereLlamaModule = RunAnywhereLlama;
+
+ /**
+  * Get the native RunAnywhereLlama Hybrid Object
+  */
+ export function requireNativeLlamaModule(): NativeRunAnywhereLlamaModule {
+   return NitroModules.createHybridObject<RunAnywhereLlama>('RunAnywhereLlama');
+ }
+
+ /**
+  * Check if the native Llama module is available
+  */
+ export function isNativeLlamaModuleAvailable(): boolean {
+   try {
+     requireNativeLlamaModule();
+     return true;
+   } catch {
+     return false;
+   }
+ }
+
+ /**
+  * Singleton instance of the native module (lazy initialized)
+  */
+ let _nativeModule: NativeRunAnywhereLlamaModule | undefined;
+
+ /**
+  * Get the singleton native module instance
+  */
+ export function getNativeLlamaModule(): NativeRunAnywhereLlamaModule {
+   if (!_nativeModule) {
+     _nativeModule = requireNativeLlamaModule();
+   }
+   return _nativeModule;
+ }
+
+ /**
+  * Default export - the native module getter
+  */
+ export const NativeRunAnywhereLlama = {
+   get: getNativeLlamaModule,
+   isAvailable: isNativeLlamaModuleAvailable,
+ };
+
+ export default NativeRunAnywhereLlama;
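A short usage sketch of the accessors exported above: guard with `isNativeLlamaModuleAvailable()` before touching the hybrid object, then reuse the lazy singleton from `getNativeLlamaModule()`.

```typescript
import {
  getNativeLlamaModule,
  isNativeLlamaModuleAvailable,
} from '@runanywhere/llamacpp';

// Only touch the hybrid object if Nitro could create it on this platform.
if (isNativeLlamaModuleAvailable()) {
  const llama = getNativeLlamaModule();     // lazily created singleton
  const ok = await llama.registerBackend(); // see the Nitrogen spec below
  console.log('backend registered:', ok);
}
```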
package/src/native/index.ts ADDED
@@ -0,0 +1,11 @@
+ /**
+  * Native module exports for @runanywhere/llamacpp
+  */
+
+ export {
+   NativeRunAnywhereLlama,
+   getNativeLlamaModule,
+   requireNativeLlamaModule,
+   isNativeLlamaModuleAvailable,
+ } from './NativeRunAnywhereLlama';
+ export type { NativeRunAnywhereLlamaModule } from './NativeRunAnywhereLlama';
package/src/specs/RunAnywhereLlama.nitro.ts ADDED
@@ -0,0 +1,160 @@
+ /**
+  * RunAnywhereLlama Nitrogen Spec
+  *
+  * LlamaCPP backend interface for Llama-based text generation:
+  * - Backend Registration
+  * - Model Loading/Unloading
+  * - Text Generation (non-streaming and streaming)
+  * - Structured Output (JSON schema generation)
+  *
+  * Matches Swift SDK: LlamaCPPRuntime/LlamaCPP.swift + CppBridge+LLM.swift
+  */
+ import type { HybridObject } from 'react-native-nitro-modules';
+
+ /**
+  * Llama text generation native interface
+  *
+  * This interface provides Llama-based LLM capabilities.
+  * Requires @runanywhere/core to be initialized first.
+  */
+ export interface RunAnywhereLlama
+   extends HybridObject<{
+     ios: 'c++';
+     android: 'c++';
+   }> {
+   // ============================================================================
+   // Backend Registration
+   // Matches Swift: LlamaCPP.register(), LlamaCPP.unregister()
+   // ============================================================================
+
+   /**
+    * Register the LlamaCPP backend with the C++ service registry.
+    * Calls rac_backend_llamacpp_register() from runanywhere-binaries.
+    * Safe to call multiple times - subsequent calls are no-ops.
+    * @returns true if registered successfully (or already registered)
+    */
+   registerBackend(): Promise<boolean>;
+
+   /**
+    * Unregister the LlamaCPP backend from the C++ service registry.
+    * @returns true if unregistered successfully
+    */
+   unregisterBackend(): Promise<boolean>;
+
+   /**
+    * Check if the LlamaCPP backend is registered
+    * @returns true if backend is registered
+    */
+   isBackendRegistered(): Promise<boolean>;
+
+   // ============================================================================
+   // Model Loading
+   // Matches Swift: CppBridge+LLM.swift loadTextModel/unloadTextModel
+   // ============================================================================
+
+   /**
+    * Load a Llama model for text generation
+    * @param path Path to the model file (.gguf)
+    * @param modelId Optional unique identifier for the model
+    * @param modelName Optional human-readable name for the model
+    * @param configJson Optional JSON configuration (context_length, gpu_layers, etc.)
+    * @returns true if loaded successfully
+    */
+   loadModel(
+     path: string,
+     modelId?: string,
+     modelName?: string,
+     configJson?: string
+   ): Promise<boolean>;
+
+   /**
+    * Check if a Llama model is loaded
+    */
+   isModelLoaded(): Promise<boolean>;
+
+   /**
+    * Unload the current Llama model
+    */
+   unloadModel(): Promise<boolean>;
+
+   /**
+    * Get info about the currently loaded model
+    * @returns JSON with model info or empty if not loaded
+    */
+   getModelInfo(): Promise<string>;
+
+   // ============================================================================
+   // Text Generation
+   // Matches Swift: RunAnywhere+TextGeneration.swift
+   // ============================================================================
+
+   /**
+    * Generate text (non-streaming)
+    * @param prompt The prompt text
+    * @param optionsJson JSON string with generation options:
+    *   - max_tokens: Maximum tokens to generate (default: 512)
+    *   - temperature: Sampling temperature (default: 0.7)
+    *   - top_p: Nucleus sampling parameter (default: 0.9)
+    *   - top_k: Top-k sampling parameter (default: 40)
+    *   - system_prompt: Optional system prompt
+    * @returns JSON string with generation result:
+    *   - text: Generated text
+    *   - tokensUsed: Number of tokens generated
+    *   - latencyMs: Generation time in milliseconds
+    *   - cancelled: Whether generation was cancelled
+    */
+   generate(prompt: string, optionsJson?: string): Promise<string>;
+
+   /**
+    * Generate text with streaming callback
+    * @param prompt The prompt text
+    * @param optionsJson JSON string with generation options
+    * @param callback Called for each token with (token, isComplete)
+    * @returns Complete generated text
+    */
+   generateStream(
+     prompt: string,
+     optionsJson: string,
+     callback: (token: string, isComplete: boolean) => void
+   ): Promise<string>;
+
+   /**
+    * Cancel ongoing text generation
+    * @returns true if cancellation was successful
+    */
+   cancelGeneration(): Promise<boolean>;
+
+   // ============================================================================
+   // Structured Output
+   // Matches Swift: RunAnywhere+StructuredOutput.swift
+   // ============================================================================
+
+   /**
+    * Generate structured output following a JSON schema
+    * Uses constrained generation to ensure output conforms to schema
+    * @param prompt The prompt text
+    * @param schema JSON schema string defining the output structure
+    * @param optionsJson Optional generation options
+    * @returns JSON string conforming to the provided schema
+    */
+   generateStructured(
+     prompt: string,
+     schema: string,
+     optionsJson?: string
+   ): Promise<string>;
+
+   // ============================================================================
+   // Utilities
+   // ============================================================================
+
+   /**
+    * Get the last error message from the Llama backend
+    */
+   getLastError(): Promise<string>;
+
+   /**
+    * Get current memory usage of the Llama backend
+    * @returns Memory usage in bytes
+    */
+   getMemoryUsage(): Promise<number>;
+ }
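Because `generate` and `generateStream` exchange options and results as JSON strings, a caller is expected to serialize the options object and parse the returned JSON. A hedged sketch based only on the fields documented in the spec above (the exact result shape is defined by the C++ backend):

```typescript
import { requireNativeLlamaModule } from '@runanywhere/llamacpp';

const llama = requireNativeLlamaModule();

// Options and results cross the bridge as JSON strings (see generate() docs above).
const options = JSON.stringify({ max_tokens: 256, temperature: 0.7, top_p: 0.9 });

// Non-streaming: parse the documented result fields.
const raw = await llama.generate('Write a haiku about GGUF files.', options);
const { text, tokensUsed, latencyMs } = JSON.parse(raw);
console.log(text, tokensUsed, latencyMs);

// Streaming: tokens arrive via the callback; the promise resolves with the full text.
let streamed = '';
const full = await llama.generateStream('Hello!', options, (token, isComplete) => {
  if (!isComplete) streamed += token;
});
console.log(full === streamed || full.length > 0);
```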