llama-cpp-capacitor 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/LlamaCpp.podspec +17 -0
- package/Package.swift +28 -0
- package/README.md +574 -0
- package/android/build.gradle +58 -0
- package/android/src/main/AndroidManifest.xml +2 -0
- package/android/src/main/CMakeLists.txt +148 -0
- package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCpp.java +677 -0
- package/android/src/main/java/ai/annadata/plugin/capacitor/LlamaCppPlugin.java +482 -0
- package/android/src/main/jni-utils.h +139 -0
- package/android/src/main/jni.cpp +271 -0
- package/android/src/main/res/.gitkeep +0 -0
- package/dist/docs.json +5513 -0
- package/dist/esm/definitions.d.ts +653 -0
- package/dist/esm/definitions.js +2 -0
- package/dist/esm/definitions.js.map +1 -0
- package/dist/esm/index.d.ts +180 -0
- package/dist/esm/index.js +518 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/plugin.cjs.js +531 -0
- package/dist/plugin.cjs.js.map +1 -0
- package/dist/plugin.js +534 -0
- package/dist/plugin.js.map +1 -0
- package/ios/Sources/LlamaCppPlugin/LlamaCpp.swift +596 -0
- package/ios/Sources/LlamaCppPlugin/LlamaCppPlugin.swift +514 -0
- package/ios/Tests/LlamaCppPluginTests/LlamaCppPluginTests.swift +15 -0
- package/package.json +108 -0
package/ios/Sources/LlamaCppPlugin/LlamaCpp.swift
@@ -0,0 +1,596 @@
import Foundation

// MARK: - Native Library Integration
private var contexts: [Int64: UnsafeMutableRawPointer] = [:]
private var nextContextId: Int64 = 1

// Load the native library. Globals are already initialized lazily in Swift,
// so no `lazy` modifier is needed (or allowed) here.
private var llamaLibrary: UnsafeMutableRawPointer? = {
    // Bundle.main.path(forResource:ofType:) returns the framework directory;
    // dlopen needs the path of the binary inside it.
    guard let frameworkPath = Bundle.main.path(forResource: "llama-cpp", ofType: "framework") else {
        print("Error: llama-cpp framework not found")
        return nil
    }

    let binaryPath = frameworkPath + "/llama-cpp"
    guard let handle = dlopen(binaryPath, RTLD_NOW) else {
        if let err = dlerror() {
            print("Error: Failed to load llama-cpp library: \(String(cString: err))")
        }
        return nil
    }

    return handle
}()

// Function pointers for native calls.
// C symbols must be typed with @convention(c) and C-compatible parameter
// types; Swift closure types cannot be produced with unsafeBitCast.
private typealias InitContextFn = @convention(c) (UnsafePointer<CChar>, UnsafePointer<CChar>) -> Int64
private typealias ReleaseContextFn = @convention(c) (Int64) -> Void
private typealias CompletionFn = @convention(c) (Int64, UnsafePointer<CChar>, UnsafePointer<CChar>) -> UnsafePointer<CChar>?
private typealias StopCompletionFn = @convention(c) (Int64) -> Void
private typealias GetFormattedChatFn = @convention(c) (Int64, UnsafePointer<CChar>, UnsafePointer<CChar>) -> UnsafePointer<CChar>?
private typealias ToggleNativeLogFn = @convention(c) (Bool) -> Bool

private var initContextFunc: InitContextFn?
private var releaseContextFunc: ReleaseContextFn?
private var completionFunc: CompletionFn?
private var stopCompletionFunc: StopCompletionFn?
private var getFormattedChatFunc: GetFormattedChatFn?
private var toggleNativeLogFunc: ToggleNativeLogFn?

private func loadFunctionPointers() {
    guard let library = llamaLibrary else { return }

    // Load function pointers from the native library. dlsym returns nil for
    // missing symbols, so each result is checked before it is cast to its
    // C function type.
    func load<T>(_ symbol: String, as type: T.Type) -> T? {
        guard let sym = dlsym(library, symbol) else { return nil }
        return unsafeBitCast(sym, to: type)
    }

    initContextFunc = load("llama_init_context", as: InitContextFn.self)
    releaseContextFunc = load("llama_release_context", as: ReleaseContextFn.self)
    completionFunc = load("llama_completion", as: CompletionFn.self)
    stopCompletionFunc = load("llama_stop_completion", as: StopCompletionFn.self)
    getFormattedChatFunc = load("llama_get_formatted_chat", as: GetFormattedChatFn.self)
    toggleNativeLogFunc = load("llama_toggle_native_log", as: ToggleNativeLogFn.self)
}

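// Example (sketch): invoking one of the loaded C functions. Swift strings are
// bridged with withCString. The native signature is an assumption based on the
// typealiases above, not a documented ABI, and this helper is illustrative
// only; it is not part of the plugin's public surface.
private func exampleInitContext(modelPath: String, paramsJson: String) -> Int64? {
    guard let initFn = initContextFunc else { return nil }
    return modelPath.withCString { pathPtr in
        paramsJson.withCString { paramsPtr in
            initFn(pathPtr, paramsPtr)
        }
    }
}
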
// MARK: - Result Types
typealias LlamaResult<T> = Result<T, LlamaError>

enum LlamaError: Error, LocalizedError {
    case contextNotFound
    case modelNotFound
    case invalidParameters
    case operationFailed(String)
    case notImplemented

    var errorDescription: String? {
        switch self {
        case .contextNotFound:
            return "Context not found"
        case .modelNotFound:
            return "Model not found"
        case .invalidParameters:
            return "Invalid parameters"
        case .operationFailed(let message):
            return "Operation failed: \(message)"
        case .notImplemented:
            return "Operation not implemented"
        }
    }
}

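// Example (sketch): consuming a LlamaResult from one of the callback-based
// APIs below. `llama` is a hypothetical LlamaCpp instance; shown as a comment
// so it does not alter the file's compiled surface.
//
//     llama.releaseContext(contextId: 1) { (result: LlamaResult<Void>) in
//         switch result {
//         case .success:
//             print("context released")
//         case .failure(let error):
//             print(error.localizedDescription)
//         }
//     }
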
// MARK: - Context Management
class LlamaContext {
    let id: Int
    var model: LlamaModel?
    var isMultimodalEnabled: Bool = false
    var isVocoderEnabled: Bool = false

    init(id: Int) {
        self.id = id
    }
}

class LlamaModel {
    let path: String
    let desc: String
    let size: Int
    let nEmbd: Int
    let nParams: Int
    let chatTemplates: ChatTemplates
    let metadata: [String: Any]

    init(path: String, desc: String, size: Int, nEmbd: Int, nParams: Int, chatTemplates: ChatTemplates, metadata: [String: Any]) {
        self.path = path
        self.desc = desc
        self.size = size
        self.nEmbd = nEmbd
        self.nParams = nParams
        self.chatTemplates = chatTemplates
        self.metadata = metadata
    }
}

struct ChatTemplates {
    let llamaChat: Bool
    let minja: MinjaTemplates

    init(llamaChat: Bool, minja: MinjaTemplates) {
        self.llamaChat = llamaChat
        self.minja = minja
    }
}

struct MinjaTemplates {
    let `default`: Bool
    let defaultCaps: MinjaCaps
    let toolUse: Bool
    let toolUseCaps: MinjaCaps

    // `default` is a Swift keyword, so it must be escaped with backticks when
    // used as a parameter or property name.
    init(`default`: Bool, defaultCaps: MinjaCaps, toolUse: Bool, toolUseCaps: MinjaCaps) {
        self.`default` = `default`
        self.defaultCaps = defaultCaps
        self.toolUse = toolUse
        self.toolUseCaps = toolUseCaps
    }
}

struct MinjaCaps {
    let tools: Bool
    let toolCalls: Bool
    let toolResponses: Bool
    let systemRole: Bool
    let parallelToolCalls: Bool
    let toolCallId: Bool

    init(tools: Bool, toolCalls: Bool, toolResponses: Bool, systemRole: Bool, parallelToolCalls: Bool, toolCallId: Bool) {
        self.tools = tools
        self.toolCalls = toolCalls
        self.toolResponses = toolResponses
        self.systemRole = systemRole
        self.parallelToolCalls = parallelToolCalls
        self.toolCallId = toolCallId
    }
}

// MARK: - Main Implementation
@objc public class LlamaCpp: NSObject {
    private var contexts: [Int: LlamaContext] = [:]
    private var contextCounter: Int = 0
    private var contextLimit: Int = 10
    private var nativeLogEnabled: Bool = false

    // MARK: - Core initialization and management

    func toggleNativeLog(enabled: Bool, completion: @escaping (LlamaResult<Void>) -> Void) {
        nativeLogEnabled = enabled
        // Forward to the native hook when the library has been loaded.
        _ = toggleNativeLogFunc?(enabled)
        print("[LlamaCpp] Native logging \(enabled ? "enabled" : "disabled")")
        completion(.success(()))
    }

    func setContextLimit(limit: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
        contextLimit = limit
        print("[LlamaCpp] Context limit set to \(limit)")
        completion(.success(()))
    }

    func modelInfo(path: String, skip: [String], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        // This would typically load model info from the GGUF file.
        // For now, return a basic structure.
        let modelInfo: [String: Any] = [
            "path": path,
            "desc": "Sample model",
            "size": 0,
            "nEmbd": 0,
            "nParams": 0
        ]
        completion(.success(modelInfo))
    }

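    // Example (sketch): reading the dictionary produced by modelInfo above.
    // The keys mirror the stub; `llama` is a hypothetical instance.
    //
    //     llama.modelInfo(path: "/path/to/model.gguf", skip: []) { result in
    //         if case .success(let info) = result {
    //             print(info["desc"] as? String ?? "", info["nParams"] as? Int ?? 0)
    //         }
    //     }
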
    func initContext(contextId: Int, params: [String: Any], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        // Check context limit
        if contexts.count >= contextLimit {
            completion(.failure(.operationFailed("Context limit reached")))
            return
        }

        // Extract parameters
        guard let modelPath = params["model"] as? String else {
            completion(.failure(.invalidParameters))
            return
        }

        // Create context
        let context = LlamaContext(id: contextId)

        // Create model info (this would typically be loaded from the GGUF file).
        // The same capability set is used for both template kinds in this stub.
        let caps = MinjaCaps(
            tools: true,
            toolCalls: true,
            toolResponses: true,
            systemRole: true,
            parallelToolCalls: true,
            toolCallId: true
        )
        let chatTemplates = ChatTemplates(
            llamaChat: true,
            minja: MinjaTemplates(
                `default`: true,
                defaultCaps: caps,
                toolUse: true,
                toolUseCaps: caps
            )
        )

        let model = LlamaModel(
            path: modelPath,
            desc: "Sample model",
            size: 0,
            nEmbd: 0,
            nParams: 0,
            chatTemplates: chatTemplates,
            metadata: [:]
        )

        context.model = model
        contexts[contextId] = context

        // Helper to avoid repeating the capability dictionary for both
        // template kinds.
        func capsDict(_ c: MinjaCaps) -> [String: Any] {
            return [
                "tools": c.tools,
                "toolCalls": c.toolCalls,
                "toolResponses": c.toolResponses,
                "systemRole": c.systemRole,
                "parallelToolCalls": c.parallelToolCalls,
                "toolCallId": c.toolCallId
            ]
        }

        // Return context info. Nested heterogeneous literals need explicit
        // [String: Any] coercions to satisfy the compiler.
        let contextInfo: [String: Any] = [
            "contextId": contextId,
            "gpu": false,
            "reasonNoGPU": "Not implemented",
            "model": [
                "desc": model.desc,
                "size": model.size,
                "nEmbd": model.nEmbd,
                "nParams": model.nParams,
                "chatTemplates": [
                    "llamaChat": model.chatTemplates.llamaChat,
                    "minja": [
                        "default": model.chatTemplates.minja.default,
                        "defaultCaps": capsDict(model.chatTemplates.minja.defaultCaps),
                        "toolUse": model.chatTemplates.minja.toolUse,
                        "toolUseCaps": capsDict(model.chatTemplates.minja.toolUseCaps)
                    ] as [String: Any]
                ] as [String: Any],
                "metadata": model.metadata,
                "isChatTemplateSupported": true
            ] as [String: Any]
        ]

        completion(.success(contextInfo))
    }

    func releaseContext(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
        guard contexts.removeValue(forKey: contextId) != nil else {
            completion(.failure(.contextNotFound))
            return
        }
        completion(.success(()))
    }

    func releaseAllContexts(completion: @escaping (LlamaResult<Void>) -> Void) {
        contexts.removeAll()
        completion(.success(()))
    }

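    // Example (sketch): the only key initContext requires above is "model";
    // any further keys would be defined by the plugin's TypeScript definitions
    // and are not assumed here. `llama` is a hypothetical instance.
    //
    //     let params: [String: Any] = ["model": "/path/to/model.gguf"]
    //     llama.initContext(contextId: 1, params: params) { result in
    //         if case .failure(let error) = result {
    //             print(error.localizedDescription)
    //         }
    //     }
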
    // MARK: - Chat and completion

    func getFormattedChat(contextId: Int, messages: String, chatTemplate: String?, params: [String: Any]?, completion: @escaping (LlamaResult<Any>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically format the chat using the model's chat templates.
        // For now, return a basic formatted chat.
        let formattedChat: [String: Any] = [
            "type": "llama-chat",
            "prompt": messages,
            "has_media": false,
            "media_paths": [String]()
        ]

        completion(.success(formattedChat))
    }

    func completion(contextId: Int, params: [String: Any], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically perform the completion using llama.cpp.
        // For now, return a basic completion result.
        let completionResult: [String: Any] = [
            "text": "Sample completion text",
            "reasoning_content": "",
            "tool_calls": [Any](),
            "content": "Sample completion text",
            "chat_format": 0,
            "tokens_predicted": 0,
            "tokens_evaluated": 0,
            "truncated": false,
            "stopped_eos": false,
            "stopped_word": "",
            "stopped_limit": 0,
            "stopping_word": "",
            "context_full": false,
            "interrupted": false,
            "tokens_cached": 0,
            "timings": [
                "prompt_n": 0,
                "prompt_ms": 0,
                "prompt_per_token_ms": 0,
                "prompt_per_second": 0,
                "predicted_n": 0,
                "predicted_ms": 0,
                "predicted_per_token_ms": 0,
                "predicted_per_second": 0
            ]
        ]

        completion(.success(completionResult))
    }

    func stopCompletion(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically stop any ongoing completion.
        completion(.success(()))
    }

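    // Sketch (assumption): once wired up, the completion stub above would
    // route through the native pointer loaded in loadFunctionPointers(),
    // along these lines; the native signature and JSON exchange format are
    // assumptions based on the typealiases above, not a documented ABI.
    //
    //     guard let completionFn = completionFunc else { return }
    //     let paramsData = try JSONSerialization.data(withJSONObject: params)
    //     let paramsJson = String(data: paramsData, encoding: .utf8) ?? "{}"
    //     let resultPtr = prompt.withCString { p in
    //         paramsJson.withCString { j in completionFn(Int64(contextId), p, j) }
    //     }
    //     let resultJson = resultPtr.map { String(cString: $0) }
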
    // MARK: - Session management

    func loadSession(contextId: Int, filepath: String, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically load a saved session from file.
        let sessionResult: [String: Any] = [
            "tokens_loaded": 0,
            "prompt": ""
        ]

        completion(.success(sessionResult))
    }

    func saveSession(contextId: Int, filepath: String, size: Int, completion: @escaping (LlamaResult<Int>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically save the session to file and return the token count.
        completion(.success(0))
    }

    // MARK: - Tokenization

    func tokenize(contextId: Int, text: String, imagePaths: [String], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically tokenize the text using the model's tokenizer.
        // Empty collection literals need explicit element types inside an Any
        // dictionary.
        let tokenizeResult: [String: Any] = [
            "tokens": [Int](),
            "has_images": false,
            "bitmap_hashes": [Any](),
            "chunk_pos": [Int](),
            "chunk_pos_images": [Int]()
        ]

        completion(.success(tokenizeResult))
    }

    func detokenize(contextId: Int, tokens: [Int], completion: @escaping (LlamaResult<String>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically detokenize using the model's tokenizer.
        completion(.success(""))
    }

    // MARK: - Embeddings and reranking

    func embedding(contextId: Int, text: String, params: [String: Any], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically generate embeddings for the text.
        let embeddingResult: [String: Any] = [
            "embedding": [Double]()
        ]

        completion(.success(embeddingResult))
    }

    func rerank(contextId: Int, query: String, documents: [String], params: [String: Any]?, completion: @escaping (LlamaResult<[[String: Any]]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically score each document against the query.
        let rerankResults: [[String: Any]] = []
        completion(.success(rerankResults))
    }

    // MARK: - Benchmarking

    func bench(contextId: Int, pp: Int, tg: Int, pl: Int, nr: Int, completion: @escaping (LlamaResult<String>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically run prompt-processing and text-generation
        // benchmarks and return the results as a JSON string.
        let benchResult = "[]"
        completion(.success(benchResult))
    }

    // MARK: - LoRA adapters

    func applyLoraAdapters(contextId: Int, loraAdapters: [[String: Any]], completion: @escaping (LlamaResult<Void>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically apply the given LoRA adapters to the model.
        completion(.success(()))
    }

    func removeLoraAdapters(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically remove all applied LoRA adapters.
        completion(.success(()))
    }

    func getLoadedLoraAdapters(contextId: Int, completion: @escaping (LlamaResult<[[String: Any]]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically return the currently loaded LoRA adapters.
        let adapters: [[String: Any]] = []
        completion(.success(adapters))
    }

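    // Sketch (assumption): a plausible shape for the loraAdapters argument,
    // with "path" and "scaled" as hypothetical key names; the actual keys are
    // defined by the plugin's TypeScript definitions, which are not shown
    // here. `llama` is a hypothetical instance.
    //
    //     let adapters: [[String: Any]] = [
    //         ["path": "/path/to/adapter.gguf", "scaled": 1.0]
    //     ]
    //     llama.applyLoraAdapters(contextId: 1, loraAdapters: adapters) { _ in }
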
    // MARK: - Multimodal methods

    func initMultimodal(contextId: Int, path: String, useGpu: Bool, completion: @escaping (LlamaResult<Bool>) -> Void) {
        guard let context = contexts[contextId] else {
            completion(.failure(.contextNotFound))
            return
        }

        // This would typically load the multimodal projector at `path`.
        context.isMultimodalEnabled = true
        completion(.success(true))
    }

    func isMultimodalEnabled(contextId: Int, completion: @escaping (LlamaResult<Bool>) -> Void) {
        guard let context = contexts[contextId] else {
            completion(.failure(.contextNotFound))
            return
        }

        completion(.success(context.isMultimodalEnabled))
    }

    func getMultimodalSupport(contextId: Int, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        let support: [String: Any] = [
            "vision": true,
            "audio": true
        ]

        completion(.success(support))
    }

    func releaseMultimodal(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
        guard let context = contexts[contextId] else {
            completion(.failure(.contextNotFound))
            return
        }

        context.isMultimodalEnabled = false
        completion(.success(()))
    }

    // MARK: - TTS methods

    func initVocoder(contextId: Int, path: String, nBatch: Int?, completion: @escaping (LlamaResult<Bool>) -> Void) {
        guard let context = contexts[contextId] else {
            completion(.failure(.contextNotFound))
            return
        }

        context.isVocoderEnabled = true
        completion(.success(true))
    }

    func isVocoderEnabled(contextId: Int, completion: @escaping (LlamaResult<Bool>) -> Void) {
        guard let context = contexts[contextId] else {
            completion(.failure(.contextNotFound))
            return
        }

        completion(.success(context.isVocoderEnabled))
    }

    func getFormattedAudioCompletion(contextId: Int, speakerJsonStr: String, textToSpeak: String, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        // NSNull() stands in for nil, which a [String: Any] literal cannot hold.
        let audioCompletion: [String: Any] = [
            "prompt": "",
            "grammar": NSNull()
        ]

        completion(.success(audioCompletion))
    }

    func getAudioCompletionGuideTokens(contextId: Int, textToSpeak: String, completion: @escaping (LlamaResult<[Int]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        let tokens: [Int] = []
        completion(.success(tokens))
    }

    func decodeAudioTokens(contextId: Int, tokens: [Int], completion: @escaping (LlamaResult<[Int]>) -> Void) {
        guard contexts[contextId] != nil else {
            completion(.failure(.contextNotFound))
            return
        }

        let decodedTokens: [Int] = []
        completion(.success(decodedTokens))
    }

    func releaseVocoder(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
        guard let context = contexts[contextId] else {
            completion(.failure(.contextNotFound))
            return
        }

        context.isVocoderEnabled = false
        completion(.success(()))
    }
}