llama-cpp-capacitor 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,596 @@
+ import Foundation
+
+ // MARK: - Native Library Integration
+
+ // Reserved for native-handle bookkeeping; the stubbed methods below
+ // track their contexts in LlamaCpp.contexts instead.
+ private var contexts: [Int64: UnsafeMutableRawPointer] = [:]
+ private var nextContextId: Int64 = 1
+
+ // Load the native library. Global stored properties are already
+ // initialized lazily in Swift; the `lazy` keyword is invalid at file scope.
+ private let llamaLibrary: UnsafeMutableRawPointer? = {
+     guard let frameworkPath = Bundle.main.path(forResource: "llama-cpp", ofType: "framework") else {
+         print("Error: llama-cpp framework not found")
+         return nil
+     }
+
+     // dlopen needs the binary inside the framework bundle, not the
+     // bundle directory itself.
+     let binaryPath = (frameworkPath as NSString).appendingPathComponent("llama-cpp")
+     guard let handle = dlopen(binaryPath, RTLD_NOW) else {
+         let message = dlerror().map { String(cString: $0) } ?? "unknown error"
+         print("Error: Failed to load llama-cpp library: \(message)")
+         return nil
+     }
+
+     return handle
+ }()
+
+ // C-compatible signatures for the exported symbols: a dlsym pointer can
+ // only be cast to a @convention(c) function type built from C types,
+ // not to a native Swift function type taking String.
+ private typealias InitContextFn = @convention(c) (UnsafePointer<CChar>?, UnsafePointer<CChar>?) -> Int64
+ private typealias ReleaseContextFn = @convention(c) (Int64) -> Void
+ private typealias CompletionFn = @convention(c) (Int64, UnsafePointer<CChar>?, UnsafePointer<CChar>?) -> UnsafePointer<CChar>?
+ private typealias StopCompletionFn = @convention(c) (Int64) -> Void
+ private typealias GetFormattedChatFn = @convention(c) (Int64, UnsafePointer<CChar>?, UnsafePointer<CChar>?) -> UnsafePointer<CChar>?
+ private typealias ToggleNativeLogFn = @convention(c) (Bool) -> Bool
+
+ // Function pointers for native calls
+ private var initContextFunc: InitContextFn?
+ private var releaseContextFunc: ReleaseContextFn?
+ private var completionFunc: CompletionFn?
+ private var stopCompletionFunc: StopCompletionFn?
+ private var getFormattedChatFunc: GetFormattedChatFn?
+ private var toggleNativeLogFunc: ToggleNativeLogFn?
+
+ private func loadFunctionPointers() {
+     guard let library = llamaLibrary else { return }
+
+     // dlsym returns nil for a missing symbol, and unsafeBitCast of a
+     // nil pointer is undefined behavior, so check before casting.
+     func load<T>(_ name: String, as type: T.Type) -> T? {
+         guard let symbol = dlsym(library, name) else { return nil }
+         return unsafeBitCast(symbol, to: T.self)
+     }
+
+     initContextFunc = load("llama_init_context", as: InitContextFn.self)
+     releaseContextFunc = load("llama_release_context", as: ReleaseContextFn.self)
+     completionFunc = load("llama_completion", as: CompletionFn.self)
+     stopCompletionFunc = load("llama_stop_completion", as: StopCompletionFn.self)
+     getFormattedChatFunc = load("llama_get_formatted_chat", as: GetFormattedChatFn.self)
+     toggleNativeLogFunc = load("llama_toggle_native_log", as: ToggleNativeLogFn.self)
+ }
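+
+ // Editor's sketch (not part of the package): how a loaded symbol might
+ // be invoked. The JSON shape of the second argument is an assumption;
+ // withCString keeps the C strings valid for the duration of the call.
+ // Callers are expected to have run loadFunctionPointers() first.
+ private func initNativeContext(modelPath: String, paramsJson: String) -> Int64? {
+     guard let fn = initContextFunc else { return nil }
+     return modelPath.withCString { cPath in
+         paramsJson.withCString { cParams in
+             fn(cPath, cParams)
+         }
+     }
+ }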
+
+ // MARK: - Result Types
+ typealias LlamaResult<T> = Result<T, LlamaError>
+
+ enum LlamaError: Error, LocalizedError {
+     case contextNotFound
+     case modelNotFound
+     case invalidParameters
+     case operationFailed(String)
+     case notImplemented
+
+     var errorDescription: String? {
+         switch self {
+         case .contextNotFound:
+             return "Context not found"
+         case .modelNotFound:
+             return "Model not found"
+         case .invalidParameters:
+             return "Invalid parameters"
+         case .operationFailed(let message):
+             return "Operation failed: \(message)"
+         case .notImplemented:
+             return "Operation not implemented"
+         }
+     }
+ }
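+
+ // Editor's sketch (hypothetical caller, not part of the package):
+ // consuming a LlamaResult is an ordinary switch over Result, with
+ // LocalizedError supplying the failure message.
+ private func logOutcome(_ result: LlamaResult<[String: Any]>) {
+     switch result {
+     case .success(let payload):
+         print("[LlamaCpp] success: \(payload)")
+     case .failure(let error):
+         print("[LlamaCpp] failure: \(error.localizedDescription)")
+     }
+ }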
+
+ // MARK: - Context Management
+ class LlamaContext {
+     let id: Int
+     var model: LlamaModel?
+     var isMultimodalEnabled: Bool = false
+     var isVocoderEnabled: Bool = false
+
+     init(id: Int) {
+         self.id = id
+     }
+ }
+
+ class LlamaModel {
+     let path: String
+     let desc: String
+     let size: Int
+     let nEmbd: Int
+     let nParams: Int
+     let chatTemplates: ChatTemplates
+     let metadata: [String: Any]
+
+     init(path: String, desc: String, size: Int, nEmbd: Int, nParams: Int, chatTemplates: ChatTemplates, metadata: [String: Any]) {
+         self.path = path
+         self.desc = desc
+         self.size = size
+         self.nEmbd = nEmbd
+         self.nParams = nParams
+         self.chatTemplates = chatTemplates
+         self.metadata = metadata
+     }
+ }
+
+ struct ChatTemplates {
+     let llamaChat: Bool
+     let minja: MinjaTemplates
+
+     init(llamaChat: Bool, minja: MinjaTemplates) {
+         self.llamaChat = llamaChat
+         self.minja = minja
+     }
+ }
+
+ struct MinjaTemplates {
+     let `default`: Bool
+     let defaultCaps: MinjaCaps
+     let toolUse: Bool
+     let toolUseCaps: MinjaCaps
+
+     init(`default`: Bool, defaultCaps: MinjaCaps, toolUse: Bool, toolUseCaps: MinjaCaps) {
+         self.`default` = `default`
+         self.defaultCaps = defaultCaps
+         self.toolUse = toolUse
+         self.toolUseCaps = toolUseCaps
+     }
+ }
+
+ struct MinjaCaps {
+     let tools: Bool
+     let toolCalls: Bool
+     let toolResponses: Bool
+     let systemRole: Bool
+     let parallelToolCalls: Bool
+     let toolCallId: Bool
+
+     init(tools: Bool, toolCalls: Bool, toolResponses: Bool, systemRole: Bool, parallelToolCalls: Bool, toolCallId: Bool) {
+         self.tools = tools
+         self.toolCalls = toolCalls
+         self.toolResponses = toolResponses
+         self.systemRole = systemRole
+         self.parallelToolCalls = parallelToolCalls
+         self.toolCallId = toolCallId
+     }
+ }
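+
+ // Editor's sketch (hypothetical extension, not part of the package):
+ // initContext below flattens these flags into a dictionary inline; a
+ // helper like this would remove that duplication.
+ extension MinjaCaps {
+     var asDictionary: [String: Any] {
+         [
+             "tools": tools,
+             "toolCalls": toolCalls,
+             "toolResponses": toolResponses,
+             "systemRole": systemRole,
+             "parallelToolCalls": parallelToolCalls,
+             "toolCallId": toolCallId
+         ]
+     }
+ }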
+
+ // MARK: - Main Implementation
+ @objc public class LlamaCpp: NSObject {
+     private var contexts: [Int: LlamaContext] = [:]
+     private var contextCounter: Int = 0
+     private var contextLimit: Int = 10
+     private var nativeLogEnabled: Bool = false
+
+     // MARK: - Core initialization and management
+
+     func toggleNativeLog(enabled: Bool, completion: @escaping (LlamaResult<Void>) -> Void) {
+         nativeLogEnabled = enabled
+         if enabled {
+             print("[LlamaCpp] Native logging enabled")
+         } else {
+             print("[LlamaCpp] Native logging disabled")
+         }
+         completion(.success(()))
+     }
+
+     func setContextLimit(limit: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
+         contextLimit = limit
+         print("[LlamaCpp] Context limit set to \(limit)")
+         completion(.success(()))
+     }
+
+     func modelInfo(path: String, skip: [String], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         // This would typically load model info from the GGUF file
+         // For now, return a basic structure
+         let modelInfo: [String: Any] = [
+             "path": path,
+             "desc": "Sample model",
+             "size": 0,
+             "nEmbd": 0,
+             "nParams": 0
+         ]
+         completion(.success(modelInfo))
+     }
+
+     func initContext(contextId: Int, params: [String: Any], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         // Check context limit
+         if contexts.count >= contextLimit {
+             completion(.failure(.operationFailed("Context limit reached")))
+             return
+         }
+
+         // Extract parameters
+         guard let modelPath = params["model"] as? String else {
+             completion(.failure(.invalidParameters))
+             return
+         }
+
+         // Create context
+         let context = LlamaContext(id: contextId)
+
+         // Create model info (this would typically be loaded from the GGUF file)
+         let chatTemplates = ChatTemplates(
+             llamaChat: true,
+             minja: MinjaTemplates(
+                 default: true,
+                 defaultCaps: MinjaCaps(
+                     tools: true,
+                     toolCalls: true,
+                     toolResponses: true,
+                     systemRole: true,
+                     parallelToolCalls: true,
+                     toolCallId: true
+                 ),
+                 toolUse: true,
+                 toolUseCaps: MinjaCaps(
+                     tools: true,
+                     toolCalls: true,
+                     toolResponses: true,
+                     systemRole: true,
+                     parallelToolCalls: true,
+                     toolCallId: true
+                 )
+             )
+         )
+
+         let model = LlamaModel(
+             path: modelPath,
+             desc: "Sample model",
+             size: 0,
+             nEmbd: 0,
+             nParams: 0,
+             chatTemplates: chatTemplates,
+             metadata: [:]
+         )
+
+         context.model = model
+         contexts[contextId] = context
+
+         // Return context info
+         let contextInfo: [String: Any] = [
+             "contextId": contextId,
+             "gpu": false,
+             "reasonNoGPU": "Not implemented",
+             "model": [
+                 "desc": model.desc,
+                 "size": model.size,
+                 "nEmbd": model.nEmbd,
+                 "nParams": model.nParams,
+                 "chatTemplates": [
+                     "llamaChat": model.chatTemplates.llamaChat,
+                     "minja": [
+                         "default": model.chatTemplates.minja.default,
+                         "defaultCaps": [
+                             "tools": model.chatTemplates.minja.defaultCaps.tools,
+                             "toolCalls": model.chatTemplates.minja.defaultCaps.toolCalls,
+                             "toolResponses": model.chatTemplates.minja.defaultCaps.toolResponses,
+                             "systemRole": model.chatTemplates.minja.defaultCaps.systemRole,
+                             "parallelToolCalls": model.chatTemplates.minja.defaultCaps.parallelToolCalls,
+                             "toolCallId": model.chatTemplates.minja.defaultCaps.toolCallId
+                         ],
+                         "toolUse": model.chatTemplates.minja.toolUse,
+                         "toolUseCaps": [
+                             "tools": model.chatTemplates.minja.toolUseCaps.tools,
+                             "toolCalls": model.chatTemplates.minja.toolUseCaps.toolCalls,
+                             "toolResponses": model.chatTemplates.minja.toolUseCaps.toolResponses,
+                             "systemRole": model.chatTemplates.minja.toolUseCaps.systemRole,
+                             "parallelToolCalls": model.chatTemplates.minja.toolUseCaps.parallelToolCalls,
+                             "toolCallId": model.chatTemplates.minja.toolUseCaps.toolCallId
+                         ]
+                     ]
+                 ],
+                 "metadata": model.metadata,
+                 "isChatTemplateSupported": true
+             ]
+         ]
+
+         completion(.success(contextInfo))
+     }
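+
+     // Editor's sketch (hypothetical helper, not part of the package):
+     // the @convention(c) signatures at the top of the file suggest that
+     // parameters cross the C boundary as a string, so a real initContext
+     // would likely serialize `params` (perhaps as JSON) before calling
+     // the native library.
+     private func encodeParams(_ params: [String: Any]) -> String? {
+         guard JSONSerialization.isValidJSONObject(params),
+               let data = try? JSONSerialization.data(withJSONObject: params) else {
+             return nil
+         }
+         return String(data: data, encoding: .utf8)
+     }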
+
+     func releaseContext(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
+         guard contexts.removeValue(forKey: contextId) != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+         completion(.success(()))
+     }
+
+     func releaseAllContexts(completion: @escaping (LlamaResult<Void>) -> Void) {
+         contexts.removeAll()
+         completion(.success(()))
+     }
+
+     // MARK: - Chat and completion
+
+     func getFormattedChat(contextId: Int, messages: String, chatTemplate: String?, params: [String: Any]?, completion: @escaping (LlamaResult<Any>) -> Void) {
+         // The context itself is not used by this stub, so avoid binding it
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically format the chat using the model's chat templates
+         // For now, return a basic formatted chat
+         let formattedChat: [String: Any] = [
+             "type": "llama-chat",
+             "prompt": messages,
+             "has_media": false,
+             "media_paths": []
+         ]
+
+         completion(.success(formattedChat))
+     }
+
+     func completion(contextId: Int, params: [String: Any], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically perform the completion using llama.cpp
+         // For now, return a basic completion result
+         let completionResult: [String: Any] = [
+             "text": "Sample completion text",
+             "reasoning_content": "",
+             "tool_calls": [],
+             "content": "Sample completion text",
+             "chat_format": 0,
+             "tokens_predicted": 0,
+             "tokens_evaluated": 0,
+             "truncated": false,
+             "stopped_eos": false,
+             "stopped_word": "",
+             "stopped_limit": 0,
+             "stopping_word": "",
+             "context_full": false,
+             "interrupted": false,
+             "tokens_cached": 0,
+             "timings": [
+                 "prompt_n": 0,
+                 "prompt_ms": 0,
+                 "prompt_per_token_ms": 0,
+                 "prompt_per_second": 0,
+                 "predicted_n": 0,
+                 "predicted_ms": 0,
+                 "predicted_per_token_ms": 0,
+                 "predicted_per_second": 0
+             ]
+         ]
+
+         completion(.success(completionResult))
+     }
+
+     func stopCompletion(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically stop any ongoing completion
+         completion(.success(()))
+     }
+
+     // MARK: - Session management
+
+     func loadSession(contextId: Int, filepath: String, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically load session from file
+         let sessionResult: [String: Any] = [
+             "tokens_loaded": 0,
+             "prompt": ""
+         ]
+
+         completion(.success(sessionResult))
+     }
+
+     func saveSession(contextId: Int, filepath: String, size: Int, completion: @escaping (LlamaResult<Int>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically save session to file
+         completion(.success(0))
+     }
+
+     // MARK: - Tokenization
+
+     func tokenize(contextId: Int, text: String, imagePaths: [String], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically tokenize the text using the model's tokenizer
+         let tokenizeResult: [String: Any] = [
+             "tokens": [],
+             "has_images": false,
+             "bitmap_hashes": [],
+             "chunk_pos": [],
+             "chunk_pos_images": []
+         ]
+
+         completion(.success(tokenizeResult))
+     }
+
+     func detokenize(contextId: Int, tokens: [Int], completion: @escaping (LlamaResult<String>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically detokenize using the model's tokenizer
+         completion(.success(""))
+     }
+
+     // MARK: - Embeddings and reranking
+
+     func embedding(contextId: Int, text: String, params: [String: Any], completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically generate embeddings
+         let embeddingResult: [String: Any] = [
+             "embedding": []
+         ]
+
+         completion(.success(embeddingResult))
+     }
+
+     func rerank(contextId: Int, query: String, documents: [String], params: [String: Any]?, completion: @escaping (LlamaResult<[[String: Any]]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically perform reranking
+         let rerankResults: [[String: Any]] = []
+         completion(.success(rerankResults))
+     }
+
+     // MARK: - Benchmarking
+
+     func bench(contextId: Int, pp: Int, tg: Int, pl: Int, nr: Int, completion: @escaping (LlamaResult<String>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically run benchmarks
+         let benchResult = "[]"
+         completion(.success(benchResult))
+     }
+
+     // MARK: - LoRA adapters
+
+     func applyLoraAdapters(contextId: Int, loraAdapters: [[String: Any]], completion: @escaping (LlamaResult<Void>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically apply LoRA adapters
+         completion(.success(()))
+     }
+
+     func removeLoraAdapters(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically remove LoRA adapters
+         completion(.success(()))
+     }
+
+     func getLoadedLoraAdapters(contextId: Int, completion: @escaping (LlamaResult<[[String: Any]]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // This would typically return loaded LoRA adapters
+         let adapters: [[String: Any]] = []
+         completion(.success(adapters))
+     }
+
+     // MARK: - Multimodal methods
+
+     func initMultimodal(contextId: Int, path: String, useGpu: Bool, completion: @escaping (LlamaResult<Bool>) -> Void) {
+         guard let context = contexts[contextId] else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         context.isMultimodalEnabled = true
+         completion(.success(true))
+     }
+
+     func isMultimodalEnabled(contextId: Int, completion: @escaping (LlamaResult<Bool>) -> Void) {
+         guard let context = contexts[contextId] else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         completion(.success(context.isMultimodalEnabled))
+     }
+
+     func getMultimodalSupport(contextId: Int, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         let support: [String: Any] = [
+             "vision": true,
+             "audio": true
+         ]
+
+         completion(.success(support))
+     }
+
+     func releaseMultimodal(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
+         guard let context = contexts[contextId] else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         context.isMultimodalEnabled = false
+         completion(.success(()))
+     }
+
+     // MARK: - TTS methods
+
+     func initVocoder(contextId: Int, path: String, nBatch: Int?, completion: @escaping (LlamaResult<Bool>) -> Void) {
+         guard let context = contexts[contextId] else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         context.isVocoderEnabled = true
+         completion(.success(true))
+     }
+
+     func isVocoderEnabled(contextId: Int, completion: @escaping (LlamaResult<Bool>) -> Void) {
+         guard let context = contexts[contextId] else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         completion(.success(context.isVocoderEnabled))
+     }
+
+     func getFormattedAudioCompletion(contextId: Int, speakerJsonStr: String, textToSpeak: String, completion: @escaping (LlamaResult<[String: Any]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         // NSNull stands in for the absent grammar: a [String: Any]
+         // dictionary cannot hold a bare nil value.
+         let audioCompletion: [String: Any] = [
+             "prompt": "",
+             "grammar": NSNull()
+         ]
+
+         completion(.success(audioCompletion))
+     }
+
+     func getAudioCompletionGuideTokens(contextId: Int, textToSpeak: String, completion: @escaping (LlamaResult<[Int]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         let tokens: [Int] = []
+         completion(.success(tokens))
+     }
+
+     func decodeAudioTokens(contextId: Int, tokens: [Int], completion: @escaping (LlamaResult<[Int]>) -> Void) {
+         guard contexts[contextId] != nil else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         let decodedTokens: [Int] = []
+         completion(.success(decodedTokens))
+     }
+
+     func releaseVocoder(contextId: Int, completion: @escaping (LlamaResult<Void>) -> Void) {
+         guard let context = contexts[contextId] else {
+             completion(.failure(.contextNotFound))
+             return
+         }
+
+         context.isVocoderEnabled = false
+         completion(.success(()))
+     }
+ }
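+
+ // Editor's usage sketch (hypothetical model path): the stubbed methods
+ // above complete with placeholder data rather than real llama.cpp
+ // output, so this demonstrates only the call shape.
+ func demoLlamaCpp() {
+     let llama = LlamaCpp()
+     llama.initContext(contextId: 1, params: ["model": "/path/to/model.gguf"]) { result in
+         switch result {
+         case .success(let info):
+             print("context ready: \(info["contextId"] ?? "?")")
+             llama.completion(contextId: 1, params: ["prompt": "Hello"]) { completionResult in
+                 if case .success(let output) = completionResult {
+                     print(output["text"] ?? "")
+                 }
+             }
+         case .failure(let error):
+             print("init failed: \(error.localizedDescription)")
+         }
+     }
+ }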