@dvai-bridge/ios-llama-core 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,445 @@
1
+ // Internal/ModelDownloader.swift
2
+ import Foundation
3
+ import CryptoKit
4
+
5
/// Describes a single model file found in the cache directory.
///
/// `ModelDownloader.listCachedModels()` produces one value per regular file
/// it finds (dotfiles and `.partial` files are skipped there).
public struct CachedModelInfoSwift: Sendable {
    /// File name inside the cache directory (no directory components).
    public let filename: String

    /// Absolute filesystem path of the cached file.
    public let path: String

    /// File size in bytes as reported by the file attributes.
    public let bytes: Int64

    /// Lowercase hex SHA-256 digest of the file contents.
    public let sha256: String
}
12
+
13
/// Resumable, sha256-verified model downloader plus cache management.
///
/// Cache directory layout (per spec §9.2):
/// - default: `<App Support>/<bundle-id>/dvai-models/<filename>`
/// - test override: any caller-supplied URL (used by unit tests to keep
///   real App Support clean).
///
/// Concurrency: an `actor` so cache-list / cache-delete operations are
/// serialised. The download path delegates to a private `URLSessionDataDelegate`
/// (compatible with iOS 14+, unlike the iOS-15 `bytes(for:)` API).
public actor ModelDownloader {
    /// Errors surfaced by the downloader and its cache helpers.
    public enum DownloadError: LocalizedError {
        /// The downloaded (or resumed) bytes did not hash to the expected digest.
        case checksumMismatch(expected: String, got: String)
        /// The server answered with a status outside the accepted range
        /// (`-1` when the response was not an HTTP response at all).
        case httpError(status: Int)
        /// The Application Support directory could not be resolved.
        case missingApplicationSupport
        /// `downloadModel` was called with an empty `expectedSha256`.
        case sha256Required
        /// Generic filesystem / argument failure with a human-readable message.
        case ioError(String)

        public var errorDescription: String? {
            switch self {
            case .checksumMismatch(let expected, let got):
                return "ChecksumMismatchError: expected \(expected), got \(got)"
            case .httpError(let status):
                return "HTTP error \(status)"
            case .missingApplicationSupport:
                return "Could not locate Application Support directory"
            case .sha256Required:
                return "sha256 is required"
            case .ioError(let msg):
                return "I/O error: \(msg)"
            }
        }
    }

    private let cacheDirOverride: URL?

    /// - Parameter cacheDirOverride: When non-nil, used verbatim as the cache
    ///   directory instead of the Application Support location.
    public init(cacheDirOverride: URL? = nil) {
        self.cacheDirOverride = cacheDirOverride
    }

    // MARK: - Cache dir

    /// Resolve and create the cache directory. Returns the directory URL.
    ///
    /// - Throws: `DownloadError.missingApplicationSupport` when Application
    ///   Support cannot be located, or any error from directory creation.
    func cacheDirURL() throws -> URL {
        let fm = FileManager.default
        if let override = cacheDirOverride {
            try fm.createDirectory(at: override, withIntermediateDirectories: true)
            return override
        }
        guard let asd = try? fm.url(
            for: .applicationSupportDirectory,
            in: .userDomainMask,
            appropriateFor: nil,
            create: true
        ) else {
            throw DownloadError.missingApplicationSupport
        }
        let bundleId = Bundle.main.bundleIdentifier ?? "co.deepvoiceai.dvai-bridge"
        let dir = asd.appendingPathComponent(bundleId).appendingPathComponent("dvai-models")
        try fm.createDirectory(at: dir, withIntermediateDirectories: true)
        return dir
    }

    /// Filesystem path of the cache directory (created on demand).
    public func cacheDirPath() throws -> String {
        try cacheDirURL().path
    }

    /// Build the URL of a cache entry, rejecting names that would not resolve
    /// to a direct child of `dir`.
    ///
    /// An empty name resolves to the cache directory itself (so a delete would
    /// wipe the whole cache), and names containing `/` or equal to `.` / `..`
    /// can escape the cache directory. All of these are refused.
    ///
    /// - Throws: `DownloadError.ioError` for an unusable name.
    static func cacheFileURL(named filename: String, in dir: URL) throws -> URL {
        let isPlainName = !filename.isEmpty
            && filename != "."
            && filename != ".."
            && !filename.contains("/")
            && !filename.contains("\0")
        guard isPlainName else {
            throw DownloadError.ioError("invalid cache filename: \(filename)")
        }
        return dir.appendingPathComponent(filename)
    }

    // MARK: - List / delete

    /// Enumerate files in the cache dir (skipping `.partial` and dotfiles),
    /// sha256 each, return one entry per file.
    ///
    /// A directory listing failure is treated as "no cached models"; failures
    /// reading an individual file's attributes or contents are thrown.
    public func listCachedModels() throws -> [CachedModelInfoSwift] {
        let dir = try cacheDirURL()
        let fm = FileManager.default
        let names = (try? fm.contentsOfDirectory(atPath: dir.path)) ?? []
        var out: [CachedModelInfoSwift] = []
        for name in names where !(name.hasPrefix(".") || name.hasSuffix(".partial")) {
            let url = dir.appendingPathComponent(name)
            var isDir: ObjCBool = false
            // Only regular, still-existing entries are reported.
            guard fm.fileExists(atPath: url.path, isDirectory: &isDir), !isDir.boolValue else {
                continue
            }
            let attrs = try fm.attributesOfItem(atPath: url.path)
            let bytes = (attrs[.size] as? NSNumber)?.int64Value ?? 0
            let sha = try Self.sha256OfFile(at: url)
            out.append(CachedModelInfoSwift(
                filename: name,
                path: url.path,
                bytes: bytes,
                sha256: sha
            ))
        }
        return out
    }

    /// Remove `<cacheDir>/<filename>` and its `.partial` sibling if present.
    ///
    /// - Throws: `DownloadError.ioError` when `filename` is not a plain file
    ///   name (see `cacheFileURL(named:in:)`), or the removal error for the
    ///   main file. Removing the `.partial` is best-effort.
    public func deleteCachedModel(filename: String) throws {
        let dir = try cacheDirURL()
        let url = try Self.cacheFileURL(named: filename, in: dir)
        let fm = FileManager.default
        if fm.fileExists(atPath: url.path) {
            try fm.removeItem(at: url)
        }
        let partial = dir.appendingPathComponent("\(filename).partial")
        if fm.fileExists(atPath: partial.path) {
            try? fm.removeItem(at: partial)
        }
    }

    // MARK: - Download

    /// Download `url` into `<cacheDir>/<destFilename>`, resumable + sha256-verified.
    ///
    /// - Parameters:
    ///   - url: Remote location of the model file.
    ///   - expectedSha256: hex digest the final file MUST match (compared
    ///     case-insensitively; must not be empty).
    ///   - destFilename: Plain file name inside the cache directory.
    ///   - headers: Extra HTTP headers forwarded with the request.
    ///   - onProgress: Called with (bytesDone, optional bytesTotal). Emission is
    ///     already debounced by `StreamingDownload`; do not throttle again here.
    /// - Returns: (final-path, cached). `cached: true` means the file was already
    ///   present with a matching hash and no network request was made.
    /// - Throws: `DownloadError.sha256Required`, `DownloadError.ioError` for an
    ///   invalid `destFilename`, or whatever the streaming download / hash
    ///   verification throws.
    public func downloadModel(
        url: URL,
        expectedSha256: String,
        destFilename: String,
        headers: [String: String],
        onProgress: @Sendable @escaping (Int64, Int64?) -> Void
    ) async throws -> (path: String, cached: Bool) {
        guard !expectedSha256.isEmpty else { throw DownloadError.sha256Required }
        let expected = expectedSha256.lowercased()

        let dir = try cacheDirURL()
        let final = try Self.cacheFileURL(named: destFilename, in: dir)
        let partial = dir.appendingPathComponent("\(destFilename).partial")
        let fm = FileManager.default

        // Step 2: cache hit check.
        if fm.fileExists(atPath: final.path) {
            let existing = try Self.sha256OfFile(at: final)
            if existing == expected {
                Self.applyNoBackupAttribute(final)
                return (final.path, true)
            }
            // Step 3: mismatch → delete and fall through.
            try? fm.removeItem(at: final)
        }

        // Step 4: stream download (resumable), then verify + rename.
        try await StreamingDownload.run(
            url: url,
            partial: partial,
            headers: headers,
            onProgress: onProgress
        ).verifyAndFinalize(
            final: final,
            partial: partial,
            expectedSha256: expected
        )

        // Step 7: iOS no-backup attribute.
        Self.applyNoBackupAttribute(final)

        return (final.path, false)
    }

    // MARK: - Helpers

    /// Compute SHA-256 of a file by streaming 64 KiB chunks.
    ///
    /// - Returns: Lowercase hex digest.
    static func sha256OfFile(at url: URL) throws -> String {
        var hasher = SHA256()
        let handle = try FileHandle(forReadingFrom: url)
        defer { try? handle.close() }
        while true {
            let chunk = try handle.read(upToCount: 64 * 1024) ?? Data()
            if chunk.isEmpty { break }
            hasher.update(data: chunk)
        }
        let digest = hasher.finalize()
        return digest.map { String(format: "%02x", $0) }.joined()
    }

    /// Set `URLResourceKey.isExcludedFromBackupKey = true`. Best-effort —
    /// failure here is non-fatal (e.g. unit tests in tmp dirs).
    static func applyNoBackupAttribute(_ url: URL) {
        var mutableURL = url
        var values = URLResourceValues()
        values.isExcludedFromBackup = true
        try? mutableURL.setResourceValues(values)
    }
}
197
+
198
+ // MARK: - Streaming download (URLSessionDataDelegate, iOS 14+)
199
+
200
/// Outcome of a streaming download: the hex SHA-256 of every byte that ended
/// up in the `.partial` file.
struct StreamingDownloadResult {
    /// Hex digest computed while streaming.
    let gotHex: String

    /// Compare the streamed digest with `expectedSha256` and, on a match, move
    /// `partial` into place as `final`.
    ///
    /// On a mismatch both `partial` and `final` are removed (best-effort) and
    /// `ModelDownloader.DownloadError.checksumMismatch` is thrown.
    ///
    /// - Throws: `checksumMismatch`, or the error from moving `partial`.
    func verifyAndFinalize(final: URL, partial: URL, expectedSha256: String) throws {
        let fileManager = FileManager.default

        guard gotHex == expectedSha256 else {
            // Leave nothing behind that a later resume could trust.
            for stale in [partial, final] {
                try? fileManager.removeItem(at: stale)
            }
            throw ModelDownloader.DownloadError.checksumMismatch(
                expected: expectedSha256,
                got: gotHex
            )
        }

        // moveItem refuses to overwrite, so clear any leftover destination first.
        let destinationExists = fileManager.fileExists(atPath: final.path)
        if destinationExists {
            try? fileManager.removeItem(at: final)
        }
        try fileManager.moveItem(at: partial, to: final)
    }
}
221
+
222
/// Wraps `URLSessionDataDelegate` with a continuation so the streaming
/// download can be `await`ed. Handles:
/// - Replaying existing `.partial` bytes through SHA-256 (resume).
/// - Range request + 200/206 handling (server-honoured / not-honoured).
/// - Hashing + appending each received chunk.
/// - Progress debounced to ~10/sec.
///
/// Marked `@unchecked Sendable`: mutable state is touched from `run` before
/// the task is resumed and from the session's delegate callbacks afterwards.
final class StreamingDownload: NSObject, URLSessionDataDelegate, @unchecked Sendable {
    /// Running digest over everything currently in `.partial`.
    private var hasher = SHA256()
    /// Number of bytes currently hashed / stored in `.partial`.
    private var written: Int64 = 0
    /// Expected final size, when the server reported a content length.
    private var totalBytes: Int64?
    private var writeHandle: FileHandle?
    private var lastEmit: Date = .distantPast
    private let debounceInterval: TimeInterval = 0.1

    private let partial: URL
    private let onProgress: @Sendable (Int64, Int64?) -> Void
    private var continuation: CheckedContinuation<StreamingDownloadResult, Error>?
    /// Guards against resuming the continuation more than once.
    private var didFinish = false

    /// Whether we asked the server for a Range. If true and the server replies
    /// with anything other than 206 (i.e. a full body), reset hash + truncate
    /// file before consuming data.
    private var requestedRange = false
    private var serverWillSendFullBody = false

    private init(
        partial: URL,
        onProgress: @Sendable @escaping (Int64, Int64?) -> Void
    ) {
        self.partial = partial
        self.onProgress = onProgress
    }

    /// Entry point. Replays existing `.partial`, performs the download,
    /// returns the final hex digest on success.
    ///
    /// - Parameters:
    ///   - url: Remote resource to fetch.
    ///   - partial: Location of the resumable `.partial` file.
    ///   - headers: Extra HTTP headers sent with both the HEAD probe and the GET.
    ///   - onProgress: Called with (bytesDone, optional bytesTotal).
    /// - Throws: `ModelDownloader.DownloadError.httpError` for rejected
    ///   responses, filesystem errors, or the transport error from URLSession.
    static func run(
        url: URL,
        partial: URL,
        headers: [String: String],
        onProgress: @Sendable @escaping (Int64, Int64?) -> Void
    ) async throws -> StreamingDownloadResult {
        let runner = StreamingDownload(partial: partial, onProgress: onProgress)
        try runner.replayPartial()

        // Parity with Android: if `.partial` is at least as large as the remote
        // resource, a Range request would yield 416 Range Not Satisfiable
        // (opaque to the caller). HEAD-probe the Content-Length first. Any HEAD
        // failure is non-fatal — we just skip the optimisation and let the GET
        // path proceed.
        if runner.written > 0,
           let remoteLength = await Self.probeContentLength(url: url, headers: headers) {
            if runner.written > remoteLength {
                // Oversized partial cannot be a prefix of the resource: start over.
                try? FileManager.default.removeItem(at: partial)
                runner.hasher = SHA256()
                runner.written = 0
            } else if runner.written == remoteLength {
                // The partial already holds every byte (e.g. the process died
                // between the last write and the rename). `bytes=N-` would be
                // unsatisfiable, so skip the network and let the caller verify
                // the replayed digest.
                onProgress(runner.written, remoteLength)
                return runner.currentResult()
            }
        }

        var request = URLRequest(url: url)
        for (k, v) in headers {
            request.setValue(v, forHTTPHeaderField: k)
        }
        if runner.written > 0 {
            request.setValue("bytes=\(runner.written)-", forHTTPHeaderField: "Range")
            runner.requestedRange = true
        }

        let session = URLSession(
            configuration: .default,
            delegate: runner,
            delegateQueue: nil // serial OperationQueue created internally
        )
        defer { session.finishTasksAndInvalidate() }

        return try await withCheckedThrowingContinuation { cont in
            // Store the continuation before resuming so no callback can miss it.
            runner.continuation = cont
            let task = session.dataTask(with: request)
            task.resume()
        }
    }

    /// HEAD-probe the URL to learn `Content-Length`. Returns nil on any
    /// failure (network error, non-2xx, missing/invalid header). Headers are
    /// forwarded so authenticated endpoints work.
    private static func probeContentLength(url: URL, headers: [String: String]) async -> Int64? {
        var request = URLRequest(url: url)
        request.httpMethod = "HEAD"
        for (k, v) in headers {
            request.setValue(v, forHTTPHeaderField: k)
        }
        do {
            let (_, response) = try await URLSession.shared.data(for: request)
            guard let http = response as? HTTPURLResponse,
                  (200...299).contains(http.statusCode) else {
                return nil
            }
            // expectedContentLength uses Content-Length when present (-1 if unknown).
            let len = http.expectedContentLength
            return len >= 0 ? len : nil
        } catch {
            return nil
        }
    }

    /// Replay any existing .partial bytes through the hash so we can resume.
    private func replayPartial() throws {
        let fm = FileManager.default
        guard fm.fileExists(atPath: partial.path) else { return }
        let attrs = try fm.attributesOfItem(atPath: partial.path)
        let size = (attrs[.size] as? NSNumber)?.int64Value ?? 0
        if size <= 0 { return }
        let handle = try FileHandle(forReadingFrom: partial)
        defer { try? handle.close() }
        while true {
            let chunk = try handle.read(upToCount: 64 * 1024) ?? Data()
            if chunk.isEmpty { break }
            hasher.update(data: chunk)
            written += Int64(chunk.count)
        }
    }

    /// Snapshot the running digest as a lowercase-hex result.
    private func currentResult() -> StreamingDownloadResult {
        let gotHex = hasher.finalize().map { String(format: "%02x", $0) }.joined()
        return StreamingDownloadResult(gotHex: gotHex)
    }

    // MARK: - URLSessionDataDelegate

    /// Validates the response status, decides between resume and restart, and
    /// opens `.partial` for writing at the right offset.
    func urlSession(
        _ session: URLSession,
        dataTask: URLSessionDataTask,
        didReceive response: URLResponse,
        completionHandler: @escaping (URLSession.ResponseDisposition) -> Void
    ) {
        guard let http = response as? HTTPURLResponse else {
            finish(with: .failure(ModelDownloader.DownloadError.httpError(status: -1)))
            completionHandler(.cancel)
            return
        }
        guard (200...206).contains(http.statusCode) else {
            finish(with: .failure(ModelDownloader.DownloadError.httpError(status: http.statusCode)))
            completionHandler(.cancel)
            return
        }

        let fm = FileManager.default

        // If we asked for a Range and got anything but 206, the server did not
        // honour it and the body starts at byte 0: restart hash + truncate
        // file. Appending such a body to the old partial would corrupt it.
        if requestedRange && http.statusCode != 206 {
            hasher = SHA256()
            written = 0
            serverWillSendFullBody = true
            try? fm.removeItem(at: partial)
        }

        let contentLength = http.expectedContentLength // -1 if unknown
        if contentLength >= 0 {
            totalBytes = written + contentLength
        }

        // Open partial for write at current offset.
        if !fm.fileExists(atPath: partial.path) {
            fm.createFile(atPath: partial.path, contents: nil)
        }
        do {
            let h = try FileHandle(forWritingTo: partial)
            try h.seek(toOffset: UInt64(written))
            writeHandle = h
        } catch {
            finish(with: .failure(error))
            completionHandler(.cancel)
            return
        }

        // Initial emit so callers see we've started.
        onProgress(written, totalBytes)
        lastEmit = Date()

        completionHandler(.allow)
    }

    /// Appends a received chunk to `.partial`, feeds it to the hash and emits
    /// debounced progress.
    func urlSession(
        _ session: URLSession,
        dataTask: URLSessionDataTask,
        didReceive data: Data
    ) {
        guard let h = writeHandle else { return }
        do {
            try h.write(contentsOf: data)
        } catch {
            // The file can no longer be trusted to match the hash: stop
            // writing, stop pulling bytes off the network, and report.
            try? h.close()
            writeHandle = nil
            dataTask.cancel()
            finish(with: .failure(error))
            return
        }
        hasher.update(data: data)
        written += Int64(data.count)
        let now = Date()
        if now.timeIntervalSince(lastEmit) >= debounceInterval {
            onProgress(written, totalBytes)
            lastEmit = now
        }
    }

    /// Closes the write handle and resolves the continuation with either the
    /// transport error or the final digest.
    func urlSession(
        _ session: URLSession,
        task: URLSessionTask,
        didCompleteWithError error: Error?
    ) {
        try? writeHandle?.close()
        writeHandle = nil
        if let error = error {
            finish(with: .failure(error))
            return
        }
        // Final progress emit.
        onProgress(written, totalBytes)
        finish(with: .success(currentResult()))
    }

    /// Resume the continuation exactly once; later calls are ignored.
    private func finish(with result: Result<StreamingDownloadResult, Error>) {
        guard !didFinish else { return }
        didFinish = true
        let cont = continuation
        continuation = nil
        cont?.resume(with: result)
    }
}
@@ -0,0 +1,158 @@
1
+ // Internal/PluginState.swift
2
+ import Foundation
3
+ #if !COCOAPODS
4
+ import DVAILlamaCoreObjC
5
+ #endif
6
+ #if !COCOAPODS
7
+ import DVAISharedCore
8
+ #endif
9
+
10
/// Owns the running state of the capacitor-llama plugin: the model bridge,
/// the HTTP server, and the model metadata. All access is serialised through
/// the actor isolation.
public actor PluginState {
    private var server: HttpServer?
    private var bridge: LlamaCppBridge?
    private(set) var modelId: String = ""
    private(set) var isRunning: Bool = false
    private(set) var baseUrl: String?
    private(set) var port: Int?

    public init() {}

    /// Start the plugin: load model, bind server, install routes.
    ///
    /// If the plugin is already running it is stopped first.
    ///
    /// - Parameter opts: Start options. Keys read here: `modelPath` (required,
    ///   non-empty), `mmprojPath`, `chatTemplate`, `gpuLayers` (default 99),
    ///   `contextSize` (default 2048), `threads` (default 4), `embeddingMode`
    ///   (default false), `httpBasePort` (default 38883),
    ///   `httpMaxPortAttempts` (default 16), `corsOrigin`.
    /// - Returns: dictionary suitable for Capacitor's `call.resolve(...)` with
    ///   `baseUrl`, `port`, `backend` and `modelId`.
    /// - Throws: an `NSError` (domain `DVAIBridgeLlama`, code 400) when
    ///   `modelPath` is missing, or any error from loading the model / mmproj
    ///   or binding the server.
    public func start(opts: [String: Any]) async throws -> [String: Any] {
        if isRunning { try await stopInternal() }

        guard let modelPath = opts["modelPath"] as? String, !modelPath.isEmpty else {
            throw NSError(
                domain: "DVAIBridgeLlama",
                code: 400,
                userInfo: [NSLocalizedDescriptionKey: "modelPath is required for llama backend"]
            )
        }

        let mmprojPath = opts["mmprojPath"] as? String
        let chatTemplate = opts["chatTemplate"] as? String
        let gpuLayers = opts["gpuLayers"] as? Int ?? 99
        let contextSize = opts["contextSize"] as? Int ?? 2048
        let threads = opts["threads"] as? Int ?? 4
        let embeddingMode = opts["embeddingMode"] as? Bool ?? false
        let httpBasePort = opts["httpBasePort"] as? Int ?? 38883
        let httpMaxPortAttempts = opts["httpMaxPortAttempts"] as? Int ?? 16
        let corsConfig = parseCors(opts["corsOrigin"])

        // Load model via the ObjC++ bridge (real llama.cpp under the hood).
        // The numeric options are caller-supplied `Int`s; `Int32(_:)` traps on
        // values outside the Int32 range, so clamp instead of crashing.
        let bridge = LlamaCppBridge()
        try bridge.loadModel(
            atPath: modelPath,
            mmprojPath: mmprojPath,
            gpuLayers: Int32(clamping: gpuLayers),
            contextSize: Int32(clamping: contextSize),
            threads: Int32(clamping: threads),
            embeddingMode: embeddingMode
        )

        // Phase 2A Pass 2: load mmproj (if provided) so multimodal handlers
        // can light up. A failed mmproj load is fatal for this start() call —
        // the caller asked for a multimodal model and we couldn't deliver.
        if let mmprojPath = mmprojPath, !mmprojPath.isEmpty {
            do {
                try bridge.loadMmproj(atPath: mmprojPath)
            } catch {
                bridge.unload()
                throw error
            }
        }
        let mmprojLoaded = bridge.isMmprojLoaded
        // Audio encoder support implies mmproj is loaded AND mtmd reports
        // an audio encoder is present in the projector.
        let modelHasAudioEncoder = mmprojLoaded && bridge.hasAudioEncoder()

        // Build handlers + context first; Hummingbird requires routes
        // to be registered at Application construction time, so the
        // installRoutes → tryBind order is mandatory. Phase 2A Pass 2:
        // real flags mirrored from the bridge state. embeddingMode
        // comes straight from the start opts so /v1/embeddings can
        // short-circuit when off. chatTemplate is an optional
        // Jinja-compatible override; nil/empty falls through to the
        // model's bundled `tokenizer.chat_template`.
        let handlers = LlamaHandlers(
            bridge: bridge,
            modelId: modelPath,
            mmprojLoaded: mmprojLoaded,
            modelHasAudioEncoder: modelHasAudioEncoder,
            embeddingMode: embeddingMode,
            chatTemplate: chatTemplate
        )
        let ctx = HandlerContext(modelId: modelPath, backendName: "llama")
        let server = HttpServer()
        await server.installRoutes(handlers: handlers, ctx: ctx, corsConfig: corsConfig)

        // Bind server (with port-fallback). If bind fails, release the
        // bridge so the loaded llama context doesn't leak until next
        // start().
        let port: Int
        do {
            port = try await server.tryBind(
                basePort: httpBasePort,
                maxAttempts: httpMaxPortAttempts,
                host: "127.0.0.1"
            )
        } catch {
            bridge.unload()
            throw error
        }

        // Keep the URL in a local so the result dictionary does not need to
        // force-unwrap the optional stored property.
        let baseUrl = "http://127.0.0.1:\(port)/v1"

        self.bridge = bridge
        self.server = server
        self.modelId = modelPath
        self.port = port
        self.baseUrl = baseUrl
        self.isRunning = true

        return [
            "baseUrl": baseUrl,
            "port": port,
            "backend": "llama",
            "modelId": modelPath,
        ]
    }

    /// Stop the plugin: release model, stop server.
    public func stop() async throws {
        try await stopInternal()
    }

    /// Stops the server, unloads the bridge and resets all stored state to its
    /// "not running" values. Safe to call when nothing is running.
    private func stopInternal() async throws {
        await server?.stop()
        bridge?.unload()
        server = nil
        bridge = nil
        modelId = ""
        baseUrl = nil
        port = nil
        isRunning = false
    }

    /// Snapshot of the current running state, suitable for Capacitor `call.resolve(...)`.
    ///
    /// Always contains `running`; `baseUrl` is included when set and `backend`
    /// only while running.
    public func statusInfo() -> [String: Any] {
        var dict: [String: Any] = ["running": isRunning]
        if let baseUrl = baseUrl { dict["baseUrl"] = baseUrl }
        if isRunning { dict["backend"] = "llama" }
        return dict
    }

    /// Parse the CORS option from the start opts dict.
    ///
    /// A string maps to `.wildcard` for `"*"` and `.exact` otherwise, a string
    /// array maps to `.allowlist`, and anything else (including nil) falls
    /// back to `.wildcard`.
    private func parseCors(_ raw: Any?) -> CORSConfig {
        if let s = raw as? String {
            return s == "*" ? .wildcard : .exact(s)
        }
        if let arr = raw as? [String] {
            return .allowlist(arr)
        }
        return .wildcard
    }
}