@dvai-bridge/ios 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/DVAIBridge.podspec +120 -0
  2. package/LICENSE +51 -0
  3. package/Package.swift +104 -0
  4. package/README.md +199 -0
  5. package/ios/Sources/DVAIBridge/BackendKind.swift +23 -0
  6. package/ios/Sources/DVAIBridge/BoundServer.swift +46 -0
  7. package/ios/Sources/DVAIBridge/Capability/CapabilityCache.swift +85 -0
  8. package/ios/Sources/DVAIBridge/Capability/CapabilityPrecheck.swift +193 -0
  9. package/ios/Sources/DVAIBridge/Capability/CapabilityScore.swift +51 -0
  10. package/ios/Sources/DVAIBridge/Capability/DeviceID.swift +70 -0
  11. package/ios/Sources/DVAIBridge/Capability/HardwareAssessment.swift +41 -0
  12. package/ios/Sources/DVAIBridge/DVAIBridge.swift +658 -0
  13. package/ios/Sources/DVAIBridge/DVAIBridgeConfig.swift +86 -0
  14. package/ios/Sources/DVAIBridge/DVAIBridgeError.swift +33 -0
  15. package/ios/Sources/DVAIBridge/Discovery/MDNSPeer.swift +64 -0
  16. package/ios/Sources/DVAIBridge/Discovery/NWAdvertiser.swift +103 -0
  17. package/ios/Sources/DVAIBridge/Discovery/NWBrowserDiscovery.swift +212 -0
  18. package/ios/Sources/DVAIBridge/Internal/BackendSelector.swift +59 -0
  19. package/ios/Sources/DVAIBridge/Internal/ProgressBroadcaster.swift +84 -0
  20. package/ios/Sources/DVAIBridge/License/Audience.swift +133 -0
  21. package/ios/Sources/DVAIBridge/License/Discovery.swift +164 -0
  22. package/ios/Sources/DVAIBridge/License/LicenseValidator.swift +392 -0
  23. package/ios/Sources/DVAIBridge/License/PublicKeys.swift +114 -0
  24. package/ios/Sources/DVAIBridge/License/Types.swift +195 -0
  25. package/ios/Sources/DVAIBridge/Offload/OffloadConfig.swift +118 -0
  26. package/ios/Sources/DVAIBridge/Offload/OffloadProxy.swift +604 -0
  27. package/ios/Sources/DVAIBridge/Offload/OffloadRuntime.swift +98 -0
  28. package/ios/Sources/DVAIBridge/Pairing/Pairing.swift +125 -0
  29. package/ios/Sources/DVAIBridge/Pairing/PairingHandshake.swift +141 -0
  30. package/ios/Sources/DVAIBridge/Pairing/PairingPolicy.swift +162 -0
  31. package/ios/Sources/DVAIBridge/Pairing/PairingStore.swift +65 -0
  32. package/ios/Sources/DVAIBridge/ProgressEvent.swift +34 -0
  33. package/ios/Sources/DVAIBridge/ReactiveState.swift +149 -0
  34. package/ios/Sources/DVAICoreMLCore/.gitkeep +0 -0
  35. package/ios/Sources/DVAICoreMLCore/CoreMLBackendError.swift +19 -0
  36. package/ios/Sources/DVAICoreMLCore/CoreMLHandlers.swift +123 -0
  37. package/ios/Sources/DVAICoreMLCore/CoreMLPluginState.swift +130 -0
  38. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLEngine.swift +137 -0
  39. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLGenerator.swift +108 -0
  40. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLSampler.swift +96 -0
  41. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLTokenizer.swift +69 -0
  42. package/ios/Tests/DVAIBridgeTests/BackendSelectorTests.swift +53 -0
  43. package/ios/Tests/DVAIBridgeTests/CapabilityPrecheckTests.swift +108 -0
  44. package/ios/Tests/DVAIBridgeTests/CoreMLEngineTests.swift +18 -0
  45. package/ios/Tests/DVAIBridgeTests/CoreMLGeneratorShapeTests.swift +11 -0
  46. package/ios/Tests/DVAIBridgeTests/CoreMLHandlersTests.swift +32 -0
  47. package/ios/Tests/DVAIBridgeTests/CoreMLPluginStateTests.swift +41 -0
  48. package/ios/Tests/DVAIBridgeTests/CoreMLSamplerTests.swift +40 -0
  49. package/ios/Tests/DVAIBridgeTests/CoreMLTokenizerTests.swift +19 -0
  50. package/ios/Tests/DVAIBridgeTests/DVAIBridgeAPIShapeTests.swift +37 -0
  51. package/ios/Tests/DVAIBridgeTests/DVAIBridgeConfigTests.swift +52 -0
  52. package/ios/Tests/DVAIBridgeTests/DVAIBridgeErrorTests.swift +33 -0
  53. package/ios/Tests/DVAIBridgeTests/LicenseValidatorTests.swift +658 -0
  54. package/ios/Tests/DVAIBridgeTests/OffloadProxyDecisionTests.swift +156 -0
  55. package/ios/Tests/DVAIBridgeTests/OffloadTests.swift +339 -0
  56. package/ios/Tests/DVAIBridgeTests/ProgressBroadcasterTests.swift +69 -0
  57. package/ios/Tests/DVAIBridgeTests/ProgressEventTests.swift +25 -0
  58. package/ios/Tests/DVAIBridgeTests/ReactiveStateTests.swift +45 -0
  59. package/ios/Tests/DVAIBridgeTests/RealModelIntegrationTest.swift +359 -0
  60. package/package.json +19 -0
@@ -0,0 +1,149 @@
1
+ import Foundation
2
+ import Combine
3
+
/// SwiftUI-friendly reactive state. Exposes lifecycle and progress as
/// observable properties on the main actor.
///
/// ## Distribution-channel asymmetry
///
/// - **Under SwiftPM** (`Package.swift`): full `ObservableObject` +
///   `@Published` API. Drop into a SwiftUI view as `@StateObject` /
///   `@ObservedObject` and the view re-renders automatically when any
///   property changes.
/// - **Under CocoaPods** (`DVAIBridge.podspec`): `ObservableObject`
///   conformance and the `@Published` wrappers are intentionally OMITTED.
///   The properties remain `public private(set) var` and are still
///   readable; observers must subscribe to `stateChanges` (the always-
///   available `Combine` publisher below) instead of using SwiftUI's
///   property-wrapper integration.
///
/// **Why the asymmetry?** Xcode 26 / iOS 26 SDK's static linker emits
/// an implicit link directive for `SwiftUICore` (a private framework
/// non-Apple products cannot link) for *any* module that conforms a type
/// to `ObservableObject` — even if the module never imports SwiftUI.
/// Linking `SwiftUICore` from a non-Apple framework fails with
/// "cannot link directly with 'SwiftUICore' because product being built
/// is not an allowed client of it". CocoaPods bundles all of dvai-bridge
/// into a single Swift module, so the trigger lands on every consumer's
/// link line. SwiftPM, by contrast, builds dvai-bridge as a library
/// dynamically resolved at the consumer's link line where SwiftUICore
/// access *is* allowed (because the consumer's app IS an allowed client),
/// so the same conformance compiles fine.
///
/// CocoaPods SwiftUI consumers wanting reactive view updates should:
///
///     @State private var snapshot = DVAIBridgeSnapshot()
///     ...
///     .onReceive(DVAIBridge.shared.reactive.stateChanges) { _ in
///         snapshot = DVAIBridgeSnapshot.from(DVAIBridge.shared.reactive)
///     }
///
/// Or wrap the reactive object in a small SwiftUI-side adapter that
/// conforms to `ObservableObject` themselves (since their app target
/// IS an allowed SwiftUICore client).
///
/// ## Notification contract
///
/// In BOTH builds `stateChanges` fires exactly once per update batch
/// (`didStart`, `didStop`, `didReceiveProgress`), and only after every
/// property touched by that batch has been written. Subscribers that read
/// the properties from inside their handler therefore always observe a
/// consistent snapshot.
@MainActor
public final class DVAIBridgeReactiveState {
    #if COCOAPODS
    // Plain stored properties. Change notification is sent explicitly by the
    // mutating methods below rather than from per-property `didSet`
    // observers: a `didSet` on each property would notify subscribers in the
    // middle of a multi-property update (e.g. `isReady == true` while
    // `baseUrl` is still nil).
    public private(set) var isReady: Bool = false
    public private(set) var baseUrl: String? = nil
    public private(set) var port: Int? = nil
    public private(set) var currentBackend: BackendKind? = nil
    public private(set) var lastProgress: ProgressEvent? = nil
    #else
    @Published public private(set) var isReady: Bool = false
    @Published public private(set) var baseUrl: String? = nil
    @Published public private(set) var port: Int? = nil
    @Published public private(set) var currentBackend: BackendKind? = nil
    @Published public private(set) var lastProgress: ProgressEvent? = nil
    #endif

    private let stateChangesSubject = PassthroughSubject<Void, Never>()

    /// Combine publisher that fires once after each state update. Available
    /// in both SwiftPM and CocoaPods builds — SwiftPM consumers usually use
    /// `ObservableObject` directly via SwiftUI's property wrappers, but this
    /// publisher remains available as a non-SwiftUI alternative.
    public var stateChanges: AnyPublisher<Void, Never> {
        stateChangesSubject.eraseToAnyPublisher()
    }

    internal init() {}

    /// Records that a server is bound: marks the state ready and copies the
    /// server's base URL, port and backend, then notifies `stateChanges`.
    internal func didStart(_ server: BoundServer) {
        isReady = true
        baseUrl = server.baseUrl
        port = server.port
        currentBackend = server.backend
        // Single emission after all four writes. Under SwiftPM the
        // @Published wrappers additionally drive `objectWillChange`.
        stateChangesSubject.send()
    }

    /// Resets the lifecycle properties to their "not running" values and
    /// notifies `stateChanges`. `lastProgress` is left untouched.
    internal func didStop() {
        isReady = false
        baseUrl = nil
        port = nil
        currentBackend = nil
        stateChangesSubject.send()
    }

    /// Stores the most recent progress event and notifies `stateChanges`.
    internal func didReceiveProgress(_ event: ProgressEvent) {
        lastProgress = event
        stateChangesSubject.send()
    }
}

// `ObservableObject` conformance is only declared outside CocoaPods builds;
// see the type's documentation for the linker issue that motivates this.
#if !COCOAPODS
extension DVAIBridgeReactiveState: ObservableObject {}
#endif
117
+
extension DVAIBridge {
    /// The main-actor-isolated reactive state associated with this bridge.
    ///
    /// The object is looked up in (and on first access created by) the
    /// shared `DVAIBridgeReactiveStateRegistry`, so repeated accesses on the
    /// same bridge yield the same instance. Under SwiftPM, pin it as
    /// `@StateObject` upstream; under CocoaPods, observe its `stateChanges`
    /// publisher.
    @MainActor
    public var reactive: DVAIBridgeReactiveState {
        let registry = DVAIBridgeReactiveStateRegistry.shared
        return registry.state(for: self)
    }
}
127
+
/// Main-actor registry that hands out one `DVAIBridgeReactiveState` per
/// `DVAIBridge` instance, keyed by `ObjectIdentifier(bridge)`.
///
/// The original author's rationale: actors can't own MainActor-isolated
/// state directly, so the association lives here on the main actor.
///
/// NOTE(review): entries are never removed from `states`, and the forwarding
/// task below captures `bridge` strongly for as long as its progress stream
/// keeps producing — confirm that is acceptable for non-singleton bridges.
@MainActor
internal final class DVAIBridgeReactiveStateRegistry {
    static let shared = DVAIBridgeReactiveStateRegistry()

    /// Reactive state objects created so far, keyed by bridge identity.
    private var states: [ObjectIdentifier: DVAIBridgeReactiveState] = [:]

    /// Returns the reactive state for `bridge`, creating and registering it
    /// on first use. Creation also starts a main-actor task that forwards
    /// every element of `bridge.progressStream` into the new state.
    func state(for bridge: DVAIBridge) -> DVAIBridgeReactiveState {
        let key = ObjectIdentifier(bridge)

        guard let cached = states[key] else {
            let created = DVAIBridgeReactiveState()
            states[key] = created

            // Pump progress events into the freshly created state.
            Task { @MainActor [weak created] in
                for await event in bridge.progressStream {
                    created?.didReceiveProgress(event)
                }
            }
            return created
        }

        return cached
    }
}
File without changes
@@ -0,0 +1,19 @@
1
+ import Foundation
2
+
/// Errors surfaced by the CoreML backend. Every case carries a free-form
/// `reason` string that is appended to a fixed, case-specific prefix in
/// `errorDescription`.
public enum CoreMLBackendError: Error, LocalizedError, Sendable {
    case modelLoadFailed(reason: String)
    case tokenizerLoadFailed(reason: String)
    case stateInitFailed(reason: String)
    case generationFailed(reason: String)
    case unsupportedModelFormat(reason: String)

    /// Human-readable description: "<prefix>: <reason>".
    public var errorDescription: String? {
        switch self {
        case let .modelLoadFailed(reason):
            return "CoreML model load failed: \(reason)"
        case let .tokenizerLoadFailed(reason):
            return "Tokenizer load failed: \(reason)"
        case let .stateInitFailed(reason):
            return "MLState init failed: \(reason)"
        case let .generationFailed(reason):
            return "Generation failed: \(reason)"
        case let .unsupportedModelFormat(reason):
            return "Unsupported model format: \(reason)"
        }
    }
}
@@ -0,0 +1,123 @@
1
+ import Foundation
2
+ #if !COCOAPODS
3
+ import DVAISharedCore
4
+ #endif
5
+
/// `DVAIHandlers` conformer for the CoreML backend.
/// Translates OpenAI-compatible HTTP requests into CoreMLGenerator calls and
/// formats the results as OpenAI JSON / SSE responses.
@available(iOS 18.0, macOS 15.0, *)
public final class CoreMLHandlers: DVAIHandlers {
    private let generator: CoreMLGenerator
    private let modelId: String

    // Internal init — `CoreMLGenerator` is an implementation detail of
    // DVAICoreMLCore and stays internal. The only construction site is
    // `CoreMLPluginState.start()` inside the same module.
    internal init(generator: CoreMLGenerator, modelId: String) {
        self.generator = generator
        self.modelId = modelId
    }

    /// Handles a chat-completion request.
    ///
    /// Reads `messages` (required), `stream`, `temperature`, `top_p` and
    /// `max_tokens` from `body`. Returns:
    /// - `.error(400, …)` when `messages` is missing / not an array of
    ///   string-to-string dictionaries, when `max_tokens` is negative, or
    ///   when the tokenizer's chat template fails;
    /// - `.sse(…)` when `stream` is true;
    /// - `.error(500, …)` when buffered generation throws;
    /// - `.json(200, …)` with an OpenAI-shaped `chat.completion` otherwise.
    ///
    /// NOTE(review): a fresh sampler is built per request with defaults
    /// (temperature 0.0, top_p 1.0, top_k 0, max_tokens 512), so the sampler
    /// and `maxNewTokens` configured on the injected `generator` are not used
    /// here — confirm that is intended.
    public func handleChatCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        guard let messages = body["messages"] as? [[String: String]] else {
            return .error(400, "messages array is required")
        }
        let stream = (body["stream"] as? Bool) ?? false
        let temperature = (body["temperature"] as? Double).map(Float.init) ?? 0.0
        let topP = (body["top_p"] as? Double).map(Float.init) ?? 1.0
        let maxTokens = (body["max_tokens"] as? Int) ?? 512

        // A negative token budget comes straight from the client and would
        // otherwise reach the decode loop's range construction; reject it
        // up front as a client error.
        guard maxTokens >= 0 else {
            return .error(400, "max_tokens must be non-negative")
        }

        // Build a generator with the per-request sampling params.
        let requestSampler = CoreMLSampler(temperature: temperature, topP: topP, topK: 0)
        let requestGenerator = CoreMLGenerator(
            engine: generator.engine,
            tokenizer: generator.tokenizer,
            sampler: requestSampler,
            maxNewTokens: maxTokens
        )

        let promptTokens: [Int]
        do {
            promptTokens = try generator.tokenizer.applyChatTemplate(messages: messages)
        } catch {
            return .error(400, "tokenizer chat-template failed: \(error.localizedDescription)")
        }

        if stream {
            let sse = requestGenerator.generateStream(promptTokens: promptTokens)
            let streamId = UUID().uuidString
            // Encode once: the model id is derived from a file name and may
            // contain characters that need JSON escaping.
            let encodedModel = jsonString(modelId)
            let mappedStream = AsyncStream<String> { cont in
                let pump = Task {
                    do {
                        for try await chunk in sse {
                            let created = Int(Date().timeIntervalSince1970)
                            let evt = "data: {\"id\":\"\(streamId)\",\"object\":\"chat.completion.chunk\",\"created\":\(created),\"model\":\(encodedModel),\"choices\":[{\"index\":0,\"delta\":{\"content\":\(self.jsonString(chunk))},\"finish_reason\":null}]}\n\n"
                            cont.yield(evt)
                        }
                        cont.yield("data: [DONE]\n\n")
                        cont.finish()
                    } catch {
                        // JSON-encode the message: an unescaped quote or
                        // newline would corrupt both the JSON payload and
                        // the SSE framing.
                        cont.yield("data: {\"error\":\(self.jsonString(error.localizedDescription))}\n\n")
                        cont.finish()
                    }
                }
                // Stop pumping as soon as the consumer goes away (client
                // disconnect, server shutdown) instead of running to the end.
                cont.onTermination = { _ in pump.cancel() }
            }
            return .sse(mappedStream)
        }

        let text: String
        do {
            text = try await requestGenerator.generate(promptTokens: promptTokens)
        } catch {
            return .error(500, "generation failed: \(error.localizedDescription)")
        }
        let responseJSON: [String: Any] = [
            "id": UUID().uuidString,
            "object": "chat.completion",
            "created": Int(Date().timeIntervalSince1970),
            "model": modelId,
            "choices": [[
                "index": 0,
                "message": ["role": "assistant", "content": text],
                "finish_reason": "stop"
            ]],
            "usage": [
                "prompt_tokens": promptTokens.count,
                "completion_tokens": -1,  // CoreML decoding doesn't track this per checkpoint
                "total_tokens": -1
            ]
        ]
        return .json(200, responseJSON)
    }

    /// Handles a legacy text-completion request by wrapping `prompt`
    /// (empty string when absent) in a single user message and delegating
    /// to `handleChatCompletion`, forwarding `stream`, `temperature`,
    /// `top_p` and `max_tokens` with the same defaults.
    public func handleCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        let prompt = body["prompt"] as? String ?? ""
        let chatBody: [String: Any] = [
            "messages": [["role": "user", "content": prompt]],
            "stream": body["stream"] as? Bool ?? false,
            "temperature": body["temperature"] as? Double ?? 0.0,
            "top_p": body["top_p"] as? Double ?? 1.0,
            "max_tokens": body["max_tokens"] as? Int ?? 512,
        ]
        return try await handleChatCompletion(body: chatBody, ctx: ctx)
    }

    /// Embeddings are not implemented; always answers 501.
    public func handleEmbeddings(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        return .error(501, "embeddings not yet supported by the CoreML backend")
    }

    /// Lists the single model served by this backend.
    public func handleModels(ctx: HandlerContext) async throws -> HandlerResponse {
        return .json(200, [
            "object": "list",
            "data": [["id": modelId, "object": "model", "owned_by": "dvai-bridge"]]
        ])
    }

    /// JSON-encode a single string value (produces a quoted JSON string).
    ///
    /// Encodes `[s]` with `JSONSerialization` and strips the surrounding
    /// array brackets. Falls back to the empty JSON string `""` if encoding
    /// or UTF-8 decoding fails, so the result is always a valid JSON value.
    private func jsonString(_ s: String) -> String {
        guard
            let data = try? JSONSerialization.data(withJSONObject: [s], options: []),
            let encoded = String(data: data, encoding: .utf8),
            encoded.count >= 2
        else {
            return "\"\""
        }
        // Strip the surrounding array brackets — leaves the quoted string value.
        return String(encoded.dropFirst().dropLast())
    }
}
@@ -0,0 +1,130 @@
1
+ import Foundation
2
+ import CoreML
3
+ #if !COCOAPODS
4
+ import DVAISharedCore // HttpServer, DVAIHandlers, HandlerContext, CORSConfig
5
+ #endif
6
+
/// Public plugin state for the CoreML backend (the original notes say its
/// shape mirrors `DVAILlamaCore.PluginState`).
///
/// `start(opts:)` loads the tokenizer and the CoreML model, installs the
/// OpenAI-compatible routes served by `CoreMLHandlers` on an `HttpServer`,
/// and binds it on `127.0.0.1` starting at `httpBasePort` (the server's
/// `tryBind` receives a maximum attempt count for port fallback).
///
/// Requires iOS 18 / macOS 15 for MLState (KV-cache stateful decoding).
@available(iOS 18.0, macOS 15.0, *)
public actor CoreMLPluginState {
    private var httpServer: HttpServer?
    private var generator: CoreMLGenerator?
    private var modelId: String = ""
    private var isRunning: Bool = false
    private var baseUrl: String?
    private var port: Int?

    public init() {}

    /// Starts (or restarts) the backend.
    ///
    /// Recognized `opts` keys: `modelPath` and `tokenizerPath` (required,
    /// non-empty strings), `coremlInputName`, `coremlCausalMaskName`,
    /// `coremlOutputName`, `contextSize`, `temperature`, `topP`, `topK`,
    /// `maxNewTokens`, `httpBasePort`, `httpMaxPortAttempts`, `corsOrigin`.
    ///
    /// - Returns: a dictionary with `baseUrl`, `port`, `backend` and
    ///   `modelId`.
    /// - Throws: `CoreMLBackendError.modelLoadFailed` /
    ///   `.tokenizerLoadFailed` when a required path is missing, plus
    ///   whatever tokenizer loading, model loading, a prior `stop()` or
    ///   port binding throw.
    public func start(opts: [String: Any]) async throws -> [String: Any] {
        // A second start tears the previous instance down first.
        if isRunning {
            try await stop()
        }

        guard let modelPath = opts["modelPath"] as? String, !modelPath.isEmpty else {
            throw CoreMLBackendError.modelLoadFailed(
                reason: "modelPath is required for the CoreML backend")
        }
        guard let tokenizerPath = opts["tokenizerPath"] as? String, !tokenizerPath.isEmpty else {
            throw CoreMLBackendError.tokenizerLoadFailed(
                reason: "tokenizerPath is required (path to a directory containing " +
                    "tokenizer.json + tokenizer_config.json)")
        }

        // Typed accessors for the optional settings below.
        func text(_ key: String, or fallback: String) -> String {
            (opts[key] as? String) ?? fallback
        }
        func integer(_ key: String, or fallback: Int) -> Int {
            (opts[key] as? Int) ?? fallback
        }
        func decimal(_ key: String, or fallback: Float) -> Float {
            (opts[key] as? Double).map(Float.init) ?? fallback
        }

        // Tokenizer first: the engine is constructed with its eosTokenId.
        let tokenizer = try await CoreMLTokenizer(tokenizerDir: URL(fileURLWithPath: tokenizerPath))

        // Feature-name defaults follow the snake_case convention the
        // original notes attribute to Apple's stateful Llama-3.2 conversion.
        let modelURL = URL(fileURLWithPath: modelPath)
        let engine = try CoreMLEngine(
            modelURL: modelURL,
            inputName: text("coremlInputName", or: "input_ids"),
            causalMaskName: text("coremlCausalMaskName", or: "causal_mask"),
            outputName: text("coremlOutputName", or: "logits"),
            maxContextTokens: integer("contextSize", or: 2048),
            eosTokenId: tokenizer.eosTokenId
        )

        let defaultSampler = CoreMLSampler(
            temperature: decimal("temperature", or: 0.0),
            topP: decimal("topP", or: 1.0),
            topK: integer("topK", or: 0)
        )
        let newGenerator = CoreMLGenerator(
            engine: engine,
            tokenizer: tokenizer,
            sampler: defaultSampler,
            maxNewTokens: integer("maxNewTokens", or: 512)
        )

        // The model id is the model file name without its extension.
        let servedModelId = modelURL.deletingPathExtension().lastPathComponent
        let handlers = CoreMLHandlers(generator: newGenerator, modelId: servedModelId)

        // Routes are installed BEFORE binding; the original notes state
        // this install → bind order is mandatory for the underlying server.
        let context = HandlerContext(modelId: servedModelId, backendName: "coreml")
        let cors = parseCors(opts["corsOrigin"])
        let server = HttpServer()
        await server.installRoutes(handlers: handlers, ctx: context, corsConfig: cors)

        let boundPort = try await server.tryBind(
            basePort: integer("httpBasePort", or: 38883),
            maxAttempts: integer("httpMaxPortAttempts", or: 16),
            host: "127.0.0.1"
        )
        let boundBaseUrl = "http://127.0.0.1:\(boundPort)/v1"

        // Commit the new instance only after everything above succeeded.
        httpServer = server
        generator = newGenerator
        modelId = servedModelId
        port = boundPort
        baseUrl = boundBaseUrl
        isRunning = true

        return [
            "baseUrl": boundBaseUrl,
            "port": boundPort,
            "backend": "coreml",
            "modelId": servedModelId,
        ]
    }

    /// Stops the HTTP server (if any) and clears all stored state.
    public func stop() async throws {
        await httpServer?.stop()
        httpServer = nil
        generator = nil
        modelId = ""
        baseUrl = nil
        port = nil
        isRunning = false
    }

    /// Reports `running`, plus `baseUrl` when one is set and
    /// `backend: "coreml"` while running.
    public func statusInfo() -> [String: Any] {
        var info: [String: Any] = ["running": isRunning]
        if let currentBaseUrl = baseUrl {
            info["baseUrl"] = currentBaseUrl
        }
        if isRunning {
            info["backend"] = "coreml"
        }
        return info
    }

    // MARK: - Private

    /// Maps the raw `corsOrigin` option onto a `CORSConfig`: `"*"` →
    /// wildcard, any other string → exact origin, a string array →
    /// allowlist, anything else (including absence) → wildcard.
    private func parseCors(_ raw: Any?) -> CORSConfig {
        switch raw {
        case let origin as String:
            return origin == "*" ? .wildcard : .exact(origin)
        case let origins as [String]:
            return .allowlist(origins)
        default:
            return .wildcard
        }
    }
}
@@ -0,0 +1,137 @@
1
+ import Foundation
2
+ import CoreML
3
+
/// Wraps an `MLModel` plus the feature-name conventions our CoreML LLM
/// checkpoints follow. `makeConversationState()` produces a fresh `MLState`
/// for each conversation so token-by-token decoding can preserve KV-cache
/// across calls.
///
/// iOS 18 / macOS 15 API notes (from the original author):
/// - `MLModel.makeState()` returns `MLState` directly and is not a throwing
///   call. NOTE(review): the original notes disagree on whether calling it
///   on a non-stateful model can crash — behavior is unverified here.
/// - `MLModel.prediction(from:using:options:)` takes the state via the
///   `using:` label, NOT `state:`.
@available(iOS 18.0, macOS 15.0, *)
internal final class CoreMLEngine: @unchecked Sendable {
    let model: MLModel
    /// Name of the token-id input feature. Defaults to `input_ids`, which
    /// the original notes attribute to Apple-converted Llama-3.2 stateful
    /// checkpoints. Override via `opts["coremlInputName"]`.
    let inputName: String
    /// Name of the causal-mask input feature. An empty string disables the
    /// mask input; the mask is also skipped when the loaded model declares
    /// no input with this name. Override via `opts["coremlCausalMaskName"]`.
    let causalMaskName: String
    /// Name of the logits output feature (default: "logits").
    let outputName: String
    /// Configured context length (from opts; default 2048).
    /// NOTE(review): stored but not enforced anywhere in this class.
    let maxContextTokens: Int
    /// End-of-sequence token id (from tokenizer or opts).
    let eosTokenId: Int

    /// Loads the model at `modelURL` with the requested compute units.
    /// - Throws: `CoreMLBackendError.modelLoadFailed` wrapping the
    ///   underlying load error.
    init(
        modelURL: URL,
        inputName: String = "input_ids",
        causalMaskName: String = "causal_mask",
        outputName: String = "logits",
        maxContextTokens: Int = 2048,
        eosTokenId: Int,
        computeUnits: MLComputeUnits = .all
    ) throws {
        let configuration = MLModelConfiguration()
        configuration.computeUnits = computeUnits

        let loadedModel: MLModel
        do {
            loadedModel = try MLModel(contentsOf: modelURL, configuration: configuration)
        } catch {
            throw CoreMLBackendError.modelLoadFailed(reason: "\(error)")
        }

        self.model = loadedModel
        self.inputName = inputName
        self.causalMaskName = causalMaskName
        self.outputName = outputName
        self.maxContextTokens = maxContextTokens
        self.eosTokenId = eosTokenId
    }

    /// Make a fresh KV-cache state for a new conversation by calling
    /// `MLModel.makeState()` (iOS 18 / macOS 15, non-throwing).
    func makeConversationState() -> MLState {
        model.makeState()
    }

    /// Run a single-token forward pass using the given KV-cache state.
    ///
    /// KNOWN ISSUE (recorded by the original author): on the reference
    /// Apple-converted Llama-3.2 stateful 4-bit checkpoint, the FIRST
    /// `model.prediction(from:using:)` call crashes hard inside CoreML's
    /// C++ IR layer with:
    ///
    ///     Error: Cannot retrieve vector from IRValue format int32
    ///
    /// The crash was reproduced on BOTH iOS Simulator and macOS-native,
    /// which rules out the previously-suspected simulator-only Espresso
    /// limitation. It was verified that the model loads fine (no "Failed to
    /// build execution plan"), that the input_ids name + shape match the
    /// model description, and that the causal_mask name + shape match
    /// Apple's published convention. The failure is a process crash, not a
    /// Swift `Error`, so callers can't try/catch it; the
    /// RealModelIntegrationTest is gated off until the cause is understood.
    /// Live debugging on a real iOS device with Instruments is the next
    /// step. See:
    /// packages/dvai-bridge-ios/ios/Tests/DVAIBridgeTests/RealModelIntegrationTest.swift
    ///
    /// - Parameters:
    ///   - token: New token id to feed. Per the original notes, the model
    ///     appends the token's K/V to `state` as a side effect.
    ///   - kvCachePosition: 0-based position of the new token in the
    ///     conversation (first prompt token is 0). Used to size the
    ///     causal-mask input; the caller increments it across calls within
    ///     the same conversation.
    ///   - state: KV-cache `MLState` from `makeConversationState()`.
    /// - Returns: the multi-array stored under `outputName`.
    /// - Throws: `CoreMLBackendError.generationFailed` when the prediction
    ///   has no multi-array under `outputName`, plus any error thrown by
    ///   array allocation, feature-provider construction or prediction.
    func runStep(token: Int, kvCachePosition: Int, state: MLState) throws -> MLMultiArray {
        let tokenInput = try makeTokenInput(token)
        var features: [String: MLFeatureValue] = [
            inputName: MLFeatureValue(multiArray: tokenInput)
        ]

        if let mask = try makeCausalMask(kvCachePosition: kvCachePosition) {
            features[causalMaskName] = MLFeatureValue(multiArray: mask)
        }

        let provider = try MLDictionaryFeatureProvider(dictionary: features)
        // The prediction call is synchronous; CoreMLGenerator is
        // responsible for running it off the caller's thread.
        let prediction = try model.prediction(
            from: provider, using: state, options: MLPredictionOptions()
        )

        guard let logits = prediction.featureValue(for: outputName)?.multiArrayValue else {
            throw CoreMLBackendError.generationFailed(reason: "no '\(outputName)' output in model prediction")
        }
        return logits
    }

    /// Builds the `[1, 1]` Int32 token-id input. The value is written
    /// straight into the array's backing memory rather than through the
    /// NSNumber subscript, to avoid bridging overhead.
    private func makeTokenInput(_ token: Int) throws -> MLMultiArray {
        let array = try MLMultiArray(shape: [1, 1], dataType: .int32)
        let slot = array.dataPointer.bindMemory(to: Int32.self, capacity: 1)
        slot.pointee = Int32(token)
        return array
    }

    /// Builds the `[1, 1, 1, kvCachePosition + 1]` Float16 causal mask
    /// filled with zeros, or returns nil when the mask is disabled (empty
    /// `causalMaskName`) or the model declares no such input.
    ///
    /// Per the original notes: in single-token decoding the new query
    /// attends to every position seen so far, so nothing is masked
    /// (zero = unmasked, large-negative = masked).
    private func makeCausalMask(kvCachePosition: Int) throws -> MLMultiArray? {
        guard !causalMaskName.isEmpty,
              model.modelDescription.inputDescriptionsByName[causalMaskName] != nil
        else {
            return nil
        }

        let kvLength = max(1, kvCachePosition + 1)
        let mask = try MLMultiArray(
            shape: [1, 1, 1, NSNumber(value: kvLength)], dataType: .float16
        )
        // Float16 zero == bit pattern 0x0000, so zero-filling the raw
        // bytes (2 bytes per element) is sufficient.
        memset(mask.dataPointer, 0, mask.count * MemoryLayout<UInt16>.size)
        return mask
    }
}
@@ -0,0 +1,108 @@
1
+ import Foundation
2
+ import CoreML
3
+
/// Orchestrates `CoreMLEngine` + `CoreMLTokenizer` + `CoreMLSampler` to
/// produce text from a prompt via autoregressive decoding.
///
/// CoreML prediction note (iOS 18):
/// `MLModel.prediction(from:using:options:)` is synchronous, so both
/// `generate` and `generateStream` run the decode loop inside a
/// `Task.detached` to keep it off the caller's executor. Both entry points
/// are cancellation-aware: cancelling the awaiting task (`generate`) or
/// terminating the stream's consumer (`generateStream`) cancels the
/// detached task, and the loop checks for cancellation before every
/// forward pass.
@available(iOS 18.0, macOS 15.0, *)
internal struct CoreMLGenerator: @unchecked Sendable {
    let engine: CoreMLEngine
    let tokenizer: CoreMLTokenizer
    let sampler: CoreMLSampler
    let maxNewTokens: Int

    /// Buffered generation. Runs the full decode loop and returns the
    /// decoded text.
    ///
    /// - Throws: `CoreMLBackendError.generationFailed` for an empty prompt,
    ///   `CancellationError` when cancelled, or any error from the engine.
    func generate(promptTokens: [Int]) async throws -> String {
        let work: Task<String, Error> = Task.detached(priority: .userInitiated) {
            var generated: [Int] = []
            try self.runDecodeLoop(promptTokens: promptTokens) { token in
                generated.append(token)
            }
            return self.tokenizer.decode(tokens: generated)
        }
        // A detached task does not inherit cancellation from its creator,
        // so forward it explicitly.
        return try await withTaskCancellationHandler {
            try await work.value
        } onCancel: {
            work.cancel()
        }
    }

    /// Streaming generation. Yields each decoded token chunk via
    /// `AsyncThrowingStream`; the stream finishes normally at EOS or the
    /// token budget and finishes with an error if decoding throws.
    ///
    /// NOTE(review): chunks are decoded one token at a time; whether the
    /// tokenizer handles characters that span several tokens is not visible
    /// here — confirm.
    func generateStream(promptTokens: [Int]) -> AsyncThrowingStream<String, Error> {
        AsyncThrowingStream { continuation in
            let work = Task.detached(priority: .userInitiated) {
                do {
                    try self.runDecodeLoop(promptTokens: promptTokens) { token in
                        continuation.yield(self.tokenizer.decode(token: token))
                    }
                    continuation.finish()
                } catch {
                    continuation.finish(throwing: error)
                }
            }
            // Stop decoding when the consumer stops listening.
            continuation.onTermination = { _ in work.cancel() }
        }
    }

    /// Shared prefill + decode loop used by both entry points.
    ///
    /// Feeds every prompt token through the engine (each `runStep` returns
    /// the logits for the *next* position, so the logits of the last prompt
    /// token give the first generated token), then samples and feeds tokens
    /// until EOS or until `maxNewTokens` tokens have been emitted. A
    /// non-positive `maxNewTokens` emits nothing. `onToken` is invoked once
    /// per generated token, in order; the EOS token is never reported.
    private func runDecodeLoop(promptTokens: [Int], onToken: (Int) -> Void) throws {
        guard let firstToken = promptTokens.first else {
            throw CoreMLBackendError.generationFailed(reason: "prompt tokens are empty")
        }

        let state = engine.makeConversationState()

        // Prefill. kvPos always equals the number of tokens already fed.
        var kvPos = 0
        var lastLogits = try engine.runStep(token: firstToken, kvCachePosition: kvPos, state: state)
        kvPos += 1
        for token in promptTokens.dropFirst() {
            try Task.checkCancellation()
            lastLogits = try engine.runStep(token: token, kvCachePosition: kvPos, state: state)
            kvPos += 1
        }

        var nextToken = sampler.sample(logits: lastLogits)

        // Clamp so a negative budget cannot trap on range construction.
        let budget = max(0, maxNewTokens)
        var emitted = 0
        while emitted < budget, nextToken != engine.eosTokenId {
            onToken(nextToken)
            emitted += 1
            // Skip the forward pass whose output would be discarded once
            // the budget is exhausted.
            guard emitted < budget else { break }

            try Task.checkCancellation()
            lastLogits = try engine.runStep(token: nextToken, kvCachePosition: kvPos, state: state)
            kvPos += 1
            nextToken = sampler.sample(logits: lastLogits)
        }
    }
}