@dvai-bridge/ios 4.0.0 → 4.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/Package.swift +104 -104
  2. package/ios/Sources/DVAIBridge/BackendKind.swift +23 -23
  3. package/ios/Sources/DVAIBridge/BoundServer.swift +46 -46
  4. package/ios/Sources/DVAIBridge/DVAIBridge.swift +658 -658
  5. package/ios/Sources/DVAIBridge/DVAIBridgeConfig.swift +86 -86
  6. package/ios/Sources/DVAIBridge/DVAIBridgeError.swift +33 -33
  7. package/ios/Sources/DVAIBridge/Internal/BackendSelector.swift +59 -59
  8. package/ios/Sources/DVAIBridge/Internal/ProgressBroadcaster.swift +84 -84
  9. package/ios/Sources/DVAIBridge/License/Audience.swift +133 -133
  10. package/ios/Sources/DVAIBridge/License/Discovery.swift +164 -164
  11. package/ios/Sources/DVAIBridge/License/LicenseValidator.swift +392 -392
  12. package/ios/Sources/DVAIBridge/License/PublicKeys.swift +114 -114
  13. package/ios/Sources/DVAIBridge/License/Types.swift +195 -195
  14. package/ios/Sources/DVAIBridge/Offload/OffloadConfig.swift +118 -118
  15. package/ios/Sources/DVAIBridge/ProgressEvent.swift +34 -34
  16. package/ios/Sources/DVAICoreMLCore/CoreMLBackendError.swift +19 -19
  17. package/ios/Sources/DVAICoreMLCore/CoreMLHandlers.swift +123 -123
  18. package/ios/Sources/DVAICoreMLCore/CoreMLPluginState.swift +130 -130
  19. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLEngine.swift +137 -137
  20. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLGenerator.swift +108 -108
  21. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLSampler.swift +96 -96
  22. package/ios/Sources/DVAICoreMLCore/Internal/CoreMLTokenizer.swift +69 -69
  23. package/ios/Tests/DVAIBridgeTests/BackendSelectorTests.swift +53 -53
  24. package/ios/Tests/DVAIBridgeTests/CoreMLEngineTests.swift +18 -18
  25. package/ios/Tests/DVAIBridgeTests/CoreMLGeneratorShapeTests.swift +11 -11
  26. package/ios/Tests/DVAIBridgeTests/CoreMLHandlersTests.swift +32 -32
  27. package/ios/Tests/DVAIBridgeTests/CoreMLPluginStateTests.swift +41 -41
  28. package/ios/Tests/DVAIBridgeTests/CoreMLSamplerTests.swift +40 -40
  29. package/ios/Tests/DVAIBridgeTests/CoreMLTokenizerTests.swift +19 -19
  30. package/ios/Tests/DVAIBridgeTests/DVAIBridgeAPIShapeTests.swift +37 -37
  31. package/ios/Tests/DVAIBridgeTests/DVAIBridgeConfigTests.swift +52 -52
  32. package/ios/Tests/DVAIBridgeTests/DVAIBridgeErrorTests.swift +33 -33
  33. package/ios/Tests/DVAIBridgeTests/LicenseValidatorTests.swift +658 -658
  34. package/ios/Tests/DVAIBridgeTests/ProgressBroadcasterTests.swift +69 -69
  35. package/ios/Tests/DVAIBridgeTests/ProgressEventTests.swift +25 -25
  36. package/ios/Tests/DVAIBridgeTests/ReactiveStateTests.swift +45 -45
  37. package/ios/Tests/DVAIBridgeTests/RealModelIntegrationTest.swift +385 -359
  38. package/package.json +3 -4
  39. package/DVAIBridge.podspec +0 -120
  40. package/LICENSE +0 -51
  41. package/README.md +0 -199
@@ -1,118 +1,118 @@
1
- import Foundation
2
-
3
- /// Per-instance offload configuration. Mirrors the TS-side
4
- /// `OffloadConfig` in `packages/dvai-bridge-core/src/offload/types.ts`.
5
- ///
6
- /// Default state: `enabled = false` — offload is opt-in at v3.0.
7
- ///
8
- /// Set on `StartOptions(offload:)` when calling
9
- /// `DVAIBridge.shared.start(_:)`.
10
- public struct OffloadConfig: Sendable {
11
- /// Master switch. Default `false`; offload is opt-in at v3.0.
12
- public var enabled: Bool
13
- /// Run mDNS to discover LAN peers. Default `true` when enabled.
14
- public var discoverLAN: Bool
15
- /// Below this tok/s, look for a peer. Default 10.
16
- public var minLocalCapability: Double
17
- /// Optional rendezvous-server URL — enables internet path if set.
18
- public var rendezvousUrl: URL?
19
- /// Optional pre-known peers (skip discovery for these).
20
- public var knownPeers: [MDNSPeer]
21
- /// Pairing TTL in days. Default 30 — matches the JS-side default
22
- /// in `PairingPolicy`.
23
- public var expireAfterDays: Int
24
-
25
- public init(
26
- enabled: Bool = false,
27
- discoverLAN: Bool = true,
28
- minLocalCapability: Double = 10,
29
- rendezvousUrl: URL? = nil,
30
- knownPeers: [MDNSPeer] = [],
31
- expireAfterDays: Int = 30
32
- ) {
33
- self.enabled = enabled
34
- self.discoverLAN = discoverLAN
35
- self.minLocalCapability = minLocalCapability
36
- self.rendezvousUrl = rendezvousUrl
37
- self.knownPeers = knownPeers
38
- self.expireAfterDays = expireAfterDays
39
- }
40
- }
41
-
42
- /// Modern start-options surface that wraps `DVAIBridgeConfig` and adds
43
- /// the `offload` knob. Both call sites are supported on
44
- /// `DVAIBridge.shared.start(...)`:
45
- ///
46
- /// try await DVAIBridge.shared.start(.init(backend: .auto, modelPath: "/x"))
47
- /// try await DVAIBridge.shared.start(StartOptions(
48
- /// backend: .auto,
49
- /// modelPath: "/x",
50
- /// offload: OffloadConfig(enabled: true, discoverLAN: true)
51
- /// ))
52
- ///
53
- /// Internally `StartOptions` decomposes into `(DVAIBridgeConfig, OffloadConfig?)`
54
- /// so all existing tests + call sites that take a config keep working.
55
- public struct StartOptions: Sendable {
56
- public var config: DVAIBridgeConfig
57
- public var offload: OffloadConfig?
58
- /// v3.2.2 — explicit path to the license `.jwt` file. Overrides the
59
- /// auto-discovery walk (Bundle resource, Documents, etc). The same
60
- /// .jwt format works across iOS / Android / .NET / RN / JS SDKs.
61
- public var licenseKeyPath: String?
62
- /// v3.2.2 — inline JWT license. Useful when the host app fetches
63
- /// the license over the network and wants to inject the result
64
- /// without touching disk. Wins over `licenseKeyPath` if both are set.
65
- public var licenseToken: String?
66
-
67
- public init(
68
- config: DVAIBridgeConfig,
69
- offload: OffloadConfig? = nil,
70
- licenseKeyPath: String? = nil,
71
- licenseToken: String? = nil
72
- ) {
73
- self.config = config
74
- self.offload = offload
75
- self.licenseKeyPath = licenseKeyPath
76
- self.licenseToken = licenseToken
77
- }
78
-
79
- /// Convenience initializer that mirrors the documented public
80
- /// surface in `docs/migration/v2.4-to-v3.0.md`.
81
- public init(
82
- backend: BackendKind = .auto,
83
- modelPath: String? = nil,
84
- mmprojPath: String? = nil,
85
- tokenizerPath: String? = nil,
86
- gpuLayers: Int = 99,
87
- contextSize: Int = 2048,
88
- threads: Int = 4,
89
- embeddingMode: Bool = false,
90
- httpBasePort: Int = 38883,
91
- httpMaxPortAttempts: Int = 16,
92
- corsOrigin: DVAIBridgeConfig.CORSOrigin = .wildcard,
93
- autoUnloadOnLowMemory: Bool = false,
94
- logLevel: String = "info",
95
- offload: OffloadConfig? = nil,
96
- licenseKeyPath: String? = nil,
97
- licenseToken: String? = nil
98
- ) {
99
- self.config = DVAIBridgeConfig(
100
- backend: backend,
101
- modelPath: modelPath,
102
- mmprojPath: mmprojPath,
103
- tokenizerPath: tokenizerPath,
104
- gpuLayers: gpuLayers,
105
- contextSize: contextSize,
106
- threads: threads,
107
- embeddingMode: embeddingMode,
108
- httpBasePort: httpBasePort,
109
- httpMaxPortAttempts: httpMaxPortAttempts,
110
- corsOrigin: corsOrigin,
111
- autoUnloadOnLowMemory: autoUnloadOnLowMemory,
112
- logLevel: logLevel
113
- )
114
- self.offload = offload
115
- self.licenseKeyPath = licenseKeyPath
116
- self.licenseToken = licenseToken
117
- }
118
- }
1
+ import Foundation
2
+
3
+ /// Per-instance offload configuration. Mirrors the TS-side
4
+ /// `OffloadConfig` in `packages/dvai-bridge-core/src/offload/types.ts`.
5
+ ///
6
+ /// Default state: `enabled = false` — offload is opt-in at v3.0.
7
+ ///
8
+ /// Set on `StartOptions(offload:)` when calling
9
+ /// `DVAIBridge.shared.start(_:)`.
10
+ public struct OffloadConfig: Sendable {
11
+ /// Master switch. Default `false`; offload is opt-in at v3.0.
12
+ public var enabled: Bool
13
+ /// Run mDNS to discover LAN peers. Default `true` when enabled.
14
+ public var discoverLAN: Bool
15
+ /// Below this tok/s, look for a peer. Default 10.
16
+ public var minLocalCapability: Double
17
+ /// Optional rendezvous-server URL — enables internet path if set.
18
+ public var rendezvousUrl: URL?
19
+ /// Optional pre-known peers (skip discovery for these).
20
+ public var knownPeers: [MDNSPeer]
21
+ /// Pairing TTL in days. Default 30 — matches the JS-side default
22
+ /// in `PairingPolicy`.
23
+ public var expireAfterDays: Int
24
+
25
+ public init(
26
+ enabled: Bool = false,
27
+ discoverLAN: Bool = true,
28
+ minLocalCapability: Double = 10,
29
+ rendezvousUrl: URL? = nil,
30
+ knownPeers: [MDNSPeer] = [],
31
+ expireAfterDays: Int = 30
32
+ ) {
33
+ self.enabled = enabled
34
+ self.discoverLAN = discoverLAN
35
+ self.minLocalCapability = minLocalCapability
36
+ self.rendezvousUrl = rendezvousUrl
37
+ self.knownPeers = knownPeers
38
+ self.expireAfterDays = expireAfterDays
39
+ }
40
+ }
41
+
42
+ /// Modern start-options surface that wraps `DVAIBridgeConfig` and adds
43
+ /// the `offload` knob. Both call sites are supported on
44
+ /// `DVAIBridge.shared.start(...)`:
45
+ ///
46
+ /// try await DVAIBridge.shared.start(.init(backend: .auto, modelPath: "/x"))
47
+ /// try await DVAIBridge.shared.start(StartOptions(
48
+ /// backend: .auto,
49
+ /// modelPath: "/x",
50
+ /// offload: OffloadConfig(enabled: true, discoverLAN: true)
51
+ /// ))
52
+ ///
53
+ /// Internally `StartOptions` decomposes into `(DVAIBridgeConfig, OffloadConfig?)`
54
+ /// so all existing tests + call sites that take a config keep working.
55
+ public struct StartOptions: Sendable {
56
+ public var config: DVAIBridgeConfig
57
+ public var offload: OffloadConfig?
58
+ /// v3.2.2 — explicit path to the license `.jwt` file. Overrides the
59
+ /// auto-discovery walk (Bundle resource, Documents, etc). The same
60
+ /// .jwt format works across iOS / Android / .NET / RN / JS SDKs.
61
+ public var licenseKeyPath: String?
62
+ /// v3.2.2 — inline JWT license. Useful when the host app fetches
63
+ /// the license over the network and wants to inject the result
64
+ /// without touching disk. Wins over `licenseKeyPath` if both are set.
65
+ public var licenseToken: String?
66
+
67
+ public init(
68
+ config: DVAIBridgeConfig,
69
+ offload: OffloadConfig? = nil,
70
+ licenseKeyPath: String? = nil,
71
+ licenseToken: String? = nil
72
+ ) {
73
+ self.config = config
74
+ self.offload = offload
75
+ self.licenseKeyPath = licenseKeyPath
76
+ self.licenseToken = licenseToken
77
+ }
78
+
79
+ /// Convenience initializer that mirrors the documented public
80
+ /// surface in `docs/migration/v2.4-to-v3.0.md`.
81
+ public init(
82
+ backend: BackendKind = .auto,
83
+ modelPath: String? = nil,
84
+ mmprojPath: String? = nil,
85
+ tokenizerPath: String? = nil,
86
+ gpuLayers: Int = 99,
87
+ contextSize: Int = 2048,
88
+ threads: Int = 4,
89
+ embeddingMode: Bool = false,
90
+ httpBasePort: Int = 38883,
91
+ httpMaxPortAttempts: Int = 16,
92
+ corsOrigin: DVAIBridgeConfig.CORSOrigin = .wildcard,
93
+ autoUnloadOnLowMemory: Bool = false,
94
+ logLevel: String = "info",
95
+ offload: OffloadConfig? = nil,
96
+ licenseKeyPath: String? = nil,
97
+ licenseToken: String? = nil
98
+ ) {
99
+ self.config = DVAIBridgeConfig(
100
+ backend: backend,
101
+ modelPath: modelPath,
102
+ mmprojPath: mmprojPath,
103
+ tokenizerPath: tokenizerPath,
104
+ gpuLayers: gpuLayers,
105
+ contextSize: contextSize,
106
+ threads: threads,
107
+ embeddingMode: embeddingMode,
108
+ httpBasePort: httpBasePort,
109
+ httpMaxPortAttempts: httpMaxPortAttempts,
110
+ corsOrigin: corsOrigin,
111
+ autoUnloadOnLowMemory: autoUnloadOnLowMemory,
112
+ logLevel: logLevel
113
+ )
114
+ self.offload = offload
115
+ self.licenseKeyPath = licenseKeyPath
116
+ self.licenseToken = licenseToken
117
+ }
118
+ }
@@ -1,34 +1,34 @@
1
- import Foundation
2
-
3
- /// Lifecycle progress event emitted during start(), downloadModel(), and
4
- /// related long-running operations. Mirrors the existing TS / Capacitor
5
- /// `ProgressEvent` shape so the iOS SDK reads identically to the JS API.
6
- public struct ProgressEvent: Sendable, Equatable, Codable {
7
- public enum Phase: String, Sendable, Codable {
8
- case download
9
- case verify
10
- case load
11
- case ready
12
- case error
13
- }
14
-
15
- public let phase: Phase
16
- public let bytesReceived: Int64?
17
- public let bytesTotal: Int64?
18
- public let percent: Double?
19
- public let message: String?
20
-
21
- public init(
22
- phase: Phase,
23
- bytesReceived: Int64? = nil,
24
- bytesTotal: Int64? = nil,
25
- percent: Double? = nil,
26
- message: String? = nil
27
- ) {
28
- self.phase = phase
29
- self.bytesReceived = bytesReceived
30
- self.bytesTotal = bytesTotal
31
- self.percent = percent
32
- self.message = message
33
- }
34
- }
1
+ import Foundation
2
+
3
+ /// Lifecycle progress event emitted during start(), downloadModel(), and
4
+ /// related long-running operations. Mirrors the existing TS / Capacitor
5
+ /// `ProgressEvent` shape so the iOS SDK reads identically to the JS API.
6
+ public struct ProgressEvent: Sendable, Equatable, Codable {
7
+ public enum Phase: String, Sendable, Codable {
8
+ case download
9
+ case verify
10
+ case load
11
+ case ready
12
+ case error
13
+ }
14
+
15
+ public let phase: Phase
16
+ public let bytesReceived: Int64?
17
+ public let bytesTotal: Int64?
18
+ public let percent: Double?
19
+ public let message: String?
20
+
21
+ public init(
22
+ phase: Phase,
23
+ bytesReceived: Int64? = nil,
24
+ bytesTotal: Int64? = nil,
25
+ percent: Double? = nil,
26
+ message: String? = nil
27
+ ) {
28
+ self.phase = phase
29
+ self.bytesReceived = bytesReceived
30
+ self.bytesTotal = bytesTotal
31
+ self.percent = percent
32
+ self.message = message
33
+ }
34
+ }
@@ -1,19 +1,19 @@
1
- import Foundation
2
-
3
- public enum CoreMLBackendError: Error, LocalizedError, Sendable {
4
- case modelLoadFailed(reason: String)
5
- case tokenizerLoadFailed(reason: String)
6
- case stateInitFailed(reason: String)
7
- case generationFailed(reason: String)
8
- case unsupportedModelFormat(reason: String)
9
-
10
- public var errorDescription: String? {
11
- switch self {
12
- case .modelLoadFailed(let r): return "CoreML model load failed: \(r)"
13
- case .tokenizerLoadFailed(let r): return "Tokenizer load failed: \(r)"
14
- case .stateInitFailed(let r): return "MLState init failed: \(r)"
15
- case .generationFailed(let r): return "Generation failed: \(r)"
16
- case .unsupportedModelFormat(let r): return "Unsupported model format: \(r)"
17
- }
18
- }
19
- }
1
+ import Foundation
2
+
3
+ public enum CoreMLBackendError: Error, LocalizedError, Sendable {
4
+ case modelLoadFailed(reason: String)
5
+ case tokenizerLoadFailed(reason: String)
6
+ case stateInitFailed(reason: String)
7
+ case generationFailed(reason: String)
8
+ case unsupportedModelFormat(reason: String)
9
+
10
+ public var errorDescription: String? {
11
+ switch self {
12
+ case .modelLoadFailed(let r): return "CoreML model load failed: \(r)"
13
+ case .tokenizerLoadFailed(let r): return "Tokenizer load failed: \(r)"
14
+ case .stateInitFailed(let r): return "MLState init failed: \(r)"
15
+ case .generationFailed(let r): return "Generation failed: \(r)"
16
+ case .unsupportedModelFormat(let r): return "Unsupported model format: \(r)"
17
+ }
18
+ }
19
+ }
@@ -1,123 +1,123 @@
1
- import Foundation
2
- #if !COCOAPODS
3
- import DVAISharedCore
4
- #endif
5
-
6
- /// `DVAIHandlers` conformer for the CoreML backend.
7
- /// Translates OpenAI-compatible HTTP requests into CoreMLGenerator calls and
8
- /// formats the results as OpenAI JSON / SSE responses.
9
- @available(iOS 18.0, macOS 15.0, *)
10
- public final class CoreMLHandlers: DVAIHandlers {
11
- private let generator: CoreMLGenerator
12
- private let modelId: String
13
-
14
- // Internal init — `CoreMLGenerator` is an implementation detail of
15
- // DVAICoreMLCore and stays internal. The only construction site is
16
- // `CoreMLPluginState.start()` inside the same module.
17
- internal init(generator: CoreMLGenerator, modelId: String) {
18
- self.generator = generator
19
- self.modelId = modelId
20
- }
21
-
22
- public func handleChatCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
23
- guard let messages = body["messages"] as? [[String: String]] else {
24
- return .error(400, "messages array is required")
25
- }
26
- let stream = (body["stream"] as? Bool) ?? false
27
- let temperature = (body["temperature"] as? Double).map(Float.init) ?? 0.0
28
- let topP = (body["top_p"] as? Double).map(Float.init) ?? 1.0
29
- let maxTokens = (body["max_tokens"] as? Int) ?? 512
30
-
31
- // Build a generator with the per-request sampling params.
32
- let requestSampler = CoreMLSampler(temperature: temperature, topP: topP, topK: 0)
33
- let requestGenerator = CoreMLGenerator(
34
- engine: generator.engine,
35
- tokenizer: generator.tokenizer,
36
- sampler: requestSampler,
37
- maxNewTokens: maxTokens
38
- )
39
-
40
- let promptTokens: [Int]
41
- do {
42
- promptTokens = try generator.tokenizer.applyChatTemplate(messages: messages)
43
- } catch {
44
- return .error(400, "tokenizer chat-template failed: \(error.localizedDescription)")
45
- }
46
-
47
- if stream {
48
- let sse = requestGenerator.generateStream(promptTokens: promptTokens)
49
- let streamId = UUID().uuidString
50
- let mappedStream = AsyncStream<String> { cont in
51
- Task {
52
- do {
53
- for try await chunk in sse {
54
- let evt = "data: {\"id\":\"\(streamId)\",\"object\":\"chat.completion.chunk\",\"created\":\(Int(Date().timeIntervalSince1970)),\"model\":\"\(modelId)\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\(jsonString(chunk))},\"finish_reason\":null}]}\n\n"
55
- cont.yield(evt)
56
- }
57
- cont.yield("data: [DONE]\n\n")
58
- cont.finish()
59
- } catch {
60
- cont.yield("data: {\"error\":\"\(error.localizedDescription)\"}\n\n")
61
- cont.finish()
62
- }
63
- }
64
- }
65
- return .sse(mappedStream)
66
- }
67
-
68
- let text: String
69
- do {
70
- text = try await requestGenerator.generate(promptTokens: promptTokens)
71
- } catch {
72
- return .error(500, "generation failed: \(error.localizedDescription)")
73
- }
74
- let responseJSON: [String: Any] = [
75
- "id": UUID().uuidString,
76
- "object": "chat.completion",
77
- "created": Int(Date().timeIntervalSince1970),
78
- "model": modelId,
79
- "choices": [[
80
- "index": 0,
81
- "message": ["role": "assistant", "content": text],
82
- "finish_reason": "stop"
83
- ]],
84
- "usage": [
85
- "prompt_tokens": promptTokens.count,
86
- "completion_tokens": -1, // CoreML decoding doesn't track this per checkpoint
87
- "total_tokens": -1
88
- ]
89
- ]
90
- return .json(200, responseJSON)
91
- }
92
-
93
- public func handleCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
94
- let prompt = body["prompt"] as? String ?? ""
95
- let chatBody: [String: Any] = [
96
- "messages": [["role": "user", "content": prompt]],
97
- "stream": body["stream"] as? Bool ?? false,
98
- "temperature": body["temperature"] as? Double ?? 0.0,
99
- "top_p": body["top_p"] as? Double ?? 1.0,
100
- "max_tokens": body["max_tokens"] as? Int ?? 512,
101
- ]
102
- return try await handleChatCompletion(body: chatBody, ctx: ctx)
103
- }
104
-
105
- public func handleEmbeddings(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
106
- return .error(501, "embeddings not yet supported by the CoreML backend")
107
- }
108
-
109
- public func handleModels(ctx: HandlerContext) async throws -> HandlerResponse {
110
- return .json(200, [
111
- "object": "list",
112
- "data": [["id": modelId, "object": "model", "owned_by": "dvai-bridge"]]
113
- ])
114
- }
115
-
116
- /// JSON-encode a single string value (produces a quoted JSON string).
117
- private func jsonString(_ s: String) -> String {
118
- let data = (try? JSONSerialization.data(withJSONObject: [s], options: [])) ?? Data()
119
- let str = String(data: data, encoding: .utf8) ?? "[\"\"]"
120
- // Strip the surrounding array brackets — leaves the quoted string value.
121
- return String(str.dropFirst().dropLast())
122
- }
123
- }
1
+ import Foundation
2
+ #if !COCOAPODS
3
+ import DVAISharedCore
4
+ #endif
5
+
6
+ /// `DVAIHandlers` conformer for the CoreML backend.
7
+ /// Translates OpenAI-compatible HTTP requests into CoreMLGenerator calls and
8
+ /// formats the results as OpenAI JSON / SSE responses.
9
+ @available(iOS 18.0, macOS 15.0, *)
10
+ public final class CoreMLHandlers: DVAIHandlers {
11
+ private let generator: CoreMLGenerator
12
+ private let modelId: String
13
+
14
+ // Internal init — `CoreMLGenerator` is an implementation detail of
15
+ // DVAICoreMLCore and stays internal. The only construction site is
16
+ // `CoreMLPluginState.start()` inside the same module.
17
+ internal init(generator: CoreMLGenerator, modelId: String) {
18
+ self.generator = generator
19
+ self.modelId = modelId
20
+ }
21
+
22
+ public func handleChatCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
23
+ guard let messages = body["messages"] as? [[String: String]] else {
24
+ return .error(400, "messages array is required")
25
+ }
26
+ let stream = (body["stream"] as? Bool) ?? false
27
+ let temperature = (body["temperature"] as? Double).map(Float.init) ?? 0.0
28
+ let topP = (body["top_p"] as? Double).map(Float.init) ?? 1.0
29
+ let maxTokens = (body["max_tokens"] as? Int) ?? 512
30
+
31
+ // Build a generator with the per-request sampling params.
32
+ let requestSampler = CoreMLSampler(temperature: temperature, topP: topP, topK: 0)
33
+ let requestGenerator = CoreMLGenerator(
34
+ engine: generator.engine,
35
+ tokenizer: generator.tokenizer,
36
+ sampler: requestSampler,
37
+ maxNewTokens: maxTokens
38
+ )
39
+
40
+ let promptTokens: [Int]
41
+ do {
42
+ promptTokens = try generator.tokenizer.applyChatTemplate(messages: messages)
43
+ } catch {
44
+ return .error(400, "tokenizer chat-template failed: \(error.localizedDescription)")
45
+ }
46
+
47
+ if stream {
48
+ let sse = requestGenerator.generateStream(promptTokens: promptTokens)
49
+ let streamId = UUID().uuidString
50
+ let mappedStream = AsyncStream<String> { cont in
51
+ Task {
52
+ do {
53
+ for try await chunk in sse {
54
+ let evt = "data: {\"id\":\"\(streamId)\",\"object\":\"chat.completion.chunk\",\"created\":\(Int(Date().timeIntervalSince1970)),\"model\":\"\(modelId)\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\(jsonString(chunk))},\"finish_reason\":null}]}\n\n"
55
+ cont.yield(evt)
56
+ }
57
+ cont.yield("data: [DONE]\n\n")
58
+ cont.finish()
59
+ } catch {
60
+ cont.yield("data: {\"error\":\"\(error.localizedDescription)\"}\n\n")
61
+ cont.finish()
62
+ }
63
+ }
64
+ }
65
+ return .sse(mappedStream)
66
+ }
67
+
68
+ let text: String
69
+ do {
70
+ text = try await requestGenerator.generate(promptTokens: promptTokens)
71
+ } catch {
72
+ return .error(500, "generation failed: \(error.localizedDescription)")
73
+ }
74
+ let responseJSON: [String: Any] = [
75
+ "id": UUID().uuidString,
76
+ "object": "chat.completion",
77
+ "created": Int(Date().timeIntervalSince1970),
78
+ "model": modelId,
79
+ "choices": [[
80
+ "index": 0,
81
+ "message": ["role": "assistant", "content": text],
82
+ "finish_reason": "stop"
83
+ ]],
84
+ "usage": [
85
+ "prompt_tokens": promptTokens.count,
86
+ "completion_tokens": -1, // CoreML decoding doesn't track this per checkpoint
87
+ "total_tokens": -1
88
+ ]
89
+ ]
90
+ return .json(200, responseJSON)
91
+ }
92
+
93
+ public func handleCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
94
+ let prompt = body["prompt"] as? String ?? ""
95
+ let chatBody: [String: Any] = [
96
+ "messages": [["role": "user", "content": prompt]],
97
+ "stream": body["stream"] as? Bool ?? false,
98
+ "temperature": body["temperature"] as? Double ?? 0.0,
99
+ "top_p": body["top_p"] as? Double ?? 1.0,
100
+ "max_tokens": body["max_tokens"] as? Int ?? 512,
101
+ ]
102
+ return try await handleChatCompletion(body: chatBody, ctx: ctx)
103
+ }
104
+
105
+ public func handleEmbeddings(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
106
+ return .error(501, "embeddings not yet supported by the CoreML backend")
107
+ }
108
+
109
+ public func handleModels(ctx: HandlerContext) async throws -> HandlerResponse {
110
+ return .json(200, [
111
+ "object": "list",
112
+ "data": [["id": modelId, "object": "model", "owned_by": "dvai-bridge"]]
113
+ ])
114
+ }
115
+
116
+ /// JSON-encode a single string value (produces a quoted JSON string).
117
+ private func jsonString(_ s: String) -> String {
118
+ let data = (try? JSONSerialization.data(withJSONObject: [s], options: [])) ?? Data()
119
+ let str = String(data: data, encoding: .utf8) ?? "[\"\"]"
120
+ // Strip the surrounding array brackets — leaves the quoted string value.
121
+ return String(str.dropFirst().dropLast())
122
+ }
123
+ }