@dvai-bridge/ios 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Package.swift +104 -104
- package/ios/Sources/DVAIBridge/BackendKind.swift +23 -23
- package/ios/Sources/DVAIBridge/BoundServer.swift +46 -46
- package/ios/Sources/DVAIBridge/DVAIBridge.swift +658 -658
- package/ios/Sources/DVAIBridge/DVAIBridgeConfig.swift +86 -86
- package/ios/Sources/DVAIBridge/DVAIBridgeError.swift +33 -33
- package/ios/Sources/DVAIBridge/Internal/BackendSelector.swift +59 -59
- package/ios/Sources/DVAIBridge/Internal/ProgressBroadcaster.swift +84 -84
- package/ios/Sources/DVAIBridge/License/Audience.swift +133 -133
- package/ios/Sources/DVAIBridge/License/Discovery.swift +164 -164
- package/ios/Sources/DVAIBridge/License/LicenseValidator.swift +392 -392
- package/ios/Sources/DVAIBridge/License/PublicKeys.swift +114 -114
- package/ios/Sources/DVAIBridge/License/Types.swift +195 -195
- package/ios/Sources/DVAIBridge/Offload/OffloadConfig.swift +118 -118
- package/ios/Sources/DVAIBridge/ProgressEvent.swift +34 -34
- package/ios/Sources/DVAICoreMLCore/CoreMLBackendError.swift +19 -19
- package/ios/Sources/DVAICoreMLCore/CoreMLHandlers.swift +123 -123
- package/ios/Sources/DVAICoreMLCore/CoreMLPluginState.swift +130 -130
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLEngine.swift +137 -137
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLGenerator.swift +108 -108
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLSampler.swift +96 -96
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLTokenizer.swift +69 -69
- package/ios/Tests/DVAIBridgeTests/BackendSelectorTests.swift +53 -53
- package/ios/Tests/DVAIBridgeTests/CoreMLEngineTests.swift +18 -18
- package/ios/Tests/DVAIBridgeTests/CoreMLGeneratorShapeTests.swift +11 -11
- package/ios/Tests/DVAIBridgeTests/CoreMLHandlersTests.swift +32 -32
- package/ios/Tests/DVAIBridgeTests/CoreMLPluginStateTests.swift +41 -41
- package/ios/Tests/DVAIBridgeTests/CoreMLSamplerTests.swift +40 -40
- package/ios/Tests/DVAIBridgeTests/CoreMLTokenizerTests.swift +19 -19
- package/ios/Tests/DVAIBridgeTests/DVAIBridgeAPIShapeTests.swift +37 -37
- package/ios/Tests/DVAIBridgeTests/DVAIBridgeConfigTests.swift +52 -52
- package/ios/Tests/DVAIBridgeTests/DVAIBridgeErrorTests.swift +33 -33
- package/ios/Tests/DVAIBridgeTests/LicenseValidatorTests.swift +658 -658
- package/ios/Tests/DVAIBridgeTests/ProgressBroadcasterTests.swift +69 -69
- package/ios/Tests/DVAIBridgeTests/ProgressEventTests.swift +25 -25
- package/ios/Tests/DVAIBridgeTests/ReactiveStateTests.swift +45 -45
- package/ios/Tests/DVAIBridgeTests/RealModelIntegrationTest.swift +385 -359
- package/package.json +3 -4
- package/DVAIBridge.podspec +0 -120
- package/LICENSE +0 -51
- package/README.md +0 -199
|
@@ -1,118 +1,118 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
|
|
3
|
-
/// Offload settings for a single bridge instance; the Swift counterpart of the
/// TS-side `OffloadConfig` in `packages/dvai-bridge-core/src/offload/types.ts`.
///
/// Offload is opt-in at v3.0, so a default-constructed value has
/// `enabled == false`.
///
/// Hand a value to `StartOptions(offload:)` when calling
/// `DVAIBridge.shared.start(_:)`.
public struct OffloadConfig: Sendable {

    // MARK: Stored settings

    /// Master switch for offload. Initializer default: `false`.
    public var enabled: Bool

    /// Whether mDNS is run to discover LAN peers. Initializer default: `true`.
    public var discoverLAN: Bool

    /// Local tok/s threshold below which a peer is looked for.
    /// Initializer default: 10.
    public var minLocalCapability: Double

    /// Optional rendezvous-server URL; the internet path is enabled when set.
    public var rendezvousUrl: URL?

    /// Optional pre-known peers, for which discovery is skipped.
    public var knownPeers: [MDNSPeer]

    /// Pairing TTL in days. Initializer default: 30, matching the JS-side
    /// default in `PairingPolicy`.
    public var expireAfterDays: Int

    // MARK: Initialization

    /// Creates a configuration. Every argument has a default, so
    /// `OffloadConfig()` yields the disabled, opt-in baseline.
    public init(
        enabled: Bool = false,
        discoverLAN: Bool = true,
        minLocalCapability: Double = 10,
        rendezvousUrl: URL? = nil,
        knownPeers: [MDNSPeer] = [],
        expireAfterDays: Int = 30
    ) {
        // Switches first, then tuning values, then peer/network inputs.
        self.enabled = enabled
        self.discoverLAN = discoverLAN

        self.minLocalCapability = minLocalCapability
        self.expireAfterDays = expireAfterDays

        self.knownPeers = knownPeers
        self.rendezvousUrl = rendezvousUrl
    }
}
|
|
41
|
-
|
|
42
|
-
/// Start-options surface that bundles a `DVAIBridgeConfig` with the `offload`
/// knob and the license inputs. Both call sites are supported on
/// `DVAIBridge.shared.start(...)`:
///
///     try await DVAIBridge.shared.start(.init(backend: .auto, modelPath: "/x"))
///     try await DVAIBridge.shared.start(StartOptions(
///         backend: .auto,
///         modelPath: "/x",
///         offload: OffloadConfig(enabled: true, discoverLAN: true)
///     ))
///
/// Internally `StartOptions` decomposes into `(DVAIBridgeConfig, OffloadConfig?)`
/// so existing tests and call sites that take a config keep working.
public struct StartOptions: Sendable {

    /// The wrapped bridge configuration.
    public var config: DVAIBridgeConfig

    /// Optional offload configuration; `nil` when none was supplied.
    public var offload: OffloadConfig?

    /// v3.2.2 — explicit path to the license `.jwt` file. Overrides the
    /// auto-discovery walk (Bundle resource, Documents, etc). The same
    /// .jwt format works across iOS / Android / .NET / RN / JS SDKs.
    public var licenseKeyPath: String?

    /// v3.2.2 — inline JWT license, for hosts that fetch the license over the
    /// network and want to inject it without touching disk. Wins over
    /// `licenseKeyPath` if both are set.
    public var licenseToken: String?

    /// Designated initializer: wraps an already-built `DVAIBridgeConfig`.
    public init(
        config: DVAIBridgeConfig,
        offload: OffloadConfig? = nil,
        licenseKeyPath: String? = nil,
        licenseToken: String? = nil
    ) {
        self.licenseToken = licenseToken
        self.licenseKeyPath = licenseKeyPath
        self.offload = offload
        self.config = config
    }

    /// Convenience initializer that mirrors the documented public
    /// surface in `docs/migration/v2.4-to-v3.0.md`. The config-related
    /// arguments are forwarded to `DVAIBridgeConfig`; the rest are stored as-is.
    public init(
        backend: BackendKind = .auto,
        modelPath: String? = nil,
        mmprojPath: String? = nil,
        tokenizerPath: String? = nil,
        gpuLayers: Int = 99,
        contextSize: Int = 2048,
        threads: Int = 4,
        embeddingMode: Bool = false,
        httpBasePort: Int = 38883,
        httpMaxPortAttempts: Int = 16,
        corsOrigin: DVAIBridgeConfig.CORSOrigin = .wildcard,
        autoUnloadOnLowMemory: Bool = false,
        logLevel: String = "info",
        offload: OffloadConfig? = nil,
        licenseKeyPath: String? = nil,
        licenseToken: String? = nil
    ) {
        let bridgeConfig = DVAIBridgeConfig(
            backend: backend,
            modelPath: modelPath,
            mmprojPath: mmprojPath,
            tokenizerPath: tokenizerPath,
            gpuLayers: gpuLayers,
            contextSize: contextSize,
            threads: threads,
            embeddingMode: embeddingMode,
            httpBasePort: httpBasePort,
            httpMaxPortAttempts: httpMaxPortAttempts,
            corsOrigin: corsOrigin,
            autoUnloadOnLowMemory: autoUnloadOnLowMemory,
            logLevel: logLevel
        )
        // Delegate so both initializers share one assignment path.
        self.init(
            config: bridgeConfig,
            offload: offload,
            licenseKeyPath: licenseKeyPath,
            licenseToken: licenseToken
        )
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/// Offload settings for a single bridge instance; the Swift counterpart of the
/// TS-side `OffloadConfig` in `packages/dvai-bridge-core/src/offload/types.ts`.
///
/// Offload is opt-in at v3.0, so a default-constructed value has
/// `enabled == false`.
///
/// Hand a value to `StartOptions(offload:)` when calling
/// `DVAIBridge.shared.start(_:)`.
public struct OffloadConfig: Sendable {

    // MARK: Stored settings

    /// Master switch for offload. Initializer default: `false`.
    public var enabled: Bool

    /// Whether mDNS is run to discover LAN peers. Initializer default: `true`.
    public var discoverLAN: Bool

    /// Local tok/s threshold below which a peer is looked for.
    /// Initializer default: 10.
    public var minLocalCapability: Double

    /// Optional rendezvous-server URL; the internet path is enabled when set.
    public var rendezvousUrl: URL?

    /// Optional pre-known peers, for which discovery is skipped.
    public var knownPeers: [MDNSPeer]

    /// Pairing TTL in days. Initializer default: 30, matching the JS-side
    /// default in `PairingPolicy`.
    public var expireAfterDays: Int

    // MARK: Initialization

    /// Creates a configuration. Every argument has a default, so
    /// `OffloadConfig()` yields the disabled, opt-in baseline.
    public init(
        enabled: Bool = false,
        discoverLAN: Bool = true,
        minLocalCapability: Double = 10,
        rendezvousUrl: URL? = nil,
        knownPeers: [MDNSPeer] = [],
        expireAfterDays: Int = 30
    ) {
        // Switches first, then tuning values, then peer/network inputs.
        self.enabled = enabled
        self.discoverLAN = discoverLAN

        self.minLocalCapability = minLocalCapability
        self.expireAfterDays = expireAfterDays

        self.knownPeers = knownPeers
        self.rendezvousUrl = rendezvousUrl
    }
}
|
|
41
|
+
|
|
42
|
+
/// Start-options surface that bundles a `DVAIBridgeConfig` with the `offload`
/// knob and the license inputs. Both call sites are supported on
/// `DVAIBridge.shared.start(...)`:
///
///     try await DVAIBridge.shared.start(.init(backend: .auto, modelPath: "/x"))
///     try await DVAIBridge.shared.start(StartOptions(
///         backend: .auto,
///         modelPath: "/x",
///         offload: OffloadConfig(enabled: true, discoverLAN: true)
///     ))
///
/// Internally `StartOptions` decomposes into `(DVAIBridgeConfig, OffloadConfig?)`
/// so existing tests and call sites that take a config keep working.
public struct StartOptions: Sendable {

    /// The wrapped bridge configuration.
    public var config: DVAIBridgeConfig

    /// Optional offload configuration; `nil` when none was supplied.
    public var offload: OffloadConfig?

    /// v3.2.2 — explicit path to the license `.jwt` file. Overrides the
    /// auto-discovery walk (Bundle resource, Documents, etc). The same
    /// .jwt format works across iOS / Android / .NET / RN / JS SDKs.
    public var licenseKeyPath: String?

    /// v3.2.2 — inline JWT license, for hosts that fetch the license over the
    /// network and want to inject it without touching disk. Wins over
    /// `licenseKeyPath` if both are set.
    public var licenseToken: String?

    /// Designated initializer: wraps an already-built `DVAIBridgeConfig`.
    public init(
        config: DVAIBridgeConfig,
        offload: OffloadConfig? = nil,
        licenseKeyPath: String? = nil,
        licenseToken: String? = nil
    ) {
        self.licenseToken = licenseToken
        self.licenseKeyPath = licenseKeyPath
        self.offload = offload
        self.config = config
    }

    /// Convenience initializer that mirrors the documented public
    /// surface in `docs/migration/v2.4-to-v3.0.md`. The config-related
    /// arguments are forwarded to `DVAIBridgeConfig`; the rest are stored as-is.
    public init(
        backend: BackendKind = .auto,
        modelPath: String? = nil,
        mmprojPath: String? = nil,
        tokenizerPath: String? = nil,
        gpuLayers: Int = 99,
        contextSize: Int = 2048,
        threads: Int = 4,
        embeddingMode: Bool = false,
        httpBasePort: Int = 38883,
        httpMaxPortAttempts: Int = 16,
        corsOrigin: DVAIBridgeConfig.CORSOrigin = .wildcard,
        autoUnloadOnLowMemory: Bool = false,
        logLevel: String = "info",
        offload: OffloadConfig? = nil,
        licenseKeyPath: String? = nil,
        licenseToken: String? = nil
    ) {
        let bridgeConfig = DVAIBridgeConfig(
            backend: backend,
            modelPath: modelPath,
            mmprojPath: mmprojPath,
            tokenizerPath: tokenizerPath,
            gpuLayers: gpuLayers,
            contextSize: contextSize,
            threads: threads,
            embeddingMode: embeddingMode,
            httpBasePort: httpBasePort,
            httpMaxPortAttempts: httpMaxPortAttempts,
            corsOrigin: corsOrigin,
            autoUnloadOnLowMemory: autoUnloadOnLowMemory,
            logLevel: logLevel
        )
        // Delegate so both initializers share one assignment path.
        self.init(
            config: bridgeConfig,
            offload: offload,
            licenseKeyPath: licenseKeyPath,
            licenseToken: licenseToken
        )
    }
}
|
|
@@ -1,34 +1,34 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
|
|
3
|
-
/// A lifecycle progress event emitted during start(), downloadModel(), and
/// related long-running operations. The shape mirrors the existing
/// TS / Capacitor `ProgressEvent` so the iOS SDK reads identically to the JS API.
public struct ProgressEvent: Sendable, Equatable, Codable {

    /// Which stage of the operation the event describes. Raw values are the
    /// case names, which is what `Codable` reads and writes.
    public enum Phase: String, Sendable, Codable {
        case download, verify, load, ready, error
    }

    /// Stage this event belongs to.
    public let phase: Phase
    /// Bytes received so far, when supplied.
    public let bytesReceived: Int64?
    /// Total expected bytes, when supplied.
    public let bytesTotal: Int64?
    /// Progress figure, when supplied; stored exactly as given.
    public let percent: Double?
    /// Optional free-form message.
    public let message: String?

    /// Creates an event. Only `phase` is required; every other field
    /// defaults to `nil`.
    public init(
        phase: Phase,
        bytesReceived: Int64? = nil,
        bytesTotal: Int64? = nil,
        percent: Double? = nil,
        message: String? = nil
    ) {
        self.message = message
        self.percent = percent
        self.bytesTotal = bytesTotal
        self.bytesReceived = bytesReceived
        self.phase = phase
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/// A lifecycle progress event emitted during start(), downloadModel(), and
/// related long-running operations. The shape mirrors the existing
/// TS / Capacitor `ProgressEvent` so the iOS SDK reads identically to the JS API.
public struct ProgressEvent: Sendable, Equatable, Codable {

    /// Which stage of the operation the event describes. Raw values are the
    /// case names, which is what `Codable` reads and writes.
    public enum Phase: String, Sendable, Codable {
        case download, verify, load, ready, error
    }

    /// Stage this event belongs to.
    public let phase: Phase
    /// Bytes received so far, when supplied.
    public let bytesReceived: Int64?
    /// Total expected bytes, when supplied.
    public let bytesTotal: Int64?
    /// Progress figure, when supplied; stored exactly as given.
    public let percent: Double?
    /// Optional free-form message.
    public let message: String?

    /// Creates an event. Only `phase` is required; every other field
    /// defaults to `nil`.
    public init(
        phase: Phase,
        bytesReceived: Int64? = nil,
        bytesTotal: Int64? = nil,
        percent: Double? = nil,
        message: String? = nil
    ) {
        self.message = message
        self.percent = percent
        self.bytesTotal = bytesTotal
        self.bytesReceived = bytesReceived
        self.phase = phase
    }
}
|
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
|
|
3
|
-
/// Failures reported by the CoreML backend. Every case carries a free-form
/// `reason` that is appended to the case's fixed message prefix.
public enum CoreMLBackendError: Error, LocalizedError, Sendable {
    case modelLoadFailed(reason: String)
    case tokenizerLoadFailed(reason: String)
    case stateInitFailed(reason: String)
    case generationFailed(reason: String)
    case unsupportedModelFormat(reason: String)

    /// Message of the form `"<prefix>: <reason>"`; never `nil` for any case.
    public var errorDescription: String? {
        // Split each case into its fixed prefix and its payload, then
        // format once at the end.
        let parts: (prefix: String, reason: String)
        switch self {
        case .modelLoadFailed(let reason):
            parts = ("CoreML model load failed", reason)
        case .tokenizerLoadFailed(let reason):
            parts = ("Tokenizer load failed", reason)
        case .stateInitFailed(let reason):
            parts = ("MLState init failed", reason)
        case .generationFailed(let reason):
            parts = ("Generation failed", reason)
        case .unsupportedModelFormat(let reason):
            parts = ("Unsupported model format", reason)
        }
        return "\(parts.prefix): \(parts.reason)"
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/// Failures reported by the CoreML backend. Every case carries a free-form
/// `reason` that is appended to the case's fixed message prefix.
public enum CoreMLBackendError: Error, LocalizedError, Sendable {
    case modelLoadFailed(reason: String)
    case tokenizerLoadFailed(reason: String)
    case stateInitFailed(reason: String)
    case generationFailed(reason: String)
    case unsupportedModelFormat(reason: String)

    /// Message of the form `"<prefix>: <reason>"`; never `nil` for any case.
    public var errorDescription: String? {
        // Split each case into its fixed prefix and its payload, then
        // format once at the end.
        let parts: (prefix: String, reason: String)
        switch self {
        case .modelLoadFailed(let reason):
            parts = ("CoreML model load failed", reason)
        case .tokenizerLoadFailed(let reason):
            parts = ("Tokenizer load failed", reason)
        case .stateInitFailed(let reason):
            parts = ("MLState init failed", reason)
        case .generationFailed(let reason):
            parts = ("Generation failed", reason)
        case .unsupportedModelFormat(let reason):
            parts = ("Unsupported model format", reason)
        }
        return "\(parts.prefix): \(parts.reason)"
    }
}
|
|
@@ -1,123 +1,123 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
#if !COCOAPODS
|
|
3
|
-
import DVAISharedCore
|
|
4
|
-
#endif
|
|
5
|
-
|
|
6
|
-
/// `DVAIHandlers` conformer for the CoreML backend.
/// Translates OpenAI-compatible HTTP requests into `CoreMLGenerator` calls and
/// formats the results as OpenAI JSON / SSE responses.
@available(iOS 18.0, macOS 15.0, *)
public final class CoreMLHandlers: DVAIHandlers {
    private let generator: CoreMLGenerator
    private let modelId: String

    // Internal init — `CoreMLGenerator` is an implementation detail of
    // DVAICoreMLCore and stays internal. The only construction site is
    // `CoreMLPluginState.start()` inside the same module.
    internal init(generator: CoreMLGenerator, modelId: String) {
        self.generator = generator
        self.modelId = modelId
    }

    /// Handles a chat-completion request.
    ///
    /// Reads `messages` (required), `stream`, `temperature`, `top_p` and
    /// `max_tokens` from `body`, falling back to `false`, `0.0`, `1.0` and
    /// `512` respectively when a key is absent or has an unexpected type.
    ///
    /// - Returns: a `400` error when `messages` is missing/ill-typed or the
    ///   chat template fails, an SSE response when `stream` is true, a `500`
    ///   error when non-streaming generation throws, otherwise a `200` JSON
    ///   `chat.completion` payload.
    public func handleChatCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        guard let messages = body["messages"] as? [[String: String]] else {
            return .error(400, "messages array is required")
        }
        let stream = (body["stream"] as? Bool) ?? false
        let temperature = (body["temperature"] as? Double).map(Float.init) ?? 0.0
        let topP = (body["top_p"] as? Double).map(Float.init) ?? 1.0
        let maxTokens = (body["max_tokens"] as? Int) ?? 512

        // Build a generator with the per-request sampling params.
        let requestSampler = CoreMLSampler(temperature: temperature, topP: topP, topK: 0)
        let requestGenerator = CoreMLGenerator(
            engine: generator.engine,
            tokenizer: generator.tokenizer,
            sampler: requestSampler,
            maxNewTokens: maxTokens
        )

        let promptTokens: [Int]
        do {
            promptTokens = try generator.tokenizer.applyChatTemplate(messages: messages)
        } catch {
            return .error(400, "tokenizer chat-template failed: \(error.localizedDescription)")
        }

        if stream {
            let sse = requestGenerator.generateStream(promptTokens: promptTokens)
            let streamId = UUID().uuidString
            // Encode the model id once, as a JSON string literal, so an id
            // containing quotes or backslashes cannot break the event payload.
            let modelJSON = jsonString(modelId)
            let mappedStream = AsyncStream<String> { cont in
                let task = Task {
                    do {
                        for try await chunk in sse {
                            let evt = "data: {\"id\":\"\(streamId)\",\"object\":\"chat.completion.chunk\",\"created\":\(Int(Date().timeIntervalSince1970)),\"model\":\(modelJSON),\"choices\":[{\"index\":0,\"delta\":{\"content\":\(jsonString(chunk))},\"finish_reason\":null}]}\n\n"
                            cont.yield(evt)
                        }
                        cont.yield("data: [DONE]\n\n")
                        cont.finish()
                    } catch {
                        // The description is arbitrary text; JSON-encode it so
                        // the error event stays parseable.
                        cont.yield("data: {\"error\":\(jsonString(error.localizedDescription))}\n\n")
                        cont.finish()
                    }
                }
                // Stop pulling from the generator once the consumer is gone
                // (or the stream has finished) instead of leaving the task
                // running to completion.
                cont.onTermination = { _ in task.cancel() }
            }
            return .sse(mappedStream)
        }

        let text: String
        do {
            text = try await requestGenerator.generate(promptTokens: promptTokens)
        } catch {
            return .error(500, "generation failed: \(error.localizedDescription)")
        }
        let responseJSON: [String: Any] = [
            "id": UUID().uuidString,
            "object": "chat.completion",
            "created": Int(Date().timeIntervalSince1970),
            "model": modelId,
            "choices": [[
                "index": 0,
                "message": ["role": "assistant", "content": text],
                "finish_reason": "stop"
            ]],
            "usage": [
                "prompt_tokens": promptTokens.count,
                "completion_tokens": -1,  // CoreML decoding doesn't track this per checkpoint
                "total_tokens": -1
            ]
        ]
        return .json(200, responseJSON)
    }

    /// Handles a legacy text-completion request by wrapping `prompt` (empty
    /// string when absent) in a single `user` message and delegating to
    /// `handleChatCompletion(body:ctx:)` with the same sampling defaults.
    public func handleCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        let prompt = body["prompt"] as? String ?? ""
        let chatBody: [String: Any] = [
            "messages": [["role": "user", "content": prompt]],
            "stream": body["stream"] as? Bool ?? false,
            "temperature": body["temperature"] as? Double ?? 0.0,
            "top_p": body["top_p"] as? Double ?? 1.0,
            "max_tokens": body["max_tokens"] as? Int ?? 512,
        ]
        return try await handleChatCompletion(body: chatBody, ctx: ctx)
    }

    /// Embeddings are not implemented for this backend; always answers `501`.
    public func handleEmbeddings(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        return .error(501, "embeddings not yet supported by the CoreML backend")
    }

    /// Lists the single model this handler serves.
    public func handleModels(ctx: HandlerContext) async throws -> HandlerResponse {
        return .json(200, [
            "object": "list",
            "data": [["id": modelId, "object": "model", "owned_by": "dvai-bridge"]]
        ])
    }

    /// JSON-encode a single string value (produces a quoted JSON string).
    ///
    /// Falls back to the empty JSON string `""` if encoding fails, so callers
    /// that splice the result into a JSON document never end up with a
    /// missing value.
    private func jsonString(_ s: String) -> String {
        guard let data = try? JSONSerialization.data(withJSONObject: [s], options: []),
              let encoded = String(data: data, encoding: .utf8),
              encoded.count >= 2 else {
            return "\"\""
        }
        // Strip the surrounding array brackets — leaves the quoted string value.
        return String(encoded.dropFirst().dropLast())
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
#if !COCOAPODS
|
|
3
|
+
import DVAISharedCore
|
|
4
|
+
#endif
|
|
5
|
+
|
|
6
|
+
/// `DVAIHandlers` conformer for the CoreML backend.
/// Translates OpenAI-compatible HTTP requests into `CoreMLGenerator` calls and
/// formats the results as OpenAI JSON / SSE responses.
@available(iOS 18.0, macOS 15.0, *)
public final class CoreMLHandlers: DVAIHandlers {
    private let generator: CoreMLGenerator
    private let modelId: String

    // Internal init — `CoreMLGenerator` is an implementation detail of
    // DVAICoreMLCore and stays internal. The only construction site is
    // `CoreMLPluginState.start()` inside the same module.
    internal init(generator: CoreMLGenerator, modelId: String) {
        self.generator = generator
        self.modelId = modelId
    }

    /// Handles a chat-completion request.
    ///
    /// Reads `messages` (required), `stream`, `temperature`, `top_p` and
    /// `max_tokens` from `body`, falling back to `false`, `0.0`, `1.0` and
    /// `512` respectively when a key is absent or has an unexpected type.
    ///
    /// - Returns: a `400` error when `messages` is missing/ill-typed or the
    ///   chat template fails, an SSE response when `stream` is true, a `500`
    ///   error when non-streaming generation throws, otherwise a `200` JSON
    ///   `chat.completion` payload.
    public func handleChatCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        guard let messages = body["messages"] as? [[String: String]] else {
            return .error(400, "messages array is required")
        }
        let stream = (body["stream"] as? Bool) ?? false
        let temperature = (body["temperature"] as? Double).map(Float.init) ?? 0.0
        let topP = (body["top_p"] as? Double).map(Float.init) ?? 1.0
        let maxTokens = (body["max_tokens"] as? Int) ?? 512

        // Build a generator with the per-request sampling params.
        let requestSampler = CoreMLSampler(temperature: temperature, topP: topP, topK: 0)
        let requestGenerator = CoreMLGenerator(
            engine: generator.engine,
            tokenizer: generator.tokenizer,
            sampler: requestSampler,
            maxNewTokens: maxTokens
        )

        let promptTokens: [Int]
        do {
            promptTokens = try generator.tokenizer.applyChatTemplate(messages: messages)
        } catch {
            return .error(400, "tokenizer chat-template failed: \(error.localizedDescription)")
        }

        if stream {
            let sse = requestGenerator.generateStream(promptTokens: promptTokens)
            let streamId = UUID().uuidString
            // Encode the model id once, as a JSON string literal, so an id
            // containing quotes or backslashes cannot break the event payload.
            let modelJSON = jsonString(modelId)
            let mappedStream = AsyncStream<String> { cont in
                let task = Task {
                    do {
                        for try await chunk in sse {
                            let evt = "data: {\"id\":\"\(streamId)\",\"object\":\"chat.completion.chunk\",\"created\":\(Int(Date().timeIntervalSince1970)),\"model\":\(modelJSON),\"choices\":[{\"index\":0,\"delta\":{\"content\":\(jsonString(chunk))},\"finish_reason\":null}]}\n\n"
                            cont.yield(evt)
                        }
                        cont.yield("data: [DONE]\n\n")
                        cont.finish()
                    } catch {
                        // The description is arbitrary text; JSON-encode it so
                        // the error event stays parseable.
                        cont.yield("data: {\"error\":\(jsonString(error.localizedDescription))}\n\n")
                        cont.finish()
                    }
                }
                // Stop pulling from the generator once the consumer is gone
                // (or the stream has finished) instead of leaving the task
                // running to completion.
                cont.onTermination = { _ in task.cancel() }
            }
            return .sse(mappedStream)
        }

        let text: String
        do {
            text = try await requestGenerator.generate(promptTokens: promptTokens)
        } catch {
            return .error(500, "generation failed: \(error.localizedDescription)")
        }
        let responseJSON: [String: Any] = [
            "id": UUID().uuidString,
            "object": "chat.completion",
            "created": Int(Date().timeIntervalSince1970),
            "model": modelId,
            "choices": [[
                "index": 0,
                "message": ["role": "assistant", "content": text],
                "finish_reason": "stop"
            ]],
            "usage": [
                "prompt_tokens": promptTokens.count,
                "completion_tokens": -1,  // CoreML decoding doesn't track this per checkpoint
                "total_tokens": -1
            ]
        ]
        return .json(200, responseJSON)
    }

    /// Handles a legacy text-completion request by wrapping `prompt` (empty
    /// string when absent) in a single `user` message and delegating to
    /// `handleChatCompletion(body:ctx:)` with the same sampling defaults.
    public func handleCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        let prompt = body["prompt"] as? String ?? ""
        let chatBody: [String: Any] = [
            "messages": [["role": "user", "content": prompt]],
            "stream": body["stream"] as? Bool ?? false,
            "temperature": body["temperature"] as? Double ?? 0.0,
            "top_p": body["top_p"] as? Double ?? 1.0,
            "max_tokens": body["max_tokens"] as? Int ?? 512,
        ]
        return try await handleChatCompletion(body: chatBody, ctx: ctx)
    }

    /// Embeddings are not implemented for this backend; always answers `501`.
    public func handleEmbeddings(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
        return .error(501, "embeddings not yet supported by the CoreML backend")
    }

    /// Lists the single model this handler serves.
    public func handleModels(ctx: HandlerContext) async throws -> HandlerResponse {
        return .json(200, [
            "object": "list",
            "data": [["id": modelId, "object": "model", "owned_by": "dvai-bridge"]]
        ])
    }

    /// JSON-encode a single string value (produces a quoted JSON string).
    ///
    /// Falls back to the empty JSON string `""` if encoding fails, so callers
    /// that splice the result into a JSON document never end up with a
    /// missing value.
    private func jsonString(_ s: String) -> String {
        guard let data = try? JSONSerialization.data(withJSONObject: [s], options: []),
              let encoded = String(data: data, encoding: .utf8),
              encoded.count >= 2 else {
            return "\"\""
        }
        // Strip the surrounding array brackets — leaves the quoted string value.
        return String(encoded.dropFirst().dropLast())
    }
}
|