@dvai-bridge/ios 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Package.swift +104 -104
- package/ios/Sources/DVAIBridge/BackendKind.swift +23 -23
- package/ios/Sources/DVAIBridge/BoundServer.swift +46 -46
- package/ios/Sources/DVAIBridge/DVAIBridge.swift +658 -658
- package/ios/Sources/DVAIBridge/DVAIBridgeConfig.swift +86 -86
- package/ios/Sources/DVAIBridge/DVAIBridgeError.swift +33 -33
- package/ios/Sources/DVAIBridge/Internal/BackendSelector.swift +59 -59
- package/ios/Sources/DVAIBridge/Internal/ProgressBroadcaster.swift +84 -84
- package/ios/Sources/DVAIBridge/License/Audience.swift +133 -133
- package/ios/Sources/DVAIBridge/License/Discovery.swift +164 -164
- package/ios/Sources/DVAIBridge/License/LicenseValidator.swift +392 -392
- package/ios/Sources/DVAIBridge/License/PublicKeys.swift +114 -114
- package/ios/Sources/DVAIBridge/License/Types.swift +195 -195
- package/ios/Sources/DVAIBridge/Offload/OffloadConfig.swift +118 -118
- package/ios/Sources/DVAIBridge/ProgressEvent.swift +34 -34
- package/ios/Sources/DVAICoreMLCore/CoreMLBackendError.swift +19 -19
- package/ios/Sources/DVAICoreMLCore/CoreMLHandlers.swift +123 -123
- package/ios/Sources/DVAICoreMLCore/CoreMLPluginState.swift +130 -130
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLEngine.swift +137 -137
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLGenerator.swift +108 -108
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLSampler.swift +96 -96
- package/ios/Sources/DVAICoreMLCore/Internal/CoreMLTokenizer.swift +69 -69
- package/ios/Tests/DVAIBridgeTests/BackendSelectorTests.swift +53 -53
- package/ios/Tests/DVAIBridgeTests/CoreMLEngineTests.swift +18 -18
- package/ios/Tests/DVAIBridgeTests/CoreMLGeneratorShapeTests.swift +11 -11
- package/ios/Tests/DVAIBridgeTests/CoreMLHandlersTests.swift +32 -32
- package/ios/Tests/DVAIBridgeTests/CoreMLPluginStateTests.swift +41 -41
- package/ios/Tests/DVAIBridgeTests/CoreMLSamplerTests.swift +40 -40
- package/ios/Tests/DVAIBridgeTests/CoreMLTokenizerTests.swift +19 -19
- package/ios/Tests/DVAIBridgeTests/DVAIBridgeAPIShapeTests.swift +37 -37
- package/ios/Tests/DVAIBridgeTests/DVAIBridgeConfigTests.swift +52 -52
- package/ios/Tests/DVAIBridgeTests/DVAIBridgeErrorTests.swift +33 -33
- package/ios/Tests/DVAIBridgeTests/LicenseValidatorTests.swift +658 -658
- package/ios/Tests/DVAIBridgeTests/ProgressBroadcasterTests.swift +69 -69
- package/ios/Tests/DVAIBridgeTests/ProgressEventTests.swift +25 -25
- package/ios/Tests/DVAIBridgeTests/ReactiveStateTests.swift +45 -45
- package/ios/Tests/DVAIBridgeTests/RealModelIntegrationTest.swift +385 -359
- package/package.json +3 -4
- package/DVAIBridge.podspec +0 -120
- package/LICENSE +0 -51
- package/README.md +0 -199
|
@@ -1,108 +1,108 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
import CoreML
|
|
3
|
-
|
|
4
|
-
/// Orchestrates `CoreMLEngine` + `CoreMLTokenizer` + `CoreMLSampler` to
/// produce text from a prompt via autoregressive decoding.
///
/// CoreML prediction note (iOS 18):
/// `MLModel.prediction(from:using:options:)` is synchronous, and
/// `CoreMLEngine.runStep` is called here without `await`. Both `generate` and
/// `generateStream` therefore run the whole decode loop inside a
/// `Task.detached`, so the decode never runs inline on the caller's executor.
///
/// Cancellation is cooperative: the decode loop calls
/// `Task.checkCancellation()` before every model step. `generate` forwards
/// cancellation of the calling task to its detached worker, and
/// `generateStream` cancels its worker as soon as the stream terminates
/// (finished, consumer cancelled, or stream dropped).
@available(iOS 18.0, macOS 15.0, *)
internal struct CoreMLGenerator: @unchecked Sendable {
    let engine: CoreMLEngine
    let tokenizer: CoreMLTokenizer
    let sampler: CoreMLSampler
    let maxNewTokens: Int

    /// Buffered generation. Runs the full decode loop and returns the decoded text.
    ///
    /// - Parameter promptTokens: Token ids of the prompt; must not be empty.
    /// - Returns: The decoded text of all generated tokens (EOS excluded).
    /// - Throws: `CoreMLBackendError.generationFailed` for an empty prompt,
    ///   any error thrown by `engine.runStep`, or `CancellationError` when the
    ///   calling task is cancelled before decoding completes.
    func generate(promptTokens: [Int]) async throws -> String {
        let worker = Task.detached(priority: .userInitiated) { () throws -> String in
            var generated: [Int] = []
            try self.runDecodeLoop(promptTokens: promptTokens) { token in
                generated.append(token)
            }
            return self.tokenizer.decode(tokens: generated)
        }
        // A detached task does not inherit cancellation from its creator, so
        // forward it explicitly; otherwise a cancelled caller would still pay
        // for the entire decode.
        return try await withTaskCancellationHandler {
            try await worker.value
        } onCancel: {
            worker.cancel()
        }
    }

    /// Streaming generation. Yields each decoded token chunk via `AsyncThrowingStream`.
    ///
    /// Each generated token is decoded on its own with
    /// `tokenizer.decode(token:)` and yielded immediately.
    /// NOTE(review): decoding token-by-token may split characters that span
    /// several tokens — confirm against the tokenizer in use.
    ///
    /// - Parameter promptTokens: Token ids of the prompt; must not be empty.
    /// - Returns: A stream that finishes normally at EOS / `maxNewTokens`, or
    ///   finishes by throwing the error that interrupted decoding.
    func generateStream(promptTokens: [Int]) -> AsyncThrowingStream<String, Error> {
        AsyncThrowingStream { continuation in
            let worker = Task.detached(priority: .userInitiated) {
                do {
                    try self.runDecodeLoop(promptTokens: promptTokens) { token in
                        let chunk = self.tokenizer.decode(token: token)
                        continuation.yield(chunk)
                    }
                    continuation.finish()
                } catch {
                    continuation.finish(throwing: error)
                }
            }
            // Stop decoding once nobody is listening any more. Cancelling an
            // already-finished worker is a harmless no-op.
            continuation.onTermination = { _ in worker.cancel() }
        }
    }

    // MARK: - Shared decode loop

    /// Runs prefill + autoregressive decode, calling `onToken` once per
    /// generated token, in order. The EOS token is never reported.
    ///
    /// Prefill + decode unified: each `runStep` returns logits for the *next*
    /// token, so after feeding all prompt tokens the last logits give the
    /// first generated token. (A previous iteration of this code re-fed
    /// `promptTokens.last` as a separate step, which double-counted that
    /// token in the KV cache.)
    ///
    /// - Throws: `CoreMLBackendError.generationFailed` for an empty prompt,
    ///   `CancellationError` if the current task is cancelled, or whatever
    ///   `engine.runStep` / `onToken` throw.
    private func runDecodeLoop(
        promptTokens: [Int],
        onToken: (Int) throws -> Void
    ) throws {
        guard let firstToken = promptTokens.first else {
            throw CoreMLBackendError.generationFailed(reason: "prompt tokens are empty")
        }

        let state = engine.makeConversationState()

        // Prefill: feed every prompt token, keeping only the latest logits.
        var lastLogits: MLMultiArray = try engine.runStep(
            token: firstToken, kvCachePosition: 0, state: state
        )
        var kvPos = 1
        for token in promptTokens.dropFirst() {
            try Task.checkCancellation()
            lastLogits = try engine.runStep(
                token: token, kvCachePosition: kvPos, state: state
            )
            kvPos += 1
        }

        // Decode: emit tokens until EOS or the `maxNewTokens` budget is spent.
        var nextToken = sampler.sample(logits: lastLogits)
        var emitted = 0
        while emitted < maxNewTokens, nextToken != engine.eosTokenId {
            try Task.checkCancellation()
            try onToken(nextToken)
            emitted += 1
            // The budget is exhausted: skip the model step whose logits
            // would never be sampled into an emitted token.
            if emitted == maxNewTokens { break }
            lastLogits = try engine.runStep(
                token: nextToken, kvCachePosition: kvPos, state: state
            )
            kvPos += 1
            nextToken = sampler.sample(logits: lastLogits)
        }
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
import CoreML
|
|
3
|
+
|
|
4
|
+
/// Orchestrates `CoreMLEngine` + `CoreMLTokenizer` + `CoreMLSampler` to
/// produce text from a prompt via autoregressive decoding.
///
/// CoreML prediction note (iOS 18):
/// `MLModel.prediction(from:using:options:)` is synchronous, and
/// `CoreMLEngine.runStep` is called here without `await`. Both `generate` and
/// `generateStream` therefore run the whole decode loop inside a
/// `Task.detached`, so the decode never runs inline on the caller's executor.
///
/// Cancellation is cooperative: the decode loop calls
/// `Task.checkCancellation()` before every model step. `generate` forwards
/// cancellation of the calling task to its detached worker, and
/// `generateStream` cancels its worker as soon as the stream terminates
/// (finished, consumer cancelled, or stream dropped).
@available(iOS 18.0, macOS 15.0, *)
internal struct CoreMLGenerator: @unchecked Sendable {
    let engine: CoreMLEngine
    let tokenizer: CoreMLTokenizer
    let sampler: CoreMLSampler
    let maxNewTokens: Int

    /// Buffered generation. Runs the full decode loop and returns the decoded text.
    ///
    /// - Parameter promptTokens: Token ids of the prompt; must not be empty.
    /// - Returns: The decoded text of all generated tokens (EOS excluded).
    /// - Throws: `CoreMLBackendError.generationFailed` for an empty prompt,
    ///   any error thrown by `engine.runStep`, or `CancellationError` when the
    ///   calling task is cancelled before decoding completes.
    func generate(promptTokens: [Int]) async throws -> String {
        let worker = Task.detached(priority: .userInitiated) { () throws -> String in
            var generated: [Int] = []
            try self.runDecodeLoop(promptTokens: promptTokens) { token in
                generated.append(token)
            }
            return self.tokenizer.decode(tokens: generated)
        }
        // A detached task does not inherit cancellation from its creator, so
        // forward it explicitly; otherwise a cancelled caller would still pay
        // for the entire decode.
        return try await withTaskCancellationHandler {
            try await worker.value
        } onCancel: {
            worker.cancel()
        }
    }

    /// Streaming generation. Yields each decoded token chunk via `AsyncThrowingStream`.
    ///
    /// Each generated token is decoded on its own with
    /// `tokenizer.decode(token:)` and yielded immediately.
    /// NOTE(review): decoding token-by-token may split characters that span
    /// several tokens — confirm against the tokenizer in use.
    ///
    /// - Parameter promptTokens: Token ids of the prompt; must not be empty.
    /// - Returns: A stream that finishes normally at EOS / `maxNewTokens`, or
    ///   finishes by throwing the error that interrupted decoding.
    func generateStream(promptTokens: [Int]) -> AsyncThrowingStream<String, Error> {
        AsyncThrowingStream { continuation in
            let worker = Task.detached(priority: .userInitiated) {
                do {
                    try self.runDecodeLoop(promptTokens: promptTokens) { token in
                        let chunk = self.tokenizer.decode(token: token)
                        continuation.yield(chunk)
                    }
                    continuation.finish()
                } catch {
                    continuation.finish(throwing: error)
                }
            }
            // Stop decoding once nobody is listening any more. Cancelling an
            // already-finished worker is a harmless no-op.
            continuation.onTermination = { _ in worker.cancel() }
        }
    }

    // MARK: - Shared decode loop

    /// Runs prefill + autoregressive decode, calling `onToken` once per
    /// generated token, in order. The EOS token is never reported.
    ///
    /// Prefill + decode unified: each `runStep` returns logits for the *next*
    /// token, so after feeding all prompt tokens the last logits give the
    /// first generated token. (A previous iteration of this code re-fed
    /// `promptTokens.last` as a separate step, which double-counted that
    /// token in the KV cache.)
    ///
    /// - Throws: `CoreMLBackendError.generationFailed` for an empty prompt,
    ///   `CancellationError` if the current task is cancelled, or whatever
    ///   `engine.runStep` / `onToken` throw.
    private func runDecodeLoop(
        promptTokens: [Int],
        onToken: (Int) throws -> Void
    ) throws {
        guard let firstToken = promptTokens.first else {
            throw CoreMLBackendError.generationFailed(reason: "prompt tokens are empty")
        }

        let state = engine.makeConversationState()

        // Prefill: feed every prompt token, keeping only the latest logits.
        var lastLogits: MLMultiArray = try engine.runStep(
            token: firstToken, kvCachePosition: 0, state: state
        )
        var kvPos = 1
        for token in promptTokens.dropFirst() {
            try Task.checkCancellation()
            lastLogits = try engine.runStep(
                token: token, kvCachePosition: kvPos, state: state
            )
            kvPos += 1
        }

        // Decode: emit tokens until EOS or the `maxNewTokens` budget is spent.
        var nextToken = sampler.sample(logits: lastLogits)
        var emitted = 0
        while emitted < maxNewTokens, nextToken != engine.eosTokenId {
            try Task.checkCancellation()
            try onToken(nextToken)
            emitted += 1
            // The budget is exhausted: skip the model step whose logits
            // would never be sampled into an emitted token.
            if emitted == maxNewTokens { break }
            lastLogits = try engine.runStep(
                token: nextToken, kvCachePosition: kvPos, state: state
            )
            kvPos += 1
            nextToken = sampler.sample(logits: lastLogits)
        }
    }
}
|
|
@@ -1,96 +1,96 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
import CoreML
|
|
3
|
-
|
|
4
|
-
/// Sampling strategies for autoregressive decoding.
///
/// Pipeline used by `sample(logits:)`: greedy argmax when `temperature <= 0`,
/// otherwise temperature scaling → optional top-K → softmax → optional
/// nucleus (top-p) → categorical draw.
///
/// Note on `seed:` — `SystemRandomNumberGenerator` cannot be seeded; for
/// reproducible sampling a custom PRNG (e.g. Mulberry32) would be needed.
/// Per the plan, we drop `seed:` from the public-facing init entirely rather
/// than silently ignoring it. Apple-managed entropy is fine for production LLM
/// sampling.
internal struct CoreMLSampler {
    /// Softmax temperature; values `<= 0` select greedy decoding.
    let temperature: Float
    /// Nucleus threshold; values `>= 1.0` disable the top-p filter.
    let topP: Float
    /// Number of highest logits to keep; 0 = disabled.
    let topK: Int

    /// Sample a token id from a logits vector.
    ///
    /// - Parameter logits: Logits of length vocab_size. Float32 storage is
    ///   read directly; any other element type is converted element by
    ///   element instead of being reinterpreted as Float32.
    ///   NOTE(review): the Float32 path reads `count` consecutive elements
    ///   and so assumes contiguous storage — confirm for the model output.
    /// - Returns: The index of the chosen token.
    /// - Precondition: `logits` must contain at least one element.
    func sample(logits: MLMultiArray) -> Int {
        let values = Self.floatValues(of: logits)
        precondition(!values.isEmpty, "CoreMLSampler.sample: logits must not be empty")

        // 1. Greedy fast-path
        if temperature <= 0 {
            return argmax(values)
        }

        // 2. Apply temperature
        var scaled = values.map { $0 / temperature }

        // 3. Optional top-K filter
        if topK > 0 && topK < scaled.count {
            applyTopK(&scaled, k: topK)
        }

        // 4. Softmax → probabilities
        let probs = softmax(scaled)

        // 5. Optional nucleus (top-p) filter
        let filtered = topP < 1.0 ? applyTopP(probs, p: topP) : probs

        // 6. Categorical draw
        return categoricalSample(filtered)
    }

    // MARK: - Helpers

    /// Copies the logits into a `[Float]`, honouring the array's element type.
    private static func floatValues(of logits: MLMultiArray) -> [Float] {
        let count = logits.count
        guard count > 0 else { return [] }
        if logits.dataType == .float32 {
            let base = logits.dataPointer.assumingMemoryBound(to: Float32.self)
            return Array(UnsafeBufferPointer(start: base, count: count))
        }
        // Non-Float32 storage (e.g. Float16 or Double): go through the
        // NSNumber subscript rather than reinterpreting the raw bytes.
        return (0 ..< count).map { logits[$0].floatValue }
    }

    /// Index of the largest value; the first one wins on ties.
    /// Callers guarantee `values` is non-empty.
    private func argmax(_ values: [Float]) -> Int {
        var bestIdx = 0
        var bestVal = values[0]
        for (i, value) in values.enumerated().dropFirst() where value > bestVal {
            bestVal = value
            bestIdx = i
        }
        return bestIdx
    }

    /// Softmax with the maximum subtracted first to keep `exp` in range.
    private func softmax(_ logits: [Float]) -> [Float] {
        let maxVal = logits.max() ?? 0
        var exps = logits.map { Float(exp(Double($0 - maxVal))) }
        let sum = exps.reduce(0, +)
        if sum > 0 {
            for i in exps.indices { exps[i] /= sum }
        }
        return exps
    }

    /// Pushes every logit below the k-th largest down to the most negative
    /// finite value so it gets (effectively) zero probability after softmax.
    /// Logits tied with the k-th largest are all kept.
    private func applyTopK(_ logits: inout [Float], k: Int) {
        let kth = logits.sorted(by: >).prefix(k).last ?? -.greatestFiniteMagnitude
        for i in logits.indices where logits[i] < kth {
            logits[i] = -.greatestFiniteMagnitude
        }
    }

    /// Keeps the smallest set of most-probable tokens whose cumulative
    /// probability reaches `p`, zeroes the rest, and renormalizes.
    private func applyTopP(_ probs: [Float], p: Float) -> [Float] {
        let ranked = probs.enumerated().sorted { $0.element > $1.element }
        var cum: Float = 0
        var keep = Set<Int>()
        for (idx, prob) in ranked {
            keep.insert(idx)
            cum += prob
            if cum >= p { break }
        }
        var result = probs
        for i in result.indices where !keep.contains(i) { result[i] = 0 }
        let sum = result.reduce(0, +)
        if sum > 0 {
            for i in result.indices { result[i] /= sum }
        }
        return result
    }

    /// Draws an index from the categorical distribution `probs`.
    private func categoricalSample(_ probs: [Float]) -> Int {
        var rng = SystemRandomNumberGenerator()
        let r = Float.random(in: 0 ..< 1, using: &rng)
        var cum: Float = 0
        for (i, prob) in probs.enumerated() {
            cum += prob
            if r < cum { return i }
        }
        // Rounding can leave the cumulative sum slightly below `r`. Fall back
        // to the last token that still has probability mass, so a token that
        // top-K / top-p filtered out is never returned.
        return probs.lastIndex(where: { $0 > 0 }) ?? (probs.count - 1)
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
import CoreML
|
|
3
|
+
|
|
4
|
+
/// Sampling strategies for autoregressive decoding.
///
/// Pipeline used by `sample(logits:)`: greedy argmax when `temperature <= 0`,
/// otherwise temperature scaling → optional top-K → softmax → optional
/// nucleus (top-p) → categorical draw.
///
/// Note on `seed:` — `SystemRandomNumberGenerator` cannot be seeded; for
/// reproducible sampling a custom PRNG (e.g. Mulberry32) would be needed.
/// Per the plan, we drop `seed:` from the public-facing init entirely rather
/// than silently ignoring it. Apple-managed entropy is fine for production LLM
/// sampling.
internal struct CoreMLSampler {
    /// Softmax temperature; values `<= 0` select greedy decoding.
    let temperature: Float
    /// Nucleus threshold; values `>= 1.0` disable the top-p filter.
    let topP: Float
    /// Number of highest logits to keep; 0 = disabled.
    let topK: Int

    /// Sample a token id from a logits vector.
    ///
    /// - Parameter logits: Logits of length vocab_size. Float32 storage is
    ///   read directly; any other element type is converted element by
    ///   element instead of being reinterpreted as Float32.
    ///   NOTE(review): the Float32 path reads `count` consecutive elements
    ///   and so assumes contiguous storage — confirm for the model output.
    /// - Returns: The index of the chosen token.
    /// - Precondition: `logits` must contain at least one element.
    func sample(logits: MLMultiArray) -> Int {
        let values = Self.floatValues(of: logits)
        precondition(!values.isEmpty, "CoreMLSampler.sample: logits must not be empty")

        // 1. Greedy fast-path
        if temperature <= 0 {
            return argmax(values)
        }

        // 2. Apply temperature
        var scaled = values.map { $0 / temperature }

        // 3. Optional top-K filter
        if topK > 0 && topK < scaled.count {
            applyTopK(&scaled, k: topK)
        }

        // 4. Softmax → probabilities
        let probs = softmax(scaled)

        // 5. Optional nucleus (top-p) filter
        let filtered = topP < 1.0 ? applyTopP(probs, p: topP) : probs

        // 6. Categorical draw
        return categoricalSample(filtered)
    }

    // MARK: - Helpers

    /// Copies the logits into a `[Float]`, honouring the array's element type.
    private static func floatValues(of logits: MLMultiArray) -> [Float] {
        let count = logits.count
        guard count > 0 else { return [] }
        if logits.dataType == .float32 {
            let base = logits.dataPointer.assumingMemoryBound(to: Float32.self)
            return Array(UnsafeBufferPointer(start: base, count: count))
        }
        // Non-Float32 storage (e.g. Float16 or Double): go through the
        // NSNumber subscript rather than reinterpreting the raw bytes.
        return (0 ..< count).map { logits[$0].floatValue }
    }

    /// Index of the largest value; the first one wins on ties.
    /// Callers guarantee `values` is non-empty.
    private func argmax(_ values: [Float]) -> Int {
        var bestIdx = 0
        var bestVal = values[0]
        for (i, value) in values.enumerated().dropFirst() where value > bestVal {
            bestVal = value
            bestIdx = i
        }
        return bestIdx
    }

    /// Softmax with the maximum subtracted first to keep `exp` in range.
    private func softmax(_ logits: [Float]) -> [Float] {
        let maxVal = logits.max() ?? 0
        var exps = logits.map { Float(exp(Double($0 - maxVal))) }
        let sum = exps.reduce(0, +)
        if sum > 0 {
            for i in exps.indices { exps[i] /= sum }
        }
        return exps
    }

    /// Pushes every logit below the k-th largest down to the most negative
    /// finite value so it gets (effectively) zero probability after softmax.
    /// Logits tied with the k-th largest are all kept.
    private func applyTopK(_ logits: inout [Float], k: Int) {
        let kth = logits.sorted(by: >).prefix(k).last ?? -.greatestFiniteMagnitude
        for i in logits.indices where logits[i] < kth {
            logits[i] = -.greatestFiniteMagnitude
        }
    }

    /// Keeps the smallest set of most-probable tokens whose cumulative
    /// probability reaches `p`, zeroes the rest, and renormalizes.
    private func applyTopP(_ probs: [Float], p: Float) -> [Float] {
        let ranked = probs.enumerated().sorted { $0.element > $1.element }
        var cum: Float = 0
        var keep = Set<Int>()
        for (idx, prob) in ranked {
            keep.insert(idx)
            cum += prob
            if cum >= p { break }
        }
        var result = probs
        for i in result.indices where !keep.contains(i) { result[i] = 0 }
        let sum = result.reduce(0, +)
        if sum > 0 {
            for i in result.indices { result[i] /= sum }
        }
        return result
    }

    /// Draws an index from the categorical distribution `probs`.
    private func categoricalSample(_ probs: [Float]) -> Int {
        var rng = SystemRandomNumberGenerator()
        let r = Float.random(in: 0 ..< 1, using: &rng)
        var cum: Float = 0
        for (i, prob) in probs.enumerated() {
            cum += prob
            if r < cum { return i }
        }
        // Rounding can leave the cumulative sum slightly below `r`. Fall back
        // to the last token that still has probability mass, so a token that
        // top-K / top-p filtered out is never returned.
        return probs.lastIndex(where: { $0 > 0 }) ?? (probs.count - 1)
    }
}
|
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
#if !COCOAPODS
|
|
3
|
-
import Tokenizers
|
|
4
|
-
#endif
|
|
5
|
-
|
|
6
|
-
/// Thin wrapper over a HuggingFace-style tokenizer (tokenizer.json +
/// tokenizer_config.json) loaded from a local directory. Exposes
/// chat-template application, encoding, and decoding.
///
/// Notes on how the swift-transformers `Tokenizers` API is used here
/// (originally written against 1.3.0 — TODO confirm when upgrading):
/// - `AutoTokenizer.from(modelFolder:)` is called with only `modelFolder:`;
///   `hubApi` and `strict` are left at their library defaults.
/// - `Message` is `[String: any Sendable]`, so `[[String: String]]` input is
///   widened to `[Message]` before being handed to the tokenizer.
/// - `applyChatTemplate(messages:)` is called with every other parameter
///   defaulted.
/// - `eosTokenId` is optional on the underlying tokenizer; this wrapper
///   falls back to 0 when it is absent.
internal struct CoreMLTokenizer: @unchecked Sendable {
    private let inner: any Tokenizer

    /// Loads the tokenizer found in `tokenizerDir`.
    ///
    /// - Parameter tokenizerDir: Local folder containing the tokenizer files.
    /// - Throws: `CoreMLBackendError.tokenizerLoadFailed`, wrapping the
    ///   description of whatever error the loader raised.
    init(tokenizerDir: URL) async throws {
        do {
            // Only `modelFolder:` is passed; the remaining loader parameters
            // keep their defaults.
            inner = try await AutoTokenizer.from(modelFolder: tokenizerDir)
        } catch {
            throw CoreMLBackendError.tokenizerLoadFailed(reason: "\(error)")
        }
    }

    /// Apply the model's chat template to convert messages into token IDs.
    ///
    /// - Parameter messages: Array of {"role": ..., "content": ...} dicts.
    /// - Parameter addGenerationPrompt: Intended to control whether the
    ///   generation-start marker is appended. It is currently NOT forwarded
    ///   to the underlying tokenizer, so the library default applies whatever
    ///   value is passed here.
    /// - Returns: The token ids produced by the chat template.
    /// - Throws: `CoreMLBackendError.generationFailed` when the template
    ///   cannot be applied.
    func applyChatTemplate(
        messages: [[String: String]],
        addGenerationPrompt: Bool = true
    ) throws -> [Int] {
        // Kept in the signature for API symmetry only; see the doc comment.
        _ = addGenerationPrompt

        // Widen [[String: String]] to [Message] ([String: any Sendable]).
        let widened: [Message] = messages.map { entry in
            entry.mapValues { $0 as any Sendable }
        }

        do {
            return try inner.applyChatTemplate(messages: widened)
        } catch {
            throw CoreMLBackendError.generationFailed(reason: "applyChatTemplate failed: \(error)")
        }
    }

    /// Encodes `text` into token ids.
    func encode(text: String) -> [Int] {
        return inner.encode(text: text)
    }

    /// Decodes token ids into text, skipping special tokens.
    func decode(tokens: [Int]) -> String {
        return inner.decode(tokens: tokens, skipSpecialTokens: true)
    }

    /// Decodes a single token id into text, skipping special tokens.
    func decode(token: Int) -> String {
        return inner.decode(tokens: [token], skipSpecialTokens: true)
    }

    /// EOS token id. Falls back to 0 if the tokenizer config doesn't specify one.
    var eosTokenId: Int {
        guard let id = inner.eosTokenId else { return 0 }
        return id
    }
}
|
|
1
|
+
import Foundation
|
|
2
|
+
#if !COCOAPODS
|
|
3
|
+
import Tokenizers
|
|
4
|
+
#endif
|
|
5
|
+
|
|
6
|
+
/// Thin wrapper over a HuggingFace-style tokenizer (tokenizer.json +
/// tokenizer_config.json) loaded from a local directory. Exposes
/// chat-template application, encoding, and decoding.
///
/// Notes on how the swift-transformers `Tokenizers` API is used here
/// (originally written against 1.3.0 — TODO confirm when upgrading):
/// - `AutoTokenizer.from(modelFolder:)` is called with only `modelFolder:`;
///   `hubApi` and `strict` are left at their library defaults.
/// - `Message` is `[String: any Sendable]`, so `[[String: String]]` input is
///   widened to `[Message]` before being handed to the tokenizer.
/// - `applyChatTemplate(messages:)` is called with every other parameter
///   defaulted.
/// - `eosTokenId` is optional on the underlying tokenizer; this wrapper
///   falls back to 0 when it is absent.
internal struct CoreMLTokenizer: @unchecked Sendable {
    private let inner: any Tokenizer

    /// Loads the tokenizer found in `tokenizerDir`.
    ///
    /// - Parameter tokenizerDir: Local folder containing the tokenizer files.
    /// - Throws: `CoreMLBackendError.tokenizerLoadFailed`, wrapping the
    ///   description of whatever error the loader raised.
    init(tokenizerDir: URL) async throws {
        do {
            // Only `modelFolder:` is passed; the remaining loader parameters
            // keep their defaults.
            inner = try await AutoTokenizer.from(modelFolder: tokenizerDir)
        } catch {
            throw CoreMLBackendError.tokenizerLoadFailed(reason: "\(error)")
        }
    }

    /// Apply the model's chat template to convert messages into token IDs.
    ///
    /// - Parameter messages: Array of {"role": ..., "content": ...} dicts.
    /// - Parameter addGenerationPrompt: Intended to control whether the
    ///   generation-start marker is appended. It is currently NOT forwarded
    ///   to the underlying tokenizer, so the library default applies whatever
    ///   value is passed here.
    /// - Returns: The token ids produced by the chat template.
    /// - Throws: `CoreMLBackendError.generationFailed` when the template
    ///   cannot be applied.
    func applyChatTemplate(
        messages: [[String: String]],
        addGenerationPrompt: Bool = true
    ) throws -> [Int] {
        // Kept in the signature for API symmetry only; see the doc comment.
        _ = addGenerationPrompt

        // Widen [[String: String]] to [Message] ([String: any Sendable]).
        let widened: [Message] = messages.map { entry in
            entry.mapValues { $0 as any Sendable }
        }

        do {
            return try inner.applyChatTemplate(messages: widened)
        } catch {
            throw CoreMLBackendError.generationFailed(reason: "applyChatTemplate failed: \(error)")
        }
    }

    /// Encodes `text` into token ids.
    func encode(text: String) -> [Int] {
        return inner.encode(text: text)
    }

    /// Decodes token ids into text, skipping special tokens.
    func decode(tokens: [Int]) -> String {
        return inner.decode(tokens: tokens, skipSpecialTokens: true)
    }

    /// Decodes a single token id into text, skipping special tokens.
    func decode(token: Int) -> String {
        return inner.decode(tokens: [token], skipSpecialTokens: true)
    }

    /// EOS token id. Falls back to 0 if the tokenizer config doesn't specify one.
    var eosTokenId: Int {
        guard let id = inner.eosTokenId else { return 0 }
        return id
    }
}
|
|
@@ -1,53 +1,53 @@
|
|
|
1
|
-
import XCTest
|
|
2
|
-
@testable import DVAIBridge
|
|
3
|
-
|
|
4
|
-
/// Unit tests for `BackendSelector.resolve(_:config:)`: explicit backend
/// kinds pass through unchanged, and `.auto` is resolved from the configured
/// model path (or its absence).
final class BackendSelectorTests: XCTestCase {
    /// An explicitly requested backend is returned as-is.
    func testExplicitChoicePassesThrough() throws {
        let explicitKinds: [BackendKind] = [.llama, .foundation, .coreml]
        for requested in explicitKinds {
            let outcome = try BackendSelector.resolve(requested, config: DVAIBridgeConfig())
            XCTAssertEqual(outcome, requested)
        }
    }

    /// `.auto` + a `.gguf` model path selects the llama backend.
    func testAutoWithGGUFResolvesToLlama() throws {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.gguf")
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .llama)
    }

    /// `.auto` + a compiled `.mlmodelc` model path selects the CoreML backend.
    func testAutoWithMlmodelcResolvesToCoreML() throws {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.mlmodelc")
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .coreml)
    }

    /// `.auto` + an `.mlpackage` model path selects the CoreML backend.
    func testAutoWithMlpackageResolvesToCoreML() throws {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.mlpackage")
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .coreml)
    }

    /// A `.task` model path is rejected with `configurationInvalid`, and the
    /// reason mentions "Android".
    func testAutoWithTaskFileThrows() {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.task")
        XCTAssertThrowsError(try BackendSelector.resolve(.auto, config: config)) { thrown in
            if case let DVAIBridgeError.configurationInvalid(reason) = thrown {
                XCTAssertTrue(reason.contains("Android"))
            } else {
                XCTFail("wrong error type")
            }
        }
    }

    /// An unrecognized file extension cannot be auto-resolved.
    func testAutoWithUnknownExtensionThrows() {
        let config = DVAIBridgeConfig(modelPath: "/path/to/something.unknown")
        XCTAssertThrowsError(try BackendSelector.resolve(.auto, config: config))
    }

    /// With no model path, `.auto` resolves to `.foundation` on iOS 26+ /
    /// macOS 26+ and throws on anything older. Both outcomes are
    /// well-defined, so the assertion is chosen by runtime availability.
    func testAutoWithNoModelPathOnIOS26ResolvesToFoundation() throws {
        let config = DVAIBridgeConfig(modelPath: nil)
        guard #available(iOS 26.0, macOS 26.0, *) else {
            XCTAssertThrowsError(try BackendSelector.resolve(.auto, config: config))
            return
        }
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .foundation)
    }
}
|
|
1
|
+
import XCTest
|
|
2
|
+
@testable import DVAIBridge
|
|
3
|
+
|
|
4
|
+
/// Unit tests for `BackendSelector.resolve(_:config:)`: explicit backend
/// kinds pass through unchanged, and `.auto` is resolved from the configured
/// model path (or its absence).
final class BackendSelectorTests: XCTestCase {
    /// An explicitly requested backend is returned as-is.
    func testExplicitChoicePassesThrough() throws {
        let explicitKinds: [BackendKind] = [.llama, .foundation, .coreml]
        for requested in explicitKinds {
            let outcome = try BackendSelector.resolve(requested, config: DVAIBridgeConfig())
            XCTAssertEqual(outcome, requested)
        }
    }

    /// `.auto` + a `.gguf` model path selects the llama backend.
    func testAutoWithGGUFResolvesToLlama() throws {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.gguf")
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .llama)
    }

    /// `.auto` + a compiled `.mlmodelc` model path selects the CoreML backend.
    func testAutoWithMlmodelcResolvesToCoreML() throws {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.mlmodelc")
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .coreml)
    }

    /// `.auto` + an `.mlpackage` model path selects the CoreML backend.
    func testAutoWithMlpackageResolvesToCoreML() throws {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.mlpackage")
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .coreml)
    }

    /// A `.task` model path is rejected with `configurationInvalid`, and the
    /// reason mentions "Android".
    func testAutoWithTaskFileThrows() {
        let config = DVAIBridgeConfig(modelPath: "/path/to/model.task")
        XCTAssertThrowsError(try BackendSelector.resolve(.auto, config: config)) { thrown in
            if case let DVAIBridgeError.configurationInvalid(reason) = thrown {
                XCTAssertTrue(reason.contains("Android"))
            } else {
                XCTFail("wrong error type")
            }
        }
    }

    /// An unrecognized file extension cannot be auto-resolved.
    func testAutoWithUnknownExtensionThrows() {
        let config = DVAIBridgeConfig(modelPath: "/path/to/something.unknown")
        XCTAssertThrowsError(try BackendSelector.resolve(.auto, config: config))
    }

    /// With no model path, `.auto` resolves to `.foundation` on iOS 26+ /
    /// macOS 26+ and throws on anything older. Both outcomes are
    /// well-defined, so the assertion is chosen by runtime availability.
    func testAutoWithNoModelPathOnIOS26ResolvesToFoundation() throws {
        let config = DVAIBridgeConfig(modelPath: nil)
        guard #available(iOS 26.0, macOS 26.0, *) else {
            XCTAssertThrowsError(try BackendSelector.resolve(.auto, config: config))
            return
        }
        XCTAssertEqual(try BackendSelector.resolve(.auto, config: config), .foundation)
    }
}
|