@dvai-bridge/ios-llama-core 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +51 -0
- package/Package.swift +71 -0
- package/README.md +199 -0
- package/ios/Sources/DVAILlamaCore/AudioDecoder.swift +112 -0
- package/ios/Sources/DVAILlamaCore/ContentPartsTranslator.swift +232 -0
- package/ios/Sources/DVAILlamaCore/ImageDecoder.swift +91 -0
- package/ios/Sources/DVAILlamaCore/LlamaCppBridgeProtocol.swift +59 -0
- package/ios/Sources/DVAILlamaCore/LlamaHandlers.swift +422 -0
- package/ios/Sources/DVAILlamaCore/ModelDownloader.swift +445 -0
- package/ios/Sources/DVAILlamaCore/PluginState.swift +158 -0
- package/ios/Sources/DVAILlamaCoreObjC/LlamaCppBridge.mm +649 -0
- package/ios/Sources/DVAILlamaCoreObjC/include/LlamaCppBridge.h +101 -0
- package/ios/Tests/DVAILlamaCoreTests/AudioDecoderTest.swift +46 -0
- package/ios/Tests/DVAILlamaCoreTests/ContentPartsTranslatorTest.swift +361 -0
- package/ios/Tests/DVAILlamaCoreTests/ImageDecoderTest.swift +139 -0
- package/ios/Tests/DVAILlamaCoreTests/LlamaCppBridgeTest.swift +131 -0
- package/ios/Tests/DVAILlamaCoreTests/LlamaHandlersTest.swift +515 -0
- package/ios/Tests/DVAILlamaCoreTests/ModelDownloaderTest.swift +89 -0
- package/ios/Tests/DVAILlamaCoreTests/PluginStateTest.swift +51 -0
- package/package.json +18 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/// Failure modes of `ImageDecoder.resolve(url:)`: the ways an input URL
/// string can fail to produce image bytes.
enum ImageSourceError: Error {
    /// A `data:` URL that could not be split into header and payload;
    /// carries the offending URL string.
    case malformedDataURL(String)

    /// The input is not one of the supported schemes; carries either the
    /// rejected scheme or the whole input string.
    case invalidScheme(String)

    /// The HTTP(S) fetch completed with a status outside 200...299.
    case httpError(status: Int)

    /// A `;base64` data URL whose payload is not valid base64.
    case base64DecodeFailed
}
|
|
11
|
+
|
|
12
|
+
/// Resolves any of the three image URL schemes accepted by the DVAI bridge
/// (`data:`, `https:`/`http:`, `file:`) into the raw encoded image bytes
/// (PNG/JPEG/etc.). The bytes are returned as-is — actual format decoding
/// is performed downstream by `mtmd_helper_eval` inside llama.cpp.
struct ImageDecoder {
    /// Literal scheme prefix of a data URL.
    private static let dataScheme = "data:"

    /// Resolve any supported URL scheme into raw image bytes.
    ///
    /// - `data:` URLs are parsed for an optional `;base64` token and
    ///   decoded accordingly (percent-encoded payloads are also supported).
    ///   The scheme and the `;base64` token are matched case-insensitively,
    ///   consistent with how the other schemes are matched.
    /// - `https:` / `http:` URLs are fetched via `URLSession` with a 30s
    ///   timeout; non-2xx responses throw `httpError`.
    /// - `file:` URLs are read off disk via `Data(contentsOf:)`.
    /// - Any other scheme throws `invalidScheme`.
    ///
    /// - Parameter url: The URL string to resolve.
    /// - Returns: The undecoded bytes the URL refers to.
    /// - Throws: `ImageSourceError` for malformed/unsupported input, or the
    ///   underlying networking / file-read error.
    static func resolve(url: String) async throws -> Data {
        // Compare the scheme case-insensitively: the `switch` below already
        // lowercases every other scheme, so `DATA:` must not be the odd one out.
        if url.prefix(dataScheme.count).lowercased() == dataScheme {
            return try resolveDataURL(url)
        }
        guard let parsed = URL(string: url) else {
            throw ImageSourceError.invalidScheme(url)
        }
        switch parsed.scheme?.lowercased() {
        case "https", "http":
            return try await resolveHTTP(parsed)
        case "file":
            // NOTE(review): the path is read without any restriction —
            // confirm callers only hand trusted `file:` URLs to this method.
            return try Data(contentsOf: parsed)
        case let other?:
            throw ImageSourceError.invalidScheme(other)
        case nil:
            throw ImageSourceError.invalidScheme(url)
        }
    }

    /// Parse a `data:[<mediatype>][;base64],<payload>` URL into raw bytes.
    /// Strict: a missing comma is treated as malformed (we don't try to
    /// guess intent).
    ///
    /// - Parameter url: A string already known to start with `data:`
    ///   (in any letter case).
    /// - Throws: `malformedDataURL` when there is no comma,
    ///   `base64DecodeFailed` when a `;base64` payload is not valid base64.
    private static func resolveDataURL(_ url: String) throws -> Data {
        // RFC 2397: data:[<mediatype>][;base64],<data>
        // Empty header and/or empty body are well-formed and produce an
        // empty Data result (e.g. `data:,` returns `Data()`).
        guard let commaIdx = url.firstIndex(of: ",") else {
            throw ImageSourceError.malformedDataURL(url)
        }
        // Skip the leading "data:" and isolate the header / body. The scheme
        // itself contains no comma, so `commaIdx` is never before `prefixEnd`.
        let prefixEnd = url.index(url.startIndex, offsetBy: dataScheme.count)
        let header = url[prefixEnd..<commaIdx]
        let body = String(url[url.index(after: commaIdx)...])
        if header.lowercased().contains(";base64") {
            guard let decoded = Data(base64Encoded: body) else {
                throw ImageSourceError.base64DecodeFailed
            }
            return decoded
        }
        // Non-base64: payload is percent-encoded per RFC 2397. Decode it
        // byte-by-byte rather than through `removingPercentEncoding`, which
        // returns nil whenever the decoded bytes are not valid UTF-8 — i.e.
        // for practically every binary image payload.
        return percentDecodedBytes(of: body)
    }

    /// Percent-decode `text` into raw bytes. Each well-formed `%XX` escape
    /// becomes the byte `0xXX`; every other byte (including a `%` that is not
    /// followed by two hex digits) is copied through unchanged.
    private static func percentDecodedBytes(of text: String) -> Data {
        let source = Array(text.utf8)
        var decoded = Data()
        decoded.reserveCapacity(source.count)
        var cursor = 0
        while cursor < source.count {
            if source[cursor] == UInt8(ascii: "%"),
               cursor + 2 < source.count,
               let high = hexValue(source[cursor + 1]),
               let low = hexValue(source[cursor + 2]) {
                decoded.append(high << 4 | low)
                cursor += 3
            } else {
                decoded.append(source[cursor])
                cursor += 1
            }
        }
        return decoded
    }

    /// Numeric value (0...15) of an ASCII hex digit, or nil for any other byte.
    private static func hexValue(_ ascii: UInt8) -> UInt8? {
        switch ascii {
        case UInt8(ascii: "0")...UInt8(ascii: "9"):
            return ascii - UInt8(ascii: "0")
        case UInt8(ascii: "a")...UInt8(ascii: "f"):
            return ascii - UInt8(ascii: "a") + 10
        case UInt8(ascii: "A")...UInt8(ascii: "F"):
            return ascii - UInt8(ascii: "A") + 10
        default:
            return nil
        }
    }

    /// Fetch over HTTP(S) with a 30-second timeout. Uses the older
    /// dataTask + continuation pattern so we still work on iOS 14
    /// (the package's deployment target); `URLSession.data(for:)` is
    /// iOS 15+.
    ///
    /// - Throws: The transport error reported by `URLSession`, or
    ///   `httpError` when the response status is outside 200...299.
    private static func resolveHTTP(_ url: URL) async throws -> Data {
        var request = URLRequest(url: url)
        request.timeoutInterval = 30
        return try await withCheckedThrowingContinuation { continuation in
            // Every path through the completion handler resumes the
            // continuation exactly once and then returns.
            let task = URLSession.shared.dataTask(with: request) { data, response, error in
                if let error = error {
                    continuation.resume(throwing: error)
                    return
                }
                if let http = response as? HTTPURLResponse, !(200...299).contains(http.statusCode) {
                    continuation.resume(throwing: ImageSourceError.httpError(status: http.statusCode))
                    return
                }
                // A successful response without a body yields empty bytes.
                continuation.resume(returning: data ?? Data())
            }
            task.resume()
        }
    }
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// Internal/LlamaCppBridgeProtocol.swift
|
|
2
|
+
import Foundation
|
|
3
|
+
#if !COCOAPODS
|
|
4
|
+
import DVAILlamaCoreObjC
|
|
5
|
+
#endif
|
|
6
|
+
|
|
7
|
+
/// Abstraction over the ObjC++ `LlamaCppBridge` that exists as a test seam.
/// `LlamaHandlers` depends on this protocol rather than on the concrete
/// class, so unit tests can plug in a canned-response fake instead of
/// loading a real GGUF model. Follows the same approach as the
/// `ImageDecoderProtocol` used by Task 35's `ContentPartsTranslator`.
///
/// The throwing requirements line up with Swift's automatic NSError
/// bridging: an ObjC selector shaped like `(NSString *) … error:(NSError **)`
/// is imported as `throws -> String` / `throws -> [NSNumber]`.
protocol LlamaCppBridgeProtocol: AnyObject {
    // MARK: State

    var isLoaded: Bool { get }

    // Phase 2A Pass 2: real multimodal projector (mmproj) lifecycle +
    // chat-template + multimodal completion.
    var isMmprojLoaded: Bool { get }

    /// Whether the loaded model declares an audio encoder (mtmd_support_audio).
    /// Always false when mmproj is not loaded.
    func hasAudioEncoder() -> Bool

    // MARK: Multimodal projector lifecycle

    func loadMmproj(atPath path: String) throws

    func unloadMmproj()

    // MARK: Inference

    func completePrompt(
        _ prompt: String,
        maxTokens: Int32,
        temperature: Float,
        topP: Float
    ) throws -> String

    /// Multimodal completion. The prompt must contain N `<__media__>` markers
    /// matching `media.count`; bytes are auto-detected as image vs audio
    /// (image: PNG/JPEG/etc.; audio: WAV/MP3/FLAC).
    func completeMultimodalPrompt(
        _ prompt: String,
        media: [Data],
        maxTokens: Int32,
        temperature: Float,
        topP: Float
    ) throws -> String

    func embedding(_ text: String) throws -> [NSNumber]

    // MARK: Prompt rendering

    /// Apply `llama_chat_apply_template`. A nil/empty `templateOverride`
    /// falls back to the model's bundled chat template. Each message dict
    /// must have `role` and `content` string entries. Returns the rendered
    /// prompt string.
    func applyChatTemplate(
        _ templateOverride: String?,
        messages: [[String: String]],
        addAssistant: Bool
    ) throws -> String
}
|
|
55
|
+
|
|
56
|
+
// Concrete `LlamaCppBridge` (ObjC class) gets the four new methods via its
|
|
57
|
+
// imported ObjC selectors; the existing ones (completePrompt, embedding,
|
|
58
|
+
// loadMmproj, isMmprojLoaded) already conform from Pass 1.
|
|
59
|
+
// The extension body is empty: it only declares the conformance and adds no members.
extension LlamaCppBridge: LlamaCppBridgeProtocol {}
|
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
// Internal/LlamaHandlers.swift
|
|
2
|
+
import Foundation
|
|
3
|
+
#if !COCOAPODS
|
|
4
|
+
import DVAILlamaCoreObjC
|
|
5
|
+
#endif
|
|
6
|
+
#if !COCOAPODS
|
|
7
|
+
import DVAISharedCore
|
|
8
|
+
#endif
|
|
9
|
+
|
|
10
|
+
/// OpenAI-compatible handler set for the llama backend. Wires
|
|
11
|
+
/// `ContentPartsTranslator` → `bridge.completePrompt` → OpenAI response shape
|
|
12
|
+
/// per spec §6 + §8.
|
|
13
|
+
///
|
|
14
|
+
/// Phase 1 scope (all `false` until Phase 2 lands the corresponding loaders):
|
|
15
|
+
/// - `mmprojLoaded`: true once a multimodal projector is loaded; gates image parts.
|
|
16
|
+
/// - `modelHasAudioEncoder`: true once a model with native audio is loaded; gates audio parts.
|
|
17
|
+
/// - `embeddingMode`: mirrored from the start opts; gates POST /v1/embeddings.
|
|
18
|
+
///
|
|
19
|
+
/// Streaming: SSE chunks are emitted in 4 frames (role / content / finish /
|
|
20
|
+
/// `[DONE]`). Telegraph 0.40 buffers the whole SSE body server-side anyway, so
|
|
21
|
+
/// 4-chunk vs 1-chunk is identical to the client. Real per-token streaming
|
|
22
|
+
/// lands when Telegraph (or its replacement) supports chunked-encoding flush.
|
|
23
|
+
///
|
|
24
|
+
/// Note: this 4-frame shape with a separate empty-delta finish frame matches
|
|
25
|
+
/// `FoundationHandlers` (iOS, capacitor-foundation) but intentionally differs
|
|
26
|
+
/// from `MediaPipeHandlers` (Android, capacitor-mediapipe), which folds
|
|
27
|
+
/// `finish_reason: "stop"` onto its final content delta and emits a variable
|
|
28
|
+
/// number of frames. See `MediaPipeHandlers`' "Streaming envelope parity"
|
|
29
|
+
/// KDoc section, and `docs/development/handler-parity.md`, for the full
|
|
30
|
+
/// comparison.
|
|
31
|
+
///
|
|
32
|
+
/// All bridge-touching paths are serialized via `bridgeLock` because
|
|
33
|
+
/// llama.cpp's `llama_context` is not thread-safe; concurrent requests
|
|
34
|
+
/// would corrupt the shared KV cache.
|
|
35
|
+
public final class LlamaHandlers: DVAIHandlers, @unchecked Sendable {
|
|
36
|
+
private let bridge: LlamaCppBridgeProtocol
|
|
37
|
+
private let bridgeLock = NSLock()
|
|
38
|
+
private let modelId: String
|
|
39
|
+
private let mmprojLoaded: Bool
|
|
40
|
+
private let modelHasAudioEncoder: Bool
|
|
41
|
+
private let embeddingMode: Bool
|
|
42
|
+
private let chatTemplate: String?
|
|
43
|
+
private let translator: ContentPartsTranslator
|
|
44
|
+
|
|
45
|
+
/// Public entry point, used by `PluginState`. Accepts the concrete
/// `LlamaCppBridge`, widens it to the `LlamaCppBridgeProtocol` existential
/// and forwards every argument to the internal initializer; no translator
/// is passed, so the internal initializer's default applies.
public convenience init(
    bridge: LlamaCppBridge,
    modelId: String,
    mmprojLoaded: Bool = false,
    modelHasAudioEncoder: Bool = false,
    embeddingMode: Bool = false,
    chatTemplate: String? = nil
) {
    let existential: LlamaCppBridgeProtocol = bridge
    self.init(
        bridgeProtocol: existential,
        modelId: modelId,
        mmprojLoaded: mmprojLoaded,
        modelHasAudioEncoder: modelHasAudioEncoder,
        embeddingMode: embeddingMode,
        chatTemplate: chatTemplate
    )
}
|
|
64
|
+
|
|
65
|
+
/// Designated initializer taking the bridge as a protocol existential, so
/// tests can inject a mock bridge. The public initializer forwards here.
///
/// - Parameter translator: Optional pre-built translator. When nil, one is
///   created from `mmprojLoaded` and `modelHasAudioEncoder`.
init(
    bridgeProtocol: LlamaCppBridgeProtocol,
    modelId: String,
    mmprojLoaded: Bool = false,
    modelHasAudioEncoder: Bool = false,
    embeddingMode: Bool = false,
    chatTemplate: String? = nil,
    translator: ContentPartsTranslator? = nil
) {
    // Pick the injected translator if there is one, otherwise build the default.
    let effectiveTranslator: ContentPartsTranslator
    if let injected = translator {
        effectiveTranslator = injected
    } else {
        effectiveTranslator = ContentPartsTranslator(
            mmprojLoaded: mmprojLoaded,
            modelHasAudioEncoder: modelHasAudioEncoder
        )
    }

    self.bridge = bridgeProtocol
    self.modelId = modelId
    self.chatTemplate = chatTemplate
    self.embeddingMode = embeddingMode
    self.mmprojLoaded = mmprojLoaded
    self.modelHasAudioEncoder = modelHasAudioEncoder
    self.translator = effectiveTranslator
}
|
|
87
|
+
|
|
88
|
+
// MARK: - /v1/chat/completions
|
|
89
|
+
|
|
90
|
+
/// Handles `POST /v1/chat/completions`.
///
/// Pipeline: translate the OpenAI `messages` array, render the chat template
/// through the bridge, run a text or multimodal completion, then shape the
/// result as either a `chat.completion` JSON body or a buffered SSE stream.
///
/// - Parameters:
///   - body: Decoded JSON request body. Reads `messages` (required),
///     `max_tokens` (default 256), `temperature` (default 1.0), `top_p`
///     (default 1.0) and `stream` (default false).
///   - ctx: Handler context; not read by this method.
/// - Returns: `.error(400, …)` when `messages` is missing or the translator
///   rejects the request (502 for image fetch failures), `.error(500, …)`
///   when the bridge fails, `.sse` when `stream` is true, `.json(200, …)`
///   otherwise. `usage` token counts are always reported as 0.
/// - Throws: Any non-`TranslatorError` error raised by the translator.
public func handleChatCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
    guard let messages = body["messages"] as? [[String: Any]] else {
        return .error(400, "Missing 'messages' field")
    }

    let promptInput: LlamaPromptInput
    do {
        promptInput = try await translator.translate(messages: messages)
    } catch let e as TranslatorError {
        return .error(translatorErrorToStatus(e), translatorErrorMessage(e))
    }

    // TODO(strict-mode): currently silently defaults if max_tokens/temperature/top_p
    // arrive as strings instead of numbers; OpenAI rejects this with 400.
    //
    // `Int32(clamping:)` rather than `Int32(_:)`: the plain conversion traps
    // on out-of-range values, so a request carrying e.g.
    // `"max_tokens": 3000000000` would crash the whole process.
    let maxTokens = Int32(clamping: body["max_tokens"] as? Int ?? 256)
    let temperature = Float(body["temperature"] as? Double ?? 1.0)
    let topP = Float(body["top_p"] as? Double ?? 1.0)
    let stream = body["stream"] as? Bool ?? false

    // Render the chat template. The bridge falls back to the model's
    // bundled tokenizer.chat_template when our override is nil/empty.
    // Marker positions inside content fields are preserved by the
    // translator, so the rendered prompt has N <__media__> markers
    // matching media.count in declaration order.
    let templateMessages: [[String: String]] = promptInput.messagesWithMarkers.map {
        ["role": $0.role, "content": $0.content]
    }
    let chatPrompt: String
    do {
        chatPrompt = try runOnBridge {
            try bridge.applyChatTemplate(chatTemplate, messages: templateMessages, addAssistant: true)
        }
    } catch {
        return .error(500, "chat template apply failed: \(error.localizedDescription)")
    }

    // Text-only requests take the plain completion path; anything with media
    // goes through the multimodal path. Either way the call holds the bridge lock.
    let completion: String
    do {
        completion = try runOnBridge {
            if promptInput.media.isEmpty {
                return try bridge.completePrompt(
                    chatPrompt,
                    maxTokens: maxTokens,
                    temperature: temperature,
                    topP: topP
                )
            }
            return try bridge.completeMultimodalPrompt(
                chatPrompt,
                media: promptInput.media,
                maxTokens: maxTokens,
                temperature: temperature,
                topP: topP
            )
        }
    } catch {
        return .error(500, error.localizedDescription)
    }

    let id = "chatcmpl-\(UUID().uuidString.prefix(24).lowercased())"
    let created = Int(Date().timeIntervalSince1970)

    if stream {
        // 4-chunk SSE: role delta, content delta with full body, finish, [DONE].
        let chunks = buildChatStreamChunks(
            id: id,
            created: created,
            completion: completion
        )
        let asyncStream = AsyncStream<String> { continuation in
            for chunk in chunks { continuation.yield(chunk) }
            continuation.finish()
        }
        return .sse(asyncStream)
    }

    let response: [String: Any] = [
        "id": id,
        "object": "chat.completion",
        "created": created,
        "model": modelId,
        "choices": [[
            "index": 0,
            "message": ["role": "assistant", "content": completion],
            "finish_reason": "stop",
        ] as [String: Any]],
        "usage": ["prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0],
    ]
    return .json(200, response)
}
|
|
184
|
+
|
|
185
|
+
// MARK: - /v1/completions (legacy)
|
|
186
|
+
|
|
187
|
+
/// Handles the legacy `POST /v1/completions` endpoint by delegating to
/// `handleChatCompletion` and reshaping the result.
///
/// - Parameters:
///   - body: Decoded JSON request body. `prompt` may be a string, an array
///     of strings (joined with newlines) or absent (treated as `""`).
///   - ctx: Handler context, forwarded to `handleChatCompletion`.
/// - Returns: `.error(400, …)` for any other `prompt` type; otherwise the
///   chat response converted to the legacy shape. Error responses and
///   non-200 JSON responses from the chat handler are returned unchanged.
/// - Throws: Whatever `handleChatCompletion` throws.
public func handleCompletion(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
    let promptField = body["prompt"]
    let prompt: String
    if let s = promptField as? String {
        prompt = s
    } else if let arr = promptField as? [String] {
        prompt = arr.joined(separator: "\n")
    } else if promptField == nil {
        prompt = ""
    } else {
        return .error(400, "'prompt' must be a string or array of strings")
    }

    // Re-express the request as a single-user-message chat request.
    var chatBody = body
    chatBody["messages"] = [["role": "user", "content": prompt]]
    chatBody.removeValue(forKey: "prompt")

    let chatResp = try await handleChatCompletion(body: chatBody, ctx: ctx)
    switch chatResp {
    case .json(let status, let chatBodyAny):
        guard status == 200, let chat = chatBodyAny as? [String: Any] else {
            return chatResp
        }
        return .json(200, chatToLegacyCompletion(chat))
    case .sse(let chatStream):
        let model = (body["model"] as? String) ?? modelId
        let legacyStream = AsyncStream<String> { continuation in
            let relay = Task {
                for await chunk in chatStream {
                    continuation.yield(adaptChunkToLegacy(chunk, model: model))
                }
                continuation.finish()
            }
            // Without this the relay task keeps draining `chatStream` after the
            // consumer has stopped reading, because an unstructured Task is
            // not cancelled automatically. Cancelling a finished task is a no-op.
            continuation.onTermination = { _ in relay.cancel() }
        }
        return .sse(legacyStream)
    case .error:
        return chatResp
    }
}
|
|
226
|
+
|
|
227
|
+
// MARK: - /v1/embeddings
|
|
228
|
+
|
|
229
|
+
/// Handles `POST /v1/embeddings`.
///
/// - Parameters:
///   - body: Decoded JSON request body; `input` must be a string or an
///     array of strings.
///   - ctx: Handler context; not read by this method.
/// - Returns: `.error(400, …)` when embedding mode is off or `input` is
///   missing/malformed, `.error(500, …)` on the first bridge failure,
///   otherwise an OpenAI-style list with one embedding per input in order.
///   `usage` token counts are always reported as 0.
public func handleEmbeddings(body: [String: Any], ctx: HandlerContext) async throws -> HandlerResponse {
    guard embeddingMode else {
        return .error(400, "Embeddings require nativeEmbeddingMode: true at start time.")
    }

    let texts: [String]
    switch body["input"] {
    case let single as String:
        texts = [single]
    case let many as [String]:
        texts = many
    default:
        return .error(400, "Missing or malformed 'input' field")
    }

    // Each input is embedded under its own bridge-lock acquisition; the
    // first failure aborts the whole request.
    let rows: [[String: Any]]
    do {
        rows = try texts.enumerated().map { entry -> [String: Any] in
            let vector = try runOnBridge { try bridge.embedding(entry.element) }
            return [
                "object": "embedding",
                "embedding": vector.map { $0.doubleValue },
                "index": entry.offset,
            ]
        }
    } catch {
        return .error(500, error.localizedDescription)
    }

    let payload: [String: Any] = [
        "object": "list",
        "data": rows,
        "model": modelId,
        "usage": ["prompt_tokens": 0, "total_tokens": 0],
    ]
    return .json(200, payload)
}
|
|
261
|
+
|
|
262
|
+
// MARK: - /v1/models
|
|
263
|
+
|
|
264
|
+
/// Handles `GET /v1/models`: returns a one-element model list whose id is
/// taken from `ctx.modelId`.
public func handleModels(ctx: HandlerContext) async throws -> HandlerResponse {
    let entry: [String: Any] = [
        "id": ctx.modelId,
        "object": "model",
        "owned_by": "dvai-bridge",
    ]
    let listing: [String: Any] = [
        "object": "list",
        "data": [entry],
    ]
    return .json(200, listing)
}
|
|
270
|
+
|
|
271
|
+
// MARK: - Helpers
|
|
272
|
+
|
|
273
|
+
/// HTTP status for a translator failure: 502 when fetching an image failed,
/// 400 for every other case.
private func translatorErrorToStatus(_ e: TranslatorError) -> Int {
    if case .imageFetchFailed = e {
        return 502
    }
    return 400
}
|
|
279
|
+
|
|
280
|
+
/// Client-facing message for a translator failure.
private func translatorErrorMessage(_ e: TranslatorError) -> String {
    switch e {
    case .malformedRequest(let detail):
        // The translator's own wording is passed through untouched.
        return detail
    case .imageFetchFailed(let detail):
        return "Failed to fetch image: \(detail)"
    case .audioDecodeFailed(let detail):
        return "Audio decode failed: \(detail)"
    case let .unsupportedAudioFormat(format, accepted):
        let acceptedList = accepted.joined(separator: ", ")
        return "Unsupported audio format: \(format). Supported on this platform: \(acceptedList)."
    case .audioWithoutAudioEncoder:
        return "Loaded model has no native audio encoder. Use a multimodal model like Gemma 4 or Phi-4 Multimodal."
    case .noMmprojForImage:
        return "Request includes an image but no mmproj was loaded. Set nativeMmprojPath when starting."
    }
}
|
|
296
|
+
|
|
297
|
+
// Server-side buffering: Telegraph 0.40 does not stream chunks incrementally;
// the entire AsyncStream content is gathered before the response is flushed.
// 4-chunk vs single-chunk emission is identical to clients.
/// Produce the SSE frames of a streaming chat.completion response, in
/// protocol order: role delta, content delta (the whole completion),
/// finish frame with an empty delta, then the `[DONE]` sentinel.
/// Every entry is a complete `data: <json>\n\n` (or `data: [DONE]\n\n`)
/// frame. A JSON frame that fails to serialize is left out.
private func buildChatStreamChunks(id: String, created: Int, completion: String) -> [String] {
    // Wraps a single choice in the shared chunk envelope and renders it as
    // one SSE frame; nil when the envelope can't be serialized.
    func frame(delta: [String: Any], finishReason: String? = nil) -> String? {
        var choice: [String: Any] = ["index": 0, "delta": delta]
        if let reason = finishReason {
            choice["finish_reason"] = reason
        }
        let envelope: [String: Any] = [
            "id": id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": modelId,
            "choices": [choice],
        ]
        return serialize(envelope).map { "data: \($0)\n\n" }
    }

    let jsonFrames: [String] = [
        frame(delta: ["role": "assistant"]),
        frame(delta: ["content": completion]),
        frame(delta: [:], finishReason: "stop"),
    ].compactMap { $0 }

    return jsonFrames + ["data: [DONE]\n\n"]
}
|
|
345
|
+
|
|
346
|
+
/// Encode `obj` as a compact JSON string.
///
/// - Parameter obj: A JSON-compatible object graph (dictionary/array at the
///   top level).
/// - Returns: The JSON text, or nil when `obj` is not a valid JSON object or
///   encoding fails.
private func serialize(_ obj: Any) -> String? {
    // Validate first: for an invalid object graph
    // `JSONSerialization.data(withJSONObject:)` raises an Objective-C
    // exception rather than throwing a Swift error, so `try?` alone would
    // not turn that case into nil.
    guard JSONSerialization.isValidJSONObject(obj),
          let data = try? JSONSerialization.data(withJSONObject: obj, options: []) else {
        return nil
    }
    return String(data: data, encoding: .utf8)
}
|
|
353
|
+
|
|
354
|
+
/// Reshape a chat.completion JSON body into the legacy text_completion form.
/// Mirrors `chatToLegacyCompletion()` in `packages/dvai-bridge-core`.
///
/// Missing fields get fallbacks: a timestamp-based `cmpl-` id, the current
/// time for `created`, this handler's `modelId`, and all-zero usage.
private func chatToLegacyCompletion(_ chat: [String: Any]) -> [String: Any] {
    // Evaluated lazily at each use site, only when a fallback is needed.
    let epochSeconds = { Int(Date().timeIntervalSince1970) }

    let sourceId = chat["id"] as? String ?? ""
    let legacyId: String
    if sourceId.isEmpty {
        legacyId = "cmpl-\(epochSeconds())"
    } else {
        legacyId = sourceId.replacingOccurrences(of: "chatcmpl-", with: "cmpl-")
    }

    // Each chat choice contributes its message content as legacy `text`.
    var legacyChoices: [[String: Any]] = []
    for choice in (chat["choices"] as? [[String: Any]]) ?? [] {
        let message = choice["message"] as? [String: Any]
        legacyChoices.append([
            "text": (message?["content"] as? String) ?? "",
            "index": choice["index"] ?? 0,
            "finish_reason": choice["finish_reason"] ?? "stop",
            "logprobs": NSNull(),
        ] as [String: Any])
    }

    return [
        "id": legacyId,
        "object": "text_completion",
        "created": chat["created"] ?? epochSeconds(),
        "model": chat["model"] ?? modelId,
        "choices": legacyChoices,
        "usage": chat["usage"] ?? ["prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0],
    ]
}
|
|
378
|
+
|
|
379
|
+
/// Rewrite one SSE frame from chat.completion.chunk to text_completion.chunk.
/// The `[DONE]` sentinel is re-emitted in canonical form; a frame that has
/// no `data:` prefix, does not parse as a JSON object, or cannot be
/// re-serialized is returned exactly as received.
///
/// - Parameters:
///   - chunk: The incoming SSE frame.
///   - model: Model name to use when the frame carries no `model` field.
private func adaptChunkToLegacy(_ chunk: String, model: String) -> String {
    let frame = chunk.trimmingCharacters(in: .whitespacesAndNewlines)
    guard frame.hasPrefix("data:") else { return chunk }

    let json = String(frame.dropFirst("data:".count)).trimmingCharacters(in: .whitespacesAndNewlines)
    guard json != "[DONE]" else { return "data: [DONE]\n\n" }

    guard let bytes = json.data(using: .utf8),
          let object = try? JSONSerialization.jsonObject(with: bytes) as? [String: Any] else {
        return chunk
    }

    // Each chat delta becomes a legacy choice carrying its content as `text`.
    let sourceChoices = (object["choices"] as? [[String: Any]]) ?? []
    let legacyChoices = sourceChoices.map { choice -> [String: Any] in
        let delta = choice["delta"] as? [String: Any]
        return [
            "text": (delta?["content"] as? String) ?? "",
            "index": choice["index"] ?? 0,
            "finish_reason": choice["finish_reason"] ?? NSNull(),
            "logprobs": NSNull(),
        ]
    }

    let sourceId = object["id"] as? String ?? ""
    let legacy: [String: Any] = [
        "id": sourceId.replacingOccurrences(of: "chatcmpl-", with: "cmpl-"),
        "object": "text_completion.chunk",
        "created": object["created"] ?? Int(Date().timeIntervalSince1970),
        "model": object["model"] ?? model,
        "choices": legacyChoices,
    ]

    guard let encoded = serialize(legacy) else { return chunk }
    return "data: \(encoded)\n\n"
}
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
private extension LlamaHandlers {
    /// Run `block` while holding `bridgeLock`, so that bridge-touching paths
    /// execute one at a time and concurrent requests can't corrupt the
    /// shared `llama_context` KV cache.
    ///
    /// - Returns: Whatever `block` returns.
    /// - Throws: Whatever `block` throws; the lock is released either way.
    func runOnBridge<T>(_ block: () throws -> T) throws -> T {
        bridgeLock.lock()
        let outcome: T
        do {
            outcome = try block()
        } catch {
            // Release before propagating so a failing call never leaves the lock held.
            bridgeLock.unlock()
            throw error
        }
        bridgeLock.unlock()
        return outcome
    }
}
|