@dvai-bridge/ios-llama-core 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ #import <Foundation/Foundation.h>
2
+
3
+ NS_ASSUME_NONNULL_BEGIN
4
+
5
/// Objective-C++ bridge to llama.cpp. Wraps the C API for use from Swift via the
/// `DVAICapacitorLlamaObjC` module. Owns the `llama_model` and `llama_context`
/// for the lifetime of a load/unload cycle.
///
/// Error convention: every method that takes a trailing `error:` parameter
/// reports failure through its return value (`NO` / `nil`); nothing here raises
/// an Objective-C exception as part of its contract. Swift imports such
/// methods as `throws`.
@interface LlamaCppBridge : NSObject

/// Whether a model is currently loaded.
@property (nonatomic, readonly, getter=isLoaded) BOOL loaded;

/// Path of the model associated with this bridge. Nullable — presumably nil
/// while nothing is loaded (TODO confirm against the implementation).
@property (nonatomic, readonly, copy, nullable) NSString *currentModelPath;

- (instancetype)init;

/// Load the model file at `path`, optionally together with a multimodal
/// projector at `mmprojPath` (may be nil).
///
/// Pass `embeddingMode:YES` if `-embedding:error:` will be called on this
/// bridge — see that method's documentation.
///
/// NOTE(review): this header does not define the units or valid ranges of
/// `gpuLayers`, `contextSize` and `threads`; presumably they map onto the
/// corresponding llama.cpp model/context parameters — confirm in the .mm file.
///
/// Returns a success flag. Presumably returns NO with `error` populated on
/// failure, matching `-loadMmprojAtPath:useGPU:error:` (TODO confirm).
- (BOOL)loadModelAtPath:(NSString *)path
             mmprojPath:(nullable NSString *)mmprojPath
              gpuLayers:(int)gpuLayers
            contextSize:(int)contextSize
                threads:(int)threads
          embeddingMode:(BOOL)embeddingMode
                  error:(NSError **)error;

/// Unload the current model, ending the load/unload cycle described on the
/// class. NOTE(review): whether this also unloads a loaded mmproj is not
/// stated in this header — confirm before relying on it.
- (void)unload;

/// A version string for the bridge. NOTE(review): the header does not say
/// whether this is the llama.cpp build version or the package version.
- (NSString *)versionString;

/// Greedy-sample `maxTokens` tokens from the loaded model for the given prompt.
/// `temperature` and `topP` are accepted but currently ignored — this entry point
/// uses a deterministic greedy sampler for Task 30. Future work (Task 36) will
/// honour temperature/top-p via additional sampler-chain stages.
///
/// Returns the generated text, or nil on failure.
- (nullable NSString *)completePrompt:(NSString *)prompt
                            maxTokens:(int)maxTokens
                          temperature:(float)temperature
                                 topP:(float)topP
                                error:(NSError **)error;

/// Compute an embedding vector for the given text. Requires the model to have
/// been loaded with `embeddingMode:YES`; otherwise the returned values are
/// undefined / not meaningful (the handler layer is responsible for the 400
/// short-circuit before we get here). Returns the per-dimension floats (length
/// == llama_n_embd(model)) wrapped as `NSNumber` doubles, or nil on failure.
- (nullable NSArray<NSNumber *> *)embedding:(NSString *)text
                                      error:(NSError **)error;

/// Whether a multimodal projector (mmproj) has been loaded for this bridge.
@property (nonatomic, readonly, getter=isMmprojLoaded) BOOL mmprojLoaded;

/// Load a multimodal projector (mmproj). The main model must already be
/// loaded — the projector is always paired with a text model. Phase 2A Pass 2
/// wires the real `mtmd_init_from_file()` call.
///
/// `useGPU` controls `mtmd_context_params.use_gpu`. Pass `YES` for production
/// (Metal-accelerated CLIP / vision-encoder pass on real iPhone). Pass `NO`
/// in environments where Metal allocation can't accommodate the projector's
/// position-embedding tensor — most commonly the iOS Simulator, where
/// `_xpc_shmem_create_with_prot` aborts on tensors >~ 60 MiB. The bridge
/// uses YES by default. Returns NO on failure (with `error` populated).
- (BOOL)loadMmprojAtPath:(NSString *)mmprojPath
                  useGPU:(BOOL)useGPU
                   error:(NSError **)error;

/// Convenience wrapper that defaults `useGPU:YES`. Most callers want this.
- (BOOL)loadMmprojAtPath:(NSString *)mmprojPath
                   error:(NSError **)error;

/// Unload the multimodal projector. Safe to call when nothing is loaded
/// (idempotent). Frees the underlying mtmd_context.
- (void)unloadMmproj;

/// Apply a chat template via `llama_chat_apply_template`. If `templateOverride`
/// is nil/empty, the model's bundled `tokenizer.chat_template` (via
/// `llama_model_chat_template`) is used. `messages` is an array of
/// `@{ @"role": @"...", @"content": @"..." }` dicts. Returns the rendered
/// prompt string with role markers, or nil on failure; multimodal callers
/// should pre-populate `<__media__>` markers in the content fields where
/// image/audio bytes will splice in.
- (nullable NSString *)applyChatTemplate:(nullable NSString *)templateOverride
                                messages:(NSArray<NSDictionary<NSString *, NSString *> *> *)messages
                            addAssistant:(BOOL)addAssistant
                                   error:(NSError **)error;

/// Multimodal completion. `prompt` must contain N `<__media__>` markers
/// matching the count of `media`. Bytes are auto-detected as image or audio
/// via `mtmd_helper_bitmap_init_from_buf` (magic bytes); the caller must
/// supply media in declaration order (i.e. the same order the markers
/// appear in `prompt`).
///
/// Returns the generated text. On tokenization / eval / decode failures it
/// returns nil and reports the failure through `error` (Swift callers see a
/// thrown error); it does not raise an Objective-C exception.
- (nullable NSString *)completeMultimodalPrompt:(NSString *)prompt
                                          media:(NSArray<NSData *> *)mediaInOrder
                                      maxTokens:(int)maxTokens
                                    temperature:(float)temperature
                                           topP:(float)topP
                                          error:(NSError **)error;

/// Returns YES if the loaded model declares an audio encoder (via
/// `mtmd_support_audio()`). Always NO when no mmproj is loaded.
- (BOOL)hasAudioEncoder;

@end
100
+
101
+ NS_ASSUME_NONNULL_END
@@ -0,0 +1,46 @@
1
+ import XCTest
2
+ @testable import DVAILlamaCore
3
+
4
/// Checks `AudioDecoder.decode(data:format:)` against the audio fixtures stored
/// under the repo-root `fixtures/audio` directory.
final class AudioDecoderTest: XCTestCase {
    /// Thrown when no ancestor directory of this source file contains `fixtures/`.
    private struct FixturesDirectoryNotFound: Error, CustomStringConvertible {
        let searchedFrom: String
        var description: String { "fixtures dir not found walking up from \(searchedFrom)" }
    }

    /// PCM16 input must come back with the same length it went in with.
    func testPCM16PassThrough() async throws {
        let pcm = try Data(contentsOf: audioFixtureURL("pcm16-1s-16khz-mono.bin"))
        let result = try await AudioDecoder.decode(data: pcm, format: .pcm16)
        XCTAssertEqual(result.count, pcm.count)
    }

    /// A 1-second WAV fixture decodes to roughly one second of PCM.
    func testWavToPCM() async throws {
        let wav = try Data(contentsOf: audioFixtureURL("wav-1s-16khz-mono.wav"))
        let result = try await AudioDecoder.decode(data: wav, format: .wav)
        // 1s @ 16kHz mono PCM16 = 32000 bytes (allow ±5%).
        XCTAssertGreaterThan(result.count, 30000)
        XCTAssertLessThan(result.count, 34000)
    }

    /// A 1-second M4A fixture decodes to roughly one second of PCM.
    func testM4AToPCM() async throws {
        let m4a = try Data(contentsOf: audioFixtureURL("m4a-1s.m4a"))
        let result = try await AudioDecoder.decode(data: m4a, format: .m4a)
        // M4A AAC 1s decodes to ~32000 bytes; AAC priming may shave a few hundred samples.
        XCTAssertGreaterThan(result.count, 25000)
        XCTAssertLessThan(result.count, 36000)
    }

    /// Walks up from this test source file until it finds the repo-root
    /// `fixtures/` directory. We don't bundle the fixtures into the SwiftPM
    /// test target's resources because they're shared with Android and Node
    /// tests.
    ///
    /// - Throws: `FixturesDirectoryNotFound` when the filesystem root is reached
    ///   without a match, so only the calling test fails instead of the whole
    ///   test process aborting.
    private func fixturesURL() throws -> URL {
        // `#filePath` is always the full on-disk path; `#file` may be shortened
        // to `Module/File.swift`, which cannot be walked.
        let origin = URL(fileURLWithPath: #filePath)
        var dir = origin.deletingLastPathComponent()
        while true {
            let candidate = dir.appendingPathComponent("fixtures")
            if FileManager.default.fileExists(atPath: candidate.path) {
                return candidate
            }
            // Stop at the root by component count rather than by comparing
            // parent and child paths, so termination does not depend on what
            // `deletingLastPathComponent()` returns for "/".
            guard dir.pathComponents.count > 1 else {
                throw FixturesDirectoryNotFound(searchedFrom: origin.path)
            }
            dir = dir.deletingLastPathComponent()
        }
    }

    /// URL of the named file inside `fixtures/audio`.
    private func audioFixtureURL(_ name: String) throws -> URL {
        try fixturesURL().appendingPathComponent("audio").appendingPathComponent(name)
    }
}
@@ -0,0 +1,361 @@
1
+ import XCTest
2
+ @testable import DVAILlamaCore
3
+
4
/// Tests for `ContentPartsTranslator.translate(messages:)`: happy paths driven
/// by the shared `transport-fixtures.json`, plus request-shape error paths.
final class ContentPartsTranslatorTest: XCTestCase {
    // MARK: - Mocks

    /// Image decoder that returns canned bytes per URL. Records every call so
    /// tests can assert which URLs were passed in and in what order.
    final class MockImageDecoder: ImageDecoderProtocol {
        var responses: [String: Data] = [:]
        var calls: [String] = []
        func resolve(url: String) async throws -> Data {
            calls.append(url)
            // URLs without a canned response get a recognisable placeholder.
            return responses[url] ?? Data([0xDE, 0xAD, 0xBE, 0xEF])
        }
    }

    /// Audio-decoder closure factory. Records each call's `(bytesIn, format)`
    /// and returns canned PCM bytes.
    final class AudioRecorder {
        var calls: [(Data, AudioFormat)] = []
        var pcmOut: Data = Data([0x11, 0x22, 0x33, 0x44])
        func make() -> (Data, AudioFormat) async throws -> Data {
            // Strong capture on purpose: the closure escapes into the
            // translator, and the recorder never stores the closure, so there
            // is no retain cycle and no dangling reference if the translator
            // outlives the recorder.
            { data, format in
                self.calls.append((data, format))
                return self.pcmOut
            }
        }
    }

    /// Thrown when no ancestor directory of this source file contains `fixtures/`.
    private struct FixturesDirectoryNotFound: Error, CustomStringConvertible {
        let searchedFrom: String
        var description: String { "fixtures dir not found walking up from \(searchedFrom)" }
    }

    // MARK: - Fixture loader

    /// Loads `transport-fixtures.json` from the repo-root `fixtures/` dir.
    /// For `CHAT_REQUEST_AUDIO_PCM16` the `data` field carries the literal
    /// `"<replaced-by-loader>"` placeholder; we substitute the base64 of the
    /// PCM16 fixture file before returning.
    ///
    /// - Throws: when the file cannot be read or parsed, or when the fixture
    ///   does not have the expected shape. The calling test fails at that
    ///   point instead of carrying on with an empty fixture.
    private func loadFixture(_ key: String) throws -> [String: Any] {
        let url = try fixturesURL().appendingPathComponent("transport-fixtures.json")
        let data = try Data(contentsOf: url)
        let root = try XCTUnwrap(
            JSONSerialization.jsonObject(with: data) as? [String: Any],
            "fixture \(key) missing or not an object"
        )
        var fixture = try XCTUnwrap(
            root[key] as? [String: Any],
            "fixture \(key) missing or not an object"
        )
        guard key == "CHAT_REQUEST_AUDIO_PCM16" else { return fixture }

        let b64 = try Data(contentsOf: pcmFixtureURL()).base64EncodedString()
        let shapeError = "CHAT_REQUEST_AUDIO_PCM16 fixture shape unexpected"
        // Mutate messages[0].content[0].input_audio.data. Dictionaries and
        // arrays are values, so every level is copied out, patched and
        // written back.
        var messages = try XCTUnwrap(fixture["messages"] as? [[String: Any]], shapeError)
        var firstMessage = try XCTUnwrap(messages.first, shapeError)
        var parts = try XCTUnwrap(firstMessage["content"] as? [[String: Any]], shapeError)
        var firstPart = try XCTUnwrap(parts.first, shapeError)
        var audio = try XCTUnwrap(firstPart["input_audio"] as? [String: Any], shapeError)
        audio["data"] = b64
        firstPart["input_audio"] = audio
        parts[0] = firstPart
        firstMessage["content"] = parts
        messages[0] = firstMessage
        fixture["messages"] = messages
        return fixture
    }

    /// The `messages` array of a fixture, or an empty array when absent.
    private func messages(from fixture: [String: Any]) -> [[String: Any]] {
        (fixture["messages"] as? [[String: Any]]) ?? []
    }

    /// Walks up from this test source file until it finds the repo-root
    /// `fixtures/` directory.
    ///
    /// - Throws: `FixturesDirectoryNotFound` when the filesystem root is reached
    ///   without a match.
    private func fixturesURL() throws -> URL {
        // `#filePath` is always the full on-disk path; `#file` may be shortened
        // to `Module/File.swift`, which cannot be walked.
        let origin = URL(fileURLWithPath: #filePath)
        var dir = origin.deletingLastPathComponent()
        while true {
            let candidate = dir.appendingPathComponent("fixtures")
            if FileManager.default.fileExists(atPath: candidate.path) {
                return candidate
            }
            // Stop at the root by component count so termination does not
            // depend on what `deletingLastPathComponent()` returns for "/".
            guard dir.pathComponents.count > 1 else {
                throw FixturesDirectoryNotFound(searchedFrom: origin.path)
            }
            dir = dir.deletingLastPathComponent()
        }
    }

    /// URL of the raw PCM16 audio fixture shared by several tests.
    private func pcmFixtureURL() throws -> URL {
        try fixturesURL()
            .appendingPathComponent("audio")
            .appendingPathComponent("pcm16-1s-16khz-mono.bin")
    }

    /// Number of `MTMD_MEDIA_MARKER` occurrences in `content`.
    private func markerCount(in content: String) -> Int {
        content.components(separatedBy: MTMD_MEDIA_MARKER).count - 1
    }

    // MARK: - Happy paths (driven by transport-fixtures.json)

    /// `CHAT_REQUEST_TEXT` — the legacy string-content shape produces a prompt
    /// with the user text and no media collateral.
    func testTextOnlyMessage() async throws {
        let fixture = try loadFixture("CHAT_REQUEST_TEXT")
        let translator = ContentPartsTranslator(mmprojLoaded: false, modelHasAudioEncoder: false)
        let result = try await translator.translate(messages: messages(from: fixture))
        XCTAssertEqual(result.prompt, "hi")
        XCTAssertTrue(result.media.isEmpty)
        XCTAssertEqual(result.messagesWithMarkers.count, 1)
        XCTAssertEqual(result.messagesWithMarkers[0].role, "user")
        XCTAssertEqual(result.messagesWithMarkers[0].content, "hi")
    }

    /// `CHAT_REQUEST_IMAGE` — text + data-URL image. The image part should be
    /// resolved via the (mocked) ImageDecoder and the bytes appended to
    /// `media`. The rendered content for that message has a single
    /// `<__media__>` marker substituted in place of the image part.
    func testTextPlusImage() async throws {
        let fixture = try loadFixture("CHAT_REQUEST_IMAGE")
        let mock = MockImageDecoder()
        let cannedPng = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x99])
        // Key the canned response off the URL the fixture actually carries.
        // A fixture without an image URL fails right here rather than
        // surfacing later as a confusing byte mismatch.
        let parts = try XCTUnwrap(messages(from: fixture).first?["content"] as? [[String: Any]])
        let imagePart = try XCTUnwrap(parts.first(where: { ($0["type"] as? String) == "image_url" }))
        let urlFromFixture = try XCTUnwrap((imagePart["image_url"] as? [String: Any])?["url"] as? String)
        mock.responses[urlFromFixture] = cannedPng

        let translator = ContentPartsTranslator(
            mmprojLoaded: true,
            modelHasAudioEncoder: false,
            imageDecoder: mock
        )
        let result = try await translator.translate(messages: messages(from: fixture))
        XCTAssertEqual(result.prompt, "What is in this image?")
        XCTAssertEqual(result.media.count, 1)
        XCTAssertEqual(result.media[0], cannedPng)
        XCTAssertEqual(mock.calls, [urlFromFixture])
        // Marker count in rendered content == media count.
        let totalMarkers = result.messagesWithMarkers
            .map { markerCount(in: $0.content) }
            .reduce(0, +)
        XCTAssertEqual(totalMarkers, 1)
    }

    /// `CHAT_REQUEST_AUDIO_PCM16` — base64-encoded audio + text. The base64
    /// payload is decoded and the **raw bytes** land in `media` unchanged;
    /// mtmd does its own format detection downstream via miniaudio, so the
    /// translator no longer routes audio through `AudioDecoder`. The
    /// `audioDecoder` collaborator is wired up but should not be invoked.
    func testAudioPCM16PlusText() async throws {
        let fixture = try loadFixture("CHAT_REQUEST_AUDIO_PCM16")
        let recorder = AudioRecorder()
        let translator = ContentPartsTranslator(
            mmprojLoaded: false,
            modelHasAudioEncoder: true,
            audioDecoder: recorder.make()
        )
        let result = try await translator.translate(messages: messages(from: fixture))
        XCTAssertEqual(result.prompt, "Transcribe this.")
        XCTAssertEqual(result.media.count, 1)
        // `media[0]` should be the raw base64-decoded bytes (i.e. the
        // contents of the PCM fixture file as-is) — NOT the canned
        // `recorder.pcmOut`, because the translator no longer routes audio
        // through the decoder closure.
        let pcmFile = try Data(contentsOf: pcmFixtureURL())
        XCTAssertEqual(result.media[0], pcmFile)
        XCTAssertEqual(recorder.calls.count, 0, "audioDecoder must not be called on the production path; mtmd handles decode itself")
        let totalMarkers = result.messagesWithMarkers
            .map { markerCount(in: $0.content) }
            .reduce(0, +)
        XCTAssertEqual(totalMarkers, 1)
    }

    /// Interleaved `[text, image, text, audio, text]` → media list preserves
    /// declaration order (image first, then audio); rendered content has
    /// exactly two `<__media__>` markers in the right positions. After the
    /// audio-path fix, `media[1]` is the raw base64-decoded audio bytes
    /// (mtmd handles format detection downstream); the audio-decoder
    /// collaborator must not be invoked.
    func testInterleavedTextImageAudio() async throws {
        let imageMock = MockImageDecoder()
        let imageBytes = Data([0xAA, 0xBB, 0xCC])
        imageMock.responses["data:image/png;base64,AAAA"] = imageBytes
        let audioRecorder = AudioRecorder()
        audioRecorder.pcmOut = Data([0x55, 0x66, 0x77])
        let translator = ContentPartsTranslator(
            mmprojLoaded: true,
            modelHasAudioEncoder: true,
            imageDecoder: imageMock,
            audioDecoder: audioRecorder.make()
        )
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [
                ["type": "text", "text": "before"],
                ["type": "image_url", "image_url": ["url": "data:image/png;base64,AAAA"]] as [String: Any],
                ["type": "text", "text": "between"],
                ["type": "input_audio", "input_audio": ["data": "AAAA", "format": "pcm16"]] as [String: Any],
                ["type": "text", "text": "after"],
            ],
        ]]
        let result = try await translator.translate(messages: messages)
        XCTAssertEqual(result.media.count, 2)
        XCTAssertEqual(result.media[0], imageBytes, "image must come first in declaration order")
        // `"AAAA"` base64-decoded is three zero bytes — that's what mtmd sees.
        XCTAssertEqual(result.media[1], Data([0x00, 0x00, 0x00]), "audio bytes are the raw base64-decoded payload")
        XCTAssertEqual(audioRecorder.calls.count, 0, "audioDecoder must not be invoked on the production path")
        XCTAssertEqual(result.messagesWithMarkers.count, 1)
        let content = result.messagesWithMarkers[0].content
        XCTAssertEqual(markerCount(in: content), 2)
        // First marker should appear after "before" and before "between";
        // second after "between" and before "after". `XCTUnwrap` turns a
        // missing substring into a test failure instead of a process crash.
        let firstMarker = try XCTUnwrap(content.range(of: MTMD_MEDIA_MARKER))
        let secondMarker = try XCTUnwrap(
            content.range(of: MTMD_MEDIA_MARKER, range: firstMarker.upperBound..<content.endIndex)
        )
        let beforeRange = try XCTUnwrap(content.range(of: "before"))
        let betweenRange = try XCTUnwrap(content.range(of: "between"))
        let afterRange = try XCTUnwrap(content.range(of: "after"))
        XCTAssertLessThan(beforeRange.upperBound, firstMarker.lowerBound)
        XCTAssertLessThan(firstMarker.upperBound, betweenRange.lowerBound)
        XCTAssertLessThan(betweenRange.upperBound, secondMarker.lowerBound)
        XCTAssertLessThan(secondMarker.upperBound, afterRange.lowerBound)
    }

    // MARK: - Negative paths

    /// Image part with `mmprojLoaded == false` → `noMmprojForImage`. The
    /// translator must throw before even consulting the image decoder.
    func testImageWithoutMmprojThrows() async {
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [
                ["type": "image_url", "image_url": ["url": "data:image/png;base64,AAAA"]]
            ],
        ]]
        let mock = MockImageDecoder()
        let translator = ContentPartsTranslator(
            mmprojLoaded: false,
            modelHasAudioEncoder: false,
            imageDecoder: mock
        )
        do {
            _ = try await translator.translate(messages: messages)
            XCTFail("expected noMmprojForImage")
        } catch TranslatorError.noMmprojForImage {
            XCTAssertTrue(mock.calls.isEmpty, "translator should not invoke decoder when mmproj is missing")
        } catch {
            XCTFail("unexpected error: \(error)")
        }
    }

    /// Audio part with `modelHasAudioEncoder == false` → `audioWithoutAudioEncoder`.
    func testAudioWithoutEncoderThrows() async {
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [
                ["type": "input_audio", "input_audio": ["data": "AAAA", "format": "pcm16"]]
            ],
        ]]
        let translator = ContentPartsTranslator(mmprojLoaded: false, modelHasAudioEncoder: false)
        do {
            _ = try await translator.translate(messages: messages)
            XCTFail("expected audioWithoutAudioEncoder")
        } catch TranslatorError.audioWithoutAudioEncoder {
            // expected
        } catch {
            XCTFail("unexpected error: \(error)")
        }
    }

    /// Unsupported audio format (e.g. `vorbis`) → `unsupportedAudioFormat`
    /// with the offending format echoed back and the supported list filled in.
    func testUnsupportedAudioFormatThrows() async {
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [
                ["type": "input_audio", "input_audio": ["data": "AAAA", "format": "vorbis"]]
            ],
        ]]
        let translator = ContentPartsTranslator(mmprojLoaded: false, modelHasAudioEncoder: true)
        do {
            _ = try await translator.translate(messages: messages)
            XCTFail("expected unsupportedAudioFormat")
        } catch let TranslatorError.unsupportedAudioFormat(fmt, supported) {
            XCTAssertEqual(fmt, "vorbis")
            XCTAssertEqual(supported, ContentPartsTranslator.supportedAudioFormats)
            XCTAssertTrue(supported.contains("flac"), "iOS supported list should include flac")
        } catch {
            XCTFail("unexpected error: \(error)")
        }
    }

    /// Unknown content part type → `malformedRequest` with the offending type
    /// echoed in the message.
    func testUnknownContentPartTypeThrows() async {
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [
                ["type": "video_url", "video_url": ["url": "https://example.com/v.mp4"]]
            ],
        ]]
        let translator = ContentPartsTranslator(mmprojLoaded: true, modelHasAudioEncoder: true)
        do {
            _ = try await translator.translate(messages: messages)
            XCTFail("expected malformedRequest")
        } catch let TranslatorError.malformedRequest(reason) {
            XCTAssertTrue(reason.contains("video_url"), "expected reason to mention offending type, got: \(reason)")
        } catch {
            XCTFail("unexpected error: \(error)")
        }
    }

    /// Empty `input_audio.data` → `malformedRequest`. The audio decoder must
    /// not be invoked — this is a request-shape error caught before decode.
    func testEmptyAudioDataThrowsMalformedRequest() async {
        let translator = ContentPartsTranslator(
            mmprojLoaded: false,
            modelHasAudioEncoder: true,
            imageDecoder: MockImageDecoder(),
            audioDecoder: { _, _ in
                XCTFail("audio decoder should not be invoked for empty data")
                return Data()
            }
        )
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [[
                "type": "input_audio",
                "input_audio": ["data": "", "format": "pcm16"]
            ]]
        ]]
        do {
            _ = try await translator.translate(messages: messages)
            XCTFail("Expected throw")
        } catch TranslatorError.malformedRequest {
            // OK
        } catch {
            XCTFail("Unexpected: \(error)")
        }
    }

    /// Malformed base64 in `input_audio.data` → `malformedRequest` (not
    /// `audioDecodeFailed`). The audio decoder never runs — this is a
    /// pre-decode request-shape error.
    func testMalformedBase64ThrowsMalformedRequest() async {
        let translator = ContentPartsTranslator(
            mmprojLoaded: false,
            modelHasAudioEncoder: true,
            imageDecoder: MockImageDecoder(),
            audioDecoder: { _, _ in
                XCTFail("audio decoder should not be invoked for invalid base64")
                return Data()
            }
        )
        let messages: [[String: Any]] = [[
            "role": "user",
            "content": [[
                "type": "input_audio",
                "input_audio": ["data": "!!!not-valid-base64!!!", "format": "pcm16"]
            ]]
        ]]
        do {
            _ = try await translator.translate(messages: messages)
            XCTFail("Expected throw")
        } catch TranslatorError.malformedRequest {
            // OK
        } catch {
            XCTFail("Unexpected: \(error)")
        }
    }
}
@@ -0,0 +1,139 @@
1
+ import XCTest
2
+ @testable import DVAILlamaCore
3
+
4
/// Tests for `ImageDecoder.resolve(url:)` across the `data:`, `file://` and
/// `https://` schemes, plus its error cases. HTTP traffic is answered by the
/// `MockURLProtocol` stub declared in this file.
final class ImageDecoderTest: XCTestCase {
    /// Thrown when no ancestor directory of this source file contains `fixtures/`.
    private struct FixturesDirectoryNotFound: Error, CustomStringConvertible {
        let searchedFrom: String
        var description: String { "fixtures dir not found walking up from \(searchedFrom)" }
    }

    /// `data:image/png;base64,...` round-trips to bytes whose first 8 bytes
    /// are the canonical PNG magic header.
    func testDataURLBase64() async throws {
        let url = try String(contentsOf: imageFixtureURL("tiny-test-base64.txt"), encoding: .utf8)
            .trimmingCharacters(in: .whitespacesAndNewlines)
        let bytes = try await ImageDecoder.resolve(url: url)
        XCTAssertEqual(
            Array(bytes.prefix(8)),
            [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A],
            "expected PNG magic header"
        )
    }

    /// `file://` URLs return the raw bytes off disk.
    func testFileURL() async throws {
        let pngURL = try imageFixtureURL("tiny-test.png")
        let result = try await ImageDecoder.resolve(url: pngURL.absoluteString)
        let raw = try Data(contentsOf: pngURL)
        XCTAssertEqual(result, raw)
    }

    /// Unsupported schemes throw `invalidScheme`.
    func testInvalidScheme() async {
        do {
            _ = try await ImageDecoder.resolve(url: "ftp://example.com/x.png")
            XCTFail("Expected throw")
        } catch ImageSourceError.invalidScheme {
            // expected
        } catch {
            XCTFail("Unexpected error type: \(error)")
        }
    }

    /// `data:` URL with no comma → `malformedDataURL`.
    func testMalformedDataURL() async {
        do {
            _ = try await ImageDecoder.resolve(url: "data:image/png;base64")
            XCTFail("Expected throw")
        } catch ImageSourceError.malformedDataURL {
            // expected
        } catch {
            XCTFail("Unexpected error type: \(error)")
        }
    }

    /// `https://` URL fetches response body bytes verbatim. Mocked at the
    /// URLSession layer via `URLProtocol.registerClass` so no real network
    /// is touched.
    func testHTTPSFetchesBytes() async throws {
        let payload = try Data(contentsOf: imageFixtureURL("tiny-test.png"))
        installHTTPStub(status: 200, body: payload)
        defer { removeHTTPStub() }

        let bytes = try await ImageDecoder.resolve(url: "https://example.invalid/img.png")
        XCTAssertEqual(bytes, payload)
    }

    /// HTTP non-2xx → `ImageSourceError.httpError(status:)` carrying the code.
    func testHTTPErrorThrowsHttpError() async {
        installHTTPStub(status: 404, body: Data())
        defer { removeHTTPStub() }

        do {
            _ = try await ImageDecoder.resolve(url: "https://example.invalid/missing.png")
            XCTFail("Expected throw")
        } catch ImageSourceError.httpError(let status) {
            XCTAssertEqual(status, 404)
        } catch {
            XCTFail("Unexpected error type: \(error)")
        }
    }

    // MARK: - Helpers

    /// Answers every intercepted request with `status` and `body`.
    ///
    /// The handler is set before the protocol class is registered, so no
    /// request can be intercepted while the handler is still nil. Pair every
    /// call with `removeHTTPStub()`.
    private func installHTTPStub(status: Int, body: Data) {
        MockURLProtocol.handler = { request in
            // The handler type cannot throw; a nil URL or response here means
            // the stub itself is broken, so trapping is the intended outcome.
            let response = HTTPURLResponse(
                url: request.url!,
                statusCode: status,
                httpVersion: "HTTP/1.1",
                headerFields: nil
            )!
            return (response, body)
        }
        URLProtocol.registerClass(MockURLProtocol.self)
    }

    /// Undoes `installHTTPStub(status:body:)`.
    private func removeHTTPStub() {
        URLProtocol.unregisterClass(MockURLProtocol.self)
        MockURLProtocol.handler = nil
    }

    /// Walks up from this test source file until it finds the repo-root
    /// `fixtures/` directory, then returns `fixtures/images/<name>`.
    ///
    /// - Throws: `FixturesDirectoryNotFound` when the filesystem root is reached
    ///   without a match, so only the calling test fails instead of the whole
    ///   test process aborting.
    private func imageFixtureURL(_ name: String) throws -> URL {
        // `#filePath` is always the full on-disk path; `#file` may be shortened
        // to `Module/File.swift`, which cannot be walked.
        let origin = URL(fileURLWithPath: #filePath)
        var dir = origin.deletingLastPathComponent()
        while true {
            let candidate = dir.appendingPathComponent("fixtures")
            if FileManager.default.fileExists(atPath: candidate.path) {
                return candidate.appendingPathComponent("images").appendingPathComponent(name)
            }
            // Stop at the root by component count so termination does not
            // depend on what `deletingLastPathComponent()` returns for "/".
            guard dir.pathComponents.count > 1 else {
                throw FixturesDirectoryNotFound(searchedFrom: origin.path)
            }
            dir = dir.deletingLastPathComponent()
        }
    }
}
115
+
116
/// `URLProtocol` stub that claims every request it is offered and answers it
/// from the static `handler` closure, so no real network I/O happens.
///
/// The tests in this file register it with `URLProtocol.registerClass` and
/// set `handler` per test. That global registration is what lets it intercept
/// the production code under test, which (per the original author's note)
/// goes through `URLSession.shared`.
private final class MockURLProtocol: URLProtocol {
    /// Produces the canned `(response, body)` pair for an intercepted request.
    /// While this is nil, every intercepted request fails.
    static var handler: ((URLRequest) -> (HTTPURLResponse, Data))?

    /// Accept every request unconditionally.
    override class func canInit(with request: URLRequest) -> Bool {
        return true
    }

    /// Requests are used as-is; no canonicalisation is applied.
    override class func canonicalRequest(for request: URLRequest) -> URLRequest {
        return request
    }

    /// Delivers the handler's response and body to the client, or an error
    /// in the `"MockURLProtocol"` domain (code -1) when no handler is installed.
    override func startLoading() {
        if let respond = MockURLProtocol.handler {
            let stubbed = respond(request)
            client?.urlProtocol(self, didReceive: stubbed.0, cacheStoragePolicy: .notAllowed)
            client?.urlProtocol(self, didLoad: stubbed.1)
            client?.urlProtocolDidFinishLoading(self)
        } else {
            let failure = NSError(domain: "MockURLProtocol", code: -1)
            client?.urlProtocol(self, didFailWithError: failure)
        }
    }

    /// Nothing to cancel: `startLoading()` completes synchronously.
    override func stopLoading() {
    }
}