@dvai-bridge/capacitor-llama 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,412 +1,429 @@
1
- // Tests/DVAICapacitorLlamaTests/RealModelSmokeTest.swift
2
- //
3
- // End-to-end smoke test against a small public GGUF model. Verifies
4
- // mechanics (download → load → respond → free) only, not output quality.
5
- //
6
- // The test reads `SMOKE_MODEL_URL` and `SMOKE_MODEL_SHA256` from the
7
- // process environment. When either is missing, it skips cleanly via
8
- // `XCTSkip`, so this file is safe to compile and run locally even
9
- // without those env vars set.
10
- //
11
- // On the self-hosted Mac runner the workflow forwards the secrets to
12
- // the simulator via `SIMCTL_CHILD_SMOKE_MODEL_URL=...` (xcodebuild's
13
- // documented mechanism for env vars to reach the simulator-hosted
14
- // XCTest process).
15
-
16
- import XCTest
17
- import DVAILlamaCore
18
- import DVAILlamaCoreObjC
19
-
20
- /// Unbuffered breadcrumb. NSLog flushes per call to stderr / oslog, so
21
- /// even if the test process dies mid-step (jetsam SIGKILL on simulator,
22
- /// for example) the most recent step still appears in xcresult /
23
- /// `log show`. Plain `print(...)` buffers on stdout and silently
24
- /// disappears when the process is killed.
25
- @inline(__always)
26
- fileprivate func smokeStep(_ msg: String) {
27
- NSLog("DVAI-SMOKE: %@", msg)
28
- }
29
-
30
- final class RealModelSmokeTest: XCTestCase {
31
- private var tempDir: URL!
32
- private var bridge: LlamaCppBridge?
33
-
34
- /// Vision + audio smoke involves downloading a 5 GB GGUF + 557 MB
35
- /// mmproj plus loading both into the simulator's Metal context and
36
- /// running an eval pass. The combined runtime can easily exceed
37
- /// Xcode's default 10-minute per-test allowance, after which xctest
38
- /// kills and "Restarts" the test bundle. We ask for 45 minutes per
39
- /// test to absorb slow networks + model load + first-Metal-shader
40
- /// compile.
41
- override class var defaultTestSuite: XCTestSuite {
42
- let suite = super.defaultTestSuite
43
- for case let testCase as XCTestCase in suite.tests {
44
- testCase.executionTimeAllowance = 45 * 60
45
- }
46
- return suite
47
- }
48
-
49
- override func setUpWithError() throws {
50
- let base = FileManager.default.temporaryDirectory
51
- .appendingPathComponent("dvai-smoke-\(UUID().uuidString)")
52
- try FileManager.default.createDirectory(at: base, withIntermediateDirectories: true)
53
- tempDir = base
54
- }
55
-
56
- override func tearDownWithError() throws {
57
- bridge?.unload()
58
- bridge = nil
59
- if let tempDir { try? FileManager.default.removeItem(at: tempDir) }
60
- tempDir = nil
61
- }
62
-
63
- func testSmokeRealModelEndToEnd() async throws {
64
- let env = Self.loadSmokeEnv()
65
- guard let urlStr = env["SMOKE_MODEL_URL"], !urlStr.isEmpty,
66
- let sha = env["SMOKE_MODEL_SHA256"], !sha.isEmpty,
67
- let url = URL(string: urlStr)
68
- else {
69
- throw XCTSkip("SMOKE_MODEL_URL/SMOKE_MODEL_SHA256 not set in env; skipping real-model smoke")
70
- }
71
-
72
- // Generous timeout for the 800 MB download + 1B-param load.
73
- let downloader = ModelDownloader(cacheDirOverride: tempDir)
74
- let result = try await downloader.downloadModel(
75
- url: url,
76
- expectedSha256: sha.lowercased(),
77
- destFilename: "smoke-model.gguf",
78
- headers: [:],
79
- onProgress: { _, _ in /* no-op for smoke */ }
80
- )
81
-
82
- XCTAssertFalse(result.cached, "first download into a fresh temp dir should not be cached")
83
- XCTAssertTrue(
84
- FileManager.default.fileExists(atPath: result.path),
85
- "downloaded file should exist at \(result.path)"
86
- )
87
-
88
- let bridge = LlamaCppBridge()
89
- self.bridge = bridge
90
- try bridge.loadModel(
91
- atPath: result.path,
92
- mmprojPath: nil,
93
- gpuLayers: 99,
94
- contextSize: 2048,
95
- threads: 4,
96
- embeddingMode: false
97
- )
98
- XCTAssertTrue(bridge.isLoaded, "model should be loaded after loadModel(...) returns")
99
-
100
- let completion = try bridge.completePrompt(
101
- "<|begin_of_text|>What is 2+2?",
102
- maxTokens: 32,
103
- temperature: 0.0,
104
- topP: 1.0
105
- )
106
- // Don't assert specific content — that's quality testing, not smoke.
107
- XCTAssertFalse(completion.isEmpty, "completion should not be empty")
108
- }
109
-
110
- /// Vision smoke: download model + mmproj, load both, run a chat
111
- /// completion against the tiny test image fixture. Skips cleanly if any
112
- /// of SMOKE_VISION_MODEL_URL / SMOKE_VISION_MODEL_SHA256 /
113
- /// SMOKE_VISION_MMPROJ_URL / SMOKE_VISION_MMPROJ_SHA256 are unset.
114
- func testSmokeVisionEndToEnd() async throws {
115
- let env = Self.loadSmokeEnv()
116
- guard let modelUrlStr = env["SMOKE_VISION_MODEL_URL"], !modelUrlStr.isEmpty,
117
- let modelSha = env["SMOKE_VISION_MODEL_SHA256"], !modelSha.isEmpty,
118
- let mmprojUrlStr = env["SMOKE_VISION_MMPROJ_URL"], !mmprojUrlStr.isEmpty,
119
- let mmprojSha = env["SMOKE_VISION_MMPROJ_SHA256"], !mmprojSha.isEmpty,
120
- let modelUrl = URL(string: modelUrlStr),
121
- let mmprojUrl = URL(string: mmprojUrlStr)
122
- else {
123
- throw XCTSkip("SMOKE_VISION_* env vars not all set; skipping vision smoke")
124
- }
125
-
126
- smokeStep("vision: downloading main model")
127
- let downloader = ModelDownloader(cacheDirOverride: tempDir)
128
- let modelResult = try await downloader.downloadModel(
129
- url: modelUrl,
130
- expectedSha256: modelSha.lowercased(),
131
- destFilename: "smoke-vision-model.gguf",
132
- headers: [:],
133
- onProgress: { _, _ in /* no-op for smoke */ }
134
- )
135
- XCTAssertTrue(FileManager.default.fileExists(atPath: modelResult.path))
136
- smokeStep("vision: model downloaded; downloading mmproj")
137
- let mmprojResult = try await downloader.downloadModel(
138
- url: mmprojUrl,
139
- expectedSha256: mmprojSha.lowercased(),
140
- destFilename: "smoke-vision-mmproj.gguf",
141
- headers: [:],
142
- onProgress: { _, _ in /* no-op for smoke */ }
143
- )
144
- XCTAssertTrue(FileManager.default.fileExists(atPath: mmprojResult.path))
145
- smokeStep("vision: mmproj downloaded")
146
-
147
- let bridge = LlamaCppBridge()
148
- self.bridge = bridge
149
- // gpuLayers=0 on simulator: same MTLSimDevice allocation cap that
150
- // hits the mmproj also bites the main model when mtmd_helper_decode
151
- // builds image-embedding tensors and llama_decode runs them on
152
- // Metal. Falling back to CPU for the main model avoids the abort.
153
- // Real iPhone hardware uses Metal end-to-end → gpuLayers=99 is the
154
- // production default.
155
- #if targetEnvironment(simulator)
156
- let mainGPULayers: Int32 = 0
157
- #else
158
- let mainGPULayers: Int32 = 99
159
- #endif
160
- smokeStep("vision: loading main model (gpuLayers=\(mainGPULayers))")
161
- try bridge.loadModel(
162
- atPath: modelResult.path,
163
- mmprojPath: nil,
164
- // Smoke: small context to keep KV-cache memory well under
165
- // the simulator's per-process budget. We sample at most 32
166
- // tokens, so 1024 leaves plenty of headroom for the prompt
167
- // + image chunk + completion without paging.
168
- gpuLayers: mainGPULayers,
169
- contextSize: 1024,
170
- threads: 4,
171
- embeddingMode: false
172
- )
173
- XCTAssertTrue(bridge.isLoaded)
174
- smokeStep("vision: main model loaded")
175
- // useGPU=false on simulator: iOS Simulator's MTLSimDevice aborts in
176
- // _xpc_shmem_create_with_prot when CLIP tries to allocate the
177
- // ~60 MiB position-embedding tensor (gemma4v has shape [768, 10240, 2]).
178
- // CPU-only projection is slow but lets the smoke run end-to-end.
179
- // Real iPhone hardware uses Metal without issue useGPU=true is the
180
- // production default.
181
- #if targetEnvironment(simulator)
182
- let useGPUForMmproj = false
183
- #else
184
- let useGPUForMmproj = true
185
- #endif
186
- smokeStep("vision: loading mmproj (useGPU=\(useGPUForMmproj))")
187
- try bridge.loadMmproj(atPath: mmprojResult.path, useGPU: useGPUForMmproj)
188
- XCTAssertTrue(bridge.isMmprojLoaded)
189
- smokeStep("vision: mmproj loaded")
190
-
191
- // Read the smoke PNG fixture. tiny-test.png is a 256x256 image with
192
- // three primary-colour squares + a yellow ellipse picked so a
193
- // captioner has unambiguous content to describe (a blank canvas
194
- // tends to make Gemma 4 emit `<end_of_turn>` as its first sample,
195
- // which the greedy sampler treats as a clean exit and returns "").
196
- // Regenerate via `scripts/generate-image-fixtures.sh`.
197
- let imageURL = fixturesURL().appendingPathComponent("images").appendingPathComponent("tiny-test.png")
198
- let imageData = try Data(contentsOf: imageURL)
199
-
200
- // Build a marker-bearing prompt and apply the model's chat template.
201
- // Gemma 4's published GGUFs at ggml-org/gemma-4-E2B-it-GGUF do not
202
- // embed a tokenizer.chat_template that llama.cpp's heuristic
203
- // recognizes, so passing nil here produces error 41 ("model has no
204
- // chat template and none provided"). Production developers using
205
- // capacitor-llama are expected to supply their model's template at
206
- // start time; for smoke purposes we hardcode Gemma's published
207
- // chat-template format inline.
208
- let gemmaTemplate = """
209
- {% for m in messages %}<start_of_turn>{% if m.role == 'assistant' %}model{% else %}{{ m.role }}{% endif %}
210
- {{ m.content }}<end_of_turn>
211
- {% endfor %}{% if add_generation_prompt %}<start_of_turn>model
212
- {% endif %}
213
- """
214
- let messages: [[String: String]] = [
215
- ["role": "user", "content": "Describe this image: \(MTMD_MEDIA_MARKER)"]
216
- ]
217
- let chatPrompt = try bridge.applyChatTemplate(gemmaTemplate, messages: messages, addAssistant: true)
218
- XCTAssertFalse(chatPrompt.isEmpty)
219
- smokeStep("vision: chat template applied; running multimodal eval")
220
-
221
- let completion = try bridge.completeMultimodalPrompt(
222
- chatPrompt,
223
- media: [imageData],
224
- maxTokens: 32,
225
- temperature: 0.0,
226
- topP: 1.0
227
- )
228
- smokeStep("vision: eval done completion=\(completion.prefix(80))")
229
- XCTAssertFalse(completion.isEmpty, "vision completion should not be empty")
230
- }
231
-
232
- /// Audio smoke: same as vision, but with the WAV fixture instead of PNG.
233
- /// mtmd's `mtmd_helper_bitmap_init_from_buf` accepts wav/mp3/flac for
234
- /// audio (per mtmd-helper.h docs). Skips when the model declared no
235
- /// audio encoder (e.g. vision-only mmproj).
236
- func testSmokeAudioEndToEnd() async throws {
237
- let env = Self.loadSmokeEnv()
238
- guard let modelUrlStr = env["SMOKE_VISION_MODEL_URL"], !modelUrlStr.isEmpty,
239
- let modelSha = env["SMOKE_VISION_MODEL_SHA256"], !modelSha.isEmpty,
240
- let mmprojUrlStr = env["SMOKE_VISION_MMPROJ_URL"], !mmprojUrlStr.isEmpty,
241
- let mmprojSha = env["SMOKE_VISION_MMPROJ_SHA256"], !mmprojSha.isEmpty,
242
- let modelUrl = URL(string: modelUrlStr),
243
- let mmprojUrl = URL(string: mmprojUrlStr)
244
- else {
245
- throw XCTSkip("SMOKE_VISION_* env vars not all set; skipping audio smoke")
246
- }
247
-
248
- smokeStep("audio: downloading main model")
249
- let downloader = ModelDownloader(cacheDirOverride: tempDir)
250
- let modelResult = try await downloader.downloadModel(
251
- url: modelUrl,
252
- expectedSha256: modelSha.lowercased(),
253
- destFilename: "smoke-audio-model.gguf",
254
- headers: [:],
255
- onProgress: { _, _ in /* no-op for smoke */ }
256
- )
257
- smokeStep("audio: model downloaded; downloading mmproj")
258
- let mmprojResult = try await downloader.downloadModel(
259
- url: mmprojUrl,
260
- expectedSha256: mmprojSha.lowercased(),
261
- destFilename: "smoke-audio-mmproj.gguf",
262
- headers: [:],
263
- onProgress: { _, _ in /* no-op for smoke */ }
264
- )
265
- smokeStep("audio: mmproj downloaded")
266
-
267
- let bridge = LlamaCppBridge()
268
- self.bridge = bridge
269
- // gpuLayers=0 on simulator: same MTLSimDevice allocation cap that
270
- // hits the mmproj also bites the main model when mtmd_helper_decode
271
- // builds audio-embedding tensors and llama_decode runs them on
272
- // Metal. Falling back to CPU for the main model avoids the abort.
273
- #if targetEnvironment(simulator)
274
- let mainGPULayers: Int32 = 0
275
- #else
276
- let mainGPULayers: Int32 = 99
277
- #endif
278
- smokeStep("audio: loading main model (gpuLayers=\(mainGPULayers))")
279
- try bridge.loadModel(
280
- atPath: modelResult.path,
281
- mmprojPath: nil,
282
- gpuLayers: mainGPULayers,
283
- contextSize: 1024,
284
- threads: 4,
285
- embeddingMode: false
286
- )
287
- smokeStep("audio: main model loaded")
288
- // useGPU=false on simulator: iOS Simulator's MTLSimDevice aborts in
289
- // _xpc_shmem_create_with_prot when CLIP tries to allocate the
290
- // ~60 MiB position-embedding tensor (gemma4v has shape [768, 10240, 2]).
291
- // CPU-only projection is slow but lets the smoke run end-to-end.
292
- // Real iPhone hardware uses Metal without issue → useGPU=true is the
293
- // production default.
294
- #if targetEnvironment(simulator)
295
- let useGPUForMmproj = false
296
- #else
297
- let useGPUForMmproj = true
298
- #endif
299
- smokeStep("audio: loading mmproj (useGPU=\(useGPUForMmproj))")
300
- try bridge.loadMmproj(atPath: mmprojResult.path, useGPU: useGPUForMmproj)
301
- smokeStep("audio: mmproj loaded")
302
-
303
- // Skip cleanly if the loaded mmproj has no audio encoder (e.g. when
304
- // SMOKE_VISION_* points at a vision-only projector).
305
- guard bridge.hasAudioEncoder() else {
306
- throw XCTSkip("Loaded mmproj reports no audio encoder; skipping audio smoke")
307
- }
308
- smokeStep("audio: hasAudioEncoder=true; running multimodal eval")
309
-
310
- let audioURL = fixturesURL().appendingPathComponent("audio").appendingPathComponent("wav-1s-16khz-mono.wav")
311
- let audioData = try Data(contentsOf: audioURL)
312
-
313
- // Same Gemma chat template as the vision test — Gemma 4 GGUFs at
314
- // ggml-org don't ship a llama.cpp-recognized chat_template.
315
- let gemmaTemplate = """
316
- {% for m in messages %}<start_of_turn>{% if m.role == 'assistant' %}model{% else %}{{ m.role }}{% endif %}
317
- {{ m.content }}<end_of_turn>
318
- {% endfor %}{% if add_generation_prompt %}<start_of_turn>model
319
- {% endif %}
320
- """
321
- // Open-ended prompt: the WAV fixture is a synthetic 1-second 440 Hz
322
- // sine tone with no speech content, so "Transcribe this:" makes the
323
- // model emit `<end_of_turn>` as its first sample (legitimate — there's
324
- // nothing to transcribe). "Describe what you hear" gives Gemma room
325
- // to say something like "A pure tone." instead of bailing immediately.
326
- let messages: [[String: String]] = [
327
- ["role": "user", "content": "Describe what you hear: \(MTMD_MEDIA_MARKER)"]
328
- ]
329
- let chatPrompt = try bridge.applyChatTemplate(gemmaTemplate, messages: messages, addAssistant: true)
330
-
331
- // Note: we do NOT assert that the completion is non-empty. With a
332
- // synthetic tone fixture, an immediate-EOS sample is a *correct*
333
- // model response, not a pipeline failure. This smoke verifies that
334
- // the audio path runs end-to-end without throwing — eval, decode,
335
- // and sampler all return cleanly. Production code paths (real
336
- // speech audio) are exercised by host-app integration tests.
337
- _ = try bridge.completeMultimodalPrompt(
338
- chatPrompt,
339
- media: [audioData],
340
- maxTokens: 32,
341
- temperature: 0.0,
342
- topP: 1.0
343
- )
344
- smokeStep("audio: eval done")
345
- }
346
-
347
- /// Walks up from #file to find the repo-root `fixtures/` dir.
348
- private func fixturesURL() -> URL {
349
- var dir = URL(fileURLWithPath: #file).deletingLastPathComponent()
350
- while !FileManager.default.fileExists(atPath: dir.appendingPathComponent("fixtures").path) {
351
- let parent = dir.deletingLastPathComponent()
352
- if parent.path == dir.path {
353
- fatalError("fixtures dir not found walking up from \(#file)")
354
- }
355
- dir = parent
356
- }
357
- return dir.appendingPathComponent("fixtures")
358
- }
359
-
360
- /// Reads SMOKE_* env vars from the test process's environment first,
361
- /// then falls back to the per-developer `scripts/smoke.local.env`
362
- /// file on the host filesystem. The fallback exists because
363
- /// `xcodebuild test` does not propagate parent-process env vars
364
- /// (or `SIMCTL_CHILD_*` / `TEST_RUNNER_*`) to the unit-test bundle's
365
- /// `ProcessInfo.processInfo.environment` that's an XCUITest-only
366
- /// channel. Reading the file directly works reliably both locally
367
- /// (via the gitignored smoke.local.env) and in CI (which actually
368
- /// does inject env vars at the workflow level — `ProcessInfo` sees
369
- /// those because the Mac runner inherits the GitHub Actions step env
370
- /// before xcodebuild starts).
371
- fileprivate static func loadSmokeEnv() -> [String: String] {
372
- var env = ProcessInfo.processInfo.environment.filter { $0.key.hasPrefix("SMOKE_") }
373
- if !env.isEmpty {
374
- return env
375
- }
376
- // Walk up from this file to find `scripts/smoke.local.env`.
377
- var dir = URL(fileURLWithPath: #file).deletingLastPathComponent()
378
- while !FileManager.default.fileExists(atPath: dir.appendingPathComponent("scripts/smoke.local.env").path) {
379
- let parent = dir.deletingLastPathComponent()
380
- if parent.path == dir.path {
381
- return env // empty
382
- }
383
- dir = parent
384
- }
385
- let envFile = dir.appendingPathComponent("scripts/smoke.local.env")
386
- guard let contents = try? String(contentsOf: envFile, encoding: .utf8) else {
387
- return env
388
- }
389
- for line in contents.split(separator: "\n") {
390
- let trimmed = line.trimmingCharacters(in: .whitespaces)
391
- if trimmed.isEmpty || trimmed.hasPrefix("#") { continue }
392
- guard let eq = trimmed.firstIndex(of: "=") else { continue }
393
- let key = String(trimmed[..<eq]).trimmingCharacters(in: .whitespaces)
394
- var value = String(trimmed[trimmed.index(after: eq)...]).trimmingCharacters(in: .whitespaces)
395
- // Strip a single matching pair of leading/trailing quotes so a
396
- // developer writing `SMOKE_MODEL_URL="https://..."` in the env
397
- // file doesn't end up with the quote chars baked into the URL
398
- // (which makes `URL(string:)` return nil and the test silently
399
- // skip with a confusing reason).
400
- if value.count >= 2 {
401
- if (value.first == "\"" && value.last == "\"") ||
402
- (value.first == "'" && value.last == "'") {
403
- value = String(value.dropFirst().dropLast())
404
- }
405
- }
406
- if key.hasPrefix("SMOKE_") && !value.isEmpty {
407
- env[key] = value
408
- }
409
- }
410
- return env
411
- }
412
- }
1
+ // Tests/DVAICapacitorLlamaTests/RealModelSmokeTest.swift
2
+ //
3
+ // End-to-end smoke test against a small public GGUF model. Verifies
4
+ // mechanics (download → load → respond → free) only, not output quality.
5
+ //
6
+ // The test reads `SMOKE_MODEL_URL` and `SMOKE_MODEL_SHA256` from the
7
+ // process environment. When either is missing, it skips cleanly via
8
+ // `XCTSkip`, so this file is safe to compile and run locally even
9
+ // without those env vars set.
10
+ //
11
+ // On the self-hosted Mac runner the workflow forwards the secrets to
12
+ // the simulator via `SIMCTL_CHILD_SMOKE_MODEL_URL=...` (xcodebuild's
13
+ // documented mechanism for env vars to reach the simulator-hosted
14
+ // XCTest process).
15
+
16
+ import XCTest
17
+ import DVAILlamaCore
18
+ import DVAILlamaCoreObjC
19
+
20
+ /// Unbuffered breadcrumb. NSLog flushes per call to stderr / oslog, so
21
+ /// even if the test process dies mid-step (jetsam SIGKILL on simulator,
22
+ /// for example) the most recent step still appears in xcresult /
23
+ /// `log show`. Plain `print(...)` buffers on stdout and silently
24
+ /// disappears when the process is killed.
25
+ @inline(__always)
26
+ fileprivate func smokeStep(_ msg: String) {
27
+ NSLog("DVAI-SMOKE: %@", msg)
28
+ }
29
+
30
+ final class RealModelSmokeTest: XCTestCase {
31
+ private var tempDir: URL!
32
+ private var bridge: LlamaCppBridge?
33
+
34
+ /// Vision + audio smoke involves downloading a 5 GB GGUF + 557 MB
35
+ /// mmproj plus loading both into the simulator's Metal context and
36
+ /// running an eval pass. The combined runtime can easily exceed
37
+ /// Xcode's default 10-minute per-test allowance, after which xctest
38
+ /// kills and "Restarts" the test bundle. We ask for 45 minutes per
39
+ /// test to absorb slow networks + model load + first-Metal-shader
40
+ /// compile.
41
+ override class var defaultTestSuite: XCTestSuite {
42
+ let suite = super.defaultTestSuite
43
+ for case let testCase as XCTestCase in suite.tests {
44
+ testCase.executionTimeAllowance = 45 * 60
45
+ }
46
+ return suite
47
+ }
48
+
49
+ override func setUpWithError() throws {
50
+ let base = FileManager.default.temporaryDirectory
51
+ .appendingPathComponent("dvai-smoke-\(UUID().uuidString)")
52
+ try FileManager.default.createDirectory(at: base, withIntermediateDirectories: true)
53
+ tempDir = base
54
+ }
55
+
56
+ override func tearDownWithError() throws {
57
+ bridge?.unload()
58
+ bridge = nil
59
+ if let tempDir { try? FileManager.default.removeItem(at: tempDir) }
60
+ tempDir = nil
61
+ }
62
+
63
+ func testSmokeRealModelEndToEnd() async throws {
64
+ // Gated off CI in v4.0.1 — see testLlamaBackendIntegration in
65
+ // packages/dvai-bridge-ios/.../RealModelIntegrationTest.swift for
66
+ // the full context. Same simulator-OOM symptom: sustained CPU-only
67
+ // llama.cpp inference dies after ~10-16min with exit 65 and no
68
+ // assertion. Three RealModelSmokeTest cases (this one + Vision +
69
+ // Audio) all hit it. Re-enable locally with RUN_LLAMA_INTEGRATION=1.
70
+ guard ProcessInfo.processInfo.environment["RUN_LLAMA_INTEGRATION"] == "1" else {
71
+ throw XCTSkip("Real-model smoke gated off CI in v4.0.1 — simulator dies under sustained inference. Set RUN_LLAMA_INTEGRATION=1 to run.")
72
+ }
73
+ let env = Self.loadSmokeEnv()
74
+ guard let urlStr = env["SMOKE_MODEL_URL"], !urlStr.isEmpty,
75
+ let sha = env["SMOKE_MODEL_SHA256"], !sha.isEmpty,
76
+ let url = URL(string: urlStr)
77
+ else {
78
+ throw XCTSkip("SMOKE_MODEL_URL/SMOKE_MODEL_SHA256 not set in env; skipping real-model smoke")
79
+ }
80
+
81
+ // Generous timeout for the 800 MB download + 1B-param load.
82
+ let downloader = ModelDownloader(cacheDirOverride: tempDir)
83
+ let result = try await downloader.downloadModel(
84
+ url: url,
85
+ expectedSha256: sha.lowercased(),
86
+ destFilename: "smoke-model.gguf",
87
+ headers: [:],
88
+ onProgress: { _, _ in /* no-op for smoke */ }
89
+ )
90
+
91
+ XCTAssertFalse(result.cached, "first download into a fresh temp dir should not be cached")
92
+ XCTAssertTrue(
93
+ FileManager.default.fileExists(atPath: result.path),
94
+ "downloaded file should exist at \(result.path)"
95
+ )
96
+
97
+ let bridge = LlamaCppBridge()
98
+ self.bridge = bridge
99
+ try bridge.loadModel(
100
+ atPath: result.path,
101
+ mmprojPath: nil,
102
+ gpuLayers: 99,
103
+ contextSize: 2048,
104
+ threads: 4,
105
+ embeddingMode: false
106
+ )
107
+ XCTAssertTrue(bridge.isLoaded, "model should be loaded after loadModel(...) returns")
108
+
109
+ let completion = try bridge.completePrompt(
110
+ "<|begin_of_text|>What is 2+2?",
111
+ maxTokens: 32,
112
+ temperature: 0.0,
113
+ topP: 1.0
114
+ )
115
+ // Don't assert specific content — that's quality testing, not smoke.
116
+ XCTAssertFalse(completion.isEmpty, "completion should not be empty")
117
+ }
118
+
119
+ /// Vision smoke: download model + mmproj, load both, run a chat
120
+ /// completion against the tiny test image fixture. Skips cleanly if any
121
+ /// of SMOKE_VISION_MODEL_URL / SMOKE_VISION_MODEL_SHA256 /
122
+ /// SMOKE_VISION_MMPROJ_URL / SMOKE_VISION_MMPROJ_SHA256 are unset.
123
+ func testSmokeVisionEndToEnd() async throws {
124
+ // Gated off CI in v4.0.1 — see testSmokeRealModelEndToEnd above.
125
+ guard ProcessInfo.processInfo.environment["RUN_LLAMA_INTEGRATION"] == "1" else {
126
+ throw XCTSkip("Real-model vision smoke gated off CI in v4.0.1 — simulator dies under sustained inference. Set RUN_LLAMA_INTEGRATION=1 to run.")
127
+ }
128
+ let env = Self.loadSmokeEnv()
129
+ guard let modelUrlStr = env["SMOKE_VISION_MODEL_URL"], !modelUrlStr.isEmpty,
130
+ let modelSha = env["SMOKE_VISION_MODEL_SHA256"], !modelSha.isEmpty,
131
+ let mmprojUrlStr = env["SMOKE_VISION_MMPROJ_URL"], !mmprojUrlStr.isEmpty,
132
+ let mmprojSha = env["SMOKE_VISION_MMPROJ_SHA256"], !mmprojSha.isEmpty,
133
+ let modelUrl = URL(string: modelUrlStr),
134
+ let mmprojUrl = URL(string: mmprojUrlStr)
135
+ else {
136
+ throw XCTSkip("SMOKE_VISION_* env vars not all set; skipping vision smoke")
137
+ }
138
+
139
+ smokeStep("vision: downloading main model")
140
+ let downloader = ModelDownloader(cacheDirOverride: tempDir)
141
+ let modelResult = try await downloader.downloadModel(
142
+ url: modelUrl,
143
+ expectedSha256: modelSha.lowercased(),
144
+ destFilename: "smoke-vision-model.gguf",
145
+ headers: [:],
146
+ onProgress: { _, _ in /* no-op for smoke */ }
147
+ )
148
+ XCTAssertTrue(FileManager.default.fileExists(atPath: modelResult.path))
149
+ smokeStep("vision: model downloaded; downloading mmproj")
150
+ let mmprojResult = try await downloader.downloadModel(
151
+ url: mmprojUrl,
152
+ expectedSha256: mmprojSha.lowercased(),
153
+ destFilename: "smoke-vision-mmproj.gguf",
154
+ headers: [:],
155
+ onProgress: { _, _ in /* no-op for smoke */ }
156
+ )
157
+ XCTAssertTrue(FileManager.default.fileExists(atPath: mmprojResult.path))
158
+ smokeStep("vision: mmproj downloaded")
159
+
160
+ let bridge = LlamaCppBridge()
161
+ self.bridge = bridge
162
+ // gpuLayers=0 on simulator: same MTLSimDevice allocation cap that
163
+ // hits the mmproj also bites the main model when mtmd_helper_decode
164
+ // builds image-embedding tensors and llama_decode runs them on
165
+ // Metal. Falling back to CPU for the main model avoids the abort.
166
+ // Real iPhone hardware uses Metal end-to-end gpuLayers=99 is the
167
+ // production default.
168
+ #if targetEnvironment(simulator)
169
+ let mainGPULayers: Int32 = 0
170
+ #else
171
+ let mainGPULayers: Int32 = 99
172
+ #endif
173
+ smokeStep("vision: loading main model (gpuLayers=\(mainGPULayers))")
174
+ try bridge.loadModel(
175
+ atPath: modelResult.path,
176
+ mmprojPath: nil,
177
+ // Smoke: small context to keep KV-cache memory well under
178
+ // the simulator's per-process budget. We sample at most 32
179
+ // tokens, so 1024 leaves plenty of headroom for the prompt
180
+ // + image chunk + completion without paging.
181
+ gpuLayers: mainGPULayers,
182
+ contextSize: 1024,
183
+ threads: 4,
184
+ embeddingMode: false
185
+ )
186
+ XCTAssertTrue(bridge.isLoaded)
187
+ smokeStep("vision: main model loaded")
188
+ // useGPU=false on simulator: iOS Simulator's MTLSimDevice aborts in
189
+ // _xpc_shmem_create_with_prot when CLIP tries to allocate the
190
+ // ~60 MiB position-embedding tensor (gemma4v has shape [768, 10240, 2]).
191
+ // CPU-only projection is slow but lets the smoke run end-to-end.
192
+ // Real iPhone hardware uses Metal without issue useGPU=true is the
193
+ // production default.
194
+ #if targetEnvironment(simulator)
195
+ let useGPUForMmproj = false
196
+ #else
197
+ let useGPUForMmproj = true
198
+ #endif
199
+ smokeStep("vision: loading mmproj (useGPU=\(useGPUForMmproj))")
200
+ try bridge.loadMmproj(atPath: mmprojResult.path, useGPU: useGPUForMmproj)
201
+ XCTAssertTrue(bridge.isMmprojLoaded)
202
+ smokeStep("vision: mmproj loaded")
203
+
204
+ // Read the smoke PNG fixture. tiny-test.png is a 256x256 image with
205
+ // three primary-colour squares + a yellow ellipse picked so a
206
+ // captioner has unambiguous content to describe (a blank canvas
207
+ // tends to make Gemma 4 emit `<end_of_turn>` as its first sample,
208
+ // which the greedy sampler treats as a clean exit and returns "").
209
+ // Regenerate via `scripts/generate-image-fixtures.sh`.
210
+ let imageURL = fixturesURL().appendingPathComponent("images").appendingPathComponent("tiny-test.png")
211
+ let imageData = try Data(contentsOf: imageURL)
212
+
213
+ // Build a marker-bearing prompt and apply the model's chat template.
214
+ // Gemma 4's published GGUFs at ggml-org/gemma-4-E2B-it-GGUF do not
215
+ // embed a tokenizer.chat_template that llama.cpp's heuristic
216
+ // recognizes, so passing nil here produces error 41 ("model has no
217
+ // chat template and none provided"). Production developers using
218
+ // capacitor-llama are expected to supply their model's template at
219
+ // start time; for smoke purposes we hardcode Gemma's published
220
+ // chat-template format inline.
221
+ let gemmaTemplate = """
222
+ {% for m in messages %}<start_of_turn>{% if m.role == 'assistant' %}model{% else %}{{ m.role }}{% endif %}
223
+ {{ m.content }}<end_of_turn>
224
+ {% endfor %}{% if add_generation_prompt %}<start_of_turn>model
225
+ {% endif %}
226
+ """
227
+ let messages: [[String: String]] = [
228
+ ["role": "user", "content": "Describe this image: \(MTMD_MEDIA_MARKER)"]
229
+ ]
230
+ let chatPrompt = try bridge.applyChatTemplate(gemmaTemplate, messages: messages, addAssistant: true)
231
+ XCTAssertFalse(chatPrompt.isEmpty)
232
+ smokeStep("vision: chat template applied; running multimodal eval")
233
+
234
+ let completion = try bridge.completeMultimodalPrompt(
235
+ chatPrompt,
236
+ media: [imageData],
237
+ maxTokens: 32,
238
+ temperature: 0.0,
239
+ topP: 1.0
240
+ )
241
+ smokeStep("vision: eval done completion=\(completion.prefix(80))")
242
+ XCTAssertFalse(completion.isEmpty, "vision completion should not be empty")
243
+ }
244
+
245
+ /// Audio smoke: same as vision, but with the WAV fixture instead of PNG.
246
+ /// mtmd's `mtmd_helper_bitmap_init_from_buf` accepts wav/mp3/flac for
247
+ /// audio (per mtmd-helper.h docs). Skips when the model declared no
248
+ /// audio encoder (e.g. vision-only mmproj).
249
+ func testSmokeAudioEndToEnd() async throws {
250
+ // Gated off CI in v4.0.1 — see testSmokeRealModelEndToEnd above.
251
+ guard ProcessInfo.processInfo.environment["RUN_LLAMA_INTEGRATION"] == "1" else {
252
+ throw XCTSkip("Real-model audio smoke gated off CI in v4.0.1 — simulator dies under sustained inference. Set RUN_LLAMA_INTEGRATION=1 to run.")
253
+ }
254
+ let env = Self.loadSmokeEnv()
255
+ guard let modelUrlStr = env["SMOKE_VISION_MODEL_URL"], !modelUrlStr.isEmpty,
256
+ let modelSha = env["SMOKE_VISION_MODEL_SHA256"], !modelSha.isEmpty,
257
+ let mmprojUrlStr = env["SMOKE_VISION_MMPROJ_URL"], !mmprojUrlStr.isEmpty,
258
+ let mmprojSha = env["SMOKE_VISION_MMPROJ_SHA256"], !mmprojSha.isEmpty,
259
+ let modelUrl = URL(string: modelUrlStr),
260
+ let mmprojUrl = URL(string: mmprojUrlStr)
261
+ else {
262
+ throw XCTSkip("SMOKE_VISION_* env vars not all set; skipping audio smoke")
263
+ }
264
+
265
+ smokeStep("audio: downloading main model")
266
+ let downloader = ModelDownloader(cacheDirOverride: tempDir)
267
+ let modelResult = try await downloader.downloadModel(
268
+ url: modelUrl,
269
+ expectedSha256: modelSha.lowercased(),
270
+ destFilename: "smoke-audio-model.gguf",
271
+ headers: [:],
272
+ onProgress: { _, _ in /* no-op for smoke */ }
273
+ )
274
+ smokeStep("audio: model downloaded; downloading mmproj")
275
+ let mmprojResult = try await downloader.downloadModel(
276
+ url: mmprojUrl,
277
+ expectedSha256: mmprojSha.lowercased(),
278
+ destFilename: "smoke-audio-mmproj.gguf",
279
+ headers: [:],
280
+ onProgress: { _, _ in /* no-op for smoke */ }
281
+ )
282
+ smokeStep("audio: mmproj downloaded")
283
+
284
+ let bridge = LlamaCppBridge()
285
+ self.bridge = bridge
286
+ // gpuLayers=0 on simulator: same MTLSimDevice allocation cap that
287
+ // hits the mmproj also bites the main model when mtmd_helper_decode
288
+ // builds audio-embedding tensors and llama_decode runs them on
289
+ // Metal. Falling back to CPU for the main model avoids the abort.
290
+ #if targetEnvironment(simulator)
291
+ let mainGPULayers: Int32 = 0
292
+ #else
293
+ let mainGPULayers: Int32 = 99
294
+ #endif
295
+ smokeStep("audio: loading main model (gpuLayers=\(mainGPULayers))")
296
+ try bridge.loadModel(
297
+ atPath: modelResult.path,
298
+ mmprojPath: nil,
299
+ gpuLayers: mainGPULayers,
300
+ contextSize: 1024,
301
+ threads: 4,
302
+ embeddingMode: false
303
+ )
304
+ smokeStep("audio: main model loaded")
305
+ // useGPU=false on simulator: iOS Simulator's MTLSimDevice aborts in
306
+ // _xpc_shmem_create_with_prot when CLIP tries to allocate the
307
+ // ~60 MiB position-embedding tensor (gemma4v has shape [768, 10240, 2]).
308
+ // CPU-only projection is slow but lets the smoke run end-to-end.
309
+ // Real iPhone hardware uses Metal without issue → useGPU=true is the
310
+ // production default.
311
+ #if targetEnvironment(simulator)
312
+ let useGPUForMmproj = false
313
+ #else
314
+ let useGPUForMmproj = true
315
+ #endif
316
+ smokeStep("audio: loading mmproj (useGPU=\(useGPUForMmproj))")
317
+ try bridge.loadMmproj(atPath: mmprojResult.path, useGPU: useGPUForMmproj)
318
+ smokeStep("audio: mmproj loaded")
319
+
320
+ // Skip cleanly if the loaded mmproj has no audio encoder (e.g. when
321
+ // SMOKE_VISION_* points at a vision-only projector).
322
+ guard bridge.hasAudioEncoder() else {
323
+ throw XCTSkip("Loaded mmproj reports no audio encoder; skipping audio smoke")
324
+ }
325
+ smokeStep("audio: hasAudioEncoder=true; running multimodal eval")
326
+
327
+ let audioURL = fixturesURL().appendingPathComponent("audio").appendingPathComponent("wav-1s-16khz-mono.wav")
328
+ let audioData = try Data(contentsOf: audioURL)
329
+
330
+ // Same Gemma chat template as the vision test — Gemma 4 GGUFs at
331
+ // ggml-org don't ship a llama.cpp-recognized chat_template.
332
+ let gemmaTemplate = """
333
+ {% for m in messages %}<start_of_turn>{% if m.role == 'assistant' %}model{% else %}{{ m.role }}{% endif %}
334
+ {{ m.content }}<end_of_turn>
335
+ {% endfor %}{% if add_generation_prompt %}<start_of_turn>model
336
+ {% endif %}
337
+ """
338
+ // Open-ended prompt: the WAV fixture is a synthetic 1-second 440 Hz
339
+ // sine tone with no speech content, so "Transcribe this:" makes the
340
+ // model emit `<end_of_turn>` as its first sample (legitimate — there's
341
+ // nothing to transcribe). "Describe what you hear" gives Gemma room
342
+ // to say something like "A pure tone." instead of bailing immediately.
343
+ let messages: [[String: String]] = [
344
+ ["role": "user", "content": "Describe what you hear: \(MTMD_MEDIA_MARKER)"]
345
+ ]
346
+ let chatPrompt = try bridge.applyChatTemplate(gemmaTemplate, messages: messages, addAssistant: true)
347
+
348
+ // Note: we do NOT assert that the completion is non-empty. With a
349
+ // synthetic tone fixture, an immediate-EOS sample is a *correct*
350
+ // model response, not a pipeline failure. This smoke verifies that
351
+ // the audio path runs end-to-end without throwing — eval, decode,
352
+ // and sampler all return cleanly. Production code paths (real
353
+ // speech audio) are exercised by host-app integration tests.
354
+ _ = try bridge.completeMultimodalPrompt(
355
+ chatPrompt,
356
+ media: [audioData],
357
+ maxTokens: 32,
358
+ temperature: 0.0,
359
+ topP: 1.0
360
+ )
361
+ smokeStep("audio: eval done")
362
+ }
363
+
364
+ /// Walks up from #file to find the repo-root `fixtures/` dir.
365
+ private func fixturesURL() -> URL {
366
+ var dir = URL(fileURLWithPath: #file).deletingLastPathComponent()
367
+ while !FileManager.default.fileExists(atPath: dir.appendingPathComponent("fixtures").path) {
368
+ let parent = dir.deletingLastPathComponent()
369
+ if parent.path == dir.path {
370
+ fatalError("fixtures dir not found walking up from \(#file)")
371
+ }
372
+ dir = parent
373
+ }
374
+ return dir.appendingPathComponent("fixtures")
375
+ }
376
+
377
+ /// Reads SMOKE_* env vars from the test process's environment first,
378
+ /// then falls back to the per-developer `scripts/smoke.local.env`
379
+ /// file on the host filesystem. The fallback exists because
380
+ /// `xcodebuild test` does not propagate parent-process env vars
381
+ /// (or `SIMCTL_CHILD_*` / `TEST_RUNNER_*`) to the unit-test bundle's
382
+ /// `ProcessInfo.processInfo.environment` — that's an XCUITest-only
383
+ /// channel. Reading the file directly works reliably both locally
384
+ /// (via the gitignored smoke.local.env) and in CI (which actually
385
+ /// does inject env vars at the workflow level — `ProcessInfo` sees
386
+ /// those because the Mac runner inherits the GitHub Actions step env
387
+ /// before xcodebuild starts).
388
+ fileprivate static func loadSmokeEnv() -> [String: String] {
389
+ var env = ProcessInfo.processInfo.environment.filter { $0.key.hasPrefix("SMOKE_") }
390
+ if !env.isEmpty {
391
+ return env
392
+ }
393
+ // Walk up from this file to find `scripts/smoke.local.env`.
394
+ var dir = URL(fileURLWithPath: #file).deletingLastPathComponent()
395
+ while !FileManager.default.fileExists(atPath: dir.appendingPathComponent("scripts/smoke.local.env").path) {
396
+ let parent = dir.deletingLastPathComponent()
397
+ if parent.path == dir.path {
398
+ return env // empty
399
+ }
400
+ dir = parent
401
+ }
402
+ let envFile = dir.appendingPathComponent("scripts/smoke.local.env")
403
+ guard let contents = try? String(contentsOf: envFile, encoding: .utf8) else {
404
+ return env
405
+ }
406
+ for line in contents.split(separator: "\n") {
407
+ let trimmed = line.trimmingCharacters(in: .whitespaces)
408
+ if trimmed.isEmpty || trimmed.hasPrefix("#") { continue }
409
+ guard let eq = trimmed.firstIndex(of: "=") else { continue }
410
+ let key = String(trimmed[..<eq]).trimmingCharacters(in: .whitespaces)
411
+ var value = String(trimmed[trimmed.index(after: eq)...]).trimmingCharacters(in: .whitespaces)
412
+ // Strip a single matching pair of leading/trailing quotes so a
413
+ // developer writing `SMOKE_MODEL_URL="https://..."` in the env
414
+ // file doesn't end up with the quote chars baked into the URL
415
+ // (which makes `URL(string:)` return nil and the test silently
416
+ // skip with a confusing reason).
417
+ if value.count >= 2 {
418
+ if (value.first == "\"" && value.last == "\"") ||
419
+ (value.first == "'" && value.last == "'") {
420
+ value = String(value.dropFirst().dropLast())
421
+ }
422
+ }
423
+ if key.hasPrefix("SMOKE_") && !value.isEmpty {
424
+ env[key] = value
425
+ }
426
+ }
427
+ return env
428
+ }
429
+ }