@dvai-bridge/android-mediapipe-core 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,529 +1,529 @@
1
- package co.deepvoiceai.bridge.mediapipe.core
2
-
3
- import co.deepvoiceai.bridge.shared.core.HandlerContext
4
- import co.deepvoiceai.bridge.shared.core.HandlerResponse
5
- import kotlinx.coroutines.flow.toList
6
- import kotlinx.coroutines.runBlocking
7
- import kotlinx.serialization.json.Json
8
- import kotlinx.serialization.json.JsonArray
9
- import kotlinx.serialization.json.JsonNull
10
- import kotlinx.serialization.json.JsonObject
11
- import kotlinx.serialization.json.JsonPrimitive
12
- import kotlinx.serialization.json.add
13
- import kotlinx.serialization.json.addJsonObject
14
- import kotlinx.serialization.json.buildJsonObject
15
- import kotlinx.serialization.json.contentOrNull
16
- import kotlinx.serialization.json.jsonArray
17
- import kotlinx.serialization.json.jsonObject
18
- import kotlinx.serialization.json.jsonPrimitive
19
- import kotlinx.serialization.json.put
20
- import kotlinx.serialization.json.putJsonArray
21
- import kotlinx.serialization.json.putJsonObject
22
- import org.junit.Assert.assertEquals
23
- import org.junit.Assert.assertNotNull
24
- import org.junit.Assert.assertNull
25
- import org.junit.Assert.assertTrue
26
- import org.junit.Test
27
- import org.junit.runner.RunWith
28
- import org.robolectric.RobolectricTestRunner
29
-
30
- /**
31
- * JVM unit tests for [MediaPipeHandlers]. Uses a [FakeBridge] implementing
32
- * [MediaPipeBridgeApi] so tests don't need a real MediaPipe `.task` model.
33
- *
34
- * Mirrors the coverage of `LlamaHandlersTest`: chat-completion (sync +
35
- * streaming), legacy completions, error / 400 surfaces, embeddings rejection,
36
- * the models endpoint, plus Task 46 vision-capable happy path and image
37
- * fetch failure surfaces.
38
- *
39
- * The `bytesToImage` seam that previously lived in [MediaPipeHandlers] has been
40
- * moved into [MediaPipeBridge] as part of the Task 17 interface neutralization.
41
- * [MediaPipeBridgeApi] now accepts [List]<[ByteArray]> rather than
42
- * [List]<MPImage>. The [FakeBridge] captures raw bytes so tests can assert on
43
- * the exact payload handed to the bridge, without any Robolectric bitmap
44
- * overhead.
45
- */
46
- @RunWith(RobolectricTestRunner::class)
47
- class MediaPipeHandlersTest {
48
- private val ctx = HandlerContext(modelId = "gemma-2b-it-cpu-int4", backendName = "mediapipe")
49
-
50
- private class FakeBridge(
51
- var responseToReturn: String = "canned mediapipe response",
52
- var shouldThrow: Boolean = false,
53
- ) : MediaPipeBridgeApi {
54
- var receivedPrompt: String? = null
55
- var receivedImages: List<ByteArray> = emptyList()
56
- var asyncCloseCount: Int = 0
57
-
58
- override fun completePrompt(prompt: String, images: List<ByteArray>): String {
59
- receivedPrompt = prompt
60
- receivedImages = images
61
- if (shouldThrow) throw RuntimeException("simulated mediapipe error")
62
- return responseToReturn
63
- }
64
-
65
- override fun completePromptAsync(
66
- prompt: String,
67
- images: List<ByteArray>,
68
- onPartial: (String, Boolean) -> Unit,
69
- ): AutoCloseable {
70
- receivedPrompt = prompt
71
- receivedImages = images
72
- if (shouldThrow) throw RuntimeException("simulated mediapipe error")
73
- // Synchronously emit two partial chunks then a final done=true. The
74
- // handler's callbackFlow trySend is non-blocking so this is fine.
75
- val mid = (responseToReturn.length / 2).coerceAtLeast(0)
76
- val first = responseToReturn.substring(0, mid)
77
- val second = responseToReturn.substring(mid)
78
- onPartial(first, false)
79
- onPartial(second, true)
80
- return AutoCloseable { asyncCloseCount += 1 }
81
- }
82
- }
83
-
84
- private fun makeHandlers(
85
- bridge: FakeBridge = FakeBridge(),
86
- visionCapable: Boolean = false,
87
- ): MediaPipeHandlers =
88
- MediaPipeHandlers(
89
- bridge = bridge,
90
- modelId = "gemma-2b-it-cpu-int4",
91
- visionCapable = visionCapable,
92
- )
93
-
94
- /**
95
- * Parse a single SSE frame's JSON payload. Returns null for `[DONE]`
96
- * frames or non-`data:` lines.
97
- */
98
- private fun decodeFrame(frame: String): JsonObject? {
99
- val trimmed = frame.trim()
100
- if (!trimmed.startsWith("data: ")) return null
101
- val payload = trimmed.removePrefix("data: ")
102
- if (payload == "[DONE]") return null
103
- return Json.parseToJsonElement(payload).jsonObject
104
- }
105
-
106
- // ----- Chat completion (text happy path) -----
107
-
108
- @Test
109
- fun `chat completion text happy path`() = runBlocking {
110
- val bridge = FakeBridge(responseToReturn = "Hello, world!")
111
- val handlers = makeHandlers(bridge = bridge)
112
- val body = buildJsonObject {
113
- putJsonArray("messages") {
114
- addJsonObject {
115
- put("role", "user")
116
- put("content", "hi")
117
- }
118
- }
119
- }
120
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
121
- ?: error("expected Json response")
122
- assertEquals(200, resp.status)
123
- val obj = resp.body as JsonObject
124
- assertEquals("chat.completion", (obj["object"] as JsonPrimitive).content)
125
- assertEquals("gemma-2b-it-cpu-int4", (obj["model"] as JsonPrimitive).content)
126
- val choices = obj["choices"] as JsonArray
127
- assertEquals(1, choices.size)
128
- val msg = (choices[0] as JsonObject)["message"] as JsonObject
129
- assertEquals("Hello, world!", (msg["content"] as JsonPrimitive).content)
130
- assertEquals("assistant", (msg["role"] as JsonPrimitive).content)
131
- assertEquals(
132
- "stop",
133
- ((choices[0] as JsonObject)["finish_reason"] as JsonPrimitive).content,
134
- )
135
- // Prompt threaded through openAIMessagesToPrompt.
136
- assertEquals("user: hi", bridge.receivedPrompt)
137
- // No images provided → empty list passed through.
138
- assertEquals(0, bridge.receivedImages.size)
139
- // ID prefix is the MediaPipe-flavored one.
140
- assertTrue(
141
- "id should start with chatcmpl-mp-: ${(obj["id"] as JsonPrimitive).content}",
142
- (obj["id"] as JsonPrimitive).content.startsWith("chatcmpl-mp-"),
143
- )
144
- }
145
-
146
- // ----- Streaming -----
147
-
148
- @Test
149
- fun `chat completion streaming text emits role content content finish-with-content done`() = runBlocking {
150
- val bridge = FakeBridge(responseToReturn = "abcd")
151
- val handlers = makeHandlers(bridge = bridge)
152
- val body = buildJsonObject {
153
- putJsonArray("messages") {
154
- addJsonObject {
155
- put("role", "user")
156
- put("content", "hi")
157
- }
158
- }
159
- put("stream", true)
160
- }
161
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Sse
162
- ?: error("expected Sse response")
163
- val frames = resp.flow.toList()
164
-
165
- // Expect: role(0) + 2 content chunks (last one carries finish_reason) + [DONE] = 4 frames
166
- assertEquals(4, frames.size)
167
-
168
- // Frame 0: role delta
169
- val frame0 = decodeFrame(frames[0])
170
- ?: error("frame 0 not decodable")
171
- val roleDelta = frame0["choices"]?.jsonArray?.first()?.jsonObject
172
- ?.get("delta")?.jsonObject
173
- assertEquals("assistant", roleDelta?.get("role")?.jsonPrimitive?.content)
174
- assertEquals("chat.completion.chunk", frame0["object"]?.jsonPrimitive?.content)
175
-
176
- // Frame 1: first content delta (done=false)
177
- val frame1 = decodeFrame(frames[1]) ?: error("frame 1 not decodable")
178
- val choice1 = frame1["choices"]?.jsonArray?.first()?.jsonObject
179
- ?: error("frame 1 missing choices[0]")
180
- assertEquals(
181
- "ab",
182
- choice1["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content,
183
- )
184
- // finish_reason is JsonNull on the non-final frame
185
- assertTrue(
186
- "frame 1 finish_reason should be JsonNull, got ${choice1["finish_reason"]}",
187
- choice1["finish_reason"] is JsonNull,
188
- )
189
-
190
- // Frame 2: final content delta (done=true) — carries finish_reason="stop"
191
- val frame2 = decodeFrame(frames[2]) ?: error("frame 2 not decodable")
192
- val choice2 = frame2["choices"]?.jsonArray?.first()?.jsonObject
193
- ?: error("frame 2 missing choices[0]")
194
- assertEquals(
195
- "cd",
196
- choice2["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content,
197
- )
198
- assertEquals(
199
- "stop",
200
- choice2["finish_reason"]?.jsonPrimitive?.contentOrNull,
201
- )
202
-
203
- // Frame 3: [DONE]
204
- assertEquals("data: [DONE]\n\n", frames[3])
205
- assertNull(decodeFrame(frames[3]))
206
-
207
- // Bridge.completePromptAsync was called and the AutoCloseable handle was closed.
208
- assertEquals("user: hi", bridge.receivedPrompt)
209
- assertEquals(1, bridge.asyncCloseCount)
210
- }
211
-
212
- // ----- Vision (Task 46) -----
213
-
214
- @Test
215
- fun `chat completion image part returns 400 when not vision-capable`() = runBlocking {
216
- // visionCapable = false → image_url request is rejected before image fetch.
217
- val handlers = makeHandlers(visionCapable = false)
218
- val body = buildJsonObject {
219
- putJsonArray("messages") {
220
- addJsonObject {
221
- put("role", "user")
222
- putJsonArray("content") {
223
- addJsonObject {
224
- put("type", "image_url")
225
- putJsonObject("image_url") {
226
- put("url", "data:image/png;base64,iVBOR")
227
- }
228
- }
229
- }
230
- }
231
- }
232
- }
233
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
234
- ?: error("expected Error response")
235
- assertEquals(400, resp.status)
236
- assertTrue(
237
- "message: ${resp.message}",
238
- resp.message.contains("vision-capable MediaPipe model"),
239
- )
240
- assertTrue(
241
- "message: ${resp.message}",
242
- resp.message.contains("visionEnabled"),
243
- )
244
- }
245
-
246
- @Test
247
- fun `chat completion vision-capable image part decodes and threads through bridge`() = runBlocking {
248
- val bridge = FakeBridge(responseToReturn = "looks like a cat")
249
- // Use a tiny in-memory PNG via the data URL fixture so the real
250
- // ImageDecoder.resolve path exercises base64 decoding. Raw bytes are
251
- // now passed directly to the bridge (ByteArray neutralization, Task 17).
252
- val handlers = makeHandlers(bridge = bridge, visionCapable = true)
253
- // Real PNG bytes (1x1 transparent pixel) inlined as base64. Same
254
- // payload as `tiny-test-base64.txt` but inlined to keep the test
255
- // self-contained.
256
- val pngDataUrl =
257
- "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
258
- val body = buildJsonObject {
259
- putJsonArray("messages") {
260
- addJsonObject {
261
- put("role", "user")
262
- putJsonArray("content") {
263
- addJsonObject {
264
- put("type", "text")
265
- put("text", "describe this")
266
- }
267
- addJsonObject {
268
- put("type", "image_url")
269
- putJsonObject("image_url") {
270
- put("url", pngDataUrl)
271
- }
272
- }
273
- }
274
- }
275
- }
276
- }
277
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
278
- ?: error("expected Json response")
279
- assertEquals(200, resp.status)
280
- val obj = resp.body as JsonObject
281
- val msg = ((obj["choices"] as JsonArray)[0] as JsonObject)["message"] as JsonObject
282
- assertEquals("looks like a cat", (msg["content"] as JsonPrimitive).content)
283
-
284
- // Bridge received exactly one image (as raw bytes) and the text prompt.
285
- assertEquals(1, bridge.receivedImages.size)
286
- // Sanity: first 8 bytes must match the PNG magic header.
287
- val receivedBytes = bridge.receivedImages[0]
288
- assertTrue("PNG magic: byte[0]", receivedBytes[0] == 0x89.toByte())
289
- assertTrue("PNG magic: byte[1]", receivedBytes[1] == 0x50.toByte())
290
- // Prompt should contain just the text part.
291
- assertEquals("user: describe this", bridge.receivedPrompt)
292
- }
293
-
294
- @Test
295
- fun `chat completion image_url missing url field returns 400`() = runBlocking {
296
- val handlers = makeHandlers(visionCapable = true)
297
- val body = buildJsonObject {
298
- putJsonArray("messages") {
299
- addJsonObject {
300
- put("role", "user")
301
- putJsonArray("content") {
302
- addJsonObject {
303
- put("type", "image_url")
304
- putJsonObject("image_url") {
305
- // No "url" key.
306
- }
307
- }
308
- }
309
- }
310
- }
311
- }
312
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
313
- ?: error("expected Error response")
314
- assertEquals(400, resp.status)
315
- assertTrue(
316
- "message: ${resp.message}",
317
- resp.message.contains("missing 'url' field"),
318
- )
319
- }
320
-
321
- @Test
322
- fun `chat completion image fetch failure returns 502`() = runBlocking {
323
- val handlers = makeHandlers(visionCapable = true)
324
- val body = buildJsonObject {
325
- putJsonArray("messages") {
326
- addJsonObject {
327
- put("role", "user")
328
- putJsonArray("content") {
329
- addJsonObject {
330
- put("type", "image_url")
331
- putJsonObject("image_url") {
332
- // Unsupported scheme — ImageDecoder.resolve throws InvalidScheme.
333
- put("url", "ftp://example.com/x.png")
334
- }
335
- }
336
- }
337
- }
338
- }
339
- }
340
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
341
- ?: error("expected Error response")
342
- assertEquals(502, resp.status)
343
- assertTrue(
344
- "message: ${resp.message}",
345
- resp.message.contains("Failed to fetch image"),
346
- )
347
- }
348
-
349
- @Test
350
- fun `chat completion streaming vision-capable image part threads raw bytes through bridge`() = runBlocking {
351
- // Verify that the streaming path passes raw ByteArray to the bridge
352
- // rather than converting to MPImage in the handler layer (Task 17
353
- // neutralization: ByteArray → MPImage conversion now happens inside
354
- // MediaPipeBridge, not in MediaPipeHandlers).
355
- val bridge = FakeBridge(responseToReturn = "ab")
356
- val handlers = makeHandlers(bridge = bridge, visionCapable = true)
357
- val pngDataUrl =
358
- "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
359
- val body = buildJsonObject {
360
- putJsonArray("messages") {
361
- addJsonObject {
362
- put("role", "user")
363
- putJsonArray("content") {
364
- addJsonObject { put("type", "text"); put("text", "what is this") }
365
- addJsonObject {
366
- put("type", "image_url")
367
- putJsonObject("image_url") { put("url", pngDataUrl) }
368
- }
369
- }
370
- }
371
- }
372
- put("stream", true)
373
- }
374
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Sse
375
- ?: error("expected Sse response")
376
- resp.flow.toList() // consume the stream to completion
377
- // Bridge received one raw-byte image via the streaming path.
378
- assertEquals(1, bridge.receivedImages.size)
379
- val receivedBytes = bridge.receivedImages[0]
380
- assertTrue("PNG magic byte[0]", receivedBytes[0] == 0x89.toByte())
381
- assertTrue("PNG magic byte[1]", receivedBytes[1] == 0x50.toByte())
382
- }
383
-
384
- // ----- 400 surfaces -----
385
-
386
- @Test
387
- fun `chat completion audio part returns 400`() = runBlocking {
388
- val handlers = makeHandlers()
389
- val body = buildJsonObject {
390
- putJsonArray("messages") {
391
- addJsonObject {
392
- put("role", "user")
393
- putJsonArray("content") {
394
- addJsonObject {
395
- put("type", "input_audio")
396
- putJsonObject("input_audio") {
397
- put("data", "AAAA")
398
- put("format", "pcm16")
399
- }
400
- }
401
- }
402
- }
403
- }
404
- }
405
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
406
- ?: error("expected Error response")
407
- assertEquals(400, resp.status)
408
- assertTrue("message: ${resp.message}", resp.message.contains("Audio input not supported"))
409
- }
410
-
411
- @Test
412
- fun `chat completion missing messages returns 400`() = runBlocking {
413
- val handlers = makeHandlers()
414
- val resp = handlers.handleChatCompletion(buildJsonObject {}, ctx) as? HandlerResponse.Error
415
- ?: error("expected Error response")
416
- assertEquals(400, resp.status)
417
- assertTrue("message: ${resp.message}", resp.message.contains("messages"))
418
- }
419
-
420
- @Test
421
- fun `chat completion empty messages returns 400`() = runBlocking {
422
- val handlers = makeHandlers()
423
- val body = buildJsonObject {
424
- putJsonArray("messages") { /* empty */ }
425
- }
426
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
427
- ?: error("expected Error response")
428
- assertEquals(400, resp.status)
429
- assertTrue("message: ${resp.message}", resp.message.contains("Empty"))
430
- }
431
-
432
- @Test
433
- fun `chat completion bridge throw returns 500`() = runBlocking {
434
- val bridge = FakeBridge(shouldThrow = true)
435
- val handlers = makeHandlers(bridge = bridge)
436
- val body = buildJsonObject {
437
- putJsonArray("messages") {
438
- addJsonObject {
439
- put("role", "user")
440
- put("content", "hi")
441
- }
442
- }
443
- }
444
- val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
445
- ?: error("expected Error response")
446
- assertEquals(500, resp.status)
447
- assertTrue("message: ${resp.message}", resp.message.contains("simulated mediapipe error"))
448
- }
449
-
450
- // ----- Legacy completions -----
451
-
452
- @Test
453
- fun `legacy completion converts to text_completion shape`() = runBlocking {
454
- val bridge = FakeBridge(responseToReturn = "canned-text")
455
- val handlers = makeHandlers(bridge = bridge)
456
- val body = buildJsonObject { put("prompt", "say hi") }
457
- val resp = handlers.handleCompletion(body, ctx) as? HandlerResponse.Json
458
- ?: error("expected Json response")
459
- assertEquals(200, resp.status)
460
- val obj = resp.body as JsonObject
461
- assertEquals("text_completion", (obj["object"] as JsonPrimitive).content)
462
- val choices = obj["choices"] as JsonArray
463
- val choice0 = choices[0] as JsonObject
464
- assertEquals("canned-text", (choice0["text"] as JsonPrimitive).content)
465
- assertEquals("stop", (choice0["finish_reason"] as JsonPrimitive).content)
466
- assertNotNull(choice0["logprobs"])
467
- // ID rewritten chatcmpl-mp- → cmpl-mp-
468
- val idStr = (obj["id"] as JsonPrimitive).content
469
- assertTrue("id should start with cmpl-: $idStr", idStr.startsWith("cmpl-"))
470
- // Prompt was wrapped as a user message and threaded through.
471
- assertEquals("user: say hi", bridge.receivedPrompt)
472
- }
473
-
474
- @Test
475
- fun `legacy completion array prompt joined with newline`() = runBlocking {
476
- val bridge = FakeBridge()
477
- val handlers = makeHandlers(bridge = bridge)
478
- val body = buildJsonObject {
479
- putJsonArray("prompt") {
480
- add("alpha")
481
- add("beta")
482
- }
483
- }
484
- val resp = handlers.handleCompletion(body, ctx) as? HandlerResponse.Json
485
- ?: error("expected Json response")
486
- assertEquals(200, resp.status)
487
- // The prompt array is joined with \n, then wrapped as a single user message.
488
- assertEquals("user: alpha\nbeta", bridge.receivedPrompt)
489
- }
490
-
491
- // ----- Embeddings -----
492
-
493
- @Test
494
- fun `embeddings always returns 400 with redirect message`() = runBlocking {
495
- val handlers = makeHandlers()
496
- val resp = handlers.handleEmbeddings(
497
- buildJsonObject { put("input", "hello") },
498
- ctx,
499
- ) as? HandlerResponse.Error ?: error("expected Error response")
500
- assertEquals(400, resp.status)
501
- assertTrue(
502
- "message: ${resp.message}",
503
- resp.message.contains("Embeddings not supported on MediaPipe LLM"),
504
- )
505
- assertTrue(
506
- "message: ${resp.message}",
507
- resp.message.contains("capacitorBackend: \"llama\""),
508
- )
509
- }
510
-
511
- // ----- Models -----
512
-
513
- @Test
514
- fun `models returns single google-mediapipe entry with ctx modelId`() = runBlocking {
515
- val handlers = makeHandlers()
516
- val customCtx = HandlerContext(modelId = "/path/to/gemma-2b.task", backendName = "mediapipe")
517
- val resp = handlers.handleModels(customCtx) as? HandlerResponse.Json
518
- ?: error("expected Json response")
519
- assertEquals(200, resp.status)
520
- val obj = resp.body as JsonObject
521
- assertEquals("list", (obj["object"] as JsonPrimitive).content)
522
- val data = obj["data"] as JsonArray
523
- assertEquals(1, data.size)
524
- val entry0 = data[0] as JsonObject
525
- assertEquals("/path/to/gemma-2b.task", (entry0["id"] as JsonPrimitive).content)
526
- assertEquals("model", (entry0["object"] as JsonPrimitive).content)
527
- assertEquals("google-mediapipe", (entry0["owned_by"] as JsonPrimitive).content)
528
- }
529
- }
1
+ package co.deepvoiceai.bridge.mediapipe.core
2
+
3
+ import co.deepvoiceai.bridge.shared.core.HandlerContext
4
+ import co.deepvoiceai.bridge.shared.core.HandlerResponse
5
+ import kotlinx.coroutines.flow.toList
6
+ import kotlinx.coroutines.runBlocking
7
+ import kotlinx.serialization.json.Json
8
+ import kotlinx.serialization.json.JsonArray
9
+ import kotlinx.serialization.json.JsonNull
10
+ import kotlinx.serialization.json.JsonObject
11
+ import kotlinx.serialization.json.JsonPrimitive
12
+ import kotlinx.serialization.json.add
13
+ import kotlinx.serialization.json.addJsonObject
14
+ import kotlinx.serialization.json.buildJsonObject
15
+ import kotlinx.serialization.json.contentOrNull
16
+ import kotlinx.serialization.json.jsonArray
17
+ import kotlinx.serialization.json.jsonObject
18
+ import kotlinx.serialization.json.jsonPrimitive
19
+ import kotlinx.serialization.json.put
20
+ import kotlinx.serialization.json.putJsonArray
21
+ import kotlinx.serialization.json.putJsonObject
22
+ import org.junit.Assert.assertEquals
23
+ import org.junit.Assert.assertNotNull
24
+ import org.junit.Assert.assertNull
25
+ import org.junit.Assert.assertTrue
26
+ import org.junit.Test
27
+ import org.junit.runner.RunWith
28
+ import org.robolectric.RobolectricTestRunner
29
+
30
+ /**
31
+ * JVM unit tests for [MediaPipeHandlers]. Uses a [FakeBridge] implementing
32
+ * [MediaPipeBridgeApi] so tests don't need a real MediaPipe `.task` model.
33
+ *
34
+ * Mirrors the coverage of `LlamaHandlersTest`: chat-completion (sync +
35
+ * streaming), legacy completions, error / 400 surfaces, embeddings rejection,
36
+ * the models endpoint, plus Task 46 vision-capable happy path and image
37
+ * fetch failure surfaces.
38
+ *
39
+ * The `bytesToImage` seam that previously lived in [MediaPipeHandlers] has been
40
+ * moved into [MediaPipeBridge] as part of the Task 17 interface neutralization.
41
+ * [MediaPipeBridgeApi] now accepts [List]<[ByteArray]> rather than
42
+ * [List]<MPImage>. The [FakeBridge] captures raw bytes so tests can assert on
43
+ * the exact payload handed to the bridge, without any Robolectric bitmap
44
+ * overhead.
45
+ */
46
+ @RunWith(RobolectricTestRunner::class)
47
+ class MediaPipeHandlersTest {
48
+ private val ctx = HandlerContext(modelId = "gemma-2b-it-cpu-int4", backendName = "mediapipe")
49
+
50
+ private class FakeBridge(
51
+ var responseToReturn: String = "canned mediapipe response",
52
+ var shouldThrow: Boolean = false,
53
+ ) : MediaPipeBridgeApi {
54
+ var receivedPrompt: String? = null
55
+ var receivedImages: List<ByteArray> = emptyList()
56
+ var asyncCloseCount: Int = 0
57
+
58
+ override fun completePrompt(prompt: String, images: List<ByteArray>): String {
59
+ receivedPrompt = prompt
60
+ receivedImages = images
61
+ if (shouldThrow) throw RuntimeException("simulated mediapipe error")
62
+ return responseToReturn
63
+ }
64
+
65
+ override fun completePromptAsync(
66
+ prompt: String,
67
+ images: List<ByteArray>,
68
+ onPartial: (String, Boolean) -> Unit,
69
+ ): AutoCloseable {
70
+ receivedPrompt = prompt
71
+ receivedImages = images
72
+ if (shouldThrow) throw RuntimeException("simulated mediapipe error")
73
+ // Synchronously emit two partial chunks then a final done=true. The
74
+ // handler's callbackFlow trySend is non-blocking so this is fine.
75
+ val mid = (responseToReturn.length / 2).coerceAtLeast(0)
76
+ val first = responseToReturn.substring(0, mid)
77
+ val second = responseToReturn.substring(mid)
78
+ onPartial(first, false)
79
+ onPartial(second, true)
80
+ return AutoCloseable { asyncCloseCount += 1 }
81
+ }
82
+ }
83
+
84
+ private fun makeHandlers(
85
+ bridge: FakeBridge = FakeBridge(),
86
+ visionCapable: Boolean = false,
87
+ ): MediaPipeHandlers =
88
+ MediaPipeHandlers(
89
+ bridge = bridge,
90
+ modelId = "gemma-2b-it-cpu-int4",
91
+ visionCapable = visionCapable,
92
+ )
93
+
94
+ /**
95
+ * Parse a single SSE frame's JSON payload. Returns null for `[DONE]`
96
+ * frames or non-`data:` lines.
97
+ */
98
+ private fun decodeFrame(frame: String): JsonObject? {
99
+ val trimmed = frame.trim()
100
+ if (!trimmed.startsWith("data: ")) return null
101
+ val payload = trimmed.removePrefix("data: ")
102
+ if (payload == "[DONE]") return null
103
+ return Json.parseToJsonElement(payload).jsonObject
104
+ }
105
+
106
+ // ----- Chat completion (text happy path) -----
107
+
108
+ @Test
109
+ fun `chat completion text happy path`() = runBlocking {
110
+ val bridge = FakeBridge(responseToReturn = "Hello, world!")
111
+ val handlers = makeHandlers(bridge = bridge)
112
+ val body = buildJsonObject {
113
+ putJsonArray("messages") {
114
+ addJsonObject {
115
+ put("role", "user")
116
+ put("content", "hi")
117
+ }
118
+ }
119
+ }
120
+ val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
121
+ ?: error("expected Json response")
122
+ assertEquals(200, resp.status)
123
+ val obj = resp.body as JsonObject
124
+ assertEquals("chat.completion", (obj["object"] as JsonPrimitive).content)
125
+ assertEquals("gemma-2b-it-cpu-int4", (obj["model"] as JsonPrimitive).content)
126
+ val choices = obj["choices"] as JsonArray
127
+ assertEquals(1, choices.size)
128
+ val msg = (choices[0] as JsonObject)["message"] as JsonObject
129
+ assertEquals("Hello, world!", (msg["content"] as JsonPrimitive).content)
130
+ assertEquals("assistant", (msg["role"] as JsonPrimitive).content)
131
+ assertEquals(
132
+ "stop",
133
+ ((choices[0] as JsonObject)["finish_reason"] as JsonPrimitive).content,
134
+ )
135
+ // Prompt threaded through openAIMessagesToPrompt.
136
+ assertEquals("user: hi", bridge.receivedPrompt)
137
+ // No images provided → empty list passed through.
138
+ assertEquals(0, bridge.receivedImages.size)
139
+ // ID prefix is the MediaPipe-flavored one.
140
+ assertTrue(
141
+ "id should start with chatcmpl-mp-: ${(obj["id"] as JsonPrimitive).content}",
142
+ (obj["id"] as JsonPrimitive).content.startsWith("chatcmpl-mp-"),
143
+ )
144
+ }
145
+
146
+ // ----- Streaming -----
147
+
148
+ @Test
149
+ fun `chat completion streaming text emits role content content finish-with-content done`() = runBlocking {
150
+ val bridge = FakeBridge(responseToReturn = "abcd")
151
+ val handlers = makeHandlers(bridge = bridge)
152
+ val body = buildJsonObject {
153
+ putJsonArray("messages") {
154
+ addJsonObject {
155
+ put("role", "user")
156
+ put("content", "hi")
157
+ }
158
+ }
159
+ put("stream", true)
160
+ }
161
+ val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Sse
162
+ ?: error("expected Sse response")
163
+ val frames = resp.flow.toList()
164
+
165
+ // Expect: role(0) + 2 content chunks (last one carries finish_reason) + [DONE] = 4 frames
166
+ assertEquals(4, frames.size)
167
+
168
+ // Frame 0: role delta
169
+ val frame0 = decodeFrame(frames[0])
170
+ ?: error("frame 0 not decodable")
171
+ val roleDelta = frame0["choices"]?.jsonArray?.first()?.jsonObject
172
+ ?.get("delta")?.jsonObject
173
+ assertEquals("assistant", roleDelta?.get("role")?.jsonPrimitive?.content)
174
+ assertEquals("chat.completion.chunk", frame0["object"]?.jsonPrimitive?.content)
175
+
176
+ // Frame 1: first content delta (done=false)
177
+ val frame1 = decodeFrame(frames[1]) ?: error("frame 1 not decodable")
178
+ val choice1 = frame1["choices"]?.jsonArray?.first()?.jsonObject
179
+ ?: error("frame 1 missing choices[0]")
180
+ assertEquals(
181
+ "ab",
182
+ choice1["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content,
183
+ )
184
+ // finish_reason is JsonNull on the non-final frame
185
+ assertTrue(
186
+ "frame 1 finish_reason should be JsonNull, got ${choice1["finish_reason"]}",
187
+ choice1["finish_reason"] is JsonNull,
188
+ )
189
+
190
+ // Frame 2: final content delta (done=true) — carries finish_reason="stop"
191
+ val frame2 = decodeFrame(frames[2]) ?: error("frame 2 not decodable")
192
+ val choice2 = frame2["choices"]?.jsonArray?.first()?.jsonObject
193
+ ?: error("frame 2 missing choices[0]")
194
+ assertEquals(
195
+ "cd",
196
+ choice2["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content,
197
+ )
198
+ assertEquals(
199
+ "stop",
200
+ choice2["finish_reason"]?.jsonPrimitive?.contentOrNull,
201
+ )
202
+
203
+ // Frame 3: [DONE]
204
+ assertEquals("data: [DONE]\n\n", frames[3])
205
+ assertNull(decodeFrame(frames[3]))
206
+
207
+ // Bridge.completePromptAsync was called and the AutoCloseable handle was closed.
208
+ assertEquals("user: hi", bridge.receivedPrompt)
209
+ assertEquals(1, bridge.asyncCloseCount)
210
+ }
211
+
212
+ // ----- Vision (Task 46) -----
213
+
214
+ @Test
215
+ fun `chat completion image part returns 400 when not vision-capable`() = runBlocking {
216
+ // visionCapable = false → image_url request is rejected before image fetch.
217
+ val handlers = makeHandlers(visionCapable = false)
218
+ val body = buildJsonObject {
219
+ putJsonArray("messages") {
220
+ addJsonObject {
221
+ put("role", "user")
222
+ putJsonArray("content") {
223
+ addJsonObject {
224
+ put("type", "image_url")
225
+ putJsonObject("image_url") {
226
+ put("url", "data:image/png;base64,iVBOR")
227
+ }
228
+ }
229
+ }
230
+ }
231
+ }
232
+ }
233
+ val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
234
+ ?: error("expected Error response")
235
+ assertEquals(400, resp.status)
236
+ assertTrue(
237
+ "message: ${resp.message}",
238
+ resp.message.contains("vision-capable MediaPipe model"),
239
+ )
240
+ assertTrue(
241
+ "message: ${resp.message}",
242
+ resp.message.contains("visionEnabled"),
243
+ )
244
+ }
245
+
246
/**
 * With a vision-capable handler, an `image_url` content part carrying a
 * base64 `data:` URL must be decoded and handed to the bridge as raw bytes,
 * while the text part alone forms the prompt.
 *
 * Verifies:
 * - the response is a 200 JSON chat completion whose message content is the
 *   bridge's canned reply;
 * - the bridge received exactly one image whose leading bytes are the full
 *   eight-byte PNG signature;
 * - the prompt seen by the bridge is `"user: describe this"`.
 */
@Test
fun `chat completion vision-capable image part decodes and threads through bridge`() = runBlocking {
    val bridge = FakeBridge(responseToReturn = "looks like a cat")
    // Use a tiny in-memory PNG via the data URL fixture so the real
    // ImageDecoder.resolve path exercises base64 decoding. Raw bytes are
    // now passed directly to the bridge (ByteArray neutralization, Task 17).
    val handlers = makeHandlers(bridge = bridge, visionCapable = true)
    // Real PNG bytes (1x1 transparent pixel) inlined as base64. Same
    // payload as `tiny-test-base64.txt` but inlined to keep the test
    // self-contained.
    val pngDataUrl =
        "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
    // The fixed eight-byte signature that opens every PNG stream
    // (0x89 'P' 'N' 'G' CR LF SUB LF).
    val pngSignature = byteArrayOf(
        0x89.toByte(), 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
    )
    val body = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") {
                    addJsonObject {
                        put("type", "text")
                        put("text", "describe this")
                    }
                    addJsonObject {
                        put("type", "image_url")
                        putJsonObject("image_url") {
                            put("url", pngDataUrl)
                        }
                    }
                }
            }
        }
    }
    val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
        ?: error("expected Json response")
    assertEquals(200, resp.status)
    val obj = resp.body as JsonObject
    val msg = ((obj["choices"] as JsonArray)[0] as JsonObject)["message"] as JsonObject
    assertEquals("looks like a cat", (msg["content"] as JsonPrimitive).content)

    // Bridge received exactly one image (as raw bytes) and the text prompt.
    assertEquals(1, bridge.receivedImages.size)
    val receivedBytes = bridge.receivedImages[0]
    // Guard the length first so a truncated payload fails with a readable
    // assertion message instead of an index-out-of-bounds exception.
    assertTrue(
        "image too short for PNG magic: ${receivedBytes.size} bytes",
        receivedBytes.size >= pngSignature.size,
    )
    // Sanity: all 8 leading bytes must match the PNG magic header.
    pngSignature.forEachIndexed { index, expected ->
        assertTrue("PNG magic: byte[$index]", receivedBytes[index] == expected)
    }
    // Prompt should contain just the text part.
    assertEquals("user: describe this", bridge.receivedPrompt)
}
293
+
294
/**
 * An `image_url` content part whose inner object has no `url` key must be
 * rejected with status 400 and a message mentioning the missing field.
 */
@Test
fun `chat completion image_url missing url field returns 400`() = runBlocking {
    val handlers = makeHandlers(visionCapable = true)
    // The nested "image_url" object is intentionally left empty: no "url" key.
    val imagePartWithoutUrl = buildJsonObject {
        put("type", "image_url")
        putJsonObject("image_url") { }
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(imagePartWithoutUrl) }
            }
        }
    }
    val outcome = handlers.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("missing 'url' field"))
}
320
+
321
/**
 * An image URL that cannot be resolved must surface as status 502 with a
 * "Failed to fetch image" message. The failure is provoked with an `ftp://`
 * URL.
 */
@Test
fun `chat completion image fetch failure returns 502`() = runBlocking {
    val handlers = makeHandlers(visionCapable = true)
    // Unsupported scheme — ImageDecoder.resolve throws InvalidScheme.
    val unreachableImagePart = buildJsonObject {
        put("type", "image_url")
        putJsonObject("image_url") { put("url", "ftp://example.com/x.png") }
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(unreachableImagePart) }
            }
        }
    }
    val outcome = handlers.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(502, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("Failed to fetch image"))
}
348
+
349
/**
 * Streaming counterpart of the vision test: with `stream = true` the handler
 * must return an SSE response and, once the stream has been drained, the
 * bridge must have received the image as raw bytes.
 */
@Test
fun `chat completion streaming vision-capable image part threads raw bytes through bridge`() = runBlocking {
    // Verify that the streaming path passes raw ByteArray to the bridge
    // rather than converting to MPImage in the handler layer (Task 17
    // neutralization: ByteArray → MPImage conversion now happens inside
    // MediaPipeBridge, not in MediaPipeHandlers).
    val bridge = FakeBridge(responseToReturn = "ab")
    val handlers = makeHandlers(bridge = bridge, visionCapable = true)
    val pngDataUrl =
        "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") {
                    addJsonObject {
                        put("type", "text")
                        put("text", "what is this")
                    }
                    addJsonObject {
                        put("type", "image_url")
                        putJsonObject("image_url") {
                            put("url", pngDataUrl)
                        }
                    }
                }
            }
        }
        put("stream", true)
    }
    val outcome = handlers.handleChatCompletion(request, ctx)
    val stream = outcome as? HandlerResponse.Sse ?: error("expected Sse response")
    // Drain the stream to completion before inspecting the bridge.
    stream.flow.toList()
    // Bridge received one raw-byte image via the streaming path.
    val images = bridge.receivedImages
    assertEquals(1, images.size)
    val firstImage = images[0]
    assertTrue("PNG magic byte[0]", firstImage[0] == 0x89.toByte())
    assertTrue("PNG magic byte[1]", firstImage[1] == 0x50.toByte())
}
383
+
384
+ // ----- 400 surfaces -----
385
+
386
/**
 * An `input_audio` content part must be rejected with status 400 and an
 * "Audio input not supported" message.
 */
@Test
fun `chat completion audio part returns 400`() = runBlocking {
    val handlers = makeHandlers()
    val audioPart = buildJsonObject {
        put("type", "input_audio")
        putJsonObject("input_audio") {
            put("data", "AAAA")
            put("format", "pcm16")
        }
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(audioPart) }
            }
        }
    }
    val outcome = handlers.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("Audio input not supported"))
}
410
+
411
/**
 * A request body with no `messages` key at all must be rejected with status
 * 400 and a message that mentions "messages".
 */
@Test
fun `chat completion missing messages returns 400`() = runBlocking {
    val handlers = makeHandlers()
    val emptyRequest = buildJsonObject {}
    val outcome = handlers.handleChatCompletion(emptyRequest, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("messages"))
}
419
+
420
/**
 * A request whose `messages` array is present but empty must be rejected
 * with status 400 and a message containing "Empty".
 */
@Test
fun `chat completion empty messages returns 400`() = runBlocking {
    val handlers = makeHandlers()
    val request = buildJsonObject {
        // Present but deliberately empty array.
        putJsonArray("messages") { }
    }
    val outcome = handlers.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("Empty"))
}
431
+
432
/**
 * When the bridge is configured to throw, the handler must answer with
 * status 500 and the error message must contain "simulated mediapipe error".
 */
@Test
fun `chat completion bridge throw returns 500`() = runBlocking {
    val failingBridge = FakeBridge(shouldThrow = true)
    val handlers = makeHandlers(bridge = failingBridge)
    val userMessage = buildJsonObject {
        put("role", "user")
        put("content", "hi")
    }
    val request = buildJsonObject {
        putJsonArray("messages") { add(userMessage) }
    }
    val outcome = handlers.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(500, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("simulated mediapipe error"))
}
449
+
450
+ // ----- Legacy completions -----
451
+
452
/**
 * The legacy completions endpoint must answer a string `prompt` with a 200
 * JSON body in `text_completion` shape: the bridge reply under
 * `choices[0].text`, a `"stop"` finish reason, a `logprobs` entry, and an id
 * starting with `cmpl-`. The bridge must see the prompt as `"user: say hi"`.
 */
@Test
fun `legacy completion converts to text_completion shape`() = runBlocking {
    val bridge = FakeBridge(responseToReturn = "canned-text")
    val handlers = makeHandlers(bridge = bridge)
    val request = buildJsonObject { put("prompt", "say hi") }

    val outcome = handlers.handleCompletion(request, ctx)
    val success = outcome as? HandlerResponse.Json ?: error("expected Json response")
    assertEquals(200, success.status)

    val payload = success.body as JsonObject
    val objectKind = payload["object"] as JsonPrimitive
    assertEquals("text_completion", objectKind.content)

    val firstChoice = (payload["choices"] as JsonArray)[0] as JsonObject
    val text = firstChoice["text"] as JsonPrimitive
    assertEquals("canned-text", text.content)
    val finishReason = firstChoice["finish_reason"] as JsonPrimitive
    assertEquals("stop", finishReason.content)
    assertNotNull(firstChoice["logprobs"])

    // ID rewritten chatcmpl-mp- → cmpl-mp-
    val completionId = (payload["id"] as JsonPrimitive).content
    assertTrue("id should start with cmpl-: $completionId", completionId.startsWith("cmpl-"))

    // Prompt was wrapped as a user message and threaded through.
    assertEquals("user: say hi", bridge.receivedPrompt)
}
473
+
474
/**
 * When `prompt` is an array of strings, the bridge must receive the entries
 * joined by a newline and prefixed as a single user message.
 */
@Test
fun `legacy completion array prompt joined with newline`() = runBlocking {
    val bridge = FakeBridge()
    val handlers = makeHandlers(bridge = bridge)
    val request = buildJsonObject {
        putJsonArray("prompt") {
            for (piece in listOf("alpha", "beta")) {
                add(piece)
            }
        }
    }
    val outcome = handlers.handleCompletion(request, ctx)
    val success = outcome as? HandlerResponse.Json ?: error("expected Json response")
    assertEquals(200, success.status)
    // The prompt array is joined with \n, then wrapped as a single user message.
    assertEquals("user: alpha\nbeta", bridge.receivedPrompt)
}
490
+
491
+ // ----- Embeddings -----
492
+
493
/**
 * The embeddings endpoint must answer with status 400, and the message must
 * both state that embeddings are unsupported on MediaPipe LLM and mention
 * the `capacitorBackend: "llama"` setting.
 */
@Test
fun `embeddings always returns 400 with redirect message`() = runBlocking {
    val handlers = makeHandlers()
    val request = buildJsonObject { put("input", "hello") }
    val outcome = handlers.handleEmbeddings(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, failure.status)
    val detail = failure.message
    assertTrue("message: $detail", detail.contains("Embeddings not supported on MediaPipe LLM"))
    assertTrue("message: $detail", detail.contains("capacitorBackend: \"llama\""))
}
510
+
511
+ // ----- Models -----
512
+
513
/**
 * The models endpoint must return a 200 JSON `list` holding exactly one
 * `model` entry whose `id` is the context's model id and whose `owned_by`
 * is `google-mediapipe`.
 */
@Test
fun `models returns single google-mediapipe entry with ctx modelId`() = runBlocking {
    val handlers = makeHandlers()
    val customCtx = HandlerContext(modelId = "/path/to/gemma-2b.task", backendName = "mediapipe")

    val outcome = handlers.handleModels(customCtx)
    val success = outcome as? HandlerResponse.Json ?: error("expected Json response")
    assertEquals(200, success.status)

    val payload = success.body as JsonObject
    val listKind = payload["object"] as JsonPrimitive
    assertEquals("list", listKind.content)

    val entries = payload["data"] as JsonArray
    assertEquals(1, entries.size)

    val onlyEntry = entries[0] as JsonObject
    val entryId = onlyEntry["id"] as JsonPrimitive
    assertEquals("/path/to/gemma-2b.task", entryId.content)
    val entryKind = onlyEntry["object"] as JsonPrimitive
    assertEquals("model", entryKind.content)
    val owner = onlyEntry["owned_by"] as JsonPrimitive
    assertEquals("google-mediapipe", owner.content)
}
529
+ }