@dvai-bridge/android-mediapipe-core 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +341 -34
- package/android/build.gradle +196 -134
- package/android/gradle.properties +1 -1
- package/android/settings.gradle +1 -1
- package/android/src/main/AndroidManifest.xml +14 -14
- package/android/src/main/java/co/deepvoiceai/bridge/mediapipe/core/ImageDecoder.kt +115 -115
- package/android/src/main/java/co/deepvoiceai/bridge/mediapipe/core/MediaPipeBridge.kt +203 -203
- package/android/src/main/java/co/deepvoiceai/bridge/mediapipe/core/MediaPipeHandlers.kt +482 -482
- package/android/src/main/java/co/deepvoiceai/bridge/mediapipe/core/PluginState.kt +134 -134
- package/android/src/main/res/xml/dvai_network_security_config.xml +7 -7
- package/android/src/test/java/co/deepvoiceai/bridge/mediapipe/core/ImageDecoderTest.kt +114 -114
- package/android/src/test/java/co/deepvoiceai/bridge/mediapipe/core/MediaPipeHandlersTest.kt +529 -529
- package/android/src/test/java/co/deepvoiceai/bridge/mediapipe/core/PluginStateTest.kt +85 -85
- package/package.json +1 -1
- package/README.md +0 -199
|
@@ -1,529 +1,529 @@
|
|
|
1
|
-
package co.deepvoiceai.bridge.mediapipe.core
|
|
2
|
-
|
|
3
|
-
import co.deepvoiceai.bridge.shared.core.HandlerContext
|
|
4
|
-
import co.deepvoiceai.bridge.shared.core.HandlerResponse
|
|
5
|
-
import kotlinx.coroutines.flow.toList
|
|
6
|
-
import kotlinx.coroutines.runBlocking
|
|
7
|
-
import kotlinx.serialization.json.Json
|
|
8
|
-
import kotlinx.serialization.json.JsonArray
|
|
9
|
-
import kotlinx.serialization.json.JsonNull
|
|
10
|
-
import kotlinx.serialization.json.JsonObject
|
|
11
|
-
import kotlinx.serialization.json.JsonPrimitive
|
|
12
|
-
import kotlinx.serialization.json.add
|
|
13
|
-
import kotlinx.serialization.json.addJsonObject
|
|
14
|
-
import kotlinx.serialization.json.buildJsonObject
|
|
15
|
-
import kotlinx.serialization.json.contentOrNull
|
|
16
|
-
import kotlinx.serialization.json.jsonArray
|
|
17
|
-
import kotlinx.serialization.json.jsonObject
|
|
18
|
-
import kotlinx.serialization.json.jsonPrimitive
|
|
19
|
-
import kotlinx.serialization.json.put
|
|
20
|
-
import kotlinx.serialization.json.putJsonArray
|
|
21
|
-
import kotlinx.serialization.json.putJsonObject
|
|
22
|
-
import org.junit.Assert.assertEquals
|
|
23
|
-
import org.junit.Assert.assertNotNull
|
|
24
|
-
import org.junit.Assert.assertNull
|
|
25
|
-
import org.junit.Assert.assertTrue
|
|
26
|
-
import org.junit.Test
|
|
27
|
-
import org.junit.runner.RunWith
|
|
28
|
-
import org.robolectric.RobolectricTestRunner
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* JVM unit tests for [MediaPipeHandlers]. Uses a [FakeBridge] implementing
|
|
32
|
-
* [MediaPipeBridgeApi] so tests don't need a real MediaPipe `.task` model.
|
|
33
|
-
*
|
|
34
|
-
* Mirrors the coverage of `LlamaHandlersTest`: chat-completion (sync +
|
|
35
|
-
* streaming), legacy completions, error / 400 surfaces, embeddings rejection,
|
|
36
|
-
* the models endpoint, plus Task 46 vision-capable happy path and image
|
|
37
|
-
* fetch failure surfaces.
|
|
38
|
-
*
|
|
39
|
-
* The `bytesToImage` seam that previously lived in [MediaPipeHandlers] has been
|
|
40
|
-
* moved into [MediaPipeBridge] as part of the Task 17 interface neutralization.
|
|
41
|
-
* [MediaPipeBridgeApi] now accepts [List]<[ByteArray]> rather than
|
|
42
|
-
* [List]<MPImage>. The [FakeBridge] captures raw bytes so tests can assert on
|
|
43
|
-
* the exact payload handed to the bridge, without any Robolectric bitmap
|
|
44
|
-
* overhead.
|
|
45
|
-
*/
|
|
46
|
-
@RunWith(RobolectricTestRunner::class)
|
|
47
|
-
class MediaPipeHandlersTest {
|
|
48
|
-
private val ctx = HandlerContext(modelId = "gemma-2b-it-cpu-int4", backendName = "mediapipe")
|
|
49
|
-
|
|
50
|
-
private class FakeBridge(
|
|
51
|
-
var responseToReturn: String = "canned mediapipe response",
|
|
52
|
-
var shouldThrow: Boolean = false,
|
|
53
|
-
) : MediaPipeBridgeApi {
|
|
54
|
-
var receivedPrompt: String? = null
|
|
55
|
-
var receivedImages: List<ByteArray> = emptyList()
|
|
56
|
-
var asyncCloseCount: Int = 0
|
|
57
|
-
|
|
58
|
-
override fun completePrompt(prompt: String, images: List<ByteArray>): String {
|
|
59
|
-
receivedPrompt = prompt
|
|
60
|
-
receivedImages = images
|
|
61
|
-
if (shouldThrow) throw RuntimeException("simulated mediapipe error")
|
|
62
|
-
return responseToReturn
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
override fun completePromptAsync(
|
|
66
|
-
prompt: String,
|
|
67
|
-
images: List<ByteArray>,
|
|
68
|
-
onPartial: (String, Boolean) -> Unit,
|
|
69
|
-
): AutoCloseable {
|
|
70
|
-
receivedPrompt = prompt
|
|
71
|
-
receivedImages = images
|
|
72
|
-
if (shouldThrow) throw RuntimeException("simulated mediapipe error")
|
|
73
|
-
// Synchronously emit two partial chunks then a final done=true. The
|
|
74
|
-
// handler's callbackFlow trySend is non-blocking so this is fine.
|
|
75
|
-
val mid = (responseToReturn.length / 2).coerceAtLeast(0)
|
|
76
|
-
val first = responseToReturn.substring(0, mid)
|
|
77
|
-
val second = responseToReturn.substring(mid)
|
|
78
|
-
onPartial(first, false)
|
|
79
|
-
onPartial(second, true)
|
|
80
|
-
return AutoCloseable { asyncCloseCount += 1 }
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
private fun makeHandlers(
|
|
85
|
-
bridge: FakeBridge = FakeBridge(),
|
|
86
|
-
visionCapable: Boolean = false,
|
|
87
|
-
): MediaPipeHandlers =
|
|
88
|
-
MediaPipeHandlers(
|
|
89
|
-
bridge = bridge,
|
|
90
|
-
modelId = "gemma-2b-it-cpu-int4",
|
|
91
|
-
visionCapable = visionCapable,
|
|
92
|
-
)
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* Parse a single SSE frame's JSON payload. Returns null for `[DONE]`
|
|
96
|
-
* frames or non-`data:` lines.
|
|
97
|
-
*/
|
|
98
|
-
private fun decodeFrame(frame: String): JsonObject? {
|
|
99
|
-
val trimmed = frame.trim()
|
|
100
|
-
if (!trimmed.startsWith("data: ")) return null
|
|
101
|
-
val payload = trimmed.removePrefix("data: ")
|
|
102
|
-
if (payload == "[DONE]") return null
|
|
103
|
-
return Json.parseToJsonElement(payload).jsonObject
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// ----- Chat completion (text happy path) -----
|
|
107
|
-
|
|
108
|
-
@Test
|
|
109
|
-
fun `chat completion text happy path`() = runBlocking {
|
|
110
|
-
val bridge = FakeBridge(responseToReturn = "Hello, world!")
|
|
111
|
-
val handlers = makeHandlers(bridge = bridge)
|
|
112
|
-
val body = buildJsonObject {
|
|
113
|
-
putJsonArray("messages") {
|
|
114
|
-
addJsonObject {
|
|
115
|
-
put("role", "user")
|
|
116
|
-
put("content", "hi")
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
|
|
121
|
-
?: error("expected Json response")
|
|
122
|
-
assertEquals(200, resp.status)
|
|
123
|
-
val obj = resp.body as JsonObject
|
|
124
|
-
assertEquals("chat.completion", (obj["object"] as JsonPrimitive).content)
|
|
125
|
-
assertEquals("gemma-2b-it-cpu-int4", (obj["model"] as JsonPrimitive).content)
|
|
126
|
-
val choices = obj["choices"] as JsonArray
|
|
127
|
-
assertEquals(1, choices.size)
|
|
128
|
-
val msg = (choices[0] as JsonObject)["message"] as JsonObject
|
|
129
|
-
assertEquals("Hello, world!", (msg["content"] as JsonPrimitive).content)
|
|
130
|
-
assertEquals("assistant", (msg["role"] as JsonPrimitive).content)
|
|
131
|
-
assertEquals(
|
|
132
|
-
"stop",
|
|
133
|
-
((choices[0] as JsonObject)["finish_reason"] as JsonPrimitive).content,
|
|
134
|
-
)
|
|
135
|
-
// Prompt threaded through openAIMessagesToPrompt.
|
|
136
|
-
assertEquals("user: hi", bridge.receivedPrompt)
|
|
137
|
-
// No images provided → empty list passed through.
|
|
138
|
-
assertEquals(0, bridge.receivedImages.size)
|
|
139
|
-
// ID prefix is the MediaPipe-flavored one.
|
|
140
|
-
assertTrue(
|
|
141
|
-
"id should start with chatcmpl-mp-: ${(obj["id"] as JsonPrimitive).content}",
|
|
142
|
-
(obj["id"] as JsonPrimitive).content.startsWith("chatcmpl-mp-"),
|
|
143
|
-
)
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
// ----- Streaming -----
|
|
147
|
-
|
|
148
|
-
@Test
|
|
149
|
-
fun `chat completion streaming text emits role content content finish-with-content done`() = runBlocking {
|
|
150
|
-
val bridge = FakeBridge(responseToReturn = "abcd")
|
|
151
|
-
val handlers = makeHandlers(bridge = bridge)
|
|
152
|
-
val body = buildJsonObject {
|
|
153
|
-
putJsonArray("messages") {
|
|
154
|
-
addJsonObject {
|
|
155
|
-
put("role", "user")
|
|
156
|
-
put("content", "hi")
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
put("stream", true)
|
|
160
|
-
}
|
|
161
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Sse
|
|
162
|
-
?: error("expected Sse response")
|
|
163
|
-
val frames = resp.flow.toList()
|
|
164
|
-
|
|
165
|
-
// Expect: role(0) + 2 content chunks (last one carries finish_reason) + [DONE] = 4 frames
|
|
166
|
-
assertEquals(4, frames.size)
|
|
167
|
-
|
|
168
|
-
// Frame 0: role delta
|
|
169
|
-
val frame0 = decodeFrame(frames[0])
|
|
170
|
-
?: error("frame 0 not decodable")
|
|
171
|
-
val roleDelta = frame0["choices"]?.jsonArray?.first()?.jsonObject
|
|
172
|
-
?.get("delta")?.jsonObject
|
|
173
|
-
assertEquals("assistant", roleDelta?.get("role")?.jsonPrimitive?.content)
|
|
174
|
-
assertEquals("chat.completion.chunk", frame0["object"]?.jsonPrimitive?.content)
|
|
175
|
-
|
|
176
|
-
// Frame 1: first content delta (done=false)
|
|
177
|
-
val frame1 = decodeFrame(frames[1]) ?: error("frame 1 not decodable")
|
|
178
|
-
val choice1 = frame1["choices"]?.jsonArray?.first()?.jsonObject
|
|
179
|
-
?: error("frame 1 missing choices[0]")
|
|
180
|
-
assertEquals(
|
|
181
|
-
"ab",
|
|
182
|
-
choice1["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content,
|
|
183
|
-
)
|
|
184
|
-
// finish_reason is JsonNull on the non-final frame
|
|
185
|
-
assertTrue(
|
|
186
|
-
"frame 1 finish_reason should be JsonNull, got ${choice1["finish_reason"]}",
|
|
187
|
-
choice1["finish_reason"] is JsonNull,
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
// Frame 2: final content delta (done=true) — carries finish_reason="stop"
|
|
191
|
-
val frame2 = decodeFrame(frames[2]) ?: error("frame 2 not decodable")
|
|
192
|
-
val choice2 = frame2["choices"]?.jsonArray?.first()?.jsonObject
|
|
193
|
-
?: error("frame 2 missing choices[0]")
|
|
194
|
-
assertEquals(
|
|
195
|
-
"cd",
|
|
196
|
-
choice2["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content,
|
|
197
|
-
)
|
|
198
|
-
assertEquals(
|
|
199
|
-
"stop",
|
|
200
|
-
choice2["finish_reason"]?.jsonPrimitive?.contentOrNull,
|
|
201
|
-
)
|
|
202
|
-
|
|
203
|
-
// Frame 3: [DONE]
|
|
204
|
-
assertEquals("data: [DONE]\n\n", frames[3])
|
|
205
|
-
assertNull(decodeFrame(frames[3]))
|
|
206
|
-
|
|
207
|
-
// Bridge.completePromptAsync was called and the AutoCloseable handle was closed.
|
|
208
|
-
assertEquals("user: hi", bridge.receivedPrompt)
|
|
209
|
-
assertEquals(1, bridge.asyncCloseCount)
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
// ----- Vision (Task 46) -----
|
|
213
|
-
|
|
214
|
-
@Test
|
|
215
|
-
fun `chat completion image part returns 400 when not vision-capable`() = runBlocking {
|
|
216
|
-
// visionCapable = false → image_url request is rejected before image fetch.
|
|
217
|
-
val handlers = makeHandlers(visionCapable = false)
|
|
218
|
-
val body = buildJsonObject {
|
|
219
|
-
putJsonArray("messages") {
|
|
220
|
-
addJsonObject {
|
|
221
|
-
put("role", "user")
|
|
222
|
-
putJsonArray("content") {
|
|
223
|
-
addJsonObject {
|
|
224
|
-
put("type", "image_url")
|
|
225
|
-
putJsonObject("image_url") {
|
|
226
|
-
put("url", "data:image/png;base64,iVBOR")
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
|
|
234
|
-
?: error("expected Error response")
|
|
235
|
-
assertEquals(400, resp.status)
|
|
236
|
-
assertTrue(
|
|
237
|
-
"message: ${resp.message}",
|
|
238
|
-
resp.message.contains("vision-capable MediaPipe model"),
|
|
239
|
-
)
|
|
240
|
-
assertTrue(
|
|
241
|
-
"message: ${resp.message}",
|
|
242
|
-
resp.message.contains("visionEnabled"),
|
|
243
|
-
)
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
@Test
|
|
247
|
-
fun `chat completion vision-capable image part decodes and threads through bridge`() = runBlocking {
|
|
248
|
-
val bridge = FakeBridge(responseToReturn = "looks like a cat")
|
|
249
|
-
// Use a tiny in-memory PNG via the data URL fixture so the real
|
|
250
|
-
// ImageDecoder.resolve path exercises base64 decoding. Raw bytes are
|
|
251
|
-
// now passed directly to the bridge (ByteArray neutralization, Task 17).
|
|
252
|
-
val handlers = makeHandlers(bridge = bridge, visionCapable = true)
|
|
253
|
-
// Real PNG bytes (1x1 transparent pixel) inlined as base64. Same
|
|
254
|
-
// payload as `tiny-test-base64.txt` but inlined to keep the test
|
|
255
|
-
// self-contained.
|
|
256
|
-
val pngDataUrl =
|
|
257
|
-
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
|
|
258
|
-
val body = buildJsonObject {
|
|
259
|
-
putJsonArray("messages") {
|
|
260
|
-
addJsonObject {
|
|
261
|
-
put("role", "user")
|
|
262
|
-
putJsonArray("content") {
|
|
263
|
-
addJsonObject {
|
|
264
|
-
put("type", "text")
|
|
265
|
-
put("text", "describe this")
|
|
266
|
-
}
|
|
267
|
-
addJsonObject {
|
|
268
|
-
put("type", "image_url")
|
|
269
|
-
putJsonObject("image_url") {
|
|
270
|
-
put("url", pngDataUrl)
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
|
|
278
|
-
?: error("expected Json response")
|
|
279
|
-
assertEquals(200, resp.status)
|
|
280
|
-
val obj = resp.body as JsonObject
|
|
281
|
-
val msg = ((obj["choices"] as JsonArray)[0] as JsonObject)["message"] as JsonObject
|
|
282
|
-
assertEquals("looks like a cat", (msg["content"] as JsonPrimitive).content)
|
|
283
|
-
|
|
284
|
-
// Bridge received exactly one image (as raw bytes) and the text prompt.
|
|
285
|
-
assertEquals(1, bridge.receivedImages.size)
|
|
286
|
-
// Sanity: first 8 bytes must match the PNG magic header.
|
|
287
|
-
val receivedBytes = bridge.receivedImages[0]
|
|
288
|
-
assertTrue("PNG magic: byte[0]", receivedBytes[0] == 0x89.toByte())
|
|
289
|
-
assertTrue("PNG magic: byte[1]", receivedBytes[1] == 0x50.toByte())
|
|
290
|
-
// Prompt should contain just the text part.
|
|
291
|
-
assertEquals("user: describe this", bridge.receivedPrompt)
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
@Test
|
|
295
|
-
fun `chat completion image_url missing url field returns 400`() = runBlocking {
|
|
296
|
-
val handlers = makeHandlers(visionCapable = true)
|
|
297
|
-
val body = buildJsonObject {
|
|
298
|
-
putJsonArray("messages") {
|
|
299
|
-
addJsonObject {
|
|
300
|
-
put("role", "user")
|
|
301
|
-
putJsonArray("content") {
|
|
302
|
-
addJsonObject {
|
|
303
|
-
put("type", "image_url")
|
|
304
|
-
putJsonObject("image_url") {
|
|
305
|
-
// No "url" key.
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
|
|
313
|
-
?: error("expected Error response")
|
|
314
|
-
assertEquals(400, resp.status)
|
|
315
|
-
assertTrue(
|
|
316
|
-
"message: ${resp.message}",
|
|
317
|
-
resp.message.contains("missing 'url' field"),
|
|
318
|
-
)
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
@Test
|
|
322
|
-
fun `chat completion image fetch failure returns 502`() = runBlocking {
|
|
323
|
-
val handlers = makeHandlers(visionCapable = true)
|
|
324
|
-
val body = buildJsonObject {
|
|
325
|
-
putJsonArray("messages") {
|
|
326
|
-
addJsonObject {
|
|
327
|
-
put("role", "user")
|
|
328
|
-
putJsonArray("content") {
|
|
329
|
-
addJsonObject {
|
|
330
|
-
put("type", "image_url")
|
|
331
|
-
putJsonObject("image_url") {
|
|
332
|
-
// Unsupported scheme — ImageDecoder.resolve throws InvalidScheme.
|
|
333
|
-
put("url", "ftp://example.com/x.png")
|
|
334
|
-
}
|
|
335
|
-
}
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
|
|
341
|
-
?: error("expected Error response")
|
|
342
|
-
assertEquals(502, resp.status)
|
|
343
|
-
assertTrue(
|
|
344
|
-
"message: ${resp.message}",
|
|
345
|
-
resp.message.contains("Failed to fetch image"),
|
|
346
|
-
)
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
@Test
|
|
350
|
-
fun `chat completion streaming vision-capable image part threads raw bytes through bridge`() = runBlocking {
|
|
351
|
-
// Verify that the streaming path passes raw ByteArray to the bridge
|
|
352
|
-
// rather than converting to MPImage in the handler layer (Task 17
|
|
353
|
-
// neutralization: ByteArray → MPImage conversion now happens inside
|
|
354
|
-
// MediaPipeBridge, not in MediaPipeHandlers).
|
|
355
|
-
val bridge = FakeBridge(responseToReturn = "ab")
|
|
356
|
-
val handlers = makeHandlers(bridge = bridge, visionCapable = true)
|
|
357
|
-
val pngDataUrl =
|
|
358
|
-
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
|
|
359
|
-
val body = buildJsonObject {
|
|
360
|
-
putJsonArray("messages") {
|
|
361
|
-
addJsonObject {
|
|
362
|
-
put("role", "user")
|
|
363
|
-
putJsonArray("content") {
|
|
364
|
-
addJsonObject { put("type", "text"); put("text", "what is this") }
|
|
365
|
-
addJsonObject {
|
|
366
|
-
put("type", "image_url")
|
|
367
|
-
putJsonObject("image_url") { put("url", pngDataUrl) }
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
put("stream", true)
|
|
373
|
-
}
|
|
374
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Sse
|
|
375
|
-
?: error("expected Sse response")
|
|
376
|
-
resp.flow.toList() // consume the stream to completion
|
|
377
|
-
// Bridge received one raw-byte image via the streaming path.
|
|
378
|
-
assertEquals(1, bridge.receivedImages.size)
|
|
379
|
-
val receivedBytes = bridge.receivedImages[0]
|
|
380
|
-
assertTrue("PNG magic byte[0]", receivedBytes[0] == 0x89.toByte())
|
|
381
|
-
assertTrue("PNG magic byte[1]", receivedBytes[1] == 0x50.toByte())
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
// ----- 400 surfaces -----
|
|
385
|
-
|
|
386
|
-
@Test
|
|
387
|
-
fun `chat completion audio part returns 400`() = runBlocking {
|
|
388
|
-
val handlers = makeHandlers()
|
|
389
|
-
val body = buildJsonObject {
|
|
390
|
-
putJsonArray("messages") {
|
|
391
|
-
addJsonObject {
|
|
392
|
-
put("role", "user")
|
|
393
|
-
putJsonArray("content") {
|
|
394
|
-
addJsonObject {
|
|
395
|
-
put("type", "input_audio")
|
|
396
|
-
putJsonObject("input_audio") {
|
|
397
|
-
put("data", "AAAA")
|
|
398
|
-
put("format", "pcm16")
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
}
|
|
405
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
|
|
406
|
-
?: error("expected Error response")
|
|
407
|
-
assertEquals(400, resp.status)
|
|
408
|
-
assertTrue("message: ${resp.message}", resp.message.contains("Audio input not supported"))
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
@Test
|
|
412
|
-
fun `chat completion missing messages returns 400`() = runBlocking {
|
|
413
|
-
val handlers = makeHandlers()
|
|
414
|
-
val resp = handlers.handleChatCompletion(buildJsonObject {}, ctx) as? HandlerResponse.Error
|
|
415
|
-
?: error("expected Error response")
|
|
416
|
-
assertEquals(400, resp.status)
|
|
417
|
-
assertTrue("message: ${resp.message}", resp.message.contains("messages"))
|
|
418
|
-
}
|
|
419
|
-
|
|
420
|
-
@Test
|
|
421
|
-
fun `chat completion empty messages returns 400`() = runBlocking {
|
|
422
|
-
val handlers = makeHandlers()
|
|
423
|
-
val body = buildJsonObject {
|
|
424
|
-
putJsonArray("messages") { /* empty */ }
|
|
425
|
-
}
|
|
426
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
|
|
427
|
-
?: error("expected Error response")
|
|
428
|
-
assertEquals(400, resp.status)
|
|
429
|
-
assertTrue("message: ${resp.message}", resp.message.contains("Empty"))
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
@Test
|
|
433
|
-
fun `chat completion bridge throw returns 500`() = runBlocking {
|
|
434
|
-
val bridge = FakeBridge(shouldThrow = true)
|
|
435
|
-
val handlers = makeHandlers(bridge = bridge)
|
|
436
|
-
val body = buildJsonObject {
|
|
437
|
-
putJsonArray("messages") {
|
|
438
|
-
addJsonObject {
|
|
439
|
-
put("role", "user")
|
|
440
|
-
put("content", "hi")
|
|
441
|
-
}
|
|
442
|
-
}
|
|
443
|
-
}
|
|
444
|
-
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Error
|
|
445
|
-
?: error("expected Error response")
|
|
446
|
-
assertEquals(500, resp.status)
|
|
447
|
-
assertTrue("message: ${resp.message}", resp.message.contains("simulated mediapipe error"))
|
|
448
|
-
}
|
|
449
|
-
|
|
450
|
-
// ----- Legacy completions -----
|
|
451
|
-
|
|
452
|
-
@Test
|
|
453
|
-
fun `legacy completion converts to text_completion shape`() = runBlocking {
|
|
454
|
-
val bridge = FakeBridge(responseToReturn = "canned-text")
|
|
455
|
-
val handlers = makeHandlers(bridge = bridge)
|
|
456
|
-
val body = buildJsonObject { put("prompt", "say hi") }
|
|
457
|
-
val resp = handlers.handleCompletion(body, ctx) as? HandlerResponse.Json
|
|
458
|
-
?: error("expected Json response")
|
|
459
|
-
assertEquals(200, resp.status)
|
|
460
|
-
val obj = resp.body as JsonObject
|
|
461
|
-
assertEquals("text_completion", (obj["object"] as JsonPrimitive).content)
|
|
462
|
-
val choices = obj["choices"] as JsonArray
|
|
463
|
-
val choice0 = choices[0] as JsonObject
|
|
464
|
-
assertEquals("canned-text", (choice0["text"] as JsonPrimitive).content)
|
|
465
|
-
assertEquals("stop", (choice0["finish_reason"] as JsonPrimitive).content)
|
|
466
|
-
assertNotNull(choice0["logprobs"])
|
|
467
|
-
// ID rewritten chatcmpl-mp- → cmpl-mp-
|
|
468
|
-
val idStr = (obj["id"] as JsonPrimitive).content
|
|
469
|
-
assertTrue("id should start with cmpl-: $idStr", idStr.startsWith("cmpl-"))
|
|
470
|
-
// Prompt was wrapped as a user message and threaded through.
|
|
471
|
-
assertEquals("user: say hi", bridge.receivedPrompt)
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
@Test
|
|
475
|
-
fun `legacy completion array prompt joined with newline`() = runBlocking {
|
|
476
|
-
val bridge = FakeBridge()
|
|
477
|
-
val handlers = makeHandlers(bridge = bridge)
|
|
478
|
-
val body = buildJsonObject {
|
|
479
|
-
putJsonArray("prompt") {
|
|
480
|
-
add("alpha")
|
|
481
|
-
add("beta")
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
val resp = handlers.handleCompletion(body, ctx) as? HandlerResponse.Json
|
|
485
|
-
?: error("expected Json response")
|
|
486
|
-
assertEquals(200, resp.status)
|
|
487
|
-
// The prompt array is joined with \n, then wrapped as a single user message.
|
|
488
|
-
assertEquals("user: alpha\nbeta", bridge.receivedPrompt)
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
// ----- Embeddings -----
|
|
492
|
-
|
|
493
|
-
@Test
|
|
494
|
-
fun `embeddings always returns 400 with redirect message`() = runBlocking {
|
|
495
|
-
val handlers = makeHandlers()
|
|
496
|
-
val resp = handlers.handleEmbeddings(
|
|
497
|
-
buildJsonObject { put("input", "hello") },
|
|
498
|
-
ctx,
|
|
499
|
-
) as? HandlerResponse.Error ?: error("expected Error response")
|
|
500
|
-
assertEquals(400, resp.status)
|
|
501
|
-
assertTrue(
|
|
502
|
-
"message: ${resp.message}",
|
|
503
|
-
resp.message.contains("Embeddings not supported on MediaPipe LLM"),
|
|
504
|
-
)
|
|
505
|
-
assertTrue(
|
|
506
|
-
"message: ${resp.message}",
|
|
507
|
-
resp.message.contains("capacitorBackend: \"llama\""),
|
|
508
|
-
)
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
// ----- Models -----
|
|
512
|
-
|
|
513
|
-
@Test
|
|
514
|
-
fun `models returns single google-mediapipe entry with ctx modelId`() = runBlocking {
|
|
515
|
-
val handlers = makeHandlers()
|
|
516
|
-
val customCtx = HandlerContext(modelId = "/path/to/gemma-2b.task", backendName = "mediapipe")
|
|
517
|
-
val resp = handlers.handleModels(customCtx) as? HandlerResponse.Json
|
|
518
|
-
?: error("expected Json response")
|
|
519
|
-
assertEquals(200, resp.status)
|
|
520
|
-
val obj = resp.body as JsonObject
|
|
521
|
-
assertEquals("list", (obj["object"] as JsonPrimitive).content)
|
|
522
|
-
val data = obj["data"] as JsonArray
|
|
523
|
-
assertEquals(1, data.size)
|
|
524
|
-
val entry0 = data[0] as JsonObject
|
|
525
|
-
assertEquals("/path/to/gemma-2b.task", (entry0["id"] as JsonPrimitive).content)
|
|
526
|
-
assertEquals("model", (entry0["object"] as JsonPrimitive).content)
|
|
527
|
-
assertEquals("google-mediapipe", (entry0["owned_by"] as JsonPrimitive).content)
|
|
528
|
-
}
|
|
529
|
-
}
|
|
1
|
+
package co.deepvoiceai.bridge.mediapipe.core
|
|
2
|
+
|
|
3
|
+
import co.deepvoiceai.bridge.shared.core.HandlerContext
|
|
4
|
+
import co.deepvoiceai.bridge.shared.core.HandlerResponse
|
|
5
|
+
import kotlinx.coroutines.flow.toList
|
|
6
|
+
import kotlinx.coroutines.runBlocking
|
|
7
|
+
import kotlinx.serialization.json.Json
|
|
8
|
+
import kotlinx.serialization.json.JsonArray
|
|
9
|
+
import kotlinx.serialization.json.JsonNull
|
|
10
|
+
import kotlinx.serialization.json.JsonObject
|
|
11
|
+
import kotlinx.serialization.json.JsonPrimitive
|
|
12
|
+
import kotlinx.serialization.json.add
|
|
13
|
+
import kotlinx.serialization.json.addJsonObject
|
|
14
|
+
import kotlinx.serialization.json.buildJsonObject
|
|
15
|
+
import kotlinx.serialization.json.contentOrNull
|
|
16
|
+
import kotlinx.serialization.json.jsonArray
|
|
17
|
+
import kotlinx.serialization.json.jsonObject
|
|
18
|
+
import kotlinx.serialization.json.jsonPrimitive
|
|
19
|
+
import kotlinx.serialization.json.put
|
|
20
|
+
import kotlinx.serialization.json.putJsonArray
|
|
21
|
+
import kotlinx.serialization.json.putJsonObject
|
|
22
|
+
import org.junit.Assert.assertEquals
|
|
23
|
+
import org.junit.Assert.assertNotNull
|
|
24
|
+
import org.junit.Assert.assertNull
|
|
25
|
+
import org.junit.Assert.assertTrue
|
|
26
|
+
import org.junit.Test
|
|
27
|
+
import org.junit.runner.RunWith
|
|
28
|
+
import org.robolectric.RobolectricTestRunner
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* JVM unit tests for [MediaPipeHandlers]. Uses a [FakeBridge] implementing
|
|
32
|
+
* [MediaPipeBridgeApi] so tests don't need a real MediaPipe `.task` model.
|
|
33
|
+
*
|
|
34
|
+
* Mirrors the coverage of `LlamaHandlersTest`: chat-completion (sync +
|
|
35
|
+
* streaming), legacy completions, error / 400 surfaces, embeddings rejection,
|
|
36
|
+
* the models endpoint, plus Task 46 vision-capable happy path and image
|
|
37
|
+
* fetch failure surfaces.
|
|
38
|
+
*
|
|
39
|
+
* The `bytesToImage` seam that previously lived in [MediaPipeHandlers] has been
|
|
40
|
+
* moved into [MediaPipeBridge] as part of the Task 17 interface neutralization.
|
|
41
|
+
* [MediaPipeBridgeApi] now accepts [List]<[ByteArray]> rather than
|
|
42
|
+
* [List]<MPImage>. The [FakeBridge] captures raw bytes so tests can assert on
|
|
43
|
+
* the exact payload handed to the bridge, without any Robolectric bitmap
|
|
44
|
+
* overhead.
|
|
45
|
+
*/
|
|
46
|
+
@RunWith(RobolectricTestRunner::class)
|
|
47
|
+
class MediaPipeHandlersTest {
|
|
48
|
+
private val ctx = HandlerContext(modelId = "gemma-2b-it-cpu-int4", backendName = "mediapipe")
|
|
49
|
+
|
|
50
|
+
private class FakeBridge(
|
|
51
|
+
var responseToReturn: String = "canned mediapipe response",
|
|
52
|
+
var shouldThrow: Boolean = false,
|
|
53
|
+
) : MediaPipeBridgeApi {
|
|
54
|
+
var receivedPrompt: String? = null
|
|
55
|
+
var receivedImages: List<ByteArray> = emptyList()
|
|
56
|
+
var asyncCloseCount: Int = 0
|
|
57
|
+
|
|
58
|
+
override fun completePrompt(prompt: String, images: List<ByteArray>): String {
|
|
59
|
+
receivedPrompt = prompt
|
|
60
|
+
receivedImages = images
|
|
61
|
+
if (shouldThrow) throw RuntimeException("simulated mediapipe error")
|
|
62
|
+
return responseToReturn
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
override fun completePromptAsync(
|
|
66
|
+
prompt: String,
|
|
67
|
+
images: List<ByteArray>,
|
|
68
|
+
onPartial: (String, Boolean) -> Unit,
|
|
69
|
+
): AutoCloseable {
|
|
70
|
+
receivedPrompt = prompt
|
|
71
|
+
receivedImages = images
|
|
72
|
+
if (shouldThrow) throw RuntimeException("simulated mediapipe error")
|
|
73
|
+
// Synchronously emit two partial chunks then a final done=true. The
|
|
74
|
+
// handler's callbackFlow trySend is non-blocking so this is fine.
|
|
75
|
+
val mid = (responseToReturn.length / 2).coerceAtLeast(0)
|
|
76
|
+
val first = responseToReturn.substring(0, mid)
|
|
77
|
+
val second = responseToReturn.substring(mid)
|
|
78
|
+
onPartial(first, false)
|
|
79
|
+
onPartial(second, true)
|
|
80
|
+
return AutoCloseable { asyncCloseCount += 1 }
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
private fun makeHandlers(
|
|
85
|
+
bridge: FakeBridge = FakeBridge(),
|
|
86
|
+
visionCapable: Boolean = false,
|
|
87
|
+
): MediaPipeHandlers =
|
|
88
|
+
MediaPipeHandlers(
|
|
89
|
+
bridge = bridge,
|
|
90
|
+
modelId = "gemma-2b-it-cpu-int4",
|
|
91
|
+
visionCapable = visionCapable,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Parse a single SSE frame's JSON payload. Returns null for `[DONE]`
|
|
96
|
+
* frames or non-`data:` lines.
|
|
97
|
+
*/
|
|
98
|
+
private fun decodeFrame(frame: String): JsonObject? {
|
|
99
|
+
val trimmed = frame.trim()
|
|
100
|
+
if (!trimmed.startsWith("data: ")) return null
|
|
101
|
+
val payload = trimmed.removePrefix("data: ")
|
|
102
|
+
if (payload == "[DONE]") return null
|
|
103
|
+
return Json.parseToJsonElement(payload).jsonObject
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// ----- Chat completion (text happy path) -----
|
|
107
|
+
|
|
108
|
+
@Test
|
|
109
|
+
fun `chat completion text happy path`() = runBlocking {
|
|
110
|
+
val bridge = FakeBridge(responseToReturn = "Hello, world!")
|
|
111
|
+
val handlers = makeHandlers(bridge = bridge)
|
|
112
|
+
val body = buildJsonObject {
|
|
113
|
+
putJsonArray("messages") {
|
|
114
|
+
addJsonObject {
|
|
115
|
+
put("role", "user")
|
|
116
|
+
put("content", "hi")
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
|
|
121
|
+
?: error("expected Json response")
|
|
122
|
+
assertEquals(200, resp.status)
|
|
123
|
+
val obj = resp.body as JsonObject
|
|
124
|
+
assertEquals("chat.completion", (obj["object"] as JsonPrimitive).content)
|
|
125
|
+
assertEquals("gemma-2b-it-cpu-int4", (obj["model"] as JsonPrimitive).content)
|
|
126
|
+
val choices = obj["choices"] as JsonArray
|
|
127
|
+
assertEquals(1, choices.size)
|
|
128
|
+
val msg = (choices[0] as JsonObject)["message"] as JsonObject
|
|
129
|
+
assertEquals("Hello, world!", (msg["content"] as JsonPrimitive).content)
|
|
130
|
+
assertEquals("assistant", (msg["role"] as JsonPrimitive).content)
|
|
131
|
+
assertEquals(
|
|
132
|
+
"stop",
|
|
133
|
+
((choices[0] as JsonObject)["finish_reason"] as JsonPrimitive).content,
|
|
134
|
+
)
|
|
135
|
+
// Prompt threaded through openAIMessagesToPrompt.
|
|
136
|
+
assertEquals("user: hi", bridge.receivedPrompt)
|
|
137
|
+
// No images provided → empty list passed through.
|
|
138
|
+
assertEquals(0, bridge.receivedImages.size)
|
|
139
|
+
// ID prefix is the MediaPipe-flavored one.
|
|
140
|
+
assertTrue(
|
|
141
|
+
"id should start with chatcmpl-mp-: ${(obj["id"] as JsonPrimitive).content}",
|
|
142
|
+
(obj["id"] as JsonPrimitive).content.startsWith("chatcmpl-mp-"),
|
|
143
|
+
)
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// ----- Streaming -----
|
|
147
|
+
|
|
148
|
+
/**
 * Streaming chat completion for a plain-text request.
 *
 * With the fake bridge returning "abcd", the SSE flow is expected to yield
 * four frames: a role delta, two content deltas ("ab", then "cd" carrying
 * finish_reason "stop"), and the literal `[DONE]` terminator.
 */
@Test
fun `chat completion streaming text emits role content content finish-with-content done`() = runBlocking {
    val fakeBridge = FakeBridge(responseToReturn = "abcd")
    val underTest = makeHandlers(bridge = fakeBridge)

    val userMessage = buildJsonObject {
        put("role", "user")
        put("content", "hi")
    }
    val request = buildJsonObject {
        putJsonArray("messages") { add(userMessage) }
        put("stream", true)
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val sse = outcome as? HandlerResponse.Sse ?: error("expected Sse response")
    val emitted = sse.flow.toList()

    // role + 2 content chunks (the last one carries finish_reason) + [DONE]
    assertEquals(4, emitted.size)

    // --- Frame 0: role delta ---
    val roleFrame = decodeFrame(emitted[0]) ?: error("frame 0 not decodable")
    val roleDelta = roleFrame["choices"]
        ?.jsonArray
        ?.first()
        ?.jsonObject
        ?.get("delta")
        ?.jsonObject
    assertEquals("assistant", roleDelta?.get("role")?.jsonPrimitive?.content)
    assertEquals("chat.completion.chunk", roleFrame["object"]?.jsonPrimitive?.content)

    // --- Frame 1: first content delta, not yet finished ---
    val firstContentFrame = decodeFrame(emitted[1]) ?: error("frame 1 not decodable")
    val firstChoice = firstContentFrame["choices"]?.jsonArray?.first()?.jsonObject
        ?: error("frame 1 missing choices[0]")
    val firstContent = firstChoice["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content
    assertEquals("ab", firstContent)
    // A non-final frame must carry an explicit JSON null finish_reason.
    val firstFinish = firstChoice["finish_reason"]
    assertTrue(
        "frame 1 finish_reason should be JsonNull, got $firstFinish",
        firstFinish is JsonNull,
    )

    // --- Frame 2: last content delta, carries finish_reason="stop" ---
    val lastContentFrame = decodeFrame(emitted[2]) ?: error("frame 2 not decodable")
    val lastChoice = lastContentFrame["choices"]?.jsonArray?.first()?.jsonObject
        ?: error("frame 2 missing choices[0]")
    val lastContent = lastChoice["delta"]?.jsonObject?.get("content")?.jsonPrimitive?.content
    assertEquals("cd", lastContent)
    assertEquals("stop", lastChoice["finish_reason"]?.jsonPrimitive?.contentOrNull)

    // --- Frame 3: the raw [DONE] sentinel, which does not decode to JSON ---
    val terminator = emitted[3]
    assertEquals("data: [DONE]\n\n", terminator)
    assertNull(decodeFrame(terminator))

    // The bridge received the prompt and its async handle was closed exactly once.
    assertEquals("user: hi", fakeBridge.receivedPrompt)
    assertEquals(1, fakeBridge.asyncCloseCount)
}
|
|
211
|
+
|
|
212
|
+
// ----- Vision (Task 46) -----
|
|
213
|
+
|
|
214
|
+
/**
 * An `image_url` content part sent to handlers built with
 * `visionCapable = false` is expected to be rejected with a 400 whose message
 * mentions both the vision-capable model requirement and `visionEnabled`.
 */
@Test
fun `chat completion image part returns 400 when not vision-capable`() = runBlocking {
    val underTest = makeHandlers(visionCapable = false)

    // Single user message whose only content part is an image reference.
    val imagePart = buildJsonObject {
        put("type", "image_url")
        putJsonObject("image_url") {
            put("url", "data:image/png;base64,iVBOR")
        }
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(imagePart) }
            }
        }
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val rejection = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, rejection.status)

    // Both hints must appear in the error text.
    for (expectedFragment in listOf("vision-capable MediaPipe model", "visionEnabled")) {
        assertTrue(
            "message: ${rejection.message}",
            rejection.message.contains(expectedFragment),
        )
    }
}
|
|
245
|
+
|
|
246
|
+
/**
 * Vision-capable, non-streaming chat completion with one text part and one
 * inline PNG data URL.
 *
 * Expects a 200 JSON response carrying the fake bridge's reply, exactly one
 * image handed to the bridge as raw bytes, and a prompt built from the text
 * part only. The received bytes are checked against the complete 8-byte PNG
 * file signature, so a truncated or altered payload cannot slip through.
 */
@Test
fun `chat completion vision-capable image part decodes and threads through bridge`() = runBlocking {
    val bridge = FakeBridge(responseToReturn = "looks like a cat")
    val handlers = makeHandlers(bridge = bridge, visionCapable = true)

    // PNG image inlined as a base64 data URL so the test is self-contained.
    val pngDataUrl =
        "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
    val body = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") {
                    addJsonObject {
                        put("type", "text")
                        put("text", "describe this")
                    }
                    addJsonObject {
                        put("type", "image_url")
                        putJsonObject("image_url") {
                            put("url", pngDataUrl)
                        }
                    }
                }
            }
        }
    }

    val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Json
        ?: error("expected Json response")
    assertEquals(200, resp.status)
    val obj = resp.body as JsonObject
    val msg = ((obj["choices"] as JsonArray)[0] as JsonObject)["message"] as JsonObject
    assertEquals("looks like a cat", (msg["content"] as JsonPrimitive).content)

    // Bridge received exactly one image (as raw bytes) and the text prompt.
    assertEquals(1, bridge.receivedImages.size)
    val receivedBytes = bridge.receivedImages[0]

    // Sanity: the first 8 bytes must match the PNG file signature
    // (0x89 'P' 'N' 'G' CR LF 0x1A LF).
    val pngSignature = intArrayOf(0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A)
    assertTrue(
        "image too short for PNG signature: ${receivedBytes.size} bytes",
        receivedBytes.size >= pngSignature.size,
    )
    pngSignature.forEachIndexed { index, expected ->
        assertTrue("PNG magic: byte[$index]", receivedBytes[index] == expected.toByte())
    }

    // Prompt should contain just the text part.
    assertEquals("user: describe this", bridge.receivedPrompt)
}
|
|
293
|
+
|
|
294
|
+
/**
 * An `image_url` part whose nested object has no `url` key is expected to
 * produce a 400 error mentioning the missing field, even when the handlers
 * are vision-capable.
 */
@Test
fun `chat completion image_url missing url field returns 400`() = runBlocking {
    val underTest = makeHandlers(visionCapable = true)

    // The nested "image_url" object is deliberately empty (no "url" key).
    val malformedImagePart = buildJsonObject {
        put("type", "image_url")
        put("image_url", JsonObject(emptyMap()))
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(malformedImagePart) }
            }
        }
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val rejection = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, rejection.status)

    val mentionsMissingUrl = rejection.message.contains("missing 'url' field")
    assertTrue("message: ${rejection.message}", mentionsMissingUrl)
}
|
|
320
|
+
|
|
321
|
+
/**
 * An image URL that cannot be resolved (here an `ftp://` URL) is expected to
 * surface as a 502 error whose message contains "Failed to fetch image".
 */
@Test
fun `chat completion image fetch failure returns 502`() = runBlocking {
    val underTest = makeHandlers(visionCapable = true)

    // NOTE(review): the original comment says ImageDecoder.resolve throws
    // InvalidScheme for this URL; that code is not visible here, so confirm there.
    val unreachableImagePart = buildJsonObject {
        put("type", "image_url")
        putJsonObject("image_url") {
            put("url", "ftp://example.com/x.png")
        }
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(unreachableImagePart) }
            }
        }
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(502, failure.status)

    val mentionsFetchFailure = failure.message.contains("Failed to fetch image")
    assertTrue("message: ${failure.message}", mentionsFetchFailure)
}
|
|
348
|
+
|
|
349
|
+
/**
 * Streaming variant of the vision test: after the SSE flow is drained, the
 * fake bridge must have received exactly one image as raw bytes.
 *
 * The received bytes are checked against the complete 8-byte PNG file
 * signature rather than only its first two bytes, so a truncated or altered
 * payload is not mistaken for a faithful pass-through.
 */
@Test
fun `chat completion streaming vision-capable image part threads raw bytes through bridge`() = runBlocking {
    // Per the original test notes (Task 17), the handler layer passes a raw
    // ByteArray to the bridge and MPImage conversion happens inside
    // MediaPipeBridge. NOTE(review): that conversion is not visible from this
    // test; only the raw-byte hand-off is asserted here.
    val bridge = FakeBridge(responseToReturn = "ab")
    val handlers = makeHandlers(bridge = bridge, visionCapable = true)
    val pngDataUrl =
        "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="
    val body = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") {
                    addJsonObject {
                        put("type", "text")
                        put("text", "what is this")
                    }
                    addJsonObject {
                        put("type", "image_url")
                        putJsonObject("image_url") { put("url", pngDataUrl) }
                    }
                }
            }
        }
        put("stream", true)
    }

    val resp = handlers.handleChatCompletion(body, ctx) as? HandlerResponse.Sse
        ?: error("expected Sse response")
    resp.flow.toList() // consume the stream to completion

    // Bridge received one raw-byte image via the streaming path.
    assertEquals(1, bridge.receivedImages.size)
    val receivedBytes = bridge.receivedImages[0]

    // Full PNG file signature: 0x89 'P' 'N' 'G' CR LF 0x1A LF.
    val pngSignature = intArrayOf(0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A)
    assertTrue(
        "image too short for PNG signature: ${receivedBytes.size} bytes",
        receivedBytes.size >= pngSignature.size,
    )
    pngSignature.forEachIndexed { index, expected ->
        assertTrue("PNG magic byte[$index]", receivedBytes[index] == expected.toByte())
    }
}
|
|
383
|
+
|
|
384
|
+
// ----- 400 surfaces -----
|
|
385
|
+
|
|
386
|
+
/**
 * An `input_audio` content part is expected to be rejected with a 400 whose
 * message contains "Audio input not supported".
 */
@Test
fun `chat completion audio part returns 400`() = runBlocking {
    val underTest = makeHandlers()

    // Single user message carrying only an audio part.
    val audioPart = buildJsonObject {
        put("type", "input_audio")
        putJsonObject("input_audio") {
            put("data", "AAAA")
            put("format", "pcm16")
        }
    }
    val request = buildJsonObject {
        putJsonArray("messages") {
            addJsonObject {
                put("role", "user")
                putJsonArray("content") { add(audioPart) }
            }
        }
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val rejection = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, rejection.status)

    val mentionsAudio = rejection.message.contains("Audio input not supported")
    assertTrue("message: ${rejection.message}", mentionsAudio)
}
|
|
410
|
+
|
|
411
|
+
/**
 * A request body with no `messages` key at all is expected to yield a 400
 * error whose message mentions "messages".
 */
@Test
fun `chat completion missing messages returns 400`() = runBlocking {
    val underTest = makeHandlers()
    val emptyRequest = JsonObject(emptyMap())

    val outcome = underTest.handleChatCompletion(emptyRequest, ctx)
    val rejection = outcome as? HandlerResponse.Error ?: error("expected Error response")

    assertEquals(400, rejection.status)
    val mentionsMessages = rejection.message.contains("messages")
    assertTrue("message: ${rejection.message}", mentionsMessages)
}
|
|
419
|
+
|
|
420
|
+
/**
 * A request whose `messages` array is present but empty is expected to yield
 * a 400 error whose message contains "Empty".
 */
@Test
fun `chat completion empty messages returns 400`() = runBlocking {
    val underTest = makeHandlers()

    // { "messages": [] }
    val request = buildJsonObject {
        put("messages", JsonArray(emptyList()))
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val rejection = outcome as? HandlerResponse.Error ?: error("expected Error response")

    assertEquals(400, rejection.status)
    val mentionsEmpty = rejection.message.contains("Empty")
    assertTrue("message: ${rejection.message}", mentionsEmpty)
}
|
|
431
|
+
|
|
432
|
+
/**
 * When the fake bridge is configured with `shouldThrow = true`, a chat
 * completion is expected to come back as a 500 error whose message contains
 * "simulated mediapipe error".
 */
@Test
fun `chat completion bridge throw returns 500`() = runBlocking {
    val failingBridge = FakeBridge(shouldThrow = true)
    val underTest = makeHandlers(bridge = failingBridge)

    val userMessage = buildJsonObject {
        put("role", "user")
        put("content", "hi")
    }
    val request = buildJsonObject {
        putJsonArray("messages") { add(userMessage) }
    }

    val outcome = underTest.handleChatCompletion(request, ctx)
    val failure = outcome as? HandlerResponse.Error ?: error("expected Error response")

    assertEquals(500, failure.status)
    val carriesBridgeError = failure.message.contains("simulated mediapipe error")
    assertTrue("message: ${failure.message}", carriesBridgeError)
}
|
|
449
|
+
|
|
450
|
+
// ----- Legacy completions -----
|
|
451
|
+
|
|
452
|
+
/**
 * Legacy completion with a string `prompt`.
 *
 * Expects a 200 `text_completion` envelope whose first choice carries the
 * bridge's text, a "stop" finish reason and a `logprobs` entry, an id with
 * the `cmpl-` prefix, and the prompt forwarded as a user message.
 */
@Test
fun `legacy completion converts to text_completion shape`() = runBlocking {
    val fakeBridge = FakeBridge(responseToReturn = "canned-text")
    val underTest = makeHandlers(bridge = fakeBridge)
    val request = buildJsonObject {
        put("prompt", "say hi")
    }

    val outcome = underTest.handleCompletion(request, ctx)
    val response = outcome as? HandlerResponse.Json ?: error("expected Json response")
    assertEquals(200, response.status)

    val payload = response.body as JsonObject
    assertEquals("text_completion", (payload["object"] as JsonPrimitive).content)

    // First choice: text, finish reason, and a present "logprobs" key.
    val choiceList = payload["choices"] as JsonArray
    val firstChoice = choiceList[0] as JsonObject
    assertEquals("canned-text", (firstChoice["text"] as JsonPrimitive).content)
    assertEquals("stop", (firstChoice["finish_reason"] as JsonPrimitive).content)
    assertNotNull(firstChoice["logprobs"])

    // The id uses the legacy "cmpl-" prefix.
    val idStr = (payload["id"] as JsonPrimitive).content
    assertTrue("id should start with cmpl-: $idStr", idStr.startsWith("cmpl-"))

    // The prompt reached the bridge wrapped as a user message.
    assertEquals("user: say hi", fakeBridge.receivedPrompt)
}
|
|
473
|
+
|
|
474
|
+
/**
 * Legacy completion with an array `prompt`: the entries are expected to reach
 * the bridge joined by a newline inside a single user message.
 */
@Test
fun `legacy completion array prompt joined with newline`() = runBlocking {
    val fakeBridge = FakeBridge()
    val underTest = makeHandlers(bridge = fakeBridge)

    // { "prompt": ["alpha", "beta"] }
    val promptParts = listOf("alpha", "beta")
    val request = buildJsonObject {
        putJsonArray("prompt") {
            for (part in promptParts) {
                add(part)
            }
        }
    }

    val outcome = underTest.handleCompletion(request, ctx)
    val response = outcome as? HandlerResponse.Json ?: error("expected Json response")
    assertEquals(200, response.status)

    // Array entries are joined with "\n" and sent as one user message.
    assertEquals("user: alpha\nbeta", fakeBridge.receivedPrompt)
}
|
|
490
|
+
|
|
491
|
+
// ----- Embeddings -----
|
|
492
|
+
|
|
493
|
+
/**
 * The embeddings handler is expected to answer with a 400 whose message says
 * embeddings are unsupported on MediaPipe LLM and points at the
 * `capacitorBackend: "llama"` setting.
 */
@Test
fun `embeddings always returns 400 with redirect message`() = runBlocking {
    val underTest = makeHandlers()
    val request = buildJsonObject {
        put("input", "hello")
    }

    val outcome = underTest.handleEmbeddings(request, ctx)
    val rejection = outcome as? HandlerResponse.Error ?: error("expected Error response")
    assertEquals(400, rejection.status)

    // Both the "unsupported" notice and the redirect hint must be present.
    val requiredFragments = listOf(
        "Embeddings not supported on MediaPipe LLM",
        "capacitorBackend: \"llama\"",
    )
    for (fragment in requiredFragments) {
        assertTrue("message: ${rejection.message}", rejection.message.contains(fragment))
    }
}
|
|
510
|
+
|
|
511
|
+
// ----- Models -----
|
|
512
|
+
|
|
513
|
+
/**
 * The models handler is expected to return a 200 "list" envelope containing
 * exactly one entry whose id is the context's `modelId`, whose object is
 * "model", and whose owner is "google-mediapipe".
 */
@Test
fun `models returns single google-mediapipe entry with ctx modelId`() = runBlocking {
    val underTest = makeHandlers()
    val contextWithPath = HandlerContext(
        modelId = "/path/to/gemma-2b.task",
        backendName = "mediapipe",
    )

    val outcome = underTest.handleModels(contextWithPath)
    val response = outcome as? HandlerResponse.Json ?: error("expected Json response")
    assertEquals(200, response.status)

    // Reads a string-valued field out of a JSON object.
    val textOf = { source: JsonObject, key: String -> (source[key] as JsonPrimitive).content }

    val payload = response.body as JsonObject
    assertEquals("list", textOf(payload, "object"))

    val entries = payload["data"] as JsonArray
    assertEquals(1, entries.size)

    val onlyEntry = entries[0] as JsonObject
    assertEquals("/path/to/gemma-2b.task", textOf(onlyEntry, "id"))
    assertEquals("model", textOf(onlyEntry, "object"))
    assertEquals("google-mediapipe", textOf(onlyEntry, "owned_by"))
}
|
|
529
|
+
}
|