@dvai-bridge/android-mediapipe-core 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ package co.deepvoiceai.bridge.mediapipe.core
2
+
3
+ import android.content.Context
4
+ import com.google.ai.edge.litertlm.Backend
5
+ import com.google.ai.edge.litertlm.Content
6
+ import com.google.ai.edge.litertlm.Contents
7
+ import com.google.ai.edge.litertlm.Conversation
8
+ import com.google.ai.edge.litertlm.Engine
9
+ import com.google.ai.edge.litertlm.EngineConfig
10
+ import com.google.ai.edge.litertlm.Message
11
+ import com.google.ai.edge.litertlm.MessageCallback
12
+
13
/**
 * Seam between the request handlers and Google's LiteRT-LM Engine.
 *
 * The production implementation is [MediaPipeBridge]. [MediaPipeHandlers]
 * depends only on this interface, which lets unit tests plug in a fake that
 * returns canned responses instead of loading a real `.litertlm` model.
 *
 * Concurrency: implementations are NOT required to be thread-safe —
 * [MediaPipeHandlers] funnels every call through its own mutex.
 */
interface MediaPipeBridgeApi {
    /**
     * Blocking prompt completion.
     *
     * @param prompt text prompt to send to the model.
     * @param images optional images as raw encoded bytes (PNG/JPEG/etc.). A
     *   non-empty list requires an engine built with `visionEnabled = true`;
     *   otherwise LiteRT-LM throws when the conversation is created or when
     *   the message is sent.
     * @return the generated response text.
     */
    fun completePrompt(prompt: String, images: List<ByteArray> = emptyList()): String

    /**
     * Streaming prompt completion.
     *
     * @param prompt text prompt to send to the model.
     * @param images optional images as raw encoded bytes (PNG/JPEG/etc.).
     * @param onPartial invoked once per partial chunk; its second argument is
     *   `true` on the final fragment.
     * @return a handle the caller can [AutoCloseable.close] to release the
     *   per-call conversation after the stream finishes or is cancelled.
     */
    fun completePromptAsync(
        prompt: String,
        images: List<ByteArray> = emptyList(),
        onPartial: (partial: String, done: Boolean) -> Unit,
    ): AutoCloseable
}
43
+
44
/**
 * Kotlin wrapper around the LiteRT-LM `litertlm-android:0.10.2` Engine API.
 * Replaces the deprecated `com.google.mediapipe:tasks-genai` MediaPipe bridge
 * (Phase 3B, Tasks 18-19).
 *
 * Architecture:
 *
 * - One long-lived [Engine] per bridge instance, created lazily so JVM unit
 *   tests that use a [MediaPipeBridgeApi] fake never trigger native loading.
 *   `Engine.initialize()` runs inside the lazy block; this is the heavy
 *   model-load step (~10 s) and must happen off the main thread.
 * - One [Conversation] per request — LiteRT-LM Conversations are stateful and
 *   multi-turn, so a fresh one is created per call and closed afterwards to
 *   keep the stateless-request semantics of the old session model.
 * - Vision is enabled at the engine level via [EngineConfig.visionBackend].
 *   There is no per-conversation vision flag (unlike the old
 *   `GraphOptions.setEnableVisionModality`).
 *
 * API deviations from the migration doc (§3) based on actual bytecode inspection:
 * - [Message] has no `.text` property — text is reached through
 *   `message.contents.contents`, a `List<Content>`. Text parts are
 *   `Content.Text` items; their text fields are joined to form the response.
 * - [EngineConfig] DOES have `maxNumImages: Int?` and `maxNumTokens: Int?`
 *   in the 0.10.2 artifact, so the migration doc §5 risk for
 *   setMaxNumImages does not apply; the field exists and is used here.
 * - [Engine] does not accept an Android `Context` — per migration doc §4 it is
 *   only needed for optional path derivation. The constructor keeps `context`
 *   for API compatibility and future use (e.g. `context.cacheDir.path`).
 *
 * Model file format: LiteRT-LM uses `.litertlm` bundles, not `.task`. Existing
 * `.task` models must be re-converted; see the migration notes for details.
 *
 * @param context retained for API compatibility; not read by this class.
 * @param modelPath path handed to [EngineConfig] as `modelPath`.
 * @param maxTokens forwarded as `maxNumTokens`.
 * @param visionEnabled when `true`, a GPU vision backend is configured.
 * @param maxImages forwarded as `maxNumImages` when [visionEnabled] is `true`.
 */
class MediaPipeBridge(
    @Suppress("UNUSED_PARAMETER") private val context: Context,
    private val modelPath: String,
    private val maxTokens: Int = 2048,
    private val visionEnabled: Boolean = false,
    private val maxImages: Int = 1,
) : MediaPipeBridgeApi, AutoCloseable {

    /** Lazily built and initialized engine; first access performs the model load. */
    private val engine: Engine by lazy {
        val config = EngineConfig(
            modelPath = modelPath,
            // Vision is switched on by supplying a visionBackend; null disables it.
            visionBackend = if (visionEnabled) Backend.GPU() else null,
            // Only meaningful when vision is on.
            maxNumImages = if (visionEnabled) maxImages else null,
            // Maps to the old setMaxTokens(int) option.
            maxNumTokens = maxTokens,
        )
        Engine(config).also { it.initialize() }
    }

    /**
     * Set once the lazy [engine] has been materialized through [engine], so
     * [close] can avoid forcing a model load just to close it.
     */
    @Volatile private var engineInitialized: Boolean = false

    /** Returns the engine, initializing it on first use and recording that fact. */
    private fun engine(): Engine = engine.also { engineInitialized = true }

    /** Creates the per-request conversation. */
    private fun newConversation(): Conversation = engine().createConversation()

    /** Closes the conversation, ignoring any failure (best-effort cleanup). */
    private fun Conversation.closeQuietly() {
        try {
            close()
        } catch (_: Throwable) {
            /* idempotent — best-effort cleanup */
        }
    }

    /**
     * Build a [Contents] value combining the text prompt with any image bytes.
     * The prompt becomes a leading [Content.Text]; each image is appended as a
     * [Content.ImageBytes] carrying the raw encoded bytes (no MPImage wrapping,
     * migration doc §2). The assembled list is handed to [Contents.of].
     */
    private fun buildContents(prompt: String, images: List<ByteArray>): Contents {
        val parts = mutableListOf<Content>(Content.Text(prompt))
        images.mapTo(parts) { Content.ImageBytes(it) }
        return Contents.of(parts)
    }

    /**
     * Extract text from a [Message] response by joining every `Content.Text`
     * part of `contents.contents`. Non-text parts are ignored.
     */
    private fun Message.extractText(): String =
        contents.contents
            .filterIsInstance<Content.Text>()
            .joinToString("") { it.text }

    /**
     * Sends the prompt (plus images) in a fresh conversation and returns the
     * response text. The conversation is always closed, also when sending throws.
     */
    override fun completePrompt(prompt: String, images: List<ByteArray>): String {
        val msgContents = buildContents(prompt, images)
        val conversation = newConversation()
        try {
            // sendMessage is the single-call replacement for the old
            // addQueryChunk + addImage + generateResponse triple (migration doc §3).
            return conversation.sendMessage(msgContents).extractText()
        } finally {
            conversation.closeQuietly()
        }
    }

    /**
     * Starts streaming generation in a fresh conversation.
     *
     * [onPartial] receives each partial message with `done = false` and a
     * final `("", true)` when the engine reports completion. If the engine
     * reports an error, a terminal `("", true)` is delivered as well before the
     * error is rethrown on the callback thread; without it a consumer waiting
     * for `done = true` would never be released.
     *
     * NOTE(review): if LiteRT-LM also invokes `onDone` after `onError`,
     * consumers will observe two terminal callbacks — confirm against the
     * engine's callback contract.
     *
     * @return a handle whose `close()` closes the conversation (best-effort).
     */
    override fun completePromptAsync(
        prompt: String,
        images: List<ByteArray>,
        onPartial: (String, Boolean) -> Unit,
    ): AutoCloseable {
        val msgContents = buildContents(prompt, images)
        val conversation = newConversation()
        try {
            // MessageCallback replaces the old ProgressListener<String> callback.
            conversation.sendMessageAsync(
                msgContents,
                object : MessageCallback {
                    override fun onMessage(message: Message) {
                        onPartial(message.extractText(), false)
                    }

                    override fun onDone() {
                        onPartial("", true)
                    }

                    override fun onError(throwable: Throwable) {
                        val failure = RuntimeException("LiteRT-LM streaming error", throwable)
                        // Always terminate the stream for the consumer first;
                        // a failure in the consumer must not mask the engine error.
                        try {
                            onPartial("", true)
                        } catch (t: Throwable) {
                            failure.addSuppressed(t)
                        }
                        // Surface the error: re-throw on the callback thread so
                        // that the engine's internal executor propagates it.
                        throw failure
                    }
                },
            )
        } catch (t: Throwable) {
            // Streaming never started: release the conversation before propagating.
            conversation.closeQuietly()
            throw t
        }
        return AutoCloseable { conversation.closeQuietly() }
    }

    /**
     * Closes the engine if it was ever initialized through this bridge;
     * otherwise does nothing. Close failures are ignored.
     */
    override fun close() {
        if (!engineInitialized) return
        try {
            engine.close()
        } catch (_: Throwable) {
            /* idempotent */
        }
    }
}
@@ -0,0 +1,482 @@
1
+ package co.deepvoiceai.bridge.mediapipe.core
2
+
3
import co.deepvoiceai.bridge.shared.core.DvaiHandlers
import co.deepvoiceai.bridge.shared.core.HandlerContext
import co.deepvoiceai.bridge.shared.core.HandlerResponse
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.buffer
import kotlinx.coroutines.flow.callbackFlow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonArray
import kotlinx.serialization.json.JsonNull
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.addJsonObject
import kotlinx.serialization.json.booleanOrNull
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.contentOrNull
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject
import java.util.UUID
28
+
29
/**
 * OpenAI-compatible handler set for the MediaPipe LLM backend on Android.
 * Wires `openAIMessagesToPrompt` -> `bridge.completePrompt` (sync) or
 * `bridge.completePromptAsync` (streaming) -> OpenAI response shape.
 *
 * Scope: text plus optional image input. Audio (`input_audio`) parts are
 * always rejected with a 400. Image support is gated behind [visionCapable]:
 * when `false`, `image_url` parts are rejected with a 400 instead of being
 * silently ignored.
 *
 * ## Streaming envelope
 *
 * Emits one role-only delta frame, then one content delta frame per bridge
 * callback (with `finish_reason: "stop"` on the final frame), then a literal
 * `[DONE]` terminator. The frame count therefore varies with the number of
 * partials produced — there is no fixed envelope size. The final frame carries
 * `finish_reason` alongside its content rather than in a separate empty-delta
 * frame; clients that accumulate `delta.content` or gate on `finish_reason`
 * both work.
 *
 * All bridge-touching paths are serialized via [bridgeMutex] because
 * [MediaPipeBridgeApi] implementations are not required to be thread-safe.
 *
 * @param bridge inference backend.
 * @param modelId model name reported in responses.
 * @param visionCapable `true` when the loaded model accepts images AND the
 *   bridge was built with vision enabled. Defaults to `false` so non-vision
 *   deployments reject image parts with a clear 400.
 */
class MediaPipeHandlers(
    private val bridge: MediaPipeBridgeApi,
    private val modelId: String,
    private val visionCapable: Boolean = false,
) : DvaiHandlers {
    /** Serializes every use of [bridge], including the full lifetime of a stream. */
    private val bridgeMutex = Mutex()

    /**
     * Handles `/chat/completions`.
     *
     * Validates `messages`, collects image bytes from `image_url` parts,
     * flattens the messages into one prompt and either returns a JSON
     * completion or, when `stream` is `true`, an SSE flow.
     *
     * Error mapping: 400 for malformed/unsupported input, 502 when an image
     * cannot be fetched or decoded, 500 when inference throws. Coroutine
     * cancellation is propagated, never converted into an error response.
     */
    override suspend fun handleChatCompletion(body: JsonObject, ctx: HandlerContext): HandlerResponse {
        val messages = body["messages"] as? JsonArray
            ?: return HandlerResponse.Error(400, "Missing 'messages' field")

        // Walk content parts up-front: collect images for vision-capable
        // models, reject audio (always unsupported), and reject image_url for
        // non-vision models. Images are collected as raw encoded bytes.
        val images = mutableListOf<ByteArray>()
        for (msg in messages) {
            val msgObj = msg as? JsonObject ?: continue
            val content = msgObj["content"] as? JsonArray ?: continue
            for (part in content) {
                val partObj = part as? JsonObject ?: continue
                when ((partObj["type"] as? JsonPrimitive)?.contentOrNull) {
                    "image_url" -> {
                        if (!visionCapable) {
                            return HandlerResponse.Error(
                                400,
                                "Image input requires a vision-capable MediaPipe model. " +
                                    "Loaded model has no vision capability — pass " +
                                    "`visionEnabled: true` to start() with a Gemma 3n " +
                                    "vision-capable .task bundle to enable image input.",
                            )
                        }
                        val urlStr = ((partObj["image_url"] as? JsonObject)?.get("url") as? JsonPrimitive)
                            ?.contentOrNull
                        if (urlStr.isNullOrEmpty()) {
                            return HandlerResponse.Error(
                                400,
                                "image_url part missing 'url' field",
                            )
                        }
                        val bytes = try {
                            withContext(Dispatchers.IO) { ImageDecoder.resolve(urlStr) }
                        } catch (e: CancellationException) {
                            // The request itself was cancelled — not a fetch failure.
                            throw e
                        } catch (e: Exception) {
                            // Fetch / decode failure — 502 per spec §8.5 wording.
                            return HandlerResponse.Error(
                                502,
                                "Failed to fetch image: ${e.message ?: "unknown error"}",
                            )
                        }
                        images.add(bytes)
                    }
                    "input_audio" -> {
                        return HandlerResponse.Error(
                            400,
                            "Audio input not supported on MediaPipe LLM " +
                                "(no audio-capable tasks-genai task).",
                        )
                    }
                }
            }
        }

        if (messages.isEmpty()) {
            return HandlerResponse.Error(400, "Empty 'messages' array")
        }

        val prompt = openAIMessagesToPrompt(messages)
        val isStream = (body["stream"] as? JsonPrimitive)?.booleanOrNull ?: false

        val id = "chatcmpl-mp-" + UUID.randomUUID().toString().take(20).lowercase()
        val created = System.currentTimeMillis() / 1000L

        if (isStream) {
            return HandlerResponse.Sse(
                buildChatStreamFrames(id = id, created = created, prompt = prompt, images = images),
            )
        }

        val text = try {
            bridgeMutex.withLock {
                withContext(Dispatchers.IO) { bridge.completePrompt(prompt, images) }
            }
        } catch (e: CancellationException) {
            // Keep structured concurrency intact: cancellation is not an inference error.
            throw e
        } catch (e: Exception) {
            return HandlerResponse.Error(500, e.message ?: "Inference failed")
        }

        val response = buildJsonObject {
            put("id", id)
            put("object", "chat.completion")
            put("created", created)
            put("model", modelId)
            putJsonArray("choices") {
                addJsonObject {
                    put("index", 0)
                    putJsonObject("message") {
                        put("role", "assistant")
                        put("content", text)
                    }
                    put("finish_reason", "stop")
                }
            }
            // Token accounting is not available from the bridge; report zeros.
            putJsonObject("usage") {
                put("prompt_tokens", 0)
                put("completion_tokens", 0)
                put("total_tokens", 0)
            }
        }
        return HandlerResponse.Json(200, response)
    }

    /**
     * Handles legacy `/completions` by wrapping `prompt` in a single user
     * message, delegating to [handleChatCompletion] and converting the result
     * (JSON body or each SSE frame) to the `text_completion` shape.
     *
     * `prompt` may be absent/null (treated as empty), a primitive, or an
     * array (elements joined with newlines); anything else yields a 400.
     */
    override suspend fun handleCompletion(body: JsonObject, ctx: HandlerContext): HandlerResponse {
        val promptField = body["prompt"]
        val prompt: String = when {
            promptField == null || promptField is JsonNull -> ""
            promptField is JsonPrimitive && promptField.contentOrNull != null -> promptField.content
            promptField is JsonArray -> promptField.joinToString("\n") {
                (it as? JsonPrimitive)?.contentOrNull ?: ""
            }
            else -> return HandlerResponse.Error(400, "'prompt' must be a string or array of strings")
        }

        // Same request, minus `prompt`, plus a synthesized `messages` array.
        val chatBody = buildJsonObject {
            for ((k, v) in body) {
                if (k == "prompt") continue
                put(k, v)
            }
            putJsonArray("messages") {
                addJsonObject {
                    put("role", "user")
                    put("content", prompt)
                }
            }
        }

        val chatResp = handleChatCompletion(chatBody, ctx)
        return when (chatResp) {
            is HandlerResponse.Json -> {
                // Smart cast across the shared-core module boundary doesn't work;
                // bind to a local val and check once.
                val respBody = chatResp.body
                if (chatResp.status != 200 || respBody !is JsonObject) {
                    chatResp
                } else {
                    HandlerResponse.Json(200, chatToLegacyCompletion(respBody))
                }
            }
            is HandlerResponse.Sse -> {
                val model = (body["model"] as? JsonPrimitive)?.contentOrNull ?: modelId
                HandlerResponse.Sse(
                    flow {
                        chatResp.flow.collect { chunk ->
                            emit(adaptChunkToLegacy(chunk, model))
                        }
                    },
                )
            }
            is HandlerResponse.Error -> chatResp
        }
    }

    /** Embeddings are not available on this backend; always answers 400. */
    override suspend fun handleEmbeddings(body: JsonObject, ctx: HandlerContext): HandlerResponse =
        HandlerResponse.Error(
            400,
            "Embeddings not supported on MediaPipe LLM. " +
                "Use capacitorBackend: \"llama\" with nativeEmbeddingMode: true.",
        )

    /** Lists the single model identified by `ctx.modelId`. */
    override suspend fun handleModels(ctx: HandlerContext): HandlerResponse =
        HandlerResponse.Json(
            200,
            buildJsonObject {
                put("object", "list")
                putJsonArray("data") {
                    addJsonObject {
                        put("id", ctx.modelId)
                        put("object", "model")
                        put("owned_by", "google-mediapipe")
                    }
                }
            },
        )

    // ----- Streaming -----

    /**
     * Build the SSE envelope: role frame + N content frames (last carries
     * `finish_reason: "stop"`) + `[DONE]` terminator.
     *
     * [bridgeMutex] is acquired when collection starts and released in a
     * `finally`, so it is freed on normal completion, on collector
     * cancellation, and on ANY throwable escaping the bridge call (including
     * `Error`s such as native link failures). The bridge handle is closed
     * before the mutex is released.
     *
     * The flow is given an unlimited buffer: frames are pushed with the
     * non-suspending `trySend` from the bridge's callback, and with the default
     * bounded buffer a lagging collector would cause frames to be dropped
     * silently.
     */
    private fun buildChatStreamFrames(
        id: String,
        created: Long,
        prompt: String,
        images: List<ByteArray>,
    ): Flow<String> = callbackFlow<String> {
        // Serialize against any other bridge use for the entire stream lifetime.
        bridgeMutex.lock()
        try {
            // Role chunk (first frame of the envelope).
            val roleChunk = buildJsonObject {
                put("id", id); put("object", "chat.completion.chunk")
                put("created", created); put("model", modelId)
                putJsonArray("choices") {
                    addJsonObject {
                        put("index", 0)
                        putJsonObject("delta") { put("role", "assistant") }
                    }
                }
            }
            trySend("data: $roleChunk\n\n")

            val handle: AutoCloseable = try {
                bridge.completePromptAsync(prompt, images) { partial, done ->
                    // One content-delta chunk per (partial, done) pair; the
                    // final one carries finish_reason="stop", others null.
                    val chunk = buildJsonObject {
                        put("id", id); put("object", "chat.completion.chunk")
                        put("created", created); put("model", modelId)
                        putJsonArray("choices") {
                            addJsonObject {
                                put("index", 0)
                                putJsonObject("delta") { put("content", partial) }
                                if (done) {
                                    put("finish_reason", "stop")
                                } else {
                                    put("finish_reason", JsonNull)
                                }
                            }
                        }
                    }
                    trySend("data: $chunk\n\n")
                    if (done) {
                        trySend("data: [DONE]\n\n")
                        close()
                    }
                }
            } catch (e: Exception) {
                // Generation failed to start. Emit an error chunk + [DONE] so the
                // client sees a well-formed SSE close, then complete the flow with
                // the exception (collector receives it).
                //
                // finish_reason uses the OpenAI-standard "stop" value; the failure
                // signal is conveyed via the sibling `error` field. (OpenAI's spec
                // restricts finish_reason to stop|length|tool_calls|content_filter|
                // function_call|null, so we don't invent an "error" value.)
                val errChunk = buildJsonObject {
                    put("id", id); put("object", "chat.completion.chunk")
                    put("created", created); put("model", modelId)
                    putJsonArray("choices") {
                        addJsonObject {
                            put("index", 0)
                            putJsonObject("delta") { /* empty */ }
                            put("finish_reason", "stop")
                        }
                    }
                    putJsonObject("error") { put("message", e.message ?: "Inference failed") }
                }
                trySend("data: $errChunk\n\n")
                trySend("data: [DONE]\n\n")
                close(e)
                // No handle exists to close; the enclosing finally releases the mutex.
                return@callbackFlow
            }

            // Suspends until the channel is closed (done frame) or the
            // collector is cancelled, then releases the per-call conversation.
            awaitClose {
                try {
                    handle.close()
                } catch (_: Throwable) {
                    /* best-effort */
                }
            }
        } finally {
            bridgeMutex.unlock()
        }
    }.buffer(Channel.UNLIMITED)

    // ----- Helpers -----

    /**
     * Flatten OpenAI chat messages into a single `role: content` newline-joined
     * prompt string. A missing role defaults to `user`. Multimodal content
     * arrays are reduced to their `text` parts joined by spaces; messages
     * without any usable content are skipped.
     */
    private fun openAIMessagesToPrompt(messages: JsonArray): String =
        messages.mapNotNull { msg ->
            val msgObj = msg as? JsonObject ?: return@mapNotNull null
            val role = (msgObj["role"] as? JsonPrimitive)?.contentOrNull ?: "user"
            when (val content = msgObj["content"]) {
                is JsonPrimitive -> "$role: ${content.contentOrNull ?: ""}"
                is JsonArray -> {
                    val texts = content.mapNotNull inner@{ part ->
                        val partObj = part as? JsonObject ?: return@inner null
                        if ((partObj["type"] as? JsonPrimitive)?.contentOrNull == "text") {
                            (partObj["text"] as? JsonPrimitive)?.contentOrNull
                        } else {
                            null
                        }
                    }
                    if (texts.isNotEmpty()) "$role: ${texts.joinToString(" ")}" else null
                }
                else -> null
            }
        }.joinToString("\n")

    /**
     * Convert a `chat.completion` body into the legacy `text_completion`
     * shape. Mirrors `chatToLegacyCompletion()` from `packages/dvai-bridge-core`.
     *
     * The source `created` value is preserved; the current time is used only
     * when it is absent or JSON null.
     */
    private fun chatToLegacyCompletion(chat: JsonObject): JsonObject = buildJsonObject {
        val chatId = (chat["id"] as? JsonPrimitive)?.contentOrNull ?: ""
        val cmplId = if (chatId.isEmpty()) {
            "cmpl-${System.currentTimeMillis() / 1000L}"
        } else {
            chatId.replace("chatcmpl-", "cmpl-")
        }
        put("id", cmplId)
        put("object", "text_completion")
        // Do NOT use `?.let { put(..) } ?: put(..)` here: put() returns the
        // previous value (null), which would always trigger the fallback.
        put(
            "created",
            chat["created"]?.takeUnless { it is JsonNull }
                ?: JsonPrimitive(System.currentTimeMillis() / 1000L),
        )
        put("model", (chat["model"] as? JsonPrimitive)?.contentOrNull ?: modelId)
        putJsonArray("choices") {
            val chatChoices = chat["choices"] as? JsonArray ?: JsonArray(emptyList())
            for (c in chatChoices) {
                val co = c as? JsonObject ?: continue
                addJsonObject {
                    val msg = co["message"] as? JsonObject
                    put("text", (msg?.get("content") as? JsonPrimitive)?.contentOrNull ?: "")
                    put("index", (co["index"] as? JsonPrimitive)?.intOrNull ?: 0)
                    put(
                        "finish_reason",
                        (co["finish_reason"] as? JsonPrimitive)?.contentOrNull ?: "stop",
                    )
                    put("logprobs", JsonNull)
                }
            }
        }
        val usage = chat["usage"] as? JsonObject
        if (usage != null) {
            put("usage", usage)
        } else {
            putJsonObject("usage") {
                put("prompt_tokens", 0)
                put("completion_tokens", 0)
                put("total_tokens", 0)
            }
        }
    }

    /**
     * Adapt a single SSE frame from chat.completion.chunk -> text_completion.chunk.
     *
     * Frames that are not `data:` lines or whose payload is not a JSON object
     * are returned unchanged; the `[DONE]` terminator is passed through. The
     * source `created` value is preserved when present.
     *
     * @param model fallback model name when the frame carries none.
     */
    private fun adaptChunkToLegacy(chunk: String, model: String): String {
        val trimmed = chunk.trim()
        if (!trimmed.startsWith("data:")) return chunk
        val payload = trimmed.removePrefix("data:").trim()
        if (payload == "[DONE]") return "data: [DONE]\n\n"
        val parsed = try {
            Json.parseToJsonElement(payload) as? JsonObject ?: return chunk
        } catch (_: Exception) {
            return chunk
        }
        val chatId = (parsed["id"] as? JsonPrimitive)?.contentOrNull ?: ""
        val id = chatId.replace("chatcmpl-", "cmpl-")
        val legacy = buildJsonObject {
            put("id", id)
            put("object", "text_completion.chunk")
            // See chatToLegacyCompletion: avoid the `?.let { put } ?: put` trap.
            put(
                "created",
                parsed["created"]?.takeUnless { it is JsonNull }
                    ?: JsonPrimitive(System.currentTimeMillis() / 1000L),
            )
            put("model", (parsed["model"] as? JsonPrimitive)?.contentOrNull ?: model)
            putJsonArray("choices") {
                val chatChoices = parsed["choices"] as? JsonArray ?: JsonArray(emptyList())
                for (c in chatChoices) {
                    val co = c as? JsonObject ?: continue
                    addJsonObject {
                        val delta = co["delta"] as? JsonObject
                        put("text", (delta?.get("content") as? JsonPrimitive)?.contentOrNull ?: "")
                        put("index", (co["index"] as? JsonPrimitive)?.intOrNull ?: 0)
                        val fr = co["finish_reason"]
                        if (fr is JsonPrimitive && fr.contentOrNull != null) {
                            put("finish_reason", fr.content)
                        } else {
                            put("finish_reason", JsonNull)
                        }
                        put("logprobs", JsonNull)
                    }
                }
            }
        }
        return "data: $legacy\n\n"
    }
}