@dvai-bridge/android-mediapipe-core 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,134 @@
1
+ package co.deepvoiceai.bridge.mediapipe.core
2
+
3
+ import android.content.Context
4
+ import co.deepvoiceai.bridge.shared.core.CorsConfig
5
+ import co.deepvoiceai.bridge.shared.core.HandlerContext
6
+ import co.deepvoiceai.bridge.shared.core.HttpServer
7
+ import kotlinx.coroutines.sync.Mutex
8
+ import kotlinx.coroutines.sync.withLock
9
+
/**
 * Owns the running state of the MediaPipe core: the [MediaPipeBridge]
 * inference engine, the HTTP server, and model metadata.
 *
 * [start] and [stop] are serialized through the [Mutex] so concurrent
 * start/stop calls from the bridge layer can never race against the
 * underlying MediaPipe engine (which is not safe to construct twice in
 * parallel). [statusInfo] is not a suspend function and therefore does not
 * take the mutex; it reads a single volatile, immutable snapshot instead, so
 * the map it returns always describes one consistent state.
 *
 * Capacitor-free: opts are plain [Map<String, Any?>] and return values are
 * plain [Map<String, Any?>]. The Capacitor wrapper translates between
 * JSObject and Map before/after calling into this class.
 *
 * Differs from [co.deepvoiceai.bridge.llama.core.PluginState] in three ways:
 *  - No `mmprojPath` / `gpuLayers` / `contextSize` / `threads` opts —
 *    LiteRT-LM doesn't expose those knobs.
 *  - No `embeddingMode` opt — LiteRT-LM LLM has no embeddings path
 *    (handler returns 400 with a redirect to the llama backend).
 *  - Adds a `visionEnabled` opt that wires the bridge with
 *    `EngineConfig.visionBackend` and toggles the [MediaPipeHandlers.visionCapable]
 *    flag so `image_url` parts route through `Content.ImageBytes` instead of
 *    returning a 400.
 */
class PluginState {
    private val mutex = Mutex()

    /**
     * Everything that exists only while the server is up. Bundled into one
     * immutable object so that publishing or clearing the running state is a
     * single reference write.
     */
    private class ActiveSession(
        val server: HttpServer,
        val bridge: MediaPipeBridge,
        val modelId: String,
        val baseUrl: String,
        val port: Int,
    )

    /**
     * `null` while stopped. Written only while [mutex] is held; volatile so
     * the lock-free read in [statusInfo] observes the latest published value.
     */
    @Volatile
    private var session: ActiveSession? = null

    /**
     * Starts (or restarts) the bridge and its loopback HTTP server.
     *
     * Recognised keys in [opts]:
     *  - `modelPath` (String, required, non-empty)
     *  - `maxTokens` (Number, default 2048)
     *  - `visionEnabled` (Boolean, default false)
     *  - `httpBasePort` (Number, default 38883)
     *  - `httpMaxPortAttempts` (Number, default 16)
     *  - `corsOrigin` (handed to [CorsConfig.fromOpt] as-is)
     *  - `modelId` (String, defaults to a name derived from `modelPath`)
     *
     * If an instance is already running it is stopped first.
     *
     * @return a map with `baseUrl`, `port`, `backend` and `modelId`.
     * @throws IllegalArgumentException when `modelPath` is missing or empty.
     */
    suspend fun start(opts: Map<String, Any?>, context: Context): Map<String, Any?> = mutex.withLock {
        if (session != null) stopInternal()

        val modelPath = (opts["modelPath"] as? String)?.takeIf { it.isNotEmpty() }
            ?: throw IllegalArgumentException("modelPath is required for mediapipe backend")

        val maxTokens = (opts["maxTokens"] as? Number)?.toInt() ?: 2048
        val visionEnabled = opts["visionEnabled"] as? Boolean ?: false
        val httpBasePort = (opts["httpBasePort"] as? Number)?.toInt() ?: 38883
        val httpMaxPortAttempts = (opts["httpMaxPortAttempts"] as? Number)?.toInt() ?: 16
        val corsConfig = parseCors(opts["corsOrigin"])
        val resolvedModelId = (opts["modelId"] as? String)?.takeIf { it.isNotEmpty() }
            ?: deriveModelId(modelPath)

        // Construct the MediaPipe bridge. The engine itself is loaded lazily
        // on first use, so the handler set is wired before any heavy native
        // initialization happens — failures then surface on first inference.
        val newBridge = MediaPipeBridge(
            context = context,
            modelPath = modelPath,
            maxTokens = maxTokens,
            visionEnabled = visionEnabled,
        )

        // From here on the bridge exists, so every failure path — handler
        // wiring as well as the port bind — must release it.
        val (newServer, boundPort) = try {
            val handlers = MediaPipeHandlers(
                bridge = newBridge,
                modelId = resolvedModelId,
                visionCapable = visionEnabled,
            )
            val ctx = HandlerContext(modelId = resolvedModelId, backendName = "mediapipe")

            val candidate = HttpServer()
            candidate to candidate.startWithRoutes(
                basePort = httpBasePort,
                maxAttempts = httpMaxPortAttempts,
                host = "127.0.0.1",
                handlers = handlers,
                ctx = ctx,
                corsConfig = corsConfig,
            )
        } catch (t: Throwable) {
            // Wiring or bind failed — release the bridge so we don't leak
            // native resources, then surface the original failure.
            runCatching { newBridge.close() }
            throw t
        }

        val baseUrl = "http://127.0.0.1:$boundPort/v1"
        session = ActiveSession(
            server = newServer,
            bridge = newBridge,
            modelId = resolvedModelId,
            baseUrl = baseUrl,
            port = boundPort,
        )

        mapOf(
            "baseUrl" to baseUrl,
            "port" to boundPort,
            "backend" to "mediapipe",
            "modelId" to resolvedModelId,
        )
    }

    /** Stops the server and releases the bridge; a no-op when nothing is running. */
    suspend fun stop() = mutex.withLock { stopInternal() }

    /**
     * Tears down the current session. Must be called with [mutex] held.
     *
     * The bridge is closed and the state cleared even when stopping the
     * server throws; the server's exception still propagates afterwards.
     */
    private suspend fun stopInternal() {
        val current = session ?: return
        try {
            current.server.stop()
        } finally {
            // Best-effort: a failing close must not mask a server.stop() error.
            runCatching { current.bridge.close() }
            session = null
        }
    }

    /**
     * Returns a point-in-time status map: always `running`, plus `baseUrl`
     * and `backend` while an instance is running.
     */
    fun statusInfo(): Map<String, Any?> {
        // Read the volatile reference once so all keys describe the same state.
        val current = session
        return buildMap {
            put("running", current != null)
            if (current != null) {
                put("baseUrl", current.baseUrl)
                put("backend", "mediapipe")
            }
        }
    }

    private fun parseCors(raw: Any?): CorsConfig = CorsConfig.fromOpt(raw)

    /**
     * Best-effort default model id from a model file path: strip the directory
     * prefix (either `/` or `\` separated) and any `.litertlm` or legacy
     * `.task` extension. Returns `"mediapipe-default"` when nothing is left.
     */
    private fun deriveModelId(modelPath: String): String {
        val name = modelPath.substringAfterLast('/').substringAfterLast('\\')
        val stripped = name.removeSuffix(".litertlm").removeSuffix(".task")
        return stripped.ifEmpty { "mediapipe-default" }
    }
}
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <network-security-config>
3
+ <domain-config cleartextTrafficPermitted="true">
4
+ <domain includeSubdomains="false">localhost</domain>
5
+ <domain includeSubdomains="false">127.0.0.1</domain>
6
+ </domain-config>
7
+ </network-security-config>
@@ -0,0 +1,114 @@
1
+ package co.deepvoiceai.bridge.mediapipe.core
2
+
3
+ import okhttp3.OkHttpClient
4
+ import okhttp3.mockwebserver.MockResponse
5
+ import okhttp3.mockwebserver.MockWebServer
6
+ import okio.Buffer
7
+ import org.junit.After
8
+ import org.junit.Assert.assertArrayEquals
9
+ import org.junit.Assert.assertEquals
10
+ import org.junit.Assert.fail
11
+ import org.junit.Before
12
+ import org.junit.Test
13
+ import org.junit.runner.RunWith
14
+ import org.robolectric.RobolectricTestRunner
15
+ import java.io.File
16
+ import java.nio.file.Files
17
+
/**
 * Exercises [ImageDecoder] against each supported image source (`data:`,
 * HTTP, `file:`) and against the error it raises for each kind of bad input.
 * HTTP cases are served by a local [MockWebServer].
 */
@RunWith(RobolectricTestRunner::class)
class ImageDecoderTest {
    private lateinit var server: MockWebServer

    /** Boots a fresh mock server on an ephemeral port before every test. */
    @Before
    fun setUp() {
        server = MockWebServer()
        server.start(0)
    }

    /** Shuts the mock server down after every test. */
    @After
    fun tearDown() {
        server.shutdown()
    }

    /** A base64 `data:` URL resolves to bytes whose first eight are the PNG signature. */
    @Test
    fun `data URL base64 decodes to bytes`() {
        val pngSignature = byteArrayOf(0x89.toByte(), 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A)
        val dataUrl = loadResourceText("tiny-test-base64.txt")

        val decoded = ImageDecoder.resolve(dataUrl)

        assertArrayEquals(pngSignature, decoded.copyOfRange(0, 8))
    }

    /** A URL served by the mock server yields the response body unchanged. */
    @Test
    fun `https URL fetches bytes`() {
        val expected = loadResource("tiny-test.png")
        val body = Buffer()
        body.write(expected)
        server.enqueue(MockResponse().setBody(body))

        val imageUrl = server.url("/img.png").toString()
        val fetched = ImageDecoder.resolveWithClient(imageUrl, OkHttpClient())

        assertArrayEquals(expected, fetched)
    }

    /** A `file:` URL yields the contents of the file it points at. */
    @Test
    fun `file URL reads bytes`() {
        val expected = loadResource("tiny-test.png")
        val scratch = Files.createTempFile("dvai-image-", ".png").toFile()
        scratch.writeBytes(expected)
        scratch.deleteOnExit()
        try {
            val fileUrl = scratch.toURI().toString()
            assertArrayEquals(expected, ImageDecoder.resolve(fileUrl))
        } finally {
            scratch.delete()
        }
    }

    /** A scheme the decoder does not handle is rejected with `InvalidScheme`. */
    @Test(expected = ImageSourceError.InvalidScheme::class)
    fun `unsupported scheme throws`() {
        ImageDecoder.resolve("ftp://example.com/x.png")
    }

    /** A `data:` URL lacking the comma separator is rejected with `MalformedDataURL`. */
    @Test(expected = ImageSourceError.MalformedDataURL::class)
    fun `malformed data URL throws`() {
        ImageDecoder.resolve("data:image/png;base64")
    }

    /** A bare `file:` with no path is rejected with `MalformedURL`, not `InvalidScheme`. */
    @Test(expected = ImageSourceError.MalformedURL::class)
    fun `file URL with no path throws MalformedURL`() {
        ImageDecoder.resolve("file:")
    }

    /** A non-2xx response surfaces as `HttpError` carrying the status code. */
    @Test
    fun `http error throws HttpError`() {
        server.enqueue(MockResponse().setResponseCode(404))
        val missingUrl = server.url("/missing.png").toString()

        val raised: ImageSourceError.HttpError? = try {
            ImageDecoder.resolveWithClient(missingUrl, OkHttpClient())
            null
        } catch (e: ImageSourceError.HttpError) {
            e
        }

        if (raised == null) {
            fail("Expected ImageSourceError.HttpError")
            return
        }
        assertEquals(404, raised.status)
    }

    /** Reads the classpath resource `/images/<name>` as raw bytes. */
    private fun loadResource(name: String): ByteArray {
        val stream = checkNotNull(javaClass.getResourceAsStream("/images/$name")) {
            "missing test resource: /images/$name"
        }
        return stream.use { input -> input.readBytes() }
    }

    /** Reads the classpath resource `/images/<name>` as text, trimming surrounding whitespace. */
    private fun loadResourceText(name: String): String {
        val stream = checkNotNull(javaClass.getResourceAsStream("/images/$name")) {
            "missing test resource: /images/$name"
        }
        return stream.bufferedReader().use { reader -> reader.readText().trim() }
    }
}