react-native-image-stitcher 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import com.facebook.react.ReactPackage
5
5
  import com.facebook.react.bridge.NativeModule
6
6
  import com.facebook.react.bridge.ReactApplicationContext
7
7
  import com.facebook.react.uimanager.ViewManager
8
+ import com.mrousavy.camera.frameprocessors.FrameProcessorPluginRegistry
8
9
 
9
10
  /**
10
11
  * ReactPackage that registers the SDK's two native modules with
@@ -22,15 +23,72 @@ import com.facebook.react.uimanager.ViewManager
22
23
  * JS layer.
23
24
  */
24
25
  class RNImageStitcherPackage : ReactPackage {
26
+
27
+ companion object {
28
+ @Volatile
29
+ private var fpPluginRegistered = false
30
+
31
+ /**
32
+ * F8.4 — register the vision-camera Frame Processor plugin.
33
+ * Called lazily from `createNativeModules` (which fires
34
+ * AFTER the React bridge has booted, side-stepping the
35
+ * bridgeless TurboModule init race we'd hit if we did this
36
+ * in a class-level static initialiser).
37
+ *
38
+ * No-op when vision-camera isn't on the runtime classpath
39
+ * (the SDK doesn't hard-depend on it — consumers that don't
40
+ * use `<Camera>` don't pay the dep). Catches
41
+ * `NoClassDefFoundError` defensively because the runtime
42
+ * classpath is what matters, not the compile-time one.
43
+ *
44
+ * Idempotent: guarded by `fpPluginRegistered` so a host
45
+ * with multiple React instances doesn't double-register
46
+ * (would throw "name already exists" from the registry).
47
+ */
48
+ @JvmStatic
49
+ @Synchronized
50
+ fun ensureFrameProcessorPluginRegistered() {
51
+ if (fpPluginRegistered) return
52
+ try {
53
+ FrameProcessorPluginRegistry.addFrameProcessorPlugin(
54
+ "cv_flow_gate_process_frame",
55
+ ) { proxy, options ->
56
+ CvFlowGateFrameProcessor(proxy, options)
57
+ }
58
+ fpPluginRegistered = true
59
+ } catch (e: NoClassDefFoundError) {
60
+ android.util.Log.i(
61
+ "RNImageStitcherPackage",
62
+ "vision-camera FrameProcessorPluginRegistry not on classpath — "
63
+ + "skipping cv_flow_gate_process_frame plugin registration "
64
+ + "(host app doesn't appear to use Frame Processors).",
65
+ )
66
+ fpPluginRegistered = true // don't retry every package init
67
+ } catch (e: Throwable) {
68
+ android.util.Log.w(
69
+ "RNImageStitcherPackage",
70
+ "Failed to register cv_flow_gate_process_frame plugin: ${e.message}",
71
+ )
72
+ fpPluginRegistered = true
73
+ }
74
+ }
75
+ }
76
+
25
77
  override fun createNativeModules(
26
78
  reactContext: ReactApplicationContext,
27
- ): List<NativeModule> = listOf(
28
- QualityChecker(reactContext),
29
- BatchStitcher(reactContext),
30
- RNSARSession(reactContext),
31
- IncrementalStitcher(reactContext),
32
- FileBridge(reactContext),
33
- )
79
+ ): List<NativeModule> {
80
+ // F8.4 — register the Frame Processor plugin here, after the
81
+ // bridge is fully booted. See `ensureFrameProcessorPluginRegistered`
82
+ // for the rationale (vs. a class-load-time static init).
83
+ ensureFrameProcessorPluginRegistered()
84
+ return listOf(
85
+ QualityChecker(reactContext),
86
+ BatchStitcher(reactContext),
87
+ RNSARSession(reactContext),
88
+ IncrementalStitcher(reactContext),
89
+ FileBridge(reactContext),
90
+ )
91
+ }
34
92
 
35
93
  override fun createViewManagers(
36
94
  reactContext: ReactApplicationContext,
@@ -428,133 +428,146 @@ class RNSARCameraView @JvmOverloads constructor(
428
428
  }
429
429
  return
430
430
  }
431
- try {
432
- // 2026-05-21 (v0.3) — pixel-data path. Pre-0.3 this code
433
- // unconditionally encoded the YUV camera image to JPEG and
434
- // wrote it to disk for EVERY ARCore frame at ~60 Hz (~25 ms
435
- // per frame of JPEG encode + disk I/O on the GL render
436
- // thread), regardless of whether the C++ KeyframeGate would
437
- // accept it. Now we extract the Y plane bytes (cheap
438
- // memcpy from a DirectByteBuffer), feed them to the gate
439
- // for proper Flow-strategy evaluation, and defer the JPEG
440
- // encode + disk write to the `onAccept` lambda so it only
441
- // runs on the rare frames the gate actually keeps
442
- // (typically ~6 per capture).
443
- //
444
- // Y-plane extraction for ARCore's YUV_420_888 format:
445
- // plane[0] is the luminance channel at full resolution,
446
- // pixelStride=1, rowStride may equal width OR be padded.
447
- // We pass rowStride as the C++ side's `stride` so the gate
448
- // skips padding correctly.
449
- val yPlane = image.planes[0]
450
- val yBuffer = yPlane.buffer
451
- val yStride = yPlane.rowStride
452
- val yWidth = image.width
453
- val yHeight = image.height
454
- // Copy Y bytes into a JVM-side ByteArray. Using
455
- // duplicate() so we don't mutate the original buffer's
456
- // position state (ARCore may have other readers).
457
- // For 1920×1080 Y plane that's ~2 MB; on Galaxy A35 the
458
- // memcpy itself is < 1 ms. JNI side pins via
459
- // GetPrimitiveArrayCritical so the byte[] stays a single
460
- // copy through the entire frame's lifecycle.
461
- val ySize = yStride * yHeight
462
- val yBytes = ByteArray(ySize)
463
- yBuffer.duplicate().apply { rewind() }.get(yBytes, 0, ySize)
464
-
465
- // Compute yaw + pitch from the ARCore quaternion using
466
- // the same convention the iOS Swift side uses (camera-
467
- // forward in world space). This keeps the two platforms
468
- // numerically aligned for the FoV-overlap gate.
469
- val q = camera.pose.rotationQuaternion // x, y, z, w
470
- val (yaw, pitch) = quaternionYawPitch(q)
471
-
472
- // Both FoVs + the full quaternion + intrinsics go to the
473
- // engine. V6 pose-driven path uses (qx, qy, qz, qw, fx,
474
- // fy, cx, cy, w, h) to compute the geometrically-exact
475
- // homography.
476
- val intrinsics = camera.imageIntrinsics
477
- val fx = intrinsics.focalLength[0].toDouble()
478
- val fy = intrinsics.focalLength[1].toDouble()
479
- val cxIntr = intrinsics.principalPoint[0].toDouble()
480
- val cyIntr = intrinsics.principalPoint[1].toDouble()
481
- val w = intrinsics.imageDimensions[0].toDouble()
482
- val h = intrinsics.imageDimensions[1].toDouble()
483
- val fovHRad = 2.0 * atan(w / (2.0 * fx))
484
- val fovVRad = 2.0 * atan(h / (2.0 * fy))
485
- val fovHDeg = fovHRad * 180.0 / Math.PI
486
- val fovVDeg = fovVRad * 180.0 / Math.PI
487
-
488
- // ARCore quaternion comes back in (x, y, z, w) order.
489
- val qarr = camera.pose.rotationQuaternion
490
- // P3-F: also extract translation so the KeyframeGate's
491
- // plane-based ray-projection can compute polygon overlap.
492
- // Previously these were dropped, forcing the gate into
493
- // angular-fallback even when a plane was latched.
494
- val tArr = camera.pose.translation
495
-
496
- val trackingPoor = camera.trackingState != TrackingState.TRACKING
497
- val module = IncrementalStitcher.bridgeInstance ?: return
498
- // 2026-05-15 (B3) — pass current display rotation so the
499
- // encoded JPEG gets an EXIF orientation tag. Captured into
500
- // a local val so the lambda below closes over a primitive
501
- // (avoids re-reading lastDisplayRotation if it shifts
502
- // between gate-evaluate and lambda invocation).
503
- val rotationForEncode = if (lastDisplayRotation >= 0)
504
- lastDisplayRotation else android.view.Surface.ROTATION_0
505
- // 2026-05-21 (v0.3) — eager JPEG encode is only needed when
506
- // the engine is in the legacy hybrid/firstwins live-engine
507
- // mode (which feeds JPEG paths into addFrameAtPath every
508
- // frame). In batch-keyframe mode (the production Camera
509
- // component's path), the JPEG is encoded LAZILY inside
510
- // the onAccept lambda below — only on the ~6 frames per
511
- // capture that the C++ KeyframeGate actually keeps.
512
- val legacyJpegPath: String? = if (module.isBatchKeyframeMode) {
513
- null
514
- } else {
515
- YuvImageConverter.encodeToJpeg(
516
- image,
517
- tmpJpegFile.absolutePath,
518
- jpegQuality = 70,
519
- displayRotation = rotationForEncode,
520
- )
431
+
432
+ // 2026-05-22 (audit follow-up #19) — minimise ARCore Image
433
+ // hold time.
434
+ //
435
+ // Pre-#19 the Image stayed open through the entire JNI
436
+ // ingest call AND any subsequent JPEG encode (~25 ms in
437
+ // legacy hybrid mode where every frame is encoded eagerly;
438
+ // ~25 ms in batch-keyframe mode for the ~5/60 frames the
439
+ // gate accepts). At 60 Hz ARCore that meant the Image was
440
+ // held 25-30 ms per frame on accepts, starving the Camera2
441
+ // ImageReader's circular buffer pool and risking
442
+ // "BufferQueue has been abandoned" stalls.
443
+ //
444
+ // The fix is mechanical: pack the YUV planes into a
445
+ // JVM-side NV21 byte array (~3 ms), close the Image, and
446
+ // run all subsequent work (JNI ingest + JPEG encode) on
447
+ // the copied bytes. ARCore Camera2 buffer pool stays
448
+ // healthier; latency-sensitive ARCore frames flow through
449
+ // their fixed pool instead of waiting on our JPEG path.
450
+ //
451
+ // The packed.nv21 array's first `width*height` bytes are
452
+ // the Y plane (densely packed, stride = width) — these go
453
+ // to the C++ gate as grayscale. The full array is the
454
+ // input to YuvImageConverter.encodeJpegFromNV21 if the
455
+ // gate accepts (or if we're in legacy eager-encode mode).
456
+ val packed = try {
457
+ YuvImageConverter.packNV21(image)
458
+ } finally {
459
+ // Close ASAP — every microsecond reduces buffer-pool
460
+ // pressure on Camera2. Even if packNV21 returns null
461
+ // (unsupported format), we still need to close.
462
+ try { image.close() } catch (_: Throwable) {}
463
+ } ?: run {
464
+ if (forwardLogTick % 30 == 1) {
465
+ Log.w(TAG, "forwardToIncremental: packNV21 returned null (unexpected format?)")
521
466
  }
522
- module.ingestFromARCameraView(
523
- tx = tArr[0].toDouble(),
524
- ty = tArr[1].toDouble(),
525
- tz = tArr[2].toDouble(),
526
- qx = qarr[0].toDouble(), qy = qarr[1].toDouble(),
527
- qz = qarr[2].toDouble(), qw = qarr[3].toDouble(),
528
- fx = fx, fy = fy, cx = cxIntr, cy = cyIntr,
529
- imageWidth = intrinsics.imageDimensions[0],
530
- imageHeight = intrinsics.imageDimensions[1],
531
- yaw = yaw, pitch = pitch,
532
- fovHorizDegrees = fovHDeg, fovVertDegrees = fovVDeg,
533
- trackingPoor = trackingPoor,
534
- grayData = yBytes,
535
- grayWidth = yWidth,
536
- grayHeight = yHeight,
537
- grayStride = yStride,
538
- legacyJpegPath = legacyJpegPath,
539
- onAccept = { targetPath ->
540
- // Lazy JPEG encode. Runs ONLY if the C++ KeyframeGate
541
- // accepted the frame. The ARCore Image is still open
542
- // at this point (we haven't reached `image.close()`
543
- // in the surrounding `finally` block yet), so the
544
- // encode reads raw camera pixels directly into a
545
- // JPEG at the final persistent path — no tmp file,
546
- // no second copy.
547
- YuvImageConverter.encodeToJpeg(
548
- image,
549
- targetPath,
550
- jpegQuality = 70,
551
- displayRotation = rotationForEncode,
552
- ) != null
553
- },
467
+ return
468
+ }
469
+
470
+ // Compute yaw + pitch from the ARCore quaternion using
471
+ // the same convention the iOS Swift side uses (camera-
472
+ // forward in world space). This keeps the two platforms
473
+ // numerically aligned for the FoV-overlap gate. `camera`
474
+ // (and `camera.pose`) remain valid after image.close() —
475
+ // they're ARCore Frame metadata, not pixel buffers.
476
+ val q = camera.pose.rotationQuaternion // x, y, z, w
477
+ val (yaw, pitch) = quaternionYawPitch(q)
478
+
479
+ // Both FoVs + the full quaternion + intrinsics go to the
480
+ // engine. V6 pose-driven path uses (qx, qy, qz, qw, fx,
481
+ // fy, cx, cy, w, h) to compute the geometrically-exact
482
+ // homography.
483
+ val intrinsics = camera.imageIntrinsics
484
+ val fx = intrinsics.focalLength[0].toDouble()
485
+ val fy = intrinsics.focalLength[1].toDouble()
486
+ val cxIntr = intrinsics.principalPoint[0].toDouble()
487
+ val cyIntr = intrinsics.principalPoint[1].toDouble()
488
+ val w = intrinsics.imageDimensions[0].toDouble()
489
+ val h = intrinsics.imageDimensions[1].toDouble()
490
+ val fovHRad = 2.0 * atan(w / (2.0 * fx))
491
+ val fovVRad = 2.0 * atan(h / (2.0 * fy))
492
+ val fovHDeg = fovHRad * 180.0 / Math.PI
493
+ val fovVDeg = fovVRad * 180.0 / Math.PI
494
+
495
+ // ARCore quaternion comes back in (x, y, z, w) order.
496
+ val qarr = camera.pose.rotationQuaternion
497
+ // P3-F: also extract translation so the KeyframeGate's
498
+ // plane-based ray-projection can compute polygon overlap.
499
+ // Previously these were dropped, forcing the gate into
500
+ // angular-fallback even when a plane was latched.
501
+ val tArr = camera.pose.translation
502
+
503
+ val trackingPoor = camera.trackingState != TrackingState.TRACKING
504
+ val module = IncrementalStitcher.bridgeInstance ?: return
505
+ // 2026-05-15 (B3) — pass current display rotation so the
506
+ // encoded JPEG gets an EXIF orientation tag. Captured into
507
+ // a local val so the lambda below closes over a primitive
508
+ // (avoids re-reading lastDisplayRotation if it shifts
509
+ // between gate-evaluate and lambda invocation).
510
+ val rotationForEncode = if (lastDisplayRotation >= 0)
511
+ lastDisplayRotation else android.view.Surface.ROTATION_0
512
+
513
+ // 2026-05-21 (v0.3) — eager JPEG encode is only needed when
514
+ // the engine is in the legacy hybrid/firstwins live-engine
515
+ // mode (which feeds JPEG paths into addFrameAtPath every
516
+ // frame). In batch-keyframe mode (the production Camera
517
+ // component's path), the JPEG is encoded LAZILY inside
518
+ // the onAccept lambda below — only on the ~6 frames per
519
+ // capture that the C++ KeyframeGate actually keeps.
520
+ //
521
+ // 2026-05-22 (#19) — the encode now reads from the already-
522
+ // packed NV21 bytes (`packed`), NOT from the live Image
523
+ // (which has been closed above). Same output, no Image
524
+ // hold time.
525
+ val legacyJpegPath: String? = if (module.isBatchKeyframeMode) {
526
+ null
527
+ } else {
528
+ YuvImageConverter.encodeJpegFromNV21(
529
+ packed,
530
+ tmpJpegFile.absolutePath,
531
+ jpegQuality = 70,
532
+ displayRotation = rotationForEncode,
554
533
  )
555
- } finally {
556
- image.close()
557
534
  }
535
+ module.ingestFromARCameraView(
536
+ tx = tArr[0].toDouble(),
537
+ ty = tArr[1].toDouble(),
538
+ tz = tArr[2].toDouble(),
539
+ qx = qarr[0].toDouble(), qy = qarr[1].toDouble(),
540
+ qz = qarr[2].toDouble(), qw = qarr[3].toDouble(),
541
+ fx = fx, fy = fy, cx = cxIntr, cy = cyIntr,
542
+ imageWidth = intrinsics.imageDimensions[0],
543
+ imageHeight = intrinsics.imageDimensions[1],
544
+ yaw = yaw, pitch = pitch,
545
+ fovHorizDegrees = fovHDeg, fovVertDegrees = fovVDeg,
546
+ trackingPoor = trackingPoor,
547
+ // The Y plane lives at packed.nv21[0 .. width*height).
548
+ // C++ keyframe_gate reads `height * stride` bytes and
549
+ // ignores anything past that, so passing the full NV21
550
+ // array with `grayStride = width` reads exactly the Y
551
+ // plane (UV bytes at the tail are not touched).
552
+ grayData = packed.nv21,
553
+ grayWidth = packed.width,
554
+ grayHeight = packed.height,
555
+ grayStride = packed.width,
556
+ legacyJpegPath = legacyJpegPath,
557
+ onAccept = { targetPath ->
558
+ // Lazy JPEG encode. Runs ONLY if the C++ KeyframeGate
559
+ // accepted the frame. Encodes from the pre-packed
560
+ // NV21 bytes — the ARCore Image was already closed
561
+ // right after packNV21, so this slow path adds no
562
+ // Image-hold cost.
563
+ YuvImageConverter.encodeJpegFromNV21(
564
+ packed,
565
+ targetPath,
566
+ jpegQuality = 70,
567
+ displayRotation = rotationForEncode,
568
+ ) != null
569
+ },
570
+ )
558
571
  }
559
572
 
560
573
  private fun applyDisplayGeometry() {