react-native-image-stitcher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/CHANGELOG.md +96 -0
  2. package/LICENSE +201 -0
  3. package/NOTICE +21 -0
  4. package/README.md +189 -0
  5. package/RNImageStitcher.podspec +76 -0
  6. package/android/build.gradle +224 -0
  7. package/android/src/main/AndroidManifest.xml +3 -0
  8. package/android/src/main/cpp/CMakeLists.txt +124 -0
  9. package/android/src/main/cpp/image_stitcher_jni.cpp +145 -0
  10. package/android/src/main/cpp/keyframe_gate_jni.cpp +204 -0
  11. package/android/src/main/java/io/imagestitcher/rn/BatchStitcher.kt +426 -0
  12. package/android/src/main/java/io/imagestitcher/rn/IncrementalFirstwinsEngine.kt +960 -0
  13. package/android/src/main/java/io/imagestitcher/rn/IncrementalStitcher.kt +2371 -0
  14. package/android/src/main/java/io/imagestitcher/rn/KeyframeGate.kt +256 -0
  15. package/android/src/main/java/io/imagestitcher/rn/QualityChecker.kt +167 -0
  16. package/android/src/main/java/io/imagestitcher/rn/RNImageStitcherPackage.kt +39 -0
  17. package/android/src/main/java/io/imagestitcher/rn/RNSARCameraView.kt +558 -0
  18. package/android/src/main/java/io/imagestitcher/rn/RNSARCameraViewManager.kt +35 -0
  19. package/android/src/main/java/io/imagestitcher/rn/RNSARSession.kt +784 -0
  20. package/android/src/main/java/io/imagestitcher/rn/ar/BackgroundRenderer.kt +176 -0
  21. package/android/src/main/java/io/imagestitcher/rn/ar/ShaderUtil.kt +67 -0
  22. package/android/src/main/java/io/imagestitcher/rn/ar/YuvImageConverter.kt +201 -0
  23. package/cpp/ar_frame_pose.h +63 -0
  24. package/cpp/keyframe_gate.cpp +927 -0
  25. package/cpp/keyframe_gate.hpp +240 -0
  26. package/cpp/stitcher.cpp +2207 -0
  27. package/cpp/stitcher.hpp +275 -0
  28. package/dist/ar/useARSession.d.ts +102 -0
  29. package/dist/ar/useARSession.js +133 -0
  30. package/dist/camera/ARCameraView.d.ts +93 -0
  31. package/dist/camera/ARCameraView.js +170 -0
  32. package/dist/camera/Camera.d.ts +134 -0
  33. package/dist/camera/Camera.js +688 -0
  34. package/dist/camera/CameraShutter.d.ts +80 -0
  35. package/dist/camera/CameraShutter.js +237 -0
  36. package/dist/camera/CameraView.d.ts +65 -0
  37. package/dist/camera/CameraView.js +117 -0
  38. package/dist/camera/CaptureControlsBar.d.ts +87 -0
  39. package/dist/camera/CaptureControlsBar.js +82 -0
  40. package/dist/camera/CaptureHeader.d.ts +62 -0
  41. package/dist/camera/CaptureHeader.js +81 -0
  42. package/dist/camera/CapturePreview.d.ts +70 -0
  43. package/dist/camera/CapturePreview.js +188 -0
  44. package/dist/camera/CaptureStatusOverlay.d.ts +75 -0
  45. package/dist/camera/CaptureStatusOverlay.js +326 -0
  46. package/dist/camera/CaptureThumbnailStrip.d.ts +87 -0
  47. package/dist/camera/CaptureThumbnailStrip.js +177 -0
  48. package/dist/camera/IncrementalPanGuide.d.ts +83 -0
  49. package/dist/camera/IncrementalPanGuide.js +267 -0
  50. package/dist/camera/PanoramaBandOverlay.d.ts +107 -0
  51. package/dist/camera/PanoramaBandOverlay.js +399 -0
  52. package/dist/camera/PanoramaConfirmModal.d.ts +57 -0
  53. package/dist/camera/PanoramaConfirmModal.js +128 -0
  54. package/dist/camera/PanoramaGuidance.d.ts +79 -0
  55. package/dist/camera/PanoramaGuidance.js +246 -0
  56. package/dist/camera/PanoramaSettingsModal.d.ts +311 -0
  57. package/dist/camera/PanoramaSettingsModal.js +611 -0
  58. package/dist/camera/ViewportCropOverlay.d.ts +46 -0
  59. package/dist/camera/ViewportCropOverlay.js +67 -0
  60. package/dist/camera/useCapture.d.ts +111 -0
  61. package/dist/camera/useCapture.js +160 -0
  62. package/dist/camera/useDeviceOrientation.d.ts +48 -0
  63. package/dist/camera/useDeviceOrientation.js +131 -0
  64. package/dist/camera/useVideoCapture.d.ts +79 -0
  65. package/dist/camera/useVideoCapture.js +151 -0
  66. package/dist/index.d.ts +26 -0
  67. package/dist/index.js +39 -0
  68. package/dist/quality/normaliseOrientation.d.ts +36 -0
  69. package/dist/quality/normaliseOrientation.js +62 -0
  70. package/dist/quality/runQualityCheck.d.ts +41 -0
  71. package/dist/quality/runQualityCheck.js +98 -0
  72. package/dist/sensors/useIMUTranslationGate.d.ts +70 -0
  73. package/dist/sensors/useIMUTranslationGate.js +235 -0
  74. package/dist/stitching/IncrementalStitcherView.d.ts +41 -0
  75. package/dist/stitching/IncrementalStitcherView.js +157 -0
  76. package/dist/stitching/incremental.d.ts +930 -0
  77. package/dist/stitching/incremental.js +133 -0
  78. package/dist/stitching/stitchFrames.d.ts +55 -0
  79. package/dist/stitching/stitchFrames.js +56 -0
  80. package/dist/stitching/stitchVideo.d.ts +119 -0
  81. package/dist/stitching/stitchVideo.js +57 -0
  82. package/dist/stitching/useIncrementalJSDriver.d.ts +74 -0
  83. package/dist/stitching/useIncrementalJSDriver.js +199 -0
  84. package/dist/stitching/useIncrementalStitcher.d.ts +58 -0
  85. package/dist/stitching/useIncrementalStitcher.js +172 -0
  86. package/dist/types.d.ts +58 -0
  87. package/dist/types.js +15 -0
  88. package/ios/Package.swift +72 -0
  89. package/ios/Sources/RNImageStitcher/ARCameraViewManager.m +33 -0
  90. package/ios/Sources/RNImageStitcher/ARCameraViewManager.swift +40 -0
  91. package/ios/Sources/RNImageStitcher/ARSessionBridge.m +55 -0
  92. package/ios/Sources/RNImageStitcher/ARSessionBridge.swift +149 -0
  93. package/ios/Sources/RNImageStitcher/IncrementalStitcher.swift +2727 -0
  94. package/ios/Sources/RNImageStitcher/IncrementalStitcherBridge.m +85 -0
  95. package/ios/Sources/RNImageStitcher/IncrementalStitcherBridge.swift +625 -0
  96. package/ios/Sources/RNImageStitcher/KeyframeGate.swift +328 -0
  97. package/ios/Sources/RNImageStitcher/KeyframeGateBridge.h +141 -0
  98. package/ios/Sources/RNImageStitcher/KeyframeGateBridge.mm +278 -0
  99. package/ios/Sources/RNImageStitcher/OpenCVIncrementalStitcher.h +473 -0
  100. package/ios/Sources/RNImageStitcher/OpenCVIncrementalStitcher.mm +1326 -0
  101. package/ios/Sources/RNImageStitcher/OpenCVKeyframeCollector.h +97 -0
  102. package/ios/Sources/RNImageStitcher/OpenCVKeyframeCollector.mm +296 -0
  103. package/ios/Sources/RNImageStitcher/OpenCVSlitScanStitcher.h +103 -0
  104. package/ios/Sources/RNImageStitcher/OpenCVSlitScanStitcher.mm +3285 -0
  105. package/ios/Sources/RNImageStitcher/OpenCVStitcher.h +238 -0
  106. package/ios/Sources/RNImageStitcher/OpenCVStitcher.mm +1880 -0
  107. package/ios/Sources/RNImageStitcher/QualityChecker.swift +252 -0
  108. package/ios/Sources/RNImageStitcher/QualityCheckerBridge.m +26 -0
  109. package/ios/Sources/RNImageStitcher/QualityCheckerBridge.swift +72 -0
  110. package/ios/Sources/RNImageStitcher/RNSARCameraView.swift +114 -0
  111. package/ios/Sources/RNImageStitcher/RNSARSession.swift +1111 -0
  112. package/ios/Sources/RNImageStitcher/Stitcher.swift +243 -0
  113. package/ios/Sources/RNImageStitcher/StitcherBridge.m +28 -0
  114. package/ios/Sources/RNImageStitcher/StitcherBridge.swift +246 -0
  115. package/package.json +73 -0
  116. package/react-native.config.js +34 -0
  117. package/scripts/opencv-version.txt +1 -0
  118. package/scripts/postinstall-fetch-binaries.js +286 -0
  119. package/src/ar/useARSession.ts +210 -0
  120. package/src/camera/.gitkeep +0 -0
  121. package/src/camera/ARCameraView.tsx +256 -0
  122. package/src/camera/Camera.tsx +1053 -0
  123. package/src/camera/CameraShutter.tsx +292 -0
  124. package/src/camera/CameraView.tsx +157 -0
  125. package/src/camera/CaptureControlsBar.tsx +204 -0
  126. package/src/camera/CaptureHeader.tsx +184 -0
  127. package/src/camera/CapturePreview.tsx +318 -0
  128. package/src/camera/CaptureStatusOverlay.tsx +391 -0
  129. package/src/camera/CaptureThumbnailStrip.tsx +277 -0
  130. package/src/camera/IncrementalPanGuide.tsx +328 -0
  131. package/src/camera/PanoramaBandOverlay.tsx +498 -0
  132. package/src/camera/PanoramaConfirmModal.tsx +206 -0
  133. package/src/camera/PanoramaGuidance.tsx +327 -0
  134. package/src/camera/PanoramaSettingsModal.tsx +1357 -0
  135. package/src/camera/ViewportCropOverlay.tsx +81 -0
  136. package/src/camera/useCapture.ts +279 -0
  137. package/src/camera/useDeviceOrientation.ts +140 -0
  138. package/src/camera/useVideoCapture.ts +236 -0
  139. package/src/index.ts +53 -0
  140. package/src/quality/.gitkeep +0 -0
  141. package/src/quality/normaliseOrientation.ts +79 -0
  142. package/src/quality/runQualityCheck.ts +131 -0
  143. package/src/sensors/useIMUTranslationGate.ts +347 -0
  144. package/src/stitching/.gitkeep +0 -0
  145. package/src/stitching/IncrementalStitcherView.tsx +198 -0
  146. package/src/stitching/incremental.ts +1021 -0
  147. package/src/stitching/stitchFrames.ts +88 -0
  148. package/src/stitching/stitchVideo.ts +153 -0
  149. package/src/stitching/useIncrementalJSDriver.ts +273 -0
  150. package/src/stitching/useIncrementalStitcher.ts +252 -0
  151. package/src/types.ts +78 -0
@@ -0,0 +1,1111 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ //
3
+ // RNSARSession — iOS ARKit wrapper that drives the SDK's
4
+ // pose-aware capture path.
5
+ //
6
+ // Phase 4 of the AR measurement plan
7
+ // (docs/site-content/design/2026-04-29-ar-measurement-and-detection.md).
8
+ // This is the foundation: it owns an ARSession, streams 6DoF
9
+ // camera poses + intrinsics + timestamps to JS, and stores a
10
+ // rolling pose log keyed by frame timestamp that the stitcher
11
+ // (Phase 5) and measurement APIs (Phase 6) read from.
12
+ //
13
+ // Why we own the ARSession instead of letting the host app:
14
+ // 1. ARKit and AVCaptureSession can't coexist on the same camera.
15
+ // The SDK's vision-camera-backed CameraView and an ARSession
16
+ // both want exclusive control. Centralising AR session
17
+ // lifecycle in the SDK lets us coordinate the handoff: when
18
+ // AR is active, vision-camera releases the camera; when AR
19
+ // stops, vision-camera resumes.
20
+ // 2. The pose log lives in native memory. Marshalling every
21
+ // frame (60Hz × 4×4 matrix × N frames) to JS via the bridge
22
+ // would be wasteful. Instead the JS side gets a session
23
+ // handle + occasional state updates, while the stitcher and
24
+ // measurement code read poses directly from native land.
25
+
26
+ import Foundation
27
+ import ARKit
28
+ import AVFoundation
29
+ import simd
30
+ import UIKit
31
+ import os.log
32
+
33
+ // V15.0c.4 — FAULT-level os_log on the same subsystem/category the
34
+ // slit-scan engine uses, so Console.app's filter for `category =
35
+ // slitscan` shows ARKit plane events alongside engine events.
36
+ // FAULT survives os_log's default-level rate limiting; NSLog is
37
+ // "default" level and gets coalesced/dropped under burst.
38
+ fileprivate let arSessionDiagLog = OSLog(
39
+ subsystem: "com.tiger.retailens.sdk",
40
+ category: "slitscan"
41
+ )
42
+
43
+
44
+ /// Track state mirrors `ARCamera.TrackingState`. We mirror it
45
+ /// rather than re-export the ARKit enum so the JS bridge sees a
46
+ /// stable shape that doesn't drift with iOS SDK updates.
47
+ @objc public enum RNSARTrackingState: Int {
48
+ /// AR isn't running on this device or session was never started.
49
+ case notAvailable = 0
50
+ /// Session is running but tracking quality is too low to use.
51
+ /// Equivalent to ARKit's .limited.
52
+ case initialising = 1
53
+ /// Session is tracking with normal quality. Poses are usable.
54
+ case tracking = 2
55
+ /// Tracking was lost mid-session (e.g. user covered the camera).
56
+ /// Poses captured during this period have low confidence.
57
+ case limited = 3
58
+ }
59
+
60
+
61
+ /// One frame's pose as a plain-old data object (a final NSObject subclass), ready to flatten into
62
+ /// JSON for the JS bridge. Values are in ARKit's right-handed
63
+ /// world coordinate frame (Y-up, -Z forward), translation in
64
+ /// metres.
65
+ @objc(RNSARFramePose)
66
+ public final class RNSARFramePose: NSObject {
67
+ /// Translation in world coordinates, metres.
68
+ @objc public let tx: Double
69
+ @objc public let ty: Double
70
+ @objc public let tz: Double
71
+
72
+ /// Rotation as a unit quaternion. qw is the real component.
73
+ @objc public let qx: Double
74
+ @objc public let qy: Double
75
+ @objc public let qz: Double
76
+ @objc public let qw: Double
77
+
78
+ /// Camera intrinsic parameters at this frame.
79
+ /// fx/fy: focal length in pixels.
80
+ /// cx/cy: principal point in pixels.
81
+ @objc public let fx: Double
82
+ @objc public let fy: Double
83
+ @objc public let cx: Double
84
+ @objc public let cy: Double
85
+
86
+ /// Image dimensions of the captured frame in pixels.
87
+ /// Useful for scaling intrinsics if the consumer downsamples.
88
+ @objc public let imageWidth: Int
89
+ @objc public let imageHeight: Int
90
+
91
+ /// Frame timestamp in milliseconds since session start.
92
+ /// Stitcher uses this to correlate pose data with video frames.
93
+ @objc public let timestampMs: Double
94
+
95
+ /// Tracking quality at the time of this frame.
96
+ @objc public let trackingState: RNSARTrackingState
97
+
98
+ @objc public init(
99
+ tx: Double, ty: Double, tz: Double,
100
+ qx: Double, qy: Double, qz: Double, qw: Double,
101
+ fx: Double, fy: Double, cx: Double, cy: Double,
102
+ imageWidth: Int, imageHeight: Int,
103
+ timestampMs: Double,
104
+ trackingState: RNSARTrackingState
105
+ ) {
106
+ self.tx = tx; self.ty = ty; self.tz = tz
107
+ self.qx = qx; self.qy = qy; self.qz = qz; self.qw = qw
108
+ self.fx = fx; self.fy = fy; self.cx = cx; self.cy = cy
109
+ self.imageWidth = imageWidth
110
+ self.imageHeight = imageHeight
111
+ self.timestampMs = timestampMs
112
+ self.trackingState = trackingState
113
+ }
114
+
115
+ /// Convenience: serialise to NSDictionary for the RN bridge.
116
+ @objc public func asDictionary() -> [String: Any] {
117
+ return [
118
+ "tx": tx, "ty": ty, "tz": tz,
119
+ "qx": qx, "qy": qy, "qz": qz, "qw": qw,
120
+ "fx": fx, "fy": fy, "cx": cx, "cy": cy,
121
+ "imageWidth": imageWidth, "imageHeight": imageHeight,
122
+ "timestampMs": timestampMs,
123
+ "trackingState": trackingState.rawValue,
124
+ ]
125
+ }
126
+ }
127
+
128
+
129
+ /// Singleton owner of the ARSession + pose log.
130
+ ///
131
+ /// We use a singleton because the iOS hardware constraint is global:
132
+ /// only one ARSession can be active per process. A singleton avoids
133
+ /// accidentally starting two sessions from different SDK call sites.
134
+ @objc(RNSARSession)
135
+ public final class RNSARSession: NSObject, ARSessionDelegate {
136
+
137
+ /// Shared instance. All callers MUST go through this.
138
+ @objc public static let shared = RNSARSession()
139
+
140
+ /// The underlying ARKit session. Module-internal (not `private`)
141
+ /// so RNSARCameraView (same module) can bind its ARSCNView
142
+ /// to this exact session — sharing is critical so the pose log
143
+ /// (driven by this object's `ARSessionDelegate` callbacks) stays
144
+ /// populated while the view renders frames. Lifecycle is still
145
+ /// controlled exclusively via `start()` / `stop()`.
146
+ let arSession = ARSession()
147
+
148
+ /// Rolling log of poses, keyed by ARFrame timestamp (TimeInterval).
149
+ /// Capped at MAX_POSE_LOG entries to bound memory under long
150
+ /// recordings. Phase 5 stitching will query by timestamp.
151
+ private var poseLog: [(TimeInterval, RNSARFramePose)] = []
152
+ private let poseLogQueue = DispatchQueue(
153
+ label: "com.retailens.arsession.poselog",
154
+ attributes: .concurrent
155
+ )
156
+ private static let MAX_POSE_LOG = 600 // ~10 s @ 60Hz
157
+
158
+ /// Latest tracking state. Read by JS for UI feedback.
159
+ @objc public private(set) var currentTrackingState: RNSARTrackingState = .notAvailable
160
+
161
+ /// Whether the session is currently running.
162
+ @objc public private(set) var isRunning: Bool = false
163
+
164
+ // ──────────────────────────────────────────────────────────────
165
+ // V15.0b — vertical plane detection
166
+ // ──────────────────────────────────────────────────────────────
167
+ /// First detected vertical plane anchor's transform (4x4, column-
168
+ /// major, world coords). Nil until ARKit detects a vertical
169
+ /// plane. Once latched, NOT updated — canvas geometry needs to
170
+ /// be stable across the capture. Reset by `relatchPlaneFromCurrentAnchors()` and `stop()`.
171
+ private var detectedPlaneTransformInternal: simd_float4x4? = nil
172
+ private let planeLatchLock = NSLock()
173
+
174
+ /// V15.0d — minimum dot product between a candidate plane's
175
+ /// surface normal and the camera's FACING direction (i.e. the
176
+ /// negated Z column of the camera transform) at detection time. Planes whose
177
+ /// alignment is below this threshold are REJECTED — the user is
178
+ /// scanning a wall in front of them, not a side wall or a
179
+ /// doorframe. Ranges 0.0 (accept any vertical plane) – 1.0
180
+ /// (only accept perfectly camera-facing planes). Default 0.6
181
+ /// ≈ 53° max angle off-camera. Set by the bridge via
182
+ /// `setPlaneAlignmentThreshold` from the engine config.
183
+ @objc public var planeAlignmentThreshold: Float = 0.6
184
+
185
+ /// V15.0e — best alignment score seen on any candidate plane
186
+ /// rejected by the alignment filter. -1 = no candidate seen
187
+ /// yet. When > 0 but a plane hasn't been latched, the JS UI
188
+ /// shows "found plane but off-axis (best 0.45)" so the operator
189
+ /// knows to face the wall more directly to clear the threshold.
190
+ /// Reset on -stop and at the start of every relatch.
191
+ @objc public private(set) var bestRejectedAlignment: Float = -1.0
192
+
193
+ /// Whether a vertical plane has been detected and latched.
194
+ @objc public var hasPlaneDetected: Bool {
195
+ planeLatchLock.lock()
196
+ defer { planeLatchLock.unlock() }
197
+ return detectedPlaneTransformInternal != nil
198
+ }
199
+
200
+ /// V15.0g — clear the latched plane and re-evaluate ALL currently-
201
+ /// tracked vertical ARPlaneAnchors against the camera's CURRENT
202
+ /// aim, picking the BEST candidate.
203
+ ///
204
+ /// V15.0g.3 scoring (replaces V15.0g area-weighted):
205
+ /// 1. Reject planes whose alignment is below
206
+ /// `planeAlignmentThreshold`.
207
+ /// 2. Reject planes smaller than `kMinPlaneArea` (0.20 m²) —
208
+ /// filters out micro-planes from artifacts (sign edges, etc.)
209
+ /// that might happen to be very close.
210
+ /// 3. Among the rest, pick the **closest** plane (smallest
211
+ /// perpendicular distance from camera).
212
+ ///
213
+ /// Why closest, not largest:
214
+ /// Field testing on a Pepsi cooler (2026-05-08) showed the area-
215
+ /// weighted heuristic picking the WALL behind the cooler (3.5 m²,
216
+ /// 1.5m away) over the cooler face itself (0.85 m², 0.85m away).
217
+ /// Wall normal isn't perpendicular to the camera view → projecting
218
+ /// onto wall plane caused horizontal anchor drift as user tilted
219
+ /// down ("everything moves to the right as I pan down").
220
+ ///
221
+ /// The user is almost always aimed at the FOREGROUND object they
222
+ /// want to scan — that's why they're aimed at it. Closest plane
223
+ /// = foreground = scan target. Min-area filter prevents tiny
224
+ /// nearby artifacts (a sign's edge, a small reflection) from
225
+ /// winning by being super close.
226
+ ///
227
+ /// Returns YES if a plane was latched, NO if no candidate passed
228
+ /// both filters (or no current ARFrame was available).
229
+ @objc public func relatchPlaneFromCurrentAnchors() -> Bool {
230
+ planeLatchLock.lock()
231
+ defer { planeLatchLock.unlock() }
232
+
233
+ // Clear any existing latch; we're picking fresh.
234
+ detectedPlaneTransformInternal = nil
235
+ bestRejectedAlignment = -1.0
236
+
237
+ guard let frame = arSession.currentFrame else {
238
+ os_log(.fault, log: arSessionDiagLog,
239
+ "[V15.0g-relatch] no current frame; deferred until next session tick")
240
+ return false
241
+ }
242
+ let cameraTransform = frame.camera.transform
243
+ let cameraFacingWorld = simd_float3(
244
+ -cameraTransform.columns.2.x,
245
+ -cameraTransform.columns.2.y,
246
+ -cameraTransform.columns.2.z
247
+ )
248
+ let cameraPosWorld = simd_float3(
249
+ cameraTransform.columns.3.x,
250
+ cameraTransform.columns.3.y,
251
+ cameraTransform.columns.3.z
252
+ )
253
+
254
+ // V15.0g.3 — minimum plane area to be considered a real scan
255
+ // target. Tiny planes are usually artifacts (a small reflective
256
+ // surface, a sign's edge) that ARKit briefly fits.
257
+ let kMinPlaneArea: Float = 0.20 // 0.45m × 0.45m
258
+
259
+ var bestPlane: ARPlaneAnchor? = nil
260
+ var bestPerpDist: Float = .greatestFiniteMagnitude
261
+ var bestAlignment: Float = -1.0
262
+ var bestArea: Float = 0.0
263
+
264
+ for anchor in frame.anchors {
265
+ guard let plane = anchor as? ARPlaneAnchor else { continue }
266
+ if plane.alignment != .vertical { continue }
267
+
268
+ let planeNormalWorld = simd_float3(
269
+ plane.transform.columns.1.x,
270
+ plane.transform.columns.1.y,
271
+ plane.transform.columns.1.z
272
+ )
273
+ let planeOriginWorld = simd_float3(
274
+ plane.transform.columns.3.x,
275
+ plane.transform.columns.3.y,
276
+ plane.transform.columns.3.z
277
+ )
278
+ let dotPos = simd_dot(planeNormalWorld, cameraFacingWorld)
279
+ let alignment = max(dotPos, -dotPos)
280
+
281
+ if alignment < planeAlignmentThreshold {
282
+ if alignment > bestRejectedAlignment {
283
+ bestRejectedAlignment = alignment
284
+ }
285
+ continue
286
+ }
287
+
288
+ // Area = extent.x × extent.z (using deprecated extent for
289
+ // iOS 15 compat; iOS 16+ has planeExtent which is more
290
+ // accurate but we don't depend on absolute precision here).
291
+ let area = plane.extent.x * plane.extent.z
292
+
293
+ // V15.0g.3 — reject micro-planes.
294
+ if area < kMinPlaneArea {
295
+ os_log(.fault, log: arSessionDiagLog,
296
+ "[V15.0g-relatch] candidate REJECTED (area too small): alignment=%f area=%fm² (extent %fx%f) < min=%f",
297
+ alignment, area, plane.extent.x, plane.extent.z, kMinPlaneArea)
298
+ continue
299
+ }
300
+
301
+ // V15.0g.3 — perpendicular distance from camera to plane.
302
+ // Closer = more likely the foreground scan target.
303
+ let diff = planeOriginWorld - cameraPosWorld
304
+ let perpDist = abs(simd_dot(diff, planeNormalWorld))
305
+ // Score is inverse-distance for diagnostic clarity; lower
306
+ // perpDist = higher score.
307
+ let score = (perpDist > 0.001) ? (1.0 / perpDist) : 1000.0
308
+
309
+ os_log(.fault, log: arSessionDiagLog,
310
+ "[V15.0g-relatch] candidate plane: alignment=%f area=%fm² (extent %fx%f) perpDist=%fm score=%f",
311
+ alignment, area, plane.extent.x, plane.extent.z, perpDist, score)
312
+
313
+ // V15.0g.3 — closer wins.
314
+ if perpDist < bestPerpDist {
315
+ bestPlane = plane
316
+ bestPerpDist = perpDist
317
+ bestAlignment = alignment
318
+ bestArea = area
319
+ }
320
+ }
321
+
322
+ guard let chosen = bestPlane else {
323
+ os_log(.fault, log: arSessionDiagLog,
324
+ "[V15.0g-relatch] no candidate plane passed alignment+area filters (best rejected alignment=%f, threshold=%f); engine will refuse first frame until lock",
325
+ bestRejectedAlignment, planeAlignmentThreshold)
326
+ return false
327
+ }
328
+
329
+ detectedPlaneTransformInternal = chosen.transform
330
+ os_log(.fault, log: arSessionDiagLog,
331
+ "[V15.0g-relatch] latched best plane: alignment=%f area=%fm² perpDist=%fm extent=%fx%f centre=(%f,%f,%f)",
332
+ bestAlignment, bestArea, bestPerpDist,
333
+ chosen.extent.x, chosen.extent.z,
334
+ chosen.center.x, chosen.center.y, chosen.center.z)
335
+ return true
336
+ }
337
+
338
+ /// Returns the latched plane transform as a 16-element [NSNumber]
339
+ /// array (column-major). `nil` if no plane detected yet.
340
+ @objc public func planeTransformFlat() -> [NSNumber]? {
341
+ planeLatchLock.lock()
342
+ defer { planeLatchLock.unlock() }
343
+ guard let m = detectedPlaneTransformInternal else { return nil }
344
+ let cols = [m.columns.0, m.columns.1, m.columns.2, m.columns.3]
345
+ var out: [NSNumber] = []
346
+ out.reserveCapacity(16)
347
+ for c in cols {
348
+ out.append(NSNumber(value: c.x))
349
+ out.append(NSNumber(value: c.y))
350
+ out.append(NSNumber(value: c.z))
351
+ out.append(NSNumber(value: c.w))
352
+ }
353
+ return out
354
+ }
355
+
356
+ /// V16 keyframe-gate accessor — returns the latched plane as a
357
+ /// `simd_float4x4`, the form Swift code (`KeyframeGate`,
358
+ /// `IncrementalStitcher`) needs for in-process polygon
359
+ /// math. Distinct from `planeTransformFlat()` which exists only
360
+ /// to bridge the same data into ObjC++ as an NSNumber array.
361
+ /// Nil until a plane is latched (via the AR delegate's didAdd
362
+ /// alignment filter or `relatchPlaneFromCurrentAnchors()`).
363
+ public func latchedPlaneTransform() -> simd_float4x4? {
364
+ planeLatchLock.lock()
365
+ defer { planeLatchLock.unlock() }
366
+ return detectedPlaneTransformInternal
367
+ }
368
+
369
+ // ──────────────────────────────────────────────────────────────
370
+ // Phase 5 — AR-backed photo + video capture state
371
+ // ──────────────────────────────────────────────────────────────
372
+ //
373
+ // `takePhoto` / `startRecording` / `stopRecording` make the AR
374
+ // session a drop-in replacement for vision-camera's `<Camera>`
375
+ // — same imperative API exposed via ARCameraView's ref, so the
376
+ // host's existing `useCapture` / `useVideoCapture` hooks work
377
+ // transparently when AR mode is on.
378
+ //
379
+ // The asset writer state below is touched from TWO threads:
380
+ // 1. The bridge thread (start/stop calls from JS).
381
+ // 2. The ARSession delegate thread (per-frame callbacks
382
+ // that append the latest pixelBuffer to the writer).
383
+ // We serialise via `writerLock` (NSLock) — the delegate uses
384
+ // `try()` so it never blocks ARKit; start/stop hold the lock
385
+ // only while swapping state pointers, never across the slow
386
+ // AVFoundation calls.
387
+
388
+ /// Active AVAssetWriter while recording; nil when idle.
389
+ private var assetWriter: AVAssetWriter?
390
+ /// AVAssetWriterInput owns the encoded video track. Held
391
+ /// separately from `assetWriter` so we can call `markAsFinished`
392
+ /// and check `isReadyForMoreMediaData` without re-querying.
393
+ private var videoInput: AVAssetWriterInput?
394
+ /// Adaptor accepts CVPixelBuffer directly — bypasses the
395
+ /// CMSampleBuffer ceremony that would otherwise be needed for
396
+ /// each frame. ARFrame.capturedImage is already a CVPixelBuffer.
397
+ private var pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor?
398
+ /// Timestamp of the first frame appended. Used as the session
399
+ /// start time so CMTime presentation timestamps remain monotonic
400
+ /// from zero.
401
+ private var recordingStartTime: CMTime?
402
+ /// Lock guarding writer-state reads/writes. Used with `try()`
403
+ /// from the ARSession delegate so frame-append never blocks the
404
+ /// delegate thread; if start/stop is mid-flight, the frame is
405
+ /// just dropped (graceful). Held briefly during setup +
406
+ /// teardown only to swap the state pointers — the slow
407
+ /// AVFoundation calls (`startWriting`, `finishWriting`) happen
408
+ /// OUTSIDE the lock.
409
+ private let writerLock = NSLock()
410
+
411
+ /// Optional consumer that receives each ARFrame's pixel buffer +
412
+ /// pose for the live incremental-stitching path. Set by
413
+ /// `IncrementalStitcher.start()` and cleared on
414
+ /// `finalize()` / `cancel()`.
415
+ ///
416
+ /// Weak so the consumer's lifetime is owned by whoever set it
417
+ /// (currently the incremental-stitcher singleton); this just
418
+ /// prevents the AR session from keeping alive a consumer that's
419
+ /// been torn down.
420
+ @objc public weak var incrementalConsumer: ARFrameConsumer?
421
+
422
+ private override init() {
423
+ super.init()
424
+ arSession.delegate = self
425
+ }
426
+
427
+ /// Whether ARKit's WorldTrackingConfiguration is supported on this
428
+ /// device. All iPhones since the 6s support it; the check is
429
+ /// defensive against the SDK being run on the simulator or an
430
+ /// unusual deployment.
431
+ @objc public static var isSupported: Bool {
432
+ return ARWorldTrackingConfiguration.isSupported
433
+ }
434
+
435
+ @objc public func start() {
436
+ guard Self.isSupported else {
437
+ currentTrackingState = .notAvailable
438
+ return
439
+ }
440
+ // V15.0f — IDEMPOTENT. Calling start() while the session is
441
+ // already running used to re-run with [.resetTracking,
442
+ // .removeExistingAnchors], which silently WIPED any plane
443
+ // detection that had been accumulating since the camera
444
+ // view first mounted. Multiple call sites (camera view's
445
+ // didMoveToWindow, JS bridge's start, useARSession hook)
446
+ // could trigger this race. Guarding here keeps plane
447
+ // detection state stable across redundant start() calls.
448
+ if isRunning {
449
+ os_log(.fault, log: arSessionDiagLog,
450
+ "[V15.0f-ar-start] start() called while already running — ignored to preserve plane detection state")
451
+ return
452
+ }
453
+ let config = ARWorldTrackingConfiguration()
454
+ // smoothedSceneDepth gives us per-pixel depth on LiDAR-equipped
455
+ // devices; gracefully no-ops on non-LiDAR devices. Used by
456
+ // Phase 6 measurement.
457
+ if ARWorldTrackingConfiguration.supportsFrameSemantics(.smoothedSceneDepth) {
458
+ config.frameSemantics = .smoothedSceneDepth
459
+ }
460
+ // V15.0b — enable VERTICAL plane detection for the
461
+ // plane-projected stitch mode. ARKit incrementally builds a
462
+ // model of any vertical surface in view (typical retail
463
+ // fixture wall). The first-detected vertical plane's
464
+ // transform is latched at capture-start and used as the
465
+ // canvas reference frame: each accepted camera frame is
466
+ // warped onto the plane via a 3×3 homography rather than
467
+ // onto a virtual cylinder/plane at first-frame anchor.
468
+ // CPU cost is negligible (<2 ms/frame). Detection time:
469
+ // 2–5 s on non-LiDAR devices, sub-second on LiDAR.
470
+ config.planeDetection = [.vertical]
471
+ // Auto-focus on for better feature tracking on shelves with
472
+ // small text and packaging detail.
473
+ config.isAutoFocusEnabled = true
474
+
475
+ arSession.run(config, options: [.resetTracking, .removeExistingAnchors])
476
+ // V16-diag — log the chosen video format so we can correlate
477
+ // batch-keyframe memory with ARFrame resolution. iPhone Pro
478
+ // models can default to higher-res capture which inflates
479
+ // every downstream cv::Mat allocation 3-5×.
480
+ let vfRes = config.videoFormat.imageResolution
481
+ os_log(.fault, log: arSessionDiagLog,
482
+ "[V16-diag] AR videoFormat: %dx%d @ %d fps",
483
+ Int32(vfRes.width), Int32(vfRes.height),
484
+ Int32(config.videoFormat.framesPerSecond))
485
+ isRunning = true
486
+ currentTrackingState = .initialising
487
+ }
488
+
489
+ @objc public func stop() {
490
+ guard isRunning else { return }
491
+ arSession.pause()
492
+ isRunning = false
493
+ currentTrackingState = .notAvailable
494
+ clearPoseLog()
495
+ // V15.0b — clear latched plane so the next capture detects
496
+ // afresh. Plane geometry is per-capture: a different
497
+ // fixture in a different orientation needs a new lock.
498
+ // V15.0e — also reset the rejected-alignment cache so the
499
+ // next capture's UI starts at "Searching" rather than
500
+ // showing a stale alignment from the previous capture.
501
+ planeLatchLock.lock()
502
+ detectedPlaneTransformInternal = nil
503
+ bestRejectedAlignment = -1.0
504
+ planeLatchLock.unlock()
505
+ }
506
+
507
/// Discard every recorded pose so the next panorama starts from an
/// empty log. Call this between captures.
///
/// The removal is enqueued as a barrier block on `poseLogQueue`, so
/// this method returns before the log is actually emptied.
@objc public func clearPoseLog() {
    poseLogQueue.async(flags: .barrier) { [weak self] in
        guard let self = self else { return }
        self.poseLog.removeAll(keepingCapacity: true)
    }
}
514
+
515
/// Return a copy of every logged pose, in capture order.
/// The Phase 5 stitcher calls this after recording stops.
///
/// The read is performed synchronously on `poseLogQueue`.
@objc public func snapshotPoseLog() -> [RNSARFramePose] {
    return poseLogQueue.sync {
        poseLog.map { entry in entry.1 }
    }
}
524
+
525
/// Look up the logged pose whose timestamp is nearest to `targetMs`.
/// The stitcher uses this to pair each video frame with a pose.
///
/// - Parameters:
///   - targetMs: Timestamp to match, in milliseconds.
///   - maxToleranceMs: Largest accepted distance (in ms) between the
///     target and the matched pose.
/// - Returns: The nearest pose, or `nil` when the log is empty or the
///   nearest pose is farther than `maxToleranceMs` from the target.
@objc public func poseClosestToTimestamp(
    _ targetMs: Double,
    maxToleranceMs: Double = 50
) -> RNSARFramePose? {
    return poseLogQueue.sync { () -> RNSARFramePose? in
        var winner: RNSARFramePose?
        var smallestGap = Double.infinity
        // Linear scan; the first entry with the strictly smallest gap wins.
        for (_, candidate) in poseLog {
            let gap = abs(candidate.timestampMs - targetMs)
            guard gap < smallestGap else { continue }
            smallestGap = gap
            winner = candidate
        }
        return smallestGap > maxToleranceMs ? nil : winner
    }
}
547
+
548
+ // MARK: - ARSessionDelegate
549
+
550
/// Per-frame ARKit callback: logs the frame's pose, forwards the frame
/// to the incremental-stitching consumer (if any), and appends it to
/// the in-flight recording (if any).
public func session(_ session: ARSession, didUpdate frame: ARFrame) {
    // ARKit fires this ~60Hz. Record the pose, keyed by frame timestamp.
    let framePose = makePose(from: frame)
    let capturedAt = frame.timestamp
    poseLogQueue.async(flags: .barrier) { [weak self] in
        guard let self = self else { return }
        self.poseLog.append((capturedAt, framePose))
        // Bound memory: once over the cap, drop the oldest entries.
        let overflow = self.poseLog.count - Self.MAX_POSE_LOG
        if overflow > 0 {
            self.poseLog.removeFirst(overflow)
        }
    }

    // Hand the frame to the live incremental-stitching consumer when
    // one is registered. The consumer MUST finish with the pixel
    // buffer before returning (ARKit pool-reuse contract, same
    // constraint as the recording path below); `IncrementalStitcher`
    // converts NV12 → cv::Mat synchronously inside the call and does
    // the heavy work on its own queue.
    incrementalConsumer?.consumeFrame(pixelBuffer: frame.capturedImage, pose: framePose)

    // Recording path: append to the asset writer DIRECTLY, with no
    // queue hop.
    //
    // ARKit keeps captured pixel buffers in a small pool and may reuse
    // a buffer once the next ARFrame is captured; using it beyond the
    // frame's scope requires a copy. A CF retain on `capturedImage`
    // does NOT protect against that reuse: hopping queues with the
    // buffer produced EXC_BAD_ACCESS crashes (Sentry: "release" at
    // objc_retain) because ARKit had reclaimed the memory by the time
    // the closure ran. Appending synchronously here means the buffer is
    // consumed (adaptor.append makes its own internal copy) before the
    // delegate returns.
    //
    // Coordination with start/stop uses `writerLock.try()`: when
    // start/stop holds the lock the frame is simply dropped (graceful
    // backpressure) instead of blocking ARKit's delegate. The slow
    // AVFoundation calls (startWriting, finishWriting) run OUTSIDE the
    // lock, so it is only ever held for microseconds.
    guard writerLock.try() else { return }
    defer { writerLock.unlock() }

    guard let writer = assetWriter,
          let input = videoInput,
          let adaptor = pixelBufferAdaptor,
          writer.status == .writing,
          input.isReadyForMoreMediaData,
          let recordingOrigin = recordingStartTime else {
        return
    }

    // Presentation time is the frame timestamp relative to recording start.
    let absoluteTime = CMTime(seconds: frame.timestamp, preferredTimescale: 1_000_000)
    let presentationTime = CMTimeSubtract(absoluteTime, recordingOrigin)
    adaptor.append(frame.capturedImage, withPresentationTime: presentationTime)
}
617
+
618
/// Mirror ARKit's camera tracking state into `currentTrackingState`.
public func session(
    _ session: ARSession,
    cameraDidChangeTrackingState camera: ARCamera
) {
    let mapped: RNSARTrackingState
    switch camera.trackingState {
    case .notAvailable:
        mapped = .notAvailable
    case .limited:
        mapped = .limited
    case .normal:
        mapped = .tracking
    }
    currentTrackingState = mapped
}
631
+
632
/// ARKit reported a fatal session error: log it and mark the session
/// as stopped with tracking unavailable.
public func session(_ session: ARSession, didFailWithError error: Error) {
    // The first argument of NSLog is a printf-style FORMAT string. Pass
    // the description as a `%@` argument so any `%` sequence inside the
    // error text is printed literally instead of being interpreted as a
    // format specifier (which would read garbage varargs).
    NSLog("[RNSARSession] failed: %@", error.localizedDescription)
    currentTrackingState = .notAvailable
    isRunning = false
}
637
+
638
// V15.0b — latch the first acceptable vertical plane. Once a plane is
// latched, later callbacks return immediately so canvas geometry stays
// stable across the capture.
//
// V15.0d — alignment filter (3A). ARKit's vertical-plane detection
// reports whatever vertical surface it finds (wall in front, wall
// behind, side walls, doorframes, table edges). Latching the FIRST
// reported plane often picks a surface unrelated to the scan target
// and produces a wildly wrong projection in the V15.0b path. So a
// candidate is accepted only if its surface normal is within
// `planeAlignmentThreshold` (cosine of angle) of the camera's facing
// direction. If nothing in this batch passes, the latch stays nil so a
// later callback can try again.
public func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
    planeLatchLock.lock()
    defer { planeLatchLock.unlock() }
    if detectedPlaneTransformInternal != nil { return }

    guard let cameraTransform = session.currentFrame?.camera.transform else {
        // No camera pose yet — log and bail; a later didAdd may succeed
        // once the session warms up.
        os_log(.fault, log: arSessionDiagLog,
               "[V15.0d-plane-filter] didAdd received but no camera pose yet; deferring latch")
        return
    }

    // The ARKit camera looks down its local -Z. Column 2 of the camera
    // transform is local +Z expressed in world space, so the facing
    // direction in world space is its negation.
    let cameraBackAxis = cameraTransform.columns.2
    let cameraFacingWorld = simd_float3(
        -cameraBackAxis.x,
        -cameraBackAxis.y,
        -cameraBackAxis.z
    )

    for case let plane as ARPlaneAnchor in anchors where plane.alignment == .vertical {
        // ARPlaneAnchor convention: local Y axis (column 1) is the
        // surface normal, here expressed in world space.
        let normalColumn = plane.transform.columns.1
        let planeNormalWorld = simd_float3(normalColumn.x, normalColumn.y, normalColumn.z)

        // The normal may point to either side of the wall, so score the
        // orientation that is better aligned with the camera direction.
        let signedDot = simd_dot(planeNormalWorld, cameraFacingWorld)
        let alignment = max(signedDot, -signedDot)

        guard alignment < planeAlignmentThreshold else {
            // Passed the filter: latch this plane and stop looking.
            detectedPlaneTransformInternal = plane.transform
            os_log(.fault, log: arSessionDiagLog,
                   "[V15.0b-plane] latched vertical plane alignment=%f extent=%fx%f centre=(%f,%f,%f)",
                   alignment,
                   plane.extent.x, plane.extent.z,
                   plane.center.x, plane.center.y, plane.center.z)
            return
        }

        // Rejected — not the surface the camera is aimed at. Remember
        // the best rejected score so the JS UI can show a progress hint
        // ("found plane but off-axis 0.45").
        if alignment > bestRejectedAlignment {
            bestRejectedAlignment = alignment
        }
        os_log(.fault, log: arSessionDiagLog,
               "[V15.0d-plane-filter] REJECTED candidate plane: alignment=%f < threshold=%f extent=%fx%f",
               alignment, planeAlignmentThreshold,
               plane.extent.x, plane.extent.z)
    }
}
720
+
721
/// V15.0d — second chance for plane latching.
///
/// ARKit refines plane anchors over time and reports them here. If the
/// alignment filter rejected every candidate earlier (e.g. the user was
/// not yet aimed at the wall when detection fired), the refined anchors
/// are re-scored against the current camera direction and the first one
/// that passes is latched. Once a plane is latched this is a no-op.
public func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
    planeLatchLock.lock()
    defer { planeLatchLock.unlock() }
    if detectedPlaneTransformInternal != nil { return }

    guard let cameraTransform = session.currentFrame?.camera.transform else {
        return
    }

    // Camera facing direction in world space = negated column 2 of the
    // camera transform.
    let cameraBackAxis = cameraTransform.columns.2
    let cameraFacingWorld = simd_float3(
        -cameraBackAxis.x,
        -cameraBackAxis.y,
        -cameraBackAxis.z
    )

    for case let plane as ARPlaneAnchor in anchors where plane.alignment == .vertical {
        // Plane surface normal in world space = column 1 of its transform.
        let normalColumn = plane.transform.columns.1
        let planeNormalWorld = simd_float3(normalColumn.x, normalColumn.y, normalColumn.z)

        // Sign-agnostic alignment score between normal and view direction.
        let signedDot = simd_dot(planeNormalWorld, cameraFacingWorld)
        let alignment = max(signedDot, -signedDot)

        guard alignment < planeAlignmentThreshold else {
            detectedPlaneTransformInternal = plane.transform
            os_log(.fault, log: arSessionDiagLog,
                   "[V15.0b-plane] latched vertical plane (via didUpdate) alignment=%f extent=%fx%f centre=(%f,%f,%f)",
                   alignment,
                   plane.extent.x, plane.extent.z,
                   plane.center.x, plane.center.y, plane.center.z)
            return
        }

        // Rejected: only track the best score seen so far.
        if alignment > bestRejectedAlignment {
            bestRejectedAlignment = alignment
        }
    }
}
769
+
770
+ // MARK: - Phase 5: AR-backed photo + video capture
771
+
772
/// Save the current AR camera frame as a JPEG file.
///
/// - Parameters:
///   - rawPath: Destination path. When empty, a fresh path inside
///     `NSTemporaryDirectory()` is generated — this matches
///     vision-camera's API, where the path is an OUTPUT, not an input.
///   - quality: JPEG quality; clamped to 0...100.
///   - completion: Called with a result dictionary shaped like
///     vision-camera's PhotoFile on success, or with an `NSError`
///     (domain `RNImageStitcherARCapture`, codes 2001–2003, or the
///     file-write error) on failure.
@objc public func takePhoto(
    toPath rawPath: String,
    quality: Int,
    completion: @escaping ([String: Any]?, NSError?) -> Void
) {
    // Shared failure reporter for the capture-specific error codes.
    func reportFailure(_ code: Int, _ message: String) {
        completion(nil, NSError(
            domain: "RNImageStitcherARCapture",
            code: code,
            userInfo: [NSLocalizedDescriptionKey: message]
        ))
    }

    let destination: String = rawPath.isEmpty
        ? (NSTemporaryDirectory() as NSString).appendingPathComponent(
            "RNImageStitcherAR-\(UUID().uuidString).jpg")
        : rawPath

    guard let frame = arSession.currentFrame else {
        reportFailure(2001, "AR session has no current frame — start the session first.")
        return
    }

    // ARKit's capturedImage is in landscape sensor orientation no
    // matter how the device is held. Rotate it to portrait (how the
    // phone is held for shelf audits) with a 90° clockwise CIImage
    // orientation; otherwise the photo appears sideways in consumers
    // that do not honour EXIF (RN's <Image>, the OpenCV stitcher).
    let portraitImage = CIImage(cvPixelBuffer: frame.capturedImage).oriented(.right)
    let renderer = CIContext(options: nil)
    guard let cgImage = renderer.createCGImage(portraitImage, from: portraitImage.extent) else {
        reportFailure(2002, "Failed to render AR frame to CGImage.")
        return
    }

    let boundedQuality = min(max(quality, 0), 100)
    let compression = CGFloat(boundedQuality) / 100.0
    guard let jpegData = UIImage(cgImage: cgImage).jpegData(compressionQuality: compression) else {
        reportFailure(2003, "Failed to encode AR frame as JPEG.")
        return
    }

    let cleanedPath = Self.normalisePath(destination)
    let fileURL = URL(fileURLWithPath: cleanedPath)
    // Best-effort removal of an existing file at the same path —
    // vision-camera's takePhoto overwrites, and this mirrors it.
    try? FileManager.default.removeItem(at: fileURL)

    do {
        try jpegData.write(to: fileURL)
    } catch {
        completion(nil, error as NSError)
        return
    }

    let photoInfo: [String: Any] = [
        "path": cleanedPath,
        "width": cgImage.width,
        "height": cgImage.height,
        "isMirrored": false,
        "isRawPhoto": false,
    ]
    completion(photoInfo, nil)
}
855
+
856
/// Begin recording AR frames to an mp4 file.
///
/// The per-frame append happens implicitly inside the ARSessionDelegate
/// frame callback once the writer state has been published here.
///
/// No audio is recorded: the panorama stitcher only consumes video
/// frames, and audio adds AVCaptureSession setup that conflicts with
/// ARKit's exclusive camera access.
///
/// - Parameters:
///   - rawPath: Destination path. When empty, a fresh path inside
///     `NSTemporaryDirectory()` is generated.
///   - completion: Called with the cleaned output path once the
///     AVAssetWriter is ready to accept frames, or with an `NSError`
///     (domain `RNImageStitcherARCapture`, codes 2010–2013, or the
///     AVAssetWriter initialisation error) on failure.
@objc public func startRecording(
    toPath rawPath: String,
    completion: @escaping (String?, NSError?) -> Void
) {
    let resolvedPath: String
    if rawPath.isEmpty {
        let dir = NSTemporaryDirectory()
        resolvedPath = (dir as NSString).appendingPathComponent(
            "RNImageStitcherAR-\(UUID().uuidString).mp4"
        )
    } else {
        resolvedPath = rawPath
    }

    // Quick existence check under lock — bail if already recording.
    writerLock.lock()
    let alreadyRecording = (self.assetWriter != nil)
    writerLock.unlock()
    if alreadyRecording {
        completion(nil, NSError(
            domain: "RNImageStitcherARCapture",
            code: 2010,
            userInfo: [NSLocalizedDescriptionKey:
                "A recording is already in progress."]
        ))
        return
    }

    guard let frame = self.arSession.currentFrame else {
        completion(nil, NSError(
            domain: "RNImageStitcherARCapture",
            code: 2011,
            userInfo: [NSLocalizedDescriptionKey:
                "AR session has no current frame — start the session first."]
        ))
        return
    }

    // Heavy AVFoundation setup happens OUTSIDE the lock so the
    // ARSession delegate's per-frame `try()` doesn't pile up dropped
    // frames during this ~10-30ms window.
    let pixelBuffer = frame.capturedImage
    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)
    let cleanedPath = Self.normalisePath(resolvedPath)
    let url = URL(fileURLWithPath: cleanedPath)
    try? FileManager.default.removeItem(at: url)

    do {
        let writer = try AVAssetWriter(outputURL: url, fileType: .mp4)
        // Encode H.264 at sensor dimensions (landscape).
        let videoSettings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: width,
            AVVideoHeightKey: height,
        ]
        let input = AVAssetWriterInput(
            mediaType: .video,
            outputSettings: videoSettings
        )
        input.expectsMediaDataInRealTime = true
        // NO rotation transform on the AR-recorded mp4.
        //
        // Phase 5 pose-driven stitching consumes the ARKit pose's
        // intrinsics (fx, fy, cx, cy), which describe the SENSOR'S
        // NATIVE LANDSCAPE coordinate system. If a 90° rotation
        // transform were applied and `extractFramesFromVideoAtPath`
        // honoured it via `appliesPreferredTrackTransform=YES`, the
        // extracted frames would come out PORTRAIT — orthogonal to
        // what the intrinsics describe — and cv::detail::Warper would
        // project with mismatched geometry (rotated/sheared panorama).
        //
        // Keeping frames in sensor-native landscape means the
        // intrinsics match the frame data, and the output panorama is
        // landscape, the natural orientation for a horizontal pan.
        //
        // The feature-matched path (vision-camera mp4s) is unaffected —
        // it estimates intrinsics from features, so any orientation
        // works internally.

        // Source-pixel attributes: declare the format the adaptor
        // accepts. ARKit emits NV12 (YpCbCr 4:2:0 bi-planar), which the
        // adaptor handles directly without a per-frame conversion.
        let attrs: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String:
                kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
            kCVPixelBufferWidthKey as String: width,
            kCVPixelBufferHeightKey as String: height,
        ]
        let adaptor = AVAssetWriterInputPixelBufferAdaptor(
            assetWriterInput: input,
            sourcePixelBufferAttributes: attrs
        )

        guard writer.canAdd(input) else {
            completion(nil, NSError(
                domain: "RNImageStitcherARCapture",
                code: 2012,
                userInfo: [NSLocalizedDescriptionKey:
                    "AVAssetWriter rejected the video input — codec/format mismatch."]
            ))
            return
        }
        writer.add(input)

        // Frame timestamps are rebased against this origin when
        // appended, so the session itself starts at zero.
        let startTime = CMTime(
            seconds: frame.timestamp,
            preferredTimescale: 1_000_000
        )

        // `startWriting()` reports failure through its return value
        // (details in `writer.error`). Starting a session on a writer
        // that is not in the `.writing` state raises an Objective-C
        // exception, so stop here and surface the error instead.
        guard writer.startWriting() else {
            var info: [String: Any] = [
                NSLocalizedDescriptionKey:
                    "AVAssetWriter failed to start writing."
            ]
            if let underlying = writer.error {
                info[NSUnderlyingErrorKey] = underlying
            }
            completion(nil, NSError(
                domain: "RNImageStitcherARCapture",
                code: 2013,
                userInfo: info
            ))
            return
        }
        writer.startSession(atSourceTime: .zero)

        // Briefly hold the lock to swap in the new writer state. The
        // ARSession delegate's per-frame `try()` will see consistent
        // state once the lock is released.
        self.writerLock.lock()
        self.assetWriter = writer
        self.videoInput = input
        self.pixelBufferAdaptor = adaptor
        self.recordingStartTime = startTime
        self.writerLock.unlock()

        // Reset the pose log so this recording's frames correlate with
        // a fresh window of poses; the stitcher matches video frames to
        // poses by timestamp from recording start.
        self.poseLogQueue.async(flags: .barrier) { [weak self] in
            self?.poseLog.removeAll(keepingCapacity: true)
        }

        // `width`/`height` are Swift `Int` (64-bit), hence `%ld`.
        NSLog("[RNImageStitcherARCapture] startRecording: %ldx%ld → %@",
              width, height, cleanedPath)
        completion(cleanedPath, nil)
    } catch {
        completion(nil, error as NSError)
    }
}
1006
+
1007
/// Finalise the in-progress recording.
///
/// - Parameter completion: Called with the resulting file's metadata
///   (`path`, `duration`, `size`, `width`, `height`) — the shape
///   mirrors vision-camera's VideoFile so JS consumers don't branch —
///   or with an `NSError` when there is no active recording (code
///   2020) or the writer failed (the writer's own error, or code 2021
///   when it provides none).
@objc public func stopRecording(
    completion: @escaping ([String: Any]?, NSError?) -> Void
) {
    // Builds the error reported when the writer did not complete.
    func writerFailure(_ writer: AVAssetWriter) -> NSError {
        if let underlying = writer.error {
            return underlying as NSError
        }
        return NSError(
            domain: "RNImageStitcherARCapture",
            code: 2021,
            userInfo: [NSLocalizedDescriptionKey:
                "AVAssetWriter did not finish the recording successfully."]
        )
    }

    // Briefly acquire the lock just to capture + clear the writer
    // state. Strong locals keep the writer + input alive across the
    // lock release for the slow finalise. Once self.assetWriter is
    // nil, any in-flight delegate `try()` that succeeds finds nil
    // writer state and skips — no further appends can race with
    // finishWriting.
    writerLock.lock()
    let writer = self.assetWriter
    let input = self.videoInput
    self.assetWriter = nil
    self.videoInput = nil
    self.pixelBufferAdaptor = nil
    self.recordingStartTime = nil
    writerLock.unlock()

    guard let writer = writer, let input = input else {
        completion(nil, NSError(
            domain: "RNImageStitcherARCapture",
            code: 2020,
            userInfo: [NSLocalizedDescriptionKey:
                "No active recording to stop."]
        ))
        return
    }

    // If the writer already left the `.writing` state (e.g. it failed
    // while frames were being appended) there is nothing to finalise;
    // report the failure instead of driving a dead writer.
    guard writer.status == .writing else {
        completion(nil, writerFailure(writer))
        return
    }

    input.markAsFinished()
    let outputURL = writer.outputURL
    writer.finishWriting {
        // finishWriting reports its outcome through `status`; only a
        // completed writer has a usable file to describe.
        guard writer.status == .completed else {
            completion(nil, writerFailure(writer))
            return
        }

        let path = outputURL.path
        let asset = AVAsset(url: outputURL)
        let durationSec = CMTimeGetSeconds(asset.duration)
        let fileSize = (try? FileManager.default
            .attributesOfItem(atPath: path))?[.size] as? Int ?? 0
        let track = asset.tracks(withMediaType: .video).first
        let naturalSize = track?.naturalSize ?? .zero
        NSLog("[RNImageStitcherARCapture] stopRecording: %.2fs, %lld bytes",
              durationSec, Int64(fileSize))
        completion([
            "path": path,
            "duration": durationSec,
            "size": fileSize,
            "width": Int(naturalSize.width),
            "height": Int(naturalSize.height),
        ], nil)
    }
}
1060
+
1061
+ // MARK: - Helpers
1062
+
1063
/// Remove a leading `file://` scheme that some callers attach; any
/// other input is returned unchanged. Same logic the OpenCV stitcher
/// uses, kept local here so RNSARSession stays independent of the
/// OpenCV path.
private static func normalisePath(_ path: String) -> String {
    let scheme = "file://"
    guard path.hasPrefix(scheme) else { return path }
    return String(path.dropFirst(scheme.count))
}
1072
+
1073
/// Convert an ARKit frame's camera state into an `RNSARFramePose`:
/// translation, rotation quaternion, pinhole intrinsics, image
/// resolution, timestamp (ms) and tracking state.
private func makePose(from frame: ARFrame) -> RNSARFramePose {
    let camera = frame.camera

    // ARKit's transform is a 4x4 matrix; translation is the last
    // column and the rotational part is converted to a quaternion.
    let t = camera.transform
    let translation = simd_float3(t.columns.3.x, t.columns.3.y, t.columns.3.z)
    let q = simd_quatf(t)

    // Camera intrinsics K = | fx  0  cx |
    //                       |  0 fy  cy |
    //                       |  0  0   1 |
    // simd matrices are stored and subscripted COLUMN-first
    // (`k[column][row]`), so the principal point lives in column 2:
    // cx = k[2][0], cy = k[2][1]. Indexing `k[0][2]` / `k[1][2]` would
    // read the zero entries of the bottom row instead.
    let k = camera.intrinsics
    let imageRes = camera.imageResolution

    let mappedState: RNSARTrackingState
    switch camera.trackingState {
    case .normal: mappedState = .tracking
    case .limited: mappedState = .limited
    case .notAvailable: mappedState = .notAvailable
    }

    return RNSARFramePose(
        tx: Double(translation.x),
        ty: Double(translation.y),
        tz: Double(translation.z),
        qx: Double(q.imag.x),
        qy: Double(q.imag.y),
        qz: Double(q.imag.z),
        qw: Double(q.real),
        fx: Double(k.columns.0.x),
        fy: Double(k.columns.1.y),
        cx: Double(k.columns.2.x),
        cy: Double(k.columns.2.y),
        imageWidth: Int(imageRes.width),
        imageHeight: Int(imageRes.height),
        // ARFrame timestamps are in seconds; the pose stores milliseconds.
        timestampMs: frame.timestamp * 1000.0,
        trackingState: mappedState
    )
}
1111
+ }