react-native-image-stitcher 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/LICENSE +201 -0
- package/NOTICE +21 -0
- package/README.md +189 -0
- package/RNImageStitcher.podspec +76 -0
- package/android/build.gradle +224 -0
- package/android/src/main/AndroidManifest.xml +3 -0
- package/android/src/main/cpp/CMakeLists.txt +124 -0
- package/android/src/main/cpp/image_stitcher_jni.cpp +145 -0
- package/android/src/main/cpp/keyframe_gate_jni.cpp +204 -0
- package/android/src/main/java/io/imagestitcher/rn/BatchStitcher.kt +426 -0
- package/android/src/main/java/io/imagestitcher/rn/IncrementalFirstwinsEngine.kt +960 -0
- package/android/src/main/java/io/imagestitcher/rn/IncrementalStitcher.kt +2371 -0
- package/android/src/main/java/io/imagestitcher/rn/KeyframeGate.kt +256 -0
- package/android/src/main/java/io/imagestitcher/rn/QualityChecker.kt +167 -0
- package/android/src/main/java/io/imagestitcher/rn/RNImageStitcherPackage.kt +39 -0
- package/android/src/main/java/io/imagestitcher/rn/RNSARCameraView.kt +558 -0
- package/android/src/main/java/io/imagestitcher/rn/RNSARCameraViewManager.kt +35 -0
- package/android/src/main/java/io/imagestitcher/rn/RNSARSession.kt +784 -0
- package/android/src/main/java/io/imagestitcher/rn/ar/BackgroundRenderer.kt +176 -0
- package/android/src/main/java/io/imagestitcher/rn/ar/ShaderUtil.kt +67 -0
- package/android/src/main/java/io/imagestitcher/rn/ar/YuvImageConverter.kt +201 -0
- package/cpp/ar_frame_pose.h +63 -0
- package/cpp/keyframe_gate.cpp +927 -0
- package/cpp/keyframe_gate.hpp +240 -0
- package/cpp/stitcher.cpp +2207 -0
- package/cpp/stitcher.hpp +275 -0
- package/dist/ar/useARSession.d.ts +102 -0
- package/dist/ar/useARSession.js +133 -0
- package/dist/camera/ARCameraView.d.ts +93 -0
- package/dist/camera/ARCameraView.js +170 -0
- package/dist/camera/Camera.d.ts +134 -0
- package/dist/camera/Camera.js +688 -0
- package/dist/camera/CameraShutter.d.ts +80 -0
- package/dist/camera/CameraShutter.js +237 -0
- package/dist/camera/CameraView.d.ts +65 -0
- package/dist/camera/CameraView.js +117 -0
- package/dist/camera/CaptureControlsBar.d.ts +87 -0
- package/dist/camera/CaptureControlsBar.js +82 -0
- package/dist/camera/CaptureHeader.d.ts +62 -0
- package/dist/camera/CaptureHeader.js +81 -0
- package/dist/camera/CapturePreview.d.ts +70 -0
- package/dist/camera/CapturePreview.js +188 -0
- package/dist/camera/CaptureStatusOverlay.d.ts +75 -0
- package/dist/camera/CaptureStatusOverlay.js +326 -0
- package/dist/camera/CaptureThumbnailStrip.d.ts +87 -0
- package/dist/camera/CaptureThumbnailStrip.js +177 -0
- package/dist/camera/IncrementalPanGuide.d.ts +83 -0
- package/dist/camera/IncrementalPanGuide.js +267 -0
- package/dist/camera/PanoramaBandOverlay.d.ts +107 -0
- package/dist/camera/PanoramaBandOverlay.js +399 -0
- package/dist/camera/PanoramaConfirmModal.d.ts +57 -0
- package/dist/camera/PanoramaConfirmModal.js +128 -0
- package/dist/camera/PanoramaGuidance.d.ts +79 -0
- package/dist/camera/PanoramaGuidance.js +246 -0
- package/dist/camera/PanoramaSettingsModal.d.ts +311 -0
- package/dist/camera/PanoramaSettingsModal.js +611 -0
- package/dist/camera/ViewportCropOverlay.d.ts +46 -0
- package/dist/camera/ViewportCropOverlay.js +67 -0
- package/dist/camera/useCapture.d.ts +111 -0
- package/dist/camera/useCapture.js +160 -0
- package/dist/camera/useDeviceOrientation.d.ts +48 -0
- package/dist/camera/useDeviceOrientation.js +131 -0
- package/dist/camera/useVideoCapture.d.ts +79 -0
- package/dist/camera/useVideoCapture.js +151 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.js +39 -0
- package/dist/quality/normaliseOrientation.d.ts +36 -0
- package/dist/quality/normaliseOrientation.js +62 -0
- package/dist/quality/runQualityCheck.d.ts +41 -0
- package/dist/quality/runQualityCheck.js +98 -0
- package/dist/sensors/useIMUTranslationGate.d.ts +70 -0
- package/dist/sensors/useIMUTranslationGate.js +235 -0
- package/dist/stitching/IncrementalStitcherView.d.ts +41 -0
- package/dist/stitching/IncrementalStitcherView.js +157 -0
- package/dist/stitching/incremental.d.ts +930 -0
- package/dist/stitching/incremental.js +133 -0
- package/dist/stitching/stitchFrames.d.ts +55 -0
- package/dist/stitching/stitchFrames.js +56 -0
- package/dist/stitching/stitchVideo.d.ts +119 -0
- package/dist/stitching/stitchVideo.js +57 -0
- package/dist/stitching/useIncrementalJSDriver.d.ts +74 -0
- package/dist/stitching/useIncrementalJSDriver.js +199 -0
- package/dist/stitching/useIncrementalStitcher.d.ts +58 -0
- package/dist/stitching/useIncrementalStitcher.js +172 -0
- package/dist/types.d.ts +58 -0
- package/dist/types.js +15 -0
- package/ios/Package.swift +72 -0
- package/ios/Sources/RNImageStitcher/ARCameraViewManager.m +33 -0
- package/ios/Sources/RNImageStitcher/ARCameraViewManager.swift +40 -0
- package/ios/Sources/RNImageStitcher/ARSessionBridge.m +55 -0
- package/ios/Sources/RNImageStitcher/ARSessionBridge.swift +149 -0
- package/ios/Sources/RNImageStitcher/IncrementalStitcher.swift +2727 -0
- package/ios/Sources/RNImageStitcher/IncrementalStitcherBridge.m +85 -0
- package/ios/Sources/RNImageStitcher/IncrementalStitcherBridge.swift +625 -0
- package/ios/Sources/RNImageStitcher/KeyframeGate.swift +328 -0
- package/ios/Sources/RNImageStitcher/KeyframeGateBridge.h +141 -0
- package/ios/Sources/RNImageStitcher/KeyframeGateBridge.mm +278 -0
- package/ios/Sources/RNImageStitcher/OpenCVIncrementalStitcher.h +473 -0
- package/ios/Sources/RNImageStitcher/OpenCVIncrementalStitcher.mm +1326 -0
- package/ios/Sources/RNImageStitcher/OpenCVKeyframeCollector.h +97 -0
- package/ios/Sources/RNImageStitcher/OpenCVKeyframeCollector.mm +296 -0
- package/ios/Sources/RNImageStitcher/OpenCVSlitScanStitcher.h +103 -0
- package/ios/Sources/RNImageStitcher/OpenCVSlitScanStitcher.mm +3285 -0
- package/ios/Sources/RNImageStitcher/OpenCVStitcher.h +238 -0
- package/ios/Sources/RNImageStitcher/OpenCVStitcher.mm +1880 -0
- package/ios/Sources/RNImageStitcher/QualityChecker.swift +252 -0
- package/ios/Sources/RNImageStitcher/QualityCheckerBridge.m +26 -0
- package/ios/Sources/RNImageStitcher/QualityCheckerBridge.swift +72 -0
- package/ios/Sources/RNImageStitcher/RNSARCameraView.swift +114 -0
- package/ios/Sources/RNImageStitcher/RNSARSession.swift +1111 -0
- package/ios/Sources/RNImageStitcher/Stitcher.swift +243 -0
- package/ios/Sources/RNImageStitcher/StitcherBridge.m +28 -0
- package/ios/Sources/RNImageStitcher/StitcherBridge.swift +246 -0
- package/package.json +73 -0
- package/react-native.config.js +34 -0
- package/scripts/opencv-version.txt +1 -0
- package/scripts/postinstall-fetch-binaries.js +286 -0
- package/src/ar/useARSession.ts +210 -0
- package/src/camera/.gitkeep +0 -0
- package/src/camera/ARCameraView.tsx +256 -0
- package/src/camera/Camera.tsx +1053 -0
- package/src/camera/CameraShutter.tsx +292 -0
- package/src/camera/CameraView.tsx +157 -0
- package/src/camera/CaptureControlsBar.tsx +204 -0
- package/src/camera/CaptureHeader.tsx +184 -0
- package/src/camera/CapturePreview.tsx +318 -0
- package/src/camera/CaptureStatusOverlay.tsx +391 -0
- package/src/camera/CaptureThumbnailStrip.tsx +277 -0
- package/src/camera/IncrementalPanGuide.tsx +328 -0
- package/src/camera/PanoramaBandOverlay.tsx +498 -0
- package/src/camera/PanoramaConfirmModal.tsx +206 -0
- package/src/camera/PanoramaGuidance.tsx +327 -0
- package/src/camera/PanoramaSettingsModal.tsx +1357 -0
- package/src/camera/ViewportCropOverlay.tsx +81 -0
- package/src/camera/useCapture.ts +279 -0
- package/src/camera/useDeviceOrientation.ts +140 -0
- package/src/camera/useVideoCapture.ts +236 -0
- package/src/index.ts +53 -0
- package/src/quality/.gitkeep +0 -0
- package/src/quality/normaliseOrientation.ts +79 -0
- package/src/quality/runQualityCheck.ts +131 -0
- package/src/sensors/useIMUTranslationGate.ts +347 -0
- package/src/stitching/.gitkeep +0 -0
- package/src/stitching/IncrementalStitcherView.tsx +198 -0
- package/src/stitching/incremental.ts +1021 -0
- package/src/stitching/stitchFrames.ts +88 -0
- package/src/stitching/stitchVideo.ts +153 -0
- package/src/stitching/useIncrementalJSDriver.ts +273 -0
- package/src/stitching/useIncrementalStitcher.ts +252 -0
- package/src/types.ts +78 -0
|
@@ -0,0 +1,927 @@
|
|
|
1
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
//
|
|
3
|
+
// keyframe_gate.cpp — direct port of KeyframeGate.swift. See
|
|
4
|
+
// keyframe_gate.hpp + ../ios/Sources/RNImageStitcher/KeyframeGate.swift
|
|
5
|
+
// for design rationale.
|
|
6
|
+
//
|
|
7
|
+
// Math conventions
|
|
8
|
+
// ─────────────────
|
|
9
|
+
//
|
|
10
|
+
// All math is plain `float[3]` / `float[4]` — no third-party deps.
|
|
11
|
+
// simd_float3 dot/cross/normalize are translated to free helper
|
|
12
|
+
// functions below; the result is bitwise-equivalent to simd's
|
|
13
|
+
// scalar-fallback path (same IEEE ops, same instruction order).
|
|
14
|
+
//
|
|
15
|
+
// Quaternion convention (Hamilton, scalar-last storage): both ARKit's `simd_quatf`
|
|
16
|
+
// and ARCore's `Pose.getRotationQuaternion()` return (qx, qy, qz, qw)
|
|
17
|
+
// with qw as the real part. The `qrot` helper applies q to a vector
|
|
18
|
+
// using the closed-form v' = q · v · q⁻¹ expansion that matches
|
|
19
|
+
// simd_act(q, v) bitwise on the scalar fallback.
|
|
20
|
+
//
|
|
21
|
+
// 4x4 matrix layout: column-major (matches simd_float4x4 and ARCore
|
|
22
|
+
// Pose.toMatrix). m[0..3] = column 0, m[4..7] = column 1, etc.
|
|
23
|
+
//
|
|
24
|
+
// Threading: see keyframe_gate.hpp — not thread-safe; caller
|
|
25
|
+
// serialises. No statics, no globals; safe to instantiate multiple
|
|
26
|
+
// times.
|
|
27
|
+
|
|
28
|
+
#include "keyframe_gate.hpp"
|
|
29
|
+
|
|
30
|
+
#include <algorithm>
|
|
31
|
+
#include <cmath>
|
|
32
|
+
#include <cstring>
|
|
33
|
+
#include <cstdint>
|
|
34
|
+
#include <optional>
|
|
35
|
+
#include <vector>
|
|
36
|
+
|
|
37
|
+
// V16 A2 — sparse-flow novelty path.
|
|
38
|
+
//
|
|
39
|
+
// OpenCV is available on both platforms compiling this TU: iOS via the
|
|
40
|
+
// vendored opencv2.framework (see RNImageStitcher.podspec)
|
|
41
|
+
// and Android via the custom OpenCV NDK build (Android compile_commands
|
|
42
|
+
// shows `-I.../OpenCV-android-sdk/sdk/native/jni/include`). The Pose
|
|
43
|
+
// strategy path below stays OpenCV-free; only the Flow path uses these
|
|
44
|
+
// headers, but they're unconditional because both strategies share a
|
|
45
|
+
// single TU and there's no win from #ifdef-fencing.
|
|
46
|
+
#include <opencv2/core.hpp>
|
|
47
|
+
#include <opencv2/imgproc.hpp> // resize, INTER_AREA, goodFeaturesToTrack
|
|
48
|
+
#include <opencv2/video/tracking.hpp> // calcOpticalFlowPyrLK
|
|
49
|
+
|
|
50
|
+
namespace retailens {
|
|
51
|
+
namespace {
|
|
52
|
+
|
|
53
|
+
// ── Vec3 helpers ──────────────────────────────────────────────────
|
|
54
|
+
/// Plain 3-component float vector used by the pose / plane math.
struct Vec3 {
    float x;
    float y;
    float z;
};

/// Plain 2-component float vector (plane-local coordinates).
struct Vec2 {
    float x;
    float y;
};

/// Component-wise difference `a - b`.
inline Vec3 v3_sub(Vec3 a, Vec3 b) {
    return Vec3{ a.x - b.x, a.y - b.y, a.z - b.z };
}

/// Uniform scale of `a` by the scalar `s`.
inline Vec3 v3_scale(Vec3 a, float s) {
    return Vec3{ a.x * s, a.y * s, a.z * s };
}

/// Dot product. Terms are summed in x, y, z order on purpose: the
/// file aims for bitwise parity with the Swift port.
inline float v3_dot(Vec3 a, Vec3 b) {
    return a.x*b.x + a.y*b.y + a.z*b.z;
}

/// Cross product `a × b`.
inline Vec3 v3_cross(Vec3 a, Vec3 b) {
    Vec3 result;
    result.x = a.y*b.z - a.z*b.y;
    result.y = a.z*b.x - a.x*b.z;
    result.z = a.x*b.y - a.y*b.x;
    return result;
}

/// Euclidean length of `a`.
inline float v3_len(Vec3 a) {
    return std::sqrt(v3_dot(a, a));
}

/// Unit vector in the direction of `a`, or the zero vector when the
/// length is below 1e-12 (avoids dividing by ~0).
inline Vec3 v3_normalize(Vec3 a) {
    const float length = v3_len(a);
    if (length < 1e-12f) {
        return Vec3{ 0, 0, 0 };
    }
    // Division (not multiply-by-reciprocal) is kept so rounding is unchanged.
    return Vec3{ a.x / length, a.y / length, a.z / length };
}
|
|
71
|
+
|
|
72
|
+
/// Rotate vector `v` by unit quaternion `q = (qx, qy, qz, qw)` —
|
|
73
|
+
/// closed-form expansion equivalent to simd_act(q, v). Verified
|
|
74
|
+
/// bitwise-equivalent on scalar-fallback simd against q.act() output
|
|
75
|
+
/// for ~10 randomised poses (see test scaffolding below if/when we
|
|
76
|
+
/// add the parity harness).
|
|
77
|
+
inline Vec3 qrot(float qx, float qy, float qz, float qw, Vec3 v) {
|
|
78
|
+
// u = (qx, qy, qz)
|
|
79
|
+
Vec3 u = {qx, qy, qz};
|
|
80
|
+
Vec3 t = v3_scale(v3_cross(u, v), 2.0f);
|
|
81
|
+
// result = v + qw * t + u × t
|
|
82
|
+
Vec3 a = v3_scale(t, qw);
|
|
83
|
+
Vec3 b = v3_cross(u, t);
|
|
84
|
+
return { v.x + a.x + b.x,
|
|
85
|
+
v.y + a.y + b.y,
|
|
86
|
+
v.z + a.z + b.z };
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// ── 4x4 matrix column accessors (column-major layout) ─────────────
|
|
90
|
+
/// First three components (x, y, z) of column `col` of a column-major
/// 4×4 matrix stored as 16 contiguous floats. `col` is not range-checked.
inline Vec3 mat4_col_xyz(const float m[16], int col) {
    const int base = col * 4;
    return Vec3{ m[base], m[base + 1], m[base + 2] };
}
|
|
94
|
+
|
|
95
|
+
// ── Plane basis (mirror of KeyframeGate.swift `PlaneBasis`) ──────
|
|
96
|
+
struct PlaneBasis {
|
|
97
|
+
Vec3 origin;
|
|
98
|
+
Vec3 normal;
|
|
99
|
+
Vec3 tangentU;
|
|
100
|
+
Vec3 tangentV;
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
/// Build a plane basis from a 4×4 ARKit/ARCore plane transform.
|
|
104
|
+
/// Returns std::nullopt for degenerate input.
|
|
105
|
+
///
|
|
106
|
+
/// ARKit ARPlaneAnchor convention:
|
|
107
|
+
/// column 0 = tangent X (in-plane "right")
|
|
108
|
+
/// column 1 = surface normal
|
|
109
|
+
/// column 2 = tangent Z (in-plane "up")
|
|
110
|
+
/// column 3 = origin
|
|
111
|
+
///
|
|
112
|
+
/// We re-derive V from N × U so the basis is strictly orthonormal
|
|
113
|
+
/// even if columns drift over time. Right-handed result.
|
|
114
|
+
std::optional<PlaneBasis> planeBasisFromMatrix(const float m[16]) {
|
|
115
|
+
Vec3 n = mat4_col_xyz(m, 1);
|
|
116
|
+
Vec3 u = mat4_col_xyz(m, 0);
|
|
117
|
+
Vec3 o = mat4_col_xyz(m, 3);
|
|
118
|
+
float nLen = v3_len(n);
|
|
119
|
+
float uLen = v3_len(u);
|
|
120
|
+
if (nLen < 1e-6f || uLen < 1e-6f) return std::nullopt;
|
|
121
|
+
Vec3 nN = v3_scale(n, 1.0f / nLen);
|
|
122
|
+
Vec3 uN = v3_scale(u, 1.0f / uLen);
|
|
123
|
+
Vec3 v = v3_cross(nN, uN);
|
|
124
|
+
float vLen = v3_len(v);
|
|
125
|
+
if (vLen < 1e-6f) return std::nullopt;
|
|
126
|
+
return PlaneBasis{ o, nN, uN, v3_scale(v, 1.0f / vLen) };
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/// Express world point `p` in the plane's (u, v) frame: the offset from
/// the plane origin is projected onto tangentU and tangentV. The
/// component along the normal is discarded.
inline Vec2 worldToLocal(const PlaneBasis& basis, Vec3 p) {
    const Vec3 offset = v3_sub(p, basis.origin);
    const float localU = v3_dot(offset, basis.tangentU);
    const float localV = v3_dot(offset, basis.tangentV);
    return Vec2{ localU, localV };
}
|
|
133
|
+
|
|
134
|
+
// ── Camera ray geometry ───────────────────────────────────────────
|
|
135
|
+
|
|
136
|
+
/// Camera-forward axis in world coordinates derived from pose.
|
|
137
|
+
/// ARKit/ARCore camera frame: +Z back, so forward is q·(0,0,-1).
|
|
138
|
+
inline Vec3 cameraForwardWorld(const Pose& p) {
|
|
139
|
+
return v3_normalize(qrot(p.qx, p.qy, p.qz, p.qw, {0, 0, -1}));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/// Project the 4 image corners (TL, TR, BR, BL) of the frame onto
|
|
143
|
+
/// the plane via ray-plane intersection. Returns 4 plane-local
|
|
144
|
+
/// (u, v) points in metres, or std::nullopt if any corner ray
|
|
145
|
+
/// fails to intersect the plane (parallel or behind camera).
|
|
146
|
+
///
|
|
147
|
+
/// Intrinsics convention: OpenCV pinhole, (cx, cy) in pixels,
|
|
148
|
+
/// camera-frame +V going DOWN in image → we negate (v - cy) when
|
|
149
|
+
/// converting back to camera-frame coords where +Y is UP.
|
|
150
|
+
std::optional<std::vector<Vec2>> projectCornersOntoPlane(
|
|
151
|
+
const Pose& p,
|
|
152
|
+
const PlaneBasis& plane)
|
|
153
|
+
{
|
|
154
|
+
const float W = static_cast<float>(p.imageWidth);
|
|
155
|
+
const float H = static_cast<float>(p.imageHeight);
|
|
156
|
+
const Vec3 rayOrigin = { p.tx, p.ty, p.tz };
|
|
157
|
+
const float imgCorners[4][2] = {
|
|
158
|
+
{0.0f, 0.0f}, {W, 0.0f}, {W, H}, {0.0f, H}
|
|
159
|
+
};
|
|
160
|
+
std::vector<Vec2> out;
|
|
161
|
+
out.reserve(4);
|
|
162
|
+
for (int i = 0; i < 4; ++i) {
|
|
163
|
+
float u = imgCorners[i][0];
|
|
164
|
+
float v = imgCorners[i][1];
|
|
165
|
+
// Camera-space ray (before rotation): pinhole back-projection
|
|
166
|
+
// with image-V negation for camera +Y up.
|
|
167
|
+
Vec3 rayCam = {
|
|
168
|
+
(u - p.cx) / p.fx,
|
|
169
|
+
-(v - p.cy) / p.fy,
|
|
170
|
+
-1.0f
|
|
171
|
+
};
|
|
172
|
+
Vec3 rayWorld = v3_normalize(qrot(p.qx, p.qy, p.qz, p.qw, rayCam));
|
|
173
|
+
float denom = v3_dot(rayWorld, plane.normal);
|
|
174
|
+
if (std::fabs(denom) < 1e-6f) return std::nullopt; // parallel
|
|
175
|
+
float t = v3_dot(v3_sub(plane.origin, rayOrigin), plane.normal) / denom;
|
|
176
|
+
if (t <= 1e-3f) return std::nullopt; // behind / coincident
|
|
177
|
+
Vec3 worldPt = { rayOrigin.x + t * rayWorld.x,
|
|
178
|
+
rayOrigin.y + t * rayWorld.y,
|
|
179
|
+
rayOrigin.z + t * rayWorld.z };
|
|
180
|
+
out.push_back(worldToLocal(plane, worldPt));
|
|
181
|
+
}
|
|
182
|
+
return out;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// ── Polygon geometry (Sutherland-Hodgman convex clip + shoelace) ──
|
|
186
|
+
|
|
187
|
+
float polygonArea(const std::vector<Vec2>& pts) {
|
|
188
|
+
if (pts.size() < 3) return 0.0f;
|
|
189
|
+
float sum = 0.0f;
|
|
190
|
+
for (size_t i = 0, n = pts.size(); i < n; ++i) {
|
|
191
|
+
const Vec2& a = pts[i];
|
|
192
|
+
const Vec2& b = pts[(i + 1) % n];
|
|
193
|
+
sum += a.x * b.y - b.x * a.y;
|
|
194
|
+
}
|
|
195
|
+
return std::fabs(sum) * 0.5f;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
float signedArea(const std::vector<Vec2>& pts) {
|
|
199
|
+
if (pts.size() < 3) return 0.0f;
|
|
200
|
+
float sum = 0.0f;
|
|
201
|
+
for (size_t i = 0, n = pts.size(); i < n; ++i) {
|
|
202
|
+
const Vec2& a = pts[i];
|
|
203
|
+
const Vec2& b = pts[(i + 1) % n];
|
|
204
|
+
sum += a.x * b.y - b.x * a.y;
|
|
205
|
+
}
|
|
206
|
+
return sum * 0.5f;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
std::vector<Vec2> ensureCCW(std::vector<Vec2> pts) {
|
|
210
|
+
if (signedArea(pts) < 0.0f) {
|
|
211
|
+
std::vector<Vec2> r;
|
|
212
|
+
r.reserve(pts.size());
|
|
213
|
+
for (auto it = pts.rbegin(); it != pts.rend(); ++it) r.push_back(*it);
|
|
214
|
+
return r;
|
|
215
|
+
}
|
|
216
|
+
return pts;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/// True when `p` lies on or to the left of the directed edge a → b,
/// i.e. the 2D cross product (b - a) × (p - a) is non-negative. For a
/// counter-clockwise clip polygon this is the "inside" half-plane.
inline bool isInside(Vec2 p, Vec2 a, Vec2 b) {
    const float cross =
        (b.x - a.x) * (p.y - a.y) - (b.y - a.y) * (p.x - a.x);
    return cross >= 0.0f;
}
|
|
222
|
+
|
|
223
|
+
/// Intersection of the infinite line through `s`–`e` with the infinite
/// line through `a`–`b` (determinant form). The segments are not
/// bounds-checked; the caller only asks when the endpoints straddle
/// the clip edge.
///
/// @return The intersection point, or std::nullopt when the lines are
///         (near-)parallel (|determinant| < 1e-9).
std::optional<Vec2> lineIntersect(Vec2 s, Vec2 e, Vec2 a, Vec2 b) {
    const float clipDx = a.x - b.x;
    const float clipDy = a.y - b.y;
    const float segDx  = s.x - e.x;
    const float segDy  = s.y - e.y;

    const float det = clipDx * segDy - clipDy * segDx;
    if (std::fabs(det) < 1e-9f) {
        return std::nullopt;
    }

    const float clipCross = a.x * b.y - a.y * b.x;
    const float segCross  = s.x * e.y - s.y * e.x;

    Vec2 hit;
    hit.x = (clipCross * segDx - segCross * clipDx) / det;
    hit.y = (clipCross * segDy - segCross * clipDy) / det;
    return hit;
}
|
|
235
|
+
|
|
236
|
+
/// Convex polygon intersection via Sutherland-Hodgman. Both inputs
|
|
237
|
+
/// are 4-vertex convex quads (camera footprints projected onto the
|
|
238
|
+
/// plane). Returns area in m² of the intersection polygon (0 if
|
|
239
|
+
/// disjoint or degenerate).
|
|
240
|
+
float polygonIntersectionArea(const std::vector<Vec2>& subject,
|
|
241
|
+
const std::vector<Vec2>& clip)
|
|
242
|
+
{
|
|
243
|
+
std::vector<Vec2> subj = ensureCCW(subject);
|
|
244
|
+
std::vector<Vec2> clp = ensureCCW(clip);
|
|
245
|
+
std::vector<Vec2> output = subj;
|
|
246
|
+
for (size_t i = 0, ni = clp.size(); i < ni; ++i) {
|
|
247
|
+
if (output.empty()) return 0.0f;
|
|
248
|
+
Vec2 edgeStart = clp[i];
|
|
249
|
+
Vec2 edgeEnd = clp[(i + 1) % ni];
|
|
250
|
+
std::vector<Vec2> input = output;
|
|
251
|
+
output.clear();
|
|
252
|
+
output.reserve(input.size() + 1);
|
|
253
|
+
if (input.empty()) return 0.0f;
|
|
254
|
+
Vec2 s = input.back();
|
|
255
|
+
for (Vec2 e : input) {
|
|
256
|
+
bool eIn = isInside(e, edgeStart, edgeEnd);
|
|
257
|
+
bool sIn = isInside(s, edgeStart, edgeEnd);
|
|
258
|
+
if (eIn) {
|
|
259
|
+
if (!sIn) {
|
|
260
|
+
auto p = lineIntersect(s, e, edgeStart, edgeEnd);
|
|
261
|
+
if (p) output.push_back(*p);
|
|
262
|
+
}
|
|
263
|
+
output.push_back(e);
|
|
264
|
+
} else if (sIn) {
|
|
265
|
+
auto p = lineIntersect(s, e, edgeStart, edgeEnd);
|
|
266
|
+
if (p) output.push_back(*p);
|
|
267
|
+
}
|
|
268
|
+
s = e;
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
return polygonArea(output);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
} // anonymous namespace
|
|
275
|
+
|
|
276
|
+
// ── KeyframeGate::Impl (pimpl idiom) ─────────────────────────────
|
|
277
|
+
|
|
278
|
+
/// Private state of KeyframeGate (pimpl). All members carry their
/// defaults here; KeyframeGate::reset() clears only the per-capture
/// state, not the settings.
struct KeyframeGate::Impl {
    // ── Settings ──────────────────────────────────────────────────

    /// Gate on/off. When false, evaluate() passes every frame through.
    bool enabled{false};

    /// Minimum fraction of new content a frame needs to be accepted.
    /// 2026-05-15 (U4): default lowered 0.4 → 0.2, i.e. frames with 20%
    /// new content are accepted. Still tunable via setOverlapThreshold.
    /// Per the original notes the JS layer sets this explicitly on every
    /// start(), so this default only matters without the JS bridge.
    double overlapThreshold{0.2};

    /// Maximum number of accepted keyframes per capture.
    int32_t maxCount{6};

    /// V16 A2 — gating strategy. Pose stays the default so callers that
    /// have not switched keep the pre-A2 behaviour; per the original
    /// notes the host-side (TS) default is flipped to Flow separately.
    GateStrategy strategy{GateStrategy::Pose};

    /// V16 A2 — flow tunables (see the matching setters for clamping).
    int32_t flowMaxCorners{150};
    double flowQualityLevel{0.01};
    double flowMinDistance{10.0};

    /// V16 — translation-budget force-accept (Flow strategy only), in
    /// metres. 0.0 = disabled, which preserves pre-V16 behaviour for
    /// callers that do not opt in. See the header for the rationale.
    double flowMaxTranslationM{0.0};

    /// V16 — percentile used to aggregate per-feature absolute
    /// displacements into the novelty estimate. The 0.85 default means
    /// 85th-percentile-of-|Δx| and 85th-percentile-of-|Δy|, divided by
    /// the dominant axis's frame dimension. See the header for details.
    double flowNoveltyPercentile{0.85};

    /// 2026-05-14 — disables the angular-delta fallback. When true,
    /// evaluateAngularFallback() returns RejectOverlapTooHighAngular
    /// regardless of the actual angular delta, so flow-/pose-based
    /// novelty is the only acceptance signal.
    ///
    /// Rationale (original notes): in non-AR mode (captureSource ∈
    /// {wide, ultrawide}) there is no ARKit/ARCore pose, only IMU. The
    /// angular delta is derived from the pose quaternion, so a
    /// zero/garbage pose yields nonsense decisions.
    ///
    /// Default false keeps AR mode unchanged.
    /// Setter: setDisableAngularFallback(bool).
    bool disableAngularFallback{false};

    // ── Pose-path state (V16 Phase 0/1/2) ─────────────────────────

    /// Number of keyframes accepted since the last reset().
    int32_t acceptedCount{0};
    /// Plane-local footprint of the last frame whose corners projected.
    std::optional<std::vector<Vec2>> lastCornersOnPlane;
    /// Plane basis latched for the current capture, once known.
    std::optional<PlaneBasis> planeForCapture;
    /// Set by markNextFrameAsLast(); consumed by the next evaluate().
    bool forceAcceptNext{false};
    /// Pose of the most recently accepted keyframe.
    std::optional<Pose> lastAcceptedPose;

    // ── Flow-path state (V16 A2) ──────────────────────────────────

    /// Working-resolution grayscale image of the last accepted
    /// keyframe. Per the original notes it is downscaled to keep KLT
    /// cheap (see kFlowWorkingMaxSide in evaluateFlow) and refreshed on
    /// every accept under the Flow strategy. Cleared by reset(); empty
    /// until the first flow accept.
    cv::Mat prevFrameGrayWork;
    /// Shi-Tomasi corners detected on prevFrameGrayWork. Cleared and
    /// refreshed together with it.
    std::vector<cv::Point2f> prevFeatures;

    /// Original (un-downscaled) dimensions of the previous accepted
    /// frame. Kept so the novelty ratio is computed in original pixel
    /// space and thresholds do not depend on the downscale factor.
    /// Re-set whenever prevFrameGrayWork is.
    int32_t prevFrameOrigWidth{0};
    int32_t prevFrameOrigHeight{0};
};
|
|
348
|
+
|
|
349
|
+
// Compile-time layout check on the shared POD struct — ensures iOS
|
|
350
|
+
// and Android marshal the same field ordering / size. Adjust this
|
|
351
|
+
// if you intentionally change Pose's layout (and update both bridges).
|
|
352
|
+
//
|
|
353
|
+
// Pose has 13 fields:
|
|
354
|
+
// tx, ty, tz (3 × float)
|
|
355
|
+
// qx, qy, qz, qw (4 × float)
|
|
356
|
+
// fx, fy, cx, cy (4 × float)
|
|
357
|
+
// imageWidth, imageHeight (2 × int32_t)
|
|
358
|
+
// Each field is 4 bytes → expected size = 13 × 4 = 52 bytes.
|
|
359
|
+
static_assert(sizeof(Pose) == 13 * 4,
|
|
360
|
+
"Pose POD size unexpected — must be 13 × 4-byte fields");
|
|
361
|
+
|
|
362
|
+
// ── Public API ────────────────────────────────────────────────────
|
|
363
|
+
|
|
364
|
+
// Allocates the private state with its in-class defaults.
// NOTE(review): pImpl_ is released with a plain `delete` below, so it is
// presumably a raw owning pointer declared in keyframe_gate.hpp — confirm
// that copying a KeyframeGate is disabled there.
KeyframeGate::KeyframeGate()
    : pImpl_(new Impl())
{
}

// Releases the private state allocated by the constructor.
KeyframeGate::~KeyframeGate()
{
    delete pImpl_;
}
|
|
366
|
+
|
|
367
|
+
// Turns the gate on or off. While off, evaluate() accepts every frame.
void KeyframeGate::setEnabled(bool enabled)
{
    pImpl_->enabled = enabled;
}

// Minimum new-content fraction required to accept a frame. Stored as
// given (no clamping).
void KeyframeGate::setOverlapThreshold(double t)
{
    pImpl_->overlapThreshold = t;
}

// Maximum number of keyframes to accept. Stored as given (no clamping).
void KeyframeGate::setMaxCount(int32_t n)
{
    pImpl_->maxCount = n;
}

// Arms a one-shot flag: the next evaluate() call force-accepts its frame.
void KeyframeGate::markNextFrameAsLast()
{
    pImpl_->forceAcceptNext = true;
}
|
|
371
|
+
|
|
372
|
+
// V16 A2 — strategy + flow tunable setters. All values are clamped
|
|
373
|
+
// defensively so a bad host-side default can't put the gate in an
|
|
374
|
+
// unworkable state.
|
|
375
|
+
void KeyframeGate::setStrategy(GateStrategy s) { pImpl_->strategy = s; }
|
|
376
|
+
GateStrategy KeyframeGate::getStrategy() const { return pImpl_->strategy; }
|
|
377
|
+
void KeyframeGate::setFlowMaxCorners(int32_t n) { pImpl_->flowMaxCorners = (n < 30 ? 30 : n); }
|
|
378
|
+
// Flow tunable setters taking `double`. Each clamps its argument to a
// workable range. NaN compares false against every bound, so it used
// to slip through the clamps unchanged; it is now ignored and the
// current setting is kept.

// Corner quality level: values <= 0 become 0.001, values > 1 become 1.
void KeyframeGate::setFlowQualityLevel(double q)
{
    if (std::isnan(q)) return;  // keep the current setting
    pImpl_->flowQualityLevel = (q <= 0.0 ? 0.001 : (q > 1.0 ? 1.0 : q));
}

// Minimum corner spacing: never below 1.0.
void KeyframeGate::setFlowMinDistance(double d)
{
    if (std::isnan(d)) return;  // keep the current setting
    pImpl_->flowMinDistance = (d < 1.0 ? 1.0 : d);
}

// V16 — translation budget. Clamp to non-negative; 0.0 disables the
// force-accept entirely (callers can opt-out by passing 0).
void KeyframeGate::setFlowMaxTranslationM(double m)
{
    if (std::isnan(m)) return;  // keep the current setting
    pImpl_->flowMaxTranslationM = (m < 0.0 ? 0.0 : m);
}

// V16 — novelty percentile. Clamp to [0.5, 0.99]. Below 0.5 the
// estimate becomes too sensitive to the BEST-tracked features (under-
// reports user-perceived novelty); above 0.99 it's effectively max-
// over-features, which is dominated by outliers.
void KeyframeGate::setFlowNoveltyPercentile(double p)
{
    if (std::isnan(p)) return;  // keep the current setting
    pImpl_->flowNoveltyPercentile = (p < 0.5 ? 0.5 : (p > 0.99 ? 0.99 : p));
}
|
|
388
|
+
// 2026-05-14 — non-AR-mode opt-out for the angular-delta fallback.
|
|
389
|
+
// See `disableAngularFallback` field doc in Impl for rationale.
|
|
390
|
+
void KeyframeGate::setDisableAngularFallback(bool v) { pImpl_->disableAngularFallback = v; }
|
|
391
|
+
|
|
392
|
+
void KeyframeGate::reset() {
|
|
393
|
+
pImpl_->acceptedCount = 0;
|
|
394
|
+
pImpl_->lastCornersOnPlane.reset();
|
|
395
|
+
pImpl_->planeForCapture.reset();
|
|
396
|
+
pImpl_->forceAcceptNext = false;
|
|
397
|
+
pImpl_->lastAcceptedPose.reset();
|
|
398
|
+
// V16 A2 — drop flow state. release() returns the cv::Mat to
|
|
399
|
+
// empty (refcount-managed); std::vector::clear() is the
|
|
400
|
+
// canonical empty. Mandatory: leftover state from a prior
|
|
401
|
+
// capture would otherwise leak into the next capture's first-
|
|
402
|
+
// frame logic.
|
|
403
|
+
pImpl_->prevFrameGrayWork.release();
|
|
404
|
+
pImpl_->prevFeatures.clear();
|
|
405
|
+
pImpl_->prevFrameOrigWidth = 0;
|
|
406
|
+
pImpl_->prevFrameOrigHeight = 0;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
int32_t KeyframeGate::getAcceptedCount() const { return pImpl_->acceptedCount; }
|
|
410
|
+
int32_t KeyframeGate::getMaxCount() const { return pImpl_->maxCount; }
|
|
411
|
+
bool KeyframeGate::isEnabled() const { return pImpl_->enabled; }
|
|
412
|
+
|
|
413
|
+
// Shared angular-delta evaluation path. Used by:
|
|
414
|
+
// • §4 (no plane was ever latched — original use)
|
|
415
|
+
// • §5's degenerate branches (V16 Phase 2 fix — projection-degenerate
|
|
416
|
+
// and current-area-zero no longer accept blindly; they fall back
|
|
417
|
+
// to angular-delta so the gate keeps producing sensibly-spaced
|
|
418
|
+
// keyframes even when the plane geometry breaks down at the
|
|
419
|
+
// edges of the latched patch).
|
|
420
|
+
//
|
|
421
|
+
// Returns a KeyframeGateDecision exactly the way §4 used to return
|
|
422
|
+
// inline. Caller decides which reason codes to emit; we emit the
|
|
423
|
+
// canonical angular reason codes here (`AcceptOkAngular` /
|
|
424
|
+
// `RejectOverlapTooHighAngular`) regardless of which call-site
|
|
425
|
+
// invoked the fallback — what matters for telemetry is "this was
|
|
426
|
+
// decided via the angular criterion", not why we ended up there.
|
|
427
|
+
// The `AcceptProjectionDegenerate` / `AcceptCurrentAreaZero` reasons
|
|
428
|
+
// remain in the enum for back-compat but are NO LONGER EMITTED.
|
|
429
|
+
// Diagnostic logging at the call sites tells us if a degenerate
|
|
430
|
+
// projection triggered the fallback.
|
|
431
|
+
KeyframeGateDecision KeyframeGate::evaluateAngularFallback(
    Impl& s,
    const Pose& pose)
{
    // Contract: gates `pose` purely on how far the camera's forward
    // vector has rotated since the last accepted pose, expressed as a
    // fraction of the narrower field of view. On accept it advances
    // `lastAcceptedPose` and `acceptedCount`; on reject it leaves the
    // state untouched. The decision's third field carries the computed
    // fraction, or -1.0 when none was computed.

    if (!s.lastAcceptedPose) {
        // No reference pose to measure rotation against, so accept.
        // Anchor on this frame and count it: without that, every later
        // call lands in this same branch and accepts again, which is an
        // unbounded burst-accept that the cap check below never sees.
        s.lastAcceptedPose = pose;
        s.acceptedCount += 1;
        return { true, KeyframeGateDecisionReason::AcceptNoPoseYet,
                 -1.0, s.acceptedCount, s.maxCount };
    }
    if (s.acceptedCount >= s.maxCount) {
        return { false, KeyframeGateDecisionReason::RejectMaxReached,
                 -1.0, s.acceptedCount, s.maxCount };
    }
    // 2026-05-14 — non-AR-mode opt-out. When `disableAngularFallback`
    // is set, treat every angular-fallback call as a hard reject.
    // The caller's flow strategy is then the only path that can
    // accept frames. See `disableAngularFallback` field doc for
    // the rationale (no usable pose data in non-AR captures).
    if (s.disableAngularFallback) {
        return { false,
                 KeyframeGateDecisionReason::RejectOverlapTooHighAngular,
                 -1.0, s.acceptedCount, s.maxCount };
    }

    // Angle between the two forward vectors. The dot product is clamped
    // to acos's domain so rounding error cannot produce a NaN here.
    Vec3 lastFwd = cameraForwardWorld(*s.lastAcceptedPose);
    Vec3 currFwd = cameraForwardWorld(pose);
    float dotProd = v3_dot(lastFwd, currFwd);
    if (dotProd > 1.0f) dotProd = 1.0f;
    if (dotProd < -1.0f) dotProd = -1.0f;
    float angleRad = std::acos(dotProd);

    // Pinhole field of view per axis; the narrower one is the reference
    // so the fraction is conservative for either pan direction.
    float fovH = 2.0f * std::atan(pose.imageWidth / (2.0f * pose.fx));
    float fovV = 2.0f * std::atan(pose.imageHeight / (2.0f * pose.fy));
    float fovRef = fovH < fovV ? fovH : fovV;

    // A degenerate (near-zero or NaN) FoV yields 0 new content.
    double newContent = (fovRef > 1e-3f)
        ? static_cast<double>(angleRad / fovRef)
        : 0.0;

    // Written as !(x >= t) rather than (x < t) so a NaN fraction (from a
    // NaN pose) is rejected instead of being accepted and then stored as
    // the new reference pose.
    if (!(newContent >= s.overlapThreshold)) {
        return { false,
                 KeyframeGateDecisionReason::RejectOverlapTooHighAngular,
                 newContent, s.acceptedCount, s.maxCount };
    }
    s.lastAcceptedPose = pose;
    s.acceptedCount += 1;
    return { true, KeyframeGateDecisionReason::AcceptOkAngular,
             newContent, s.acceptedCount, s.maxCount };
}
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
// Pose-only gate entry point. Decides whether the frame taken at `pose`
// should become a keyframe, using (in order): the disabled pass-through,
// the one-shot force-accept flag, the first-frame anchor, the angular
// fallback when no plane footprint is cached, and finally the overlap of
// the frame's footprint on the cached plane against the last accepted
// footprint. `latchedPlane` may be null; it is only consulted while no
// plane basis has been cached yet.
KeyframeGateDecision KeyframeGate::evaluate(const Pose& pose,
                                            const PlaneTransform* latchedPlane)
{
    Impl& st = *pImpl_;

    // 1) Gate switched off: accept without touching any state.
    if (!st.enabled) {
        return { true, KeyframeGateDecisionReason::AcceptDisabled,
                 -1.0, 0, 0 };
    }

    // 2) One-shot force-accept (shutter release).
    if (st.forceAcceptNext) {
        st.forceAcceptNext = false;

        // Prefer the basis already cached for this capture; otherwise
        // derive one from the caller's latched plane, if there is one.
        std::optional<PlaneBasis> basis;
        if (st.planeForCapture) {
            basis = st.planeForCapture;
        } else if (latchedPlane) {
            basis = planeBasisFromMatrix(latchedPlane->m);
        }

        // Keep the footprint state current so any further frames are
        // still gated against this accepted frame.
        if (basis) {
            auto footprint = projectCornersOntoPlane(pose, *basis);
            if (footprint) {
                st.lastCornersOnPlane = *footprint;
                if (!st.planeForCapture) st.planeForCapture = *basis;
            }
        }

        st.lastAcceptedPose = pose;
        st.acceptedCount += 1;
        return { true, KeyframeGateDecisionReason::AcceptForceLast,
                 -1.0, st.acceptedCount, st.maxCount };
    }

    // 3) First frame of the capture: always accepted as the anchor.
    if (st.acceptedCount == 0) {
        st.lastAcceptedPose = pose;

        // The plane and footprint are cached only when both the basis
        // and the corner projection succeed; otherwise later frames
        // use the angular fallback.
        bool anchoredOnPlane = false;
        if (latchedPlane) {
            auto basis = planeBasisFromMatrix(latchedPlane->m);
            if (basis) {
                auto footprint = projectCornersOntoPlane(pose, *basis);
                if (footprint) {
                    st.planeForCapture = *basis;
                    st.lastCornersOnPlane = *footprint;
                    anchoredOnPlane = true;
                }
            }
        }

        st.acceptedCount = 1;
        return { true,
                 anchoredOnPlane
                     ? KeyframeGateDecisionReason::AcceptFirstOnPlane
                     : KeyframeGateDecisionReason::AcceptFirstNoPlane,
                 -1.0, 1, st.maxCount };
    }

    // 4) No cached plane or no cached footprint: rotation-only gating.
    if (!(st.planeForCapture && st.lastCornersOnPlane)) {
        return evaluateAngularFallback(st, pose);
    }

    // 5) Plane-based path, starting with the keyframe cap.
    if (st.acceptedCount >= st.maxCount) {
        return { false, KeyframeGateDecisionReason::RejectMaxReached,
                 -1.0, st.acceptedCount, st.maxCount };
    }

    // Project this frame's corners onto the cached plane. When the
    // projection degenerates (e.g. the user panned past the end of the
    // shelf or onto a perpendicular wall) we hand the frame to the
    // angular fallback instead of accepting it outright. An outright
    // accept would not advance acceptedCount or the cached footprint,
    // so every following frame would degenerate and be accepted too,
    // with the cap above never firing. The fallback advances the count
    // and the reference pose on accept, so the cap still bounds the
    // capture.
    auto projected = projectCornersOntoPlane(pose, *st.planeForCapture);
    if (!projected) {
        return evaluateAngularFallback(st, pose);
    }
    const std::vector<Vec2>& nowQuad  = *projected;
    const std::vector<Vec2>& prevQuad = *st.lastCornersOnPlane;

    float sharedArea = polygonIntersectionArea(nowQuad, prevQuad);
    float nowArea    = polygonArea(nowQuad);
    if (nowArea <= 1e-6f) {
        // A collapsed footprint is the same degenerate-shape failure
        // mode as a failed projection, so it gets the same treatment.
        return evaluateAngularFallback(st, pose);
    }

    // Fraction of the current footprint already covered by the last
    // accepted one, clamped to [0, 1]; new content is the remainder.
    float covered = sharedArea / nowArea;
    covered = (covered < 0.0f) ? 0.0f : covered;
    covered = (covered > 1.0f) ? 1.0f : covered;
    double freshFraction = 1.0 - static_cast<double>(covered);

    if (freshFraction < st.overlapThreshold) {
        return { false, KeyframeGateDecisionReason::RejectOverlapTooHigh,
                 freshFraction, st.acceptedCount, st.maxCount };
    }

    // Accept: this frame becomes the new reference footprint and pose.
    st.lastCornersOnPlane = nowQuad;
    st.lastAcceptedPose = pose;
    st.acceptedCount += 1;
    return { true, KeyframeGateDecisionReason::AcceptOk,
             freshFraction, st.acceptedCount, st.maxCount };
}
|
|
602
|
+
|
|
603
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
604
|
+
// V16 A2 — sparse-flow novelty path
|
|
605
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
606
|
+
//
|
|
607
|
+
// Algorithm (1:1 with Ram's design 2026-05-13):
|
|
608
|
+
//
|
|
609
|
+
// 1. Detect Shi-Tomasi corners in the LAST ACCEPTED keyframe once
|
|
610
|
+
// per accept. Persist them on Impl.prevFeatures.
|
|
611
|
+
// 2. For each incoming frame, track those features into the new
|
|
612
|
+
// frame with calcOpticalFlowPyrLK.
|
|
613
|
+
//   3. Compute a percentile (flowNoveltyPercentile; the median before V16)
//      of the absolute displacement per axis; the larger axis is the pan axis.
|
|
615
|
+
//   4. novelty = pan_axis_percentile_displacement / pan_axis_frame_dim
|
|
616
|
+
// ∈ [0, 1] for sensible motion.
|
|
617
|
+
// 5. Accept iff novelty ≥ overlapThreshold (default 0.4 → 40 % of
|
|
618
|
+
// frame dim → 40 % new content for a yaw-dominated pan).
|
|
619
|
+
// 6. On accept, detect fresh features in the new frame, swap
|
|
620
|
+
// prevFrameGrayWork + prevFeatures, increment acceptedCount.
|
|
621
|
+
//
|
|
622
|
+
// Fallbacks:
|
|
623
|
+
// * acceptedCount == 0 → accept first frame, detect features,
|
|
624
|
+
// return AcceptFirstFlow.
|
|
625
|
+
// * acceptedCount ≥ maxCount → RejectMaxReached.
|
|
626
|
+
// * tracked count < 30 % of detected → tracking failure (texture-
|
|
627
|
+
// poor scene, motion too fast for the pyramid window). Falls
|
|
628
|
+
// back to the existing angular-delta path so the gate still
|
|
629
|
+
// produces sensible decisions in low-texture scenes.
|
|
630
|
+
//
|
|
631
|
+
// Cost (iPhone 13 Pro, 1920×1440 → 720 working res):
|
|
632
|
+
// * goodFeaturesToTrack (per accept): ~6-10 ms
|
|
633
|
+
// * cvtColor / resize (per evaluate): ~1-2 ms
|
|
634
|
+
// * calcOpticalFlowPyrLK (per evaluate): ~1-3 ms
|
|
635
|
+
// Total per-evaluate (non-accept frame): ~3-5 ms. Within budget
|
|
636
|
+
// for the 50fps AR delegate path.
|
|
637
|
+
|
|
638
|
+
namespace {
|
|
639
|
+
|
|
640
|
+
// Longer-side cap, in pixels, applied by downscaleToWorking() before any
// feature detection or KLT tracking runs.
constexpr int    kFlowWorkingMaxSide                = 720;
// Minimum share of the previous keyframe's features that must track
// successfully; below this evaluateWithFrame() stops trusting KLT for
// the frame pair and uses the angular fallback instead.
constexpr double kFlowMinTrackedFeatureFraction     = 0.30;
// Passed as the `maxLevel` argument of cv::calcOpticalFlowPyrLK.
constexpr int    kFlowKLTMaxLevel                   = 3;
|
|
643
|
+
|
|
644
|
+
// V16 — percentile of the ABSOLUTE values in `values`, found in O(n)
// with std::nth_element.
//
// Side effect: `values` is modified. Every element is replaced by its
// absolute value and the vector is left partially sorted around the
// chosen position, so callers must pass a scratch copy.
//
// `pct` is a fraction: 0.5 → median, 0.85 → 85th percentile (current
// default), 0.99 → near-max. Values outside [0, 1] are clamped, and a
// NaN `pct` is treated as 0.0. The element returned is the one at index
// floor(pct * (n - 1)) of the sorted absolute values.
//
// Returns 0 for empty input (caller must guard).
//
// Why a percentile and not the median (V16 change): the median of
// tracked-feature displacements under-reports novelty when the user has
// rotated the camera enough that the LEADING-EDGE features show large
// motion while the BULK of the features (in the overlap region) show
// small motion. The 85th percentile picks up the leading-edge motion
// sooner and matches the user's perception of "new content visible"
// better. The fraction is read from the `flowNoveltyPercentile`
// tunable so it is operator-configurable per use case.
float percentileAbs(std::vector<float>& values, double pct) {
    if (values.empty()) return 0.0f;
    const std::size_t n = values.size();
    for (auto& v : values) v = std::abs(v);

    // Clamp before converting to an index. `!(pct >= 0.0)` is true for
    // negatives AND for NaN; a plain `pct < 0.0` lets NaN through to the
    // float→integer cast below, which is undefined behaviour.
    if (!(pct >= 0.0)) pct = 0.0;
    if (pct > 1.0) pct = 1.0;

    // Truncating index: n=1 returns the only element; n=2 with pct=0.85
    // gives idx 0, i.e. the smaller value. With two samples there is no
    // meaningful 85th percentile, so this is an accepted degenerate.
    std::size_t idx = static_cast<std::size_t>(pct * static_cast<double>(n - 1));
    if (idx >= n) idx = n - 1;

    std::nth_element(values.begin(),
                     values.begin() + static_cast<std::ptrdiff_t>(idx),
                     values.end());
    return values[idx];
}
|
|
677
|
+
|
|
678
|
+
// Downscale `srcGray` so its longer side equals `kFlowWorkingMaxSide`,
|
|
679
|
+
// using INTER_AREA (best for shrinking — anti-aliased average). If
|
|
680
|
+
// the source is already at or below the target size, returns a deep
|
|
681
|
+
// copy (so callers always own the result). Always returns a
|
|
682
|
+
// CV_8UC1 Mat.
|
|
683
|
+
cv::Mat downscaleToWorking(const cv::Mat& srcGray) {
|
|
684
|
+
const int longerSide = std::max(srcGray.cols, srcGray.rows);
|
|
685
|
+
if (longerSide <= kFlowWorkingMaxSide) {
|
|
686
|
+
return srcGray.clone();
|
|
687
|
+
}
|
|
688
|
+
const double scale = static_cast<double>(kFlowWorkingMaxSide) / longerSide;
|
|
689
|
+
cv::Mat out;
|
|
690
|
+
cv::resize(srcGray, out, cv::Size(), scale, scale, cv::INTER_AREA);
|
|
691
|
+
return out;
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
} // anonymous namespace
|
|
695
|
+
|
|
696
|
+
// Gate entry point that can use image data (sparse optical flow) in
// addition to the pose.
//
//   pose          camera pose/intrinsics of the incoming frame
//   latchedPlane  optional plane; only used by the pose path
//   grayData      8-bit single-channel pixels (wrapped as CV_8UC1);
//                 may be null, in which case the pose path is used
//   width/height  frame size in pixels
//   stride        row pitch in bytes; must be >= width
//
// Returns the accept/reject decision with its reason, the novelty value
// (or -1.0 when none was computed), and the accepted/max counters.
//
// Invariant maintained on the Flow path: after every accept, the KLT
// anchor (`prevFrameGrayWork` + `prevFeatures`) and `lastAcceptedPose`
// all describe the frame that was just accepted.
KeyframeGateDecision KeyframeGate::evaluateWithFrame(
    const Pose& pose,
    const PlaneTransform* latchedPlane,
    const uint8_t* grayData,
    int32_t width,
    int32_t height,
    int32_t stride)
{
    Impl& s = *pImpl_;

    // §1 — disabled: accept unconditionally for either strategy.
    // NOTE(review): unlike evaluate(), this branch increments
    // acceptedCount and reports the live counters — confirm which of
    // the two behaviours hosts rely on before unifying them.
    if (!s.enabled) {
        s.acceptedCount += 1;
        return { true, KeyframeGateDecisionReason::AcceptDisabled,
                 -1.0, s.acceptedCount, s.maxCount };
    }

    // §2 — pose dispatch. Taken when the strategy is Pose, or when the
    // caller supplied no usable image despite strategy=Flow (degrade
    // gracefully rather than dereference a null/short buffer).
    // evaluate() is OpenCV-free and handles force-last itself, including
    // recording the pose and refreshing the plane footprint, so a forced
    // accept leaves the pose-path state consistent.
    const bool haveFrame =
        grayData != nullptr && width > 0 && height > 0 && stride >= width;
    if (s.strategy == GateStrategy::Pose || !haveFrame) {
        return evaluate(pose, latchedPlane);
    }

    // Flow path — wrap the incoming pixels as a non-owning cv::Mat and
    // downscale to working resolution. The non-owning view is safe
    // because downscaleToWorking() always returns pixels it owns (a
    // clone or a resize output), and only that result is stored on Impl.
    cv::Mat currGrayFull(height, width, CV_8UC1,
                         const_cast<uint8_t*>(grayData),
                         static_cast<size_t>(stride));
    cv::Mat currGrayWork = downscaleToWorking(currGrayFull);

    // Makes `work` the KLT reference: detects fresh Shi-Tomasi corners
    // in it and stores frame + corners on Impl. Features are re-detected
    // rather than reusing tracked survivors — a fresh detect gives the
    // most distinctive corners for the next frame pair and avoids drift
    // accumulating across accepts.
    const auto anchorFlowOn = [&s, width, height](const cv::Mat& work) {
        std::vector<cv::Point2f> corners;
        cv::goodFeaturesToTrack(
            work, corners,
            s.flowMaxCorners,
            s.flowQualityLevel,
            s.flowMinDistance);
        s.prevFrameGrayWork   = work;
        s.prevFeatures        = std::move(corners);
        s.prevFrameOrigWidth  = width;
        s.prevFrameOrigHeight = height;
    };

    // §3 — force-last under Flow. Accepted unconditionally (no novelty
    // value). The flow anchor and the reference pose are moved to this
    // frame so that a later evaluation — e.g. the capture continues
    // after a cancelled finalize, or force-last arrived before any
    // first frame — compares against a real keyframe instead of stale
    // or missing state.
    if (s.forceAcceptNext) {
        s.forceAcceptNext = false;
        anchorFlowOn(currGrayWork);
        s.lastAcceptedPose = pose;
        s.acceptedCount += 1;
        return { true, KeyframeGateDecisionReason::AcceptForceLast,
                 -1.0, s.acceptedCount, s.maxCount };
    }

    // §4 — first-frame accept under Flow. Nothing to track against yet;
    // anchor here so subsequent frames have a target.
    if (s.acceptedCount == 0) {
        anchorFlowOn(currGrayWork);
        s.lastAcceptedPose = pose;
        s.acceptedCount = 1;
        return { true, KeyframeGateDecisionReason::AcceptFirstFlow,
                 -1.0, s.acceptedCount, s.maxCount };
    }

    // §5 — max-reached gate. The Flow path needs its own cap check
    // because it does not run through evaluate()'s.
    if (s.acceptedCount >= s.maxCount) {
        return { false, KeyframeGateDecisionReason::RejectMaxReached,
                 -1.0, s.acceptedCount, s.maxCount };
    }

    // Angular fallback wrapper for the Flow path. When the fallback
    // accepts (observed as acceptedCount advancing), the KLT anchor is
    // moved to this frame too; otherwise later frames would be tracked,
    // and their novelty measured, against an older keyframe than the
    // one `lastAcceptedPose` describes.
    const auto angularFallback = [&]() {
        const auto countBefore = s.acceptedCount;
        KeyframeGateDecision decision = evaluateAngularFallback(s, pose);
        if (s.acceptedCount != countBefore) {
            anchorFlowOn(currGrayWork);
        }
        return decision;
    };

    // §6 — KLT tracking. With no reference features (e.g. the anchor
    // frame yielded no corners) there is nothing to track.
    if (s.prevFeatures.empty() || s.prevFrameGrayWork.empty()) {
        return angularFallback();
    }
    std::vector<cv::Point2f> trackedFeatures;
    std::vector<uint8_t>     status;
    std::vector<float>       err;
    cv::calcOpticalFlowPyrLK(
        s.prevFrameGrayWork, currGrayWork,
        s.prevFeatures, trackedFeatures, status, err,
        cv::Size(21, 21),
        kFlowKLTMaxLevel,
        cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01));

    // Collect successfully-tracked displacements in WORKING-RESOLUTION
    // pixels. Numerator (displacement) and denominator (frame dim) both
    // live in working pixels, so the ratio equals the original-pixel one.
    std::vector<float> dxs, dys;
    dxs.reserve(s.prevFeatures.size());
    dys.reserve(s.prevFeatures.size());
    for (size_t i = 0;
         i < s.prevFeatures.size() && i < trackedFeatures.size() && i < status.size();
         ++i) {
        if (status[i] == 0) continue;
        dxs.push_back(trackedFeatures[i].x - s.prevFeatures[i].x);
        dys.push_back(trackedFeatures[i].y - s.prevFeatures[i].y);
    }

    // §6a — tracking-failure fallback. If too few of the previous
    // keyframe's features tracked, KLT is unreliable for this frame
    // pair (occlusion, motion blur, texture loss). The angular fallback
    // needs only the pose. prevFeatures is non-empty here (guarded
    // above), so the division is safe.
    const double trackedFraction =
        static_cast<double>(dxs.size()) / static_cast<double>(s.prevFeatures.size());
    if (trackedFraction < kFlowMinTrackedFeatureFraction) {
        return angularFallback();
    }

    // §6b — percentile absolute displacement on each axis. V16 changed
    // this from the median to a configurable percentile (default 85th):
    // the median under-reports novelty when the leading edge has moved
    // but most overlap-region features haven't. Operator-tunable via
    // setFlowNoveltyPercentile().
    const double pctile   = s.flowNoveltyPercentile;
    const float  pctAbsDx = percentileAbs(dxs, pctile);
    const float  pctAbsDy = percentileAbs(dys, pctile);

    // §6c — pan-axis detection + novelty. The axis with the larger
    // percentile displacement IS the pan axis: the pan direction is read
    // off the flow itself, not off the captureOrientation hold setting,
    // which describes the device hold rather than the pan direction.
    // Novelty = pan-axis displacement / pan-axis frame dimension.
    double novelty;
    if (pctAbsDx >= pctAbsDy) {
        novelty = static_cast<double>(pctAbsDx) / static_cast<double>(currGrayWork.cols);
    } else {
        novelty = static_cast<double>(pctAbsDy) / static_cast<double>(currGrayWork.rows);
    }
    if (novelty < 0.0) novelty = 0.0;
    if (novelty > 1.0) novelty = 1.0;

    // §6d — translation budget. 3D Euclidean distance the camera has
    // moved since the last accepted keyframe. If the operator has set
    // flowMaxTranslationM > 0 and the distance reaches it, the frame is
    // force-accepted even when novelty is below threshold.
    //
    // Why: very large parallax between adjacent keyframes starves the
    // downstream bundle adjustment of consistent inliers and ghosts the
    // panorama. Bounding the physical translation between keyframes
    // keeps the matcher's inputs in a regime it can handle. A value of
    // 0.0 disables the budget.
    //
    // `lastAcceptedPose` is shared with the pose path and is updated on
    // every Flow-path accept in this function.
    double translationSinceLastAccept = 0.0;
    if (s.lastAcceptedPose.has_value()) {
        const Pose& last = s.lastAcceptedPose.value();
        const float dtx = pose.tx - last.tx;
        const float dty = pose.ty - last.ty;
        const float dtz = pose.tz - last.tz;
        translationSinceLastAccept =
            std::sqrt(static_cast<double>(dtx) * dtx +
                      static_cast<double>(dty) * dty +
                      static_cast<double>(dtz) * dtz);
    }
    const bool translationBudgetCrossed =
        (s.flowMaxTranslationM > 0.0) &&
        (translationSinceLastAccept >= s.flowMaxTranslationM);

    // §7 — combined check. Accept if EITHER novelty reached
    // `overlapThreshold` OR the translation budget was exceeded.
    if (novelty < s.overlapThreshold && !translationBudgetCrossed) {
        return { false, KeyframeGateDecisionReason::RejectOverlapTooHighFlow,
                 novelty, s.acceptedCount, s.maxCount };
    }
    // Novelty takes precedence as the reported reason when both fired,
    // so telemetry can separate translation-forced accepts from natural
    // novelty accepts.
    const KeyframeGateDecisionReason acceptReason =
        (novelty >= s.overlapThreshold)
            ? KeyframeGateDecisionReason::AcceptOkFlow
            : KeyframeGateDecisionReason::AcceptFlowTranslation;

    // §8 — accept: this frame becomes the new KLT anchor and reference
    // pose.
    anchorFlowOn(currGrayWork);
    s.lastAcceptedPose = pose;
    s.acceptedCount += 1;
    return { true, acceptReason,
             novelty, s.acceptedCount, s.maxCount };
}
|
|
926
|
+
|
|
927
|
+
} // namespace retailens
|