react-native-image-stitcher 0.16.2 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +154 -0
  2. package/RNImageStitcher.podspec +26 -1
  3. package/android/build.gradle +20 -0
  4. package/android/src/main/cpp/CMakeLists.txt +46 -3
  5. package/android/src/main/cpp/stitcher_jsi_install_jni.cpp +436 -0
  6. package/android/src/main/java/io/imagestitcher/rn/RNImageStitcherPackage.kt +6 -0
  7. package/android/src/main/java/io/imagestitcher/rn/RNSARCameraView.kt +711 -6
  8. package/android/src/main/java/io/imagestitcher/rn/RNSARSession.kt +156 -0
  9. package/android/src/main/java/io/imagestitcher/rn/StitcherJsiInstallerModule.kt +103 -0
  10. package/android/src/main/java/io/imagestitcher/rn/StitcherWorkletRuntime.kt +338 -0
  11. package/cpp/{stitcher_frame_data.hpp → camera_frame_data.hpp} +96 -13
  12. package/cpp/camera_frame_jsi.cpp +357 -0
  13. package/cpp/camera_frame_jsi.hpp +108 -0
  14. package/cpp/stitcher_proxy_jsi.cpp +140 -0
  15. package/cpp/stitcher_proxy_jsi.hpp +62 -0
  16. package/cpp/stitcher_worklet_dispatch.cpp +103 -0
  17. package/cpp/stitcher_worklet_dispatch.hpp +71 -0
  18. package/cpp/stitcher_worklet_registry.cpp +91 -0
  19. package/cpp/stitcher_worklet_registry.hpp +146 -0
  20. package/dist/camera/ARCameraView.d.ts +77 -0
  21. package/dist/camera/ARCameraView.js +90 -1
  22. package/dist/camera/Camera.d.ts +63 -4
  23. package/dist/camera/Camera.js +2 -2
  24. package/dist/camera/CaptureMemoryPill.d.ts +4 -3
  25. package/dist/camera/CaptureMemoryPill.js +4 -3
  26. package/dist/index.d.ts +2 -1
  27. package/dist/stitching/ARFrameMeta.d.ts +100 -0
  28. package/dist/stitching/{StitcherFrame.js → ARFrameMeta.js} +1 -1
  29. package/dist/stitching/{StitcherFrame.d.ts → CameraFrame.d.ts} +70 -11
  30. package/dist/stitching/CameraFrame.js +4 -0
  31. package/dist/stitching/ensureStitcherProxyInstalled.d.ts +8 -0
  32. package/dist/stitching/ensureStitcherProxyInstalled.js +81 -0
  33. package/dist/stitching/useStitcherWorklet.d.ts +4 -4
  34. package/dist/stitching/useStitcherWorklet.js +4 -4
  35. package/ios/Sources/RNImageStitcher/ARSessionBridge.m +23 -1
  36. package/ios/Sources/RNImageStitcher/ARSessionBridge.swift +137 -2
  37. package/ios/Sources/RNImageStitcher/CameraFrameHostObject.h +83 -0
  38. package/ios/Sources/RNImageStitcher/CameraFrameHostObject.mm +760 -0
  39. package/ios/Sources/RNImageStitcher/RNSARSession.swift +336 -40
  40. package/ios/Sources/RNImageStitcher/RNSARWorkletRuntime.h +128 -0
  41. package/ios/Sources/RNImageStitcher/RNSARWorkletRuntime.mm +313 -0
  42. package/ios/Sources/RNImageStitcher/StitcherJsiInstaller.h +42 -0
  43. package/ios/Sources/RNImageStitcher/StitcherJsiInstaller.mm +160 -0
  44. package/package.json +1 -1
  45. package/src/camera/ARCameraView.tsx +211 -2
  46. package/src/camera/Camera.tsx +81 -4
  47. package/src/camera/CaptureMemoryPill.tsx +4 -3
  48. package/src/index.ts +7 -3
  49. package/src/stitching/ARFrameMeta.ts +107 -0
  50. package/src/stitching/{StitcherFrame.ts → CameraFrame.ts} +79 -11
  51. package/src/stitching/ensureStitcherProxyInstalled.ts +141 -0
  52. package/src/stitching/useStitcherWorklet.ts +9 -9
@@ -0,0 +1,760 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ //
3
+ // CameraFrameHostObject.mm — iOS-specific wrapper for the shared
4
+ // `retailens::CameraFrameJsiHostObject` (defined in
5
+ // `cpp/camera_frame_jsi.{hpp,cpp}`).
6
+ //
7
+ // Owns:
8
+ // - The Obj-C facade callable from Swift / other Obj-C / .mm files.
9
+ // - The iOS-specific `PixelBufferReader` impl (wraps a
10
+ // `CVPixelBufferRef` from `ARFrame.capturedImage`; lock / memcpy
11
+ // / unlock pattern).
12
+ // - The Obj-C → C++ extraction logic that builds a
13
+ // `retailens::CameraFrameData` from an `ARFrame` + the lib's
14
+ // `RNSARFramePose`.
15
+ //
16
+ // Does NOT own:
17
+ // - The JSI `get` / `getPropertyNames` dispatch. That lives in
18
+ // `cpp/camera_frame_jsi.cpp` and is identical to the Android
19
+ // implementation (DRY across platforms).
20
+
21
+ #import "CameraFrameHostObject.h"
22
+
23
+ #import <Foundation/Foundation.h>
24
+ #import <ARKit/ARKit.h>
25
+ #import <CoreVideo/CVPixelBuffer.h>
26
+ #import <CoreMedia/CoreMedia.h>
27
+ #import <Metal/Metal.h>
28
+ #import <simd/simd.h>
29
+ #import <os/log.h>
30
+
31
+ #include <jsi/jsi.h>
32
+
33
+ #include <algorithm>
34
+ #include <cstring>
35
+ #include <memory>
36
+ #include <string>
37
+ #include <utility>
38
+
39
+ #include "camera_frame_data.hpp"
40
+ #include "camera_frame_jsi.hpp"
41
+ #include "stitcher_proxy_jsi.hpp" // retailens::getExtractionConfig()
42
+
43
+ using namespace facebook;
44
+
45
// Local re-declaration of the Obj-C surface of the Swift class
// `RNSARFramePose` that this file reads. Declaring it here (the same
// approach `KeyframeGateFrameProcessor.mm` takes for
// `IncrementalStitcher`) removes the dependency on the autogenerated
// `RNImageStitcher-Swift.h`, which is produced at build time and is not
// always visible to .mm files in this pod.
//
// Keep this list in lock-step with `RNSARSession.swift::RNSARFramePose`:
// a field added on the Swift side must also be added here before this
// file can read it.
@class RNSARFramePose;
@interface RNSARFramePose : NSObject

// Camera translation (copied into `CameraFrameData::tx/ty/tz`).
@property (nonatomic, readonly) double tx;
@property (nonatomic, readonly) double ty;
@property (nonatomic, readonly) double tz;

// Camera rotation as a quaternion, x/y/z/w order.
@property (nonatomic, readonly) double qx;
@property (nonatomic, readonly) double qy;
@property (nonatomic, readonly) double qz;
@property (nonatomic, readonly) double qw;

// Camera intrinsics: focal lengths and principal point.
@property (nonatomic, readonly) double fx;
@property (nonatomic, readonly) double fy;
@property (nonatomic, readonly) double cx;
@property (nonatomic, readonly) double cy;

// Resolution of the captured image the intrinsics refer to.
@property (nonatomic, readonly) NSInteger imageWidth;
@property (nonatomic, readonly) NSInteger imageHeight;

// NOTE(review): presumably the frame timestamp in milliseconds; it is
// not read anywhere in the visible part of this file — confirm against
// the Swift definition.
@property (nonatomic, readonly) double timestampMs;

@end
70
+
71
+ #pragma mark - iOS PixelBufferReader
72
+
73
+ namespace {
74
+
75
/// iOS implementation of `retailens::PixelBufferReader`, backed by the
/// `CVPixelBuffer` in `ARFrame.capturedImage`. The general contract
/// (thread-affinity, invalidation semantics, Y-plane-only constraint) is
/// documented on the base class. This subclass adds:
///   - a `CVPixelBuffer` lock / memcpy / unlock cycle per `copyTo`
///   - a `CFBridgingRetain` of the parent `ARFrame`, held for the
///     reader's whole lifetime, so ARKit's pool can't reclaim the
///     underlying buffer mid-read
///
/// Planar buffers (ARKit's default bi-planar YUV) are read through the
/// plane-0 accessors: `CVPixelBufferGetBaseAddress` on a planar buffer
/// does not point at the first plane's pixels, so the non-planar
/// accessors are used only for chunky (single-plane) formats.
class IOSPixelBufferReader : public retailens::PixelBufferReader {
 public:
  /// Retains `arFrame` and caches the row stride / row count of the
  /// first plane (or of the whole buffer for non-planar formats). Both
  /// stay 0 when the frame has no captured image.
  explicit IOSPixelBufferReader(ARFrame* arFrame) {
    // CFBridgingRetain hands ARC ownership to our void*; the matching
    // CFBridgingRelease is in the destructor.
    _retainedFrame = (void*)CFBridgingRetain(arFrame);
    CVPixelBufferRef pixelBuffer = arFrame.capturedImage;
    if (pixelBuffer != NULL) {
      if (CVPixelBufferIsPlanar(pixelBuffer)) {
        _bytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
        _height = CVPixelBufferGetHeightOfPlane(pixelBuffer, 0);
      } else {
        _bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer);
        _height = CVPixelBufferGetHeight(pixelBuffer);
      }
    }
  }

  // The reader owns a manually retained reference; copying it would
  // release the ARFrame twice.
  IOSPixelBufferReader(const IOSPixelBufferReader&) = delete;
  IOSPixelBufferReader& operator=(const IOSPixelBufferReader&) = delete;

  ~IOSPixelBufferReader() override {
    // Transfer ownership back to ARC, which then releases the frame.
    if (_retainedFrame != nullptr) {
      ARFrame* frame = CFBridgingRelease(_retainedFrame);
      (void)frame;
      _retainedFrame = nullptr;
    }
  }

  /// Number of bytes `copyTo` can deliver: cached stride × row count.
  std::size_t byteSize() const override {
    return _bytesPerRow * _height;
  }

  /// Copies up to `maxBytes` bytes of the first plane (padding included)
  /// into `dst`.
  ///
  /// @return the number of bytes written; 0 when there is no frame, no
  ///         captured image, no destination, the buffer cannot be
  ///         locked, or it exposes no base address.
  std::size_t copyTo(uint8_t* dst, std::size_t maxBytes) override {
    if (_retainedFrame == nullptr || dst == nullptr) return 0;
    ARFrame* frame = (__bridge ARFrame*)_retainedFrame;
    CVPixelBufferRef pixelBuffer = frame.capturedImage;
    if (pixelBuffer == NULL) return 0;

    // The base address is only valid while the buffer is locked, so a
    // failed lock must not be followed by a read.
    if (CVPixelBufferLockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly) !=
        kCVReturnSuccess) {
      return 0;
    }
    const void* base = CVPixelBufferIsPlanar(pixelBuffer)
                           ? CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0)
                           : CVPixelBufferGetBaseAddress(pixelBuffer);
    const uint8_t* src = static_cast<const uint8_t*>(base);
    std::size_t toCopy = std::min<std::size_t>(byteSize(), maxBytes);
    if (src != nullptr && toCopy > 0) {
      std::memcpy(dst, src, toCopy);
    } else {
      toCopy = 0;
    }
    CVPixelBufferUnlockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);
    return toCopy;
  }

 private:
  void* _retainedFrame = nullptr;  // CFBridgingRetain'd ARFrame
  std::size_t _bytesPerRow = 0;    // stride of the plane that copyTo reads
  std::size_t _height = 0;         // row count of that plane
};
131
+
132
+ #pragma mark - AR depth + anchor extraction
133
+
134
/// Repack a single-channel CVPixelBuffer into `out` with no row padding.
///
/// ARKit's depth / confidence maps often report
/// `bytesPerRow > width * elementSize` because rows are padded for
/// alignment. A single bulk copy of `w*h*elementSize` bytes would drag
/// that padding into the pixel stream and shear the image, so each row
/// is copied on its own: `width * elementSize` bytes taken from
/// `base + row * bytesPerRow`. The result is the row-packed layout the
/// shared JSI layer (`camera_frame_jsi.cpp`) expects.
///
/// @param buffer      Source buffer; `NULL` yields `false`.
/// @param elementSize Size in bytes of one pixel element.
/// @param out         Receives `w*h*elementSize` bytes on success and is
///                    not modified on failure.
/// @return `true` on success; `false` when the buffer is NULL, cannot be
///         locked, has no base address, has a zero dimension, or reports
///         a stride smaller than one packed row.
bool PackSingleChannelPixelBuffer(CVPixelBufferRef buffer,
                                  std::size_t elementSize,
                                  std::vector<uint8_t>& out) {
  if (buffer == NULL) {
    return false;
  }
  const CVReturn lockStatus =
      CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly);
  if (lockStatus != kCVReturnSuccess) {
    return false;
  }

  const std::size_t cols = CVPixelBufferGetWidth(buffer);
  const std::size_t rows = CVPixelBufferGetHeight(buffer);
  const std::size_t srcStride = CVPixelBufferGetBytesPerRow(buffer);
  const std::size_t packedStride = cols * elementSize;
  const uint8_t* pixels =
      reinterpret_cast<const uint8_t*>(CVPixelBufferGetBaseAddress(buffer));

  const bool readable = pixels != nullptr && cols > 0 && rows > 0 &&
                        srcStride >= packedStride;
  if (readable) {
    out.resize(packedStride * rows);
    std::size_t dstOffset = 0;
    std::size_t srcOffset = 0;
    for (std::size_t y = 0; y < rows; ++y) {
      std::memcpy(out.data() + dstOffset, pixels + srcOffset, packedStride);
      dstOffset += packedStride;
      srcOffset += srcStride;
    }
  }

  // Unlock on every path that took the lock, success or not.
  CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly);
  return readable;
}
174
+
175
/// Populate `data.arDepth` from the frame's ARKit depth data, using
/// `sceneDepth` when present and `smoothedSceneDepth` otherwise. The
/// result is tagged `format="f32m"`:
///   - depthBytes      = Float32 metres, row-packed (w*h*4 bytes)
///   - confidenceBytes = Uint8 ARConfidenceLevel 0..2, row-packed (w*h)
/// `width` / `height` are the DEPTH MAP's own dimensions (≈256x192), not
/// the camera image's — the JSI layer derives `px = w*h` from them to
/// validate the byte counts.
///
/// `data.arDepth` is left as it was (nullopt by default) when the frame
/// has no depth data, the depth map is missing or empty, or the depth
/// bytes cannot be packed — e.g. non-LiDAR devices, or before the first
/// depth frame arrives.
void ExtractARDepth(ARFrame* arFrame, retailens::CameraFrameData& data) {
  ARDepthData* depthData = arFrame.sceneDepth;
  if (depthData == nil) {
    depthData = arFrame.smoothedSceneDepth;
  }
  if (depthData == nil) {
    return;
  }

  CVPixelBufferRef depthBuffer = depthData.depthMap;  // kCVPixelFormatType_DepthFloat32
  if (depthBuffer == NULL) {
    return;
  }
  const int32_t mapWidth = static_cast<int32_t>(CVPixelBufferGetWidth(depthBuffer));
  const int32_t mapHeight = static_cast<int32_t>(CVPixelBufferGetHeight(depthBuffer));
  if (mapWidth <= 0 || mapHeight <= 0) {
    return;
  }

  std::vector<uint8_t> packedDepth;
  const bool depthPacked =
      PackSingleChannelPixelBuffer(depthBuffer, sizeof(float), packedDepth);
  if (!depthPacked) {
    return;
  }

  // Confidence is optional (some configs / devices omit it). When present
  // it is a Uint8 ARConfidenceLevel (0=low, 1=medium, 2=high) with the
  // same w*h dimensions as the depth map. A failed pack leaves the vector
  // empty, which the JSI layer reads as "no confidenceMap" (matching the
  // optional `confidenceMap?` on the JS side).
  std::vector<uint8_t> packedConfidence;
  CVPixelBufferRef confidenceBuffer = depthData.confidenceMap;  // kCVPixelFormatType_ConfidenceUint8
  if (confidenceBuffer != NULL &&
      !PackSingleChannelPixelBuffer(confidenceBuffer, sizeof(uint8_t),
                                    packedConfidence)) {
    packedConfidence.clear();
  }

  retailens::ArDepth depth;
  depth.format = "f32m";
  depth.width = mapWidth;
  depth.height = mapHeight;
  depth.depthBytes = std::move(packedDepth);
  depth.confidenceBytes = std::move(packedConfidence);
  data.arDepth = std::move(depth);
}
221
+
222
/// Translate an `ARPlaneClassification` into the string used by the JS
/// `ARAnchor.classification` union. Values without a mapping produce
/// `""`, which the JSI layer surfaces as `classification === undefined`.
///
/// Callers are expected to invoke this only when
/// `classificationStatus == .known`; otherwise an undetermined `.none`
/// would be reported as a genuine "none" classification.
static std::string PlaneClassificationString(ARPlaneClassification c) {
  struct Label {
    ARPlaneClassification value;
    const char* text;
  };
  static const Label kLabels[] = {
      {ARPlaneClassificationWall, "wall"},
      {ARPlaneClassificationFloor, "floor"},
      {ARPlaneClassificationCeiling, "ceiling"},
      {ARPlaneClassificationTable, "table"},
      {ARPlaneClassificationSeat, "seat"},
      {ARPlaneClassificationDoor, "door"},
      {ARPlaneClassificationWindow, "window"},
      {ARPlaneClassificationNone, "none"},
  };
  for (const Label& label : kLabels) {
    if (label.value == c) {
      return label.text;
    }
  }
  return "";
}
240
+
241
/// Append one `retailens::ArAnchor` to `data.arAnchors` for every anchor
/// in `arFrame.anchors`. Each entry carries:
///   - `id`        — the anchor's UUID string
///   - `type`      — `"plane"`, `"image"`, or `"point"` for anything else
///   - `transform` — the 4x4 anchor->world matrix, ROW-MAJOR. ARKit's
///                   `simd_float4x4` is COLUMN-MAJOR (`columns[c][r]`),
///                   so the matrix is transposed on the way out:
///                   `transform[r*4+c] = a.transform.columns[c][r]`.
///
/// Plane anchors also get `alignment` (horizontal / vertical), `extent`
/// ([x, z] in plane-local metres) and, on classification-capable devices
/// once ARKit has determined it, a semantic `classification`.
///
/// NOTE(review): `ARMeshAnchor`s are not special-cased here, so they are
/// emitted as `"point"`; `ExtractARMesh` appends the same anchor again as
/// `"mesh"` when it also runs. Confirm whether that duplication is
/// intended.
void ExtractARAnchors(ARFrame* arFrame, retailens::CameraFrameData& data) {
  NSArray<ARAnchor*>* tracked = arFrame.anchors;
  data.arAnchors.reserve(tracked.count);

  for (ARAnchor* anchor in tracked) {
    retailens::ArAnchor entry;
    entry.id = std::string(anchor.identifier.UUIDString.UTF8String);

    const BOOL isPlane = [anchor isKindOfClass:[ARPlaneAnchor class]];
    if (!isPlane) {
      const BOOL isImage = [anchor isKindOfClass:[ARImageAnchor class]];
      entry.type = isImage ? "image" : "point";
    } else {
      ARPlaneAnchor* planeAnchor = (ARPlaneAnchor*)anchor;
      entry.type = "plane";
      if (planeAnchor.alignment == ARPlaneAnchorAlignmentVertical) {
        entry.alignment = "vertical";
      } else {
        entry.alignment = "horizontal";
      }

      // The deprecated `extent` (simd_float3 x,y,z; y≈0) is used instead
      // of the iOS-16 `planeExtent`: the pod's deployment target still
      // includes iOS 15, and the Swift plane math (RNSARSession) reads
      // `.extent` as well.
      entry.hasExtent = true;
      entry.extentX = static_cast<double>(planeAnchor.extent.x);
      entry.extentZ = static_cast<double>(planeAnchor.extent.z);

      // Only report a classification on capable devices AND once ARKit
      // has settled on one (`.known`). Before that, `classification` is
      // `.none` simply because it is undetermined; leaving the field
      // empty lets JS see `undefined` rather than a misleading "none".
      const bool classified =
          ARPlaneAnchor.isClassificationSupported &&
          planeAnchor.classificationStatus == ARPlaneClassificationStatusKnown;
      if (classified) {
        entry.classification =
            PlaneClassificationString(planeAnchor.classification);
      }
    }

    // Column-major -> row-major: cell (row, col) comes from columns[col][row].
    const simd_float4x4 matrix = anchor.transform;
    for (int cell = 0; cell < 16; ++cell) {
      const int row = cell / 4;
      const int col = cell % 4;
      entry.transform[cell] = static_cast<double>(matrix.columns[col][row]);
    }

    data.arAnchors.push_back(std::move(entry));
  }
}
292
+
293
/// Copy the `geometry.vertices` ARGeometrySource (format=float3,
/// MTLBuffer-backed) into a TIGHTLY-PACKED Float32 xyz byte vector
/// (`count * 3` floats).
///
/// ARKit reports `offset` (byte offset of the first element within the
/// buffer) and `stride` (bytes between consecutive elements). A
/// simd_float3 occupies 16 bytes under MSL alignment (xyz + 4 pad), but
/// ARKit may also hand back a tight 12-byte stride — so nothing is
/// assumed: exactly 12 bytes (3×Float32) are copied from
/// `base + offset + i*stride` for each element, dropping any padding.
///
/// The whole source range is validated BEFORE `out` is resized, so a
/// failure really does leave `out` untouched. The check is written with
/// subtraction / division so that a huge (or negative, once converted to
/// unsigned) `offset` or `stride` cannot wrap around and slip past it.
///
/// @param src Vertex source; `nil` yields `false`.
/// @param out Receives `count * 12` bytes on success; not modified on
///            failure.
/// @return `false` if the source is nil, empty, not 3-component Float32,
///         has no CPU-visible contents, or would read past the MTLBuffer.
bool PackMeshVertices(ARGeometrySource* src, std::vector<uint8_t>& out) {
  if (src == nil) return false;
  id<MTLBuffer> buffer = src.buffer;
  if (buffer == nil) return false;
  const NSInteger count = src.count;
  if (count <= 0) return false;
  // Vertices must be 3-component Float32 (ARKit's documented layout).
  if (src.format != MTLVertexFormatFloat3 ||
      src.componentsPerVector != 3) {
    return false;
  }
  const NSUInteger offset = src.offset;
  const NSUInteger stride = src.stride;
  const uint8_t* contents =
      reinterpret_cast<const uint8_t*>([buffer contents]);
  if (contents == nullptr) return false;
  const NSUInteger bufLen = [buffer length];
  const std::size_t triple = 3 * sizeof(float);  // 12 bytes, tight

  // Bounds guard — never read past the MTLBuffer. Element offsets grow
  // monotonically with the index, so it is enough to prove that the first
  // and the last element fit.
  if (offset > bufLen || bufLen - offset < triple) return false;
  const NSUInteger lastIndex = static_cast<NSUInteger>(count) - 1;
  if (stride != 0 && lastIndex > (bufLen - offset - triple) / stride) {
    return false;
  }

  out.resize(static_cast<std::size_t>(count) * triple);
  for (NSInteger i = 0; i < count; ++i) {
    const NSUInteger elemOffset = offset + static_cast<NSUInteger>(i) * stride;
    std::memcpy(out.data() + static_cast<std::size_t>(i) * triple,
                contents + elemOffset,
                triple);
  }
  return true;
}
336
+
337
/// Flatten an ARGeometryElement of triangle faces into a tightly-packed
/// Uint32 index vector (`faces.count * 3` indices).
///
/// ARKit's `bytesPerIndex` is either 2 or 4. 16-bit indices are widened
/// to 32 bits so the JS side always receives a Uint32Array — the cpp/ JSI
/// contract emits `meshFaces` verbatim as an ArrayBuffer of Uint32.
///
/// @param faces Face element; `nil` yields `false`.
/// @param out   Receives `count * 3 * 4` bytes on success; not modified
///              on failure (every check runs before the resize).
/// @return `false` for a non-triangle primitive, an index width other
///         than 2 or 4, an empty element, a buffer without CPU-visible
///         contents, or a buffer too short for the declared index count.
bool PackMeshFaces(ARGeometryElement* faces, std::vector<uint8_t>& out) {
  if (faces == nil ||
      faces.primitiveType != ARGeometryPrimitiveTypeTriangle ||
      faces.indexCountPerPrimitive != 3) {
    return false;
  }
  id<MTLBuffer> indexBuffer = faces.buffer;
  if (indexBuffer == nil) {
    return false;
  }
  const NSInteger triangleCount = faces.count;
  if (triangleCount <= 0) {
    return false;
  }
  const NSInteger indexWidth = faces.bytesPerIndex;
  const bool alreadyWide = (indexWidth == 4);
  if (!alreadyWide && indexWidth != 2) {
    return false;
  }

  const uint8_t* raw =
      reinterpret_cast<const uint8_t*>([indexBuffer contents]);
  if (raw == nullptr) {
    return false;
  }
  const NSUInteger available = [indexBuffer length];

  // The index buffer is tightly packed: count*3 indices of indexWidth bytes.
  const std::size_t indexCount = static_cast<std::size_t>(triangleCount) * 3;
  const NSUInteger required = static_cast<NSUInteger>(indexCount) *
                              static_cast<NSUInteger>(indexWidth);
  if (required > available) {
    return false;
  }

  out.resize(indexCount * sizeof(uint32_t));
  uint32_t* widened = reinterpret_cast<uint32_t*>(out.data());
  if (alreadyWide) {
    // Source is already Uint32 — one bulk copy.
    std::memcpy(widened, raw, indexCount * sizeof(uint32_t));
  } else {
    // Source is Uint16 — std::copy widens each index to 32 bits.
    const uint16_t* narrow = reinterpret_cast<const uint16_t*>(raw);
    std::copy(narrow, narrow + indexCount, widened);
  }
  return true;
}
382
+
383
/// Copy the optional per-face classification ARGeometrySource into a
/// Uint8 byte vector, one value per triangle. ARKit's classification
/// source holds one element per face with format MTLVertexFormatUChar.
///
/// The first byte of each element is read from `base + offset + i*stride`.
///
/// @param src Classification source; `nil` yields `false`.
/// @param out Receives `count` bytes on success. If a read would fall
///            outside the MTLBuffer, `out` is cleared; the earlier
///            rejections (nil source / buffer, non-positive count, no
///            contents) do not touch it. The cpp/ JSI layer treats an
///            empty `meshClassifications` as "no classifications"
///            (optional in JS).
/// @return `true` when every element was copied, `false` otherwise.
bool PackMeshClassifications(ARGeometrySource* src, std::vector<uint8_t>& out) {
  if (src == nil) {
    return false;
  }
  id<MTLBuffer> classBuffer = src.buffer;
  if (classBuffer == nil) {
    return false;
  }
  const NSInteger faceCount = src.count;
  if (faceCount <= 0) {
    return false;
  }
  const NSUInteger firstOffset = src.offset;
  const NSUInteger step = src.stride;
  const uint8_t* bytes =
      reinterpret_cast<const uint8_t*>([classBuffer contents]);
  if (bytes == nullptr) {
    return false;
  }
  const NSUInteger available = [classBuffer length];

  const std::size_t total = static_cast<std::size_t>(faceCount);
  out.resize(total);
  NSUInteger readAt = firstOffset;
  for (std::size_t face = 0; face < total; ++face, readAt += step) {
    // Each element contributes a single byte; stop before reading outside
    // the buffer.
    if (readAt + 1 > available) {
      out.clear();
      return false;
    }
    out[face] = bytes[readAt];
  }
  return true;
}
413
+
414
/// Append the frame's scene-reconstruction anchors (`ARMeshAnchor`) to
/// `data.arAnchors` as `type="mesh"` entries. Anchors of any other class
/// are ignored here. Each emitted entry carries the anchor->world
/// transform (ROW-MAJOR, transposed from ARKit's column-major
/// `simd_float4x4` exactly as `ExtractARAnchors` does) plus the
/// marshalled `ARMeshGeometry`:
///   - meshVertices:        Float32 xyz triplets (anchor-local), packed.
///   - meshFaces:           Uint32 triangle indices.
///   - meshClassifications: optional Uint8 per-face class.
///
/// ARMeshGeometry's buffers are MTLBuffer-backed but CPU-accessible
/// (ARKit allocates them with shared storage), so `.contents()` is read
/// directly on the delegate thread and copied EAGERLY into owned vectors
/// (like the depth bytes); the resulting `ArAnchor` therefore does not
/// depend on the ARFrame's lifetime.
///
/// A mesh anchor whose vertices or faces fail to marshal is SKIPPED — an
/// anchor with `hasMesh=true` and empty geometry is never emitted.
void ExtractARMesh(ARFrame* arFrame, retailens::CameraFrameData& data) {
  for (ARAnchor* anchor in arFrame.anchors) {
    if (![anchor isKindOfClass:[ARMeshAnchor class]]) {
      continue;
    }
    ARMeshGeometry* mesh = ((ARMeshAnchor*)anchor).geometry;
    if (mesh == nil) {
      continue;
    }

    // Vertices and faces are mandatory; give up on this anchor if either
    // cannot be packed.
    std::vector<uint8_t> vertexBytes;
    if (!PackMeshVertices(mesh.vertices, vertexBytes)) {
      continue;
    }
    std::vector<uint8_t> faceBytes;
    if (!PackMeshFaces(mesh.faces, faceBytes)) {
      continue;
    }

    // Classifications are optional — keep the vector empty when the
    // source is absent or unreadable.
    std::vector<uint8_t> classBytes;
    if (mesh.classification != nil &&
        !PackMeshClassifications(mesh.classification, classBytes)) {
      classBytes.clear();
    }

    retailens::ArAnchor entry;
    entry.type = "mesh";
    entry.id = std::string(anchor.identifier.UUIDString.UTF8String);

    // Column-major -> row-major: cell (row, col) comes from columns[col][row].
    const simd_float4x4 matrix = anchor.transform;
    for (int cell = 0; cell < 16; ++cell) {
      const int row = cell / 4;
      const int col = cell % 4;
      entry.transform[cell] = static_cast<double>(matrix.columns[col][row]);
    }

    entry.hasMesh = true;
    entry.meshVertices = std::move(vertexBytes);
    entry.meshFaces = std::move(faceBytes);
    entry.meshClassifications = std::move(classBytes);
    data.arAnchors.push_back(std::move(entry));
  }
}
466
+
467
+ } // anonymous namespace
468
+
469
+ #pragma mark - Obj-C facade
470
+
471
+ @implementation CameraFrameHostObject {
472
+ std::shared_ptr<retailens::CameraFrameJsiHostObject> _hostObject;
473
+ }
474
+
475
/// Build a `CameraFrameHostObject` for one ARKit frame.
///
/// Fills a `retailens::CameraFrameData` from `arFrame` and the already
/// marshalled `pose` (dimensions, pixel format, orientation, timestamp,
/// pose, intrinsics, tracking state, lazy pixel reader, and — when the
/// extraction config enables them — depth / anchors / mesh), then wraps
/// it in the shared `retailens::CameraFrameJsiHostObject`.
///
/// @param arFrame The ARKit frame; it is retained by the pixel reader for
///                as long as the host object lives.
/// @param pose    Pose + intrinsics for this frame.
/// @return A new facade owning the shared JSI host object.
+ (instancetype)fromARFrame:(ARFrame*)arFrame pose:(RNSARFramePose*)pose {
  CameraFrameHostObject* obj = [[self alloc] init];

  retailens::CameraFrameData data;
  data.source = "ar";
  data.width = static_cast<int32_t>(pose.imageWidth);
  data.height = static_cast<int32_t>(pose.imageHeight);

  // ARKit's bi-planar 4:2:0 YCbCr formats (NV12, full or video range) are
  // reported as "yuv". Anything else (rare in ARKit; possible if
  // ARWorldTrackingConfiguration.videoFormat is overridden to BGRA) is
  // reported as "unknown" and logged, so worklets that gate on
  // `pixelFormat === 'yuv'` can be debugged without a screen recording.
  // NOTE(review): the message below still names StitcherFrame.ts, which
  // this release renames to CameraFrame.ts; the text is left unchanged in
  // case log consumers match on it.
  OSType pf = CVPixelBufferGetPixelFormatType(arFrame.capturedImage);
  if (pf == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
      pf == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange) {
    data.pixelFormat = "yuv";
  } else {
    data.pixelFormat = "unknown";
    os_log_error(OS_LOG_DEFAULT,
                 "[StitcherFrame] unexpected ARKit pixel format 0x%x; "
                 "worklet receives pixelFormat='unknown' and toArrayBuffer() "
                 "bytes are first-plane only (layout undefined for unknown "
                 "formats). See StitcherFrame.ts docstring.", (unsigned int)pf);
  }

  // ARKit has no per-frame `orientation`; the pose's
  // imageWidth >= imageHeight discriminator (the lib's `isLandscape`
  // elsewhere) gives a coarse mapping. Worklets that need the exact UI
  // orientation can read it from device-orientation sensors.
  data.orientation =
      (pose.imageWidth >= pose.imageHeight) ? "landscape-right" : "portrait";

  // `ARFrame.timestamp` is an NSTimeInterval in seconds; scale to
  // nanoseconds to match vc Frame.timestamp.
  // NOTE(review): this is believed to be a device-uptime based capture
  // time rather than a wall-clock epoch value — confirm before comparing
  // it with Date-based timestamps.
  data.timestampNs = arFrame.timestamp * 1e9;

  data.qx = pose.qx;
  data.qy = pose.qy;
  data.qz = pose.qz;
  data.qw = pose.qw;
  data.tx = pose.tx;
  data.ty = pose.ty;
  data.tz = pose.tz;
  data.hasTranslation = true;  // AR mode always has translation

  // Per-frame camera intrinsics. `pose` already carries them
  // (`ARCamera.intrinsics` + `imageResolution`, marshalled in
  // `RNSARSession.makePose`), so this is six scalars — effectively free.
  // Always populated for AR frames; the JSI exposes
  // `intrinsics === undefined` only for non-AR (vc) frames, which have no
  // intrinsics surface. NOT gated on the extraction config — it is too
  // cheap to be worth a toggle, and pose-lift consumers expect it.
  data.hasIntrinsics = true;
  data.fx = pose.fx;
  data.fy = pose.fy;
  data.cx = pose.cx;
  data.cy = pose.cy;
  data.intrinsicsImageWidth = static_cast<int32_t>(pose.imageWidth);
  data.intrinsicsImageHeight = static_cast<int32_t>(pose.imageHeight);

  switch (arFrame.camera.trackingState) {
    case ARTrackingStateNotAvailable:
      data.arTrackingState = "notAvailable";
      break;
    case ARTrackingStateLimited:
      data.arTrackingState = "limited";
      break;
    case ARTrackingStateNormal:
      data.arTrackingState = "normal";
      break;
    default:
      // Same fallback as `lightArFrameMetaFromARFrame:pose:`, so an
      // unrecognised state never leaves the field unset.
      data.arTrackingState = "notAvailable";
      break;
  }

  data.pixelReader = std::make_shared<IOSPixelBufferReader>(arFrame);

  // AR depth + anchors + mesh. All of these are EAGER copies out of the
  // ARFrame (depth / confidence bytes and mesh vertex / face /
  // classification bytes are packed into owned vectors; anchor transforms
  // are read into std::array), so none depend on the ARFrame's lifetime
  // the way the pixel reader does. Depth is nullopt on non-LiDAR devices
  // or before the first depth frame; anchors / mesh are empty when none
  // are tracked.
  //
  // GATED on the per-frame extraction config (set from JS via
  // `__stitcherProxy.setExtractionConfig(depth, anchors, mesh)`, driven by
  // the <Camera> enableDepth / enableAnchors / enableMesh props). Defaults
  // are all-false, so a host that doesn't opt in pays ZERO
  // arDepth / arAnchors / mesh extraction cost — only the always-cheap
  // pose / tracking / pixels are populated. The config is snapshotted once
  // so all three extractors see a consistent value for this frame.
  const retailens::ExtractionConfig extractionConfig =
      retailens::getExtractionConfig();
  if (extractionConfig.depth) {
    ExtractARDepth(arFrame, data);
  }
  if (extractionConfig.anchors) {
    ExtractARAnchors(arFrame, data);
  }
  if (extractionConfig.mesh) {
    ExtractARMesh(arFrame, data);
  }

  // Use the static factory (the private ctor enforces shared_ptr
  // ownership — required for `shared_from_this()` inside the JSI
  // `toArrayBuffer` lambda).
  obj->_hostObject =
      retailens::CameraFrameJsiHostObject::create(std::move(data));
  return obj;
}
581
+
582
+ + (NSDictionary *)lightArFrameMetaFromARFrame:(ARFrame *)arFrame
583
+ pose:(RNSARFramePose *)pose {
584
+ // ── Always-present scalars: timestamp / trackingState / pose ──────────
585
+ //
586
+ // timestamp is NANOSECONDS (AR-framework monotonic clock) to match the
587
+ // ARFrameMeta TS contract + CameraFrame.timestampNs. ARFrame.timestamp
588
+ // is CFAbsoluteTime (seconds) → ×1e9.
589
+ NSMutableDictionary *meta = [NSMutableDictionary dictionary];
590
+ meta[@"timestamp"] = @(arFrame.timestamp * 1e9);
591
+
592
+ NSString *trackingState;
593
+ switch (arFrame.camera.trackingState) {
594
+ case ARTrackingStateNotAvailable: trackingState = @"notAvailable"; break;
595
+ case ARTrackingStateLimited: trackingState = @"limited"; break;
596
+ case ARTrackingStateNormal: trackingState = @"normal"; break;
597
+ default: trackingState = @"notAvailable"; break;
598
+ }
599
+ meta[@"trackingState"] = trackingState;
600
+
601
+ // pose: quaternion (x,y,z,w) + translation [x,y,z] — straight off the
602
+ // already-marshalled RNSARFramePose (no re-derivation from the matrix).
603
+ meta[@"pose"] = @{
604
+ @"rotation": @[ @(pose.qx), @(pose.qy), @(pose.qz), @(pose.qw) ],
605
+ @"translation": @[ @(pose.tx), @(pose.ty), @(pose.tz) ],
606
+ };
607
+
608
+ // intrinsics: always attempted; NSNull only when the frame reported a
609
+ // degenerate (zero) capture resolution (the TS contract's `null`).
610
+ if (pose.imageWidth > 0 && pose.imageHeight > 0) {
611
+ meta[@"intrinsics"] = @{
612
+ @"fx": @(pose.fx),
613
+ @"fy": @(pose.fy),
614
+ @"cx": @(pose.cx),
615
+ @"cy": @(pose.cy),
616
+ @"imageWidth": @(pose.imageWidth),
617
+ @"imageHeight": @(pose.imageHeight),
618
+ };
619
+ } else {
620
+ meta[@"intrinsics"] = [NSNull null];
621
+ }
622
+
623
+ // ── Gated, LIGHT fields: depth dims / anchors / mesh counts ───────────
624
+ //
625
+ // Same per-frame extraction config the full host-object path reads
626
+ // (set from JS via __stitcherProxy.setExtractionConfig, driven by the
627
+ // <Camera> enableDepth/enableAnchors/enableMesh props). Snapshot once
628
+ // so all three see a consistent config for this frame.
629
+ const retailens::ExtractionConfig cfg = retailens::getExtractionConfig();
630
+
631
+ // depth: dimensions + whether a confidence channel exists. NO pixel
632
+ // copy — just the depth map's own w/h and a confidenceMap != NULL probe.
633
+ // null when the prop is off OR the device produced no depth this frame.
634
+ id depthValue = [NSNull null];
635
+ if (cfg.depth) {
636
+ ARDepthData *dd = arFrame.sceneDepth;
637
+ if (dd == nil) dd = arFrame.smoothedSceneDepth;
638
+ if (dd != nil) {
639
+ CVPixelBufferRef depthMap = dd.depthMap;
640
+ if (depthMap != NULL) {
641
+ const int w = (int)CVPixelBufferGetWidth(depthMap);
642
+ const int h = (int)CVPixelBufferGetHeight(depthMap);
643
+ if (w > 0 && h > 0) {
644
+ depthValue = @{
645
+ @"width": @(w),
646
+ @"height": @(h),
647
+ @"hasConfidence": @(dd.confidenceMap != NULL),
648
+ };
649
+ }
650
+ }
651
+ }
652
+ }
653
+ meta[@"depth"] = depthValue;
654
+
655
+ // anchors: id / coarse type / row-major 4x4 transform, plus plane
656
+ // alignment + extent + (capable-device) classification. Mirrors
657
+ // ExtractARAnchors above but into an NSDictionary array (no byte
658
+ // marshaling — that path is for the full host object). Empty array
659
+ // when the prop is off (cheap + JSON-stable, matching the TS contract's
660
+ // `Array<...>` rather than null). Mesh anchors are summarised under
661
+ // `mesh` (counts) below and are never listed individually here, even
662
+ // when enableMesh is off — to match Android's collectTrackingAnchors, which
663
+ // surfaces plane/image anchors and emits mesh as a separate summary.
664
+ NSMutableArray *anchorsOut = [NSMutableArray array];
665
+ if (cfg.anchors) {
666
+ for (ARAnchor *a in arFrame.anchors) {
667
+ // ARMeshAnchors are summarised under `mesh`; skip here.
668
+ if ([a isKindOfClass:[ARMeshAnchor class]]) continue;
669
+
670
+ NSMutableDictionary *anchor = [NSMutableDictionary dictionary];
671
+ anchor[@"id"] = a.identifier.UUIDString;
672
+
673
+ if ([a isKindOfClass:[ARPlaneAnchor class]]) {
674
+ anchor[@"type"] = @"plane";
675
+ ARPlaneAnchor *plane = (ARPlaneAnchor *)a;
676
+ anchor[@"alignment"] =
677
+ (plane.alignment == ARPlaneAnchorAlignmentVertical) ? @"vertical"
678
+ : @"horizontal";
679
+ // [extentX, extentZ] in plane-local metres (deprecated `extent`
680
+ // for iOS-15 parity, same as ExtractARAnchors).
681
+ anchor[@"extent"] = @[ @(plane.extent.x), @(plane.extent.z) ];
682
+ if (ARPlaneAnchor.isClassificationSupported &&
683
+ plane.classificationStatus == ARPlaneClassificationStatusKnown) {
684
+ std::string cls = PlaneClassificationString(plane.classification);
685
+ if (!cls.empty()) {
686
+ anchor[@"classification"] =
687
+ [NSString stringWithUTF8String:cls.c_str()];
688
+ }
689
+ }
690
+ } else if ([a isKindOfClass:[ARImageAnchor class]]) {
691
+ anchor[@"type"] = @"image";
692
+ } else {
693
+ anchor[@"type"] = @"point";
694
+ }
695
+
696
+ // Row-major anchor->world (transpose ARKit's column-major matrix),
697
+ // 16 NSNumbers — same transpose as ExtractARAnchors.
698
+ const simd_float4x4 m = a.transform;
699
+ NSMutableArray *transform = [NSMutableArray arrayWithCapacity:16];
700
+ for (int r = 0; r < 4; ++r) {
701
+ for (int c = 0; c < 4; ++c) {
702
+ [transform addObject:@((double)m.columns[c][r])];
703
+ }
704
+ }
705
+ anchor[@"transform"] = transform;
706
+ [anchorsOut addObject:anchor];
707
+ }
708
+ }
709
+ meta[@"anchors"] = anchorsOut;
710
+
711
+ // mesh: anchor / vertex / face COUNTS only (no vertex/face byte
712
+ // marshaling). null when the prop is off. Counts are read from each
713
+ // ARMeshAnchor's geometry sources without touching the MTLBuffer
714
+ // contents (just `.count` on the vertices source + faces element).
715
+ id meshValue = [NSNull null];
716
+ if (cfg.mesh) {
717
+ int anchorCount = 0;
718
+ long vertexCount = 0;
719
+ long faceCount = 0;
720
+ for (ARAnchor *a in arFrame.anchors) {
721
+ if (![a isKindOfClass:[ARMeshAnchor class]]) continue;
722
+ ARMeshAnchor *meshAnchor = (ARMeshAnchor *)a;
723
+ ARMeshGeometry *geometry = meshAnchor.geometry;
724
+ if (geometry == nil) continue;
725
+ anchorCount += 1;
726
+ if (geometry.vertices != nil) {
727
+ vertexCount += (long)geometry.vertices.count;
728
+ }
729
+ if (geometry.faces != nil) {
730
+ faceCount += (long)geometry.faces.count; // triangle (primitive) count
731
+ }
732
+ }
733
+ meshValue = @{
734
+ @"anchorCount": @(anchorCount),
735
+ @"vertexCount": @(vertexCount),
736
+ @"faceCount": @(faceCount),
737
+ };
738
+ }
739
+ meta[@"mesh"] = meshValue;
740
+
741
+ return meta;
742
+ }
743
+
744
+ - (void)invalidate {
745
+   // Forwards to the wrapped host object; a no-op when none is attached.
746
+   if (!_hostObject) return;
747
+   _hostObject->invalidate();
748
+ }
749
+
750
+ - (void*)jsiHostObjectPtr {
751
+   // NULL when no host object is attached; otherwise a caller-owned, heap-boxed
752
+   // std::shared_ptr<jsi::HostObject> copy. Caller (worklet runtime) does:
753
+   //   auto sp = static_cast<std::shared_ptr<jsi::HostObject>*>(ptr);
754
+   //   auto jsObj = jsi::Object::createFromHostObject(rt, *sp);
755
+   //   delete sp;
756
+   using BoxedHostObject = std::shared_ptr<jsi::HostObject>;
757
+   return _hostObject ? new BoxedHostObject(_hostObject) : NULL;
758
+ }
759
+
760
+ @end