@srsergio/taptapp-ar 1.0.82 → 1.0.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/compiler/controller.d.ts +1 -0
- package/dist/compiler/controller.js +1 -1
- package/dist/compiler/matching/matching.js +19 -2
- package/dist/compiler/offline-compiler.d.ts +3 -0
- package/dist/compiler/offline-compiler.js +12 -0
- package/dist/compiler/simple-ar.d.ts +8 -0
- package/dist/compiler/simple-ar.js +33 -3
- package/dist/compiler/utils/fourier-encoder.d.ts +25 -0
- package/dist/compiler/utils/fourier-encoder.js +47 -0
- package/dist/react/use-ar.js +2 -1
- package/package.json +1 -1
- package/src/compiler/controller.ts +1 -1
- package/src/compiler/matching/matching.js +21 -2
- package/src/compiler/offline-compiler.ts +13 -0
- package/src/compiler/simple-ar.ts +39 -5
- package/src/compiler/utils/fourier-encoder.ts +53 -0
- package/src/react/use-ar.ts +3 -2
|
@@ -62,6 +62,7 @@ declare class Controller {
|
|
|
62
62
|
_detectAndMatch(inputData: any, targetIndexes: number[]): Promise<{
|
|
63
63
|
targetIndex: any;
|
|
64
64
|
modelViewTransform: any;
|
|
65
|
+
featurePoints: any[];
|
|
65
66
|
}>;
|
|
66
67
|
_trackAndUpdate(inputData: any, lastModelViewTransform: number[][], targetIndex: number): Promise<{
|
|
67
68
|
modelViewTransform: any;
|
|
@@ -185,7 +185,7 @@ class Controller {
|
|
|
185
185
|
async _detectAndMatch(inputData, targetIndexes) {
|
|
186
186
|
const { featurePoints } = this.fullDetector.detect(inputData);
|
|
187
187
|
const { targetIndex: matchedTargetIndex, modelViewTransform } = await this._workerMatch(featurePoints, targetIndexes);
|
|
188
|
-
return { targetIndex: matchedTargetIndex, modelViewTransform };
|
|
188
|
+
return { targetIndex: matchedTargetIndex, modelViewTransform, featurePoints };
|
|
189
189
|
}
|
|
190
190
|
async _trackAndUpdate(inputData, lastModelViewTransform, targetIndex) {
|
|
191
191
|
const { worldCoords, screenCoords, reliabilities, indices = [], octaveIndex = 0 } = this.tracker.track(inputData, lastModelViewTransform, targetIndex);
|
|
@@ -3,6 +3,8 @@ import { compute as hammingCompute } from "./hamming-distance.js";
|
|
|
3
3
|
import { computeHoughMatches } from "./hough.js";
|
|
4
4
|
import { computeHomography } from "./ransacHomography.js";
|
|
5
5
|
import { multiplyPointHomographyInhomogenous, matrixInverse33 } from "../utils/geometry.js";
|
|
6
|
+
import { FourierEncoder } from "../utils/fourier-encoder.js";
|
|
7
|
+
const encoder = new FourierEncoder(4);
|
|
6
8
|
const INLIER_THRESHOLD = 5.0; // Tightened from 10 to 5 for better precision
|
|
7
9
|
const MIN_NUM_INLIERS = 8; // Restored to 8
|
|
8
10
|
const CLUSTER_MAX_POP = 20;
|
|
@@ -94,7 +96,7 @@ const match = ({ keyframe, querypoints, querywidth, queryheight, debugMode }) =>
|
|
|
94
96
|
return { debugExtra };
|
|
95
97
|
// Second pass with homography guided matching
|
|
96
98
|
const HInv = matrixInverse33(H, 0.00001);
|
|
97
|
-
const dThreshold2 =
|
|
99
|
+
const dThreshold2 = 400; // 20 * 20 - Expanded search window thanks to Fourier filtering
|
|
98
100
|
const matches2 = [];
|
|
99
101
|
const hi00 = HInv[0], hi01 = HInv[1], hi02 = HInv[2];
|
|
100
102
|
const hi10 = HInv[3], hi11 = HInv[4], hi12 = HInv[5];
|
|
@@ -113,14 +115,29 @@ const match = ({ keyframe, querypoints, querywidth, queryheight, debugMode }) =>
|
|
|
113
115
|
const col = querypoint.maxima ? kmax : kmin;
|
|
114
116
|
if (!col)
|
|
115
117
|
continue;
|
|
116
|
-
const cx = col.x, cy = col.y, cd = col.d;
|
|
118
|
+
const cx = col.x, cy = col.y, cd = col.d, cf = col.f;
|
|
117
119
|
const qDesc = querypoint.descriptors;
|
|
120
|
+
// Fourier encoding of the mapped point (where it SHOULD be in the keyframe)
|
|
121
|
+
const qFourier = encoder.encode(mapX / keyframe.w, mapY / keyframe.h);
|
|
118
122
|
for (let k = 0, clen = cx.length; k < clen; k++) {
|
|
119
123
|
const dx = cx[k] - mapX;
|
|
120
124
|
const dy = cy[k] - mapY;
|
|
121
125
|
const d2 = dx * dx + dy * dy;
|
|
122
126
|
if (d2 > dThreshold2)
|
|
123
127
|
continue;
|
|
128
|
+
// 🚀 MOONSHOT: Fourier Spatial Harmony Check
|
|
129
|
+
// We check if the stored point's Fourier signature matches its predicted position
|
|
130
|
+
let fourierSim = 0;
|
|
131
|
+
if (cf) {
|
|
132
|
+
for (let fidx = 0; fidx < 16; fidx++) {
|
|
133
|
+
fourierSim += (cf[k * 16 + fidx] / 127) * qFourier[fidx];
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
else {
|
|
137
|
+
fourierSim = 16; // Backward compatibility
|
|
138
|
+
}
|
|
139
|
+
if (fourierSim < 8)
|
|
140
|
+
continue; // Reject if spatially dissonant (low harmonic match)
|
|
124
141
|
const d = hammingCompute({ v1: cd, v1Offset: k * descSize, v2: qDesc });
|
|
125
142
|
if (d < bestD1) {
|
|
126
143
|
bestD2 = bestD1;
|
|
@@ -5,8 +5,10 @@
|
|
|
5
5
|
* que NO depende de TensorFlow, eliminando todos los problemas de
|
|
6
6
|
* inicialización, bloqueos y compatibilidad.
|
|
7
7
|
*/
|
|
8
|
+
import { FourierEncoder } from "./utils/fourier-encoder.js";
|
|
8
9
|
export declare class OfflineCompiler {
|
|
9
10
|
data: any;
|
|
11
|
+
fourierEncoder: FourierEncoder;
|
|
10
12
|
constructor();
|
|
11
13
|
compileImageTargets(images: any[], progressCallback: (p: number) => void): Promise<any>;
|
|
12
14
|
_compileTarget(targetImages: any[], progressCallback: (p: number) => void): Promise<{
|
|
@@ -117,6 +119,7 @@ export declare class OfflineCompiler {
|
|
|
117
119
|
a: Int16Array<ArrayBuffer>;
|
|
118
120
|
s: Uint8Array<ArrayBuffer>;
|
|
119
121
|
d: Uint32Array<ArrayBuffer>;
|
|
122
|
+
f: Int8Array<ArrayBuffer>;
|
|
120
123
|
t: any;
|
|
121
124
|
};
|
|
122
125
|
_compactTree(node: any): any;
|
|
@@ -9,6 +9,7 @@ import { buildTrackingImageList, buildImageList } from "./image-list.js";
|
|
|
9
9
|
import { extractTrackingFeatures } from "./tracker/extract-utils.js";
|
|
10
10
|
import { DetectorLite } from "./detector/detector-lite.js";
|
|
11
11
|
import { build as hierarchicalClusteringBuild } from "./matching/hierarchical-clustering.js";
|
|
12
|
+
import { FourierEncoder } from "./utils/fourier-encoder.js";
|
|
12
13
|
import * as msgpack from "@msgpack/msgpack";
|
|
13
14
|
// Detect environment
|
|
14
15
|
const isNode = typeof process !== "undefined" &&
|
|
@@ -17,6 +18,7 @@ const isNode = typeof process !== "undefined" &&
|
|
|
17
18
|
const CURRENT_VERSION = 7; // Protocol v7: Moonshot - 4-bit Packed Tracking Data
|
|
18
19
|
export class OfflineCompiler {
|
|
19
20
|
data = null;
|
|
21
|
+
fourierEncoder = new FourierEncoder(4);
|
|
20
22
|
constructor() {
|
|
21
23
|
console.log("⚡ OfflineCompiler: Main thread mode (no workers)");
|
|
22
24
|
}
|
|
@@ -186,6 +188,7 @@ export class OfflineCompiler {
|
|
|
186
188
|
const angle = new Int16Array(count);
|
|
187
189
|
const scale = new Uint8Array(count);
|
|
188
190
|
const descriptors = new Uint32Array(count * 2);
|
|
191
|
+
const fourier = new Int8Array(count * 16); // 4 frequencies * 4 components (sin/cos x/y)
|
|
189
192
|
for (let i = 0; i < count; i++) {
|
|
190
193
|
x[i] = Math.round((points[i].x / width) * 65535);
|
|
191
194
|
y[i] = Math.round((points[i].y / height) * 65535);
|
|
@@ -195,6 +198,11 @@ export class OfflineCompiler {
|
|
|
195
198
|
descriptors[i * 2] = points[i].descriptors[0];
|
|
196
199
|
descriptors[(i * 2) + 1] = points[i].descriptors[1];
|
|
197
200
|
}
|
|
201
|
+
// 🚀 MOONSHOT: Fourier Positional Encoding
|
|
202
|
+
const feat = this.fourierEncoder.encode(points[i].x / width, points[i].y / height);
|
|
203
|
+
for (let j = 0; j < 16; j++) {
|
|
204
|
+
fourier[i * 16 + j] = Math.round(feat[j] * 127);
|
|
205
|
+
}
|
|
198
206
|
}
|
|
199
207
|
return {
|
|
200
208
|
x,
|
|
@@ -202,6 +210,7 @@ export class OfflineCompiler {
|
|
|
202
210
|
a: angle,
|
|
203
211
|
s: scale,
|
|
204
212
|
d: descriptors,
|
|
213
|
+
f: fourier,
|
|
205
214
|
t: this._compactTree(tree.rootNode),
|
|
206
215
|
};
|
|
207
216
|
}
|
|
@@ -278,6 +287,9 @@ export class OfflineCompiler {
|
|
|
278
287
|
if (col.d instanceof Uint8Array) {
|
|
279
288
|
col.d = new Uint32Array(col.d.buffer.slice(col.d.byteOffset, col.d.byteOffset + col.d.byteLength));
|
|
280
289
|
}
|
|
290
|
+
if (col.f instanceof Uint8Array) {
|
|
291
|
+
col.f = new Int8Array(col.f.buffer.slice(col.f.byteOffset, col.f.byteOffset + col.f.byteLength));
|
|
292
|
+
}
|
|
281
293
|
}
|
|
282
294
|
}
|
|
283
295
|
}
|
|
@@ -23,6 +23,10 @@ export interface SimpleAROptions {
|
|
|
23
23
|
}[];
|
|
24
24
|
reliabilities?: number[];
|
|
25
25
|
stabilities?: number[];
|
|
26
|
+
detectionPoints?: {
|
|
27
|
+
x: number;
|
|
28
|
+
y: number;
|
|
29
|
+
}[];
|
|
26
30
|
}) => void) | null;
|
|
27
31
|
cameraConfig?: MediaStreamConstraints['video'];
|
|
28
32
|
debug?: boolean;
|
|
@@ -47,6 +51,10 @@ declare class SimpleAR {
|
|
|
47
51
|
}[];
|
|
48
52
|
reliabilities?: number[];
|
|
49
53
|
stabilities?: number[];
|
|
54
|
+
detectionPoints?: {
|
|
55
|
+
x: number;
|
|
56
|
+
y: number;
|
|
57
|
+
}[];
|
|
50
58
|
}) => void) | null;
|
|
51
59
|
cameraConfig: MediaStreamConstraints['video'];
|
|
52
60
|
debug: boolean;
|
|
@@ -113,7 +113,7 @@ class SimpleAR {
|
|
|
113
113
|
if (this.debug)
|
|
114
114
|
this._updateDebugPanel(this.isTracking);
|
|
115
115
|
}
|
|
116
|
-
const { targetIndex, worldMatrix, modelViewTransform, screenCoords, reliabilities, stabilities } = data;
|
|
116
|
+
const { targetIndex, worldMatrix, modelViewTransform, screenCoords, reliabilities, stabilities, detectionPoints } = data;
|
|
117
117
|
// Project points to screen coordinates
|
|
118
118
|
let projectedPoints = [];
|
|
119
119
|
if (screenCoords && screenCoords.length > 0) {
|
|
@@ -144,6 +144,35 @@ class SimpleAR {
|
|
|
144
144
|
return { x: sx, y: sy };
|
|
145
145
|
});
|
|
146
146
|
}
|
|
147
|
+
let projectedDetectionPoints = [];
|
|
148
|
+
if (detectionPoints && detectionPoints.length > 0) {
|
|
149
|
+
const containerRect = this.container.getBoundingClientRect();
|
|
150
|
+
const videoW = this.video.videoWidth;
|
|
151
|
+
const videoH = this.video.videoHeight;
|
|
152
|
+
const isPortrait = containerRect.height > containerRect.width;
|
|
153
|
+
const isVideoLandscape = videoW > videoH;
|
|
154
|
+
const needsRotation = isPortrait && isVideoLandscape;
|
|
155
|
+
const proj = this.controller.projectionTransform;
|
|
156
|
+
const vW = needsRotation ? videoH : videoW;
|
|
157
|
+
const vH = needsRotation ? videoW : videoH;
|
|
158
|
+
const pScale = Math.max(containerRect.width / vW, containerRect.height / vH);
|
|
159
|
+
const dW = vW * pScale;
|
|
160
|
+
const dH = vH * pScale;
|
|
161
|
+
const oX = (containerRect.width - dW) / 2;
|
|
162
|
+
const oY = (containerRect.height - dH) / 2;
|
|
163
|
+
projectedDetectionPoints = detectionPoints.map((p) => {
|
|
164
|
+
let sx, sy;
|
|
165
|
+
if (needsRotation) {
|
|
166
|
+
sx = oX + (dW / 2) - (p.y - proj[1][2]) * pScale;
|
|
167
|
+
sy = oY + (dH / 2) + (p.x - proj[0][2]) * pScale;
|
|
168
|
+
}
|
|
169
|
+
else {
|
|
170
|
+
sx = oX + (dW / 2) + (p.x - proj[0][2]) * pScale;
|
|
171
|
+
sy = oY + (dH / 2) + (p.y - proj[1][2]) * pScale;
|
|
172
|
+
}
|
|
173
|
+
return { x: sx, y: sy };
|
|
174
|
+
});
|
|
175
|
+
}
|
|
147
176
|
if (worldMatrix) {
|
|
148
177
|
if (!this.isTracking) {
|
|
149
178
|
this.isTracking = true;
|
|
@@ -161,13 +190,14 @@ class SimpleAR {
|
|
|
161
190
|
}
|
|
162
191
|
}
|
|
163
192
|
// Always notify the callback if we have points, or if we just lost tracking
|
|
164
|
-
if (projectedPoints.length > 0 || (worldMatrix === null && data.type === 'updateMatrix')) {
|
|
193
|
+
if (projectedPoints.length > 0 || projectedDetectionPoints.length > 0 || (worldMatrix === null && data.type === 'updateMatrix')) {
|
|
165
194
|
this.onUpdateCallback && this.onUpdateCallback({
|
|
166
195
|
targetIndex,
|
|
167
196
|
worldMatrix,
|
|
168
197
|
screenCoords: projectedPoints,
|
|
169
198
|
reliabilities: reliabilities || [],
|
|
170
|
-
stabilities: stabilities || []
|
|
199
|
+
stabilities: stabilities || [],
|
|
200
|
+
detectionPoints: projectedDetectionPoints
|
|
171
201
|
});
|
|
172
202
|
}
|
|
173
203
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 🚀 Moonshot: Fourier Positional Encoding
|
|
3
|
+
*
|
|
4
|
+
* Maps 2D coordinates (x, y) to a high-dimensional frequency space.
|
|
5
|
+
* Used in Transformer Positional Encoding, NeRFs, and modern Generative AI.
|
|
6
|
+
*
|
|
7
|
+
* Theory: gamma(p) = [sin(2^0 * pi * p), cos(2^0 * pi * p), ..., sin(2^L-1 * pi * p), cos(2^L-1 * pi * p)]
|
|
8
|
+
*/
|
|
9
|
+
export declare class FourierEncoder {
|
|
10
|
+
private frequencies;
|
|
11
|
+
private L;
|
|
12
|
+
constructor(L?: number);
|
|
13
|
+
/**
|
|
14
|
+
* Encodes a normalized coordinate (0-1) into Fourier features
|
|
15
|
+
* @param x Normalized X
|
|
16
|
+
* @param y Normalized Y
|
|
17
|
+
* @returns Float32Array of size 4 * L
|
|
18
|
+
*/
|
|
19
|
+
encode(x: number, y: number): Float32Array;
|
|
20
|
+
/**
|
|
21
|
+
* Fast dot product between two fourier encodings
|
|
22
|
+
* This measures "harmonic spatial similarity"
|
|
23
|
+
*/
|
|
24
|
+
static similarity(v1: Float32Array, v2: Float32Array): number;
|
|
25
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 🚀 Moonshot: Fourier Positional Encoding
|
|
3
|
+
*
|
|
4
|
+
* Maps 2D coordinates (x, y) to a high-dimensional frequency space.
|
|
5
|
+
* Used in Transformer Positional Encoding, NeRFs, and modern Generative AI.
|
|
6
|
+
*
|
|
7
|
+
* Theory: gamma(p) = [sin(2^0 * pi * p), cos(2^0 * pi * p), ..., sin(2^L-1 * pi * p), cos(2^L-1 * pi * p)]
|
|
8
|
+
*/
|
|
9
|
+
export class FourierEncoder {
|
|
10
|
+
frequencies;
|
|
11
|
+
L;
|
|
12
|
+
constructor(L = 4) {
|
|
13
|
+
this.L = L;
|
|
14
|
+
this.frequencies = [];
|
|
15
|
+
for (let i = 0; i < L; i++) {
|
|
16
|
+
this.frequencies.push(Math.pow(2, i) * Math.PI);
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Encodes a normalized coordinate (0-1) into Fourier features
|
|
21
|
+
* @param x Normalized X
|
|
22
|
+
* @param y Normalized Y
|
|
23
|
+
* @returns Float32Array of size 4 * L
|
|
24
|
+
*/
|
|
25
|
+
encode(x, y) {
|
|
26
|
+
const result = new Float32Array(this.L * 4);
|
|
27
|
+
let idx = 0;
|
|
28
|
+
for (const freq of this.frequencies) {
|
|
29
|
+
result[idx++] = Math.sin(freq * x);
|
|
30
|
+
result[idx++] = Math.cos(freq * x);
|
|
31
|
+
result[idx++] = Math.sin(freq * y);
|
|
32
|
+
result[idx++] = Math.cos(freq * y);
|
|
33
|
+
}
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Fast dot product between two fourier encodings
|
|
38
|
+
* This measures "harmonic spatial similarity"
|
|
39
|
+
*/
|
|
40
|
+
static similarity(v1, v2) {
|
|
41
|
+
let dot = 0;
|
|
42
|
+
for (let i = 0; i < v1.length; i++) {
|
|
43
|
+
dot += v1[i] * v2[i];
|
|
44
|
+
}
|
|
45
|
+
return dot / (v1.length / 2); // Normalize by number of components
|
|
46
|
+
}
|
|
47
|
+
}
|
package/dist/react/use-ar.js
CHANGED
|
@@ -40,7 +40,8 @@ export const useAR = (config) => {
|
|
|
40
40
|
overlay: overlayRef.current,
|
|
41
41
|
scale: config.scale,
|
|
42
42
|
debug: false,
|
|
43
|
-
onUpdate: ({ screenCoords, reliabilities, stabilities }) => {
|
|
43
|
+
onUpdate: (data) => {
|
|
44
|
+
const { screenCoords, reliabilities, stabilities } = data;
|
|
44
45
|
if (screenCoords && reliabilities && stabilities) {
|
|
45
46
|
const points = screenCoords.map((p, i) => ({
|
|
46
47
|
x: p.x,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@srsergio/taptapp-ar",
|
|
3
|
-
"version": "1.0.82",
|
|
3
|
+
"version": "1.0.83",
|
|
4
4
|
"description": "Ultra-fast Augmented Reality (AR) SDK for Node.js and Browser. Image tracking with 100% pure JavaScript, zero-dependencies, and high-performance compilation.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"augmented reality",
|
|
@@ -252,7 +252,7 @@ class Controller {
|
|
|
252
252
|
featurePoints,
|
|
253
253
|
targetIndexes,
|
|
254
254
|
);
|
|
255
|
-
return { targetIndex: matchedTargetIndex, modelViewTransform };
|
|
255
|
+
return { targetIndex: matchedTargetIndex, modelViewTransform, featurePoints };
|
|
256
256
|
}
|
|
257
257
|
|
|
258
258
|
async _trackAndUpdate(inputData: any, lastModelViewTransform: number[][], targetIndex: number) {
|
|
@@ -3,6 +3,9 @@ import { compute as hammingCompute } from "./hamming-distance.js";
|
|
|
3
3
|
import { computeHoughMatches } from "./hough.js";
|
|
4
4
|
import { computeHomography } from "./ransacHomography.js";
|
|
5
5
|
import { multiplyPointHomographyInhomogenous, matrixInverse33 } from "../utils/geometry.js";
|
|
6
|
+
import { FourierEncoder } from "../utils/fourier-encoder.js";
|
|
7
|
+
|
|
8
|
+
const encoder = new FourierEncoder(4);
|
|
6
9
|
|
|
7
10
|
const INLIER_THRESHOLD = 5.0; // Tightened from 10 to 5 for better precision
|
|
8
11
|
const MIN_NUM_INLIERS = 8; // Restored to 8
|
|
@@ -108,7 +111,7 @@ const match = ({ keyframe, querypoints, querywidth, queryheight, debugMode }) =>
|
|
|
108
111
|
|
|
109
112
|
// Second pass with homography guided matching
|
|
110
113
|
const HInv = matrixInverse33(H, 0.00001);
|
|
111
|
-
const dThreshold2 =
|
|
114
|
+
const dThreshold2 = 400; // 20 * 20 - Expanded search window thanks to Fourier filtering
|
|
112
115
|
const matches2 = [];
|
|
113
116
|
|
|
114
117
|
const hi00 = HInv[0], hi01 = HInv[1], hi02 = HInv[2];
|
|
@@ -132,9 +135,12 @@ const match = ({ keyframe, querypoints, querywidth, queryheight, debugMode }) =>
|
|
|
132
135
|
const col = querypoint.maxima ? kmax : kmin;
|
|
133
136
|
if (!col) continue;
|
|
134
137
|
|
|
135
|
-
const cx = col.x, cy = col.y, cd = col.d;
|
|
138
|
+
const cx = col.x, cy = col.y, cd = col.d, cf = col.f;
|
|
136
139
|
const qDesc = querypoint.descriptors;
|
|
137
140
|
|
|
141
|
+
// Fourier encoding of the mapped point (where it SHOULD be in the keyframe)
|
|
142
|
+
const qFourier = encoder.encode(mapX / keyframe.w, mapY / keyframe.h);
|
|
143
|
+
|
|
138
144
|
for (let k = 0, clen = cx.length; k < clen; k++) {
|
|
139
145
|
const dx = cx[k] - mapX;
|
|
140
146
|
const dy = cy[k] - mapY;
|
|
@@ -142,6 +148,19 @@ const match = ({ keyframe, querypoints, querywidth, queryheight, debugMode }) =>
|
|
|
142
148
|
|
|
143
149
|
if (d2 > dThreshold2) continue;
|
|
144
150
|
|
|
151
|
+
// 🚀 MOONSHOT: Fourier Spatial Harmony Check
|
|
152
|
+
// We check if the stored point's Fourier signature matches its predicted position
|
|
153
|
+
let fourierSim = 0;
|
|
154
|
+
if (cf) {
|
|
155
|
+
for (let fidx = 0; fidx < 16; fidx++) {
|
|
156
|
+
fourierSim += (cf[k * 16 + fidx] / 127) * qFourier[fidx];
|
|
157
|
+
}
|
|
158
|
+
} else {
|
|
159
|
+
fourierSim = 16; // Backward compatibility
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (fourierSim < 8) continue; // Reject if spatially dissonant (low harmonic match)
|
|
163
|
+
|
|
145
164
|
const d = hammingCompute({ v1: cd, v1Offset: k * descSize, v2: qDesc });
|
|
146
165
|
|
|
147
166
|
if (d < bestD1) {
|
|
@@ -10,6 +10,7 @@ import { buildTrackingImageList, buildImageList } from "./image-list.js";
|
|
|
10
10
|
import { extractTrackingFeatures } from "./tracker/extract-utils.js";
|
|
11
11
|
import { DetectorLite } from "./detector/detector-lite.js";
|
|
12
12
|
import { build as hierarchicalClusteringBuild } from "./matching/hierarchical-clustering.js";
|
|
13
|
+
import { FourierEncoder } from "./utils/fourier-encoder.js";
|
|
13
14
|
import * as msgpack from "@msgpack/msgpack";
|
|
14
15
|
|
|
15
16
|
// Detect environment
|
|
@@ -21,6 +22,7 @@ const CURRENT_VERSION = 7; // Protocol v7: Moonshot - 4-bit Packed Tracking Data
|
|
|
21
22
|
|
|
22
23
|
export class OfflineCompiler {
|
|
23
24
|
data: any = null;
|
|
25
|
+
fourierEncoder = new FourierEncoder(4);
|
|
24
26
|
|
|
25
27
|
constructor() {
|
|
26
28
|
console.log("⚡ OfflineCompiler: Main thread mode (no workers)");
|
|
@@ -230,6 +232,7 @@ export class OfflineCompiler {
|
|
|
230
232
|
const angle = new Int16Array(count);
|
|
231
233
|
const scale = new Uint8Array(count);
|
|
232
234
|
const descriptors = new Uint32Array(count * 2);
|
|
235
|
+
const fourier = new Int8Array(count * 16); // 4 frequencies * 4 components (sin/cos x/y)
|
|
233
236
|
|
|
234
237
|
for (let i = 0; i < count; i++) {
|
|
235
238
|
x[i] = Math.round((points[i].x / width) * 65535);
|
|
@@ -241,6 +244,12 @@ export class OfflineCompiler {
|
|
|
241
244
|
descriptors[i * 2] = points[i].descriptors[0];
|
|
242
245
|
descriptors[(i * 2) + 1] = points[i].descriptors[1];
|
|
243
246
|
}
|
|
247
|
+
|
|
248
|
+
// 🚀 MOONSHOT: Fourier Positional Encoding
|
|
249
|
+
const feat = this.fourierEncoder.encode(points[i].x / width, points[i].y / height);
|
|
250
|
+
for (let j = 0; j < 16; j++) {
|
|
251
|
+
fourier[i * 16 + j] = Math.round(feat[j] * 127);
|
|
252
|
+
}
|
|
244
253
|
}
|
|
245
254
|
|
|
246
255
|
return {
|
|
@@ -249,6 +258,7 @@ export class OfflineCompiler {
|
|
|
249
258
|
a: angle,
|
|
250
259
|
s: scale,
|
|
251
260
|
d: descriptors,
|
|
261
|
+
f: fourier,
|
|
252
262
|
t: this._compactTree(tree.rootNode),
|
|
253
263
|
};
|
|
254
264
|
}
|
|
@@ -337,6 +347,9 @@ export class OfflineCompiler {
|
|
|
337
347
|
if (col.d instanceof Uint8Array) {
|
|
338
348
|
col.d = new Uint32Array(col.d.buffer.slice(col.d.byteOffset, col.d.byteOffset + col.d.byteLength));
|
|
339
349
|
}
|
|
350
|
+
if (col.f instanceof Uint8Array) {
|
|
351
|
+
col.f = new Int8Array(col.f.buffer.slice(col.f.byteOffset, col.f.byteOffset + col.f.byteLength));
|
|
352
|
+
}
|
|
340
353
|
}
|
|
341
354
|
}
|
|
342
355
|
}
|
|
@@ -18,7 +18,8 @@ export interface SimpleAROptions {
|
|
|
18
18
|
worldMatrix: number[],
|
|
19
19
|
screenCoords?: { x: number, y: number }[],
|
|
20
20
|
reliabilities?: number[],
|
|
21
|
-
stabilities?: number[]
|
|
21
|
+
stabilities?: number[],
|
|
22
|
+
detectionPoints?: { x: number, y: number }[]
|
|
22
23
|
}) => void) | null;
|
|
23
24
|
cameraConfig?: MediaStreamConstraints['video'];
|
|
24
25
|
debug?: boolean;
|
|
@@ -36,7 +37,8 @@ class SimpleAR {
|
|
|
36
37
|
worldMatrix: number[],
|
|
37
38
|
screenCoords?: { x: number, y: number }[],
|
|
38
39
|
reliabilities?: number[],
|
|
39
|
-
stabilities?: number[]
|
|
40
|
+
stabilities?: number[],
|
|
41
|
+
detectionPoints?: { x: number, y: number }[]
|
|
40
42
|
}) => void) | null;
|
|
41
43
|
cameraConfig: MediaStreamConstraints['video'];
|
|
42
44
|
debug: boolean;
|
|
@@ -163,7 +165,7 @@ class SimpleAR {
|
|
|
163
165
|
if (this.debug) this._updateDebugPanel(this.isTracking);
|
|
164
166
|
}
|
|
165
167
|
|
|
166
|
-
const { targetIndex, worldMatrix, modelViewTransform, screenCoords, reliabilities, stabilities } = data;
|
|
168
|
+
const { targetIndex, worldMatrix, modelViewTransform, screenCoords, reliabilities, stabilities, detectionPoints } = data;
|
|
167
169
|
|
|
168
170
|
// Project points to screen coordinates
|
|
169
171
|
let projectedPoints = [];
|
|
@@ -197,6 +199,37 @@ class SimpleAR {
|
|
|
197
199
|
});
|
|
198
200
|
}
|
|
199
201
|
|
|
202
|
+
let projectedDetectionPoints = [];
|
|
203
|
+
if (detectionPoints && detectionPoints.length > 0) {
|
|
204
|
+
const containerRect = this.container.getBoundingClientRect();
|
|
205
|
+
const videoW = this.video!.videoWidth;
|
|
206
|
+
const videoH = this.video!.videoHeight;
|
|
207
|
+
const isPortrait = containerRect.height > containerRect.width;
|
|
208
|
+
const isVideoLandscape = videoW > videoH;
|
|
209
|
+
const needsRotation = isPortrait && isVideoLandscape;
|
|
210
|
+
const proj = this.controller!.projectionTransform;
|
|
211
|
+
|
|
212
|
+
const vW = needsRotation ? videoH : videoW;
|
|
213
|
+
const vH = needsRotation ? videoW : videoH;
|
|
214
|
+
const pScale = Math.max(containerRect.width / vW, containerRect.height / vH);
|
|
215
|
+
const dW = vW * pScale;
|
|
216
|
+
const dH = vH * pScale;
|
|
217
|
+
const oX = (containerRect.width - dW) / 2;
|
|
218
|
+
const oY = (containerRect.height - dH) / 2;
|
|
219
|
+
|
|
220
|
+
projectedDetectionPoints = detectionPoints.map((p: any) => {
|
|
221
|
+
let sx, sy;
|
|
222
|
+
if (needsRotation) {
|
|
223
|
+
sx = oX + (dW / 2) - (p.y - proj[1][2]) * pScale;
|
|
224
|
+
sy = oY + (dH / 2) + (p.x - proj[0][2]) * pScale;
|
|
225
|
+
} else {
|
|
226
|
+
sx = oX + (dW / 2) + (p.x - proj[0][2]) * pScale;
|
|
227
|
+
sy = oY + (dH / 2) + (p.y - proj[1][2]) * pScale;
|
|
228
|
+
}
|
|
229
|
+
return { x: sx, y: sy };
|
|
230
|
+
});
|
|
231
|
+
}
|
|
232
|
+
|
|
200
233
|
if (worldMatrix) {
|
|
201
234
|
if (!this.isTracking) {
|
|
202
235
|
this.isTracking = true;
|
|
@@ -215,13 +248,14 @@ class SimpleAR {
|
|
|
215
248
|
}
|
|
216
249
|
|
|
217
250
|
// Always notify the callback if we have points, or if we just lost tracking
|
|
218
|
-
if (projectedPoints.length > 0 || (worldMatrix === null && data.type === 'updateMatrix')) {
|
|
251
|
+
if (projectedPoints.length > 0 || projectedDetectionPoints.length > 0 || (worldMatrix === null && data.type === 'updateMatrix')) {
|
|
219
252
|
this.onUpdateCallback && this.onUpdateCallback({
|
|
220
253
|
targetIndex,
|
|
221
254
|
worldMatrix,
|
|
222
255
|
screenCoords: projectedPoints,
|
|
223
256
|
reliabilities: reliabilities || [],
|
|
224
|
-
stabilities: stabilities || []
|
|
257
|
+
stabilities: stabilities || [],
|
|
258
|
+
detectionPoints: projectedDetectionPoints
|
|
225
259
|
});
|
|
226
260
|
}
|
|
227
261
|
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 🚀 Moonshot: Fourier Positional Encoding
|
|
3
|
+
*
|
|
4
|
+
* Maps 2D coordinates (x, y) to a high-dimensional frequency space.
|
|
5
|
+
* Used in Transformer Positional Encoding, NeRFs, and modern Generative AI.
|
|
6
|
+
*
|
|
7
|
+
* Theory: gamma(p) = [sin(2^0 * pi * p), cos(2^0 * pi * p), ..., sin(2^L-1 * pi * p), cos(2^L-1 * pi * p)]
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export class FourierEncoder {
|
|
11
|
+
private frequencies: number[];
|
|
12
|
+
private L: number;
|
|
13
|
+
|
|
14
|
+
constructor(L: number = 4) {
|
|
15
|
+
this.L = L;
|
|
16
|
+
this.frequencies = [];
|
|
17
|
+
for (let i = 0; i < L; i++) {
|
|
18
|
+
this.frequencies.push(Math.pow(2, i) * Math.PI);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Encodes a normalized coordinate (0-1) into Fourier features
|
|
24
|
+
* @param x Normalized X
|
|
25
|
+
* @param y Normalized Y
|
|
26
|
+
* @returns Float32Array of size 4 * L
|
|
27
|
+
*/
|
|
28
|
+
encode(x: number, y: number): Float32Array {
|
|
29
|
+
const result = new Float32Array(this.L * 4);
|
|
30
|
+
let idx = 0;
|
|
31
|
+
|
|
32
|
+
for (const freq of this.frequencies) {
|
|
33
|
+
result[idx++] = Math.sin(freq * x);
|
|
34
|
+
result[idx++] = Math.cos(freq * x);
|
|
35
|
+
result[idx++] = Math.sin(freq * y);
|
|
36
|
+
result[idx++] = Math.cos(freq * y);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
return result;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Fast dot product between two fourier encodings
|
|
44
|
+
* This measures "harmonic spatial similarity"
|
|
45
|
+
*/
|
|
46
|
+
static similarity(v1: Float32Array, v2: Float32Array): number {
|
|
47
|
+
let dot = 0;
|
|
48
|
+
for (let i = 0; i < v1.length; i++) {
|
|
49
|
+
dot += v1[i] * v2[i];
|
|
50
|
+
}
|
|
51
|
+
return dot / (v1.length / 2); // Normalize by number of components
|
|
52
|
+
}
|
|
53
|
+
}
|
package/src/react/use-ar.ts
CHANGED
|
@@ -62,9 +62,10 @@ export const useAR = (config: ARConfig): UseARReturn => {
|
|
|
62
62
|
overlay: overlayRef.current!,
|
|
63
63
|
scale: config.scale,
|
|
64
64
|
debug: false,
|
|
65
|
-
onUpdate: ({ screenCoords, reliabilities, stabilities }) => {
|
|
65
|
+
onUpdate: (data: any) => {
|
|
66
|
+
const { screenCoords, reliabilities, stabilities } = data;
|
|
66
67
|
if (screenCoords && reliabilities && stabilities) {
|
|
67
|
-
const points = screenCoords.map((p, i) => ({
|
|
68
|
+
const points = screenCoords.map((p: any, i: number) => ({
|
|
68
69
|
x: p.x,
|
|
69
70
|
y: p.y,
|
|
70
71
|
reliability: reliabilities[i],
|