@srsergio/taptapp-ar 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -26
- package/dist/compiler/aframe.js +0 -3
- package/dist/compiler/compiler-base.d.ts +3 -7
- package/dist/compiler/compiler-base.js +28 -14
- package/dist/compiler/compiler.js +1 -1
- package/dist/compiler/compiler.worker.js +1 -1
- package/dist/compiler/controller.js +4 -5
- package/dist/compiler/controller.worker.js +0 -2
- package/dist/compiler/detector/crop-detector.js +0 -2
- package/dist/compiler/detector/detector-lite.d.ts +73 -0
- package/dist/compiler/detector/detector-lite.js +430 -0
- package/dist/compiler/detector/detector.js +236 -243
- package/dist/compiler/detector/kernels/cpu/binomialFilter.js +0 -1
- package/dist/compiler/detector/kernels/cpu/computeLocalization.js +0 -4
- package/dist/compiler/detector/kernels/cpu/computeOrientationHistograms.js +0 -18
- package/dist/compiler/detector/kernels/cpu/fakeShader.js +1 -1
- package/dist/compiler/detector/kernels/cpu/prune.d.ts +7 -1
- package/dist/compiler/detector/kernels/cpu/prune.js +1 -42
- package/dist/compiler/detector/kernels/webgl/upsampleBilinear.js +2 -2
- package/dist/compiler/estimation/refine-estimate.js +0 -1
- package/dist/compiler/estimation/utils.d.ts +1 -1
- package/dist/compiler/estimation/utils.js +1 -14
- package/dist/compiler/image-list.js +4 -4
- package/dist/compiler/input-loader.js +2 -2
- package/dist/compiler/matching/hamming-distance.js +13 -13
- package/dist/compiler/matching/hierarchical-clustering.js +1 -1
- package/dist/compiler/matching/matching.d.ts +20 -4
- package/dist/compiler/matching/matching.js +67 -41
- package/dist/compiler/matching/ransacHomography.js +1 -2
- package/dist/compiler/node-worker.d.ts +1 -0
- package/dist/compiler/node-worker.js +84 -0
- package/dist/compiler/offline-compiler.d.ts +171 -6
- package/dist/compiler/offline-compiler.js +303 -421
- package/dist/compiler/tensorflow-setup.js +27 -1
- package/dist/compiler/three.js +3 -5
- package/dist/compiler/tracker/extract.d.ts +1 -0
- package/dist/compiler/tracker/extract.js +200 -244
- package/dist/compiler/tracker/tracker.d.ts +1 -1
- package/dist/compiler/tracker/tracker.js +13 -18
- package/dist/compiler/utils/cumsum.d.ts +4 -2
- package/dist/compiler/utils/cumsum.js +17 -19
- package/dist/compiler/utils/gpu-compute.d.ts +57 -0
- package/dist/compiler/utils/gpu-compute.js +262 -0
- package/dist/compiler/utils/images.d.ts +4 -4
- package/dist/compiler/utils/images.js +67 -53
- package/dist/compiler/utils/worker-pool.d.ts +14 -0
- package/dist/compiler/utils/worker-pool.js +84 -0
- package/dist/index.d.ts +0 -2
- package/dist/index.js +0 -2
- package/package.json +19 -13
- package/src/compiler/aframe.js +2 -4
- package/src/compiler/compiler-base.js +29 -14
- package/src/compiler/compiler.js +1 -1
- package/src/compiler/compiler.worker.js +1 -1
- package/src/compiler/controller.js +4 -5
- package/src/compiler/controller.worker.js +0 -2
- package/src/compiler/detector/crop-detector.js +0 -2
- package/src/compiler/detector/detector-lite.js +494 -0
- package/src/compiler/detector/detector.js +1052 -1063
- package/src/compiler/detector/kernels/cpu/binomialFilter.js +0 -1
- package/src/compiler/detector/kernels/cpu/computeLocalization.js +0 -4
- package/src/compiler/detector/kernels/cpu/computeOrientationHistograms.js +0 -17
- package/src/compiler/detector/kernels/cpu/fakeShader.js +1 -1
- package/src/compiler/detector/kernels/cpu/prune.js +1 -37
- package/src/compiler/detector/kernels/webgl/upsampleBilinear.js +2 -2
- package/src/compiler/estimation/refine-estimate.js +0 -1
- package/src/compiler/estimation/utils.js +9 -24
- package/src/compiler/image-list.js +4 -4
- package/src/compiler/input-loader.js +2 -2
- package/src/compiler/matching/hamming-distance.js +11 -15
- package/src/compiler/matching/hierarchical-clustering.js +1 -1
- package/src/compiler/matching/matching.js +72 -42
- package/src/compiler/matching/ransacHomography.js +0 -2
- package/src/compiler/node-worker.js +93 -0
- package/src/compiler/offline-compiler.js +339 -504
- package/src/compiler/tensorflow-setup.js +29 -1
- package/src/compiler/three.js +3 -5
- package/src/compiler/tracker/extract.js +211 -267
- package/src/compiler/tracker/tracker.js +13 -22
- package/src/compiler/utils/cumsum.js +17 -19
- package/src/compiler/utils/gpu-compute.js +303 -0
- package/src/compiler/utils/images.js +84 -53
- package/src/compiler/utils/worker-pool.js +89 -0
- package/src/index.ts +0 -2
- package/src/compiler/estimation/esimate-experiment.js +0 -316
- package/src/compiler/estimation/refine-estimate-experiment.js +0 -512
- package/src/react/AREditor.tsx +0 -394
- package/src/react/ProgressDialog.tsx +0 -185
|
@@ -6,1125 +6,1114 @@ import "./kernels/webgl/index.js";
|
|
|
6
6
|
const PYRAMID_MIN_SIZE = 8;
|
|
7
7
|
const PYRAMID_MAX_OCTAVE = 5;
|
|
8
8
|
|
|
9
|
-
const LAPLACIAN_THRESHOLD = 3.0;
|
|
10
|
-
const LAPLACIAN_SQR_THRESHOLD = LAPLACIAN_THRESHOLD * LAPLACIAN_THRESHOLD;
|
|
11
|
-
const EDGE_THRESHOLD = 4.0;
|
|
12
|
-
const EDGE_HESSIAN_THRESHOLD = ((EDGE_THRESHOLD + 1) * (EDGE_THRESHOLD + 1)) / EDGE_THRESHOLD;
|
|
13
|
-
|
|
14
9
|
const NUM_BUCKETS_PER_DIMENSION = 10;
|
|
15
10
|
const MAX_FEATURES_PER_BUCKET = 5;
|
|
16
|
-
|
|
17
|
-
// total max feature points = NUM_BUCKETS * MAX_FEATURES_PER_BUCKET
|
|
18
|
-
|
|
19
|
-
const ORIENTATION_NUM_BINS = 36;
|
|
20
|
-
const ORIENTATION_SMOOTHING_ITERATIONS = 5;
|
|
11
|
+
// total max feature points
|
|
21
12
|
|
|
22
13
|
const ORIENTATION_GAUSSIAN_EXPANSION_FACTOR = 3.0;
|
|
23
14
|
const ORIENTATION_REGION_EXPANSION_FACTOR = 1.5;
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
const FREAK_CONPARISON_COUNT = ((FREAKPOINTS.length - 1) * FREAKPOINTS.length) / 2; // 666
|
|
15
|
+
//const FREAK_CONPARISON_COUNT = ((FREAKPOINTS.length - 1) * FREAKPOINTS.length) / 2; // 666
|
|
27
16
|
|
|
28
17
|
class Detector {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
}
|
|
40
|
-
this.numOctaves = numOctaves;
|
|
41
|
-
|
|
42
|
-
this.tensorCaches = {};
|
|
43
|
-
this.kernelCaches = {};
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// used in compiler
|
|
47
|
-
detectImageData(imageData) {
|
|
48
|
-
const arr = new Uint8ClampedArray(4 * imageData.length);
|
|
49
|
-
for (let i = 0; i < imageData.length; i++) {
|
|
50
|
-
arr[4 * i] = imageData[i];
|
|
51
|
-
arr[4 * i + 1] = imageData[i];
|
|
52
|
-
arr[4 * i + 2] = imageData[i];
|
|
53
|
-
arr[4 * i + 3] = 255;
|
|
54
|
-
}
|
|
55
|
-
const img = new ImageData(arr, this.width, this.height);
|
|
56
|
-
return this.detect(img);
|
|
57
|
-
}
|
|
58
|
-
/**
|
|
59
|
-
*
|
|
60
|
-
* @param {tf.Tensor<tf.Rank>} inputImageT
|
|
61
|
-
* @returns
|
|
62
|
-
*/
|
|
63
|
-
detect(inputImageT) {
|
|
64
|
-
let debugExtra = null;
|
|
65
|
-
|
|
66
|
-
// Build gaussian pyramid images, two images per octave
|
|
67
|
-
/** @type {Array<Array<tf.Tensor<tf.Rank>>} */
|
|
68
|
-
const pyramidImagesT = [];
|
|
69
|
-
//console.log("Detector::Building pyramid Images...");
|
|
70
|
-
for (let i = 0; i < this.numOctaves; i++) {
|
|
71
|
-
let image1T;
|
|
72
|
-
let image2T;
|
|
73
|
-
|
|
74
|
-
if (i === 0) {
|
|
75
|
-
image1T = this._applyFilter(inputImageT);
|
|
76
|
-
} else {
|
|
77
|
-
image1T = this._downsampleBilinear(pyramidImagesT[i - 1][pyramidImagesT[i - 1].length - 1]);
|
|
78
|
-
}
|
|
79
|
-
image2T = this._applyFilter(image1T);
|
|
80
|
-
pyramidImagesT.push([image1T, image2T]);
|
|
81
|
-
}
|
|
82
|
-
//console.log("Detector::Building dog images...");
|
|
83
|
-
// Build difference-of-gaussian (dog) pyramid
|
|
84
|
-
/** @type {tf.Tensor<tf.Rank>[]} */
|
|
85
|
-
const dogPyramidImagesT = [];
|
|
86
|
-
for (let i = 0; i < this.numOctaves; i++) {
|
|
87
|
-
let dogImageT = this._differenceImageBinomial(pyramidImagesT[i][0], pyramidImagesT[i][1]);
|
|
88
|
-
dogPyramidImagesT.push(dogImageT);
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
// find local maximum/minimum
|
|
92
|
-
/** @type {tf.Tensor<tf.Rank>[]} */
|
|
93
|
-
const extremasResultsT = [];
|
|
94
|
-
for (let i = 1; i < this.numOctaves - 1; i++) {
|
|
95
|
-
const extremasResultT = this._buildExtremas(
|
|
96
|
-
dogPyramidImagesT[i - 1],
|
|
97
|
-
dogPyramidImagesT[i],
|
|
98
|
-
dogPyramidImagesT[i + 1],
|
|
99
|
-
);
|
|
100
|
-
extremasResultsT.push(extremasResultT);
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
// divide the input into N by N buckets, and for each bucket,
|
|
104
|
-
// collect the top 5 most significant extrema across extremas in all scale level
|
|
105
|
-
// result would be NUM_BUCKETS x NUM_FEATURES_PER_BUCKET extremas
|
|
106
|
-
const prunedExtremasList = this._applyPrune(extremasResultsT);
|
|
107
|
-
|
|
108
|
-
const prunedExtremasT = this._computeLocalization(prunedExtremasList, dogPyramidImagesT);
|
|
109
|
-
|
|
110
|
-
// compute the orientation angle for each pruned extremas
|
|
111
|
-
const extremaHistogramsT = this._computeOrientationHistograms(prunedExtremasT, pyramidImagesT);
|
|
112
|
-
|
|
113
|
-
const smoothedHistogramsT = this._smoothHistograms(extremaHistogramsT);
|
|
114
|
-
const extremaAnglesT = this._computeExtremaAngles(smoothedHistogramsT);
|
|
115
|
-
|
|
116
|
-
// to compute freak descriptors, we first find the pixel value of 37 freak points for each extrema
|
|
117
|
-
const extremaFreaksT = this._computeExtremaFreak(
|
|
118
|
-
pyramidImagesT,
|
|
119
|
-
prunedExtremasT,
|
|
120
|
-
extremaAnglesT,
|
|
121
|
-
);
|
|
122
|
-
|
|
123
|
-
// compute the binary descriptors
|
|
124
|
-
const freakDescriptorsT = this._computeFreakDescriptors(extremaFreaksT);
|
|
125
|
-
|
|
126
|
-
const prunedExtremasArr = prunedExtremasT.arraySync();
|
|
127
|
-
const extremaAnglesArr = extremaAnglesT.arraySync();
|
|
128
|
-
const freakDescriptorsArr = freakDescriptorsT.arraySync();
|
|
129
|
-
|
|
130
|
-
if (this.debugMode) {
|
|
131
|
-
debugExtra = {
|
|
132
|
-
pyramidImages: pyramidImagesT.map((ts) => ts.map((t) => t.arraySync())),
|
|
133
|
-
dogPyramidImages: dogPyramidImagesT.map((t) => (t ? t.arraySync() : null)),
|
|
134
|
-
extremasResults: extremasResultsT.map((t) => t.arraySync()),
|
|
135
|
-
extremaAngles: extremaAnglesT.arraySync(),
|
|
136
|
-
prunedExtremas: prunedExtremasList,
|
|
137
|
-
localizedExtremas: prunedExtremasT.arraySync(),
|
|
138
|
-
};
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
pyramidImagesT.forEach((ts) => ts.forEach((t) => t.dispose()));
|
|
142
|
-
dogPyramidImagesT.forEach((t) => t && t.dispose());
|
|
143
|
-
extremasResultsT.forEach((t) => t.dispose());
|
|
144
|
-
prunedExtremasT.dispose();
|
|
145
|
-
extremaHistogramsT.dispose();
|
|
146
|
-
smoothedHistogramsT.dispose();
|
|
147
|
-
extremaAnglesT.dispose();
|
|
148
|
-
extremaFreaksT.dispose();
|
|
149
|
-
freakDescriptorsT.dispose();
|
|
150
|
-
|
|
151
|
-
const featurePoints = [];
|
|
152
|
-
|
|
153
|
-
for (let i = 0; i < prunedExtremasArr.length; i++) {
|
|
154
|
-
if (prunedExtremasArr[i][0] == 0) continue;
|
|
155
|
-
|
|
156
|
-
const descriptors = [];
|
|
157
|
-
for (let m = 0; m < freakDescriptorsArr[i].length; m += 4) {
|
|
158
|
-
const v1 = freakDescriptorsArr[i][m];
|
|
159
|
-
const v2 = freakDescriptorsArr[i][m + 1];
|
|
160
|
-
const v3 = freakDescriptorsArr[i][m + 2];
|
|
161
|
-
const v4 = freakDescriptorsArr[i][m + 3];
|
|
162
|
-
|
|
163
|
-
let combined = v1 * 16777216 + v2 * 65536 + v3 * 256 + v4;
|
|
164
|
-
//if (m === freakDescriptorsArr[i].length-4) { // last one, legacy reason
|
|
165
|
-
// combined /= 32;
|
|
166
|
-
//}
|
|
167
|
-
descriptors.push(combined);
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
const octave = prunedExtremasArr[i][1];
|
|
171
|
-
const y = prunedExtremasArr[i][2];
|
|
172
|
-
const x = prunedExtremasArr[i][3];
|
|
173
|
-
const originalX = x * Math.pow(2, octave) + Math.pow(2, octave - 1) - 0.5;
|
|
174
|
-
const originalY = y * Math.pow(2, octave) + Math.pow(2, octave - 1) - 0.5;
|
|
175
|
-
const scale = Math.pow(2, octave);
|
|
176
|
-
|
|
177
|
-
featurePoints.push({
|
|
178
|
-
maxima: prunedExtremasArr[i][0] > 0,
|
|
179
|
-
x: originalX,
|
|
180
|
-
y: originalY,
|
|
181
|
-
scale: scale,
|
|
182
|
-
angle: extremaAnglesArr[i],
|
|
183
|
-
descriptors: descriptors,
|
|
184
|
-
});
|
|
185
|
-
}
|
|
186
|
-
//console.log("feature points", featurePoints);
|
|
187
|
-
//console.table(tf.memory());
|
|
188
|
-
return { featurePoints, debugExtra };
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
_computeFreakDescriptors(extremaFreaks) {
|
|
192
|
-
if (!this.tensorCaches.computeFreakDescriptors) {
|
|
193
|
-
const in1Arr = [];
|
|
194
|
-
const in2Arr = [];
|
|
195
|
-
for (let k1 = 0; k1 < extremaFreaks.shape[1]; k1++) {
|
|
196
|
-
for (let k2 = k1 + 1; k2 < extremaFreaks.shape[1]; k2++) {
|
|
197
|
-
in1Arr.push(k1);
|
|
198
|
-
in2Arr.push(k2);
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
const in1 = tf.tensor(in1Arr, [in1Arr.length]).cast("int32");
|
|
202
|
-
const in2 = tf.tensor(in2Arr, [in2Arr.length]).cast("int32");
|
|
203
|
-
|
|
204
|
-
this.tensorCaches.computeFreakDescriptors = {
|
|
205
|
-
positionT: tf.keep(tf.stack([in1, in2], 1)),
|
|
206
|
-
};
|
|
207
|
-
}
|
|
208
|
-
const { positionT } = this.tensorCaches.computeFreakDescriptors;
|
|
209
|
-
|
|
210
|
-
// encode 8 bits into one number
|
|
211
|
-
// trying to encode 16 bits give wrong result in iOS. may integer precision issue
|
|
212
|
-
const descriptorCount = Math.ceil(FREAK_CONPARISON_COUNT / 8);
|
|
213
|
-
/*
|
|
214
|
-
if (!this.kernelCaches.computeFreakDescriptors) {
|
|
215
|
-
const kernel = {
|
|
216
|
-
variableNames: ['freak', 'p'],
|
|
217
|
-
outputShape: [extremaFreaks.shape[0], descriptorCount],
|
|
218
|
-
userCode: `
|
|
219
|
-
void main() {
|
|
220
|
-
ivec2 coords = getOutputCoords();
|
|
221
|
-
int featureIndex = coords[0];
|
|
222
|
-
int descIndex = coords[1] * 8;
|
|
223
|
-
|
|
224
|
-
int sum = 0;
|
|
225
|
-
for (int i = 0; i < 8; i++) {
|
|
226
|
-
if (descIndex + i >= ${FREAK_CONPARISON_COUNT}) {
|
|
227
|
-
continue;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
int p1 = int(getP(descIndex + i, 0));
|
|
231
|
-
int p2 = int(getP(descIndex + i, 1));
|
|
232
|
-
|
|
233
|
-
float v1 = getFreak(featureIndex, p1);
|
|
234
|
-
float v2 = getFreak(featureIndex, p2);
|
|
235
|
-
|
|
236
|
-
if (v1 < v2 + 0.01) {
|
|
237
|
-
sum += int(pow(2.0, float(7 - i)));
|
|
238
|
-
}
|
|
18
|
+
constructor(width, height, debugMode = false) {
|
|
19
|
+
this.debugMode = debugMode;
|
|
20
|
+
this.width = width;
|
|
21
|
+
this.height = height;
|
|
22
|
+
let numOctaves = 0;
|
|
23
|
+
while (width >= PYRAMID_MIN_SIZE && height >= PYRAMID_MIN_SIZE) {
|
|
24
|
+
width /= 2;
|
|
25
|
+
height /= 2;
|
|
26
|
+
numOctaves++;
|
|
27
|
+
if (numOctaves === PYRAMID_MAX_OCTAVE) break;
|
|
239
28
|
}
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
29
|
+
this.numOctaves = numOctaves;
|
|
30
|
+
|
|
31
|
+
this.tensorCaches = {};
|
|
32
|
+
this.kernelCaches = {};
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// used in compiler
|
|
36
|
+
detectImageData(imageData) {
|
|
37
|
+
const arr = new Uint8ClampedArray(4 * imageData.length);
|
|
38
|
+
for (let i = 0; i < imageData.length; i++) {
|
|
39
|
+
arr[4 * i] = imageData[i];
|
|
40
|
+
arr[4 * i + 1] = imageData[i];
|
|
41
|
+
arr[4 * i + 2] = imageData[i];
|
|
42
|
+
arr[4 * i + 3] = 255;
|
|
245
43
|
}
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
/* if (!this.kernelCaches._computeExtremaFreak) {
|
|
272
|
-
const imageVariableNames = [];
|
|
273
|
-
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
274
|
-
imageVariableNames.push('image' + i);
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
let pixelsSubCodes = `float getPixel(int octave, int y, int x) {`;
|
|
278
|
-
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
279
|
-
pixelsSubCodes += `
|
|
280
|
-
if (octave == ${i}) {
|
|
281
|
-
return getImage${i}(y, x);
|
|
282
|
-
}
|
|
283
|
-
`
|
|
284
|
-
}
|
|
285
|
-
pixelsSubCodes += `}`;
|
|
286
|
-
|
|
287
|
-
const kernel = {
|
|
288
|
-
variableNames: [...imageVariableNames, 'extrema', 'angles', 'freakPoints'],
|
|
289
|
-
outputShape: [prunedExtremas.shape[0], FREAKPOINTS.length],
|
|
290
|
-
userCode: `
|
|
291
|
-
${pixelsSubCodes}
|
|
292
|
-
void main() {
|
|
293
|
-
ivec2 coords = getOutputCoords();
|
|
294
|
-
int featureIndex = coords[0];
|
|
295
|
-
int freakIndex = coords[1];
|
|
296
|
-
|
|
297
|
-
float freakSigma = getFreakPoints(freakIndex, 0);
|
|
298
|
-
float freakX = getFreakPoints(freakIndex, 1);
|
|
299
|
-
float freakY = getFreakPoints(freakIndex, 2);
|
|
300
|
-
|
|
301
|
-
int octave = int(getExtrema(featureIndex, 1));
|
|
302
|
-
float inputY = getExtrema(featureIndex, 2);
|
|
303
|
-
float inputX = getExtrema(featureIndex, 3);
|
|
304
|
-
float inputAngle = getAngles(featureIndex);
|
|
305
|
-
float cos = ${FREAK_EXPANSION_FACTOR}. * cos(inputAngle);
|
|
306
|
-
float sin = ${FREAK_EXPANSION_FACTOR}. * sin(inputAngle);
|
|
307
|
-
|
|
308
|
-
float yp = inputY + freakX * sin + freakY * cos;
|
|
309
|
-
float xp = inputX + freakX * cos + freakY * -sin;
|
|
310
|
-
|
|
311
|
-
int x0 = int(floor(xp));
|
|
312
|
-
int x1 = x0 + 1;
|
|
313
|
-
int y0 = int(floor(yp));
|
|
314
|
-
int y1 = y0 + 1;
|
|
315
|
-
|
|
316
|
-
float f1 = getPixel(octave, y0, x0);
|
|
317
|
-
float f2 = getPixel(octave, y0, x1);
|
|
318
|
-
float f3 = getPixel(octave, y1, x0);
|
|
319
|
-
float f4 = getPixel(octave, y1, x1);
|
|
320
|
-
|
|
321
|
-
float x1f = float(x1);
|
|
322
|
-
float y1f = float(y1);
|
|
323
|
-
float x0f = float(x0);
|
|
324
|
-
float y0f = float(y0);
|
|
325
|
-
|
|
326
|
-
// ratio for interpolation between four neighbouring points
|
|
327
|
-
float value = (x1f - xp) * (y1f - yp) * f1
|
|
328
|
-
+ (xp - x0f) * (y1f - yp) * f2
|
|
329
|
-
+ (x1f - xp) * (yp - y0f) * f3
|
|
330
|
-
+ (xp - x0f) * (yp - y0f) * f4;
|
|
331
|
-
|
|
332
|
-
setOutput(value);
|
|
333
|
-
}
|
|
334
|
-
`
|
|
44
|
+
const img = new ImageData(arr, this.width, this.height);
|
|
45
|
+
return this.detect(img);
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
*
|
|
49
|
+
* @param {tf.Tensor<tf.Rank>} inputImageT
|
|
50
|
+
* @returns
|
|
51
|
+
*/
|
|
52
|
+
detect(inputImageT) {
|
|
53
|
+
let debugExtra = null;
|
|
54
|
+
|
|
55
|
+
// Build gaussian pyramid images, two images per octave
|
|
56
|
+
/** @type {Array<Array<tf.Tensor<tf.Rank>>} */
|
|
57
|
+
const pyramidImagesT = [];
|
|
58
|
+
//console.log("Detector::Building pyramid Images...");
|
|
59
|
+
for (let i = 0; i < this.numOctaves; i++) {
|
|
60
|
+
let image1T;
|
|
61
|
+
let image2T;
|
|
62
|
+
|
|
63
|
+
if (i === 0) {
|
|
64
|
+
image1T = this._applyFilter(inputImageT);
|
|
65
|
+
} else {
|
|
66
|
+
image1T = this._downsampleBilinear(pyramidImagesT[i - 1][pyramidImagesT[i - 1].length - 1]);
|
|
335
67
|
}
|
|
68
|
+
image2T = this._applyFilter(image1T);
|
|
69
|
+
pyramidImagesT.push([image1T, image2T]);
|
|
70
|
+
}
|
|
71
|
+
//console.log("Detector::Building dog images...");
|
|
72
|
+
// Build difference-of-gaussian (dog) pyramid
|
|
73
|
+
/** @type {tf.Tensor<tf.Rank>[]} */
|
|
74
|
+
const dogPyramidImagesT = [];
|
|
75
|
+
for (let i = 0; i < this.numOctaves; i++) {
|
|
76
|
+
let dogImageT = this._differenceImageBinomial(pyramidImagesT[i][0], pyramidImagesT[i][1]);
|
|
77
|
+
dogPyramidImagesT.push(dogImageT);
|
|
78
|
+
}
|
|
336
79
|
|
|
337
|
-
|
|
338
|
-
} */
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
prunedExtremasAngles,
|
|
348
|
-
freakPointsT,
|
|
349
|
-
pyramidImagesLength: pyramidImagesT.length,
|
|
350
|
-
});
|
|
351
|
-
});
|
|
352
|
-
}
|
|
353
|
-
/**
|
|
354
|
-
*
|
|
355
|
-
* @param {tf.Tensor<tf.Rank>} histograms
|
|
356
|
-
* @returns
|
|
357
|
-
*/
|
|
358
|
-
_computeExtremaAngles(histograms) {
|
|
359
|
-
/* if (!this.kernelCaches.computeExtremaAngles) {
|
|
360
|
-
const kernel = {
|
|
361
|
-
variableNames: ['histogram'],
|
|
362
|
-
outputShape: [histograms.shape[0]],
|
|
363
|
-
userCode: `
|
|
364
|
-
void main() {
|
|
365
|
-
int featureIndex = getOutputCoords();
|
|
366
|
-
|
|
367
|
-
int maxIndex = 0;
|
|
368
|
-
for (int i = 1; i < ${ORIENTATION_NUM_BINS}; i++) {
|
|
369
|
-
if (getHistogram(featureIndex, i) > getHistogram(featureIndex, maxIndex)) {
|
|
370
|
-
maxIndex = i;
|
|
371
|
-
}
|
|
80
|
+
// find local maximum/minimum
|
|
81
|
+
/** @type {tf.Tensor<tf.Rank>[]} */
|
|
82
|
+
const extremasResultsT = [];
|
|
83
|
+
for (let i = 1; i < this.numOctaves - 1; i++) {
|
|
84
|
+
const extremasResultT = this._buildExtremas(
|
|
85
|
+
dogPyramidImagesT[i - 1],
|
|
86
|
+
dogPyramidImagesT[i],
|
|
87
|
+
dogPyramidImagesT[i + 1],
|
|
88
|
+
);
|
|
89
|
+
extremasResultsT.push(extremasResultT);
|
|
372
90
|
}
|
|
373
91
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
92
|
+
// divide the input into N by N buckets, and for each bucket,
|
|
93
|
+
// collect the top 5 most significant extrema across extremas in all scale level
|
|
94
|
+
// result would be NUM_BUCKETS x NUM_FEATURES_PER_BUCKET extremas
|
|
95
|
+
const prunedExtremasList = this._applyPrune(extremasResultsT);
|
|
96
|
+
|
|
97
|
+
const prunedExtremasT = this._computeLocalization(prunedExtremasList, dogPyramidImagesT);
|
|
98
|
+
|
|
99
|
+
// compute the orientation angle for each pruned extremas
|
|
100
|
+
const extremaHistogramsT = this._computeOrientationHistograms(prunedExtremasT, pyramidImagesT);
|
|
101
|
+
|
|
102
|
+
const smoothedHistogramsT = this._smoothHistograms(extremaHistogramsT);
|
|
103
|
+
const extremaAnglesT = this._computeExtremaAngles(smoothedHistogramsT);
|
|
104
|
+
|
|
105
|
+
// to compute freak descriptors, we first find the pixel value of 37 freak points for each extrema
|
|
106
|
+
const extremaFreaksT = this._computeExtremaFreak(
|
|
107
|
+
pyramidImagesT,
|
|
108
|
+
prunedExtremasT,
|
|
109
|
+
extremaAnglesT,
|
|
110
|
+
);
|
|
111
|
+
|
|
112
|
+
// compute the binary descriptors
|
|
113
|
+
const freakDescriptorsT = this._computeFreakDescriptors(extremaFreaksT);
|
|
114
|
+
|
|
115
|
+
const prunedExtremasArr = prunedExtremasT.arraySync();
|
|
116
|
+
const extremaAnglesArr = extremaAnglesT.arraySync();
|
|
117
|
+
const freakDescriptorsArr = freakDescriptorsT.arraySync();
|
|
118
|
+
|
|
119
|
+
if (this.debugMode) {
|
|
120
|
+
debugExtra = {
|
|
121
|
+
pyramidImages: pyramidImagesT.map((ts) => ts.map((t) => t.arraySync())),
|
|
122
|
+
dogPyramidImages: dogPyramidImagesT.map((t) => (t ? t.arraySync() : null)),
|
|
123
|
+
extremasResults: extremasResultsT.map((t) => t.arraySync()),
|
|
124
|
+
extremaAngles: extremaAnglesT.arraySync(),
|
|
125
|
+
prunedExtremas: prunedExtremasList,
|
|
126
|
+
localizedExtremas: prunedExtremasT.arraySync(),
|
|
127
|
+
};
|
|
408
128
|
}
|
|
409
129
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
130
|
+
pyramidImagesT.forEach((ts) => ts.forEach((t) => t.dispose()));
|
|
131
|
+
dogPyramidImagesT.forEach((t) => t && t.dispose());
|
|
132
|
+
extremasResultsT.forEach((t) => t.dispose());
|
|
133
|
+
prunedExtremasT.dispose();
|
|
134
|
+
extremaHistogramsT.dispose();
|
|
135
|
+
smoothedHistogramsT.dispose();
|
|
136
|
+
extremaAnglesT.dispose();
|
|
137
|
+
extremaFreaksT.dispose();
|
|
138
|
+
freakDescriptorsT.dispose();
|
|
139
|
+
|
|
140
|
+
const featurePoints = [];
|
|
141
|
+
|
|
142
|
+
for (let i = 0; i < prunedExtremasArr.length; i++) {
|
|
143
|
+
if (prunedExtremasArr[i][0] == 0) continue;
|
|
144
|
+
|
|
145
|
+
const descriptors = [];
|
|
146
|
+
for (let m = 0; m < freakDescriptorsArr[i].length; m += 4) {
|
|
147
|
+
const v1 = freakDescriptorsArr[i][m];
|
|
148
|
+
const v2 = freakDescriptorsArr[i][m + 1];
|
|
149
|
+
const v3 = freakDescriptorsArr[i][m + 2];
|
|
150
|
+
const v4 = freakDescriptorsArr[i][m + 3];
|
|
151
|
+
|
|
152
|
+
let combined = v1 * 16777216 + v2 * 65536 + v3 * 256 + v4;
|
|
153
|
+
//if (m === freakDescriptorsArr[i].length-4) { // last one, legacy reason
|
|
154
|
+
// combined /= 32;
|
|
155
|
+
//}
|
|
156
|
+
descriptors.push(combined);
|
|
414
157
|
}
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
const radialProperties = [];
|
|
448
|
-
for (let y = -radiusCeil; y <= radiusCeil; y++) {
|
|
449
|
-
for (let x = -radiusCeil; x <= radiusCeil; x++) {
|
|
450
|
-
const distanceSquare = x * x + y * y;
|
|
451
|
-
|
|
452
|
-
// may just assign w = 1 will do, this could be over complicated.
|
|
453
|
-
if (distanceSquare <= radius * radius) {
|
|
454
|
-
const _x = distanceSquare * gwScale;
|
|
455
|
-
// fast expontenial approx
|
|
456
|
-
let w =
|
|
457
|
-
(720 + _x * (720 + _x * (360 + _x * (120 + _x * (30 + _x * (6 + _x)))))) *
|
|
458
|
-
0.0013888888;
|
|
459
|
-
radialProperties.push([y, x, w]);
|
|
460
|
-
}
|
|
461
|
-
}
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
this.tensorCaches.orientationHistograms = {
|
|
465
|
-
radialPropertiesT: tf.keep(tf.tensor(radialProperties, [radialProperties.length, 3])),
|
|
466
|
-
};
|
|
467
|
-
});
|
|
468
|
-
}
|
|
469
|
-
const { radialPropertiesT } = this.tensorCaches.orientationHistograms;
|
|
470
|
-
|
|
471
|
-
/* if (!this.kernelCaches.computeOrientationHistograms) {
|
|
472
|
-
const imageVariableNames = [];
|
|
473
|
-
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
474
|
-
imageVariableNames.push('image' + i);
|
|
158
|
+
|
|
159
|
+
const octave = prunedExtremasArr[i][1];
|
|
160
|
+
const y = prunedExtremasArr[i][2];
|
|
161
|
+
const x = prunedExtremasArr[i][3];
|
|
162
|
+
const originalX = x * Math.pow(2, octave) + Math.pow(2, octave - 1) - 0.5;
|
|
163
|
+
const originalY = y * Math.pow(2, octave) + Math.pow(2, octave - 1) - 0.5;
|
|
164
|
+
const scale = Math.pow(2, octave);
|
|
165
|
+
|
|
166
|
+
featurePoints.push({
|
|
167
|
+
maxima: prunedExtremasArr[i][0] > 0,
|
|
168
|
+
x: originalX,
|
|
169
|
+
y: originalY,
|
|
170
|
+
scale: scale,
|
|
171
|
+
angle: extremaAnglesArr[i],
|
|
172
|
+
descriptors: descriptors,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
//console.log("feature points", featurePoints);
|
|
176
|
+
//console.table(tf.memory());
|
|
177
|
+
return { featurePoints, debugExtra };
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
_computeFreakDescriptors(extremaFreaks) {
|
|
181
|
+
if (!this.tensorCaches.computeFreakDescriptors) {
|
|
182
|
+
const in1Arr = [];
|
|
183
|
+
const in2Arr = [];
|
|
184
|
+
for (let k1 = 0; k1 < extremaFreaks.shape[1]; k1++) {
|
|
185
|
+
for (let k2 = k1 + 1; k2 < extremaFreaks.shape[1]; k2++) {
|
|
186
|
+
in1Arr.push(k1);
|
|
187
|
+
in2Arr.push(k2);
|
|
188
|
+
}
|
|
475
189
|
}
|
|
190
|
+
const in1 = tf.tensor(in1Arr, [in1Arr.length]).cast("int32");
|
|
191
|
+
const in2 = tf.tensor(in2Arr, [in2Arr.length]).cast("int32");
|
|
476
192
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
193
|
+
this.tensorCaches.computeFreakDescriptors = {
|
|
194
|
+
positionT: tf.keep(tf.stack([in1, in2], 1)),
|
|
195
|
+
};
|
|
196
|
+
}
|
|
197
|
+
const { positionT } = this.tensorCaches.computeFreakDescriptors;
|
|
198
|
+
|
|
199
|
+
// encode 8 bits into one number
|
|
200
|
+
// trying to encode 16 bits give wrong result in iOS. may integer precision issue
|
|
201
|
+
/*
|
|
202
|
+
if (!this.kernelCaches.computeFreakDescriptors) {
|
|
203
|
+
const kernel = {
|
|
204
|
+
variableNames: ['freak', 'p'],
|
|
205
|
+
outputShape: [extremaFreaks.shape[0], descriptorCount],
|
|
206
|
+
userCode: `
|
|
207
|
+
void main() {
|
|
208
|
+
ivec2 coords = getOutputCoords();
|
|
209
|
+
int featureIndex = coords[0];
|
|
210
|
+
int descIndex = coords[1] * 8;
|
|
211
|
+
|
|
212
|
+
int sum = 0;
|
|
213
|
+
for (int i = 0; i < 8; i++) {
|
|
214
|
+
if (descIndex + i >= ${ FREAK_CONPARISON_COUNT }) {
|
|
215
|
+
continue;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
int p1 = int(getP(descIndex + i, 0));
|
|
219
|
+
int p2 = int(getP(descIndex + i, 1));
|
|
220
|
+
|
|
221
|
+
float v1 = getFreak(featureIndex, p1);
|
|
222
|
+
float v2 = getFreak(featureIndex, p2);
|
|
223
|
+
|
|
224
|
+
if (v1 < v2 + 0.01) {
|
|
225
|
+
sum += int(pow(2.0, float(7 - i)));
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
setOutput(float(sum));
|
|
229
|
+
}
|
|
230
|
+
`
|
|
231
|
+
}
|
|
232
|
+
this.kernelCaches.computeFreakDescriptors = [kernel];
|
|
484
233
|
}
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
const
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
float radialW = getRadial(radialIndex, 2);
|
|
502
|
-
|
|
503
|
-
int octave = int(getExtrema(featureIndex, 1));
|
|
504
|
-
int y = int(getExtrema(featureIndex, 2));
|
|
505
|
-
int x = int(getExtrema(featureIndex, 3));
|
|
506
|
-
|
|
507
|
-
int xp = x + radialX;
|
|
508
|
-
int yp = y + radialY;
|
|
509
|
-
|
|
510
|
-
float dy = getPixel(octave, yp+1, xp) - getPixel(octave, yp-1, xp);
|
|
511
|
-
float dx = getPixel(octave, yp, xp+1) - getPixel(octave, yp, xp-1);
|
|
512
|
-
|
|
513
|
-
if (propertyIndex == 0) {
|
|
514
|
-
// be careful that atan(0, 0) gives 1.57 instead of 0 (different from js), but doesn't matter here, coz magnitude is 0
|
|
515
|
-
|
|
516
|
-
float angle = atan(dy, dx) + ${Math.PI};
|
|
517
|
-
float fbin = angle * ${ORIENTATION_NUM_BINS}. * ${oneOver2PI};
|
|
518
|
-
setOutput(fbin);
|
|
519
|
-
return;
|
|
234
|
+
*/
|
|
235
|
+
return tf.tidy(() => {
|
|
236
|
+
//const [program] = this.kernelCaches.computeFreakDescriptors;
|
|
237
|
+
//return this._runWebGLProgram(program, [extremaFreaks, positionT], 'int32');
|
|
238
|
+
return tf.engine().runKernel("ComputeFreakDescriptors", { extremaFreaks, positionT });
|
|
239
|
+
});
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
_computeExtremaFreak(pyramidImagesT, prunedExtremas, prunedExtremasAngles) {
|
|
243
|
+
if (!this.tensorCaches._computeExtremaFreak) {
|
|
244
|
+
tf.tidy(() => {
|
|
245
|
+
const freakPoints = tf.tensor(FREAKPOINTS);
|
|
246
|
+
this.tensorCaches._computeExtremaFreak = {
|
|
247
|
+
freakPointsT: tf.keep(freakPoints),
|
|
248
|
+
};
|
|
249
|
+
});
|
|
520
250
|
}
|
|
251
|
+
const { freakPointsT } = this.tensorCaches._computeExtremaFreak;
|
|
521
252
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
return;
|
|
253
|
+
const gaussianImagesT = [];
|
|
254
|
+
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
255
|
+
//gaussianImagesT.push(pyramidImagesT[i][0]);
|
|
256
|
+
gaussianImagesT.push(pyramidImagesT[i][1]); // better
|
|
527
257
|
}
|
|
528
|
-
}
|
|
529
258
|
|
|
259
|
+
/* if (!this.kernelCaches._computeExtremaFreak) {
|
|
260
|
+
const imageVariableNames = [];
|
|
261
|
+
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
262
|
+
imageVariableNames.push('image' + i);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
let pixelsSubCodes = `float getPixel(int octave, int y, int x) {
|
|
266
|
+
`;
|
|
267
|
+
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
268
|
+
pixelsSubCodes += `
|
|
269
|
+
if (octave == ${ i }) {
|
|
270
|
+
return getImage${ i } (y, x);
|
|
271
|
+
}
|
|
530
272
|
`
|
|
531
|
-
|
|
273
|
+
}
|
|
274
|
+
pixelsSubCodes += `} `;
|
|
275
|
+
|
|
276
|
+
const kernel = {
|
|
277
|
+
variableNames: [...imageVariableNames, 'extrema', 'angles', 'freakPoints'],
|
|
278
|
+
outputShape: [prunedExtremas.shape[0], FREAKPOINTS.length],
|
|
279
|
+
userCode: `
|
|
280
|
+
${ pixelsSubCodes }
|
|
281
|
+
void main() {
|
|
282
|
+
ivec2 coords = getOutputCoords();
|
|
283
|
+
int featureIndex = coords[0];
|
|
284
|
+
int freakIndex = coords[1];
|
|
285
|
+
|
|
286
|
+
float freakSigma = getFreakPoints(freakIndex, 0);
|
|
287
|
+
float freakX = getFreakPoints(freakIndex, 1);
|
|
288
|
+
float freakY = getFreakPoints(freakIndex, 2);
|
|
289
|
+
|
|
290
|
+
int octave = int(getExtrema(featureIndex, 1));
|
|
291
|
+
float inputY = getExtrema(featureIndex, 2);
|
|
292
|
+
float inputX = getExtrema(featureIndex, 3);
|
|
293
|
+
float inputAngle = getAngles(featureIndex);
|
|
294
|
+
float cos = ${ FREAK_EXPANSION_FACTOR }. * cos(inputAngle);
|
|
295
|
+
float sin = ${ FREAK_EXPANSION_FACTOR }. * sin(inputAngle);
|
|
296
|
+
|
|
297
|
+
float yp = inputY + freakX * sin + freakY * cos;
|
|
298
|
+
float xp = inputX + freakX * cos + freakY * -sin;
|
|
299
|
+
|
|
300
|
+
int x0 = int(floor(xp));
|
|
301
|
+
int x1 = x0 + 1;
|
|
302
|
+
int y0 = int(floor(yp));
|
|
303
|
+
int y1 = y0 + 1;
|
|
304
|
+
|
|
305
|
+
float f1 = getPixel(octave, y0, x0);
|
|
306
|
+
float f2 = getPixel(octave, y0, x1);
|
|
307
|
+
float f3 = getPixel(octave, y1, x0);
|
|
308
|
+
float f4 = getPixel(octave, y1, x1);
|
|
309
|
+
|
|
310
|
+
float x1f = float(x1);
|
|
311
|
+
float y1f = float(y1);
|
|
312
|
+
float x0f = float(x0);
|
|
313
|
+
float y0f = float(y0);
|
|
314
|
+
|
|
315
|
+
// ratio for interpolation between four neighbouring points
|
|
316
|
+
float value = (x1f - xp) * (y1f - yp) * f1
|
|
317
|
+
+ (xp - x0f) * (y1f - yp) * f2
|
|
318
|
+
+ (x1f - xp) * (yp - y0f) * f3
|
|
319
|
+
+ (xp - x0f) * (yp - y0f) * f4;
|
|
320
|
+
|
|
321
|
+
setOutput(value);
|
|
322
|
+
}
|
|
323
|
+
`
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
this.kernelCaches._computeExtremaFreak = [kernel];
|
|
327
|
+
} */
|
|
532
328
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
329
|
+
return tf.tidy(() => {
|
|
330
|
+
/* const [program] = this.kernelCaches._computeExtremaFreak;
|
|
331
|
+
const result = this._compileAndRun(program, [...gaussianImagesT, prunedExtremas, prunedExtremasAngles, freakPointsT]);
|
|
332
|
+
return result; */
|
|
333
|
+
return tf.engine().runKernel("ComputeExtremaFreak", {
|
|
334
|
+
gaussianImagesT,
|
|
335
|
+
prunedExtremas,
|
|
336
|
+
prunedExtremasAngles,
|
|
337
|
+
freakPointsT,
|
|
338
|
+
pyramidImagesLength: pyramidImagesT.length,
|
|
339
|
+
});
|
|
340
|
+
});
|
|
341
|
+
}
|
|
342
|
+
/**
|
|
343
|
+
*
|
|
344
|
+
* @param {tf.Tensor<tf.Rank>} histograms
|
|
345
|
+
* @returns
|
|
346
|
+
*/
|
|
347
|
+
_computeExtremaAngles(histograms) {
|
|
348
|
+
/* if (!this.kernelCaches.computeExtremaAngles) {
|
|
349
|
+
const kernel = {
|
|
350
|
+
variableNames: ['histogram'],
|
|
351
|
+
outputShape: [histograms.shape[0]],
|
|
352
|
+
userCode: `
|
|
353
|
+
void main() {
|
|
354
|
+
int featureIndex = getOutputCoords();
|
|
355
|
+
|
|
356
|
+
int maxIndex = 0;
|
|
357
|
+
for (int i = 1; i < ${ ORIENTATION_NUM_BINS }; i++) {
|
|
358
|
+
if (getHistogram(featureIndex, i) > getHistogram(featureIndex, maxIndex)) {
|
|
359
|
+
maxIndex = i;
|
|
556
360
|
}
|
|
557
|
-
|
|
558
|
-
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
int prev = imod(maxIndex - 1 + ${ ORIENTATION_NUM_BINS }, ${ ORIENTATION_NUM_BINS });
|
|
364
|
+
int next = imod(maxIndex + 1, ${ ORIENTATION_NUM_BINS });
|
|
365
|
+
|
|
366
|
+
**
|
|
367
|
+
* Fit a quadratic to 3 points. The system of equations is:
|
|
368
|
+
*
|
|
369
|
+
* y0 = A * x0 ^ 2 + B * x0 + C
|
|
370
|
+
* y1 = A * x1 ^ 2 + B * x1 + C
|
|
371
|
+
* y2 = A * x2 ^ 2 + B * x2 + C
|
|
372
|
+
*
|
|
373
|
+
* This system of equations is solved for A, B, C.
|
|
374
|
+
*
|
|
375
|
+
float p10 = float(maxIndex - 1);
|
|
376
|
+
float p11 = getHistogram(featureIndex, prev);
|
|
377
|
+
float p20 = float(maxIndex);
|
|
378
|
+
float p21 = getHistogram(featureIndex, maxIndex);
|
|
379
|
+
float p30 = float(maxIndex + 1);
|
|
380
|
+
float p31 = getHistogram(featureIndex, next);
|
|
381
|
+
|
|
382
|
+
float d1 = (p30 - p20) * (p30 - p10);
|
|
383
|
+
float d2 = (p10 - p20) * (p30 - p10);
|
|
384
|
+
float d3 = p10 - p20;
|
|
385
|
+
|
|
386
|
+
// If any of the denominators are zero then, just use maxIndex.
|
|
387
|
+
float fbin = float(maxIndex);
|
|
388
|
+
if (abs(d1) > 0.00001 && abs(d2) > 0.00001 && abs(d3) > 0.00001) {
|
|
389
|
+
float a = p10 * p10;
|
|
390
|
+
float b = p20 * p20;
|
|
391
|
+
|
|
392
|
+
// Solve for the coefficients A,B,C
|
|
393
|
+
float A = ((p31 - p21) / d1) - ((p11 - p21) / d2);
|
|
394
|
+
float B = ((p11 - p21) + (A * (b - a))) / d3;
|
|
395
|
+
float C = p11 - (A * a) - (B * p10);
|
|
396
|
+
fbin = -B / (2. * A);
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
float an = 2.0 * ${ Math.PI } * (fbin + 0.5) / ${ ORIENTATION_NUM_BINS }.- ${ Math.PI };
|
|
400
|
+
setOutput(an);
|
|
401
|
+
}
|
|
402
|
+
`
|
|
403
|
+
}
|
|
404
|
+
this.kernelCaches.computeExtremaAngles = kernel;
|
|
405
|
+
} */
|
|
406
|
+
return tf.tidy(() => {
|
|
407
|
+
/* const program = this.kernelCaches.computeExtremaAngles;
|
|
408
|
+
return this._compileAndRun(program, [histograms]); */
|
|
409
|
+
return tf.engine().runKernel("ComputeExtremaAngles", { histograms });
|
|
410
|
+
});
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
// TODO: consider using the average momentum instead of the histogram method; the histogram might be overcomplicated
|
|
414
|
+
/**
|
|
415
|
+
*
|
|
416
|
+
* @param {tf.Tensor<tf.Rank>} prunedExtremasT
|
|
417
|
+
* @param {tf.Tensor<tf.Rank>[]} pyramidImagesT
|
|
418
|
+
* @returns
|
|
419
|
+
*/
|
|
420
|
+
_computeOrientationHistograms(prunedExtremasT, pyramidImagesT) {
|
|
421
|
+
const gaussianImagesT = [];
|
|
422
|
+
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
423
|
+
gaussianImagesT.push(pyramidImagesT[i][1]);
|
|
559
424
|
}
|
|
560
|
-
|
|
425
|
+
|
|
426
|
+
if (!this.tensorCaches.orientationHistograms) {
|
|
427
|
+
tf.tidy(() => {
|
|
428
|
+
const gwScale =
|
|
429
|
+
-1.0 /
|
|
430
|
+
(2 * ORIENTATION_GAUSSIAN_EXPANSION_FACTOR * ORIENTATION_GAUSSIAN_EXPANSION_FACTOR);
|
|
431
|
+
const radius = ORIENTATION_GAUSSIAN_EXPANSION_FACTOR * ORIENTATION_REGION_EXPANSION_FACTOR;
|
|
432
|
+
const radiusCeil = Math.ceil(radius);
|
|
433
|
+
|
|
434
|
+
const radialProperties = [];
|
|
435
|
+
for (let y = -radiusCeil; y <= radiusCeil; y++) {
|
|
436
|
+
for (let x = -radiusCeil; x <= radiusCeil; x++) {
|
|
437
|
+
const distanceSquare = x * x + y * y;
|
|
438
|
+
|
|
439
|
+
// simply assigning w = 1 might suffice; this weighting could be overcomplicated.
|
|
440
|
+
if (distanceSquare <= radius * radius) {
|
|
441
|
+
const _x = distanceSquare * gwScale;
|
|
442
|
+
// fast exponential approximation
|
|
443
|
+
let w =
|
|
444
|
+
(720 + _x * (720 + _x * (360 + _x * (120 + _x * (30 + _x * (6 + _x)))))) *
|
|
445
|
+
0.0013888888;
|
|
446
|
+
radialProperties.push([y, x, w]);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
this.tensorCaches.orientationHistograms = {
|
|
452
|
+
radialPropertiesT: tf.keep(tf.tensor(radialProperties, [radialProperties.length, 3])),
|
|
453
|
+
};
|
|
454
|
+
});
|
|
561
455
|
}
|
|
562
|
-
|
|
563
|
-
}
|
|
564
|
-
`
|
|
565
|
-
}
|
|
456
|
+
const { radialPropertiesT } = this.tensorCaches.orientationHistograms;
|
|
566
457
|
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
});
|
|
581
|
-
});
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
// The histogram is smoothed with a Gaussian, with sigma = 1
|
|
585
|
-
_smoothHistograms(histograms) {
|
|
586
|
-
/* if (!this.kernelCaches.smoothHistograms) {
|
|
587
|
-
const kernel = {
|
|
588
|
-
variableNames: ['histogram'],
|
|
589
|
-
outputShape: [histograms.shape[0], ORIENTATION_NUM_BINS],
|
|
590
|
-
userCode: `
|
|
591
|
-
void main() {
|
|
592
|
-
ivec2 coords = getOutputCoords();
|
|
593
|
-
|
|
594
|
-
int featureIndex = coords[0];
|
|
595
|
-
int binIndex = coords[1];
|
|
596
|
-
|
|
597
|
-
int prevBin = imod(binIndex - 1 + ${ORIENTATION_NUM_BINS}, ${ORIENTATION_NUM_BINS});
|
|
598
|
-
int nextBin = imod(binIndex + 1, ${ORIENTATION_NUM_BINS});
|
|
599
|
-
|
|
600
|
-
float result = 0.274068619061197 * getHistogram(featureIndex, prevBin) + 0.451862761877606 * getHistogram(featureIndex, binIndex) + 0.274068619061197 * getHistogram(featureIndex, nextBin);
|
|
601
|
-
|
|
602
|
-
setOutput(result);
|
|
603
|
-
}
|
|
458
|
+
/* if (!this.kernelCaches.computeOrientationHistograms) {
|
|
459
|
+
const imageVariableNames = [];
|
|
460
|
+
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
461
|
+
imageVariableNames.push('image' + i);
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
let kernel1SubCodes = `float getPixel(int octave, int y, int x) {
|
|
465
|
+
`;
|
|
466
|
+
for (let i = 1; i < pyramidImagesT.length; i++) {
|
|
467
|
+
kernel1SubCodes += `
|
|
468
|
+
if (octave == ${ i }) {
|
|
469
|
+
return getImage${ i } (y, x);
|
|
470
|
+
}
|
|
604
471
|
`
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
} */
|
|
608
|
-
return tf.tidy(() => {
|
|
609
|
-
return tf.engine().runKernel("SmoothHistograms", { histograms }); //
|
|
610
|
-
/* const program = this.kernelCaches.smoothHistograms;
|
|
611
|
-
for (let i = 0; i < ORIENTATION_SMOOTHING_ITERATIONS; i++) {
|
|
612
|
-
histograms = this._compileAndRun(program, [histograms]);
|
|
613
|
-
}
|
|
614
|
-
return histograms; */
|
|
615
|
-
});
|
|
616
|
-
}
|
|
617
|
-
/**
|
|
618
|
-
*
|
|
619
|
-
* @param {number[][]} prunedExtremasList
|
|
620
|
-
* @param {tf.Tensor<tf.Rank>[]} dogPyramidImagesT
|
|
621
|
-
* @returns
|
|
622
|
-
*/
|
|
623
|
-
_computeLocalization(prunedExtremasList, dogPyramidImagesT) {
|
|
624
|
-
/* if (!this.kernelCaches.computeLocalization) {
|
|
625
|
-
const dogVariableNames = [];
|
|
626
|
-
|
|
627
|
-
let dogSubCodes = `float getPixel(int octave, int y, int x) {`;
|
|
628
|
-
for (let i = 1; i < dogPyramidImagesT.length; i++) { // extrema starts from second octave
|
|
629
|
-
dogVariableNames.push('image' + i);
|
|
630
|
-
dogSubCodes += `
|
|
631
|
-
if (octave == ${i}) {
|
|
632
|
-
return getImage${i}(y, x);
|
|
633
|
-
}
|
|
634
|
-
`;
|
|
635
|
-
}
|
|
636
|
-
dogSubCodes += `}`;
|
|
637
|
-
|
|
638
|
-
const kernel = {
|
|
639
|
-
variableNames: [...dogVariableNames, 'extrema'],
|
|
640
|
-
outputShape: [prunedExtremasList.length, 3, 3], // 3x3 pixels around the extrema
|
|
641
|
-
userCode: `
|
|
642
|
-
${dogSubCodes}
|
|
643
|
-
|
|
644
|
-
void main() {
|
|
645
|
-
ivec3 coords = getOutputCoords();
|
|
646
|
-
int featureIndex = coords[0];
|
|
647
|
-
float score = getExtrema(featureIndex, 0);
|
|
648
|
-
if (score == 0.0) {
|
|
649
|
-
return;
|
|
650
|
-
}
|
|
651
|
-
|
|
652
|
-
int dy = coords[1]-1;
|
|
653
|
-
int dx = coords[2]-1;
|
|
654
|
-
int octave = int(getExtrema(featureIndex, 1));
|
|
655
|
-
int y = int(getExtrema(featureIndex, 2));
|
|
656
|
-
int x = int(getExtrema(featureIndex, 3));
|
|
657
|
-
setOutput(getPixel(octave, y+dy, x+dx));
|
|
658
|
-
}
|
|
659
|
-
`
|
|
660
|
-
}
|
|
661
|
-
|
|
662
|
-
this.kernelCaches.computeLocalization = [kernel];
|
|
663
|
-
} */
|
|
664
|
-
|
|
665
|
-
return tf.tidy(() => {
|
|
666
|
-
//const program = this.kernelCaches.computeLocalization[0];
|
|
667
|
-
//const prunedExtremasT = tf.tensor(prunedExtremasList, [prunedExtremasList.length, prunedExtremasList[0].length], 'int32');
|
|
668
|
-
|
|
669
|
-
const pixelsT = tf
|
|
670
|
-
.engine()
|
|
671
|
-
.runKernel("ComputeLocalization", { prunedExtremasList, dogPyramidImagesT }); //this._compileAndRun(program, [...dogPyramidImagesT.slice(1), prunedExtremasT]);
|
|
672
|
-
const pixels = pixelsT.arraySync();
|
|
673
|
-
|
|
674
|
-
const result = [];
|
|
675
|
-
for (let i = 0; i < pixels.length; i++) {
|
|
676
|
-
result.push([]);
|
|
677
|
-
for (let j = 0; j < pixels[i].length; j++) {
|
|
678
|
-
result[i].push([]);
|
|
679
|
-
}
|
|
680
|
-
}
|
|
681
|
-
|
|
682
|
-
const localizedExtremas = [];
|
|
683
|
-
for (let i = 0; i < prunedExtremasList.length; i++) {
|
|
684
|
-
localizedExtremas[i] = [
|
|
685
|
-
prunedExtremasList[i][0],
|
|
686
|
-
prunedExtremasList[i][1],
|
|
687
|
-
prunedExtremasList[i][2],
|
|
688
|
-
prunedExtremasList[i][3],
|
|
689
|
-
];
|
|
690
|
-
}
|
|
691
|
-
|
|
692
|
-
for (let i = 0; i < localizedExtremas.length; i++) {
|
|
693
|
-
if (localizedExtremas[i][0] === 0) {
|
|
694
|
-
continue;
|
|
695
|
-
}
|
|
696
|
-
const pixel = pixels[i];
|
|
697
|
-
const dx = 0.5 * (pixel[1][2] - pixel[1][0]);
|
|
698
|
-
const dy = 0.5 * (pixel[2][1] - pixel[0][1]);
|
|
699
|
-
const dxx = pixel[1][2] + pixel[1][0] - 2 * pixel[1][1];
|
|
700
|
-
const dyy = pixel[2][1] + pixel[0][1] - 2 * pixel[1][1];
|
|
701
|
-
const dxy = 0.25 * (pixel[0][0] + pixel[2][2] - pixel[0][2] - pixel[2][0]);
|
|
702
|
-
|
|
703
|
-
const det = dxx * dyy - dxy * dxy;
|
|
704
|
-
const ux = (dyy * -dx + -dxy * -dy) / det;
|
|
705
|
-
const uy = (-dxy * -dx + dxx * -dy) / det;
|
|
706
|
-
|
|
707
|
-
const newY = localizedExtremas[i][2] + uy;
|
|
708
|
-
const newX = localizedExtremas[i][3] + ux;
|
|
709
|
-
|
|
710
|
-
if (Math.abs(det) < 0.0001) {
|
|
711
|
-
continue;
|
|
712
|
-
}
|
|
713
|
-
|
|
714
|
-
localizedExtremas[i][2] = newY;
|
|
715
|
-
localizedExtremas[i][3] = newX;
|
|
716
|
-
}
|
|
717
|
-
return tf.tensor(
|
|
718
|
-
localizedExtremas,
|
|
719
|
-
[localizedExtremas.length, localizedExtremas[0].length],
|
|
720
|
-
"float32",
|
|
721
|
-
);
|
|
722
|
-
});
|
|
723
|
-
}
|
|
724
|
-
|
|
725
|
-
// faster to do it in CPU
|
|
726
|
-
// if we do it on the GPU, we probably need to use tf.topk(), which seems to run on the CPU anyway (there is no GPU operation for it)
|
|
727
|
-
// TODO: research adaptive maximum suppression method
|
|
728
|
-
/**
|
|
729
|
-
*
|
|
730
|
-
* @param {tf.Tensor<tf.Rank>[]} extremasResultsT
|
|
731
|
-
* @returns
|
|
732
|
-
*/
|
|
733
|
-
_applyPrune(extremasResultsT) {
|
|
734
|
-
const nBuckets = NUM_BUCKETS_PER_DIMENSION * NUM_BUCKETS_PER_DIMENSION;
|
|
735
|
-
const nFeatures = MAX_FEATURES_PER_BUCKET;
|
|
736
|
-
/*
|
|
737
|
-
if (!this.kernelCaches.applyPrune) {
|
|
738
|
-
const reductionKernels = [];
|
|
472
|
+
}
|
|
473
|
+
kernel1SubCodes += `} `;
|
|
739
474
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
475
|
+
const kernel1 = {
|
|
476
|
+
variableNames: [...imageVariableNames, 'extrema', 'radial'],
|
|
477
|
+
outputShape: [prunedExtremasT.shape[0], radialPropertiesT.shape[0], 2], // last dimension: [fbin, magnitude]
|
|
478
|
+
userCode: `
|
|
479
|
+
${ kernel1SubCodes }
|
|
480
|
+
|
|
481
|
+
void main() {
|
|
482
|
+
ivec3 coords = getOutputCoords();
|
|
483
|
+
int featureIndex = coords[0];
|
|
484
|
+
int radialIndex = coords[1];
|
|
485
|
+
int propertyIndex = coords[2];
|
|
745
486
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
userCode: `
|
|
750
|
-
void main() {
|
|
751
|
-
ivec2 coords = getOutputCoords();
|
|
752
|
-
int y = coords[0] * 2;
|
|
753
|
-
int x = coords[1] * 2;
|
|
487
|
+
int radialY = int(getRadial(radialIndex, 0));
|
|
488
|
+
int radialX = int(getRadial(radialIndex, 1));
|
|
489
|
+
float radialW = getRadial(radialIndex, 2);
|
|
754
490
|
|
|
755
|
-
|
|
756
|
-
|
|
491
|
+
int octave = int(getExtrema(featureIndex, 1));
|
|
492
|
+
int y = int(getExtrema(featureIndex, 2));
|
|
493
|
+
int x = int(getExtrema(featureIndex, 3));
|
|
757
494
|
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
values = getExtrema(y+1, x);
|
|
761
|
-
}
|
|
762
|
-
else if (getExtrema(y, x+1) != 0.0) {
|
|
763
|
-
location = 2.0;
|
|
764
|
-
values = getExtrema(y, x+1);
|
|
765
|
-
}
|
|
766
|
-
else if (getExtrema(y+1, x+1) != 0.0) {
|
|
767
|
-
location = 3.0;
|
|
768
|
-
values = getExtrema(y+1, x+1);
|
|
769
|
-
}
|
|
495
|
+
int xp = x + radialX;
|
|
496
|
+
int yp = y + radialY;
|
|
770
497
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
498
|
+
float dy = getPixel(octave, yp + 1, xp) - getPixel(octave, yp - 1, xp);
|
|
499
|
+
float dx = getPixel(octave, yp, xp + 1) - getPixel(octave, yp, xp - 1);
|
|
500
|
+
|
|
501
|
+
if (propertyIndex == 0) {
|
|
502
|
+
// be careful that atan(0, 0) gives 1.57 instead of 0 (different from js), but it doesn't matter here, because magnitude is 0
|
|
503
|
+
|
|
504
|
+
float angle = atan(dy, dx) + ${ Math.PI };
|
|
505
|
+
float fbin = angle * ${ ORIENTATION_NUM_BINS }. * ${ oneOver2PI };
|
|
506
|
+
setOutput(fbin);
|
|
507
|
+
return;
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
if (propertyIndex == 1) {
|
|
511
|
+
float mag = sqrt(dx * dx + dy * dy);
|
|
512
|
+
float magnitude = radialW * mag;
|
|
513
|
+
setOutput(magnitude);
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
`
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
const kernel2 = {
|
|
522
|
+
variableNames: ['fbinMag'],
|
|
523
|
+
outputShape: [prunedExtremasT.shape[0], ORIENTATION_NUM_BINS],
|
|
524
|
+
userCode: `
|
|
525
|
+
void main() {
|
|
526
|
+
ivec2 coords = getOutputCoords();
|
|
527
|
+
int featureIndex = coords[0];
|
|
528
|
+
int binIndex = coords[1];
|
|
529
|
+
|
|
530
|
+
float sum = 0.;
|
|
531
|
+
for (int i = 0; i < ${ radialPropertiesT.shape[0] }; i++) {
|
|
532
|
+
float fbin = getFbinMag(featureIndex, i, 0);
|
|
533
|
+
int bin = int(floor(fbin - 0.5));
|
|
534
|
+
int b1 = imod(bin + ${ ORIENTATION_NUM_BINS }, ${ ORIENTATION_NUM_BINS });
|
|
535
|
+
int b2 = imod(bin + 1 + ${ ORIENTATION_NUM_BINS }, ${ ORIENTATION_NUM_BINS });
|
|
536
|
+
|
|
537
|
+
if (b1 == binIndex || b2 == binIndex) {
|
|
538
|
+
float magnitude = getFbinMag(featureIndex, i, 1);
|
|
539
|
+
float w2 = fbin - float(bin) - 0.5;
|
|
540
|
+
float w1 = w2 * -1. + 1.;
|
|
541
|
+
|
|
542
|
+
if (b1 == binIndex) {
|
|
543
|
+
sum += w1 * magnitude;
|
|
544
|
+
}
|
|
545
|
+
if (b2 == binIndex) {
|
|
546
|
+
sum += w2 * magnitude;
|
|
778
547
|
}
|
|
779
|
-
reductionKernels.push(kernel);
|
|
780
|
-
}
|
|
781
|
-
this.kernelCaches.applyPrune = {reductionKernels};
|
|
782
|
-
}
|
|
783
|
-
*/
|
|
784
|
-
// combine results into a tensor of:
|
|
785
|
-
// nBuckets x nFeatures x [score, octave, y, x]
|
|
786
|
-
const curAbsScores = [];
|
|
787
|
-
/** @type {number[][][]} */
|
|
788
|
-
const result = [];
|
|
789
|
-
for (let i = 0; i < nBuckets; i++) {
|
|
790
|
-
result.push([]);
|
|
791
|
-
curAbsScores.push([]);
|
|
792
|
-
for (let j = 0; j < nFeatures; j++) {
|
|
793
|
-
result[i].push([0, 0, 0, 0]);
|
|
794
|
-
curAbsScores[i].push(0);
|
|
795
|
-
}
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
tf.tidy(() => {
|
|
799
|
-
//const {reductionKernels} = this.kernelCaches.applyPrune;
|
|
800
|
-
|
|
801
|
-
for (let k = 0; k < extremasResultsT.length; k++) {
|
|
802
|
-
//const program = reductionKernels[k];
|
|
803
|
-
//const reducedT = this._compileAndRun(program, [extremasResultsT[k]]);
|
|
804
|
-
const reducedT = tf
|
|
805
|
-
.engine()
|
|
806
|
-
.runKernel("ExtremaReduction", { extremasResultT: extremasResultsT[k] });
|
|
807
|
-
const octave = k + 1; // extrema starts from second octave
|
|
808
|
-
|
|
809
|
-
const reduced = reducedT.arraySync();
|
|
810
|
-
const height = reducedT.shape[0];
|
|
811
|
-
const width = reducedT.shape[1];
|
|
812
|
-
|
|
813
|
-
const bucketWidth = (width * 2) / NUM_BUCKETS_PER_DIMENSION;
|
|
814
|
-
const bucketHeight = (height * 2) / NUM_BUCKETS_PER_DIMENSION;
|
|
815
|
-
|
|
816
|
-
for (let j = 0; j < height; j++) {
|
|
817
|
-
for (let i = 0; i < width; i++) {
|
|
818
|
-
const encoded = reduced[j][i];
|
|
819
|
-
if (encoded == 0) continue;
|
|
820
|
-
|
|
821
|
-
const score = encoded % 1000;
|
|
822
|
-
const loc = Math.floor(Math.abs(encoded) / 1000);
|
|
823
|
-
const x = i * 2 + (loc === 2 || loc === 3 ? 1 : 0);
|
|
824
|
-
const y = j * 2 + (loc === 1 || loc === 3 ? 1 : 0);
|
|
825
|
-
|
|
826
|
-
const bucketX = Math.floor(x / bucketWidth);
|
|
827
|
-
const bucketY = Math.floor(y / bucketHeight);
|
|
828
|
-
const bucket = bucketY * NUM_BUCKETS_PER_DIMENSION + bucketX;
|
|
829
|
-
|
|
830
|
-
const absScore = Math.abs(score);
|
|
831
|
-
|
|
832
|
-
let tIndex = nFeatures;
|
|
833
|
-
while (tIndex >= 1 && absScore > curAbsScores[bucket][tIndex - 1]) {
|
|
834
|
-
tIndex -= 1;
|
|
835
|
-
}
|
|
836
|
-
|
|
837
|
-
if (tIndex < nFeatures) {
|
|
838
|
-
for (let t = nFeatures - 1; t >= tIndex + 1; t--) {
|
|
839
|
-
curAbsScores[bucket][t] = curAbsScores[bucket][t - 1];
|
|
840
|
-
result[bucket][t][0] = result[bucket][t - 1][0];
|
|
841
|
-
result[bucket][t][1] = result[bucket][t - 1][1];
|
|
842
|
-
result[bucket][t][2] = result[bucket][t - 1][2];
|
|
843
|
-
result[bucket][t][3] = result[bucket][t - 1][3];
|
|
844
|
-
}
|
|
845
|
-
curAbsScores[bucket][tIndex] = absScore;
|
|
846
|
-
result[bucket][tIndex][0] = score;
|
|
847
|
-
result[bucket][tIndex][1] = octave;
|
|
848
|
-
result[bucket][tIndex][2] = y;
|
|
849
|
-
result[bucket][tIndex][3] = x;
|
|
850
|
-
}
|
|
851
|
-
} //for j<height
|
|
852
|
-
} //for i<width
|
|
853
|
-
}
|
|
854
|
-
});
|
|
855
|
-
|
|
856
|
-
// combine all buckets into a single list
|
|
857
|
-
const list = [];
|
|
858
|
-
for (let i = 0; i < nBuckets; i++) {
|
|
859
|
-
for (let j = 0; j < nFeatures; j++) {
|
|
860
|
-
list.push(result[i][j]);
|
|
861
|
-
}
|
|
862
|
-
}
|
|
863
|
-
return list;
|
|
864
|
-
}
|
|
865
|
-
|
|
866
|
-
_buildExtremas(image0, image1, image2) {
|
|
867
|
-
/* const imageHeight = image1.shape[0];
|
|
868
|
-
const imageWidth = image1.shape[1];
|
|
869
|
-
|
|
870
|
-
const kernelKey = 'w' + imageWidth;
|
|
871
|
-
|
|
872
|
-
if (!this.kernelCaches.buildExtremas) {
|
|
873
|
-
this.kernelCaches.buildExtremas = {};
|
|
874
|
-
}
|
|
875
|
-
if (!this.kernelCaches.buildExtremas[kernelKey]) {
|
|
876
|
-
const kernel = {
|
|
877
|
-
variableNames: ['image0', 'image1', 'image2'],
|
|
878
|
-
outputShape: [imageHeight, imageWidth],
|
|
879
|
-
userCode: `
|
|
880
|
-
void main() {
|
|
881
|
-
ivec2 coords = getOutputCoords();
|
|
882
|
-
|
|
883
|
-
int y = coords[0];
|
|
884
|
-
int x = coords[1];
|
|
885
|
-
|
|
886
|
-
float value = getImage1(y, x);
|
|
887
|
-
|
|
888
|
-
// Step 1: find local maxima/minima
|
|
889
|
-
if (value * value < ${LAPLACIAN_SQR_THRESHOLD}.) {
|
|
890
|
-
setOutput(0.);
|
|
891
|
-
return;
|
|
892
|
-
}
|
|
893
|
-
if (y < ${FREAK_EXPANSION_FACTOR} || y > ${imageHeight - 1 - FREAK_EXPANSION_FACTOR}) {
|
|
894
|
-
setOutput(0.);
|
|
895
|
-
return;
|
|
896
|
-
}
|
|
897
|
-
if (x < ${FREAK_EXPANSION_FACTOR} || x > ${imageWidth - 1 - FREAK_EXPANSION_FACTOR}) {
|
|
898
|
-
setOutput(0.);
|
|
899
|
-
return;
|
|
900
548
|
}
|
|
549
|
+
}
|
|
550
|
+
setOutput(sum);
|
|
551
|
+
}
|
|
552
|
+
`
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
this.kernelCaches.computeOrientationHistograms = [kernel1, kernel2];
|
|
556
|
+
} */
|
|
901
557
|
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
558
|
+
return tf.tidy(() => {
|
|
559
|
+
/* const [program1, program2] = this.kernelCaches.computeOrientationHistograms;
|
|
560
|
+
const result1 = this._compileAndRun(program1, [...gaussianImagesT, prunedExtremasT, radialPropertiesT]);
|
|
561
|
+
const result2 = this._compileAndRun(program2, [result1]);
|
|
562
|
+
return result2;*/
|
|
563
|
+
return tf.engine().runKernel("ComputeOrientationHistograms", {
|
|
564
|
+
gaussianImagesT,
|
|
565
|
+
prunedExtremasT,
|
|
566
|
+
radialPropertiesT,
|
|
567
|
+
pyramidImagesLength: pyramidImagesT.length,
|
|
568
|
+
});
|
|
569
|
+
});
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
// The histogram is smoothed with a Gaussian, with sigma = 1
|
|
573
|
+
_smoothHistograms(histograms) {
|
|
574
|
+
/* if (!this.kernelCaches.smoothHistograms) {
|
|
575
|
+
const kernel = {
|
|
576
|
+
variableNames: ['histogram'],
|
|
577
|
+
outputShape: [histograms.shape[0], ORIENTATION_NUM_BINS],
|
|
578
|
+
userCode: `
|
|
579
|
+
void main() {
|
|
580
|
+
ivec2 coords = getOutputCoords();
|
|
581
|
+
|
|
582
|
+
int featureIndex = coords[0];
|
|
583
|
+
int binIndex = coords[1];
|
|
584
|
+
|
|
585
|
+
int prevBin = imod(binIndex - 1 + ${ ORIENTATION_NUM_BINS }, ${ ORIENTATION_NUM_BINS });
|
|
586
|
+
int nextBin = imod(binIndex + 1, ${ ORIENTATION_NUM_BINS });
|
|
587
|
+
|
|
588
|
+
float result = 0.274068619061197 * getHistogram(featureIndex, prevBin) + 0.451862761877606 * getHistogram(featureIndex, binIndex) + 0.274068619061197 * getHistogram(featureIndex, nextBin);
|
|
909
589
|
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
590
|
+
setOutput(result);
|
|
591
|
+
}
|
|
592
|
+
`
|
|
593
|
+
}
|
|
594
|
+
this.kernelCaches.smoothHistograms = kernel;
|
|
595
|
+
} */
|
|
596
|
+
return tf.tidy(() => {
|
|
597
|
+
return tf.engine().runKernel("SmoothHistograms", { histograms }); //
|
|
598
|
+
/* const program = this.kernelCaches.smoothHistograms;
|
|
599
|
+
for (let i = 0; i < ORIENTATION_SMOOTHING_ITERATIONS; i++) {
|
|
600
|
+
histograms = this._compileAndRun(program, [histograms]);
|
|
601
|
+
}
|
|
602
|
+
return histograms; */
|
|
603
|
+
});
|
|
604
|
+
}
|
|
605
|
+
/**
|
|
606
|
+
*
|
|
607
|
+
* @param {number[][]} prunedExtremasList
|
|
608
|
+
* @param {tf.Tensor<tf.Rank>[]} dogPyramidImagesT
|
|
609
|
+
* @returns
|
|
610
|
+
*/
|
|
611
|
+
_computeLocalization(prunedExtremasList, dogPyramidImagesT) {
|
|
612
|
+
/* if (!this.kernelCaches.computeLocalization) {
|
|
613
|
+
const dogVariableNames = [];
|
|
614
|
+
|
|
615
|
+
let dogSubCodes = `float getPixel(int octave, int y, int x) {
|
|
616
|
+
`;
|
|
617
|
+
for (let i = 1; i < dogPyramidImagesT.length; i++) { // extrema starts from second octave
|
|
618
|
+
dogVariableNames.push('image' + i);
|
|
619
|
+
dogSubCodes += `
|
|
620
|
+
if (octave == ${ i }) {
|
|
621
|
+
return getImage${ i } (y, x);
|
|
622
|
+
}
|
|
623
|
+
`;
|
|
624
|
+
}
|
|
625
|
+
dogSubCodes += `} `;
|
|
626
|
+
|
|
627
|
+
const kernel = {
|
|
628
|
+
variableNames: [...dogVariableNames, 'extrema'],
|
|
629
|
+
outputShape: [prunedExtremasList.length, 3, 3], // 3x3 pixels around the extrema
|
|
630
|
+
userCode: `
|
|
631
|
+
${ dogSubCodes }
|
|
632
|
+
|
|
633
|
+
void main() {
|
|
634
|
+
ivec3 coords = getOutputCoords();
|
|
635
|
+
int featureIndex = coords[0];
|
|
636
|
+
float score = getExtrema(featureIndex, 0);
|
|
637
|
+
if (score == 0.0) {
|
|
638
|
+
return;
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
int dy = coords[1] - 1;
|
|
642
|
+
int dx = coords[2] - 1;
|
|
643
|
+
int octave = int(getExtrema(featureIndex, 1));
|
|
644
|
+
int y = int(getExtrema(featureIndex, 2));
|
|
645
|
+
int x = int(getExtrema(featureIndex, 3));
|
|
646
|
+
setOutput(getPixel(octave, y + dy, x + dx));
|
|
647
|
+
}
|
|
648
|
+
`
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
this.kernelCaches.computeLocalization = [kernel];
|
|
652
|
+
} */
|
|
653
|
+
|
|
654
|
+
return tf.tidy(() => {
|
|
655
|
+
//const program = this.kernelCaches.computeLocalization[0];
|
|
656
|
+
//const prunedExtremasT = tf.tensor(prunedExtremasList, [prunedExtremasList.length, prunedExtremasList[0].length], 'int32');
|
|
657
|
+
|
|
658
|
+
const pixelsT = tf
|
|
659
|
+
.engine()
|
|
660
|
+
.runKernel("ComputeLocalization", { prunedExtremasList, dogPyramidImagesT }); //this._compileAndRun(program, [...dogPyramidImagesT.slice(1), prunedExtremasT]);
|
|
661
|
+
const pixels = pixelsT.arraySync();
|
|
662
|
+
|
|
663
|
+
const result = [];
|
|
664
|
+
for (let i = 0; i < pixels.length; i++) {
|
|
665
|
+
result.push([]);
|
|
666
|
+
for (let j = 0; j < pixels[i].length; j++) {
|
|
667
|
+
result[i].push([]);
|
|
668
|
+
}
|
|
669
|
+
}
|
|
918
670
|
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
671
|
+
const localizedExtremas = [];
|
|
672
|
+
for (let i = 0; i < prunedExtremasList.length; i++) {
|
|
673
|
+
localizedExtremas[i] = [
|
|
674
|
+
prunedExtremasList[i][0],
|
|
675
|
+
prunedExtremasList[i][1],
|
|
676
|
+
prunedExtremasList[i][2],
|
|
677
|
+
prunedExtremasList[i][3],
|
|
678
|
+
];
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
for (let i = 0; i < localizedExtremas.length; i++) {
|
|
682
|
+
if (localizedExtremas[i][0] === 0) {
|
|
683
|
+
continue;
|
|
684
|
+
}
|
|
685
|
+
const pixel = pixels[i];
|
|
686
|
+
const dx = 0.5 * (pixel[1][2] - pixel[1][0]);
|
|
687
|
+
const dy = 0.5 * (pixel[2][1] - pixel[0][1]);
|
|
688
|
+
const dxx = pixel[1][2] + pixel[1][0] - 2 * pixel[1][1];
|
|
689
|
+
const dyy = pixel[2][1] + pixel[0][1] - 2 * pixel[1][1];
|
|
690
|
+
const dxy = 0.25 * (pixel[0][0] + pixel[2][2] - pixel[0][2] - pixel[2][0]);
|
|
691
|
+
|
|
692
|
+
const det = dxx * dyy - dxy * dxy;
|
|
693
|
+
const ux = (dyy * -dx + -dxy * -dy) / det;
|
|
694
|
+
const uy = (-dxy * -dx + dxx * -dy) / det;
|
|
695
|
+
|
|
696
|
+
const newY = localizedExtremas[i][2] + uy;
|
|
697
|
+
const newX = localizedExtremas[i][3] + ux;
|
|
698
|
+
|
|
699
|
+
if (Math.abs(det) < 0.0001) {
|
|
700
|
+
continue;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
localizedExtremas[i][2] = newY;
|
|
704
|
+
localizedExtremas[i][3] = newX;
|
|
705
|
+
}
|
|
706
|
+
return tf.tensor(
|
|
707
|
+
localizedExtremas,
|
|
708
|
+
[localizedExtremas.length, localizedExtremas[0].length],
|
|
709
|
+
"float32",
|
|
710
|
+
);
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
// faster to do it in CPU
|
|
715
|
+
// if we do it on the GPU, we probably need to use tf.topk(), which seems to run on the CPU anyway (there is no GPU operation for it)
|
|
716
|
+
// TODO: research adaptive maximum suppression method
|
|
717
|
+
/**
|
|
718
|
+
*
|
|
719
|
+
* @param {tf.Tensor<tf.Rank>[]} extremasResultsT
|
|
720
|
+
* @returns
|
|
721
|
+
*/
|
|
722
|
+
_applyPrune(extremasResultsT) {
|
|
723
|
+
const nBuckets = NUM_BUCKETS_PER_DIMENSION * NUM_BUCKETS_PER_DIMENSION;
|
|
724
|
+
const nFeatures = MAX_FEATURES_PER_BUCKET;
|
|
725
|
+
/*
|
|
726
|
+
if (!this.kernelCaches.applyPrune) {
|
|
727
|
+
const reductionKernels = [];
|
|
728
|
+
|
|
729
|
+
// to reduce the amount of data that needs to be synced back to the CPU by 4 times, we apply this trick:
|
|
730
|
+
// since it is not possible to have consecutive maxima/minima, we can safely combine 4 pixels into 1
|
|
731
|
+
for (let k = 0; k < extremasResultsT.length; k++) {
|
|
732
|
+
const extremaHeight = extremasResultsT[k].shape[0];
|
|
733
|
+
const extremaWidth = extremasResultsT[k].shape[1];
|
|
734
|
+
|
|
735
|
+
const kernel = {
|
|
736
|
+
variableNames: ['extrema'],
|
|
737
|
+
outputShape: [Math.floor(extremaHeight/2), Math.floor(extremaWidth/2)],
|
|
738
|
+
userCode: `
|
|
739
|
+
void main() {
|
|
740
|
+
ivec2 coords = getOutputCoords();
|
|
741
|
+
int y = coords[0] * 2;
|
|
742
|
+
int x = coords[1] * 2;
|
|
743
|
+
|
|
744
|
+
float location = 0.0;
|
|
745
|
+
float values = getExtrema(y, x);
|
|
746
|
+
|
|
747
|
+
if (getExtrema(y + 1, x) != 0.0) {
|
|
748
|
+
location = 1.0;
|
|
749
|
+
values = getExtrema(y + 1, x);
|
|
750
|
+
}
|
|
751
|
+
else if (getExtrema(y, x + 1) != 0.0) {
|
|
752
|
+
location = 2.0;
|
|
753
|
+
values = getExtrema(y, x + 1);
|
|
754
|
+
}
|
|
755
|
+
else if (getExtrema(y + 1, x + 1) != 0.0) {
|
|
756
|
+
location = 3.0;
|
|
757
|
+
values = getExtrema(y + 1, x + 1);
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
if (values < 0.0) {
|
|
761
|
+
setOutput(location * -1000.0 + values);
|
|
762
|
+
} else {
|
|
763
|
+
setOutput(location * 1000.0 + values);
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
`
|
|
767
|
+
}
|
|
768
|
+
reductionKernels.push(kernel);
|
|
769
|
+
}
|
|
770
|
+
this.kernelCaches.applyPrune = {reductionKernels};
|
|
771
|
+
}
|
|
772
|
+
*/
|
|
773
|
+
// combine results into a tensor of:
|
|
774
|
+
// nBuckets x nFeatures x [score, octave, y, x]
|
|
775
|
+
const curAbsScores = [];
|
|
776
|
+
/** @type {number[][][]} */
|
|
777
|
+
const result = [];
|
|
778
|
+
for (let i = 0; i < nBuckets; i++) {
|
|
779
|
+
result.push([]);
|
|
780
|
+
curAbsScores.push([]);
|
|
781
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
782
|
+
result[i].push([0, 0, 0, 0]);
|
|
783
|
+
curAbsScores[i].push(0);
|
|
784
|
+
}
|
|
922
785
|
}
|
|
923
786
|
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
float dyy = getImage1(y+1, x) + getImage1(y-1, x) - 2. * getImage1(y, x);
|
|
927
|
-
float dxy = 0.25 * (getImage1(y-1,x-1) + getImage1(y+1,x+1) - getImage1(y-1,x+1) - getImage1(y+1,x-1));
|
|
787
|
+
tf.tidy(() => {
|
|
788
|
+
//const {reductionKernels} = this.kernelCaches.applyPrune;
|
|
928
789
|
|
|
929
|
-
|
|
790
|
+
for (let k = 0; k < extremasResultsT.length; k++) {
|
|
791
|
+
//const program = reductionKernels[k];
|
|
792
|
+
//const reducedT = this._compileAndRun(program, [extremasResultsT[k]]);
|
|
793
|
+
const reducedT = tf
|
|
794
|
+
.engine()
|
|
795
|
+
.runKernel("ExtremaReduction", { extremasResultT: extremasResultsT[k] });
|
|
796
|
+
const octave = k + 1; // extrema starts from second octave
|
|
930
797
|
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
}
|
|
798
|
+
const reduced = reducedT.arraySync();
|
|
799
|
+
const height = reducedT.shape[0];
|
|
800
|
+
const width = reducedT.shape[1];
|
|
935
801
|
|
|
936
|
-
|
|
802
|
+
const bucketWidth = (width * 2) / NUM_BUCKETS_PER_DIMENSION;
|
|
803
|
+
const bucketHeight = (height * 2) / NUM_BUCKETS_PER_DIMENSION;
|
|
937
804
|
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
setOutput(getImage1(y,x));
|
|
943
|
-
}
|
|
944
|
-
`
|
|
945
|
-
};
|
|
946
|
-
this.kernelCaches.buildExtremas[kernelKey] = kernel;
|
|
947
|
-
} */
|
|
948
|
-
|
|
949
|
-
return tf.tidy(() => {
|
|
950
|
-
return tf.engine().runKernel("BuildExtremas", { image0, image1, image2 });
|
|
951
|
-
/* const program = this.kernelCaches.buildExtremas[kernelKey];
|
|
952
|
-
image0 = this._downsampleBilinear(image0);
|
|
953
|
-
image2 = this._upsampleBilinear(image2, image1); */
|
|
954
|
-
//this._compileAndRun(program, [image0, image1, image2]);
|
|
955
|
-
//return this._runWebGLProgram(program, [image0, image1, image2], 'float32');
|
|
956
|
-
});
|
|
957
|
-
}
|
|
958
|
-
/**
|
|
959
|
-
*
|
|
960
|
-
* @param {tf.Tensor<tf.Rank>} image1
|
|
961
|
-
* @param {tf.Tensor<tf.Rank>} image2
|
|
962
|
-
* @returns
|
|
963
|
-
*/
|
|
964
|
-
_differenceImageBinomial(image1, image2) {
|
|
965
|
-
return tf.tidy(() => {
|
|
966
|
-
return image1.sub(image2);
|
|
967
|
-
});
|
|
968
|
-
}
|
|
969
|
-
|
|
970
|
-
// 4th order binomial filter [1,4,6,4,1] X [1,4,6,4,1]
|
|
971
|
-
_applyFilter(image) {
|
|
972
|
-
/* const imageHeight = image.shape[0];
|
|
973
|
-
const imageWidth = image.shape[1];
|
|
974
|
-
|
|
975
|
-
const kernelKey = 'w' + imageWidth;
|
|
976
|
-
if (!this.kernelCaches.applyFilter) {
|
|
977
|
-
this.kernelCaches.applyFilter = {};
|
|
978
|
-
}
|
|
805
|
+
for (let j = 0; j < height; j++) {
|
|
806
|
+
for (let i = 0; i < width; i++) {
|
|
807
|
+
const encoded = reduced[j][i];
|
|
808
|
+
if (encoded == 0) continue;
|
|
979
809
|
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
userCode: `
|
|
985
|
-
void main() {
|
|
986
|
-
ivec2 coords = getOutputCoords();
|
|
987
|
-
|
|
988
|
-
float sum = getP(coords[0], coords[1]-2);
|
|
989
|
-
sum += getP(coords[0], coords[1]-1) * 4.;
|
|
990
|
-
sum += getP(coords[0], coords[1]) * 6.;
|
|
991
|
-
sum += getP(coords[0], coords[1]+1) * 4.;
|
|
992
|
-
sum += getP(coords[0], coords[1]+2);
|
|
993
|
-
setOutput(sum);
|
|
994
|
-
}
|
|
995
|
-
`
|
|
996
|
-
};
|
|
810
|
+
const score = encoded % 1000;
|
|
811
|
+
const loc = Math.floor(Math.abs(encoded) / 1000);
|
|
812
|
+
const x = i * 2 + (loc === 2 || loc === 3 ? 1 : 0);
|
|
813
|
+
const y = j * 2 + (loc === 1 || loc === 3 ? 1 : 0);
|
|
997
814
|
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
const
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
this.kernelCaches.upsampleBilinear = {};
|
|
815
|
+
const bucketX = Math.floor(x / bucketWidth);
|
|
816
|
+
const bucketY = Math.floor(y / bucketHeight);
|
|
817
|
+
const bucket = bucketY * NUM_BUCKETS_PER_DIMENSION + bucketX;
|
|
818
|
+
|
|
819
|
+
const absScore = Math.abs(score);
|
|
820
|
+
|
|
821
|
+
let tIndex = nFeatures;
|
|
822
|
+
while (tIndex >= 1 && absScore > curAbsScores[bucket][tIndex - 1]) {
|
|
823
|
+
tIndex -= 1;
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
if (tIndex < nFeatures) {
|
|
827
|
+
for (let t = nFeatures - 1; t >= tIndex + 1; t--) {
|
|
828
|
+
curAbsScores[bucket][t] = curAbsScores[bucket][t - 1];
|
|
829
|
+
result[bucket][t][0] = result[bucket][t - 1][0];
|
|
830
|
+
result[bucket][t][1] = result[bucket][t - 1][1];
|
|
831
|
+
result[bucket][t][2] = result[bucket][t - 1][2];
|
|
832
|
+
result[bucket][t][3] = result[bucket][t - 1][3];
|
|
833
|
+
}
|
|
834
|
+
curAbsScores[bucket][tIndex] = absScore;
|
|
835
|
+
result[bucket][tIndex][0] = score;
|
|
836
|
+
result[bucket][tIndex][1] = octave;
|
|
837
|
+
result[bucket][tIndex][2] = y;
|
|
838
|
+
result[bucket][tIndex][3] = x;
|
|
839
|
+
}
|
|
840
|
+
} //for i<width
|
|
841
|
+
} //for j<height
|
|
842
|
+
}
|
|
843
|
+
});
|
|
844
|
+
|
|
845
|
+
// combine all buckets into a single list
|
|
846
|
+
const list = [];
|
|
847
|
+
for (let i = 0; i < nBuckets; i++) {
|
|
848
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
849
|
+
list.push(result[i][j]);
|
|
850
|
+
}
|
|
1035
851
|
}
|
|
852
|
+
return list;
|
|
853
|
+
}
|
|
1036
854
|
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
setOutput(
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
855
|
+
_buildExtremas(image0, image1, image2) {
|
|
856
|
+
/* const imageHeight = image1.shape[0];
|
|
857
|
+
const imageWidth = image1.shape[1];
|
|
858
|
+
|
|
859
|
+
const kernelKey = 'w' + imageWidth;
|
|
860
|
+
|
|
861
|
+
if (!this.kernelCaches.buildExtremas) {
|
|
862
|
+
this.kernelCaches.buildExtremas = {};
|
|
863
|
+
}
|
|
864
|
+
if (!this.kernelCaches.buildExtremas[kernelKey]) {
|
|
865
|
+
const kernel = {
|
|
866
|
+
variableNames: ['image0', 'image1', 'image2'],
|
|
867
|
+
outputShape: [imageHeight, imageWidth],
|
|
868
|
+
userCode: `
|
|
869
|
+
void main() {
|
|
870
|
+
ivec2 coords = getOutputCoords();
|
|
871
|
+
|
|
872
|
+
int y = coords[0];
|
|
873
|
+
int x = coords[1];
|
|
874
|
+
|
|
875
|
+
float value = getImage1(y, x);
|
|
876
|
+
|
|
877
|
+
// Step 1: find local maxima/minima
|
|
878
|
+
if (value * value < ${ LAPLACIAN_SQR_THRESHOLD }.) {
|
|
879
|
+
setOutput(0.);
|
|
880
|
+
return;
|
|
881
|
+
}
|
|
882
|
+
if (y < ${ FREAK_EXPANSION_FACTOR } || y > ${ imageHeight - 1 - FREAK_EXPANSION_FACTOR }) {
|
|
883
|
+
setOutput(0.);
|
|
884
|
+
return;
|
|
885
|
+
}
|
|
886
|
+
if (x < ${ FREAK_EXPANSION_FACTOR } || x > ${ imageWidth - 1 - FREAK_EXPANSION_FACTOR }) {
|
|
887
|
+
setOutput(0.);
|
|
888
|
+
return;
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
bool isMax = true;
|
|
892
|
+
bool isMin = true;
|
|
893
|
+
for (int dy = -1; dy <= 1; dy++) {
|
|
894
|
+
for (int dx = -1; dx <= 1; dx++) {
|
|
895
|
+
float value0 = getImage0(y + dy, x + dx);
|
|
896
|
+
float value1 = getImage1(y + dy, x + dx);
|
|
897
|
+
float value2 = getImage2(y + dy, x + dx);
|
|
898
|
+
|
|
899
|
+
if (value < value0 || value < value1 || value < value2) {
|
|
900
|
+
isMax = false;
|
|
901
|
+
}
|
|
902
|
+
if (value > value0 || value > value1 || value > value2) {
|
|
903
|
+
isMin = false;
|
|
904
|
+
}
|
|
1070
905
|
}
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
if (!isMax && !isMin) {
|
|
909
|
+
setOutput(0.);
|
|
910
|
+
return;
|
|
911
|
+
}
|
|
912
|
+
|
|
913
|
+
// compute edge score and reject based on threshold
|
|
914
|
+
float dxx = getImage1(y, x + 1) + getImage1(y, x - 1) - 2. * getImage1(y, x);
|
|
915
|
+
float dyy = getImage1(y + 1, x) + getImage1(y - 1, x) - 2. * getImage1(y, x);
|
|
916
|
+
float dxy = 0.25 * (getImage1(y - 1, x - 1) + getImage1(y + 1, x + 1) - getImage1(y - 1, x + 1) - getImage1(y + 1, x - 1));
|
|
917
|
+
|
|
918
|
+
float det = (dxx * dyy) - (dxy * dxy);
|
|
919
|
+
|
|
920
|
+
if (abs(det) < 0.0001) { // determinant undefined. no solution
|
|
921
|
+
setOutput(0.);
|
|
922
|
+
return;
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
float edgeScore = (dxx + dyy) * (dxx + dyy) / det;
|
|
926
|
+
|
|
927
|
+
if (abs(edgeScore) >= ${ EDGE_HESSIAN_THRESHOLD } ) {
|
|
928
|
+
setOutput(0.);
|
|
929
|
+
return;
|
|
930
|
+
}
|
|
931
|
+
setOutput(getImage1(y, x));
|
|
932
|
+
}
|
|
933
|
+
`
|
|
934
|
+
};
|
|
935
|
+
this.kernelCaches.buildExtremas[kernelKey] = kernel;
|
|
936
|
+
} */
|
|
1071
937
|
|
|
1072
938
|
return tf.tidy(() => {
|
|
1073
|
-
|
|
1074
|
-
|
|
939
|
+
return tf.engine().runKernel("BuildExtremas", { image0, image1, image2 });
|
|
940
|
+
/* const program = this.kernelCaches.buildExtremas[kernelKey];
|
|
941
|
+
image0 = this._downsampleBilinear(image0);
|
|
942
|
+
image2 = this._upsampleBilinear(image2, image1); */
|
|
943
|
+
//this._compileAndRun(program, [image0, image1, image2]);
|
|
944
|
+
//return this._runWebGLProgram(program, [image0, image1, image2], 'float32');
|
|
1075
945
|
});
|
|
1076
|
-
}
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
946
|
+
}
|
|
947
|
+
/**
|
|
948
|
+
*
|
|
949
|
+
* @param {tf.Tensor<tf.Rank>} image1
|
|
950
|
+
* @param {tf.Tensor<tf.Rank>} image2
|
|
951
|
+
* @returns {tf.Tensor<tf.Rank>} the difference image1 - image2 (result of image1.sub(image2))
|
|
952
|
+
*/
|
|
953
|
+
_differenceImageBinomial(image1, image2) {
|
|
954
|
+
return tf.tidy(() => {
|
|
955
|
+
return image1.sub(image2);
|
|
956
|
+
});
|
|
957
|
+
}
|
|
1081
958
|
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
959
|
+
// 4th order binomial filter [1,4,6,4,1] X [1,4,6,4,1]
|
|
960
|
+
_applyFilter(image) {
|
|
961
|
+
/* const imageHeight = image.shape[0];
|
|
962
|
+
const imageWidth = image.shape[1];
|
|
963
|
+
|
|
964
|
+
const kernelKey = 'w' + imageWidth;
|
|
965
|
+
if (!this.kernelCaches.applyFilter) {
|
|
966
|
+
this.kernelCaches.applyFilter = {};
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
if (!this.kernelCaches.applyFilter[kernelKey]) {
|
|
970
|
+
const kernel1 = {
|
|
971
|
+
variableNames: ['p'],
|
|
972
|
+
outputShape: [imageHeight, imageWidth],
|
|
973
|
+
userCode: `
|
|
974
|
+
void main() {
|
|
975
|
+
ivec2 coords = getOutputCoords();
|
|
976
|
+
|
|
977
|
+
float sum = getP(coords[0], coords[1] - 2);
|
|
978
|
+
sum += getP(coords[0], coords[1] - 1) * 4.;
|
|
979
|
+
sum += getP(coords[0], coords[1]) * 6.;
|
|
980
|
+
sum += getP(coords[0], coords[1] + 1) * 4.;
|
|
981
|
+
sum += getP(coords[0], coords[1] + 2);
|
|
982
|
+
setOutput(sum);
|
|
983
|
+
}
|
|
984
|
+
`
|
|
985
|
+
};
|
|
986
|
+
|
|
987
|
+
const kernel2 = {
|
|
988
|
+
variableNames: ['p'],
|
|
989
|
+
outputShape: [imageHeight, imageWidth],
|
|
990
|
+
userCode: `
|
|
991
|
+
void main() {
|
|
992
|
+
ivec2 coords = getOutputCoords();
|
|
993
|
+
|
|
994
|
+
float sum = getP(coords[0] - 2, coords[1]);
|
|
995
|
+
sum += getP(coords[0] - 1, coords[1]) * 4.;
|
|
996
|
+
sum += getP(coords[0], coords[1]) * 6.;
|
|
997
|
+
sum += getP(coords[0] + 1, coords[1]) * 4.;
|
|
998
|
+
sum += getP(coords[0] + 2, coords[1]);
|
|
999
|
+
sum /= 256.;
|
|
1000
|
+
setOutput(sum);
|
|
1001
|
+
}
|
|
1002
|
+
`
|
|
1003
|
+
};
|
|
1004
|
+
this.kernelCaches.applyFilter[kernelKey] = [kernel1, kernel2];
|
|
1005
|
+
}
|
|
1006
|
+
*/
|
|
1007
|
+
return tf.tidy(() => {
|
|
1008
|
+
/* const [program1, program2] = this.kernelCaches.applyFilter[kernelKey];
|
|
1009
|
+
|
|
1010
|
+
const result1 = this._compileAndRun(program1, [image]);
|
|
1011
|
+
const result2 = this._compileAndRun(program2, [result1]);
|
|
1012
|
+
return result2; */
|
|
1013
|
+
return tf.engine().runKernel("BinomialFilter", { image });
|
|
1014
|
+
});
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
/* _upsampleBilinear(image, targetImage) {
|
|
1018
|
+
const imageHeight = image.shape[0];
|
|
1019
|
+
const imageWidth = image.shape[1];
|
|
1020
|
+
|
|
1021
|
+
const kernelKey = 'w' + imageWidth;
|
|
1022
|
+
if (!this.kernelCaches.upsampleBilinear) {
|
|
1023
|
+
this.kernelCaches.upsampleBilinear = {};
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
if (!this.kernelCaches.upsampleBilinear[kernelKey]) {
|
|
1027
|
+
const kernel = {
|
|
1028
|
+
variableNames: ['p'],
|
|
1029
|
+
outputShape: [targetImage.shape[0], targetImage.shape[1]],
|
|
1030
|
+
userCode: `
|
|
1031
|
+
void main() {
|
|
1032
|
+
ivec2 coords = getOutputCoords();
|
|
1033
|
+
int j = coords[0];
|
|
1034
|
+
int i = coords[1];
|
|
1035
|
+
|
|
1036
|
+
float sj = 0.5 * float(j) - 0.25;
|
|
1037
|
+
float si = 0.5 * float(i) - 0.25;
|
|
1038
|
+
|
|
1039
|
+
float sj0 = floor(sj);
|
|
1040
|
+
float sj1 = ceil(sj);
|
|
1041
|
+
float si0 = floor(si);
|
|
1042
|
+
float si1 = ceil(si);
|
|
1043
|
+
|
|
1044
|
+
int sj0I = int(sj0);
|
|
1045
|
+
int sj1I = int(sj1);
|
|
1046
|
+
int si0I = int(si0);
|
|
1047
|
+
int si1I = int(si1);
|
|
1048
|
+
|
|
1049
|
+
float sum = 0.0;
|
|
1050
|
+
sum += getP(sj0I, si0I) * (si1 - si) * (sj1 - sj);
|
|
1051
|
+
sum += getP(sj1I, si0I) * (si1 - si) * (sj - sj0);
|
|
1052
|
+
sum += getP(sj0I, si1I) * (si - si0) * (sj1 - sj);
|
|
1053
|
+
sum += getP(sj1I, si1I) * (si - si0) * (sj - sj0);
|
|
1054
|
+
setOutput(sum);
|
|
1055
|
+
}
|
|
1056
|
+
`
|
|
1057
|
+
};
|
|
1058
|
+
this.kernelCaches.upsampleBilinear[kernelKey] = kernel;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
return tf.tidy(() => {
|
|
1062
|
+
const program = this.kernelCaches.upsampleBilinear[kernelKey];
|
|
1063
|
+
return tf.engine().runKernel("UpsampleBilinear", { x: image, width: image.shape[1], height: image.shape[0] });//this._compileAndRun(program, [image]);
|
|
1064
|
+
});
|
|
1065
|
+
} */
|
|
1066
|
+
|
|
1067
|
+
_downsampleBilinear(image) {
|
|
1068
|
+
/* const imageHeight = image.shape[0];
|
|
1069
|
+
const imageWidth = image.shape[1];
|
|
1070
|
+
|
|
1071
|
+
const kernelKey = 'w' + imageWidth;
|
|
1072
|
+
if (!this.kernelCaches.downsampleBilinear) {
|
|
1073
|
+
this.kernelCaches.downsampleBilinear = {};
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
if (!this.kernelCaches.downsampleBilinear[kernelKey]) {
|
|
1077
|
+
const kernel = {
|
|
1078
|
+
variableNames: ['p'],
|
|
1079
|
+
outputShape: [Math.floor(imageHeight / 2), Math.floor(imageWidth / 2)],
|
|
1080
|
+
userCode: `
|
|
1081
|
+
void main() {
|
|
1082
|
+
ivec2 coords = getOutputCoords();
|
|
1083
|
+
int y = coords[0] * 2;
|
|
1084
|
+
int x = coords[1] * 2;
|
|
1085
|
+
|
|
1086
|
+
float sum = getP(y, x) * 0.25;
|
|
1087
|
+
sum += getP(y + 1, x) * 0.25;
|
|
1088
|
+
sum += getP(y, x + 1) * 0.25;
|
|
1089
|
+
sum += getP(y + 1, x + 1) * 0.25;
|
|
1090
|
+
setOutput(sum);
|
|
1091
|
+
}
|
|
1092
|
+
`
|
|
1093
|
+
};
|
|
1094
|
+
this.kernelCaches.downsampleBilinear[kernelKey] = kernel;
|
|
1095
|
+
} */
|
|
1086
1096
|
|
|
1087
|
-
|
|
1088
|
-
const
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
return tf.tidy(() => {
|
|
1109
|
-
//const program = this.kernelCaches.downsampleBilinear[kernelKey];
|
|
1110
|
-
return tf.engine().runKernel("DownsampleBilinear", { image }); //this._compileAndRun(program, [image]);
|
|
1111
|
-
});
|
|
1112
|
-
}
|
|
1113
|
-
/**
|
|
1114
|
-
*
|
|
1115
|
-
* @param {tf.MathBackendWebGL.GPGPUProgram} program
|
|
1116
|
-
* @param {*} inputs
|
|
1117
|
-
* @returns
|
|
1118
|
-
*/
|
|
1119
|
-
_compileAndRun(program, inputs) {
|
|
1120
|
-
const outInfo = tf.backend().compileAndRun(program, inputs);
|
|
1121
|
-
return tf.engine().makeTensorFromDataId(outInfo.dataId, outInfo.shape, outInfo.dtype);
|
|
1122
|
-
}
|
|
1123
|
-
|
|
1124
|
-
_runWebGLProgram(program, inputs, outputType) {
|
|
1125
|
-
const outInfo = tf.backend().runWebGLProgram(program, inputs, outputType);
|
|
1126
|
-
return tf.engine().makeTensorFromDataId(outInfo.dataId, outInfo.shape, outInfo.dtype);
|
|
1127
|
-
}
|
|
1097
|
+
return tf.tidy(() => {
|
|
1098
|
+
//const program = this.kernelCaches.downsampleBilinear[kernelKey];
|
|
1099
|
+
return tf.engine().runKernel("DownsampleBilinear", { image }); //this._compileAndRun(program, [image]);
|
|
1100
|
+
});
|
|
1101
|
+
}
|
|
1102
|
+
/**
|
|
1103
|
+
*
|
|
1104
|
+
* @param {tf.MathBackendWebGL.GPGPUProgram} program
|
|
1105
|
+
* @param {*} inputs
|
|
1106
|
+
* @returns
|
|
1107
|
+
*/
|
|
1108
|
+
_compileAndRun(program, inputs) {
|
|
1109
|
+
const outInfo = tf.backend().compileAndRun(program, inputs);
|
|
1110
|
+
return tf.engine().makeTensor(outInfo.dataId, outInfo.shape, outInfo.dtype);
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
_runWebGLProgram(program, inputs, outputType) {
|
|
1114
|
+
const outInfo = tf.backend().runWebGLProgram(program, inputs, outputType);
|
|
1115
|
+
return tf.engine().makeTensor(outInfo.dataId, outInfo.shape, outInfo.dtype);
|
|
1116
|
+
}
|
|
1128
1117
|
}
|
|
1129
1118
|
|
|
1130
1119
|
export { Detector };
|