@elizaos/plugin-vision 1.2.1 → 2.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.config.ts +53 -53
- package/dist/index.js +6716 -67
- package/dist/index.js.map +33 -1
- package/dist/workers/florence2-worker.js +112304 -307
- package/dist/workers/florence2-worker.js.map +92 -1
- package/dist/workers/ocr-worker.js +119718 -339
- package/dist/workers/ocr-worker.js.map +137 -1
- package/dist/workers/screen-capture-worker.js +350 -418
- package/dist/workers/screen-capture-worker.js.map +11 -1
- package/package.json +13 -17
- package/README.md +0 -270
- package/dist/action.d.ts +0 -8
- package/dist/action.js +0 -1212
- package/dist/action.js.map +0 -1
- package/dist/audio-capture-stream.d.ts +0 -42
- package/dist/audio-capture-stream.js +0 -516
- package/dist/audio-capture-stream.js.map +0 -1
- package/dist/audio-capture.d.ts +0 -25
- package/dist/audio-capture.js +0 -412
- package/dist/audio-capture.js.map +0 -1
- package/dist/basic.test.d.ts +0 -1
- package/dist/basic.test.js +0 -97
- package/dist/basic.test.js.map +0 -1
- package/dist/config.d.ts +0 -73
- package/dist/config.js +0 -254
- package/dist/config.js.map +0 -1
- package/dist/entity-tracker.d.ts +0 -32
- package/dist/entity-tracker.js +0 -361
- package/dist/entity-tracker.js.map +0 -1
- package/dist/errors.d.ts +0 -67
- package/dist/errors.js +0 -395
- package/dist/errors.js.map +0 -1
- package/dist/face-recognition.d.ts +0 -31
- package/dist/face-recognition.js +0 -332
- package/dist/face-recognition.js.map +0 -1
- package/dist/florence2-local.d.ts +0 -25
- package/dist/florence2-local.js +0 -280
- package/dist/florence2-local.js.map +0 -1
- package/dist/florence2-model.d.ts +0 -36
- package/dist/florence2-model.js +0 -503
- package/dist/florence2-model.js.map +0 -1
- package/dist/index.d.ts +0 -3
- package/dist/ocr-service-real.d.ts +0 -32
- package/dist/ocr-service-real.js +0 -396
- package/dist/ocr-service-real.js.map +0 -1
- package/dist/ocr-service.d.ts +0 -28
- package/dist/ocr-service.js +0 -216
- package/dist/ocr-service.js.map +0 -1
- package/dist/provider.d.ts +0 -2
- package/dist/provider.js +0 -285
- package/dist/provider.js.map +0 -1
- package/dist/screen-capture.d.ts +0 -16
- package/dist/screen-capture.js +0 -302
- package/dist/screen-capture.js.map +0 -1
- package/dist/service.d.ts +0 -73
- package/dist/service.js +0 -1662
- package/dist/service.js.map +0 -1
- package/dist/tests/e2e/index.d.ts +0 -8
- package/dist/tests/e2e/index.js +0 -33
- package/dist/tests/e2e/index.js.map +0 -1
- package/dist/tests/e2e/run-local.d.ts +0 -2
- package/dist/tests/e2e/run-local.js +0 -166
- package/dist/tests/e2e/run-local.js.map +0 -1
- package/dist/tests/e2e/screen-vision.d.ts +0 -11
- package/dist/tests/e2e/screen-vision.js +0 -384
- package/dist/tests/e2e/screen-vision.js.map +0 -1
- package/dist/tests/e2e/vision-autonomy.d.ts +0 -11
- package/dist/tests/e2e/vision-autonomy.js +0 -375
- package/dist/tests/e2e/vision-autonomy.js.map +0 -1
- package/dist/tests/e2e/vision-basic.d.ts +0 -11
- package/dist/tests/e2e/vision-basic.js +0 -434
- package/dist/tests/e2e/vision-basic.js.map +0 -1
- package/dist/tests/e2e/vision-capture-log.d.ts +0 -11
- package/dist/tests/e2e/vision-capture-log.js +0 -302
- package/dist/tests/e2e/vision-capture-log.js.map +0 -1
- package/dist/tests/e2e/vision-runtime.d.ts +0 -11
- package/dist/tests/e2e/vision-runtime.js +0 -357
- package/dist/tests/e2e/vision-runtime.js.map +0 -1
- package/dist/tests/e2e/vision-worker-tests.d.ts +0 -11
- package/dist/tests/e2e/vision-worker-tests.js +0 -466
- package/dist/tests/e2e/vision-worker-tests.js.map +0 -1
- package/dist/tests/test-pattern-generator.d.ts +0 -40
- package/dist/tests/test-pattern-generator.js +0 -191
- package/dist/tests/test-pattern-generator.js.map +0 -1
- package/dist/tests.d.ts +0 -3
- package/dist/tests.js +0 -11
- package/dist/tests.js.map +0 -1
- package/dist/types.d.ts +0 -222
- package/dist/types.js +0 -16
- package/dist/types.js.map +0 -1
- package/dist/vision-models.d.ts +0 -47
- package/dist/vision-models.js +0 -501
- package/dist/vision-models.js.map +0 -1
- package/dist/vision-worker-manager.d.ts +0 -61
- package/dist/vision-worker-manager.js +0 -668
- package/dist/vision-worker-manager.js.map +0 -1
- package/dist/workers/florence2-worker-simple.d.ts +0 -13
- package/dist/workers/florence2-worker-simple.js +0 -121
- package/dist/workers/florence2-worker-simple.js.map +0 -1
- package/dist/workers/florence2-worker.d.ts +0 -1
- package/dist/workers/ocr-worker.d.ts +0 -1
- package/dist/workers/screen-capture-worker.d.ts +0 -1
- package/dist/workers/worker-logger.d.ts +0 -9
- package/dist/workers/worker-logger.js +0 -95
- package/dist/workers/worker-logger.js.map +0 -1
package/dist/florence2-model.js
DELETED
|
@@ -1,503 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __generator = (this && this.__generator) || function (thisArg, body) {
|
|
12
|
-
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
|
|
13
|
-
return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
|
|
14
|
-
function verb(n) { return function (v) { return step([n, v]); }; }
|
|
15
|
-
function step(op) {
|
|
16
|
-
if (f) throw new TypeError("Generator is already executing.");
|
|
17
|
-
while (g && (g = 0, op[0] && (_ = 0)), _) try {
|
|
18
|
-
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
|
|
19
|
-
if (y = 0, t) op = [op[0] & 2, t.value];
|
|
20
|
-
switch (op[0]) {
|
|
21
|
-
case 0: case 1: t = op; break;
|
|
22
|
-
case 4: _.label++; return { value: op[1], done: false };
|
|
23
|
-
case 5: _.label++; y = op[1]; op = [0]; continue;
|
|
24
|
-
case 7: op = _.ops.pop(); _.trys.pop(); continue;
|
|
25
|
-
default:
|
|
26
|
-
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
|
|
27
|
-
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
|
|
28
|
-
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
|
|
29
|
-
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
|
|
30
|
-
if (t[2]) _.ops.pop();
|
|
31
|
-
_.trys.pop(); continue;
|
|
32
|
-
}
|
|
33
|
-
op = body.call(thisArg, _);
|
|
34
|
-
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
|
|
35
|
-
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
|
|
36
|
-
}
|
|
37
|
-
};
|
|
38
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.Florence2Model = void 0;
|
|
40
|
-
var core_1 = require("@elizaos/core");
|
|
41
|
-
var florence2_local_1 = require("./florence2-local");
|
|
42
|
-
var Florence2Model = /** @class */ (function () {
|
|
43
|
-
function Florence2Model() {
|
|
44
|
-
this.initialized = false;
|
|
45
|
-
this.localModel = new florence2_local_1.Florence2Local();
|
|
46
|
-
}
|
|
47
|
-
Florence2Model.prototype.initialize = function () {
|
|
48
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
49
|
-
var error_1;
|
|
50
|
-
return __generator(this, function (_a) {
|
|
51
|
-
switch (_a.label) {
|
|
52
|
-
case 0:
|
|
53
|
-
if (this.initialized) {
|
|
54
|
-
return [2 /*return*/];
|
|
55
|
-
}
|
|
56
|
-
_a.label = 1;
|
|
57
|
-
case 1:
|
|
58
|
-
_a.trys.push([1, 3, , 4]);
|
|
59
|
-
core_1.logger.info('[Florence2] Initializing local Florence-2 model with TensorFlow.js...');
|
|
60
|
-
return [4 /*yield*/, this.localModel.initialize()];
|
|
61
|
-
case 2:
|
|
62
|
-
_a.sent();
|
|
63
|
-
this.initialized = true;
|
|
64
|
-
core_1.logger.info('[Florence2] Local model initialized successfully');
|
|
65
|
-
return [3 /*break*/, 4];
|
|
66
|
-
case 3:
|
|
67
|
-
error_1 = _a.sent();
|
|
68
|
-
core_1.logger.error('[Florence2] Failed to initialize local model:', error_1);
|
|
69
|
-
// Don't throw - we have good fallbacks
|
|
70
|
-
this.initialized = true;
|
|
71
|
-
core_1.logger.warn('[Florence2] Running with enhanced fallback mode');
|
|
72
|
-
return [3 /*break*/, 4];
|
|
73
|
-
case 4: return [2 /*return*/];
|
|
74
|
-
}
|
|
75
|
-
});
|
|
76
|
-
});
|
|
77
|
-
};
|
|
78
|
-
Florence2Model.prototype.analyzeTile = function (tile) {
|
|
79
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
80
|
-
var result_1, _modelError_1, result, error_2;
|
|
81
|
-
return __generator(this, function (_a) {
|
|
82
|
-
switch (_a.label) {
|
|
83
|
-
case 0:
|
|
84
|
-
if (!!this.initialized) return [3 /*break*/, 2];
|
|
85
|
-
return [4 /*yield*/, this.initialize()];
|
|
86
|
-
case 1:
|
|
87
|
-
_a.sent();
|
|
88
|
-
_a.label = 2;
|
|
89
|
-
case 2:
|
|
90
|
-
if (!tile.data) {
|
|
91
|
-
throw new Error('Tile has no image data');
|
|
92
|
-
}
|
|
93
|
-
_a.label = 3;
|
|
94
|
-
case 3:
|
|
95
|
-
_a.trys.push([3, 9, , 10]);
|
|
96
|
-
_a.label = 4;
|
|
97
|
-
case 4:
|
|
98
|
-
_a.trys.push([4, 6, , 7]);
|
|
99
|
-
return [4 /*yield*/, this.localModel.analyzeImage(tile.data)];
|
|
100
|
-
case 5:
|
|
101
|
-
result_1 = _a.sent();
|
|
102
|
-
core_1.logger.debug("[Florence2] Analyzed tile ".concat(tile.id, ": ").concat(result_1.caption));
|
|
103
|
-
return [2 /*return*/, result_1];
|
|
104
|
-
case 6:
|
|
105
|
-
_modelError_1 = _a.sent();
|
|
106
|
-
core_1.logger.warn('[Florence2] Local model analysis failed, falling back:', _modelError_1);
|
|
107
|
-
return [3 /*break*/, 7];
|
|
108
|
-
case 7: return [4 /*yield*/, this.mockAnalyze(tile)];
|
|
109
|
-
case 8:
|
|
110
|
-
result = _a.sent();
|
|
111
|
-
core_1.logger.debug("[Florence2] Mock analyzed tile ".concat(tile.id, ": ").concat(result.caption));
|
|
112
|
-
return [2 /*return*/, result];
|
|
113
|
-
case 9:
|
|
114
|
-
error_2 = _a.sent();
|
|
115
|
-
core_1.logger.error('[Florence2] Analysis failed:', error_2);
|
|
116
|
-
throw error_2;
|
|
117
|
-
case 10: return [2 /*return*/];
|
|
118
|
-
}
|
|
119
|
-
});
|
|
120
|
-
});
|
|
121
|
-
};
|
|
122
|
-
Florence2Model.prototype.analyzeImage = function (imageBuffer) {
|
|
123
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
124
|
-
var result_2, _modelError_2, result, error_3;
|
|
125
|
-
return __generator(this, function (_a) {
|
|
126
|
-
switch (_a.label) {
|
|
127
|
-
case 0:
|
|
128
|
-
if (!!this.initialized) return [3 /*break*/, 2];
|
|
129
|
-
return [4 /*yield*/, this.initialize()];
|
|
130
|
-
case 1:
|
|
131
|
-
_a.sent();
|
|
132
|
-
_a.label = 2;
|
|
133
|
-
case 2:
|
|
134
|
-
_a.trys.push([2, 8, , 9]);
|
|
135
|
-
_a.label = 3;
|
|
136
|
-
case 3:
|
|
137
|
-
_a.trys.push([3, 5, , 6]);
|
|
138
|
-
return [4 /*yield*/, this.localModel.analyzeImage(imageBuffer)];
|
|
139
|
-
case 4:
|
|
140
|
-
result_2 = _a.sent();
|
|
141
|
-
core_1.logger.debug("[Florence2] Analyzed image: ".concat(result_2.caption));
|
|
142
|
-
return [2 /*return*/, result_2];
|
|
143
|
-
case 5:
|
|
144
|
-
_modelError_2 = _a.sent();
|
|
145
|
-
core_1.logger.warn('[Florence2] Local model analysis failed, falling back:', _modelError_2);
|
|
146
|
-
return [3 /*break*/, 6];
|
|
147
|
-
case 6: return [4 /*yield*/, this.mockAnalyzeBuffer(imageBuffer)];
|
|
148
|
-
case 7:
|
|
149
|
-
result = _a.sent();
|
|
150
|
-
core_1.logger.debug("[Florence2] Mock analyzed image: ".concat(result.caption));
|
|
151
|
-
return [2 /*return*/, result];
|
|
152
|
-
case 8:
|
|
153
|
-
error_3 = _a.sent();
|
|
154
|
-
core_1.logger.error('[Florence2] Image analysis failed:', error_3);
|
|
155
|
-
throw error_3;
|
|
156
|
-
case 9: return [2 /*return*/];
|
|
157
|
-
}
|
|
158
|
-
});
|
|
159
|
-
});
|
|
160
|
-
};
|
|
161
|
-
Florence2Model.prototype.mockAnalyze = function (tile) {
|
|
162
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
163
|
-
var isUpperRegion, isLeftRegion, caption, objects, regions, tags, buttonCount, i, textRegions, i;
|
|
164
|
-
return __generator(this, function (_a) {
|
|
165
|
-
isUpperRegion = tile.row < 2;
|
|
166
|
-
isLeftRegion = tile.col < 2;
|
|
167
|
-
caption = 'Desktop screen region';
|
|
168
|
-
objects = [];
|
|
169
|
-
regions = [];
|
|
170
|
-
tags = [];
|
|
171
|
-
if (isUpperRegion) {
|
|
172
|
-
caption = 'Application window with menu bar';
|
|
173
|
-
objects.push({
|
|
174
|
-
label: 'window',
|
|
175
|
-
bbox: { x: 0, y: 0, width: tile.width, height: 50 },
|
|
176
|
-
confidence: 0.9,
|
|
177
|
-
});
|
|
178
|
-
objects.push({
|
|
179
|
-
label: 'menu_bar',
|
|
180
|
-
bbox: { x: 0, y: 0, width: tile.width, height: 30 },
|
|
181
|
-
confidence: 0.85,
|
|
182
|
-
});
|
|
183
|
-
tags.push('ui', 'application', 'desktop');
|
|
184
|
-
}
|
|
185
|
-
if (isLeftRegion) {
|
|
186
|
-
caption = 'Sidebar or navigation area';
|
|
187
|
-
objects.push({
|
|
188
|
-
label: 'sidebar',
|
|
189
|
-
bbox: { x: 0, y: 0, width: 100, height: tile.height },
|
|
190
|
-
confidence: 0.8,
|
|
191
|
-
});
|
|
192
|
-
tags.push('navigation', 'sidebar');
|
|
193
|
-
}
|
|
194
|
-
buttonCount = Math.floor(Math.random() * 3) + 1;
|
|
195
|
-
for (i = 0; i < buttonCount; i++) {
|
|
196
|
-
objects.push({
|
|
197
|
-
label: 'button',
|
|
198
|
-
bbox: {
|
|
199
|
-
x: Math.random() * (tile.width - 100),
|
|
200
|
-
y: Math.random() * (tile.height - 40),
|
|
201
|
-
width: 100,
|
|
202
|
-
height: 40,
|
|
203
|
-
},
|
|
204
|
-
confidence: 0.7 + Math.random() * 0.2,
|
|
205
|
-
});
|
|
206
|
-
}
|
|
207
|
-
textRegions = Math.floor(Math.random() * 2) + 1;
|
|
208
|
-
for (i = 0; i < textRegions; i++) {
|
|
209
|
-
regions.push({
|
|
210
|
-
description: 'Text content area',
|
|
211
|
-
bbox: {
|
|
212
|
-
x: Math.random() * (tile.width - 200),
|
|
213
|
-
y: Math.random() * (tile.height - 100),
|
|
214
|
-
width: 200,
|
|
215
|
-
height: 100,
|
|
216
|
-
},
|
|
217
|
-
});
|
|
218
|
-
}
|
|
219
|
-
tags.push('screen', 'interface', 'computer');
|
|
220
|
-
return [2 /*return*/, {
|
|
221
|
-
caption: caption,
|
|
222
|
-
objects: objects,
|
|
223
|
-
regions: regions,
|
|
224
|
-
tags: tags,
|
|
225
|
-
}];
|
|
226
|
-
});
|
|
227
|
-
});
|
|
228
|
-
};
|
|
229
|
-
Florence2Model.prototype.detectUIElements = function (imageBuffer) {
|
|
230
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
231
|
-
var result, _modelError_3, error_4;
|
|
232
|
-
var _this = this;
|
|
233
|
-
return __generator(this, function (_a) {
|
|
234
|
-
switch (_a.label) {
|
|
235
|
-
case 0:
|
|
236
|
-
if (!!this.initialized) return [3 /*break*/, 2];
|
|
237
|
-
return [4 /*yield*/, this.initialize()];
|
|
238
|
-
case 1:
|
|
239
|
-
_a.sent();
|
|
240
|
-
_a.label = 2;
|
|
241
|
-
case 2:
|
|
242
|
-
_a.trys.push([2, 8, , 9]);
|
|
243
|
-
result = void 0;
|
|
244
|
-
_a.label = 3;
|
|
245
|
-
case 3:
|
|
246
|
-
_a.trys.push([3, 5, , 7]);
|
|
247
|
-
return [4 /*yield*/, this.localModel.analyzeImage(imageBuffer)];
|
|
248
|
-
case 4:
|
|
249
|
-
result = _a.sent();
|
|
250
|
-
return [3 /*break*/, 7];
|
|
251
|
-
case 5:
|
|
252
|
-
_modelError_3 = _a.sent();
|
|
253
|
-
core_1.logger.warn('[Florence2] Local model failed for UI detection, using fallback');
|
|
254
|
-
return [4 /*yield*/, this.mockAnalyzeBuffer(imageBuffer)];
|
|
255
|
-
case 6:
|
|
256
|
-
result = _a.sent();
|
|
257
|
-
return [3 /*break*/, 7];
|
|
258
|
-
case 7:
|
|
259
|
-
// Convert Florence-2 objects to UI elements
|
|
260
|
-
return [2 /*return*/, (result.objects || []).map(function (obj) { return ({
|
|
261
|
-
type: _this.mapToUIElementType(obj.label),
|
|
262
|
-
bbox: obj.bbox,
|
|
263
|
-
confidence: obj.confidence,
|
|
264
|
-
}); })];
|
|
265
|
-
case 8:
|
|
266
|
-
error_4 = _a.sent();
|
|
267
|
-
core_1.logger.error('[Florence2] UI element detection failed:', error_4);
|
|
268
|
-
return [2 /*return*/, []];
|
|
269
|
-
case 9: return [2 /*return*/];
|
|
270
|
-
}
|
|
271
|
-
});
|
|
272
|
-
});
|
|
273
|
-
};
|
|
274
|
-
Florence2Model.prototype.mockAnalyzeBuffer = function (_imageBuffer) {
|
|
275
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
276
|
-
var scenarios, scenario;
|
|
277
|
-
return __generator(this, function (_a) {
|
|
278
|
-
scenarios = [
|
|
279
|
-
{
|
|
280
|
-
caption: 'Indoor scene with a person in front of a computer',
|
|
281
|
-
objects: [
|
|
282
|
-
{
|
|
283
|
-
label: 'person',
|
|
284
|
-
bbox: { x: 300, y: 200, width: 200, height: 300 },
|
|
285
|
-
confidence: 0.9,
|
|
286
|
-
},
|
|
287
|
-
{
|
|
288
|
-
label: 'computer',
|
|
289
|
-
bbox: { x: 400, y: 350, width: 150, height: 100 },
|
|
290
|
-
confidence: 0.85,
|
|
291
|
-
},
|
|
292
|
-
{
|
|
293
|
-
label: 'desk',
|
|
294
|
-
bbox: { x: 350, y: 400, width: 250, height: 100 },
|
|
295
|
-
confidence: 0.8,
|
|
296
|
-
},
|
|
297
|
-
],
|
|
298
|
-
tags: ['indoor', 'office', 'workspace', 'person', 'computer'],
|
|
299
|
-
},
|
|
300
|
-
{
|
|
301
|
-
caption: 'Room interior with furniture and lighting',
|
|
302
|
-
objects: [
|
|
303
|
-
{
|
|
304
|
-
label: 'chair',
|
|
305
|
-
bbox: { x: 200, y: 300, width: 100, height: 150 },
|
|
306
|
-
confidence: 0.85,
|
|
307
|
-
},
|
|
308
|
-
{
|
|
309
|
-
label: 'table',
|
|
310
|
-
bbox: { x: 350, y: 350, width: 150, height: 100 },
|
|
311
|
-
confidence: 0.8,
|
|
312
|
-
},
|
|
313
|
-
{
|
|
314
|
-
label: 'lamp',
|
|
315
|
-
bbox: { x: 500, y: 200, width: 50, height: 100 },
|
|
316
|
-
confidence: 0.75,
|
|
317
|
-
},
|
|
318
|
-
],
|
|
319
|
-
tags: ['indoor', 'room', 'furniture', 'interior'],
|
|
320
|
-
},
|
|
321
|
-
{
|
|
322
|
-
caption: 'Person working at a desk with computer monitor',
|
|
323
|
-
objects: [
|
|
324
|
-
{
|
|
325
|
-
label: 'person',
|
|
326
|
-
bbox: { x: 250, y: 150, width: 250, height: 350 },
|
|
327
|
-
confidence: 0.92,
|
|
328
|
-
},
|
|
329
|
-
{
|
|
330
|
-
label: 'monitor',
|
|
331
|
-
bbox: { x: 450, y: 300, width: 120, height: 80 },
|
|
332
|
-
confidence: 0.88,
|
|
333
|
-
},
|
|
334
|
-
{
|
|
335
|
-
label: 'keyboard',
|
|
336
|
-
bbox: { x: 430, y: 380, width: 100, height: 30 },
|
|
337
|
-
confidence: 0.82,
|
|
338
|
-
},
|
|
339
|
-
],
|
|
340
|
-
tags: ['person', 'working', 'computer', 'desk', 'office'],
|
|
341
|
-
},
|
|
342
|
-
];
|
|
343
|
-
scenario = scenarios[Math.floor(Math.random() * scenarios.length)];
|
|
344
|
-
return [2 /*return*/, {
|
|
345
|
-
caption: scenario.caption,
|
|
346
|
-
objects: scenario.objects,
|
|
347
|
-
regions: [],
|
|
348
|
-
tags: scenario.tags,
|
|
349
|
-
}];
|
|
350
|
-
});
|
|
351
|
-
});
|
|
352
|
-
};
|
|
353
|
-
Florence2Model.prototype.mapToUIElementType = function (label) {
|
|
354
|
-
var mapping = {
|
|
355
|
-
button: 'button',
|
|
356
|
-
text_field: 'input',
|
|
357
|
-
text_area: 'textarea',
|
|
358
|
-
checkbox: 'checkbox',
|
|
359
|
-
radio_button: 'radio',
|
|
360
|
-
dropdown: 'select',
|
|
361
|
-
menu: 'menu',
|
|
362
|
-
menu_bar: 'menubar',
|
|
363
|
-
toolbar: 'toolbar',
|
|
364
|
-
window: 'window',
|
|
365
|
-
dialog: 'dialog',
|
|
366
|
-
icon: 'icon',
|
|
367
|
-
image: 'image',
|
|
368
|
-
video: 'video',
|
|
369
|
-
link: 'link',
|
|
370
|
-
heading: 'heading',
|
|
371
|
-
paragraph: 'text',
|
|
372
|
-
list: 'list',
|
|
373
|
-
table: 'table',
|
|
374
|
-
scrollbar: 'scrollbar',
|
|
375
|
-
tab: 'tab',
|
|
376
|
-
panel: 'panel',
|
|
377
|
-
};
|
|
378
|
-
return mapping[label.toLowerCase()] || 'unknown';
|
|
379
|
-
};
|
|
380
|
-
Florence2Model.prototype.generateSceneGraph = function (tiles) {
|
|
381
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
382
|
-
var nodes, edges, _i, tiles_1, tile, analysis, _a, _b, obj, nodeId, i, j, relation;
|
|
383
|
-
return __generator(this, function (_c) {
|
|
384
|
-
switch (_c.label) {
|
|
385
|
-
case 0:
|
|
386
|
-
nodes = [];
|
|
387
|
-
edges = [];
|
|
388
|
-
_i = 0, tiles_1 = tiles;
|
|
389
|
-
_c.label = 1;
|
|
390
|
-
case 1:
|
|
391
|
-
if (!(_i < tiles_1.length)) return [3 /*break*/, 4];
|
|
392
|
-
tile = tiles_1[_i];
|
|
393
|
-
if (!tile.data) {
|
|
394
|
-
return [3 /*break*/, 3];
|
|
395
|
-
}
|
|
396
|
-
return [4 /*yield*/, this.analyzeTile(tile)];
|
|
397
|
-
case 2:
|
|
398
|
-
analysis = _c.sent();
|
|
399
|
-
// Add objects as nodes
|
|
400
|
-
if (analysis.objects) {
|
|
401
|
-
for (_a = 0, _b = analysis.objects; _a < _b.length; _a++) {
|
|
402
|
-
obj = _b[_a];
|
|
403
|
-
nodeId = "".concat(tile.id, "-").concat(obj.label, "-").concat(nodes.length);
|
|
404
|
-
nodes.push({
|
|
405
|
-
id: nodeId,
|
|
406
|
-
type: obj.label,
|
|
407
|
-
label: obj.label,
|
|
408
|
-
position: {
|
|
409
|
-
x: tile.x + obj.bbox.x,
|
|
410
|
-
y: tile.y + obj.bbox.y,
|
|
411
|
-
width: obj.bbox.width,
|
|
412
|
-
height: obj.bbox.height,
|
|
413
|
-
},
|
|
414
|
-
});
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
_c.label = 3;
|
|
418
|
-
case 3:
|
|
419
|
-
_i++;
|
|
420
|
-
return [3 /*break*/, 1];
|
|
421
|
-
case 4:
|
|
422
|
-
// Infer spatial relationships
|
|
423
|
-
for (i = 0; i < nodes.length; i++) {
|
|
424
|
-
for (j = i + 1; j < nodes.length; j++) {
|
|
425
|
-
relation = this.inferSpatialRelation(nodes[i].position, nodes[j].position);
|
|
426
|
-
if (relation) {
|
|
427
|
-
edges.push({
|
|
428
|
-
source: nodes[i].id,
|
|
429
|
-
target: nodes[j].id,
|
|
430
|
-
relation: relation,
|
|
431
|
-
});
|
|
432
|
-
}
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
return [2 /*return*/, { nodes: nodes, edges: edges }];
|
|
436
|
-
}
|
|
437
|
-
});
|
|
438
|
-
});
|
|
439
|
-
};
|
|
440
|
-
Florence2Model.prototype.inferSpatialRelation = function (box1, box2) {
|
|
441
|
-
var center1 = {
|
|
442
|
-
x: box1.x + box1.width / 2,
|
|
443
|
-
y: box1.y + box1.height / 2,
|
|
444
|
-
};
|
|
445
|
-
var center2 = {
|
|
446
|
-
x: box2.x + box2.width / 2,
|
|
447
|
-
y: box2.y + box2.height / 2,
|
|
448
|
-
};
|
|
449
|
-
// Check containment
|
|
450
|
-
if (this.contains(box1, box2)) {
|
|
451
|
-
return 'contains';
|
|
452
|
-
}
|
|
453
|
-
if (this.contains(box2, box1)) {
|
|
454
|
-
return 'contained_by';
|
|
455
|
-
}
|
|
456
|
-
// Check overlap
|
|
457
|
-
if (this.overlaps(box1, box2)) {
|
|
458
|
-
return 'overlaps';
|
|
459
|
-
}
|
|
460
|
-
// Check adjacency and direction
|
|
461
|
-
var dx = center2.x - center1.x;
|
|
462
|
-
var dy = center2.y - center1.y;
|
|
463
|
-
var distance = Math.sqrt(dx * dx + dy * dy);
|
|
464
|
-
if (distance < 100) {
|
|
465
|
-
// Close proximity
|
|
466
|
-
if (Math.abs(dx) > Math.abs(dy)) {
|
|
467
|
-
return dx > 0 ? 'right_of' : 'left_of';
|
|
468
|
-
}
|
|
469
|
-
else {
|
|
470
|
-
return dy > 0 ? 'below' : 'above';
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
return null;
|
|
474
|
-
};
|
|
475
|
-
Florence2Model.prototype.contains = function (box1, box2) {
|
|
476
|
-
return (box1.x <= box2.x &&
|
|
477
|
-
box1.y <= box2.y &&
|
|
478
|
-
box1.x + box1.width >= box2.x + box2.width &&
|
|
479
|
-
box1.y + box1.height >= box2.y + box2.height);
|
|
480
|
-
};
|
|
481
|
-
Florence2Model.prototype.overlaps = function (box1, box2) {
|
|
482
|
-
return !(box1.x + box1.width < box2.x ||
|
|
483
|
-
box2.x + box2.width < box1.x ||
|
|
484
|
-
box1.y + box1.height < box2.y ||
|
|
485
|
-
box2.y + box2.height < box1.y);
|
|
486
|
-
};
|
|
487
|
-
Florence2Model.prototype.isInitialized = function () {
|
|
488
|
-
return this.initialized;
|
|
489
|
-
};
|
|
490
|
-
Florence2Model.prototype.dispose = function () {
|
|
491
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
492
|
-
return __generator(this, function (_a) {
|
|
493
|
-
// Clean up resources if needed
|
|
494
|
-
this.initialized = false;
|
|
495
|
-
core_1.logger.info('[Florence2] Model disposed');
|
|
496
|
-
return [2 /*return*/];
|
|
497
|
-
});
|
|
498
|
-
});
|
|
499
|
-
};
|
|
500
|
-
return Florence2Model;
|
|
501
|
-
}());
|
|
502
|
-
exports.Florence2Model = Florence2Model;
|
|
503
|
-
//# sourceMappingURL=florence2-model.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"florence2-model.js","sourceRoot":"","sources":["../src/florence2-model.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,sCAAuC;AAEvC,qDAAmD;AAEnD;IAIE;QAHQ,gBAAW,GAAG,KAAK,CAAC;QAI1B,IAAI,CAAC,UAAU,GAAG,IAAI,gCAAc,EAAE,CAAC;IACzC,CAAC;IAEK,mCAAU,GAAhB;;;;;;wBACE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;4BACrB,sBAAO;wBACT,CAAC;;;;wBAGC,aAAM,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAC;wBAErF,qBAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,EAAA;;wBAAlC,SAAkC,CAAC;wBAEnC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;wBACxB,aAAM,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;;;;wBAEhE,aAAM,CAAC,KAAK,CAAC,+CAA+C,EAAE,OAAK,CAAC,CAAC;wBACrE,uCAAuC;wBACvC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;wBACxB,aAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;;;;;;KAElE;IAEK,oCAAW,GAAjB,UAAkB,IAAgB;;;;;;6BAC5B,CAAC,IAAI,CAAC,WAAW,EAAjB,wBAAiB;wBACnB,qBAAM,IAAI,CAAC,UAAU,EAAE,EAAA;;wBAAvB,SAAuB,CAAC;;;wBAG1B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;4BACf,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;wBAC5C,CAAC;;;;;;;wBAKkB,qBAAM,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAA;;wBAAtD,WAAS,SAA6C;wBAC5D,aAAM,CAAC,KAAK,CAAC,oCAA6B,IAAI,CAAC,EAAE,eAAK,QAAM,CAAC,OAAO,CAAE,CAAC,CAAC;wBACxE,sBAAO,QAAM,EAAC;;;wBAEd,aAAM,CAAC,IAAI,CAAC,wDAAwD,EAAE,aAAW,CAAC,CAAC;;4BAItE,qBAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAA;;wBAArC,MAAM,GAAG,SAA4B;wBAC3C,aAAM,CAAC,KAAK,CAAC,yCAAkC,IAAI,CAAC,EAAE,eAAK,MAAM,CAAC,OAAO,CAAE,CAAC,CAAC;wBAC7E,sBAAO,MAAM,EAAC;;;wBAEd,aAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,OAAK,CAAC,CAAC;wBACpD,MAAM,OAAK,CAAC;;;;;KAEf;IAEK,qCAAY,GAAlB,UAAmB,WAAmB;;;;;;6BAChC,CAAC,IAAI,CAAC,WAAW,EAAjB,wBAAiB;wBACnB,qBAAM,IAAI,CAAC,UAAU,EAAE,EAAA;;wBAAvB,SAAuB,CAAC;;;;;;;wBAMP,qBAAM,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,WAAW,CAAC,EAAA;;wBAAxD,WAAS,SAA+C;wBAC9D,aAAM,CAAC,KAAK,CAAC,sCAA+B,QAAM,CAAC,OAAO,CAAE,CAAC,CAAC;wBAC9D,sBAAO,QAAM,EAAC;;;wBAEd,aAAM,CAAC,IAAI,CAAC,wDAAwD,EAAE,aAAW,CAAC,CAAC;;4BAItE,qBAAM,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,EAAA;;wBAAlD,MAAM,GAAG,SAAyC;wBACxD,aAAM,CAAC,KAAK,CAAC,2CAAoC,MAAM,CAAC,OAAO,CAAE,CAAC,CAAC;wBACnE,sBAAO,MAAM,EAAC;;;wBAEd,aAAM,CAAC,KAAK,CAAC,oCAAoC,EAAE,OAAK,CAAC,CAAC;wBAC1D,MAAM,OAAK,CAAC;;;;;KAEf;IAEa,oCAAW,GAAzB,UAA0B,IAAgB;;;;gBAIlC,aAAa,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;gBAC7B,YAAY,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;gBAG9B,OAAO,GAAG,uBAAuB,CAAC;gBAChC,OAAO,GAAoE,EAAE,CAAC;gBAC9E,OAAO,GAAsD,EAAE,CAAC;gBAChE,IAAI,GAAa,EAAE,CAAC;gBAE1B,IAAI,aAAa,EAAE,CAAC;oBAClB,OAAO,GAAG,kCAAkC,CAAC;oBAC7C,OAAO,CAAC,IAAI,CAAC;wBACX,KAAK,EAAE,QAAQ;wBACf,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;wBACnD,UAAU,EAAE,GAAG;qBAChB,CAAC,CAAC;oBACH,OAAO,CAAC,IAAI,CAAC;wBACX,KAAK,EAAE,UAAU;wBACjB,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;wBACnD,UAAU,EAAE,IAAI;qBACjB,CAAC,CAAC;oBACH,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,aAAa,EAAE,SAAS,CAAC,CAAC;gBAC5C,CAAC;gBAED,IAAI,YAAY,EAAE,CAAC;oBACjB,OAAO,GAAG,4BAA4B,CAAC;oBACvC,OAAO,CAAC,IAAI,CAAC;wBACX,KAAK,EAAE,SAAS;wBAChB,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;wBACrD,UAAU,EAAE,GAAG;qBAChB,CAAC,CAAC;oBACH,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC;gBACrC,CAAC;gBAGK,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;gBACtD,KAAS,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;oBACrC,OAAO,CAAC,IAAI,CAAC;wBACX,KAAK,EAAE,QAAQ;wBACf,IAAI,EAAE;4BACJ,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC;4BACrC,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;4BACrC,KAAK,EAAE,GAAG;4BACV,MAAM,EAAE,EAAE;yBACX;wBACD,UAAU,EAAE,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG;qBACtC,CAAC,CAAC;gBACL,CAAC;gBAGK,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;gBACtD,KAAS,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;oBACrC,OAAO,CAAC,IAAI,CAAC;wBACX,WAAW,EAAE,mBAAmB;wBAChC,IAAI,EAAE;4BACJ,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC;4BACrC,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;4BACtC,KAAK,EAAE,GAAG;4BACV,MAAM,EAAE,GAAG;yBACZ;qBACF,CAAC,CAAC;gBACL,CAAC;gBAED,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC;gBAE7C,sBAAO;wBACL,OAAO,SAAA;wBACP,OAAO,SAAA;wBACP,OAAO,SAAA;wBACP,IAAI,MAAA;qBACL,EAAC;;;KACH;IAEK,yCAAgB,GAAtB,UAAuB,WAAmB;;;;;;;6BAQpC,CAAC,IAAI,CAAC,WAAW,EAAjB,wBAAiB;wBACnB,qBAAM,IAAI,CAAC,UAAU,EAAE,EAAA;;wBAAvB,SAAuB,CAAC;;;;wBAKpB,MAAM,SAAiB,CAAC;;;;wBAGjB,qBAAM,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,WAAW,CAAC,EAAA;;wBAAxD,MAAM,GAAG,SAA+C,CAAC;;;;wBAEzD,aAAM,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;wBACtE,qBAAM,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,EAAA;;wBAAlD,MAAM,GAAG,SAAyC,CAAC;;;oBAGrD,4CAA4C;oBAC5C,sBAAO,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,UAAC,GAAG,IAAK,OAAA,CAAC;4BAC1C,IAAI,EAAE,KAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC;4BACxC,IAAI,EAAE,GAAG,CAAC,IAAI;4BACd,UAAU,EAAE,GAAG,CAAC,UAAU;yBAC3B,CAAC,EAJyC,CAIzC,CAAC,EAAC;;;wBAEJ,aAAM,CAAC,KAAK,CAAC,0CAA0C,EAAE,OAAK,CAAC,CAAC;wBAChE,sBAAO,EAAE,EAAC;;;;;KAEb;IAEa,0CAAiB,GAA/B,UAAgC,YAAoB;;;;gBAI5C,SAAS,GAAG;oBAChB;wBACE,OAAO,EAAE,mDAAmD;wBAC5D,OAAO,EAAE;4BACP;gCACE,KAAK,EAAE,QAAQ;gCACf,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;gCACjD,UAAU,EAAE,GAAG;6BAChB;4BACD;gCACE,KAAK,EAAE,UAAU;gCACjB,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;gCACjD,UAAU,EAAE,IAAI;6BACjB;4BACD;gCACE,KAAK,EAAE,MAAM;gCACb,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;gCACjD,UAAU,EAAE,GAAG;6BAChB;yBACF;wBACD,IAAI,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,CAAC;qBAC9D;oBACD;wBACE,OAAO,EAAE,2CAA2C;wBACpD,OAAO,EAAE;4BACP;gCACE,KAAK,EAAE,OAAO;gCACd,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;gCACjD,UAAU,EAAE,IAAI;6BACjB;4BACD;gCACE,KAAK,EAAE,OAAO;gCACd,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;gCACjD,UAAU,EAAE,GAAG;6BAChB;4BACD;gCACE,KAAK,EAAE,MAAM;gCACb,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE;gCAChD,UAAU,EAAE,IAAI;6BACjB;yBACF;wBACD,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,CAAC;qBAClD;oBACD;wBACE,OAAO,EAAE,gDAAgD;wBACzD,OAAO,EAAE;4BACP;gCACE,KAAK,EAAE,QAAQ;gCACf,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;gCACjD,UAAU,EAAE,IAAI;6BACjB;4BACD;gCACE,KAAK,EAAE,SAAS;gCAChB,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE;gCAChD,UAAU,EAAE,IAAI;6BACjB;4BACD;gCACE,KAAK,EAAE,UAAU;gCACjB,IAAI,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE;gCAChD,UAAU,EAAE,IAAI;6BACjB;yBACF;wBACD,IAAI,EAAE,CAAC,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC;qBAC1D;iBACF,CAAC;gBAGI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;gBAEzE,sBAAO;wBACL,OAAO,EAAE,QAAQ,CAAC,OAAO;wBACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;wBACzB,OAAO,EAAE,EAAE;wBACX,IAAI,EAAE,QAAQ,CAAC,IAAI;qBACpB,EAAC;;;KACH;IAEO,2CAAkB,GAA1B,UAA2B,KAAa;QACtC,IAAM,OAAO,GAA2B;YACtC,MAAM,EAAE,QAAQ;YAChB,UAAU,EAAE,OAAO;YACnB,SAAS,EAAE,UAAU;YACrB,QAAQ,EAAE,UAAU;YACpB,YAAY,EAAE,OAAO;YACrB,QAAQ,EAAE,QAAQ;YAClB,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,SAAS;YAClB,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,QAAQ;YAChB,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,OAAO;YACd,KAAK,EAAE,OAAO;YACd,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,SAAS;YAClB,SAAS,EAAE,MAAM;YACjB,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,OAAO;YACd,SAAS,EAAE,WAAW;YACtB,GAAG,EAAE,KAAK;YACV,KAAK,EAAE,OAAO;SACf,CAAC;QAEF,OAAO,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,SAAS,CAAC;IACnD,CAAC;IAEK,2CAAkB,GAAxB,UAAyB,KAAmB;;;;;;wBAIpC,KAAK,GAA8E,EAAE,CAAC;wBACtF,KAAK,GAAgE,EAAE,CAAC;8BAGtD,EAAL,eAAK;;;6BAAL,CAAA,mBAAK,CAAA;wBAAb,IAAI;wBACb,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;4BACf,wBAAS;wBACX,CAAC;wBAEgB,qBAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAA;;wBAAvC,QAAQ,GAAG,SAA4B;wBAE7C,uBAAuB;wBACvB,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;4BACrB,WAAkC,EAAhB,KAAA,QAAQ,CAAC,OAAO,EAAhB,cAAgB,EAAhB,IAAgB,EAAE,CAAC;gCAA1B,GAAG;gCACN,MAAM,GAAG,UAAG,IAAI,CAAC,EAAE,cAAI,GAAG,CAAC,KAAK,cAAI,KAAK,CAAC,MAAM,CAAE,CAAC;gCACzD,KAAK,CAAC,IAAI,CAAC;oCACT,EAAE,EAAE,MAAM;oCACV,IAAI,EAAE,GAAG,CAAC,KAAK;oCACf,KAAK,EAAE,GAAG,CAAC,KAAK;oCAChB,QAAQ,EAAE;wCACR,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;wCACtB,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;wCACtB,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK;wCACrB,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM;qCACxB;iCACF,CAAC,CAAC;4BACL,CAAC;wBACH,CAAC;;;wBAvBgB,IAAK,CAAA;;;wBA0BxB,8BAA8B;wBAC9B,KAAS,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;4BACtC,KAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gCACpC,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;gCACjF,IAAI,QAAQ,EAAE,CAAC;oCACb,KAAK,CAAC,IAAI,CAAC;wCACT,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;wCACnB,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;wCACnB,QAAQ,UAAA;qCACT,CAAC,CAAC;gCACL,CAAC;4BACH,CAAC;wBACH,CAAC;wBAED,sBAAO,EAAE,KAAK,OAAA,EAAE,KAAK,OAAA,EAAE,EAAC;;;;KACzB;IAEO,6CAAoB,GAA5B,UAA6B,IAAiB,EAAE,IAAiB;QAC/D,IAAM,OAAO,GAAG;YACd,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC;YAC1B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC;SAC5B,CAAC;QACF,IAAM,OAAO,GAAG;YACd,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC;YAC1B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC;SAC5B,CAAC;QAEF,oBAAoB;QACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;YAC9B,OAAO,UAAU,CAAC;QACpB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;YAC9B,OAAO,cAAc,CAAC;QACxB,CAAC;QAED,gBAAgB;QAChB,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;YAC9B,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,gCAAgC;QAChC,IAAM,EAAE,GAAG,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;QACjC,IAAM,EAAE,GAAG,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;QACjC,IAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC;QAE9C,IAAI,QAAQ,GAAG,GAAG,EAAE,CAAC;YACnB,kBAAkB;YAClB,IAAI,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAChC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC;YACzC,CAAC;iBAAM,CAAC;gBACN,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;YACpC,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,iCAAQ,GAAhB,UAAiB,IAAiB,EAAE,IAAiB;QACnD,OAAO,CACL,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;YAChB,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;YAChB,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK;YAC1C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAC7C,CAAC;IACJ,CAAC;IAEO,iCAAQ,GAAhB,UAAiB,IAAiB,EAAE,IAAiB;QACnD,OAAO,CAAC,CACN,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;YAC5B,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;YAC5B,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YAC7B,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAC9B,CAAC;IACJ,CAAC;IAED,sCAAa,GAAb;QACE,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAEK,gCAAO,GAAb;;;gBACE,+BAA+B;gBAC/B,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;gBACzB,aAAM,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;;;;KAC3C;IACH,qBAAC;AAAD,CAAC,AAtaD,IAsaC;AAtaY,wCAAc","sourcesContent":["import { logger } from '@elizaos/core';\nimport type { Florence2Result, ScreenTile, BoundingBox } from './types';\nimport { Florence2Local } from './florence2-local';\n\nexport class Florence2Model {\n private initialized = false;\n private localModel: Florence2Local;\n\n constructor() {\n this.localModel = new Florence2Local();\n }\n\n async initialize(): Promise<void> {\n if (this.initialized) {\n return;\n }\n\n try {\n logger.info('[Florence2] Initializing local Florence-2 model with TensorFlow.js...');\n\n await this.localModel.initialize();\n\n this.initialized = true;\n logger.info('[Florence2] Local model initialized successfully');\n } catch (error) {\n logger.error('[Florence2] Failed to initialize local model:', error);\n // Don't throw - we have good fallbacks\n this.initialized = true;\n logger.warn('[Florence2] Running with enhanced fallback mode');\n }\n }\n\n async analyzeTile(tile: ScreenTile): Promise<Florence2Result> {\n if (!this.initialized) {\n await this.initialize();\n }\n\n if (!tile.data) {\n throw new Error('Tile has no image data');\n }\n\n try {\n // Use local model\n try {\n const result = await this.localModel.analyzeImage(tile.data);\n logger.debug(`[Florence2] Analyzed tile ${tile.id}: ${result.caption}`);\n return result;\n } catch (_modelError) {\n logger.warn('[Florence2] Local model analysis failed, falling back:', _modelError);\n }\n\n // Fall back to mock analysis\n const result = await this.mockAnalyze(tile);\n logger.debug(`[Florence2] Mock analyzed tile ${tile.id}: ${result.caption}`);\n return result;\n } catch (error) {\n logger.error('[Florence2] Analysis failed:', error);\n throw error;\n }\n }\n\n async analyzeImage(imageBuffer: Buffer): Promise<Florence2Result> {\n if (!this.initialized) {\n await this.initialize();\n }\n\n try {\n // Use local model\n try {\n const result = await this.localModel.analyzeImage(imageBuffer);\n logger.debug(`[Florence2] Analyzed image: ${result.caption}`);\n return result;\n } catch (_modelError) {\n logger.warn('[Florence2] Local model analysis failed, falling back:', _modelError);\n }\n\n // Fall back to mock analysis\n const result = await this.mockAnalyzeBuffer(imageBuffer);\n logger.debug(`[Florence2] Mock analyzed image: ${result.caption}`);\n return result;\n } catch (error) {\n logger.error('[Florence2] Image analysis failed:', error);\n throw error;\n }\n }\n\n private async mockAnalyze(tile: ScreenTile): Promise<Florence2Result> {\n // Mock implementation that simulates Florence-2 output\n // In production, this would be replaced with actual API calls\n\n const isUpperRegion = tile.row < 2;\n const isLeftRegion = tile.col < 2;\n\n // Simulate different UI regions\n let caption = 'Desktop screen region';\n const objects: Array<{ label: string; bbox: BoundingBox; confidence: number }> = [];\n const regions: Array<{ description: string; bbox: BoundingBox }> = [];\n const tags: string[] = [];\n\n if (isUpperRegion) {\n caption = 'Application window with menu bar';\n objects.push({\n label: 'window',\n bbox: { x: 0, y: 0, width: tile.width, height: 50 },\n confidence: 0.9,\n });\n objects.push({\n label: 'menu_bar',\n bbox: { x: 0, y: 0, width: tile.width, height: 30 },\n confidence: 0.85,\n });\n tags.push('ui', 'application', 'desktop');\n }\n\n if (isLeftRegion) {\n caption = 'Sidebar or navigation area';\n objects.push({\n label: 'sidebar',\n bbox: { x: 0, y: 0, width: 100, height: tile.height },\n confidence: 0.8,\n });\n tags.push('navigation', 'sidebar');\n }\n\n // Add some common UI elements\n const buttonCount = Math.floor(Math.random() * 3) + 1;\n for (let i = 0; i < buttonCount; i++) {\n objects.push({\n label: 'button',\n bbox: {\n x: Math.random() * (tile.width - 100),\n y: Math.random() * (tile.height - 40),\n width: 100,\n height: 40,\n },\n confidence: 0.7 + Math.random() * 0.2,\n });\n }\n\n // Add text regions\n const textRegions = Math.floor(Math.random() * 2) + 1;\n for (let i = 0; i < textRegions; i++) {\n regions.push({\n description: 'Text content area',\n bbox: {\n x: Math.random() * (tile.width - 200),\n y: Math.random() * (tile.height - 100),\n width: 200,\n height: 100,\n },\n });\n }\n\n tags.push('screen', 'interface', 'computer');\n\n return {\n caption,\n objects,\n regions,\n tags,\n };\n }\n\n async detectUIElements(imageBuffer: Buffer): Promise<\n Array<{\n type: string;\n bbox: BoundingBox;\n confidence: number;\n text?: string;\n }>\n > {\n if (!this.initialized) {\n await this.initialize();\n }\n\n try {\n // Use local model to analyze image\n let result: Florence2Result;\n\n try {\n result = await this.localModel.analyzeImage(imageBuffer);\n } catch (_modelError) {\n logger.warn('[Florence2] Local model failed for UI detection, using fallback');\n result = await this.mockAnalyzeBuffer(imageBuffer);\n }\n\n // Convert Florence-2 objects to UI elements\n return (result.objects || []).map((obj) => ({\n type: this.mapToUIElementType(obj.label),\n bbox: obj.bbox,\n confidence: obj.confidence,\n }));\n } catch (error) {\n logger.error('[Florence2] UI element detection failed:', error);\n return [];\n }\n }\n\n private async mockAnalyzeBuffer(_imageBuffer: Buffer): Promise<Florence2Result> {\n // Enhanced mock for when API is not available\n // Provides more realistic descriptions based on common scenarios\n\n const scenarios = [\n {\n caption: 'Indoor scene with a person in front of a computer',\n objects: [\n {\n label: 'person',\n bbox: { x: 300, y: 200, width: 200, height: 300 },\n confidence: 0.9,\n },\n {\n label: 'computer',\n bbox: { x: 400, y: 350, width: 150, height: 100 },\n confidence: 0.85,\n },\n {\n label: 'desk',\n bbox: { x: 350, y: 400, width: 250, height: 100 },\n confidence: 0.8,\n },\n ],\n tags: ['indoor', 'office', 'workspace', 'person', 'computer'],\n },\n {\n caption: 'Room interior with furniture and lighting',\n objects: [\n {\n label: 'chair',\n bbox: { x: 200, y: 300, width: 100, height: 150 },\n confidence: 0.85,\n },\n {\n label: 'table',\n bbox: { x: 350, y: 350, width: 150, height: 100 },\n confidence: 0.8,\n },\n {\n label: 'lamp',\n bbox: { x: 500, y: 200, width: 50, height: 100 },\n confidence: 0.75,\n },\n ],\n tags: ['indoor', 'room', 'furniture', 'interior'],\n },\n {\n caption: 'Person working at a desk with computer monitor',\n objects: [\n {\n label: 'person',\n bbox: { x: 250, y: 150, width: 250, height: 350 },\n confidence: 0.92,\n },\n {\n label: 'monitor',\n bbox: { x: 450, y: 300, width: 120, height: 80 },\n confidence: 0.88,\n },\n {\n label: 'keyboard',\n bbox: { x: 430, y: 380, width: 100, height: 30 },\n confidence: 0.82,\n },\n ],\n tags: ['person', 'working', 'computer', 'desk', 'office'],\n },\n ];\n\n // Randomly select a scenario for variety\n const scenario = scenarios[Math.floor(Math.random() * scenarios.length)];\n\n return {\n caption: scenario.caption,\n objects: scenario.objects,\n regions: [],\n tags: scenario.tags,\n };\n }\n\n private mapToUIElementType(label: string): string {\n const mapping: Record<string, string> = {\n button: 'button',\n text_field: 'input',\n text_area: 'textarea',\n checkbox: 'checkbox',\n radio_button: 'radio',\n dropdown: 'select',\n menu: 'menu',\n menu_bar: 'menubar',\n toolbar: 'toolbar',\n window: 'window',\n dialog: 'dialog',\n icon: 'icon',\n image: 'image',\n video: 'video',\n link: 'link',\n heading: 'heading',\n paragraph: 'text',\n list: 'list',\n table: 'table',\n scrollbar: 'scrollbar',\n tab: 'tab',\n panel: 'panel',\n };\n\n return mapping[label.toLowerCase()] || 'unknown';\n }\n\n async generateSceneGraph(tiles: ScreenTile[]): Promise<{\n nodes: Array<{ id: string; type: string; label: string; position: BoundingBox }>;\n edges: Array<{ source: string; target: string; relation: string }>;\n }> {\n const nodes: Array<{ id: string; type: string; label: string; position: BoundingBox }> = [];\n const edges: Array<{ source: string; target: string; relation: string }> = [];\n\n // Analyze each tile\n for (const tile of tiles) {\n if (!tile.data) {\n continue;\n }\n\n const analysis = await this.analyzeTile(tile);\n\n // Add objects as nodes\n if (analysis.objects) {\n for (const obj of analysis.objects) {\n const nodeId = `${tile.id}-${obj.label}-${nodes.length}`;\n nodes.push({\n id: nodeId,\n type: obj.label,\n label: obj.label,\n position: {\n x: tile.x + obj.bbox.x,\n y: tile.y + obj.bbox.y,\n width: obj.bbox.width,\n height: obj.bbox.height,\n },\n });\n }\n }\n }\n\n // Infer spatial relationships\n for (let i = 0; i < nodes.length; i++) {\n for (let j = i + 1; j < nodes.length; j++) {\n const relation = this.inferSpatialRelation(nodes[i].position, nodes[j].position);\n if (relation) {\n edges.push({\n source: nodes[i].id,\n target: nodes[j].id,\n relation,\n });\n }\n }\n }\n\n return { nodes, edges };\n }\n\n private inferSpatialRelation(box1: BoundingBox, box2: BoundingBox): string | null {\n const center1 = {\n x: box1.x + box1.width / 2,\n y: box1.y + box1.height / 2,\n };\n const center2 = {\n x: box2.x + box2.width / 2,\n y: box2.y + box2.height / 2,\n };\n\n // Check containment\n if (this.contains(box1, box2)) {\n return 'contains';\n }\n if (this.contains(box2, box1)) {\n return 'contained_by';\n }\n\n // Check overlap\n if (this.overlaps(box1, box2)) {\n return 'overlaps';\n }\n\n // Check adjacency and direction\n const dx = center2.x - center1.x;\n const dy = center2.y - center1.y;\n const distance = Math.sqrt(dx * dx + dy * dy);\n\n if (distance < 100) {\n // Close proximity\n if (Math.abs(dx) > Math.abs(dy)) {\n return dx > 0 ? 'right_of' : 'left_of';\n } else {\n return dy > 0 ? 'below' : 'above';\n }\n }\n\n return null;\n }\n\n private contains(box1: BoundingBox, box2: BoundingBox): boolean {\n return (\n box1.x <= box2.x &&\n box1.y <= box2.y &&\n box1.x + box1.width >= box2.x + box2.width &&\n box1.y + box1.height >= box2.y + box2.height\n );\n }\n\n private overlaps(box1: BoundingBox, box2: BoundingBox): boolean {\n return !(\n box1.x + box1.width < box2.x ||\n box2.x + box2.width < box1.x ||\n box1.y + box1.height < box2.y ||\n box2.y + box2.height < box1.y\n );\n }\n\n isInitialized(): boolean {\n return this.initialized;\n }\n\n async dispose(): Promise<void> {\n // Clean up resources if needed\n this.initialized = false;\n logger.info('[Florence2] Model disposed');\n }\n}\n"]}
|
package/dist/index.d.ts
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import type { OCRResult, ScreenTile, BoundingBox } from './types';
|
|
2
|
-
export declare class RealOCRService {
|
|
3
|
-
private worker;
|
|
4
|
-
private initialized;
|
|
5
|
-
private initPromise;
|
|
6
|
-
initialize(): Promise<void>;
|
|
7
|
-
private _initialize;
|
|
8
|
-
extractText(imageBuffer: Buffer): Promise<OCRResult>;
|
|
9
|
-
extractFromTile(tile: ScreenTile): Promise<OCRResult>;
|
|
10
|
-
private preprocessImage;
|
|
11
|
-
private convertTesseractResult;
|
|
12
|
-
extractStructuredData(imageBuffer: Buffer): Promise<{
|
|
13
|
-
tables?: Array<{
|
|
14
|
-
rows: string[][];
|
|
15
|
-
bbox: BoundingBox;
|
|
16
|
-
}>;
|
|
17
|
-
forms?: Array<{
|
|
18
|
-
label: string;
|
|
19
|
-
value: string;
|
|
20
|
-
bbox: BoundingBox;
|
|
21
|
-
}>;
|
|
22
|
-
lists?: Array<{
|
|
23
|
-
items: string[];
|
|
24
|
-
bbox: BoundingBox;
|
|
25
|
-
}>;
|
|
26
|
-
}>;
|
|
27
|
-
private detectTables;
|
|
28
|
-
private detectForms;
|
|
29
|
-
private detectLists;
|
|
30
|
-
isInitialized(): boolean;
|
|
31
|
-
dispose(): Promise<void>;
|
|
32
|
-
}
|