@elizaos/plugin-vision 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +5 -0
- package/README.md +270 -0
- package/build.config.ts +70 -0
- package/dist/action.d.ts +8 -0
- package/dist/action.js +1212 -0
- package/dist/action.js.map +1 -0
- package/dist/audio-capture-stream.d.ts +42 -0
- package/dist/audio-capture-stream.js +516 -0
- package/dist/audio-capture-stream.js.map +1 -0
- package/dist/audio-capture.d.ts +25 -0
- package/dist/audio-capture.js +412 -0
- package/dist/audio-capture.js.map +1 -0
- package/dist/basic.test.d.ts +1 -0
- package/dist/basic.test.js +97 -0
- package/dist/basic.test.js.map +1 -0
- package/dist/config.d.ts +73 -0
- package/dist/config.js +254 -0
- package/dist/config.js.map +1 -0
- package/dist/entity-tracker.d.ts +32 -0
- package/dist/entity-tracker.js +361 -0
- package/dist/entity-tracker.js.map +1 -0
- package/dist/errors.d.ts +67 -0
- package/dist/errors.js +395 -0
- package/dist/errors.js.map +1 -0
- package/dist/face-recognition.d.ts +31 -0
- package/dist/face-recognition.js +332 -0
- package/dist/face-recognition.js.map +1 -0
- package/dist/florence2-local.d.ts +25 -0
- package/dist/florence2-local.js +280 -0
- package/dist/florence2-local.js.map +1 -0
- package/dist/florence2-model.d.ts +36 -0
- package/dist/florence2-model.js +503 -0
- package/dist/florence2-model.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/ocr-service-real.d.ts +32 -0
- package/dist/ocr-service-real.js +396 -0
- package/dist/ocr-service-real.js.map +1 -0
- package/dist/ocr-service.d.ts +28 -0
- package/dist/ocr-service.js +216 -0
- package/dist/ocr-service.js.map +1 -0
- package/dist/provider.d.ts +2 -0
- package/dist/provider.js +285 -0
- package/dist/provider.js.map +1 -0
- package/dist/screen-capture.d.ts +16 -0
- package/dist/screen-capture.js +302 -0
- package/dist/screen-capture.js.map +1 -0
- package/dist/service.d.ts +73 -0
- package/dist/service.js +1662 -0
- package/dist/service.js.map +1 -0
- package/dist/tests/e2e/index.d.ts +8 -0
- package/dist/tests/e2e/index.js +33 -0
- package/dist/tests/e2e/index.js.map +1 -0
- package/dist/tests/e2e/run-local.d.ts +2 -0
- package/dist/tests/e2e/run-local.js +166 -0
- package/dist/tests/e2e/run-local.js.map +1 -0
- package/dist/tests/e2e/screen-vision.d.ts +11 -0
- package/dist/tests/e2e/screen-vision.js +384 -0
- package/dist/tests/e2e/screen-vision.js.map +1 -0
- package/dist/tests/e2e/vision-autonomy.d.ts +11 -0
- package/dist/tests/e2e/vision-autonomy.js +375 -0
- package/dist/tests/e2e/vision-autonomy.js.map +1 -0
- package/dist/tests/e2e/vision-basic.d.ts +11 -0
- package/dist/tests/e2e/vision-basic.js +434 -0
- package/dist/tests/e2e/vision-basic.js.map +1 -0
- package/dist/tests/e2e/vision-capture-log.d.ts +11 -0
- package/dist/tests/e2e/vision-capture-log.js +302 -0
- package/dist/tests/e2e/vision-capture-log.js.map +1 -0
- package/dist/tests/e2e/vision-runtime.d.ts +11 -0
- package/dist/tests/e2e/vision-runtime.js +357 -0
- package/dist/tests/e2e/vision-runtime.js.map +1 -0
- package/dist/tests/e2e/vision-worker-tests.d.ts +11 -0
- package/dist/tests/e2e/vision-worker-tests.js +466 -0
- package/dist/tests/e2e/vision-worker-tests.js.map +1 -0
- package/dist/tests/test-pattern-generator.d.ts +40 -0
- package/dist/tests/test-pattern-generator.js +191 -0
- package/dist/tests/test-pattern-generator.js.map +1 -0
- package/dist/tests.d.ts +3 -0
- package/dist/tests.js +11 -0
- package/dist/tests.js.map +1 -0
- package/dist/types.d.ts +222 -0
- package/dist/types.js +16 -0
- package/dist/types.js.map +1 -0
- package/dist/vision-models.d.ts +47 -0
- package/dist/vision-models.js +501 -0
- package/dist/vision-models.js.map +1 -0
- package/dist/vision-worker-manager.d.ts +61 -0
- package/dist/vision-worker-manager.js +668 -0
- package/dist/vision-worker-manager.js.map +1 -0
- package/dist/workers/florence2-worker-simple.d.ts +13 -0
- package/dist/workers/florence2-worker-simple.js +121 -0
- package/dist/workers/florence2-worker-simple.js.map +1 -0
- package/dist/workers/florence2-worker.d.ts +1 -0
- package/dist/workers/florence2-worker.js +328 -0
- package/dist/workers/florence2-worker.js.map +1 -0
- package/dist/workers/ocr-worker.d.ts +1 -0
- package/dist/workers/ocr-worker.js +354 -0
- package/dist/workers/ocr-worker.js.map +1 -0
- package/dist/workers/screen-capture-worker.d.ts +1 -0
- package/dist/workers/screen-capture-worker.js +427 -0
- package/dist/workers/screen-capture-worker.js.map +1 -0
- package/dist/workers/worker-logger.d.ts +9 -0
- package/dist/workers/worker-logger.js +95 -0
- package/dist/workers/worker-logger.js.map +1 -0
- package/package.json +100 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted async helper (reuses an existing `this.__awaiter` when one is present).
// Runs `generator` with `thisArg` / `_arguments` and returns a promise built from `P`
// (falling back to the global `Promise`). Each yielded value is wrapped into a `P` instance
// by `adopt`; its fulfilment is fed back through `generator.next`, its rejection through
// `generator.throw`. The returned promise resolves with the generator's final value and
// rejects if stepping the generator throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
// TypeScript-emitted generator state machine (reuses an existing `this.__generator` when present).
// Returns an object `g` exposing `next`, `throw` and `return` (plus `Symbol.iterator` where
// available); each of them forwards an instruction pair `[opcode, value]` to `step`.
// `body` is invoked repeatedly with the state object `_`: `_.label` selects the next `case`
// in the caller's switch, `_.sent()` yields the last value passed in (or throws it when the
// resuming opcode had bit 0 set), and `_.trys` / `_.ops` track protected regions and pending
// operations. Opcodes as handled below: 4 hands a value out with `done: false`; 5 delegates to
// the inner iterator stored in `y`; 7 pops a pending op and a try region; 3 jumps to label
// `op[1]`; 6 carries an exception caught from `body`; anything left once the loop ends is
// either thrown (`op[0] & 5`) or returned with `done: true`.
// `f` is a re-entrancy flag: calling `step` while it is set raises a TypeError.
var __generator = (this && this.__generator) || function (thisArg, body) {
|
|
12
|
+
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
|
|
13
|
+
return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
|
|
14
|
+
function verb(n) { return function (v) { return step([n, v]); }; }
|
|
15
|
+
function step(op) {
|
|
16
|
+
if (f) throw new TypeError("Generator is already executing.");
|
|
17
|
+
while (g && (g = 0, op[0] && (_ = 0)), _) try {
|
|
18
|
+
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
|
|
19
|
+
if (y = 0, t) op = [op[0] & 2, t.value];
|
|
20
|
+
switch (op[0]) {
|
|
21
|
+
case 0: case 1: t = op; break;
|
|
22
|
+
case 4: _.label++; return { value: op[1], done: false };
|
|
23
|
+
case 5: _.label++; y = op[1]; op = [0]; continue;
|
|
24
|
+
case 7: op = _.ops.pop(); _.trys.pop(); continue;
|
|
25
|
+
default:
|
|
26
|
+
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
|
|
27
|
+
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
|
|
28
|
+
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
|
|
29
|
+
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
|
|
30
|
+
if (t[2]) _.ops.pop();
|
|
31
|
+
_.trys.pop(); continue;
|
|
32
|
+
}
|
|
33
|
+
op = body.call(thisArg, _);
|
|
34
|
+
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
|
|
35
|
+
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
|
|
36
|
+
}
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.TestPatternGenerator = void 0;
|
|
40
|
+
var sharp_1 = require("sharp");
|
|
41
|
+
var path = require("path");
|
|
42
|
+
var fs = require("fs/promises");
|
|
43
|
+
var core_1 = require("@elizaos/core");
|
|
44
|
+
var TestPatternGenerator = /** @class */ (function () {
    /**
     * Collection of static helpers that build SVG test patterns, rasterise them
     * to PNG through sharp, store them on disk and check OCR output against the
     * digits drawn on the quadrant pattern. The class is never instantiated by
     * the code in this file; every member is attached to the constructor.
     */
    function TestPatternGenerator() {
    }
    /**
     * Escape the five XML special characters so arbitrary text can be embedded
     * in SVG text content or attribute values without breaking the markup.
     *
     * @param value Any value; it is converted with String() first.
     * @returns The escaped string.
     */
    TestPatternGenerator.escapeXml = function (value) {
        // '&' must be replaced first, otherwise the entities produced by the
        // later replacements would be escaped a second time.
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&apos;');
    };
    /**
     * Generate a test pattern with numbers in each quadrant and center
     *
     * The digits 1-4 are drawn at the quadrant centres and 5 at the image
     * centre, together with optional grid lines, a "Display N" label, the
     * "<width>x<height>" label, an optional ISO timestamp and four coloured
     * corner markers.
     *
     * @param config Pattern options. `width` and `height` are read as-is;
     *   defaults: backgroundColor '#ffffff', textColor '#000000', fontSize 48,
     *   includeGrid true, includeTimestamp true, displayIndex 0.
     * @returns Promise resolving to the PNG buffer produced by sharp.
     */
    TestPatternGenerator.generateQuadrantPattern = function (config) {
        return __awaiter(this, void 0, void 0, function () {
            var width, height, _a, backgroundColor, _b, textColor, _c, fontSize, _d, includeGrid, _e, includeTimestamp, _f, displayIndex, svg, buffer;
            return __generator(this, function (_g) {
                switch (_g.label) {
                    case 0:
                        width = config.width, height = config.height, _a = config.backgroundColor, backgroundColor = _a === void 0 ? '#ffffff' : _a, _b = config.textColor, textColor = _b === void 0 ? '#000000' : _b, _c = config.fontSize, fontSize = _c === void 0 ? 48 : _c, _d = config.includeGrid, includeGrid = _d === void 0 ? true : _d, _e = config.includeTimestamp, includeTimestamp = _e === void 0 ? true : _e, _f = config.displayIndex, displayIndex = _f === void 0 ? 0 : _f;
                        // The chain below is only re-wrapped between calls; every string
                        // fragment is unchanged.
                        svg = "\n <svg width=\"".concat(width, "\" height=\"").concat(height, "\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"").concat(width, "\" height=\"").concat(height, "\" fill=\"").concat(backgroundColor, "\"/>\n \n ")
                            .concat(includeGrid ? this.generateGrid(width, height) : '', "\n \n <!-- Quadrant dividers -->\n <line x1=\"").concat(width / 2, "\" y1=\"0\" x2=\"").concat(width / 2, "\" y2=\"").concat(height, "\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n <line x1=\"0\" y1=\"").concat(height / 2, "\" x2=\"").concat(width, "\" y2=\"").concat(height / 2, "\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n \n <!-- Quadrant numbers -->\n <text x=\"")
                            .concat(width / 4, "\" y=\"").concat(height / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">1</text>\n <text x=\"")
                            .concat((3 * width) / 4, "\" y=\"").concat(height / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">2</text>\n <text x=\"")
                            .concat(width / 4, "\" y=\"").concat((3 * height) / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">3</text>\n <text x=\"")
                            .concat((3 * width) / 4, "\" y=\"").concat((3 * height) / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">4</text>\n \n <!-- Center number -->\n <circle cx=\"")
                            .concat(width / 2, "\" cy=\"").concat(height / 2, "\" r=\"").concat(fontSize, "\" fill=\"#ff0000\" opacity=\"0.3\"/>\n <text x=\"")
                            .concat(width / 2, "\" y=\"").concat(height / 2, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">5</text>\n \n <!-- Display info -->\n <text x=\"20\" y=\"30\" font-family=\"Arial\" font-size=\"16\" fill=\"")
                            .concat(textColor, "\">Display ").concat(displayIndex, "</text>\n <text x=\"20\" y=\"50\" font-family=\"Arial\" font-size=\"16\" fill=\"").concat(textColor, "\">").concat(width, "x").concat(height, "</text>\n \n ")
                            .concat(includeTimestamp ? "<text x=\"20\" y=\"70\" font-family=\"Arial\" font-size=\"16\" fill=\"".concat(textColor, "\">Time: ").concat(new Date().toISOString(), "</text>") : '', "\n \n <!-- Corner markers -->\n <circle cx=\"10\" cy=\"10\" r=\"5\" fill=\"#ff0000\"/>\n <circle cx=\"")
                            .concat(width - 10, "\" cy=\"10\" r=\"5\" fill=\"#00ff00\"/>\n <circle cx=\"10\" cy=\"").concat(height - 10, "\" r=\"5\" fill=\"#0000ff\"/>\n <circle cx=\"").concat(width - 10, "\" cy=\"").concat(height - 10, "\" r=\"5\" fill=\"#ffff00\"/>\n </svg>\n ");
                        // Convert SVG to PNG
                        return [4 /*yield*/, (0, sharp_1.default)(Buffer.from(svg)).png().toBuffer()];
                    case 1:
                        buffer = _g.sent();
                        return [2 /*return*/, buffer];
                }
            });
        });
    };
    /**
     * Generate a complex test pattern with multiple text regions
     *
     * Draws a title, one boxed text row per sample string, a mock button, a
     * mock input field and an ISO timestamp in the bottom-right corner.
     *
     * @param config Pattern options. `width` and `height` are read as-is;
     *   defaults: backgroundColor '#f0f0f0', textColor '#000000', fontSize 24,
     *   displayIndex 0.
     * @returns Promise resolving to the PNG buffer produced by sharp.
     */
    TestPatternGenerator.generateComplexPattern = function (config) {
        return __awaiter(this, void 0, void 0, function () {
            var width, height, _a, backgroundColor, _b, textColor, _c, fontSize, _d, displayIndex, sampleTexts, svg, buffer;
            return __generator(this, function (_e) {
                switch (_e.label) {
                    case 0:
                        width = config.width, height = config.height, _a = config.backgroundColor, backgroundColor = _a === void 0 ? '#f0f0f0' : _a, _b = config.textColor, textColor = _b === void 0 ? '#000000' : _b, _c = config.fontSize, fontSize = _c === void 0 ? 24 : _c, _d = config.displayIndex, displayIndex = _d === void 0 ? 0 : _d;
                        // Sample text for OCR testing
                        sampleTexts = [
                            'The quick brown fox jumps over the lazy dog',
                            'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                            'abcdefghijklmnopqrstuvwxyz',
                            '0123456789',
                            '!@#$%^&*()_+-=[]{}|;:,.<>?',
                        ];
                        svg = "\n <svg width=\"".concat(width, "\" height=\"").concat(height, "\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"").concat(width, "\" height=\"").concat(height, "\" fill=\"").concat(backgroundColor, "\"/>\n \n <!-- Title -->\n <text x=\"")
                            .concat(width / 2, "\" y=\"50\" font-family=\"Arial\" font-size=\"32\" fill=\"").concat(textColor, "\" text-anchor=\"middle\">Vision Test Pattern - Display ").concat(displayIndex, "</text>\n \n <!-- Text regions for OCR testing -->\n ")
                            .concat(sampleTexts
                            .map(function (text, i) {
                                // The punctuation sample contains '&', '<' and '>'; without
                                // escaping the generated SVG is not well-formed XML.
                                return "\n <rect x=\"50\" y=\"".concat(150 + i * 80, "\" width=\"").concat(width - 100, "\" height=\"60\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"70\" y=\"").concat(185 + i * 80, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\">").concat(TestPatternGenerator.escapeXml(text), "</text>\n ");
                            })
                            .join(''), "\n \n <!-- UI Elements -->\n <rect x=\"50\" y=\"")
                            .concat(height - 200, "\" width=\"150\" height=\"40\" fill=\"#007bff\" rx=\"5\"/>\n <text x=\"125\" y=\"").concat(height - 175, "\" font-family=\"Arial\" font-size=\"16\" fill=\"white\" text-anchor=\"middle\">Button</text>\n \n <rect x=\"220\" y=\"")
                            .concat(height - 200, "\" width=\"200\" height=\"30\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"230\" y=\"").concat(height - 180, "\" font-family=\"Arial\" font-size=\"14\" fill=\"#666\">Input Field</text>\n \n <!-- Timestamp -->\n <text x=\"")
                            .concat(width - 20, "\" y=\"").concat(height - 20, "\" font-family=\"Arial\" font-size=\"12\" fill=\"#666\" text-anchor=\"end\">").concat(new Date().toISOString(), "</text>\n </svg>\n ");
                        return [4 /*yield*/, (0, sharp_1.default)(Buffer.from(svg)).png().toBuffer()];
                    case 1:
                        buffer = _e.sent();
                        return [2 /*return*/, buffer];
                }
            });
        });
    };
    /**
     * Generate grid lines for the pattern
     *
     * @param width Pattern width; vertical lines are emitted for x < width.
     * @param height Pattern height; horizontal lines are emitted for y < height.
     * @param spacing Distance between neighbouring lines (default 100).
     * @returns The `<line>` elements joined with newlines, or an empty string
     *   when no line fits or `spacing` is not a positive number.
     */
    TestPatternGenerator.generateGrid = function (width, height, spacing) {
        if (spacing === void 0) { spacing = 100; }
        // A zero or negative step would never advance the loops below.
        if (!(spacing > 0)) {
            return '';
        }
        var lines = [];
        // Vertical lines
        for (var x = spacing; x < width; x += spacing) {
            lines.push("<line x1=\"".concat(x, "\" y1=\"0\" x2=\"").concat(x, "\" y2=\"").concat(height, "\" stroke=\"#eeeeee\" stroke-width=\"1\"/>"));
        }
        // Horizontal lines
        for (var y = spacing; y < height; y += spacing) {
            lines.push("<line x1=\"0\" y1=\"".concat(y, "\" x2=\"").concat(width, "\" y2=\"").concat(y, "\" stroke=\"#eeeeee\" stroke-width=\"1\"/>"));
        }
        return lines.join('\n');
    };
    /**
     * Save test pattern to file
     *
     * Creates `<cwd>/test-patterns` when missing (recursive mkdir), writes the
     * buffer to `filename` joined onto that directory and logs the final path.
     *
     * @param buffer Data to write.
     * @param filename Name joined onto the output directory; it is not
     *   sanitised here.
     * @returns Promise resolving to the path that was written.
     */
    TestPatternGenerator.savePattern = function (buffer, filename) {
        return __awaiter(this, void 0, void 0, function () {
            var outputDir, filepath;
            return __generator(this, function (_a) {
                switch (_a.label) {
                    case 0:
                        outputDir = path.join(process.cwd(), 'test-patterns');
                        return [4 /*yield*/, fs.mkdir(outputDir, { recursive: true })];
                    case 1:
                        _a.sent();
                        filepath = path.join(outputDir, filename);
                        return [4 /*yield*/, fs.writeFile(filepath, buffer)];
                    case 2:
                        _a.sent();
                        core_1.logger.info("[TestPatternGenerator] Saved test pattern to ".concat(filepath));
                        return [2 /*return*/, filepath];
                }
            });
        });
    };
    /**
     * Generate patterns for all displays
     *
     * Builds one 1920x1080 quadrant pattern (timestamp enabled) per index in
     * [0, displayCount), one after another.
     *
     * @param displayCount Number of patterns to generate.
     * @returns Promise resolving to a Map from display index to PNG buffer.
     */
    TestPatternGenerator.generatePatternsForAllDisplays = function (displayCount) {
        return __awaiter(this, void 0, void 0, function () {
            var patterns, i, pattern;
            return __generator(this, function (_a) {
                switch (_a.label) {
                    case 0:
                        patterns = new Map();
                        i = 0;
                        _a.label = 1;
                    case 1:
                        // Loop condition: leave for case 4 once every display is done.
                        if (!(i < displayCount)) return [3 /*break*/, 4];
                        return [4 /*yield*/, this.generateQuadrantPattern({
                                width: 1920,
                                height: 1080,
                                displayIndex: i,
                                includeTimestamp: true,
                            })];
                    case 2:
                        pattern = _a.sent();
                        patterns.set(i, pattern);
                        _a.label = 3;
                    case 3:
                        i++;
                        return [3 /*break*/, 1];
                    case 4: return [2 /*return*/, patterns];
                }
            });
        });
    };
    /**
     * Verify OCR results match expected quadrant numbers
     *
     * Every run of digits in `ocrText` is parsed as a base-10 integer; the
     * values 1 to 5 are collected once each, in order of first appearance.
     * Because whole digit runs are parsed, a zero-padded token such as "05"
     * counts as 5 while "15" does not count as 1 or 5.
     *
     * @param ocrText Text returned by OCR.
     * @returns `success` (all of 1-5 were found), `foundNumbers` and
     *   `missingNumbers`.
     */
    TestPatternGenerator.verifyQuadrantNumbers = function (ocrText) {
        var expectedNumbers = [1, 2, 3, 4, 5];
        var foundNumbers = [];
        // Extract all numbers from OCR text
        var matches = ocrText.match(/\d+/g);
        if (matches) {
            matches.forEach(function (match) {
                var num = parseInt(match, 10);
                if (expectedNumbers.includes(num) && !foundNumbers.includes(num)) {
                    foundNumbers.push(num);
                }
            });
        }
        var missingNumbers = expectedNumbers.filter(function (n) { return !foundNumbers.includes(n); });
        return {
            success: missingNumbers.length === 0,
            foundNumbers: foundNumbers,
            missingNumbers: missingNumbers,
        };
    };
    return TestPatternGenerator;
}());
|
|
190
|
+
exports.TestPatternGenerator = TestPatternGenerator;
|
|
191
|
+
//# sourceMappingURL=test-pattern-generator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-pattern-generator.js","sourceRoot":"","sources":["../../src/tests/test-pattern-generator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+BAA0B;AAC1B,2BAA6B;AAC7B,gCAAkC;AAClC,sCAAuC;AAavC;IAAA;IA0MA,CAAC;IAzMC;;OAEG;IACU,4CAAuB,GAApC,UAAqC,MAAyB;;;;;;wBAE1D,KAAK,GAQH,MAAM,MARH,EACL,MAAM,GAOJ,MAAM,OAPF,EACN,KAME,MAAM,gBANmB,EAA3B,eAAe,mBAAG,SAAS,KAAA,EAC3B,KAKE,MAAM,UALa,EAArB,SAAS,mBAAG,SAAS,KAAA,EACrB,KAIE,MAAM,SAJK,EAAb,QAAQ,mBAAG,EAAE,KAAA,EACb,KAGE,MAAM,YAHU,EAAlB,WAAW,mBAAG,IAAI,KAAA,EAClB,KAEE,MAAM,iBAFe,EAAvB,gBAAgB,mBAAG,IAAI,KAAA,EACvB,KACE,MAAM,aADQ,EAAhB,YAAY,mBAAG,CAAC,KAAA,CACP;wBAGL,GAAG,GAAG,+BACI,KAAK,yBAAa,MAAM,0GAErB,KAAK,yBAAa,MAAM,uBAAW,eAAe,qCAE/D,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,gFAGzC,KAAK,GAAG,CAAC,8BAAgB,KAAK,GAAG,CAAC,qBAAS,MAAM,qFAC1C,MAAM,GAAG,CAAC,qBAAS,KAAK,qBAAS,MAAM,GAAG,CAAC,wHAGnD,KAAK,GAAG,CAAC,oBAAQ,MAAM,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,gGAC3F,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,oBAAQ,MAAM,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,gGACjG,KAAK,GAAG,CAAC,oBAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,gGACjG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,oBAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,6IAGpG,KAAK,GAAG,CAAC,qBAAS,MAAM,GAAG,CAAC,oBAAQ,QAAQ,sEAC/C,KAAK,GAAG,CAAC,oBAAQ,MAAM,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,qMAGvC,SAAS,wBAAa,YAAY,oGAClC,SAAS,gBAAK,KAAK,cAAI,MAAM,wCAE1F,gBAAgB,CAAC,CAAC,CAAC,gFAAgE,SAAS,sBAAW,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,YAAS,CAAC,CAAC,CAAC,EAAE,+IAIjI,KAAK,GAAG,EAAE,qFACF,MAAM,GAAG,EAAE,iEACnB,KAAK,GAAG,EAAE,qBAAS,MAAM,GAAG,EAAE,sDAE/C,CAAC;wBAGa,qBAAM,IAAA,eAAK,EAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,EAAA;;wBAAvD,MAAM,GAAG,SAA8C;wBAE7D,sBAAO,MAAM,EAAC;;;;KACf;IAED;;OAEG;IACU,2CAAsB,GAAnC,UAAoC,MAAyB;;;;;;wBAEzD,KAAK,GAMH,MAAM,MANH,EACL,MAAM,GAKJ,MAAM,OALF,EACN,KAIE,MAAM,gBAJmB,EAA3B,eAAe,mBAAG,SAAS,KAAA,EAC3B,KAGE,MAAM,UAHa,EAArB,SAAS,mBAAG,SAAS,KAAA,EACrB,KAEE,MAAM,
SAFK,EAAb,QAAQ,mBAAG,EAAE,KAAA,EACb,KACE,MAAM,aADQ,EAAhB,YAAY,mBAAG,CAAC,KAAA,CACP;wBAGL,WAAW,GAAG;4BAClB,6CAA6C;4BAC7C,4BAA4B;4BAC5B,4BAA4B;4BAC5B,YAAY;4BACZ,4BAA4B;yBAC7B,CAAC;wBAEI,GAAG,GAAG,+BACI,KAAK,yBAAa,MAAM,0GAErB,KAAK,yBAAa,MAAM,uBAAW,eAAe,uEAGtD,KAAK,GAAG,CAAC,uEAAqD,SAAS,qEAAwD,YAAY,uFAGpJ,WAAW;6BACV,GAAG,CACF,UAAC,IAAI,EAAE,CAAC,IAAK,OAAA,yCACG,GAAG,GAAG,CAAC,GAAG,EAAE,wBAAY,KAAK,GAAG,GAAG,gHACnC,GAAG,GAAG,CAAC,GAAG,EAAE,kDAAoC,QAAQ,uBAAW,SAAS,gBAAK,IAAI,sBACxG,EAHgB,CAGhB,CACE;6BACA,IAAI,CAAC,EAAE,CAAC,kFAGO,MAAM,GAAG,GAAG,qGACX,MAAM,GAAG,GAAG,kJAEZ,MAAM,GAAG,GAAG,6HACZ,MAAM,GAAG,GAAG,iJAGpB,KAAK,GAAG,EAAE,oBAAQ,MAAM,GAAG,EAAE,yFAAsE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,gCAEzI,CAAC;wBAEa,qBAAM,IAAA,eAAK,EAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,EAAA;;wBAAvD,MAAM,GAAG,SAA8C;wBAE7D,sBAAO,MAAM,EAAC;;;;KACf;IAED;;OAEG;IACY,iCAAY,GAA3B,UAA4B,KAAa,EAAE,MAAc,EAAE,OAAqB;QAArB,wBAAA,EAAA,aAAqB;QAC9E,IAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,iBAAiB;QACjB,KAAK,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC;YAC9C,KAAK,CAAC,IAAI,CACR,qBAAa,CAAC,8BAAgB,CAAC,qBAAS,MAAM,+CAAuC,CACtF,CAAC;QACJ,CAAC;QAED,mBAAmB;QACnB,KAAK,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC;YAC/C,KAAK,CAAC,IAAI,CACR,8BAAoB,CAAC,qBAAS,KAAK,qBAAS,CAAC,+CAAuC,CACrF,CAAC;QACJ,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;OAEG;IACU,gCAAW,GAAxB,UAAyB,MAAc,EAAE,QAAgB;;;;;;wBACjD,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,eAAe,CAAC,CAAC;wBAC5D,qBAAM,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAA;;wBAA9C,SAA8C,CAAC;wBAEzC,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;wBAChD,qBAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAA;;wBAApC,SAAoC,CAAC;wBAErC,aAAM,CAAC,IAAI,CAAC,uDAAgD,QAAQ,CAAE,CAAC,CAAC;wBACxE,sBAAO,QAAQ,EAAC;;;;KACjB;IAED;;OAEG;IACU,mDAA8B,GAA3C,UAA4C,YAAoB;;;;;;wBACxD,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;wBAElC,CAAC,GAAG,CAAC;;;6BAAE,CAAA,CAAC,GAAG,YAAY,CAAA;wBACd,qBAAM,IAAI,CAAC,uB
AAuB,CAAC;gCACjD,KAAK,EAAE,IAAI;gCACX,MAAM,EAAE,IAAI;gCACZ,YAAY,EAAE,CAAC;gCACf,gBAAgB,EAAE,IAAI;6BACvB,CAAC,EAAA;;wBALI,OAAO,GAAG,SAKd;wBAEF,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;;;wBARO,CAAC,EAAE,CAAA;;4BAWrC,sBAAO,QAAQ,EAAC;;;;KACjB;IAED;;OAEG;IACI,0CAAqB,GAA5B,UAA6B,OAAe;QAK1C,IAAM,eAAe,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QACxC,IAAM,YAAY,GAAa,EAAE,CAAC;QAElC,oCAAoC;QACpC,IAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,UAAC,KAAK;gBACpB,IAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBAChC,IAAI,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,IAAM,cAAc,GAAG,eAAe,CAAC,MAAM,CAAC,UAAC,CAAC,IAAK,OAAA,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAzB,CAAyB,CAAC,CAAC;QAEhF,OAAO;YACL,OAAO,EAAE,cAAc,CAAC,MAAM,KAAK,CAAC;YACpC,YAAY,cAAA;YACZ,cAAc,gBAAA;SACf,CAAC;IACJ,CAAC;IACH,2BAAC;AAAD,CAAC,AA1MD,IA0MC;AA1MY,oDAAoB","sourcesContent":["import sharp from 'sharp';\nimport * as path from 'path';\nimport * as fs from 'fs/promises';\nimport { logger } from '@elizaos/core';\n\nexport interface TestPatternConfig {\n width: number;\n height: number;\n backgroundColor?: string;\n textColor?: string;\n fontSize?: number;\n includeGrid?: boolean;\n includeTimestamp?: boolean;\n displayIndex?: number;\n}\n\nexport class TestPatternGenerator {\n /**\n * Generate a test pattern with numbers in each quadrant and center\n */\n static async generateQuadrantPattern(config: TestPatternConfig): Promise<Buffer> {\n const {\n width,\n height,\n backgroundColor = '#ffffff',\n textColor = '#000000',\n fontSize = 48,\n includeGrid = true,\n includeTimestamp = true,\n displayIndex = 0,\n } = config;\n\n // Create SVG with test pattern\n const svg = `\n <svg width=\"${width}\" height=\"${height}\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"${width}\" 
height=\"${height}\" fill=\"${backgroundColor}\"/>\n \n ${includeGrid ? this.generateGrid(width, height) : ''}\n \n <!-- Quadrant dividers -->\n <line x1=\"${width / 2}\" y1=\"0\" x2=\"${width / 2}\" y2=\"${height}\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n <line x1=\"0\" y1=\"${height / 2}\" x2=\"${width}\" y2=\"${height / 2}\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n \n <!-- Quadrant numbers -->\n <text x=\"${width / 4}\" y=\"${height / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">1</text>\n <text x=\"${(3 * width) / 4}\" y=\"${height / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">2</text>\n <text x=\"${width / 4}\" y=\"${(3 * height) / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">3</text>\n <text x=\"${(3 * width) / 4}\" y=\"${(3 * height) / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">4</text>\n \n <!-- Center number -->\n <circle cx=\"${width / 2}\" cy=\"${height / 2}\" r=\"${fontSize}\" fill=\"#ff0000\" opacity=\"0.3\"/>\n <text x=\"${width / 2}\" y=\"${height / 2}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">5</text>\n \n <!-- Display info -->\n <text x=\"20\" y=\"30\" font-family=\"Arial\" font-size=\"16\" fill=\"${textColor}\">Display ${displayIndex}</text>\n <text x=\"20\" y=\"50\" font-family=\"Arial\" font-size=\"16\" fill=\"${textColor}\">${width}x${height}</text>\n \n ${includeTimestamp ? 
`<text x=\"20\" y=\"70\" font-family=\"Arial\" font-size=\"16\" fill=\"${textColor}\">Time: ${new Date().toISOString()}</text>` : ''}\n \n <!-- Corner markers -->\n <circle cx=\"10\" cy=\"10\" r=\"5\" fill=\"#ff0000\"/>\n <circle cx=\"${width - 10}\" cy=\"10\" r=\"5\" fill=\"#00ff00\"/>\n <circle cx=\"10\" cy=\"${height - 10}\" r=\"5\" fill=\"#0000ff\"/>\n <circle cx=\"${width - 10}\" cy=\"${height - 10}\" r=\"5\" fill=\"#ffff00\"/>\n </svg>\n `;\n\n // Convert SVG to PNG\n const buffer = await sharp(Buffer.from(svg)).png().toBuffer();\n\n return buffer;\n }\n\n /**\n * Generate a complex test pattern with multiple text regions\n */\n static async generateComplexPattern(config: TestPatternConfig): Promise<Buffer> {\n const {\n width,\n height,\n backgroundColor = '#f0f0f0',\n textColor = '#000000',\n fontSize = 24,\n displayIndex = 0,\n } = config;\n\n // Sample text for OCR testing\n const sampleTexts = [\n 'The quick brown fox jumps over the lazy dog',\n 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',\n 'abcdefghijklmnopqrstuvwxyz',\n '0123456789',\n '!@#$%^&*()_+-=[]{}|;:,.<>?',\n ];\n\n const svg = `\n <svg width=\"${width}\" height=\"${height}\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"${width}\" height=\"${height}\" fill=\"${backgroundColor}\"/>\n \n <!-- Title -->\n <text x=\"${width / 2}\" y=\"50\" font-family=\"Arial\" font-size=\"32\" fill=\"${textColor}\" text-anchor=\"middle\">Vision Test Pattern - Display ${displayIndex}</text>\n \n <!-- Text regions for OCR testing -->\n ${sampleTexts\n .map(\n (text, i) => `\n <rect x=\"50\" y=\"${150 + i * 80}\" width=\"${width - 100}\" height=\"60\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"70\" y=\"${185 + i * 80}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\">${text}</text>\n `\n )\n .join('')}\n \n <!-- UI Elements -->\n <rect x=\"50\" y=\"${height - 200}\" width=\"150\" height=\"40\" fill=\"#007bff\" rx=\"5\"/>\n <text x=\"125\" y=\"${height - 
175}\" font-family=\"Arial\" font-size=\"16\" fill=\"white\" text-anchor=\"middle\">Button</text>\n \n <rect x=\"220\" y=\"${height - 200}\" width=\"200\" height=\"30\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"230\" y=\"${height - 180}\" font-family=\"Arial\" font-size=\"14\" fill=\"#666\">Input Field</text>\n \n <!-- Timestamp -->\n <text x=\"${width - 20}\" y=\"${height - 20}\" font-family=\"Arial\" font-size=\"12\" fill=\"#666\" text-anchor=\"end\">${new Date().toISOString()}</text>\n </svg>\n `;\n\n const buffer = await sharp(Buffer.from(svg)).png().toBuffer();\n\n return buffer;\n }\n\n /**\n * Generate grid lines for the pattern\n */\n private static generateGrid(width: number, height: number, spacing: number = 100): string {\n const lines: string[] = [];\n\n // Vertical lines\n for (let x = spacing; x < width; x += spacing) {\n lines.push(\n `<line x1=\"${x}\" y1=\"0\" x2=\"${x}\" y2=\"${height}\" stroke=\"#eeeeee\" stroke-width=\"1\"/>`\n );\n }\n\n // Horizontal lines\n for (let y = spacing; y < height; y += spacing) {\n lines.push(\n `<line x1=\"0\" y1=\"${y}\" x2=\"${width}\" y2=\"${y}\" stroke=\"#eeeeee\" stroke-width=\"1\"/>`\n );\n }\n\n return lines.join('\\n');\n }\n\n /**\n * Save test pattern to file\n */\n static async savePattern(buffer: Buffer, filename: string): Promise<string> {\n const outputDir = path.join(process.cwd(), 'test-patterns');\n await fs.mkdir(outputDir, { recursive: true });\n\n const filepath = path.join(outputDir, filename);\n await fs.writeFile(filepath, buffer);\n\n logger.info(`[TestPatternGenerator] Saved test pattern to ${filepath}`);\n return filepath;\n }\n\n /**\n * Generate patterns for all displays\n */\n static async generatePatternsForAllDisplays(displayCount: number): Promise<Map<number, Buffer>> {\n const patterns = new Map<number, Buffer>();\n\n for (let i = 0; i < displayCount; i++) {\n const pattern = await this.generateQuadrantPattern({\n width: 1920,\n height: 1080,\n displayIndex: 
i,\n includeTimestamp: true,\n });\n\n patterns.set(i, pattern);\n }\n\n return patterns;\n }\n\n /**\n * Verify OCR results match expected quadrant numbers\n */\n static verifyQuadrantNumbers(ocrText: string): {\n success: boolean;\n foundNumbers: number[];\n missingNumbers: number[];\n } {\n const expectedNumbers = [1, 2, 3, 4, 5];\n const foundNumbers: number[] = [];\n\n // Extract all numbers from OCR text\n const matches = ocrText.match(/\\d+/g);\n if (matches) {\n matches.forEach((match) => {\n const num = parseInt(match, 10);\n if (expectedNumbers.includes(num) && !foundNumbers.includes(num)) {\n foundNumbers.push(num);\n }\n });\n }\n\n const missingNumbers = expectedNumbers.filter((n) => !foundNumbers.includes(n));\n\n return {\n success: missingNumbers.length === 0,\n foundNumbers,\n missingNumbers,\n };\n }\n}\n"]}
|
package/dist/tests.d.ts
ADDED
package/dist/tests.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// Compiled CommonJS entry point that re-exports the vision plugin's e2e test suites.
// Each export is installed as an enumerable getter, so the value is read from the
// required module at access time rather than copied when this file loads.
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.default = exports.VisionAutonomyE2ETestSuite = exports.VisionBasicE2ETestSuite = void 0;
|
|
4
|
+
// Export all test suites for the vision plugin:
// the default exports of vision-basic and vision-autonomy under named keys,
// and `testSuites` from the e2e index as this module's default export.
|
|
5
|
+
var vision_basic_1 = require("./tests/e2e/vision-basic");
|
|
6
|
+
Object.defineProperty(exports, "VisionBasicE2ETestSuite", { enumerable: true, get: function () { return vision_basic_1.default; } });
|
|
7
|
+
var vision_autonomy_1 = require("./tests/e2e/vision-autonomy");
|
|
8
|
+
Object.defineProperty(exports, "VisionAutonomyE2ETestSuite", { enumerable: true, get: function () { return vision_autonomy_1.default; } });
|
|
9
|
+
var index_1 = require("./tests/e2e/index");
|
|
10
|
+
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return index_1.testSuites; } });
|
|
11
|
+
//# sourceMappingURL=tests.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tests.js","sourceRoot":"","sources":["../src/tests.ts"],"names":[],"mappings":";;;AAAA,+CAA+C;AAC/C,yDAA8E;AAArE,uHAAA,OAAO,OAA2B;AAC3C,+DAAoF;AAA3E,6HAAA,OAAO,OAA8B;AAC9C,2CAA0D;AAAjD,gGAAA,UAAU,OAAW","sourcesContent":["// Export all test suites for the vision plugin\nexport { default as VisionBasicE2ETestSuite } from './tests/e2e/vision-basic';\nexport { default as VisionAutonomyE2ETestSuite } from './tests/e2e/vision-autonomy';\nexport { testSuites as default } from './tests/e2e/index';\n"]}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
export declare const VisionServiceType: {
|
|
2
|
+
VISION: "VISION";
|
|
3
|
+
};
|
|
4
|
+
export interface CameraInfo {
|
|
5
|
+
id: string;
|
|
6
|
+
name: string;
|
|
7
|
+
connected: boolean;
|
|
8
|
+
}
|
|
9
|
+
export interface SceneDescription {
|
|
10
|
+
timestamp: number;
|
|
11
|
+
description: string;
|
|
12
|
+
objects: DetectedObject[];
|
|
13
|
+
people: PersonInfo[];
|
|
14
|
+
sceneChanged: boolean;
|
|
15
|
+
changePercentage: number;
|
|
16
|
+
audioTranscription?: string;
|
|
17
|
+
}
|
|
18
|
+
export interface DetectedObject {
|
|
19
|
+
id: string;
|
|
20
|
+
type: string;
|
|
21
|
+
confidence: number;
|
|
22
|
+
boundingBox: BoundingBox;
|
|
23
|
+
}
|
|
24
|
+
export interface PersonInfo {
|
|
25
|
+
id: string;
|
|
26
|
+
pose: 'sitting' | 'standing' | 'lying' | 'unknown';
|
|
27
|
+
facing: 'camera' | 'away' | 'left' | 'right' | 'unknown';
|
|
28
|
+
confidence: number;
|
|
29
|
+
boundingBox: BoundingBox;
|
|
30
|
+
keypoints?: Array<{
|
|
31
|
+
part: string;
|
|
32
|
+
position: {
|
|
33
|
+
x: number;
|
|
34
|
+
y: number;
|
|
35
|
+
};
|
|
36
|
+
score: number;
|
|
37
|
+
}>;
|
|
38
|
+
}
|
|
39
|
+
export interface BoundingBox {
|
|
40
|
+
x: number;
|
|
41
|
+
y: number;
|
|
42
|
+
width: number;
|
|
43
|
+
height: number;
|
|
44
|
+
}
|
|
45
|
+
export interface VisionFrame {
|
|
46
|
+
timestamp: number;
|
|
47
|
+
width: number;
|
|
48
|
+
height: number;
|
|
49
|
+
data: Buffer;
|
|
50
|
+
format: 'rgb' | 'rgba' | 'jpeg' | 'png';
|
|
51
|
+
}
|
|
52
|
+
export declare enum VisionMode {
|
|
53
|
+
OFF = "OFF",
|
|
54
|
+
CAMERA = "CAMERA",
|
|
55
|
+
SCREEN = "SCREEN",
|
|
56
|
+
BOTH = "BOTH"
|
|
57
|
+
}
|
|
58
|
+
export interface ScreenCapture {
|
|
59
|
+
timestamp: number;
|
|
60
|
+
width: number;
|
|
61
|
+
height: number;
|
|
62
|
+
data: Buffer;
|
|
63
|
+
tiles: ScreenTile[];
|
|
64
|
+
}
|
|
65
|
+
export interface ScreenTile {
|
|
66
|
+
id: string;
|
|
67
|
+
row: number;
|
|
68
|
+
col: number;
|
|
69
|
+
x: number;
|
|
70
|
+
y: number;
|
|
71
|
+
width: number;
|
|
72
|
+
height: number;
|
|
73
|
+
data?: Buffer;
|
|
74
|
+
analysis?: TileAnalysis;
|
|
75
|
+
}
|
|
76
|
+
export interface TileAnalysis {
|
|
77
|
+
timestamp: number;
|
|
78
|
+
florence2?: Florence2Result;
|
|
79
|
+
ocr?: OCRResult;
|
|
80
|
+
objects?: DetectedObject[];
|
|
81
|
+
text?: string;
|
|
82
|
+
summary?: string;
|
|
83
|
+
}
|
|
84
|
+
export interface Florence2Result {
|
|
85
|
+
caption?: string;
|
|
86
|
+
objects?: Array<{
|
|
87
|
+
label: string;
|
|
88
|
+
bbox: BoundingBox;
|
|
89
|
+
confidence: number;
|
|
90
|
+
}>;
|
|
91
|
+
regions?: Array<{
|
|
92
|
+
description: string;
|
|
93
|
+
bbox: BoundingBox;
|
|
94
|
+
}>;
|
|
95
|
+
tags?: string[];
|
|
96
|
+
}
|
|
97
|
+
export interface OCRResult {
|
|
98
|
+
text: string;
|
|
99
|
+
blocks: Array<{
|
|
100
|
+
text: string;
|
|
101
|
+
bbox: BoundingBox;
|
|
102
|
+
confidence: number;
|
|
103
|
+
words?: Array<{
|
|
104
|
+
text: string;
|
|
105
|
+
bbox: BoundingBox;
|
|
106
|
+
confidence: number;
|
|
107
|
+
}>;
|
|
108
|
+
}>;
|
|
109
|
+
fullText: string;
|
|
110
|
+
}
|
|
111
|
+
export interface EnhancedSceneDescription extends SceneDescription {
|
|
112
|
+
screenCapture?: ScreenCapture;
|
|
113
|
+
screenAnalysis?: {
|
|
114
|
+
fullScreenOCR?: string;
|
|
115
|
+
activeTile?: TileAnalysis;
|
|
116
|
+
gridSummary?: string;
|
|
117
|
+
focusedApp?: string;
|
|
118
|
+
uiElements?: Array<{
|
|
119
|
+
type: string;
|
|
120
|
+
text: string;
|
|
121
|
+
position: BoundingBox;
|
|
122
|
+
}>;
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
export interface VisionConfig {
|
|
126
|
+
cameraName?: string;
|
|
127
|
+
pixelChangeThreshold?: number;
|
|
128
|
+
updateInterval?: number;
|
|
129
|
+
enablePoseDetection?: boolean;
|
|
130
|
+
enableObjectDetection?: boolean;
|
|
131
|
+
tfUpdateInterval?: number;
|
|
132
|
+
vlmUpdateInterval?: number;
|
|
133
|
+
tfChangeThreshold?: number;
|
|
134
|
+
vlmChangeThreshold?: number;
|
|
135
|
+
visionMode?: VisionMode;
|
|
136
|
+
screenCaptureInterval?: number;
|
|
137
|
+
tileSize?: number;
|
|
138
|
+
tileProcessingOrder?: 'sequential' | 'priority' | 'random';
|
|
139
|
+
ocrEnabled?: boolean;
|
|
140
|
+
florence2Enabled?: boolean;
|
|
141
|
+
screenRegion?: {
|
|
142
|
+
x: number;
|
|
143
|
+
y: number;
|
|
144
|
+
width: number;
|
|
145
|
+
height: number;
|
|
146
|
+
};
|
|
147
|
+
displayIndex?: number;
|
|
148
|
+
captureAllDisplays?: boolean;
|
|
149
|
+
targetScreenFPS?: number;
|
|
150
|
+
textRegions?: Array<{
|
|
151
|
+
x: number;
|
|
152
|
+
y: number;
|
|
153
|
+
width: number;
|
|
154
|
+
height: number;
|
|
155
|
+
}>;
|
|
156
|
+
}
|
|
157
|
+
export interface TrackedEntity {
|
|
158
|
+
id: string;
|
|
159
|
+
entityType: 'person' | 'object' | 'pet';
|
|
160
|
+
firstSeen: number;
|
|
161
|
+
lastSeen: number;
|
|
162
|
+
lastPosition: BoundingBox;
|
|
163
|
+
appearances: EntityAppearance[];
|
|
164
|
+
attributes: EntityAttributes;
|
|
165
|
+
worldId?: string;
|
|
166
|
+
roomId?: string;
|
|
167
|
+
}
|
|
168
|
+
export interface EntityAppearance {
|
|
169
|
+
timestamp: number;
|
|
170
|
+
boundingBox: BoundingBox;
|
|
171
|
+
confidence: number;
|
|
172
|
+
embedding?: number[];
|
|
173
|
+
keypoints?: Array<{
|
|
174
|
+
part: string;
|
|
175
|
+
position: {
|
|
176
|
+
x: number;
|
|
177
|
+
y: number;
|
|
178
|
+
};
|
|
179
|
+
score: number;
|
|
180
|
+
}>;
|
|
181
|
+
}
|
|
182
|
+
export interface EntityAttributes {
|
|
183
|
+
name?: string;
|
|
184
|
+
faceEmbedding?: number[];
|
|
185
|
+
faceId?: string;
|
|
186
|
+
clothing?: string[];
|
|
187
|
+
hairColor?: string;
|
|
188
|
+
accessories?: string[];
|
|
189
|
+
objectType?: string;
|
|
190
|
+
color?: string;
|
|
191
|
+
size?: 'small' | 'medium' | 'large';
|
|
192
|
+
description?: string;
|
|
193
|
+
tags?: string[];
|
|
194
|
+
}
|
|
195
|
+
export interface FaceLibrary {
|
|
196
|
+
faces: Map<string, FaceProfile>;
|
|
197
|
+
embeddings: Map<string, number[][]>;
|
|
198
|
+
}
|
|
199
|
+
export interface FaceProfile {
|
|
200
|
+
id: string;
|
|
201
|
+
name?: string;
|
|
202
|
+
embeddings: number[][];
|
|
203
|
+
firstSeen: number;
|
|
204
|
+
lastSeen: number;
|
|
205
|
+
seenCount: number;
|
|
206
|
+
attributes?: {
|
|
207
|
+
age?: string;
|
|
208
|
+
gender?: string;
|
|
209
|
+
emotion?: string;
|
|
210
|
+
};
|
|
211
|
+
}
|
|
212
|
+
export interface WorldState {
|
|
213
|
+
worldId: string;
|
|
214
|
+
entities: Map<string, TrackedEntity>;
|
|
215
|
+
lastUpdate: number;
|
|
216
|
+
activeEntities: string[];
|
|
217
|
+
recentlyLeft: Array<{
|
|
218
|
+
entityId: string;
|
|
219
|
+
leftAt: number;
|
|
220
|
+
lastPosition: BoundingBox;
|
|
221
|
+
}>;
|
|
222
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.VisionMode = exports.VisionServiceType = void 0;
|
|
4
|
+
// Vision service types and interfaces
|
|
5
|
+
exports.VisionServiceType = {
|
|
6
|
+
VISION: 'VISION',
|
|
7
|
+
};
|
|
8
|
+
// Vision modes
|
|
9
|
+
var VisionMode;
|
|
10
|
+
(function (VisionMode) {
|
|
11
|
+
VisionMode["OFF"] = "OFF";
|
|
12
|
+
VisionMode["CAMERA"] = "CAMERA";
|
|
13
|
+
VisionMode["SCREEN"] = "SCREEN";
|
|
14
|
+
VisionMode["BOTH"] = "BOTH";
|
|
15
|
+
})(VisionMode || (exports.VisionMode = VisionMode = {}));
|
|
16
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";;;AAAA,sCAAsC;AACzB,QAAA,iBAAiB,GAAG;IAC/B,MAAM,EAAE,QAAiB;CAC1B,CAAC;AAsDF,eAAe;AACf,IAAY,UAKX;AALD,WAAY,UAAU;IACpB,yBAAW,CAAA;IACX,+BAAiB,CAAA;IACjB,+BAAiB,CAAA;IACjB,2BAAa,CAAA;AACf,CAAC,EALW,UAAU,0BAAV,UAAU,QAKrB","sourcesContent":["// Vision service types and interfaces\nexport const VisionServiceType = {\n VISION: 'VISION' as const,\n};\n\n// Vision-specific types\nexport interface CameraInfo {\n id: string;\n name: string;\n connected: boolean;\n}\n\nexport interface SceneDescription {\n timestamp: number;\n description: string;\n objects: DetectedObject[];\n people: PersonInfo[];\n sceneChanged: boolean;\n changePercentage: number;\n audioTranscription?: string; // Latest audio transcription\n}\n\nexport interface DetectedObject {\n id: string;\n type: string;\n confidence: number;\n boundingBox: BoundingBox;\n}\n\nexport interface PersonInfo {\n id: string;\n pose: 'sitting' | 'standing' | 'lying' | 'unknown';\n facing: 'camera' | 'away' | 'left' | 'right' | 'unknown';\n confidence: number;\n boundingBox: BoundingBox;\n keypoints?: Array<{\n part: string;\n position: { x: number; y: number };\n score: number;\n }>;\n}\n\nexport interface BoundingBox {\n x: number;\n y: number;\n width: number;\n height: number;\n}\n\nexport interface VisionFrame {\n timestamp: number;\n width: number;\n height: number;\n data: Buffer;\n format: 'rgb' | 'rgba' | 'jpeg' | 'png';\n}\n\n// Vision modes\nexport enum VisionMode {\n OFF = 'OFF',\n CAMERA = 'CAMERA',\n SCREEN = 'SCREEN',\n BOTH = 'BOTH',\n}\n\n// Screen capture types\nexport interface ScreenCapture {\n timestamp: number;\n width: number;\n height: number;\n data: Buffer;\n tiles: ScreenTile[];\n}\n\nexport interface ScreenTile {\n id: string;\n row: number;\n col: number;\n x: number;\n y: number;\n width: number;\n height: number;\n data?: Buffer;\n analysis?: TileAnalysis;\n}\n\nexport interface TileAnalysis {\n 
timestamp: number;\n florence2?: Florence2Result;\n ocr?: OCRResult;\n objects?: DetectedObject[];\n text?: string;\n summary?: string;\n}\n\nexport interface Florence2Result {\n caption?: string;\n objects?: Array<{\n label: string;\n bbox: BoundingBox;\n confidence: number;\n }>;\n regions?: Array<{\n description: string;\n bbox: BoundingBox;\n }>;\n tags?: string[];\n}\n\nexport interface OCRResult {\n text: string;\n blocks: Array<{\n text: string;\n bbox: BoundingBox;\n confidence: number;\n words?: Array<{\n text: string;\n bbox: BoundingBox;\n confidence: number;\n }>;\n }>;\n fullText: string;\n}\n\n// Enhanced scene description with screen data\nexport interface EnhancedSceneDescription extends SceneDescription {\n screenCapture?: ScreenCapture;\n screenAnalysis?: {\n fullScreenOCR?: string;\n activeTile?: TileAnalysis;\n gridSummary?: string;\n focusedApp?: string;\n uiElements?: Array<{\n type: string;\n text: string;\n position: BoundingBox;\n }>;\n };\n}\n\n// Update VisionConfig\nexport interface VisionConfig {\n cameraName?: string;\n pixelChangeThreshold?: number;\n updateInterval?: number;\n enablePoseDetection?: boolean;\n enableObjectDetection?: boolean;\n tfUpdateInterval?: number;\n vlmUpdateInterval?: number;\n tfChangeThreshold?: number;\n vlmChangeThreshold?: number;\n\n // Screen vision config\n visionMode?: VisionMode;\n screenCaptureInterval?: number; // ms between screen captures\n tileSize?: number; // Size of tiles (e.g., 256 for 256x256)\n tileProcessingOrder?: 'sequential' | 'priority' | 'random';\n ocrEnabled?: boolean;\n florence2Enabled?: boolean;\n screenRegion?: {\n // Optional: capture only part of screen\n x: number;\n y: number;\n width: number;\n height: number;\n };\n\n // Multi-display and worker config\n displayIndex?: number; // Specific display to capture\n captureAllDisplays?: boolean; // Cycle through all displays\n targetScreenFPS?: number; // Target FPS for screen capture\n textRegions?: Array<{\n // Specific 
regions for OCR\n x: number;\n y: number;\n width: number;\n height: number;\n }>;\n}\n\n// Entity tracking types\nexport interface TrackedEntity {\n id: string;\n entityType: 'person' | 'object' | 'pet';\n firstSeen: number;\n lastSeen: number;\n lastPosition: BoundingBox;\n appearances: EntityAppearance[];\n attributes: EntityAttributes;\n worldId?: string;\n roomId?: string;\n}\n\nexport interface EntityAppearance {\n timestamp: number;\n boundingBox: BoundingBox;\n confidence: number;\n embedding?: number[]; // Face embedding for person recognition\n keypoints?: Array<{\n part: string;\n position: { x: number; y: number };\n score: number;\n }>;\n}\n\nexport interface EntityAttributes {\n // For people\n name?: string;\n faceEmbedding?: number[];\n faceId?: string;\n clothing?: string[];\n hairColor?: string;\n accessories?: string[];\n\n // For objects\n objectType?: string;\n color?: string;\n size?: 'small' | 'medium' | 'large';\n\n // Common\n description?: string;\n tags?: string[];\n}\n\nexport interface FaceLibrary {\n faces: Map<string, FaceProfile>;\n embeddings: Map<string, number[][]>; // Multiple embeddings per profile\n}\n\nexport interface FaceProfile {\n id: string;\n name?: string;\n embeddings: number[][]; // Multiple embeddings for better recognition\n firstSeen: number;\n lastSeen: number;\n seenCount: number;\n attributes?: {\n age?: string;\n gender?: string;\n emotion?: string;\n };\n}\n\nexport interface WorldState {\n worldId: string;\n entities: Map<string, TrackedEntity>;\n lastUpdate: number;\n activeEntities: string[]; // Currently visible\n recentlyLeft: Array<{\n entityId: string;\n leftAt: number;\n lastPosition: BoundingBox;\n }>;\n}\n"]}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import * as poseDetection from '@tensorflow-models/pose-detection';
|
|
2
|
+
import { IAgentRuntime } from '@elizaos/core';
|
|
3
|
+
import { DetectedObject, PersonInfo } from './types';
|
|
4
|
+
export interface VisionModelConfig {
|
|
5
|
+
enableObjectDetection?: boolean;
|
|
6
|
+
enablePoseDetection?: boolean;
|
|
7
|
+
florence2?: {
|
|
8
|
+
baseUrl: string;
|
|
9
|
+
apiKey?: string;
|
|
10
|
+
};
|
|
11
|
+
vlm?: {
|
|
12
|
+
model: string;
|
|
13
|
+
temperature: number;
|
|
14
|
+
maxTokens: number;
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
export type Pose = 'sitting' | 'standing' | 'lying' | 'walking' | 'unknown';
|
|
18
|
+
export interface PoseLandmark {
|
|
19
|
+
name: string;
|
|
20
|
+
x: number;
|
|
21
|
+
y: number;
|
|
22
|
+
score: number;
|
|
23
|
+
}
|
|
24
|
+
export declare class VisionModels {
|
|
25
|
+
private runtime;
|
|
26
|
+
private config;
|
|
27
|
+
private objectDetectionModel;
|
|
28
|
+
private poseDetector;
|
|
29
|
+
private initialized;
|
|
30
|
+
private florence2Model;
|
|
31
|
+
private cocoSsdModel;
|
|
32
|
+
private posenetModel;
|
|
33
|
+
constructor(runtime: IAgentRuntime, config?: VisionModelConfig);
|
|
34
|
+
initialize(config: VisionModelConfig): Promise<void>;
|
|
35
|
+
hasObjectDetection(): boolean;
|
|
36
|
+
hasPoseDetection(): boolean;
|
|
37
|
+
detectObjects(imageData: Buffer, width: number, height: number, description?: string): Promise<DetectedObject[]>;
|
|
38
|
+
private enhancedObjectDetection;
|
|
39
|
+
private generatePlausibleBoundingBox;
|
|
40
|
+
detectPoses(imageData: Buffer, width: number, height: number, description?: string): Promise<PersonInfo[]>;
|
|
41
|
+
private enhancedPoseDetection;
|
|
42
|
+
private generatePlausibleKeypoints;
|
|
43
|
+
convertPosesToPersonInfo(poses: poseDetection.Pose[]): PersonInfo[];
|
|
44
|
+
private determinePoseFromKeypoints;
|
|
45
|
+
private determineFacingDirection;
|
|
46
|
+
dispose(): Promise<void>;
|
|
47
|
+
}
|