@elizaos/plugin-vision 1.2.1 → 2.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/build.config.ts +53 -53
  2. package/dist/index.js +6716 -67
  3. package/dist/index.js.map +33 -1
  4. package/dist/workers/florence2-worker.js +112304 -307
  5. package/dist/workers/florence2-worker.js.map +92 -1
  6. package/dist/workers/ocr-worker.js +119718 -339
  7. package/dist/workers/ocr-worker.js.map +137 -1
  8. package/dist/workers/screen-capture-worker.js +350 -418
  9. package/dist/workers/screen-capture-worker.js.map +11 -1
  10. package/package.json +13 -17
  11. package/README.md +0 -270
  12. package/dist/action.d.ts +0 -8
  13. package/dist/action.js +0 -1212
  14. package/dist/action.js.map +0 -1
  15. package/dist/audio-capture-stream.d.ts +0 -42
  16. package/dist/audio-capture-stream.js +0 -516
  17. package/dist/audio-capture-stream.js.map +0 -1
  18. package/dist/audio-capture.d.ts +0 -25
  19. package/dist/audio-capture.js +0 -412
  20. package/dist/audio-capture.js.map +0 -1
  21. package/dist/basic.test.d.ts +0 -1
  22. package/dist/basic.test.js +0 -97
  23. package/dist/basic.test.js.map +0 -1
  24. package/dist/config.d.ts +0 -73
  25. package/dist/config.js +0 -254
  26. package/dist/config.js.map +0 -1
  27. package/dist/entity-tracker.d.ts +0 -32
  28. package/dist/entity-tracker.js +0 -361
  29. package/dist/entity-tracker.js.map +0 -1
  30. package/dist/errors.d.ts +0 -67
  31. package/dist/errors.js +0 -395
  32. package/dist/errors.js.map +0 -1
  33. package/dist/face-recognition.d.ts +0 -31
  34. package/dist/face-recognition.js +0 -332
  35. package/dist/face-recognition.js.map +0 -1
  36. package/dist/florence2-local.d.ts +0 -25
  37. package/dist/florence2-local.js +0 -280
  38. package/dist/florence2-local.js.map +0 -1
  39. package/dist/florence2-model.d.ts +0 -36
  40. package/dist/florence2-model.js +0 -503
  41. package/dist/florence2-model.js.map +0 -1
  42. package/dist/index.d.ts +0 -3
  43. package/dist/ocr-service-real.d.ts +0 -32
  44. package/dist/ocr-service-real.js +0 -396
  45. package/dist/ocr-service-real.js.map +0 -1
  46. package/dist/ocr-service.d.ts +0 -28
  47. package/dist/ocr-service.js +0 -216
  48. package/dist/ocr-service.js.map +0 -1
  49. package/dist/provider.d.ts +0 -2
  50. package/dist/provider.js +0 -285
  51. package/dist/provider.js.map +0 -1
  52. package/dist/screen-capture.d.ts +0 -16
  53. package/dist/screen-capture.js +0 -302
  54. package/dist/screen-capture.js.map +0 -1
  55. package/dist/service.d.ts +0 -73
  56. package/dist/service.js +0 -1662
  57. package/dist/service.js.map +0 -1
  58. package/dist/tests/e2e/index.d.ts +0 -8
  59. package/dist/tests/e2e/index.js +0 -33
  60. package/dist/tests/e2e/index.js.map +0 -1
  61. package/dist/tests/e2e/run-local.d.ts +0 -2
  62. package/dist/tests/e2e/run-local.js +0 -166
  63. package/dist/tests/e2e/run-local.js.map +0 -1
  64. package/dist/tests/e2e/screen-vision.d.ts +0 -11
  65. package/dist/tests/e2e/screen-vision.js +0 -384
  66. package/dist/tests/e2e/screen-vision.js.map +0 -1
  67. package/dist/tests/e2e/vision-autonomy.d.ts +0 -11
  68. package/dist/tests/e2e/vision-autonomy.js +0 -375
  69. package/dist/tests/e2e/vision-autonomy.js.map +0 -1
  70. package/dist/tests/e2e/vision-basic.d.ts +0 -11
  71. package/dist/tests/e2e/vision-basic.js +0 -434
  72. package/dist/tests/e2e/vision-basic.js.map +0 -1
  73. package/dist/tests/e2e/vision-capture-log.d.ts +0 -11
  74. package/dist/tests/e2e/vision-capture-log.js +0 -302
  75. package/dist/tests/e2e/vision-capture-log.js.map +0 -1
  76. package/dist/tests/e2e/vision-runtime.d.ts +0 -11
  77. package/dist/tests/e2e/vision-runtime.js +0 -357
  78. package/dist/tests/e2e/vision-runtime.js.map +0 -1
  79. package/dist/tests/e2e/vision-worker-tests.d.ts +0 -11
  80. package/dist/tests/e2e/vision-worker-tests.js +0 -466
  81. package/dist/tests/e2e/vision-worker-tests.js.map +0 -1
  82. package/dist/tests/test-pattern-generator.d.ts +0 -40
  83. package/dist/tests/test-pattern-generator.js +0 -191
  84. package/dist/tests/test-pattern-generator.js.map +0 -1
  85. package/dist/tests.d.ts +0 -3
  86. package/dist/tests.js +0 -11
  87. package/dist/tests.js.map +0 -1
  88. package/dist/types.d.ts +0 -222
  89. package/dist/types.js +0 -16
  90. package/dist/types.js.map +0 -1
  91. package/dist/vision-models.d.ts +0 -47
  92. package/dist/vision-models.js +0 -501
  93. package/dist/vision-models.js.map +0 -1
  94. package/dist/vision-worker-manager.d.ts +0 -61
  95. package/dist/vision-worker-manager.js +0 -668
  96. package/dist/vision-worker-manager.js.map +0 -1
  97. package/dist/workers/florence2-worker-simple.d.ts +0 -13
  98. package/dist/workers/florence2-worker-simple.js +0 -121
  99. package/dist/workers/florence2-worker-simple.js.map +0 -1
  100. package/dist/workers/florence2-worker.d.ts +0 -1
  101. package/dist/workers/ocr-worker.d.ts +0 -1
  102. package/dist/workers/screen-capture-worker.d.ts +0 -1
  103. package/dist/workers/worker-logger.d.ts +0 -9
  104. package/dist/workers/worker-logger.js +0 -95
  105. package/dist/workers/worker-logger.js.map +0 -1
@@ -1,191 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __generator = (this && this.__generator) || function (thisArg, body) {
12
- var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
13
- return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
14
- function verb(n) { return function (v) { return step([n, v]); }; }
15
- function step(op) {
16
- if (f) throw new TypeError("Generator is already executing.");
17
- while (g && (g = 0, op[0] && (_ = 0)), _) try {
18
- if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
19
- if (y = 0, t) op = [op[0] & 2, t.value];
20
- switch (op[0]) {
21
- case 0: case 1: t = op; break;
22
- case 4: _.label++; return { value: op[1], done: false };
23
- case 5: _.label++; y = op[1]; op = [0]; continue;
24
- case 7: op = _.ops.pop(); _.trys.pop(); continue;
25
- default:
26
- if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
27
- if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
28
- if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
29
- if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
30
- if (t[2]) _.ops.pop();
31
- _.trys.pop(); continue;
32
- }
33
- op = body.call(thisArg, _);
34
- } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
35
- if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
36
- }
37
- };
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.TestPatternGenerator = void 0;
40
- var sharp_1 = require("sharp");
41
- var path = require("path");
42
- var fs = require("fs/promises");
43
- var core_1 = require("@elizaos/core");
44
- var TestPatternGenerator = /** @class */ (function () {
45
- function TestPatternGenerator() {
46
- }
47
- /**
48
- * Generate a test pattern with numbers in each quadrant and center
49
- */
50
- TestPatternGenerator.generateQuadrantPattern = function (config) {
51
- return __awaiter(this, void 0, void 0, function () {
52
- var width, height, _a, backgroundColor, _b, textColor, _c, fontSize, _d, includeGrid, _e, includeTimestamp, _f, displayIndex, svg, buffer;
53
- return __generator(this, function (_g) {
54
- switch (_g.label) {
55
- case 0:
56
- width = config.width, height = config.height, _a = config.backgroundColor, backgroundColor = _a === void 0 ? '#ffffff' : _a, _b = config.textColor, textColor = _b === void 0 ? '#000000' : _b, _c = config.fontSize, fontSize = _c === void 0 ? 48 : _c, _d = config.includeGrid, includeGrid = _d === void 0 ? true : _d, _e = config.includeTimestamp, includeTimestamp = _e === void 0 ? true : _e, _f = config.displayIndex, displayIndex = _f === void 0 ? 0 : _f;
57
- svg = "\n <svg width=\"".concat(width, "\" height=\"").concat(height, "\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"").concat(width, "\" height=\"").concat(height, "\" fill=\"").concat(backgroundColor, "\"/>\n \n ").concat(includeGrid ? this.generateGrid(width, height) : '', "\n \n <!-- Quadrant dividers -->\n <line x1=\"").concat(width / 2, "\" y1=\"0\" x2=\"").concat(width / 2, "\" y2=\"").concat(height, "\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n <line x1=\"0\" y1=\"").concat(height / 2, "\" x2=\"").concat(width, "\" y2=\"").concat(height / 2, "\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n \n <!-- Quadrant numbers -->\n <text x=\"").concat(width / 4, "\" y=\"").concat(height / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">1</text>\n <text x=\"").concat((3 * width) / 4, "\" y=\"").concat(height / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">2</text>\n <text x=\"").concat(width / 4, "\" y=\"").concat((3 * height) / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">3</text>\n <text x=\"").concat((3 * width) / 4, "\" y=\"").concat((3 * height) / 4, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">4</text>\n \n <!-- Center number -->\n <circle cx=\"").concat(width / 2, "\" cy=\"").concat(height / 2, "\" r=\"").concat(fontSize, "\" fill=\"#ff0000\" opacity=\"0.3\"/>\n <text x=\"").concat(width / 2, "\" y=\"").concat(height / 2, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\" text-anchor=\"middle\" dominant-baseline=\"middle\">5</text>\n \n <!-- Display info -->\n <text x=\"20\" y=\"30\" font-family=\"Arial\" font-size=\"16\" fill=\"").concat(textColor, "\">Display ").concat(displayIndex, "</text>\n <text x=\"20\" y=\"50\" font-family=\"Arial\" font-size=\"16\" fill=\"").concat(textColor, "\">").concat(width, "x").concat(height, "</text>\n \n ").concat(includeTimestamp ? "<text x=\"20\" y=\"70\" font-family=\"Arial\" font-size=\"16\" fill=\"".concat(textColor, "\">Time: ").concat(new Date().toISOString(), "</text>") : '', "\n \n <!-- Corner markers -->\n <circle cx=\"10\" cy=\"10\" r=\"5\" fill=\"#ff0000\"/>\n <circle cx=\"").concat(width - 10, "\" cy=\"10\" r=\"5\" fill=\"#00ff00\"/>\n <circle cx=\"10\" cy=\"").concat(height - 10, "\" r=\"5\" fill=\"#0000ff\"/>\n <circle cx=\"").concat(width - 10, "\" cy=\"").concat(height - 10, "\" r=\"5\" fill=\"#ffff00\"/>\n </svg>\n ");
58
- return [4 /*yield*/, (0, sharp_1.default)(Buffer.from(svg)).png().toBuffer()];
59
- case 1:
60
- buffer = _g.sent();
61
- return [2 /*return*/, buffer];
62
- }
63
- });
64
- });
65
- };
66
- /**
67
- * Generate a complex test pattern with multiple text regions
68
- */
69
- TestPatternGenerator.generateComplexPattern = function (config) {
70
- return __awaiter(this, void 0, void 0, function () {
71
- var width, height, _a, backgroundColor, _b, textColor, _c, fontSize, _d, displayIndex, sampleTexts, svg, buffer;
72
- return __generator(this, function (_e) {
73
- switch (_e.label) {
74
- case 0:
75
- width = config.width, height = config.height, _a = config.backgroundColor, backgroundColor = _a === void 0 ? '#f0f0f0' : _a, _b = config.textColor, textColor = _b === void 0 ? '#000000' : _b, _c = config.fontSize, fontSize = _c === void 0 ? 24 : _c, _d = config.displayIndex, displayIndex = _d === void 0 ? 0 : _d;
76
- sampleTexts = [
77
- 'The quick brown fox jumps over the lazy dog',
78
- 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
79
- 'abcdefghijklmnopqrstuvwxyz',
80
- '0123456789',
81
- '!@#$%^&*()_+-=[]{}|;:,.<>?',
82
- ];
83
- svg = "\n <svg width=\"".concat(width, "\" height=\"").concat(height, "\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"").concat(width, "\" height=\"").concat(height, "\" fill=\"").concat(backgroundColor, "\"/>\n \n <!-- Title -->\n <text x=\"").concat(width / 2, "\" y=\"50\" font-family=\"Arial\" font-size=\"32\" fill=\"").concat(textColor, "\" text-anchor=\"middle\">Vision Test Pattern - Display ").concat(displayIndex, "</text>\n \n <!-- Text regions for OCR testing -->\n ").concat(sampleTexts
84
- .map(function (text, i) { return "\n <rect x=\"50\" y=\"".concat(150 + i * 80, "\" width=\"").concat(width - 100, "\" height=\"60\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"70\" y=\"").concat(185 + i * 80, "\" font-family=\"Arial\" font-size=\"").concat(fontSize, "\" fill=\"").concat(textColor, "\">").concat(text, "</text>\n "); })
85
- .join(''), "\n \n <!-- UI Elements -->\n <rect x=\"50\" y=\"").concat(height - 200, "\" width=\"150\" height=\"40\" fill=\"#007bff\" rx=\"5\"/>\n <text x=\"125\" y=\"").concat(height - 175, "\" font-family=\"Arial\" font-size=\"16\" fill=\"white\" text-anchor=\"middle\">Button</text>\n \n <rect x=\"220\" y=\"").concat(height - 200, "\" width=\"200\" height=\"30\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"230\" y=\"").concat(height - 180, "\" font-family=\"Arial\" font-size=\"14\" fill=\"#666\">Input Field</text>\n \n <!-- Timestamp -->\n <text x=\"").concat(width - 20, "\" y=\"").concat(height - 20, "\" font-family=\"Arial\" font-size=\"12\" fill=\"#666\" text-anchor=\"end\">").concat(new Date().toISOString(), "</text>\n </svg>\n ");
86
- return [4 /*yield*/, (0, sharp_1.default)(Buffer.from(svg)).png().toBuffer()];
87
- case 1:
88
- buffer = _e.sent();
89
- return [2 /*return*/, buffer];
90
- }
91
- });
92
- });
93
- };
94
- /**
95
- * Generate grid lines for the pattern
96
- */
97
- TestPatternGenerator.generateGrid = function (width, height, spacing) {
98
- if (spacing === void 0) { spacing = 100; }
99
- var lines = [];
100
- // Vertical lines
101
- for (var x = spacing; x < width; x += spacing) {
102
- lines.push("<line x1=\"".concat(x, "\" y1=\"0\" x2=\"").concat(x, "\" y2=\"").concat(height, "\" stroke=\"#eeeeee\" stroke-width=\"1\"/>"));
103
- }
104
- // Horizontal lines
105
- for (var y = spacing; y < height; y += spacing) {
106
- lines.push("<line x1=\"0\" y1=\"".concat(y, "\" x2=\"").concat(width, "\" y2=\"").concat(y, "\" stroke=\"#eeeeee\" stroke-width=\"1\"/>"));
107
- }
108
- return lines.join('\n');
109
- };
110
- /**
111
- * Save test pattern to file
112
- */
113
- TestPatternGenerator.savePattern = function (buffer, filename) {
114
- return __awaiter(this, void 0, void 0, function () {
115
- var outputDir, filepath;
116
- return __generator(this, function (_a) {
117
- switch (_a.label) {
118
- case 0:
119
- outputDir = path.join(process.cwd(), 'test-patterns');
120
- return [4 /*yield*/, fs.mkdir(outputDir, { recursive: true })];
121
- case 1:
122
- _a.sent();
123
- filepath = path.join(outputDir, filename);
124
- return [4 /*yield*/, fs.writeFile(filepath, buffer)];
125
- case 2:
126
- _a.sent();
127
- core_1.logger.info("[TestPatternGenerator] Saved test pattern to ".concat(filepath));
128
- return [2 /*return*/, filepath];
129
- }
130
- });
131
- });
132
- };
133
- /**
134
- * Generate patterns for all displays
135
- */
136
- TestPatternGenerator.generatePatternsForAllDisplays = function (displayCount) {
137
- return __awaiter(this, void 0, void 0, function () {
138
- var patterns, i, pattern;
139
- return __generator(this, function (_a) {
140
- switch (_a.label) {
141
- case 0:
142
- patterns = new Map();
143
- i = 0;
144
- _a.label = 1;
145
- case 1:
146
- if (!(i < displayCount)) return [3 /*break*/, 4];
147
- return [4 /*yield*/, this.generateQuadrantPattern({
148
- width: 1920,
149
- height: 1080,
150
- displayIndex: i,
151
- includeTimestamp: true,
152
- })];
153
- case 2:
154
- pattern = _a.sent();
155
- patterns.set(i, pattern);
156
- _a.label = 3;
157
- case 3:
158
- i++;
159
- return [3 /*break*/, 1];
160
- case 4: return [2 /*return*/, patterns];
161
- }
162
- });
163
- });
164
- };
165
- /**
166
- * Verify OCR results match expected quadrant numbers
167
- */
168
- TestPatternGenerator.verifyQuadrantNumbers = function (ocrText) {
169
- var expectedNumbers = [1, 2, 3, 4, 5];
170
- var foundNumbers = [];
171
- // Extract all numbers from OCR text
172
- var matches = ocrText.match(/\d+/g);
173
- if (matches) {
174
- matches.forEach(function (match) {
175
- var num = parseInt(match, 10);
176
- if (expectedNumbers.includes(num) && !foundNumbers.includes(num)) {
177
- foundNumbers.push(num);
178
- }
179
- });
180
- }
181
- var missingNumbers = expectedNumbers.filter(function (n) { return !foundNumbers.includes(n); });
182
- return {
183
- success: missingNumbers.length === 0,
184
- foundNumbers: foundNumbers,
185
- missingNumbers: missingNumbers,
186
- };
187
- };
188
- return TestPatternGenerator;
189
- }());
190
- exports.TestPatternGenerator = TestPatternGenerator;
191
- //# sourceMappingURL=test-pattern-generator.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"test-pattern-generator.js","sourceRoot":"","sources":["../../src/tests/test-pattern-generator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+BAA0B;AAC1B,2BAA6B;AAC7B,gCAAkC;AAClC,sCAAuC;AAavC;IAAA;IA0MA,CAAC;IAzMC;;OAEG;IACU,4CAAuB,GAApC,UAAqC,MAAyB;;;;;;wBAE1D,KAAK,GAQH,MAAM,MARH,EACL,MAAM,GAOJ,MAAM,OAPF,EACN,KAME,MAAM,gBANmB,EAA3B,eAAe,mBAAG,SAAS,KAAA,EAC3B,KAKE,MAAM,UALa,EAArB,SAAS,mBAAG,SAAS,KAAA,EACrB,KAIE,MAAM,SAJK,EAAb,QAAQ,mBAAG,EAAE,KAAA,EACb,KAGE,MAAM,YAHU,EAAlB,WAAW,mBAAG,IAAI,KAAA,EAClB,KAEE,MAAM,iBAFe,EAAvB,gBAAgB,mBAAG,IAAI,KAAA,EACvB,KACE,MAAM,aADQ,EAAhB,YAAY,mBAAG,CAAC,KAAA,CACP;wBAGL,GAAG,GAAG,+BACI,KAAK,yBAAa,MAAM,0GAErB,KAAK,yBAAa,MAAM,uBAAW,eAAe,qCAE/D,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,gFAGzC,KAAK,GAAG,CAAC,8BAAgB,KAAK,GAAG,CAAC,qBAAS,MAAM,qFAC1C,MAAM,GAAG,CAAC,qBAAS,KAAK,qBAAS,MAAM,GAAG,CAAC,wHAGnD,KAAK,GAAG,CAAC,oBAAQ,MAAM,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,gGAC3F,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,oBAAQ,MAAM,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,gGACjG,KAAK,GAAG,CAAC,oBAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,gGACjG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,oBAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,6IAGpG,KAAK,GAAG,CAAC,qBAAS,MAAM,GAAG,CAAC,oBAAQ,QAAQ,sEAC/C,KAAK,GAAG,CAAC,oBAAQ,MAAM,GAAG,CAAC,kDAAoC,QAAQ,uBAAW,SAAS,qMAGvC,SAAS,wBAAa,YAAY,oGAClC,SAAS,gBAAK,KAAK,cAAI,MAAM,wCAE1F,gBAAgB,CAAC,CAAC,CAAC,gFAAgE,SAAS,sBAAW,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,YAAS,CAAC,CAAC,CAAC,EAAE,+IAIjI,KAAK,GAAG,EAAE,qFACF,MAAM,GAAG,EAAE,iEACnB,KAAK,GAAG,EAAE,qBAAS,MAAM,GAAG,EAAE,sDAE/C,CAAC;wBAGa,qBAAM,IAAA,eAAK,EAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,EAAA;;wBAAvD,MAAM,GAAG,SAA8C;wBAE7D,sBAAO,MAAM,EAAC;;;;KACf;IAED;;OAEG;IACU,2CAAsB,GAAnC,UAAoC,MAAyB;;;;;;wBAEzD,KAAK,GAMH,MAAM,MANH,EACL,MAAM,GAKJ,MAAM,OALF,EACN,KAIE,MAAM,gBAJmB,EAA3B,eAAe,mBAAG,SAAS,KAAA,EAC3B,KAGE,MAAM,UAHa,EAArB,SAAS,mBAAG,SAAS,KAAA,EACrB,KAEE,MAAM,SAFK,EAAb,QAAQ,mBAAG,EAAE,KAAA,EACb,KACE,MAAM,aADQ,EAAhB,YAAY,mBAAG,CAAC,KAAA,CACP;wBAGL,WAAW,GAAG;4BAClB,6CAA6C;4BAC7C,4BAA4B;4BAC5B,4BAA4B;4BAC5B,YAAY;4BACZ,4BAA4B;yBAC7B,CAAC;wBAEI,GAAG,GAAG,+BACI,KAAK,yBAAa,MAAM,0GAErB,KAAK,yBAAa,MAAM,uBAAW,eAAe,uEAGtD,KAAK,GAAG,CAAC,uEAAqD,SAAS,qEAAwD,YAAY,uFAGpJ,WAAW;6BACV,GAAG,CACF,UAAC,IAAI,EAAE,CAAC,IAAK,OAAA,yCACG,GAAG,GAAG,CAAC,GAAG,EAAE,wBAAY,KAAK,GAAG,GAAG,gHACnC,GAAG,GAAG,CAAC,GAAG,EAAE,kDAAoC,QAAQ,uBAAW,SAAS,gBAAK,IAAI,sBACxG,EAHgB,CAGhB,CACE;6BACA,IAAI,CAAC,EAAE,CAAC,kFAGO,MAAM,GAAG,GAAG,qGACX,MAAM,GAAG,GAAG,kJAEZ,MAAM,GAAG,GAAG,6HACZ,MAAM,GAAG,GAAG,iJAGpB,KAAK,GAAG,EAAE,oBAAQ,MAAM,GAAG,EAAE,yFAAsE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,gCAEzI,CAAC;wBAEa,qBAAM,IAAA,eAAK,EAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,EAAA;;wBAAvD,MAAM,GAAG,SAA8C;wBAE7D,sBAAO,MAAM,EAAC;;;;KACf;IAED;;OAEG;IACY,iCAAY,GAA3B,UAA4B,KAAa,EAAE,MAAc,EAAE,OAAqB;QAArB,wBAAA,EAAA,aAAqB;QAC9E,IAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,iBAAiB;QACjB,KAAK,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC;YAC9C,KAAK,CAAC,IAAI,CACR,qBAAa,CAAC,8BAAgB,CAAC,qBAAS,MAAM,+CAAuC,CACtF,CAAC;QACJ,CAAC;QAED,mBAAmB;QACnB,KAAK,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC;YAC/C,KAAK,CAAC,IAAI,CACR,8BAAoB,CAAC,qBAAS,KAAK,qBAAS,CAAC,+CAAuC,CACrF,CAAC;QACJ,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;OAEG;IACU,gCAAW,GAAxB,UAAyB,MAAc,EAAE,QAAgB;;;;;;wBACjD,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,eAAe,CAAC,CAAC;wBAC5D,qBAAM,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAA;;wBAA9C,SAA8C,CAAC;wBAEzC,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;wBAChD,qBAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAA;;wBAApC,SAAoC,CAAC;wBAErC,aAAM,CAAC,IAAI,CAAC,uDAAgD,QAAQ,CAAE,CAAC,CAAC;wBACxE,sBAAO,QAAQ,EAAC;;;;KACjB;IAED;;OAEG;IACU,mDAA8B,GAA3C,UAA4C,YAAoB;;;;;;wBACxD,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;wBAElC,CAAC,GAAG,CAAC;;;6BAAE,CAAA,CAAC,GAAG,YAAY,CAAA;wBACd,qBAAM,IAAI,CAAC,uBAAuB,CAAC;gCACjD,KAAK,EAAE,IAAI;gCACX,MAAM,EAAE,IAAI;gCACZ,YAAY,EAAE,CAAC;gCACf,gBAAgB,EAAE,IAAI;6BACvB,CAAC,EAAA;;wBALI,OAAO,GAAG,SAKd;wBAEF,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;;;wBARO,CAAC,EAAE,CAAA;;4BAWrC,sBAAO,QAAQ,EAAC;;;;KACjB;IAED;;OAEG;IACI,0CAAqB,GAA5B,UAA6B,OAAe;QAK1C,IAAM,eAAe,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QACxC,IAAM,YAAY,GAAa,EAAE,CAAC;QAElC,oCAAoC;QACpC,IAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,OAAO,CAAC,UAAC,KAAK;gBACpB,IAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBAChC,IAAI,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,IAAM,cAAc,GAAG,eAAe,CAAC,MAAM,CAAC,UAAC,CAAC,IAAK,OAAA,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAzB,CAAyB,CAAC,CAAC;QAEhF,OAAO;YACL,OAAO,EAAE,cAAc,CAAC,MAAM,KAAK,CAAC;YACpC,YAAY,cAAA;YACZ,cAAc,gBAAA;SACf,CAAC;IACJ,CAAC;IACH,2BAAC;AAAD,CAAC,AA1MD,IA0MC;AA1MY,oDAAoB","sourcesContent":["import sharp from 'sharp';\nimport * as path from 'path';\nimport * as fs from 'fs/promises';\nimport { logger } from '@elizaos/core';\n\nexport interface TestPatternConfig {\n width: number;\n height: number;\n backgroundColor?: string;\n textColor?: string;\n fontSize?: number;\n includeGrid?: boolean;\n includeTimestamp?: boolean;\n displayIndex?: number;\n}\n\nexport class TestPatternGenerator {\n /**\n * Generate a test pattern with numbers in each quadrant and center\n */\n static async generateQuadrantPattern(config: TestPatternConfig): Promise<Buffer> {\n const {\n width,\n height,\n backgroundColor = '#ffffff',\n textColor = '#000000',\n fontSize = 48,\n includeGrid = true,\n includeTimestamp = true,\n displayIndex = 0,\n } = config;\n\n // Create SVG with test pattern\n const svg = `\n <svg width=\"${width}\" height=\"${height}\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"${width}\" height=\"${height}\" fill=\"${backgroundColor}\"/>\n \n ${includeGrid ? this.generateGrid(width, height) : ''}\n \n <!-- Quadrant dividers -->\n <line x1=\"${width / 2}\" y1=\"0\" x2=\"${width / 2}\" y2=\"${height}\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n <line x1=\"0\" y1=\"${height / 2}\" x2=\"${width}\" y2=\"${height / 2}\" stroke=\"#cccccc\" stroke-width=\"2\"/>\n \n <!-- Quadrant numbers -->\n <text x=\"${width / 4}\" y=\"${height / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">1</text>\n <text x=\"${(3 * width) / 4}\" y=\"${height / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">2</text>\n <text x=\"${width / 4}\" y=\"${(3 * height) / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">3</text>\n <text x=\"${(3 * width) / 4}\" y=\"${(3 * height) / 4}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">4</text>\n \n <!-- Center number -->\n <circle cx=\"${width / 2}\" cy=\"${height / 2}\" r=\"${fontSize}\" fill=\"#ff0000\" opacity=\"0.3\"/>\n <text x=\"${width / 2}\" y=\"${height / 2}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\" text-anchor=\"middle\" dominant-baseline=\"middle\">5</text>\n \n <!-- Display info -->\n <text x=\"20\" y=\"30\" font-family=\"Arial\" font-size=\"16\" fill=\"${textColor}\">Display ${displayIndex}</text>\n <text x=\"20\" y=\"50\" font-family=\"Arial\" font-size=\"16\" fill=\"${textColor}\">${width}x${height}</text>\n \n ${includeTimestamp ? `<text x=\"20\" y=\"70\" font-family=\"Arial\" font-size=\"16\" fill=\"${textColor}\">Time: ${new Date().toISOString()}</text>` : ''}\n \n <!-- Corner markers -->\n <circle cx=\"10\" cy=\"10\" r=\"5\" fill=\"#ff0000\"/>\n <circle cx=\"${width - 10}\" cy=\"10\" r=\"5\" fill=\"#00ff00\"/>\n <circle cx=\"10\" cy=\"${height - 10}\" r=\"5\" fill=\"#0000ff\"/>\n <circle cx=\"${width - 10}\" cy=\"${height - 10}\" r=\"5\" fill=\"#ffff00\"/>\n </svg>\n `;\n\n // Convert SVG to PNG\n const buffer = await sharp(Buffer.from(svg)).png().toBuffer();\n\n return buffer;\n }\n\n /**\n * Generate a complex test pattern with multiple text regions\n */\n static async generateComplexPattern(config: TestPatternConfig): Promise<Buffer> {\n const {\n width,\n height,\n backgroundColor = '#f0f0f0',\n textColor = '#000000',\n fontSize = 24,\n displayIndex = 0,\n } = config;\n\n // Sample text for OCR testing\n const sampleTexts = [\n 'The quick brown fox jumps over the lazy dog',\n 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',\n 'abcdefghijklmnopqrstuvwxyz',\n '0123456789',\n '!@#$%^&*()_+-=[]{}|;:,.<>?',\n ];\n\n const svg = `\n <svg width=\"${width}\" height=\"${height}\" xmlns=\"http://www.w3.org/2000/svg\">\n <!-- Background -->\n <rect width=\"${width}\" height=\"${height}\" fill=\"${backgroundColor}\"/>\n \n <!-- Title -->\n <text x=\"${width / 2}\" y=\"50\" font-family=\"Arial\" font-size=\"32\" fill=\"${textColor}\" text-anchor=\"middle\">Vision Test Pattern - Display ${displayIndex}</text>\n \n <!-- Text regions for OCR testing -->\n ${sampleTexts\n .map(\n (text, i) => `\n <rect x=\"50\" y=\"${150 + i * 80}\" width=\"${width - 100}\" height=\"60\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"70\" y=\"${185 + i * 80}\" font-family=\"Arial\" font-size=\"${fontSize}\" fill=\"${textColor}\">${text}</text>\n `\n )\n .join('')}\n \n <!-- UI Elements -->\n <rect x=\"50\" y=\"${height - 200}\" width=\"150\" height=\"40\" fill=\"#007bff\" rx=\"5\"/>\n <text x=\"125\" y=\"${height - 175}\" font-family=\"Arial\" font-size=\"16\" fill=\"white\" text-anchor=\"middle\">Button</text>\n \n <rect x=\"220\" y=\"${height - 200}\" width=\"200\" height=\"30\" fill=\"white\" stroke=\"#333\" stroke-width=\"1\"/>\n <text x=\"230\" y=\"${height - 180}\" font-family=\"Arial\" font-size=\"14\" fill=\"#666\">Input Field</text>\n \n <!-- Timestamp -->\n <text x=\"${width - 20}\" y=\"${height - 20}\" font-family=\"Arial\" font-size=\"12\" fill=\"#666\" text-anchor=\"end\">${new Date().toISOString()}</text>\n </svg>\n `;\n\n const buffer = await sharp(Buffer.from(svg)).png().toBuffer();\n\n return buffer;\n }\n\n /**\n * Generate grid lines for the pattern\n */\n private static generateGrid(width: number, height: number, spacing: number = 100): string {\n const lines: string[] = [];\n\n // Vertical lines\n for (let x = spacing; x < width; x += spacing) {\n lines.push(\n `<line x1=\"${x}\" y1=\"0\" x2=\"${x}\" y2=\"${height}\" stroke=\"#eeeeee\" stroke-width=\"1\"/>`\n );\n }\n\n // Horizontal lines\n for (let y = spacing; y < height; y += spacing) {\n lines.push(\n `<line x1=\"0\" y1=\"${y}\" x2=\"${width}\" y2=\"${y}\" stroke=\"#eeeeee\" stroke-width=\"1\"/>`\n );\n }\n\n return lines.join('\\n');\n }\n\n /**\n * Save test pattern to file\n */\n static async savePattern(buffer: Buffer, filename: string): Promise<string> {\n const outputDir = path.join(process.cwd(), 'test-patterns');\n await fs.mkdir(outputDir, { recursive: true });\n\n const filepath = path.join(outputDir, filename);\n await fs.writeFile(filepath, buffer);\n\n logger.info(`[TestPatternGenerator] Saved test pattern to ${filepath}`);\n return filepath;\n }\n\n /**\n * Generate patterns for all displays\n */\n static async generatePatternsForAllDisplays(displayCount: number): Promise<Map<number, Buffer>> {\n const patterns = new Map<number, Buffer>();\n\n for (let i = 0; i < displayCount; i++) {\n const pattern = await this.generateQuadrantPattern({\n width: 1920,\n height: 1080,\n displayIndex: i,\n includeTimestamp: true,\n });\n\n patterns.set(i, pattern);\n }\n\n return patterns;\n }\n\n /**\n * Verify OCR results match expected quadrant numbers\n */\n static verifyQuadrantNumbers(ocrText: string): {\n success: boolean;\n foundNumbers: number[];\n missingNumbers: number[];\n } {\n const expectedNumbers = [1, 2, 3, 4, 5];\n const foundNumbers: number[] = [];\n\n // Extract all numbers from OCR text\n const matches = ocrText.match(/\\d+/g);\n if (matches) {\n matches.forEach((match) => {\n const num = parseInt(match, 10);\n if (expectedNumbers.includes(num) && !foundNumbers.includes(num)) {\n foundNumbers.push(num);\n }\n });\n }\n\n const missingNumbers = expectedNumbers.filter((n) => !foundNumbers.includes(n));\n\n return {\n success: missingNumbers.length === 0,\n foundNumbers,\n missingNumbers,\n };\n }\n}\n"]}
package/dist/tests.d.ts DELETED
@@ -1,3 +0,0 @@
1
- export { default as VisionBasicE2ETestSuite } from './tests/e2e/vision-basic';
2
- export { default as VisionAutonomyE2ETestSuite } from './tests/e2e/vision-autonomy';
3
- export { testSuites as default } from './tests/e2e/index';
package/dist/tests.js DELETED
@@ -1,11 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.default = exports.VisionAutonomyE2ETestSuite = exports.VisionBasicE2ETestSuite = void 0;
4
- // Export all test suites for the vision plugin
5
- var vision_basic_1 = require("./tests/e2e/vision-basic");
6
- Object.defineProperty(exports, "VisionBasicE2ETestSuite", { enumerable: true, get: function () { return vision_basic_1.default; } });
7
- var vision_autonomy_1 = require("./tests/e2e/vision-autonomy");
8
- Object.defineProperty(exports, "VisionAutonomyE2ETestSuite", { enumerable: true, get: function () { return vision_autonomy_1.default; } });
9
- var index_1 = require("./tests/e2e/index");
10
- Object.defineProperty(exports, "default", { enumerable: true, get: function () { return index_1.testSuites; } });
11
- //# sourceMappingURL=tests.js.map
package/dist/tests.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"tests.js","sourceRoot":"","sources":["../src/tests.ts"],"names":[],"mappings":";;;AAAA,+CAA+C;AAC/C,yDAA8E;AAArE,uHAAA,OAAO,OAA2B;AAC3C,+DAAoF;AAA3E,6HAAA,OAAO,OAA8B;AAC9C,2CAA0D;AAAjD,gGAAA,UAAU,OAAW","sourcesContent":["// Export all test suites for the vision plugin\nexport { default as VisionBasicE2ETestSuite } from './tests/e2e/vision-basic';\nexport { default as VisionAutonomyE2ETestSuite } from './tests/e2e/vision-autonomy';\nexport { testSuites as default } from './tests/e2e/index';\n"]}
package/dist/types.d.ts DELETED
@@ -1,222 +0,0 @@
1
- export declare const VisionServiceType: {
2
- VISION: "VISION";
3
- };
4
- export interface CameraInfo {
5
- id: string;
6
- name: string;
7
- connected: boolean;
8
- }
9
- export interface SceneDescription {
10
- timestamp: number;
11
- description: string;
12
- objects: DetectedObject[];
13
- people: PersonInfo[];
14
- sceneChanged: boolean;
15
- changePercentage: number;
16
- audioTranscription?: string;
17
- }
18
- export interface DetectedObject {
19
- id: string;
20
- type: string;
21
- confidence: number;
22
- boundingBox: BoundingBox;
23
- }
24
- export interface PersonInfo {
25
- id: string;
26
- pose: 'sitting' | 'standing' | 'lying' | 'unknown';
27
- facing: 'camera' | 'away' | 'left' | 'right' | 'unknown';
28
- confidence: number;
29
- boundingBox: BoundingBox;
30
- keypoints?: Array<{
31
- part: string;
32
- position: {
33
- x: number;
34
- y: number;
35
- };
36
- score: number;
37
- }>;
38
- }
39
- export interface BoundingBox {
40
- x: number;
41
- y: number;
42
- width: number;
43
- height: number;
44
- }
45
- export interface VisionFrame {
46
- timestamp: number;
47
- width: number;
48
- height: number;
49
- data: Buffer;
50
- format: 'rgb' | 'rgba' | 'jpeg' | 'png';
51
- }
52
- export declare enum VisionMode {
53
- OFF = "OFF",
54
- CAMERA = "CAMERA",
55
- SCREEN = "SCREEN",
56
- BOTH = "BOTH"
57
- }
58
- export interface ScreenCapture {
59
- timestamp: number;
60
- width: number;
61
- height: number;
62
- data: Buffer;
63
- tiles: ScreenTile[];
64
- }
65
- export interface ScreenTile {
66
- id: string;
67
- row: number;
68
- col: number;
69
- x: number;
70
- y: number;
71
- width: number;
72
- height: number;
73
- data?: Buffer;
74
- analysis?: TileAnalysis;
75
- }
76
- export interface TileAnalysis {
77
- timestamp: number;
78
- florence2?: Florence2Result;
79
- ocr?: OCRResult;
80
- objects?: DetectedObject[];
81
- text?: string;
82
- summary?: string;
83
- }
84
- export interface Florence2Result {
85
- caption?: string;
86
- objects?: Array<{
87
- label: string;
88
- bbox: BoundingBox;
89
- confidence: number;
90
- }>;
91
- regions?: Array<{
92
- description: string;
93
- bbox: BoundingBox;
94
- }>;
95
- tags?: string[];
96
- }
97
- export interface OCRResult {
98
- text: string;
99
- blocks: Array<{
100
- text: string;
101
- bbox: BoundingBox;
102
- confidence: number;
103
- words?: Array<{
104
- text: string;
105
- bbox: BoundingBox;
106
- confidence: number;
107
- }>;
108
- }>;
109
- fullText: string;
110
- }
111
- export interface EnhancedSceneDescription extends SceneDescription {
112
- screenCapture?: ScreenCapture;
113
- screenAnalysis?: {
114
- fullScreenOCR?: string;
115
- activeTile?: TileAnalysis;
116
- gridSummary?: string;
117
- focusedApp?: string;
118
- uiElements?: Array<{
119
- type: string;
120
- text: string;
121
- position: BoundingBox;
122
- }>;
123
- };
124
- }
125
- export interface VisionConfig {
126
- cameraName?: string;
127
- pixelChangeThreshold?: number;
128
- updateInterval?: number;
129
- enablePoseDetection?: boolean;
130
- enableObjectDetection?: boolean;
131
- tfUpdateInterval?: number;
132
- vlmUpdateInterval?: number;
133
- tfChangeThreshold?: number;
134
- vlmChangeThreshold?: number;
135
- visionMode?: VisionMode;
136
- screenCaptureInterval?: number;
137
- tileSize?: number;
138
- tileProcessingOrder?: 'sequential' | 'priority' | 'random';
139
- ocrEnabled?: boolean;
140
- florence2Enabled?: boolean;
141
- screenRegion?: {
142
- x: number;
143
- y: number;
144
- width: number;
145
- height: number;
146
- };
147
- displayIndex?: number;
148
- captureAllDisplays?: boolean;
149
- targetScreenFPS?: number;
150
- textRegions?: Array<{
151
- x: number;
152
- y: number;
153
- width: number;
154
- height: number;
155
- }>;
156
- }
157
- export interface TrackedEntity {
158
- id: string;
159
- entityType: 'person' | 'object' | 'pet';
160
- firstSeen: number;
161
- lastSeen: number;
162
- lastPosition: BoundingBox;
163
- appearances: EntityAppearance[];
164
- attributes: EntityAttributes;
165
- worldId?: string;
166
- roomId?: string;
167
- }
168
- export interface EntityAppearance {
169
- timestamp: number;
170
- boundingBox: BoundingBox;
171
- confidence: number;
172
- embedding?: number[];
173
- keypoints?: Array<{
174
- part: string;
175
- position: {
176
- x: number;
177
- y: number;
178
- };
179
- score: number;
180
- }>;
181
- }
182
- export interface EntityAttributes {
183
- name?: string;
184
- faceEmbedding?: number[];
185
- faceId?: string;
186
- clothing?: string[];
187
- hairColor?: string;
188
- accessories?: string[];
189
- objectType?: string;
190
- color?: string;
191
- size?: 'small' | 'medium' | 'large';
192
- description?: string;
193
- tags?: string[];
194
- }
195
- export interface FaceLibrary {
196
- faces: Map<string, FaceProfile>;
197
- embeddings: Map<string, number[][]>;
198
- }
199
- export interface FaceProfile {
200
- id: string;
201
- name?: string;
202
- embeddings: number[][];
203
- firstSeen: number;
204
- lastSeen: number;
205
- seenCount: number;
206
- attributes?: {
207
- age?: string;
208
- gender?: string;
209
- emotion?: string;
210
- };
211
- }
212
- export interface WorldState {
213
- worldId: string;
214
- entities: Map<string, TrackedEntity>;
215
- lastUpdate: number;
216
- activeEntities: string[];
217
- recentlyLeft: Array<{
218
- entityId: string;
219
- leftAt: number;
220
- lastPosition: BoundingBox;
221
- }>;
222
- }
package/dist/types.js DELETED
@@ -1,16 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.VisionMode = exports.VisionServiceType = void 0;
4
- // Vision service types and interfaces
5
- exports.VisionServiceType = {
6
- VISION: 'VISION',
7
- };
8
- // Vision modes
9
- var VisionMode;
10
- (function (VisionMode) {
11
- VisionMode["OFF"] = "OFF";
12
- VisionMode["CAMERA"] = "CAMERA";
13
- VisionMode["SCREEN"] = "SCREEN";
14
- VisionMode["BOTH"] = "BOTH";
15
- })(VisionMode || (exports.VisionMode = VisionMode = {}));
16
- //# sourceMappingURL=types.js.map
package/dist/types.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";;;AAAA,sCAAsC;AACzB,QAAA,iBAAiB,GAAG;IAC/B,MAAM,EAAE,QAAiB;CAC1B,CAAC;AAsDF,eAAe;AACf,IAAY,UAKX;AALD,WAAY,UAAU;IACpB,yBAAW,CAAA;IACX,+BAAiB,CAAA;IACjB,+BAAiB,CAAA;IACjB,2BAAa,CAAA;AACf,CAAC,EALW,UAAU,0BAAV,UAAU,QAKrB","sourcesContent":["// Vision service types and interfaces\nexport const VisionServiceType = {\n VISION: 'VISION' as const,\n};\n\n// Vision-specific types\nexport interface CameraInfo {\n id: string;\n name: string;\n connected: boolean;\n}\n\nexport interface SceneDescription {\n timestamp: number;\n description: string;\n objects: DetectedObject[];\n people: PersonInfo[];\n sceneChanged: boolean;\n changePercentage: number;\n audioTranscription?: string; // Latest audio transcription\n}\n\nexport interface DetectedObject {\n id: string;\n type: string;\n confidence: number;\n boundingBox: BoundingBox;\n}\n\nexport interface PersonInfo {\n id: string;\n pose: 'sitting' | 'standing' | 'lying' | 'unknown';\n facing: 'camera' | 'away' | 'left' | 'right' | 'unknown';\n confidence: number;\n boundingBox: BoundingBox;\n keypoints?: Array<{\n part: string;\n position: { x: number; y: number };\n score: number;\n }>;\n}\n\nexport interface BoundingBox {\n x: number;\n y: number;\n width: number;\n height: number;\n}\n\nexport interface VisionFrame {\n timestamp: number;\n width: number;\n height: number;\n data: Buffer;\n format: 'rgb' | 'rgba' | 'jpeg' | 'png';\n}\n\n// Vision modes\nexport enum VisionMode {\n OFF = 'OFF',\n CAMERA = 'CAMERA',\n SCREEN = 'SCREEN',\n BOTH = 'BOTH',\n}\n\n// Screen capture types\nexport interface ScreenCapture {\n timestamp: number;\n width: number;\n height: number;\n data: Buffer;\n tiles: ScreenTile[];\n}\n\nexport interface ScreenTile {\n id: string;\n row: number;\n col: number;\n x: number;\n y: number;\n width: number;\n height: number;\n data?: Buffer;\n analysis?: TileAnalysis;\n}\n\nexport interface TileAnalysis {\n timestamp: number;\n florence2?: Florence2Result;\n ocr?: OCRResult;\n objects?: DetectedObject[];\n text?: string;\n summary?: string;\n}\n\nexport interface Florence2Result {\n caption?: string;\n objects?: Array<{\n label: string;\n bbox: BoundingBox;\n confidence: number;\n }>;\n regions?: Array<{\n description: string;\n bbox: BoundingBox;\n }>;\n tags?: string[];\n}\n\nexport interface OCRResult {\n text: string;\n blocks: Array<{\n text: string;\n bbox: BoundingBox;\n confidence: number;\n words?: Array<{\n text: string;\n bbox: BoundingBox;\n confidence: number;\n }>;\n }>;\n fullText: string;\n}\n\n// Enhanced scene description with screen data\nexport interface EnhancedSceneDescription extends SceneDescription {\n screenCapture?: ScreenCapture;\n screenAnalysis?: {\n fullScreenOCR?: string;\n activeTile?: TileAnalysis;\n gridSummary?: string;\n focusedApp?: string;\n uiElements?: Array<{\n type: string;\n text: string;\n position: BoundingBox;\n }>;\n };\n}\n\n// Update VisionConfig\nexport interface VisionConfig {\n cameraName?: string;\n pixelChangeThreshold?: number;\n updateInterval?: number;\n enablePoseDetection?: boolean;\n enableObjectDetection?: boolean;\n tfUpdateInterval?: number;\n vlmUpdateInterval?: number;\n tfChangeThreshold?: number;\n vlmChangeThreshold?: number;\n\n // Screen vision config\n visionMode?: VisionMode;\n screenCaptureInterval?: number; // ms between screen captures\n tileSize?: number; // Size of tiles (e.g., 256 for 256x256)\n tileProcessingOrder?: 'sequential' | 'priority' | 'random';\n ocrEnabled?: boolean;\n florence2Enabled?: boolean;\n screenRegion?: {\n // Optional: capture only part of screen\n x: number;\n y: number;\n width: number;\n height: number;\n };\n\n // Multi-display and worker config\n displayIndex?: number; // Specific display to capture\n captureAllDisplays?: boolean; // Cycle through all displays\n targetScreenFPS?: number; // Target FPS for screen capture\n textRegions?: Array<{\n // Specific regions for OCR\n x: number;\n y: number;\n width: number;\n height: number;\n }>;\n}\n\n// Entity tracking types\nexport interface TrackedEntity {\n id: string;\n entityType: 'person' | 'object' | 'pet';\n firstSeen: number;\n lastSeen: number;\n lastPosition: BoundingBox;\n appearances: EntityAppearance[];\n attributes: EntityAttributes;\n worldId?: string;\n roomId?: string;\n}\n\nexport interface EntityAppearance {\n timestamp: number;\n boundingBox: BoundingBox;\n confidence: number;\n embedding?: number[]; // Face embedding for person recognition\n keypoints?: Array<{\n part: string;\n position: { x: number; y: number };\n score: number;\n }>;\n}\n\nexport interface EntityAttributes {\n // For people\n name?: string;\n faceEmbedding?: number[];\n faceId?: string;\n clothing?: string[];\n hairColor?: string;\n accessories?: string[];\n\n // For objects\n objectType?: string;\n color?: string;\n size?: 'small' | 'medium' | 'large';\n\n // Common\n description?: string;\n tags?: string[];\n}\n\nexport interface FaceLibrary {\n faces: Map<string, FaceProfile>;\n embeddings: Map<string, number[][]>; // Multiple embeddings per profile\n}\n\nexport interface FaceProfile {\n id: string;\n name?: string;\n embeddings: number[][]; // Multiple embeddings for better recognition\n firstSeen: number;\n lastSeen: number;\n seenCount: number;\n attributes?: {\n age?: string;\n gender?: string;\n emotion?: string;\n };\n}\n\nexport interface WorldState {\n worldId: string;\n entities: Map<string, TrackedEntity>;\n lastUpdate: number;\n activeEntities: string[]; // Currently visible\n recentlyLeft: Array<{\n entityId: string;\n leftAt: number;\n lastPosition: BoundingBox;\n }>;\n}\n"]}
@@ -1,47 +0,0 @@
1
- import * as poseDetection from '@tensorflow-models/pose-detection';
2
- import { IAgentRuntime } from '@elizaos/core';
3
- import { DetectedObject, PersonInfo } from './types';
4
- export interface VisionModelConfig {
5
- enableObjectDetection?: boolean;
6
- enablePoseDetection?: boolean;
7
- florence2?: {
8
- baseUrl: string;
9
- apiKey?: string;
10
- };
11
- vlm?: {
12
- model: string;
13
- temperature: number;
14
- maxTokens: number;
15
- };
16
- }
17
- export type Pose = 'sitting' | 'standing' | 'lying' | 'walking' | 'unknown';
18
- export interface PoseLandmark {
19
- name: string;
20
- x: number;
21
- y: number;
22
- score: number;
23
- }
24
- export declare class VisionModels {
25
- private runtime;
26
- private config;
27
- private objectDetectionModel;
28
- private poseDetector;
29
- private initialized;
30
- private florence2Model;
31
- private cocoSsdModel;
32
- private posenetModel;
33
- constructor(runtime: IAgentRuntime, config?: VisionModelConfig);
34
- initialize(config: VisionModelConfig): Promise<void>;
35
- hasObjectDetection(): boolean;
36
- hasPoseDetection(): boolean;
37
- detectObjects(imageData: Buffer, width: number, height: number, description?: string): Promise<DetectedObject[]>;
38
- private enhancedObjectDetection;
39
- private generatePlausibleBoundingBox;
40
- detectPoses(imageData: Buffer, width: number, height: number, description?: string): Promise<PersonInfo[]>;
41
- private enhancedPoseDetection;
42
- private generatePlausibleKeypoints;
43
- convertPosesToPersonInfo(poses: poseDetection.Pose[]): PersonInfo[];
44
- private determinePoseFromKeypoints;
45
- private determineFacingDirection;
46
- dispose(): Promise<void>;
47
- }