@elizaos/plugin-vision 1.2.1 → 2.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/build.config.ts +53 -53
  2. package/dist/index.js +6716 -67
  3. package/dist/index.js.map +33 -1
  4. package/dist/workers/florence2-worker.js +112304 -307
  5. package/dist/workers/florence2-worker.js.map +92 -1
  6. package/dist/workers/ocr-worker.js +119718 -339
  7. package/dist/workers/ocr-worker.js.map +137 -1
  8. package/dist/workers/screen-capture-worker.js +350 -418
  9. package/dist/workers/screen-capture-worker.js.map +11 -1
  10. package/package.json +13 -17
  11. package/README.md +0 -270
  12. package/dist/action.d.ts +0 -8
  13. package/dist/action.js +0 -1212
  14. package/dist/action.js.map +0 -1
  15. package/dist/audio-capture-stream.d.ts +0 -42
  16. package/dist/audio-capture-stream.js +0 -516
  17. package/dist/audio-capture-stream.js.map +0 -1
  18. package/dist/audio-capture.d.ts +0 -25
  19. package/dist/audio-capture.js +0 -412
  20. package/dist/audio-capture.js.map +0 -1
  21. package/dist/basic.test.d.ts +0 -1
  22. package/dist/basic.test.js +0 -97
  23. package/dist/basic.test.js.map +0 -1
  24. package/dist/config.d.ts +0 -73
  25. package/dist/config.js +0 -254
  26. package/dist/config.js.map +0 -1
  27. package/dist/entity-tracker.d.ts +0 -32
  28. package/dist/entity-tracker.js +0 -361
  29. package/dist/entity-tracker.js.map +0 -1
  30. package/dist/errors.d.ts +0 -67
  31. package/dist/errors.js +0 -395
  32. package/dist/errors.js.map +0 -1
  33. package/dist/face-recognition.d.ts +0 -31
  34. package/dist/face-recognition.js +0 -332
  35. package/dist/face-recognition.js.map +0 -1
  36. package/dist/florence2-local.d.ts +0 -25
  37. package/dist/florence2-local.js +0 -280
  38. package/dist/florence2-local.js.map +0 -1
  39. package/dist/florence2-model.d.ts +0 -36
  40. package/dist/florence2-model.js +0 -503
  41. package/dist/florence2-model.js.map +0 -1
  42. package/dist/index.d.ts +0 -3
  43. package/dist/ocr-service-real.d.ts +0 -32
  44. package/dist/ocr-service-real.js +0 -396
  45. package/dist/ocr-service-real.js.map +0 -1
  46. package/dist/ocr-service.d.ts +0 -28
  47. package/dist/ocr-service.js +0 -216
  48. package/dist/ocr-service.js.map +0 -1
  49. package/dist/provider.d.ts +0 -2
  50. package/dist/provider.js +0 -285
  51. package/dist/provider.js.map +0 -1
  52. package/dist/screen-capture.d.ts +0 -16
  53. package/dist/screen-capture.js +0 -302
  54. package/dist/screen-capture.js.map +0 -1
  55. package/dist/service.d.ts +0 -73
  56. package/dist/service.js +0 -1662
  57. package/dist/service.js.map +0 -1
  58. package/dist/tests/e2e/index.d.ts +0 -8
  59. package/dist/tests/e2e/index.js +0 -33
  60. package/dist/tests/e2e/index.js.map +0 -1
  61. package/dist/tests/e2e/run-local.d.ts +0 -2
  62. package/dist/tests/e2e/run-local.js +0 -166
  63. package/dist/tests/e2e/run-local.js.map +0 -1
  64. package/dist/tests/e2e/screen-vision.d.ts +0 -11
  65. package/dist/tests/e2e/screen-vision.js +0 -384
  66. package/dist/tests/e2e/screen-vision.js.map +0 -1
  67. package/dist/tests/e2e/vision-autonomy.d.ts +0 -11
  68. package/dist/tests/e2e/vision-autonomy.js +0 -375
  69. package/dist/tests/e2e/vision-autonomy.js.map +0 -1
  70. package/dist/tests/e2e/vision-basic.d.ts +0 -11
  71. package/dist/tests/e2e/vision-basic.js +0 -434
  72. package/dist/tests/e2e/vision-basic.js.map +0 -1
  73. package/dist/tests/e2e/vision-capture-log.d.ts +0 -11
  74. package/dist/tests/e2e/vision-capture-log.js +0 -302
  75. package/dist/tests/e2e/vision-capture-log.js.map +0 -1
  76. package/dist/tests/e2e/vision-runtime.d.ts +0 -11
  77. package/dist/tests/e2e/vision-runtime.js +0 -357
  78. package/dist/tests/e2e/vision-runtime.js.map +0 -1
  79. package/dist/tests/e2e/vision-worker-tests.d.ts +0 -11
  80. package/dist/tests/e2e/vision-worker-tests.js +0 -466
  81. package/dist/tests/e2e/vision-worker-tests.js.map +0 -1
  82. package/dist/tests/test-pattern-generator.d.ts +0 -40
  83. package/dist/tests/test-pattern-generator.js +0 -191
  84. package/dist/tests/test-pattern-generator.js.map +0 -1
  85. package/dist/tests.d.ts +0 -3
  86. package/dist/tests.js +0 -11
  87. package/dist/tests.js.map +0 -1
  88. package/dist/types.d.ts +0 -222
  89. package/dist/types.js +0 -16
  90. package/dist/types.js.map +0 -1
  91. package/dist/vision-models.d.ts +0 -47
  92. package/dist/vision-models.js +0 -501
  93. package/dist/vision-models.js.map +0 -1
  94. package/dist/vision-worker-manager.d.ts +0 -61
  95. package/dist/vision-worker-manager.js +0 -668
  96. package/dist/vision-worker-manager.js.map +0 -1
  97. package/dist/workers/florence2-worker-simple.d.ts +0 -13
  98. package/dist/workers/florence2-worker-simple.js +0 -121
  99. package/dist/workers/florence2-worker-simple.js.map +0 -1
  100. package/dist/workers/florence2-worker.d.ts +0 -1
  101. package/dist/workers/ocr-worker.d.ts +0 -1
  102. package/dist/workers/screen-capture-worker.d.ts +0 -1
  103. package/dist/workers/worker-logger.d.ts +0 -9
  104. package/dist/workers/worker-logger.js +0 -95
  105. package/dist/workers/worker-logger.js.map +0 -1
@@ -1,501 +0,0 @@
1
- "use strict";
2
// __awaiter(thisArg, _arguments, P, generator)
// Runs the generator function `generator` (applied to thisArg/_arguments) to
// completion and returns a promise of type P (defaults to the global Promise).
// - Each value the generator yields is wrapped into a P by adopt(); when that
//   promise fulfils, its value is fed back through generator.next(); when it
//   rejects, the reason is fed back through generator["throw"]().
// - When the generator reports done, the returned promise resolves with the
//   final value; any exception escaping next()/throw() rejects it.
// The `(this && this.__awaiter) ||` prefix reuses a helper already present on
// `this` instead of defining a second copy.
// NOTE(review): this looks like the standard helper emitted by the TypeScript
// compiler for async functions - confirm before hand-editing.
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
// __generator(thisArg, body)
// Builds an iterator-like object `g` exposing next / throw / return (and
// Symbol.iterator when available). Each of those calls step([n, v]) with
// n = 0 (next), 1 (throw) or 2 (return).
// step() repeatedly invokes body.call(thisArg, _) where `_` is the shared
// state record: `label` (the case the body should resume at), `sent()`
// (returns the value passed in, or throws it when the op was a throw),
// `trys` (stack of active try regions) and `ops` (pending completions).
// The body answers with an instruction array [opcode, operand]; as handled
// by the switch below:
//   0 / 1  - next / throw input, stored for `_.sent()`
//   2      - return (finish with operand as value)
//   3      - jump to the label given by the operand
//   4      - yield operand to the caller ({ value, done: false })
//   5      - delegate to an inner iterator (operand becomes `y`)
//   6      - an exception was caught; route it to the enclosing try region
//   7      - pop a finished try region and resume the pending op
// `f` guards against re-entrant execution ("Generator is already executing.").
// NOTE(review): this looks like the standard helper emitted by the TypeScript
// compiler for downlevelled generators/async functions - confirm before
// hand-editing; statement order here is significant.
- var __generator = (this && this.__generator) || function (thisArg, body) {
12
- var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
13
- return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
14
- function verb(n) { return function (v) { return step([n, v]); }; }
15
- function step(op) {
16
- if (f) throw new TypeError("Generator is already executing.");
17
- while (g && (g = 0, op[0] && (_ = 0)), _) try {
18
- if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
19
- if (y = 0, t) op = [op[0] & 2, t.value];
20
- switch (op[0]) {
21
- case 0: case 1: t = op; break;
22
- case 4: _.label++; return { value: op[1], done: false };
23
- case 5: _.label++; y = op[1]; op = [0]; continue;
24
- case 7: op = _.ops.pop(); _.trys.pop(); continue;
25
- default:
26
- if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
27
- if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
28
- if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
29
- if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
30
- if (t[2]) _.ops.pop();
31
- _.trys.pop(); continue;
32
- }
33
- op = body.call(thisArg, _);
34
- } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
35
- if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
36
- }
37
- };
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.VisionModels = void 0;
40
- // Vision models for object detection and pose estimation
41
- var cocoSsd = require("@tensorflow-models/coco-ssd");
42
- var poseDetection = require("@tensorflow-models/pose-detection");
43
- var tf = require("@tensorflow/tfjs-node");
44
- var core_1 = require("@elizaos/core");
45
- var VisionModels = /** @class */ (function () {
46
- function VisionModels(runtime, config) {
47
- this.objectDetectionModel = null;
48
- this.poseDetector = null;
49
- this.initialized = false;
50
- this.florence2Model = null;
51
- this.cocoSsdModel = null;
52
- this.posenetModel = null;
53
- this.runtime = runtime;
54
- this.config = config || {
55
- florence2: {
56
- baseUrl: 'http://localhost:8000',
57
- apiKey: undefined,
58
- },
59
- vlm: {
60
- model: 'gpt-4o',
61
- temperature: 0.7,
62
- maxTokens: 500,
63
- },
64
- };
65
- }
66
- VisionModels.prototype.initialize = function (config) {
67
- return __awaiter(this, void 0, void 0, function () {
68
- var _a, error_1, detectorConfig, _b, error_2, error_3;
69
- return __generator(this, function (_c) {
70
- switch (_c.label) {
71
- case 0:
72
- if (this.initialized) {
73
- return [2 /*return*/];
74
- }
75
- this.config = config;
76
- core_1.logger.info('[VisionModels] Initializing vision models...');
77
- _c.label = 1;
78
- case 1:
79
- _c.trys.push([1, 11, , 12]);
80
- // Initialize TensorFlow.js backend
81
- return [4 /*yield*/, tf.ready()];
82
- case 2:
83
- // Initialize TensorFlow.js backend
84
- _c.sent();
85
- core_1.logger.info('[VisionModels] TensorFlow.js backend ready');
86
- if (!config.enableObjectDetection) return [3 /*break*/, 6];
87
- _c.label = 3;
88
- case 3:
89
- _c.trys.push([3, 5, , 6]);
90
- core_1.logger.info('[VisionModels] Loading COCO-SSD model...');
91
- _a = this;
92
- return [4 /*yield*/, cocoSsd.load({
93
- base: 'mobilenet_v2',
94
- })];
95
- case 4:
96
- _a.objectDetectionModel = _c.sent();
97
- core_1.logger.info('[VisionModels] COCO-SSD model loaded');
98
- return [3 /*break*/, 6];
99
- case 5:
100
- error_1 = _c.sent();
101
- core_1.logger.error('[VisionModels] Failed to load COCO-SSD model:', error_1);
102
- return [3 /*break*/, 6];
103
- case 6:
104
- if (!config.enablePoseDetection) return [3 /*break*/, 10];
105
- _c.label = 7;
106
- case 7:
107
- _c.trys.push([7, 9, , 10]);
108
- core_1.logger.info('[VisionModels] Loading PoseNet model...');
109
- detectorConfig = {
110
- architecture: 'MobileNetV1',
111
- outputStride: 16,
112
- inputResolution: { width: 640, height: 480 },
113
- multiplier: 0.75,
114
- };
115
- _b = this;
116
- return [4 /*yield*/, poseDetection.createDetector(poseDetection.SupportedModels.PoseNet, detectorConfig)];
117
- case 8:
118
- _b.poseDetector = _c.sent();
119
- core_1.logger.info('[VisionModels] PoseNet model loaded');
120
- return [3 /*break*/, 10];
121
- case 9:
122
- error_2 = _c.sent();
123
- core_1.logger.error('[VisionModels] Failed to load PoseNet model:', error_2);
124
- return [3 /*break*/, 10];
125
- case 10:
126
- this.initialized = true;
127
- core_1.logger.info('[VisionModels] Vision models initialized');
128
- return [3 /*break*/, 12];
129
- case 11:
130
- error_3 = _c.sent();
131
- core_1.logger.error('[VisionModels] Initialization failed:', error_3);
132
- throw error_3;
133
- case 12: return [2 /*return*/];
134
- }
135
- });
136
- });
137
- };
138
- VisionModels.prototype.hasObjectDetection = function () {
139
- return this.objectDetectionModel !== null;
140
- };
141
- VisionModels.prototype.hasPoseDetection = function () {
142
- return this.poseDetector !== null;
143
- };
144
- VisionModels.prototype.detectObjects = function (imageData, width, height, description) {
145
- return __awaiter(this, void 0, void 0, function () {
146
- var imageTensor, batched, predictions, objects, error_4;
147
- return __generator(this, function (_a) {
148
- switch (_a.label) {
149
- case 0:
150
- if (!this.objectDetectionModel) {
151
- core_1.logger.warn('[VisionModels] Object detection model not loaded');
152
- return [2 /*return*/, this.enhancedObjectDetection(description)];
153
- }
154
- _a.label = 1;
155
- case 1:
156
- _a.trys.push([1, 3, , 4]);
157
- imageTensor = tf.node.decodeImage(imageData, 3);
158
- batched = imageTensor.expandDims(0);
159
- return [4 /*yield*/, this.objectDetectionModel.detect(batched)];
160
- case 2:
161
- predictions = _a.sent();
162
- // Clean up tensors
163
- imageTensor.dispose();
164
- batched.dispose();
165
- objects = predictions.map(function (pred, idx) { return ({
166
- id: "obj-".concat(Date.now(), "-").concat(idx),
167
- type: pred.class,
168
- confidence: pred.score,
169
- boundingBox: {
170
- x: pred.bbox[0],
171
- y: pred.bbox[1],
172
- width: pred.bbox[2],
173
- height: pred.bbox[3],
174
- },
175
- }); });
176
- core_1.logger.debug("[VisionModels] Detected ".concat(objects.length, " objects"));
177
- return [2 /*return*/, objects];
178
- case 3:
179
- error_4 = _a.sent();
180
- core_1.logger.error('[VisionModels] Object detection failed:', error_4);
181
- return [2 /*return*/, this.enhancedObjectDetection(description)];
182
- case 4: return [2 /*return*/];
183
- }
184
- });
185
- });
186
- };
187
- VisionModels.prototype.enhancedObjectDetection = function (description) {
188
- // Enhanced object detection based on scene description
189
- if (!description) {
190
- return [];
191
- }
192
- var objects = [];
193
- // Extract objects from description using patterns
194
- var objectPatterns = [
195
- { pattern: /(\d+)?\s*(person|people|man|men|woman|women|child|children)/gi, type: 'person' },
196
- { pattern: /(\d+)?\s*(laptop|computer|monitor|screen|display)/gi, type: 'laptop' },
197
- { pattern: /(\d+)?\s*(phone|smartphone|mobile)/gi, type: 'cell phone' },
198
- { pattern: /(\d+)?\s*(book|notebook|journal)/gi, type: 'book' },
199
- { pattern: /(\d+)?\s*(cup|mug|glass|bottle)/gi, type: 'cup' },
200
- { pattern: /(\d+)?\s*(chair|seat|sofa|couch)/gi, type: 'chair' },
201
- { pattern: /(\d+)?\s*(table|desk)/gi, type: 'dining table' },
202
- { pattern: /(\d+)?\s*(car|vehicle|truck|bus)/gi, type: 'car' },
203
- { pattern: /(\d+)?\s*(dog|cat|pet|animal)/gi, type: 'animal' },
204
- { pattern: /(\d+)?\s*(plant|tree|flower)/gi, type: 'potted plant' },
205
- ];
206
- for (var _i = 0, objectPatterns_1 = objectPatterns; _i < objectPatterns_1.length; _i++) {
207
- var _a = objectPatterns_1[_i], pattern = _a.pattern, type = _a.type;
208
- var matches = Array.from(description.matchAll(pattern));
209
- for (var _b = 0, matches_1 = matches; _b < matches_1.length; _b++) {
210
- var match = matches_1[_b];
211
- var count = match[1] ? parseInt(match[1], 10) : 1;
212
- for (var i = 0; i < count; i++) {
213
- objects.push({
214
- id: "obj-".concat(type, "-").concat(Date.now(), "-").concat(i),
215
- type: type,
216
- confidence: 0.85, // High confidence since it's from VLM
217
- boundingBox: this.generatePlausibleBoundingBox(type, i, count),
218
- });
219
- }
220
- }
221
- }
222
- return objects;
223
- };
224
- VisionModels.prototype.generatePlausibleBoundingBox = function (type, index, total) {
225
- // Generate plausible bounding boxes based on object type and position
226
- var basePositions = {
227
- person: { width: 150, height: 300, y: 100 },
228
- laptop: { width: 200, height: 150, y: 250 },
229
- 'cell phone': { width: 50, height: 100, y: 300 },
230
- book: { width: 100, height: 150, y: 280 },
231
- cup: { width: 60, height: 80, y: 300 },
232
- chair: { width: 180, height: 200, y: 200 },
233
- 'dining table': { width: 400, height: 200, y: 250 },
234
- car: { width: 300, height: 200, y: 150 },
235
- animal: { width: 120, height: 100, y: 300 },
236
- 'potted plant': { width: 100, height: 150, y: 200 },
237
- };
238
- var base = basePositions[type] || { width: 100, height: 100, y: 200 };
239
- var spacing = 640 / (total + 1); // Distribute across frame width
240
- return {
241
- x: spacing * (index + 1) - base.width / 2,
242
- y: base.y + (Math.random() - 0.5) * 50,
243
- width: base.width + (Math.random() - 0.5) * 40,
244
- height: base.height + (Math.random() - 0.5) * 40,
245
- };
246
- };
247
- VisionModels.prototype.detectPoses = function (imageData, width, height, description) {
248
- return __awaiter(this, void 0, void 0, function () {
249
- var imageTensor, poses, error_5;
250
- return __generator(this, function (_a) {
251
- switch (_a.label) {
252
- case 0:
253
- if (!this.poseDetector) {
254
- core_1.logger.warn('[VisionModels] Pose detection model not loaded');
255
- return [2 /*return*/, this.enhancedPoseDetection(description)];
256
- }
257
- _a.label = 1;
258
- case 1:
259
- _a.trys.push([1, 3, , 4]);
260
- imageTensor = tf.node.decodeImage(imageData, 3);
261
- return [4 /*yield*/, this.poseDetector.estimatePoses({
262
- data: new Uint8ClampedArray(imageTensor.dataSync()),
263
- width: width,
264
- height: height,
265
- })];
266
- case 2:
267
- poses = _a.sent();
268
- // Clean up tensor
269
- imageTensor.dispose();
270
- // Convert poses to PersonInfo
271
- return [2 /*return*/, this.convertPosesToPersonInfo(poses)];
272
- case 3:
273
- error_5 = _a.sent();
274
- core_1.logger.error('[VisionModels] Pose detection failed:', error_5);
275
- return [2 /*return*/, this.enhancedPoseDetection(description)];
276
- case 4: return [2 /*return*/];
277
- }
278
- });
279
- });
280
- };
281
- VisionModels.prototype.enhancedPoseDetection = function (description) {
282
- var _a;
283
- // Enhanced pose detection based on scene description
284
- if (!description) {
285
- return [];
286
- }
287
- var people = [];
288
- var descLower = description.toLowerCase();
289
- // Extract people count and descriptions
290
- var peopleMatch = description.match(/(\d+)?\s*(person|people|man|men|woman|women|child|children)/gi);
291
- if (!peopleMatch) {
292
- return [];
293
- }
294
- var count = ((_a = peopleMatch[0].match(/\d+/)) === null || _a === void 0 ? void 0 : _a[0])
295
- ? parseInt(peopleMatch[0].match(/\d+/)[0], 10)
296
- : 1;
297
- // Analyze description for pose and facing information
298
- var poseKeywords = {
299
- standing: ['standing', 'stand', 'upright'],
300
- sitting: ['sitting', 'seated', 'sit', 'chair'],
301
- walking: ['walking', 'walk', 'moving'],
302
- lying: ['lying', 'laying', 'reclined'],
303
- };
304
- var facingKeywords = {
305
- camera: ['facing camera', 'looking at camera', 'facing forward', 'front view'],
306
- away: ['back to camera', 'facing away', 'back view'],
307
- left: ['facing left', 'profile left', 'left side'],
308
- right: ['facing right', 'profile right', 'right side'],
309
- };
310
- var detectedPose = 'standing';
311
- var detectedFacing = 'camera';
312
- // Detect pose
313
- for (var _i = 0, _b = Object.entries(poseKeywords); _i < _b.length; _i++) {
314
- var _c = _b[_i], pose = _c[0], keywords = _c[1];
315
- if (keywords.some(function (kw) { return descLower.includes(kw); })) {
316
- detectedPose = pose;
317
- break;
318
- }
319
- }
320
- // Detect facing
321
- for (var _d = 0, _e = Object.entries(facingKeywords); _d < _e.length; _d++) {
322
- var _f = _e[_d], facing = _f[0], keywords = _f[1];
323
- if (keywords.some(function (kw) { return descLower.includes(kw); })) {
324
- detectedFacing = facing;
325
- break;
326
- }
327
- }
328
- // Create PersonInfo for each detected person
329
- for (var i = 0; i < count; i++) {
330
- var boundingBox = this.generatePlausibleBoundingBox('person', i, count);
331
- people.push({
332
- id: "person-".concat(Date.now(), "-").concat(i),
333
- boundingBox: boundingBox,
334
- pose: detectedPose,
335
- facing: detectedFacing,
336
- confidence: 0.85,
337
- keypoints: this.generatePlausibleKeypoints(boundingBox, detectedPose, detectedFacing).map(function (kp) { return ({
338
- part: kp.name,
339
- position: { x: kp.x, y: kp.y },
340
- score: kp.score,
341
- }); }),
342
- });
343
- }
344
- return people;
345
- };
346
- VisionModels.prototype.generatePlausibleKeypoints = function (boundingBox, pose, _facing) {
347
- // Generate plausible keypoints based on pose and facing
348
- var x = boundingBox.x, y = boundingBox.y, width = boundingBox.width, height = boundingBox.height;
349
- var centerX = x + width / 2;
350
- var keypoints = [];
351
- // Basic keypoint positions relative to bounding box
352
- var positions = {
353
- nose: { x: centerX, y: y + height * 0.1 },
354
- leftEye: { x: centerX - width * 0.1, y: y + height * 0.08 },
355
- rightEye: { x: centerX + width * 0.1, y: y + height * 0.08 },
356
- leftEar: { x: centerX - width * 0.15, y: y + height * 0.1 },
357
- rightEar: { x: centerX + width * 0.15, y: y + height * 0.1 },
358
- leftShoulder: { x: centerX - width * 0.25, y: y + height * 0.25 },
359
- rightShoulder: { x: centerX + width * 0.25, y: y + height * 0.25 },
360
- leftElbow: { x: centerX - width * 0.3, y: y + height * 0.4 },
361
- rightElbow: { x: centerX + width * 0.3, y: y + height * 0.4 },
362
- leftWrist: { x: centerX - width * 0.25, y: y + height * 0.55 },
363
- rightWrist: { x: centerX + width * 0.25, y: y + height * 0.55 },
364
- leftHip: { x: centerX - width * 0.15, y: y + height * 0.5 },
365
- rightHip: { x: centerX + width * 0.15, y: y + height * 0.5 },
366
- leftKnee: { x: centerX - width * 0.15, y: y + height * 0.7 },
367
- rightKnee: { x: centerX + width * 0.15, y: y + height * 0.7 },
368
- leftAnkle: { x: centerX - width * 0.15, y: y + height * 0.9 },
369
- rightAnkle: { x: centerX + width * 0.15, y: y + height * 0.9 },
370
- };
371
- // Adjust positions based on pose
372
- if (pose === 'sitting') {
373
- // Lower hips and knees for sitting pose
374
- positions.leftHip.y += height * 0.1;
375
- positions.rightHip.y += height * 0.1;
376
- positions.leftKnee.y -= height * 0.1;
377
- positions.rightKnee.y -= height * 0.1;
378
- }
379
- // Convert to PoseLandmark format
380
- Object.entries(positions).forEach(function (_a) {
381
- var name = _a[0], pos = _a[1];
382
- keypoints.push({
383
- name: name,
384
- x: pos.x,
385
- y: pos.y,
386
- score: 0.85,
387
- });
388
- });
389
- return keypoints;
390
- };
391
- VisionModels.prototype.convertPosesToPersonInfo = function (poses) {
392
- var _this = this;
393
- return poses.map(function (pose, index) {
394
- var keypoints = pose.keypoints;
395
- // Calculate bounding box from keypoints
396
- var xs = keypoints.map(function (kp) { return kp.x; });
397
- var ys = keypoints.map(function (kp) { return kp.y; });
398
- var minX = Math.min.apply(Math, xs);
399
- var maxX = Math.max.apply(Math, xs);
400
- var minY = Math.min.apply(Math, ys);
401
- var maxY = Math.max.apply(Math, ys);
402
- var boundingBox = {
403
- x: minX,
404
- y: minY,
405
- width: maxX - minX,
406
- height: maxY - minY,
407
- };
408
- // Determine pose from keypoints
409
- var detectedPose = _this.determinePoseFromKeypoints(keypoints);
410
- // Determine facing direction
411
- var facing = _this.determineFacingDirection(keypoints);
412
- // Convert keypoints to our format
413
- var convertedKeypoints = keypoints.map(function (kp) { return ({
414
- part: kp.name || 'unknown',
415
- position: { x: kp.x, y: kp.y },
416
- score: kp.score || 0,
417
- }); });
418
- return {
419
- id: "person-".concat(Date.now(), "-").concat(index),
420
- boundingBox: boundingBox,
421
- pose: detectedPose,
422
- facing: facing,
423
- confidence: pose.score || 0.5,
424
- keypoints: convertedKeypoints,
425
- };
426
- });
427
- };
428
- VisionModels.prototype.determinePoseFromKeypoints = function (keypoints) {
429
- // Simple heuristic to determine pose
430
- var leftHip = keypoints.find(function (kp) { return kp.name === 'left_hip'; });
431
- var rightHip = keypoints.find(function (kp) { return kp.name === 'right_hip'; });
432
- var leftKnee = keypoints.find(function (kp) { return kp.name === 'left_knee'; });
433
- var rightKnee = keypoints.find(function (kp) { return kp.name === 'right_knee'; });
434
- var leftShoulder = keypoints.find(function (kp) { return kp.name === 'left_shoulder'; });
435
- var rightShoulder = keypoints.find(function (kp) { return kp.name === 'right_shoulder'; });
436
- if (!leftHip || !rightHip || !leftKnee || !rightKnee) {
437
- return 'unknown';
438
- }
439
- var hipY = (leftHip.y + rightHip.y) / 2;
440
- var kneeY = (leftKnee.y + rightKnee.y) / 2;
441
- var shoulderY = leftShoulder && rightShoulder ? (leftShoulder.y + rightShoulder.y) / 2 : hipY - 100;
442
- // Check if person is lying down (shoulders and hips at similar height)
443
- if (Math.abs(shoulderY - hipY) < 50) {
444
- return 'lying';
445
- }
446
- // Check if person is sitting (knees close to hips)
447
- if (Math.abs(hipY - kneeY) < 100) {
448
- return 'sitting';
449
- }
450
- // Otherwise assume standing
451
- return 'standing';
452
- };
453
- VisionModels.prototype.determineFacingDirection = function (keypoints) {
454
- var leftShoulder = keypoints.find(function (kp) { return kp.name === 'left_shoulder'; });
455
- var rightShoulder = keypoints.find(function (kp) { return kp.name === 'right_shoulder'; });
456
- var nose = keypoints.find(function (kp) { return kp.name === 'nose'; });
457
- if (!leftShoulder || !rightShoulder || !nose) {
458
- return 'camera'; // Default
459
- }
460
- var shoulderWidth = Math.abs(rightShoulder.x - leftShoulder.x);
461
- var shoulderMidpoint = (leftShoulder.x + rightShoulder.x) / 2;
462
- var noseOffset = nose.x - shoulderMidpoint;
463
- // If shoulders are narrow, person is likely in profile
464
- if (shoulderWidth < 50) {
465
- return noseOffset > 0 ? 'right' : 'left';
466
- }
467
- // If nose is significantly offset from shoulder midpoint, person is turning
468
- if (Math.abs(noseOffset) > shoulderWidth * 0.3) {
469
- return noseOffset > 0 ? 'right' : 'left';
470
- }
471
- // Check if both ears are visible (facing camera) or not (facing away)
472
- var leftEar = keypoints.find(function (kp) { return kp.name === 'left_ear'; });
473
- var rightEar = keypoints.find(function (kp) { return kp.name === 'right_ear'; });
474
- if (leftEar && rightEar && leftEar.score && rightEar.score) {
475
- if (leftEar.score > 0.5 && rightEar.score > 0.5) {
476
- return 'camera';
477
- }
478
- }
479
- return 'camera'; // Default
480
- };
481
- VisionModels.prototype.dispose = function () {
482
- return __awaiter(this, void 0, void 0, function () {
483
- return __generator(this, function (_a) {
484
- if (this.objectDetectionModel) {
485
- // COCO-SSD doesn't have a dispose method, but we can clear the reference
486
- this.objectDetectionModel = null;
487
- }
488
- if (this.poseDetector) {
489
- this.poseDetector.dispose();
490
- this.poseDetector = null;
491
- }
492
- this.initialized = false;
493
- core_1.logger.info('[VisionModels] Models disposed');
494
- return [2 /*return*/];
495
- });
496
- });
497
- };
498
- return VisionModels;
499
- }());
500
- exports.VisionModels = VisionModels;
501
- //# sourceMappingURL=vision-models.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"vision-models.js","sourceRoot":"","sources":["../src/vision-models.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,yDAAyD;AACzD,qDAAuD;AACvD,iEAAmE;AACnE,0CAA4C;AAE5C,sCAAsD;AA4BtD;IAUE,sBAAY,OAAsB,EAAE,MAA0B;QAPtD,yBAAoB,GAAmC,IAAI,CAAC;QAC5D,iBAAY,GAAsC,IAAI,CAAC;QACvD,gBAAW,GAAG,KAAK,CAAC;QACpB,mBAAc,GAA0B,IAAI,CAAC;QAC7C,iBAAY,GAAQ,IAAI,CAAC;QACzB,iBAAY,GAAQ,IAAI,CAAC;QAG/B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,MAAM,IAAI;YACtB,SAAS,EAAE;gBACT,OAAO,EAAE,uBAAuB;gBAChC,MAAM,EAAE,SAAS;aAClB;YACD,GAAG,EAAE;gBACH,KAAK,EAAE,QAAQ;gBACf,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,GAAG;aACf;SACF,CAAC;IACJ,CAAC;IAEK,iCAAU,GAAhB,UAAiB,MAAyB;;;;;;wBACxC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;4BACrB,sBAAO;wBACT,CAAC;wBAED,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;wBACrB,aAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;;;;wBAG1D,mCAAmC;wBACnC,qBAAM,EAAE,CAAC,KAAK,EAAE,EAAA;;wBADhB,mCAAmC;wBACnC,SAAgB,CAAC;wBACjB,aAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;6BAGtD,MAAM,CAAC,qBAAqB,EAA5B,wBAA4B;;;;wBAE5B,aAAM,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;wBACxD,KAAA,IAAI,CAAA;wBAAwB,qBAAM,OAAO,CAAC,IAAI,CAAC;gCAC7C,IAAI,EAAE,cAAc;6BACrB,CAAC,EAAA;;wBAFF,GAAK,oBAAoB,GAAG,SAE1B,CAAC;wBACH,aAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;;;;wBAEpD,aAAM,CAAC,KAAK,CAAC,+CAA+C,EAAE,OAAK,CAAC,CAAC;;;6BAKrE,MAAM,CAAC,mBAAmB,EAA1B,yBAA0B;;;;wBAE1B,aAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;wBACjD,cAAc,GAAqC;4BACvD,YAAY,EAAE,aAAa;4BAC3B,YAAY,EAAE,EAAE;4BAChB,eAAe,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;4BAC5C,UAAU,EAAE,IAAI;yBACjB,CAAC;wBAEF,KAAA,IAAI,CAAA;wBAAgB,qBAAM,aAAa,CAAC,cAAc,CACpD,aAAa,CAAC,eAAe,CAAC,OAAO,EACrC,cAAc,CACf,EAAA;;wBAHD,GAAK,YAAY,GAAG,SAGnB,CAAC;wBACF,aAAM,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;;;;wBAEnD,aAAM,CAAC,KAAK,CAAC,8CAA8C,EAAE,OAAK,CAAC,CAAC;;;wBAIxE,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;wBACxB,aAAM,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;;;;wBAExD,aAAM,CAAC,KAAK,CAAC,uCAAuC,EAAE,OAAK,CAAC,CAAC;wBAC7D,MAAM,OAAK,CAAC;;;;;KAEf;IAED,yCAAkB,GAAlB;QACE,OAAO,IAAI,CAAC,oBAAoB,KAAK,IAAI,CAAC;IAC5C,
CAAC;IAED,uCAAgB,GAAhB;QACE,OAAO,IAAI,CAAC,YAAY,KAAK,IAAI,CAAC;IACpC,CAAC;IAEK,oCAAa,GAAnB,UACE,SAAiB,EACjB,KAAa,EACb,MAAc,EACd,WAAoB;;;;;;wBAEpB,IAAI,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;4BAC/B,aAAM,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;4BAChE,sBAAO,IAAI,CAAC,uBAAuB,CAAC,WAAW,CAAC,EAAC;wBACnD,CAAC;;;;wBAIO,WAAW,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;wBAGhD,OAAO,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;wBAGtB,qBAAM,IAAI,CAAC,oBAAoB,CAAC,MAAM,CAAC,OAAc,CAAC,EAAA;;wBAApE,WAAW,GAAG,SAAsD;wBAE1E,mBAAmB;wBACnB,WAAW,CAAC,OAAO,EAAE,CAAC;wBACtB,OAAO,CAAC,OAAO,EAAE,CAAC;wBAGZ,OAAO,GAAqB,WAAW,CAAC,GAAG,CAAC,UAAC,IAAI,EAAE,GAAG,IAAK,OAAA,CAAC;4BAChE,EAAE,EAAE,cAAO,IAAI,CAAC,GAAG,EAAE,cAAI,GAAG,CAAE;4BAC9B,IAAI,EAAE,IAAI,CAAC,KAAK;4BAChB,UAAU,EAAE,IAAI,CAAC,KAAK;4BACtB,WAAW,EAAE;gCACX,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gCACf,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gCACf,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gCACnB,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;6BACrB;yBACF,CAAC,EAV+D,CAU/D,CAAC,CAAC;wBAEJ,aAAM,CAAC,KAAK,CAAC,kCAA2B,OAAO,CAAC,MAAM,aAAU,CAAC,CAAC;wBAClE,sBAAO,OAAO,EAAC;;;wBAEf,aAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,OAAK,CAAC,CAAC;wBAC/D,sBAAO,IAAI,CAAC,uBAAuB,CAAC,WAAW,CAAC,EAAC;;;;;KAEpD;IAEO,8CAAuB,GAA/B,UAAgC,WAAoB;QAClD,uDAAuD;QACvD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,kDAAkD;QAClD,IAAM,cAAc,GAAG;YACrB,EAAE,OAAO,EAAE,+DAA+D,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC5F,EAAE,OAAO,EAAE,qDAAqD,EAAE,IAAI,EAAE,QAAQ,EAAE;YAClF,EAAE,OAAO,EAAE,sCAAsC,EAAE,IAAI,EAAE,YAAY,EAAE;YACvE,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,MAAM,EAAE;YAC/D,EAAE,OAAO,EAAE,mCAAmC,EAAE,IAAI,EAAE,KAAK,EAAE;YAC7D,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,OAAO,EAAE;YAChE,EAAE,OAAO,EAAE,yBAAyB,EAAE,IAAI,EAAE,cAAc,EAAE;YAC5D,EAAE,OAAO,EAAE,oCAAoC,EAAE,IAAI,EAAE,KAAK,EAAE;YAC9D,EAAE,OAAO,EAAE,iCAAiC,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC9D,EAAE,OAAO,EAAE,gCAAgC,EAAE,IAAI,EAAE,cAAc,EAAE;SACpE,CAAC;QAEF,KAAgC,UAAc,EAAd,iCAAc,EAAd,4BAAc,EAAd,IAAc,EAAE,CAAC;YAAtC,IAAA,yBAAiB,
EAAf,OAAO,aAAA,EAAE,IAAI,UAAA;YACxB,IAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1D,KAAoB,UAAO,EAAP,mBAAO,EAAP,qBAAO,EAAP,IAAO,EAAE,CAAC;gBAAzB,IAAM,KAAK,gBAAA;gBACd,IAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/B,OAAO,CAAC,IAAI,CAAC;wBACX,EAAE,EAAE,cAAO,IAAI,cAAI,IAAI,CAAC,GAAG,EAAE,cAAI,CAAC,CAAE;wBACpC,IAAI,MAAA;wBACJ,UAAU,EAAE,IAAI,EAAE,sCAAsC;wBACxD,WAAW,EAAE,IAAI,CAAC,4BAA4B,CAAC,IAAI,EAAE,CAAC,EAAE,KAAK,CAAC;qBAC/D,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,mDAA4B,GAApC,UACE,IAAY,EACZ,KAAa,EACb,KAAa;QAOb,sEAAsE;QACtE,IAAM,aAAa,GAAiE;YAClF,MAAM,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YAC3C,MAAM,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YAC3C,YAAY,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YAChD,IAAI,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YACzC,GAAG,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,EAAE;YACtC,KAAK,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YAC1C,cAAc,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YACnD,GAAG,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YACxC,MAAM,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;YAC3C,cAAc,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE;SACpD,CAAC;QAEF,IAAM,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;QACxE,IAAM,OAAO,GAAG,GAAG,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,gCAAgC;QAEnE,OAAO;YACL,CAAC,EAAE,OAAO,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC;YACzC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,EAAE;YACtC,KAAK,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI
,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,EAAE;YAC9C,MAAM,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,EAAE;SACjD,CAAC;IACJ,CAAC;IAEK,kCAAW,GAAjB,UACE,SAAiB,EACjB,KAAa,EACb,MAAc,EACd,WAAoB;;;;;;wBAEpB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;4BACvB,aAAM,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;4BAC9D,sBAAO,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC,EAAC;wBACjD,CAAC;;;;wBAIO,WAAW,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;wBAGxC,qBAAM,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC;gCAClD,IAAI,EAAE,IAAI,iBAAiB,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;gCACnD,KAAK,OAAA;gCACL,MAAM,QAAA;6BACA,CAAC,EAAA;;wBAJH,KAAK,GAAG,SAIL;wBAET,kBAAkB;wBAClB,WAAW,CAAC,OAAO,EAAE,CAAC;wBAEtB,8BAA8B;wBAC9B,sBAAO,IAAI,CAAC,wBAAwB,CAAC,KAAK,CAAC,EAAC;;;wBAE5C,aAAM,CAAC,KAAK,CAAC,uCAAuC,EAAE,OAAK,CAAC,CAAC;wBAC7D,sBAAO,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC,EAAC;;;;;KAElD;IAEO,4CAAqB,GAA7B,UAA8B,WAAoB;;QAChD,qDAAqD;QACrD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAM,MAAM,GAAiB,EAAE,CAAC;QAChC,IAAM,SAAS,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;QAE5C,wCAAwC;QACxC,IAAM,WAAW,GAAG,WAAW,CAAC,KAAK,CACnC,+DAA+D,CAChE,CAAC;QACF,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAM,KAAK,GAAG,CAAA,MAAA,WAAW,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,0CAAG,CAAC,CAAC;YAC5C,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;YAC/C,CAAC,CAAC,CAAC,CAAC;QAEN,sDAAsD;QACtD,IAAM,YAAY,GAAG;YACnB,QAAQ,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,SAAS,CAAC;YAC1C,OAAO,EAAE,CAAC,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC;YAC9C,OAAO,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC;YACtC,KAAK,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,UAAU,CAAC;SACvC,CAAC;QAEF,IAAM,cAAc,GAAG;YACrB,MAAM,EAAE,CAAC,eAAe,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,YAAY,CAAC;YAC9E,IAAI,EAAE,CAAC,gBAAgB,EAAE,aAAa,EAAE,WAAW,CAAC;YACpD,IAAI,EAAE,CAAC,aAAa,EAAE,cAAc,EAAE,WAAW,CAAC;YAClD,KAAK,EAAE,CAAC,cAAc,EAAE,eAAe,EAAE,YAAY,CAAC;SACvD,CAAC;QAEF,IAAI,YAAY,GAAG,UAAU,CAAC;QAC9B,IAAI,cAAc,GAAG,QAAQ,CAAC;QAE9B,cAAc;QACd,KAA+B,UAA4B,EAA5B,KAAA,MAAM,CAAC,OAAO,CAAC,YAA
Y,CAAC,EAA5B,cAA4B,EAA5B,IAA4B,EAAE,CAAC;YAAnD,IAAA,WAAgB,EAAf,IAAI,QAAA,EAAE,QAAQ,QAAA;YACxB,IAAI,QAAQ,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAtB,CAAsB,CAAC,EAAE,CAAC;gBAClD,YAAY,GAAG,IAAI,CAAC;gBACpB,MAAM;YACR,CAAC;QACH,CAAC;QAED,gBAAgB;QAChB,KAAiC,UAA8B,EAA9B,KAAA,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAA9B,cAA8B,EAA9B,IAA8B,EAAE,CAAC;YAAvD,IAAA,WAAkB,EAAjB,MAAM,QAAA,EAAE,QAAQ,QAAA;YAC1B,IAAI,QAAQ,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAtB,CAAsB,CAAC,EAAE,CAAC;gBAClD,cAAc,GAAG,MAAM,CAAC;gBACxB,MAAM;YACR,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,IAAM,WAAW,GAAG,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;YAE1E,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,iBAAU,IAAI,CAAC,GAAG,EAAE,cAAI,CAAC,CAAE;gBAC/B,WAAW,aAAA;gBACX,IAAI,EAAE,YAA4D;gBAClE,MAAM,EAAE,cAAsD;gBAC9D,UAAU,EAAE,IAAI;gBAChB,SAAS,EAAE,IAAI,CAAC,0BAA0B,CACxC,WAAW,EACX,YAAoB,EACpB,cAAc,CACf,CAAC,GAAG,CAAC,UAAC,EAAE,IAAK,OAAA,CAAC;oBACb,IAAI,EAAE,EAAE,CAAC,IAAI;oBACb,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE;oBAC9B,KAAK,EAAE,EAAE,CAAC,KAAK;iBAChB,CAAC,EAJY,CAIZ,CAAC;aACJ,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,iDAA0B,GAAlC,UACE,WAAoE,EACpE,IAAU,EACV,OAAe;QAEf,wDAAwD;QAChD,IAAA,CAAC,GAAuB,WAAW,EAAlC,EAAE,CAAC,GAAoB,WAAW,EAA/B,EAAE,KAAK,GAAa,WAAW,MAAxB,EAAE,MAAM,GAAK,WAAW,OAAhB,CAAiB;QAC5C,IAAM,OAAO,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC;QAE9B,IAAM,SAAS,GAAmB,EAAE,CAAC;QAErC,oDAAoD;QACpD,IAAM,SAAS,GAAG;YAChB,IAAI,EAAE,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YACzC,OAAO,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE;YAC3D,QAAQ,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE;YAC5D,OAAO,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC3D,QAAQ,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,C
AAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC5D,YAAY,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE;YACjE,aAAa,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE;YAClE,SAAS,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC5D,UAAU,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC7D,SAAS,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE;YAC9D,UAAU,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE;YAC/D,OAAO,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC3D,QAAQ,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC5D,QAAQ,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC5D,SAAS,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC7D,SAAS,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;YAC7D,UAAU,EAAE,EAAE,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE;SAC/D,CAAC;QAEF,iCAAiC;QACjC,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,wCAAwC;YACxC,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,MAAM,GAAG,GAAG,CAAC;YACpC,SAAS,CAAC,QAAQ,CAAC,CAAC,IAAI,MAAM,GAAG,GAAG,CAAC;YACrC,SAAS,CAAC,QAAQ,CAAC,CAAC,IAAI,MAAM,GAAG,GAAG,CAAC;YACrC,SAAS,CAAC,SAAS,CAAC,CAAC,IAAI,MAAM,GAAG,GAAG,CAAC;QACxC,CAAC;QAED,iCAAiC;QACjC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,UAAC,EAAW;gBAAV,IAAI,QAAA,EAAE,GAAG,QAAA;YAC3C,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,IAAW;gBACjB,CAAC,EAAE,GAAG,CAAC,CAAC;gBACR,CAAC,EAAE,GAAG,CAAC,CAAC;gBACR,KAAK,EAAE,IAAI;aACZ,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,+CAAwB,GAAxB,UAAyB,KAA2B;QAApD,iBAyCC;QAxCC,OAAO,KAAK,CAAC,GAAG,CAAC,UAAC,IAAI,EAAE,KAAK;YAC3B,IAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;YAEjC,wCAAwC;YAC
xC,IAAM,EAAE,GAAG,SAAS,CAAC,GAAG,CAAC,UAAC,EAA0B,IAAK,OAAA,EAAE,CAAC,CAAC,EAAJ,CAAI,CAAC,CAAC;YAC/D,IAAM,EAAE,GAAG,SAAS,CAAC,GAAG,CAAC,UAAC,EAA0B,IAAK,OAAA,EAAE,CAAC,CAAC,EAAJ,CAAI,CAAC,CAAC;YAC/D,IAAM,IAAI,GAAG,IAAI,CAAC,GAAG,OAAR,IAAI,EAAQ,EAAE,CAAC,CAAC;YAC7B,IAAM,IAAI,GAAG,IAAI,CAAC,GAAG,OAAR,IAAI,EAAQ,EAAE,CAAC,CAAC;YAC7B,IAAM,IAAI,GAAG,IAAI,CAAC,GAAG,OAAR,IAAI,EAAQ,EAAE,CAAC,CAAC;YAC7B,IAAM,IAAI,GAAG,IAAI,CAAC,GAAG,OAAR,IAAI,EAAQ,EAAE,CAAC,CAAC;YAE7B,IAAM,WAAW,GAAG;gBAClB,CAAC,EAAE,IAAI;gBACP,CAAC,EAAE,IAAI;gBACP,KAAK,EAAE,IAAI,GAAG,IAAI;gBAClB,MAAM,EAAE,IAAI,GAAG,IAAI;aACpB,CAAC;YAEF,gCAAgC;YAChC,IAAM,YAAY,GAAG,KAAI,CAAC,0BAA0B,CAAC,SAAS,CAAC,CAAC;YAEhE,6BAA6B;YAC7B,IAAM,MAAM,GAAG,KAAI,CAAC,wBAAwB,CAAC,SAAS,CAAC,CAAC;YAExD,kCAAkC;YAClC,IAAM,kBAAkB,GAAG,SAAS,CAAC,GAAG,CAAC,UAAC,EAA0B,IAAK,OAAA,CAAC;gBACxE,IAAI,EAAE,EAAE,CAAC,IAAI,IAAI,SAAS;gBAC1B,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE;gBAC9B,KAAK,EAAE,EAAE,CAAC,KAAK,IAAI,CAAC;aACrB,CAAC,EAJuE,CAIvE,CAAC,CAAC;YAEJ,OAAO;gBACL,EAAE,EAAE,iBAAU,IAAI,CAAC,GAAG,EAAE,cAAI,KAAK,CAAE;gBACnC,WAAW,aAAA;gBACX,IAAI,EAAE,YAA4D;gBAClE,MAAM,QAAA;gBACN,UAAU,EAAE,IAAI,CAAC,KAAK,IAAI,GAAG;gBAC7B,SAAS,EAAE,kBAAkB;aAC9B,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,iDAA0B,GAAlC,UAAmC,SAAmC;QACpE,qCAAqC;QACrC,IAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,UAAU,EAAtB,CAAsB,CAAC,CAAC;QAC/D,IAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,WAAW,EAAvB,CAAuB,CAAC,CAAC;QACjE,IAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,WAAW,EAAvB,CAAuB,CAAC,CAAC;QACjE,IAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,YAAY,EAAxB,CAAwB,CAAC,CAAC;QACnE,IAAM,YAAY,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,eAAe,EAA3B,CAA2B,CAAC,CAAC;QACzE,IAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,gBAAgB,EAA5B,CAA4B,CAAC,CAAC;QAE3E,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,IAAI,CAAC,SA
AS,EAAE,CAAC;YACrD,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,IAAM,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAM,KAAK,GAAG,CAAC,QAAQ,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC7C,IAAM,SAAS,GACb,YAAY,IAAI,aAAa,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC;QAEtF,uEAAuE;QACvE,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;YACpC,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,mDAAmD;QACnD,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,GAAG,GAAG,EAAE,CAAC;YACjC,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,4BAA4B;QAC5B,OAAO,UAAU,CAAC;IACpB,CAAC;IAEO,+CAAwB,GAAhC,UACE,SAAmC;QAEnC,IAAM,YAAY,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,eAAe,EAA3B,CAA2B,CAAC,CAAC;QACzE,IAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,gBAAgB,EAA5B,CAA4B,CAAC,CAAC;QAC3E,IAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,MAAM,EAAlB,CAAkB,CAAC,CAAC;QAExD,IAAI,CAAC,YAAY,IAAI,CAAC,aAAa,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC,CAAC,UAAU;QAC7B,CAAC;QAED,IAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACjE,IAAM,gBAAgB,GAAG,CAAC,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAChE,IAAM,UAAU,GAAG,IAAI,CAAC,CAAC,GAAG,gBAAgB,CAAC;QAE7C,uDAAuD;QACvD,IAAI,aAAa,GAAG,EAAE,EAAE,CAAC;YACvB,OAAO,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;QAC3C,CAAC;QAED,4EAA4E;QAC5E,IAAI,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,aAAa,GAAG,GAAG,EAAE,CAAC;YAC/C,OAAO,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;QAC3C,CAAC;QAED,sEAAsE;QACtE,IAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,UAAU,EAAtB,CAAsB,CAAC,CAAC;QAC/D,IAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,UAAC,EAAE,IAAK,OAAA,EAAE,CAAC,IAAI,KAAK,WAAW,EAAvB,CAAuB,CAAC,CAAC;QAEjE,IAAI,OAAO,IAAI,QAAQ,IAAI,OAAO,CAAC,KAAK,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YAC3D,IAAI,OAAO,CAAC,KAAK,GAAG,GAAG,IAAI,QAAQ,CAAC,KAAK,GAAG,GAAG,EAAE,CAAC;gBAChD,OAAO,QAAQ,CAAC;Y
AClB,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC,CAAC,UAAU;IAC7B,CAAC;IAEK,8BAAO,GAAb;;;gBACE,IAAI,IAAI,CAAC,oBAAoB,EAAE,CAAC;oBAC9B,yEAAyE;oBACzE,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC;gBACnC,CAAC;gBAED,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;oBACtB,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;oBAC5B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;gBAC3B,CAAC;gBAED,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;gBACzB,aAAM,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;;;;KAC/C;IACH,mBAAC;AAAD,CAAC,AAvfD,IAufC;AAvfY,oCAAY","sourcesContent":["// Vision models for object detection and pose estimation\nimport * as cocoSsd from '@tensorflow-models/coco-ssd';\nimport * as poseDetection from '@tensorflow-models/pose-detection';\nimport * as tf from '@tensorflow/tfjs-node';\n\nimport { logger, IAgentRuntime } from '@elizaos/core';\nimport { DetectedObject, PersonInfo } from './types';\nimport { Florence2Model } from './florence2-model';\n\n// Define types that are missing from types.ts\nexport interface VisionModelConfig {\n enableObjectDetection?: boolean;\n enablePoseDetection?: boolean;\n florence2?: {\n baseUrl: string;\n apiKey?: string;\n };\n vlm?: {\n model: string;\n temperature: number;\n maxTokens: number;\n };\n}\n\nexport type Pose = 'sitting' | 'standing' | 'lying' | 'walking' | 'unknown';\n\nexport interface PoseLandmark {\n name: string;\n x: number;\n y: number;\n score: number;\n}\n\nexport class VisionModels {\n private runtime: IAgentRuntime;\n private config: VisionModelConfig;\n private objectDetectionModel: cocoSsd.ObjectDetection | null = null;\n private poseDetector: poseDetection.PoseDetector | null = null;\n private initialized = false;\n private florence2Model: Florence2Model | null = null;\n private cocoSsdModel: any = null;\n private posenetModel: any = null;\n\n constructor(runtime: IAgentRuntime, config?: VisionModelConfig) {\n this.runtime = runtime;\n this.config = config || {\n florence2: {\n baseUrl: 'http://localhost:8000',\n apiKey: undefined,\n },\n vlm: {\n model: 'gpt-4o',\n temperature: 0.7,\n maxTokens: 500,\n 
},\n };\n }\n\n async initialize(config: VisionModelConfig): Promise<void> {\n if (this.initialized) {\n return;\n }\n\n this.config = config;\n logger.info('[VisionModels] Initializing vision models...');\n\n try {\n // Initialize TensorFlow.js backend\n await tf.ready();\n logger.info('[VisionModels] TensorFlow.js backend ready');\n\n // Load object detection model\n if (config.enableObjectDetection) {\n try {\n logger.info('[VisionModels] Loading COCO-SSD model...');\n this.objectDetectionModel = await cocoSsd.load({\n base: 'mobilenet_v2',\n });\n logger.info('[VisionModels] COCO-SSD model loaded');\n } catch (error) {\n logger.error('[VisionModels] Failed to load COCO-SSD model:', error);\n }\n }\n\n // Load pose detection model\n if (config.enablePoseDetection) {\n try {\n logger.info('[VisionModels] Loading PoseNet model...');\n const detectorConfig: poseDetection.PosenetModelConfig = {\n architecture: 'MobileNetV1',\n outputStride: 16,\n inputResolution: { width: 640, height: 480 },\n multiplier: 0.75,\n };\n\n this.poseDetector = await poseDetection.createDetector(\n poseDetection.SupportedModels.PoseNet,\n detectorConfig\n );\n logger.info('[VisionModels] PoseNet model loaded');\n } catch (error) {\n logger.error('[VisionModels] Failed to load PoseNet model:', error);\n }\n }\n\n this.initialized = true;\n logger.info('[VisionModels] Vision models initialized');\n } catch (error) {\n logger.error('[VisionModels] Initialization failed:', error);\n throw error;\n }\n }\n\n hasObjectDetection(): boolean {\n return this.objectDetectionModel !== null;\n }\n\n hasPoseDetection(): boolean {\n return this.poseDetector !== null;\n }\n\n async detectObjects(\n imageData: Buffer,\n width: number,\n height: number,\n description?: string\n ): Promise<DetectedObject[]> {\n if (!this.objectDetectionModel) {\n logger.warn('[VisionModels] Object detection model not loaded');\n return this.enhancedObjectDetection(description);\n }\n\n try {\n // Convert image data to 
tensor\n const imageTensor = tf.node.decodeImage(imageData, 3);\n\n // Ensure the tensor has the right shape [1, height, width, 3]\n const batched = imageTensor.expandDims(0);\n\n // Run detection\n const predictions = await this.objectDetectionModel.detect(batched as any);\n\n // Clean up tensors\n imageTensor.dispose();\n batched.dispose();\n\n // Convert predictions to our format\n const objects: DetectedObject[] = predictions.map((pred, idx) => ({\n id: `obj-${Date.now()}-${idx}`,\n type: pred.class,\n confidence: pred.score,\n boundingBox: {\n x: pred.bbox[0],\n y: pred.bbox[1],\n width: pred.bbox[2],\n height: pred.bbox[3],\n },\n }));\n\n logger.debug(`[VisionModels] Detected ${objects.length} objects`);\n return objects;\n } catch (error) {\n logger.error('[VisionModels] Object detection failed:', error);\n return this.enhancedObjectDetection(description);\n }\n }\n\n private enhancedObjectDetection(description?: string): DetectedObject[] {\n // Enhanced object detection based on scene description\n if (!description) {\n return [];\n }\n\n const objects: DetectedObject[] = [];\n\n // Extract objects from description using patterns\n const objectPatterns = [\n { pattern: /(\\d+)?\\s*(person|people|man|men|woman|women|child|children)/gi, type: 'person' },\n { pattern: /(\\d+)?\\s*(laptop|computer|monitor|screen|display)/gi, type: 'laptop' },\n { pattern: /(\\d+)?\\s*(phone|smartphone|mobile)/gi, type: 'cell phone' },\n { pattern: /(\\d+)?\\s*(book|notebook|journal)/gi, type: 'book' },\n { pattern: /(\\d+)?\\s*(cup|mug|glass|bottle)/gi, type: 'cup' },\n { pattern: /(\\d+)?\\s*(chair|seat|sofa|couch)/gi, type: 'chair' },\n { pattern: /(\\d+)?\\s*(table|desk)/gi, type: 'dining table' },\n { pattern: /(\\d+)?\\s*(car|vehicle|truck|bus)/gi, type: 'car' },\n { pattern: /(\\d+)?\\s*(dog|cat|pet|animal)/gi, type: 'animal' },\n { pattern: /(\\d+)?\\s*(plant|tree|flower)/gi, type: 'potted plant' },\n ];\n\n for (const { pattern, type } of objectPatterns) {\n const 
matches = Array.from(description.matchAll(pattern));\n for (const match of matches) {\n const count = match[1] ? parseInt(match[1], 10) : 1;\n for (let i = 0; i < count; i++) {\n objects.push({\n id: `obj-${type}-${Date.now()}-${i}`,\n type,\n confidence: 0.85, // High confidence since it's from VLM\n boundingBox: this.generatePlausibleBoundingBox(type, i, count),\n });\n }\n }\n }\n\n return objects;\n }\n\n private generatePlausibleBoundingBox(\n type: string,\n index: number,\n total: number\n ): {\n x: number;\n y: number;\n width: number;\n height: number;\n } {\n // Generate plausible bounding boxes based on object type and position\n const basePositions: Record<string, { width: number; height: number; y: number }> = {\n person: { width: 150, height: 300, y: 100 },\n laptop: { width: 200, height: 150, y: 250 },\n 'cell phone': { width: 50, height: 100, y: 300 },\n book: { width: 100, height: 150, y: 280 },\n cup: { width: 60, height: 80, y: 300 },\n chair: { width: 180, height: 200, y: 200 },\n 'dining table': { width: 400, height: 200, y: 250 },\n car: { width: 300, height: 200, y: 150 },\n animal: { width: 120, height: 100, y: 300 },\n 'potted plant': { width: 100, height: 150, y: 200 },\n };\n\n const base = basePositions[type] || { width: 100, height: 100, y: 200 };\n const spacing = 640 / (total + 1); // Distribute across frame width\n\n return {\n x: spacing * (index + 1) - base.width / 2,\n y: base.y + (Math.random() - 0.5) * 50,\n width: base.width + (Math.random() - 0.5) * 40,\n height: base.height + (Math.random() - 0.5) * 40,\n };\n }\n\n async detectPoses(\n imageData: Buffer,\n width: number,\n height: number,\n description?: string\n ): Promise<PersonInfo[]> {\n if (!this.poseDetector) {\n logger.warn('[VisionModels] Pose detection model not loaded');\n return this.enhancedPoseDetection(description);\n }\n\n try {\n // Convert image data to tensor\n const imageTensor = tf.node.decodeImage(imageData, 3);\n\n // Run pose detection\n const poses = 
await this.poseDetector.estimatePoses({\n data: new Uint8ClampedArray(imageTensor.dataSync()),\n width,\n height,\n } as any);\n\n // Clean up tensor\n imageTensor.dispose();\n\n // Convert poses to PersonInfo\n return this.convertPosesToPersonInfo(poses);\n } catch (error) {\n logger.error('[VisionModels] Pose detection failed:', error);\n return this.enhancedPoseDetection(description);\n }\n }\n\n private enhancedPoseDetection(description?: string): PersonInfo[] {\n // Enhanced pose detection based on scene description\n if (!description) {\n return [];\n }\n\n const people: PersonInfo[] = [];\n const descLower = description.toLowerCase();\n\n // Extract people count and descriptions\n const peopleMatch = description.match(\n /(\\d+)?\\s*(person|people|man|men|woman|women|child|children)/gi\n );\n if (!peopleMatch) {\n return [];\n }\n\n const count = peopleMatch[0].match(/\\d+/)?.[0]\n ? parseInt(peopleMatch[0].match(/\\d+/)![0], 10)\n : 1;\n\n // Analyze description for pose and facing information\n const poseKeywords = {\n standing: ['standing', 'stand', 'upright'],\n sitting: ['sitting', 'seated', 'sit', 'chair'],\n walking: ['walking', 'walk', 'moving'],\n lying: ['lying', 'laying', 'reclined'],\n };\n\n const facingKeywords = {\n camera: ['facing camera', 'looking at camera', 'facing forward', 'front view'],\n away: ['back to camera', 'facing away', 'back view'],\n left: ['facing left', 'profile left', 'left side'],\n right: ['facing right', 'profile right', 'right side'],\n };\n\n let detectedPose = 'standing';\n let detectedFacing = 'camera';\n\n // Detect pose\n for (const [pose, keywords] of Object.entries(poseKeywords)) {\n if (keywords.some((kw) => descLower.includes(kw))) {\n detectedPose = pose;\n break;\n }\n }\n\n // Detect facing\n for (const [facing, keywords] of Object.entries(facingKeywords)) {\n if (keywords.some((kw) => descLower.includes(kw))) {\n detectedFacing = facing;\n break;\n }\n }\n\n // Create PersonInfo for each detected person\n 
for (let i = 0; i < count; i++) {\n const boundingBox = this.generatePlausibleBoundingBox('person', i, count);\n\n people.push({\n id: `person-${Date.now()}-${i}`,\n boundingBox,\n pose: detectedPose as 'sitting' | 'standing' | 'lying' | 'unknown',\n facing: detectedFacing as 'camera' | 'away' | 'left' | 'right',\n confidence: 0.85,\n keypoints: this.generatePlausibleKeypoints(\n boundingBox,\n detectedPose as Pose,\n detectedFacing\n ).map((kp) => ({\n part: kp.name,\n position: { x: kp.x, y: kp.y },\n score: kp.score,\n })),\n });\n }\n\n return people;\n }\n\n private generatePlausibleKeypoints(\n boundingBox: { x: number; y: number; width: number; height: number },\n pose: Pose,\n _facing: string\n ): PoseLandmark[] {\n // Generate plausible keypoints based on pose and facing\n const { x, y, width, height } = boundingBox;\n const centerX = x + width / 2;\n\n const keypoints: PoseLandmark[] = [];\n\n // Basic keypoint positions relative to bounding box\n const positions = {\n nose: { x: centerX, y: y + height * 0.1 },\n leftEye: { x: centerX - width * 0.1, y: y + height * 0.08 },\n rightEye: { x: centerX + width * 0.1, y: y + height * 0.08 },\n leftEar: { x: centerX - width * 0.15, y: y + height * 0.1 },\n rightEar: { x: centerX + width * 0.15, y: y + height * 0.1 },\n leftShoulder: { x: centerX - width * 0.25, y: y + height * 0.25 },\n rightShoulder: { x: centerX + width * 0.25, y: y + height * 0.25 },\n leftElbow: { x: centerX - width * 0.3, y: y + height * 0.4 },\n rightElbow: { x: centerX + width * 0.3, y: y + height * 0.4 },\n leftWrist: { x: centerX - width * 0.25, y: y + height * 0.55 },\n rightWrist: { x: centerX + width * 0.25, y: y + height * 0.55 },\n leftHip: { x: centerX - width * 0.15, y: y + height * 0.5 },\n rightHip: { x: centerX + width * 0.15, y: y + height * 0.5 },\n leftKnee: { x: centerX - width * 0.15, y: y + height * 0.7 },\n rightKnee: { x: centerX + width * 0.15, y: y + height * 0.7 },\n leftAnkle: { x: centerX - width * 0.15, y: y + 
height * 0.9 },\n rightAnkle: { x: centerX + width * 0.15, y: y + height * 0.9 },\n };\n\n // Adjust positions based on pose\n if (pose === 'sitting') {\n // Lower hips and knees for sitting pose\n positions.leftHip.y += height * 0.1;\n positions.rightHip.y += height * 0.1;\n positions.leftKnee.y -= height * 0.1;\n positions.rightKnee.y -= height * 0.1;\n }\n\n // Convert to PoseLandmark format\n Object.entries(positions).forEach(([name, pos]) => {\n keypoints.push({\n name: name as any,\n x: pos.x,\n y: pos.y,\n score: 0.85,\n });\n });\n\n return keypoints;\n }\n\n convertPosesToPersonInfo(poses: poseDetection.Pose[]): PersonInfo[] {\n return poses.map((pose, index) => {\n const keypoints = pose.keypoints;\n\n // Calculate bounding box from keypoints\n const xs = keypoints.map((kp: poseDetection.Keypoint) => kp.x);\n const ys = keypoints.map((kp: poseDetection.Keypoint) => kp.y);\n const minX = Math.min(...xs);\n const maxX = Math.max(...xs);\n const minY = Math.min(...ys);\n const maxY = Math.max(...ys);\n\n const boundingBox = {\n x: minX,\n y: minY,\n width: maxX - minX,\n height: maxY - minY,\n };\n\n // Determine pose from keypoints\n const detectedPose = this.determinePoseFromKeypoints(keypoints);\n\n // Determine facing direction\n const facing = this.determineFacingDirection(keypoints);\n\n // Convert keypoints to our format\n const convertedKeypoints = keypoints.map((kp: poseDetection.Keypoint) => ({\n part: kp.name || 'unknown',\n position: { x: kp.x, y: kp.y },\n score: kp.score || 0,\n }));\n\n return {\n id: `person-${Date.now()}-${index}`,\n boundingBox,\n pose: detectedPose as 'sitting' | 'standing' | 'lying' | 'unknown',\n facing,\n confidence: pose.score || 0.5,\n keypoints: convertedKeypoints,\n };\n });\n }\n\n private determinePoseFromKeypoints(keypoints: poseDetection.Keypoint[]): Pose {\n // Simple heuristic to determine pose\n const leftHip = keypoints.find((kp) => kp.name === 'left_hip');\n const rightHip = keypoints.find((kp) => kp.name 
=== 'right_hip');\n const leftKnee = keypoints.find((kp) => kp.name === 'left_knee');\n const rightKnee = keypoints.find((kp) => kp.name === 'right_knee');\n const leftShoulder = keypoints.find((kp) => kp.name === 'left_shoulder');\n const rightShoulder = keypoints.find((kp) => kp.name === 'right_shoulder');\n\n if (!leftHip || !rightHip || !leftKnee || !rightKnee) {\n return 'unknown';\n }\n\n const hipY = (leftHip.y + rightHip.y) / 2;\n const kneeY = (leftKnee.y + rightKnee.y) / 2;\n const shoulderY =\n leftShoulder && rightShoulder ? (leftShoulder.y + rightShoulder.y) / 2 : hipY - 100;\n\n // Check if person is lying down (shoulders and hips at similar height)\n if (Math.abs(shoulderY - hipY) < 50) {\n return 'lying';\n }\n\n // Check if person is sitting (knees close to hips)\n if (Math.abs(hipY - kneeY) < 100) {\n return 'sitting';\n }\n\n // Otherwise assume standing\n return 'standing';\n }\n\n private determineFacingDirection(\n keypoints: poseDetection.Keypoint[]\n ): 'camera' | 'away' | 'left' | 'right' {\n const leftShoulder = keypoints.find((kp) => kp.name === 'left_shoulder');\n const rightShoulder = keypoints.find((kp) => kp.name === 'right_shoulder');\n const nose = keypoints.find((kp) => kp.name === 'nose');\n\n if (!leftShoulder || !rightShoulder || !nose) {\n return 'camera'; // Default\n }\n\n const shoulderWidth = Math.abs(rightShoulder.x - leftShoulder.x);\n const shoulderMidpoint = (leftShoulder.x + rightShoulder.x) / 2;\n const noseOffset = nose.x - shoulderMidpoint;\n\n // If shoulders are narrow, person is likely in profile\n if (shoulderWidth < 50) {\n return noseOffset > 0 ? 'right' : 'left';\n }\n\n // If nose is significantly offset from shoulder midpoint, person is turning\n if (Math.abs(noseOffset) > shoulderWidth * 0.3) {\n return noseOffset > 0 ? 
'right' : 'left';\n }\n\n // Check if both ears are visible (facing camera) or not (facing away)\n const leftEar = keypoints.find((kp) => kp.name === 'left_ear');\n const rightEar = keypoints.find((kp) => kp.name === 'right_ear');\n\n if (leftEar && rightEar && leftEar.score && rightEar.score) {\n if (leftEar.score > 0.5 && rightEar.score > 0.5) {\n return 'camera';\n }\n }\n\n return 'camera'; // Default\n }\n\n async dispose(): Promise<void> {\n if (this.objectDetectionModel) {\n // COCO-SSD doesn't have a dispose method, but we can clear the reference\n this.objectDetectionModel = null;\n }\n\n if (this.poseDetector) {\n this.poseDetector.dispose();\n this.poseDetector = null;\n }\n\n this.initialized = false;\n logger.info('[VisionModels] Models disposed');\n }\n}\n"]}
@@ -1,61 +0,0 @@
1
- import type { VisionConfig, ScreenCapture, EnhancedSceneDescription } from './types';
2
- interface WorkerStats {
3
- fps: number;
4
- frameCount: number;
5
- lastUpdate: number;
6
- }
7
- export declare class VisionWorkerManager {
8
- private config;
9
- private screenCaptureWorker;
10
- private florence2Worker;
11
- private ocrWorker;
12
- private screenBuffer;
13
- private florence2ResultsBuffer;
14
- private ocrResultsBuffer;
15
- private screenAtomicState;
16
- private screenDataView;
17
- private florence2ResultsView;
18
- private ocrResultsView;
19
- private readonly SCREEN_BUFFER_SIZE;
20
- private readonly FLORENCE2_RESULTS_SIZE;
21
- private readonly OCR_RESULTS_SIZE;
22
- private readonly FRAME_ID_INDEX;
23
- private readonly WIDTH_INDEX;
24
- private readonly HEIGHT_INDEX;
25
- private readonly DISPLAY_INDEX;
26
- private readonly TIMESTAMP_INDEX;
27
- private readonly DATA_OFFSET;
28
- private workerStats;
29
- private latestScreenCapture;
30
- private latestFlorence2Results;
31
- private latestOCRResult;
32
- private lastProcessedFrameId;
33
- private restartAttempts;
34
- private readonly MAX_RESTART_ATTEMPTS;
35
- constructor(config: VisionConfig);
36
- initialize(): Promise<void>;
37
- private startScreenCaptureWorker;
38
- private startFlorence2Worker;
39
- private startOCRWorker;
40
- private updateFlorence2Cache;
41
- private updateOCRCache;
42
- private readFlorence2Result;
43
- private readOCRResult;
44
- getLatestScreenCapture(): ScreenCapture | null;
45
- getLatestEnhancedScene(): EnhancedSceneDescription;
46
- private generateTiles;
47
- getWorkerStats(): Map<string, WorkerStats>;
48
- setDisplayIndex(index: number): Promise<void>;
49
- setTextRegions(regions: Array<{
50
- x: number;
51
- y: number;
52
- width: number;
53
- height: number;
54
- }>): Promise<void>;
55
- stop(): Promise<void>;
56
- private handleWorkerLog;
57
- private restartScreenCaptureWorker;
58
- private restartFlorence2Worker;
59
- private restartOCRWorker;
60
- }
61
- export {};