@elizaos/plugin-vision 1.2.1 → 2.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/build.config.ts +53 -53
- package/dist/index.js +6716 -67
- package/dist/index.js.map +33 -1
- package/dist/workers/florence2-worker.js +111763 -307
- package/dist/workers/florence2-worker.js.map +92 -1
- package/dist/workers/ocr-worker.js +119177 -339
- package/dist/workers/ocr-worker.js.map +137 -1
- package/dist/workers/screen-capture-worker.js +350 -418
- package/dist/workers/screen-capture-worker.js.map +11 -1
- package/package.json +15 -20
- package/README.md +0 -270
- package/dist/action.d.ts +0 -8
- package/dist/action.js +0 -1212
- package/dist/action.js.map +0 -1
- package/dist/audio-capture-stream.d.ts +0 -42
- package/dist/audio-capture-stream.js +0 -516
- package/dist/audio-capture-stream.js.map +0 -1
- package/dist/audio-capture.d.ts +0 -25
- package/dist/audio-capture.js +0 -412
- package/dist/audio-capture.js.map +0 -1
- package/dist/basic.test.d.ts +0 -1
- package/dist/basic.test.js +0 -97
- package/dist/basic.test.js.map +0 -1
- package/dist/config.d.ts +0 -73
- package/dist/config.js +0 -254
- package/dist/config.js.map +0 -1
- package/dist/entity-tracker.d.ts +0 -32
- package/dist/entity-tracker.js +0 -361
- package/dist/entity-tracker.js.map +0 -1
- package/dist/errors.d.ts +0 -67
- package/dist/errors.js +0 -395
- package/dist/errors.js.map +0 -1
- package/dist/face-recognition.d.ts +0 -31
- package/dist/face-recognition.js +0 -332
- package/dist/face-recognition.js.map +0 -1
- package/dist/florence2-local.d.ts +0 -25
- package/dist/florence2-local.js +0 -280
- package/dist/florence2-local.js.map +0 -1
- package/dist/florence2-model.d.ts +0 -36
- package/dist/florence2-model.js +0 -503
- package/dist/florence2-model.js.map +0 -1
- package/dist/index.d.ts +0 -3
- package/dist/ocr-service-real.d.ts +0 -32
- package/dist/ocr-service-real.js +0 -396
- package/dist/ocr-service-real.js.map +0 -1
- package/dist/ocr-service.d.ts +0 -28
- package/dist/ocr-service.js +0 -216
- package/dist/ocr-service.js.map +0 -1
- package/dist/provider.d.ts +0 -2
- package/dist/provider.js +0 -285
- package/dist/provider.js.map +0 -1
- package/dist/screen-capture.d.ts +0 -16
- package/dist/screen-capture.js +0 -302
- package/dist/screen-capture.js.map +0 -1
- package/dist/service.d.ts +0 -73
- package/dist/service.js +0 -1662
- package/dist/service.js.map +0 -1
- package/dist/tests/e2e/index.d.ts +0 -8
- package/dist/tests/e2e/index.js +0 -33
- package/dist/tests/e2e/index.js.map +0 -1
- package/dist/tests/e2e/run-local.d.ts +0 -2
- package/dist/tests/e2e/run-local.js +0 -166
- package/dist/tests/e2e/run-local.js.map +0 -1
- package/dist/tests/e2e/screen-vision.d.ts +0 -11
- package/dist/tests/e2e/screen-vision.js +0 -384
- package/dist/tests/e2e/screen-vision.js.map +0 -1
- package/dist/tests/e2e/vision-autonomy.d.ts +0 -11
- package/dist/tests/e2e/vision-autonomy.js +0 -375
- package/dist/tests/e2e/vision-autonomy.js.map +0 -1
- package/dist/tests/e2e/vision-basic.d.ts +0 -11
- package/dist/tests/e2e/vision-basic.js +0 -434
- package/dist/tests/e2e/vision-basic.js.map +0 -1
- package/dist/tests/e2e/vision-capture-log.d.ts +0 -11
- package/dist/tests/e2e/vision-capture-log.js +0 -302
- package/dist/tests/e2e/vision-capture-log.js.map +0 -1
- package/dist/tests/e2e/vision-runtime.d.ts +0 -11
- package/dist/tests/e2e/vision-runtime.js +0 -357
- package/dist/tests/e2e/vision-runtime.js.map +0 -1
- package/dist/tests/e2e/vision-worker-tests.d.ts +0 -11
- package/dist/tests/e2e/vision-worker-tests.js +0 -466
- package/dist/tests/e2e/vision-worker-tests.js.map +0 -1
- package/dist/tests/test-pattern-generator.d.ts +0 -40
- package/dist/tests/test-pattern-generator.js +0 -191
- package/dist/tests/test-pattern-generator.js.map +0 -1
- package/dist/tests.d.ts +0 -3
- package/dist/tests.js +0 -11
- package/dist/tests.js.map +0 -1
- package/dist/types.d.ts +0 -222
- package/dist/types.js +0 -16
- package/dist/types.js.map +0 -1
- package/dist/vision-models.d.ts +0 -47
- package/dist/vision-models.js +0 -501
- package/dist/vision-models.js.map +0 -1
- package/dist/vision-worker-manager.d.ts +0 -61
- package/dist/vision-worker-manager.js +0 -668
- package/dist/vision-worker-manager.js.map +0 -1
- package/dist/workers/florence2-worker-simple.d.ts +0 -13
- package/dist/workers/florence2-worker-simple.js +0 -121
- package/dist/workers/florence2-worker-simple.js.map +0 -1
- package/dist/workers/florence2-worker.d.ts +0 -1
- package/dist/workers/ocr-worker.d.ts +0 -1
- package/dist/workers/screen-capture-worker.d.ts +0 -1
- package/dist/workers/worker-logger.d.ts +0 -9
- package/dist/workers/worker-logger.js +0 -95
- package/dist/workers/worker-logger.js.map +0 -1
package/dist/service.js
DELETED
|
@@ -1,1662 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __extends = (this && this.__extends) || (function () {
|
|
3
|
-
var extendStatics = function (d, b) {
|
|
4
|
-
extendStatics = Object.setPrototypeOf ||
|
|
5
|
-
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
|
|
6
|
-
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
|
|
7
|
-
return extendStatics(d, b);
|
|
8
|
-
};
|
|
9
|
-
return function (d, b) {
|
|
10
|
-
if (typeof b !== "function" && b !== null)
|
|
11
|
-
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
|
|
12
|
-
extendStatics(d, b);
|
|
13
|
-
function __() { this.constructor = d; }
|
|
14
|
-
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
15
|
-
};
|
|
16
|
-
})();
|
|
17
|
-
var __assign = (this && this.__assign) || function () {
|
|
18
|
-
__assign = Object.assign || function(t) {
|
|
19
|
-
for (var s, i = 1, n = arguments.length; i < n; i++) {
|
|
20
|
-
s = arguments[i];
|
|
21
|
-
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
|
|
22
|
-
t[p] = s[p];
|
|
23
|
-
}
|
|
24
|
-
return t;
|
|
25
|
-
};
|
|
26
|
-
return __assign.apply(this, arguments);
|
|
27
|
-
};
|
|
28
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
29
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
30
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
31
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
32
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
33
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
34
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
35
|
-
});
|
|
36
|
-
};
|
|
37
|
-
var __generator = (this && this.__generator) || function (thisArg, body) {
|
|
38
|
-
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
|
|
39
|
-
return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
|
|
40
|
-
function verb(n) { return function (v) { return step([n, v]); }; }
|
|
41
|
-
function step(op) {
|
|
42
|
-
if (f) throw new TypeError("Generator is already executing.");
|
|
43
|
-
while (g && (g = 0, op[0] && (_ = 0)), _) try {
|
|
44
|
-
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
|
|
45
|
-
if (y = 0, t) op = [op[0] & 2, t.value];
|
|
46
|
-
switch (op[0]) {
|
|
47
|
-
case 0: case 1: t = op; break;
|
|
48
|
-
case 4: _.label++; return { value: op[1], done: false };
|
|
49
|
-
case 5: _.label++; y = op[1]; op = [0]; continue;
|
|
50
|
-
case 7: op = _.ops.pop(); _.trys.pop(); continue;
|
|
51
|
-
default:
|
|
52
|
-
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
|
|
53
|
-
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
|
|
54
|
-
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
|
|
55
|
-
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
|
|
56
|
-
if (t[2]) _.ops.pop();
|
|
57
|
-
_.trys.pop(); continue;
|
|
58
|
-
}
|
|
59
|
-
op = body.call(thisArg, _);
|
|
60
|
-
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
|
|
61
|
-
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
|
|
62
|
-
}
|
|
63
|
-
};
|
|
64
|
-
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
65
|
-
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
|
|
66
|
-
if (ar || !(i in from)) {
|
|
67
|
-
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
|
|
68
|
-
ar[i] = from[i];
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
return to.concat(ar || Array.prototype.slice.call(from));
|
|
72
|
-
};
|
|
73
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
74
|
-
exports.VisionService = void 0;
|
|
75
|
-
// Vision service for camera integration and scene analysis
|
|
76
|
-
var core_1 = require("@elizaos/core");
|
|
77
|
-
var child_process_1 = require("child_process");
|
|
78
|
-
var fs = require("fs/promises");
|
|
79
|
-
var path = require("path");
|
|
80
|
-
var sharp_1 = require("sharp");
|
|
81
|
-
var util_1 = require("util");
|
|
82
|
-
var audio_capture_1 = require("./audio-capture");
|
|
83
|
-
var audio_capture_stream_1 = require("./audio-capture-stream");
|
|
84
|
-
var entity_tracker_1 = require("./entity-tracker");
|
|
85
|
-
var face_recognition_1 = require("./face-recognition");
|
|
86
|
-
var florence2_model_1 = require("./florence2-model");
|
|
87
|
-
var ocr_service_1 = require("./ocr-service");
|
|
88
|
-
var screen_capture_1 = require("./screen-capture");
|
|
89
|
-
var types_1 = require("./types");
|
|
90
|
-
var vision_models_1 = require("./vision-models");
|
|
91
|
-
var vision_worker_manager_1 = require("./vision-worker-manager");
|
|
92
|
-
var execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
93
|
-
var VisionService = /** @class */ (function (_super) {
|
|
94
|
-
__extends(VisionService, _super);
|
|
95
|
-
function VisionService(runtime) {
|
|
96
|
-
var _this = _super.call(this, runtime) || this;
|
|
97
|
-
_this.capabilityDescription = 'Provides visual perception through camera integration and scene analysis.';
|
|
98
|
-
_this.camera = null;
|
|
99
|
-
_this.lastFrame = null;
|
|
100
|
-
_this.lastSceneDescription = null;
|
|
101
|
-
_this.frameProcessingInterval = null;
|
|
102
|
-
_this.screenProcessingInterval = null;
|
|
103
|
-
_this.isProcessing = false;
|
|
104
|
-
_this.isProcessingScreen = false;
|
|
105
|
-
_this.audioCapture = null;
|
|
106
|
-
_this.streamingAudioCapture = null;
|
|
107
|
-
_this.lastScreenCapture = null;
|
|
108
|
-
_this.lastEnhancedScene = null;
|
|
109
|
-
// Worker manager for high-FPS processing
|
|
110
|
-
_this.workerManager = null;
|
|
111
|
-
// Add tracking for last update times
|
|
112
|
-
_this.lastTfUpdateTime = 0;
|
|
113
|
-
_this.lastVlmUpdateTime = 0;
|
|
114
|
-
_this.lastTfDescription = '';
|
|
115
|
-
// Default configuration
|
|
116
|
-
_this.DEFAULT_CONFIG = {
|
|
117
|
-
pixelChangeThreshold: 50, // 50% change required for VLM update
|
|
118
|
-
updateInterval: 100, // Process frames every 100ms
|
|
119
|
-
enablePoseDetection: false,
|
|
120
|
-
enableObjectDetection: false,
|
|
121
|
-
tfUpdateInterval: 1000, // TensorFlow update every 1 second
|
|
122
|
-
vlmUpdateInterval: 10000, // VLM update every 10 seconds
|
|
123
|
-
tfChangeThreshold: 10, // 10% change triggers TF update
|
|
124
|
-
vlmChangeThreshold: 50, // 50% change triggers VLM update
|
|
125
|
-
visionMode: types_1.VisionMode.CAMERA, // Default to camera only
|
|
126
|
-
screenCaptureInterval: 2000, // Screen capture every 2 seconds
|
|
127
|
-
tileSize: 256,
|
|
128
|
-
tileProcessingOrder: 'priority',
|
|
129
|
-
ocrEnabled: true,
|
|
130
|
-
florence2Enabled: true,
|
|
131
|
-
};
|
|
132
|
-
// Load configuration from runtime settings
|
|
133
|
-
_this.visionConfig = _this.parseConfig(runtime);
|
|
134
|
-
// Initialize vision models
|
|
135
|
-
_this.visionModels = new vision_models_1.VisionModels(runtime);
|
|
136
|
-
// Initialize face recognition
|
|
137
|
-
_this.faceRecognition = new face_recognition_1.FaceRecognition();
|
|
138
|
-
// Initialize entity tracker
|
|
139
|
-
var worldId = runtime.getSetting('WORLD_ID') || 'default-world';
|
|
140
|
-
_this.entityTracker = new entity_tracker_1.EntityTracker(worldId);
|
|
141
|
-
// Initialize screen capture
|
|
142
|
-
_this.screenCapture = new screen_capture_1.ScreenCaptureService(_this.visionConfig);
|
|
143
|
-
// Initialize Florence-2
|
|
144
|
-
_this.florence2 = new florence2_model_1.Florence2Model();
|
|
145
|
-
// Initialize OCR service
|
|
146
|
-
_this.ocrService = new ocr_service_1.OCRService();
|
|
147
|
-
core_1.logger.info('[VisionService] Constructed with config:', _this.visionConfig);
|
|
148
|
-
return _this;
|
|
149
|
-
}
|
|
150
|
-
VisionService.prototype.parseConfig = function (runtime) {
|
|
151
|
-
return __assign(__assign({}, this.DEFAULT_CONFIG), { cameraName: runtime.getSetting('CAMERA_NAME') || runtime.getSetting('VISION_CAMERA_NAME'), pixelChangeThreshold: Number(runtime.getSetting('PIXEL_CHANGE_THRESHOLD') ||
|
|
152
|
-
runtime.getSetting('VISION_PIXEL_CHANGE_THRESHOLD')) || this.DEFAULT_CONFIG.pixelChangeThreshold, enableObjectDetection: runtime.getSetting('ENABLE_OBJECT_DETECTION') === 'true' ||
|
|
153
|
-
runtime.getSetting('VISION_ENABLE_OBJECT_DETECTION') === 'true', enablePoseDetection: runtime.getSetting('ENABLE_POSE_DETECTION') === 'true' ||
|
|
154
|
-
runtime.getSetting('VISION_ENABLE_POSE_DETECTION') === 'true', tfUpdateInterval: Number(runtime.getSetting('TF_UPDATE_INTERVAL') ||
|
|
155
|
-
runtime.getSetting('VISION_TF_UPDATE_INTERVAL')) || this.DEFAULT_CONFIG.tfUpdateInterval, vlmUpdateInterval: Number(runtime.getSetting('VLM_UPDATE_INTERVAL') ||
|
|
156
|
-
runtime.getSetting('VISION_VLM_UPDATE_INTERVAL')) || this.DEFAULT_CONFIG.vlmUpdateInterval, tfChangeThreshold: Number(runtime.getSetting('TF_CHANGE_THRESHOLD') ||
|
|
157
|
-
runtime.getSetting('VISION_TF_CHANGE_THRESHOLD')) || this.DEFAULT_CONFIG.tfChangeThreshold, vlmChangeThreshold: Number(runtime.getSetting('VLM_CHANGE_THRESHOLD') ||
|
|
158
|
-
runtime.getSetting('VISION_VLM_CHANGE_THRESHOLD')) || this.DEFAULT_CONFIG.vlmChangeThreshold, visionMode: runtime.getSetting('VISION_MODE') || this.DEFAULT_CONFIG.visionMode, screenCaptureInterval: Number(runtime.getSetting('SCREEN_CAPTURE_INTERVAL') ||
|
|
159
|
-
runtime.getSetting('VISION_SCREEN_CAPTURE_INTERVAL')) || this.DEFAULT_CONFIG.screenCaptureInterval, ocrEnabled: runtime.getSetting('OCR_ENABLED') === 'true' ||
|
|
160
|
-
runtime.getSetting('VISION_OCR_ENABLED') === 'true', florence2Enabled: runtime.getSetting('FLORENCE2_ENABLED') === 'true' ||
|
|
161
|
-
runtime.getSetting('VISION_FLORENCE2_ENABLED') === 'true' });
|
|
162
|
-
};
|
|
163
|
-
VisionService.start = function (runtime) {
|
|
164
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
165
|
-
var service;
|
|
166
|
-
return __generator(this, function (_a) {
|
|
167
|
-
switch (_a.label) {
|
|
168
|
-
case 0:
|
|
169
|
-
service = new VisionService(runtime);
|
|
170
|
-
return [4 /*yield*/, service.initialize()];
|
|
171
|
-
case 1:
|
|
172
|
-
_a.sent();
|
|
173
|
-
return [2 /*return*/, service];
|
|
174
|
-
}
|
|
175
|
-
});
|
|
176
|
-
});
|
|
177
|
-
};
|
|
178
|
-
VisionService.prototype.checkCameraTools = function () {
|
|
179
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
180
|
-
var platform, _error_1;
|
|
181
|
-
return __generator(this, function (_a) {
|
|
182
|
-
switch (_a.label) {
|
|
183
|
-
case 0:
|
|
184
|
-
platform = process.platform;
|
|
185
|
-
_a.label = 1;
|
|
186
|
-
case 1:
|
|
187
|
-
_a.trys.push([1, 8, , 9]);
|
|
188
|
-
if (!(platform === 'darwin')) return [3 /*break*/, 3];
|
|
189
|
-
// Check if imagesnap is installed
|
|
190
|
-
return [4 /*yield*/, execAsync('which imagesnap')];
|
|
191
|
-
case 2:
|
|
192
|
-
// Check if imagesnap is installed
|
|
193
|
-
_a.sent();
|
|
194
|
-
return [2 /*return*/, { available: true, tool: 'imagesnap' }];
|
|
195
|
-
case 3:
|
|
196
|
-
if (!(platform === 'linux')) return [3 /*break*/, 5];
|
|
197
|
-
// Check if fswebcam is installed
|
|
198
|
-
return [4 /*yield*/, execAsync('which fswebcam')];
|
|
199
|
-
case 4:
|
|
200
|
-
// Check if fswebcam is installed
|
|
201
|
-
_a.sent();
|
|
202
|
-
return [2 /*return*/, { available: true, tool: 'fswebcam' }];
|
|
203
|
-
case 5:
|
|
204
|
-
if (!(platform === 'win32')) return [3 /*break*/, 7];
|
|
205
|
-
// Check if ffmpeg is available
|
|
206
|
-
return [4 /*yield*/, execAsync('where ffmpeg')];
|
|
207
|
-
case 6:
|
|
208
|
-
// Check if ffmpeg is available
|
|
209
|
-
_a.sent();
|
|
210
|
-
return [2 /*return*/, { available: true, tool: 'ffmpeg' }];
|
|
211
|
-
case 7: return [2 /*return*/, { available: false, tool: 'none' }];
|
|
212
|
-
case 8:
|
|
213
|
-
_error_1 = _a.sent();
|
|
214
|
-
// Tool not found
|
|
215
|
-
return [2 /*return*/, { available: false, tool: 'none' }];
|
|
216
|
-
case 9: return [2 /*return*/];
|
|
217
|
-
}
|
|
218
|
-
});
|
|
219
|
-
});
|
|
220
|
-
};
|
|
221
|
-
VisionService.prototype.initialize = function () {
|
|
222
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
223
|
-
var useEnhancedModels, _tfError_1, error_1;
|
|
224
|
-
return __generator(this, function (_a) {
|
|
225
|
-
switch (_a.label) {
|
|
226
|
-
case 0:
|
|
227
|
-
_a.trys.push([0, 11, , 12]);
|
|
228
|
-
useEnhancedModels = this.visionConfig.enableObjectDetection || this.visionConfig.enablePoseDetection;
|
|
229
|
-
if (!useEnhancedModels) return [3 /*break*/, 5];
|
|
230
|
-
_a.label = 1;
|
|
231
|
-
case 1:
|
|
232
|
-
_a.trys.push([1, 3, , 5]);
|
|
233
|
-
// Try to initialize TensorFlow models first
|
|
234
|
-
return [4 /*yield*/, this.visionModels.initialize({
|
|
235
|
-
enableObjectDetection: this.visionConfig.enableObjectDetection || false,
|
|
236
|
-
enablePoseDetection: this.visionConfig.enablePoseDetection || false,
|
|
237
|
-
})];
|
|
238
|
-
case 2:
|
|
239
|
-
// Try to initialize TensorFlow models first
|
|
240
|
-
_a.sent();
|
|
241
|
-
core_1.logger.info('[VisionService] Using TensorFlow.js models for advanced detection');
|
|
242
|
-
return [3 /*break*/, 5];
|
|
243
|
-
case 3:
|
|
244
|
-
_tfError_1 = _a.sent();
|
|
245
|
-
core_1.logger.warn('[VisionService] TensorFlow.js not available, falling back to enhanced heuristics');
|
|
246
|
-
// Fall back to enhanced heuristics
|
|
247
|
-
return [4 /*yield*/, this.visionModels.initialize({
|
|
248
|
-
enableObjectDetection: this.visionConfig.enableObjectDetection || false,
|
|
249
|
-
enablePoseDetection: this.visionConfig.enablePoseDetection || false,
|
|
250
|
-
})];
|
|
251
|
-
case 4:
|
|
252
|
-
// Fall back to enhanced heuristics
|
|
253
|
-
_a.sent();
|
|
254
|
-
core_1.logger.info('[VisionService] Using enhanced heuristics for detection');
|
|
255
|
-
return [3 /*break*/, 5];
|
|
256
|
-
case 5:
|
|
257
|
-
if (!(this.visionConfig.visionMode === types_1.VisionMode.SCREEN ||
|
|
258
|
-
this.visionConfig.visionMode === types_1.VisionMode.BOTH)) return [3 /*break*/, 7];
|
|
259
|
-
return [4 /*yield*/, this.initializeScreenVision()];
|
|
260
|
-
case 6:
|
|
261
|
-
_a.sent();
|
|
262
|
-
_a.label = 7;
|
|
263
|
-
case 7:
|
|
264
|
-
if (!(this.visionConfig.visionMode === types_1.VisionMode.CAMERA ||
|
|
265
|
-
this.visionConfig.visionMode === types_1.VisionMode.BOTH)) return [3 /*break*/, 9];
|
|
266
|
-
return [4 /*yield*/, this.initializeCameraVision()];
|
|
267
|
-
case 8:
|
|
268
|
-
_a.sent();
|
|
269
|
-
_a.label = 9;
|
|
270
|
-
case 9:
|
|
271
|
-
// Initialize audio capture if enabled
|
|
272
|
-
return [4 /*yield*/, this.initializeAudioCapture()];
|
|
273
|
-
case 10:
|
|
274
|
-
// Initialize audio capture if enabled
|
|
275
|
-
_a.sent();
|
|
276
|
-
// Start processing based on mode
|
|
277
|
-
this.startProcessing();
|
|
278
|
-
return [3 /*break*/, 12];
|
|
279
|
-
case 11:
|
|
280
|
-
error_1 = _a.sent();
|
|
281
|
-
core_1.logger.error('[VisionService] Failed to initialize:', error_1);
|
|
282
|
-
return [3 /*break*/, 12];
|
|
283
|
-
case 12: return [2 /*return*/];
|
|
284
|
-
}
|
|
285
|
-
});
|
|
286
|
-
});
|
|
287
|
-
};
|
|
288
|
-
VisionService.prototype.initializeScreenVision = function () {
|
|
289
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
290
|
-
var useWorkers, screenInfo, error_2;
|
|
291
|
-
return __generator(this, function (_a) {
|
|
292
|
-
switch (_a.label) {
|
|
293
|
-
case 0:
|
|
294
|
-
_a.trys.push([0, 8, , 9]);
|
|
295
|
-
core_1.logger.info('[VisionService] Initializing screen vision...');
|
|
296
|
-
useWorkers = this.visionConfig.targetScreenFPS && this.visionConfig.targetScreenFPS > 10;
|
|
297
|
-
if (!useWorkers) return [3 /*break*/, 2];
|
|
298
|
-
// Initialize worker manager for high-FPS processing
|
|
299
|
-
core_1.logger.info('[VisionService] Initializing worker threads for high-FPS processing...');
|
|
300
|
-
this.workerManager = new vision_worker_manager_1.VisionWorkerManager(this.visionConfig);
|
|
301
|
-
return [4 /*yield*/, this.workerManager.initialize()];
|
|
302
|
-
case 1:
|
|
303
|
-
_a.sent();
|
|
304
|
-
core_1.logger.info('[VisionService] Worker threads initialized');
|
|
305
|
-
return [3 /*break*/, 6];
|
|
306
|
-
case 2:
|
|
307
|
-
if (!this.visionConfig.florence2Enabled) return [3 /*break*/, 4];
|
|
308
|
-
return [4 /*yield*/, this.florence2.initialize()];
|
|
309
|
-
case 3:
|
|
310
|
-
_a.sent();
|
|
311
|
-
_a.label = 4;
|
|
312
|
-
case 4:
|
|
313
|
-
if (!this.visionConfig.ocrEnabled) return [3 /*break*/, 6];
|
|
314
|
-
return [4 /*yield*/, this.ocrService.initialize()];
|
|
315
|
-
case 5:
|
|
316
|
-
_a.sent();
|
|
317
|
-
_a.label = 6;
|
|
318
|
-
case 6: return [4 /*yield*/, this.screenCapture.getScreenInfo()];
|
|
319
|
-
case 7:
|
|
320
|
-
screenInfo = _a.sent();
|
|
321
|
-
if (screenInfo) {
|
|
322
|
-
core_1.logger.info("[VisionService] Screen resolution: ".concat(screenInfo.width, "x").concat(screenInfo.height));
|
|
323
|
-
}
|
|
324
|
-
core_1.logger.info('[VisionService] Screen vision initialized');
|
|
325
|
-
return [3 /*break*/, 9];
|
|
326
|
-
case 8:
|
|
327
|
-
error_2 = _a.sent();
|
|
328
|
-
core_1.logger.error('[VisionService] Failed to initialize screen vision:', error_2);
|
|
329
|
-
return [3 /*break*/, 9];
|
|
330
|
-
case 9: return [2 /*return*/];
|
|
331
|
-
}
|
|
332
|
-
});
|
|
333
|
-
});
|
|
334
|
-
};
|
|
335
|
-
VisionService.prototype.initializeCameraVision = function () {
|
|
336
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
337
|
-
var toolCheck, platform, toolName, camera;
|
|
338
|
-
return __generator(this, function (_a) {
|
|
339
|
-
switch (_a.label) {
|
|
340
|
-
case 0: return [4 /*yield*/, this.checkCameraTools()];
|
|
341
|
-
case 1:
|
|
342
|
-
toolCheck = _a.sent();
|
|
343
|
-
if (!toolCheck.available) {
|
|
344
|
-
platform = process.platform;
|
|
345
|
-
toolName = platform === 'darwin' ? 'imagesnap' : platform === 'linux' ? 'fswebcam' : 'ffmpeg';
|
|
346
|
-
core_1.logger.warn("[VisionService] Camera capture tool '".concat(toolName, "' not found. Install it to enable camera functionality."));
|
|
347
|
-
core_1.logger.warn('[VisionService] For macOS: brew install imagesnap');
|
|
348
|
-
core_1.logger.warn('[VisionService] For Linux: sudo apt-get install fswebcam');
|
|
349
|
-
core_1.logger.warn('[VisionService] For Windows: Install ffmpeg and add to PATH');
|
|
350
|
-
return [2 /*return*/];
|
|
351
|
-
}
|
|
352
|
-
return [4 /*yield*/, this.findCamera()];
|
|
353
|
-
case 2:
|
|
354
|
-
camera = _a.sent();
|
|
355
|
-
if (camera) {
|
|
356
|
-
this.camera = camera;
|
|
357
|
-
core_1.logger.info("[VisionService] Connected to camera: ".concat(camera.name));
|
|
358
|
-
}
|
|
359
|
-
else {
|
|
360
|
-
core_1.logger.warn('[VisionService] No suitable camera found');
|
|
361
|
-
}
|
|
362
|
-
return [2 /*return*/];
|
|
363
|
-
}
|
|
364
|
-
});
|
|
365
|
-
});
|
|
366
|
-
};
|
|
367
|
-
VisionService.prototype.initializeAudioCapture = function () {
|
|
368
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
369
|
-
var enableMicrophone, useStreamingAudio, streamingConfig, audioConfig, error_3;
|
|
370
|
-
var _this = this;
|
|
371
|
-
return __generator(this, function (_a) {
|
|
372
|
-
switch (_a.label) {
|
|
373
|
-
case 0:
|
|
374
|
-
enableMicrophone = this.runtime.getSetting('ENABLE_MICROPHONE') === 'true';
|
|
375
|
-
useStreamingAudio = this.runtime.getSetting('USE_STREAMING_AUDIO') === 'true';
|
|
376
|
-
if (!enableMicrophone) {
|
|
377
|
-
core_1.logger.info('[VisionService] Microphone capture disabled');
|
|
378
|
-
return [2 /*return*/];
|
|
379
|
-
}
|
|
380
|
-
_a.label = 1;
|
|
381
|
-
case 1:
|
|
382
|
-
_a.trys.push([1, 6, , 7]);
|
|
383
|
-
if (!useStreamingAudio) return [3 /*break*/, 3];
|
|
384
|
-
streamingConfig = {
|
|
385
|
-
enabled: true,
|
|
386
|
-
sampleRate: 16000,
|
|
387
|
-
channels: 1,
|
|
388
|
-
vadThreshold: Number(this.runtime.getSetting('VAD_THRESHOLD')) || 0.01,
|
|
389
|
-
silenceTimeout: Number(this.runtime.getSetting('SILENCE_TIMEOUT')) || 1500,
|
|
390
|
-
responseDelay: Number(this.runtime.getSetting('RESPONSE_DELAY')) || 3000,
|
|
391
|
-
};
|
|
392
|
-
this.streamingAudioCapture = new audio_capture_stream_1.StreamingAudioCaptureService(this.runtime, streamingConfig);
|
|
393
|
-
// Set up event listeners
|
|
394
|
-
this.streamingAudioCapture.on('speechStart', function () {
|
|
395
|
-
core_1.logger.info('[VisionService] User started speaking');
|
|
396
|
-
});
|
|
397
|
-
this.streamingAudioCapture.on('speechEnd', function () {
|
|
398
|
-
core_1.logger.info('[VisionService] User stopped speaking');
|
|
399
|
-
});
|
|
400
|
-
this.streamingAudioCapture.on('transcription', function (data) {
|
|
401
|
-
core_1.logger.info("[VisionService] Transcription (".concat(data.isFinal ? 'final' : 'partial', "): ").concat(data.text));
|
|
402
|
-
});
|
|
403
|
-
this.streamingAudioCapture.on('utteranceComplete', function (text) { return __awaiter(_this, void 0, void 0, function () {
|
|
404
|
-
return __generator(this, function (_a) {
|
|
405
|
-
switch (_a.label) {
|
|
406
|
-
case 0:
|
|
407
|
-
core_1.logger.info('[VisionService] Processing complete utterance:', text);
|
|
408
|
-
// Store the transcription in memory for context
|
|
409
|
-
return [4 /*yield*/, this.storeAudioTranscription(text)];
|
|
410
|
-
case 1:
|
|
411
|
-
// Store the transcription in memory for context
|
|
412
|
-
_a.sent();
|
|
413
|
-
return [2 /*return*/];
|
|
414
|
-
}
|
|
415
|
-
});
|
|
416
|
-
}); });
|
|
417
|
-
return [4 /*yield*/, this.streamingAudioCapture.initialize()];
|
|
418
|
-
case 2:
|
|
419
|
-
_a.sent();
|
|
420
|
-
core_1.logger.info('[VisionService] Streaming audio capture initialized with VAD');
|
|
421
|
-
return [3 /*break*/, 5];
|
|
422
|
-
case 3:
|
|
423
|
-
audioConfig = {
|
|
424
|
-
enabled: true,
|
|
425
|
-
transcriptionInterval: Number(this.runtime.getSetting('TRANSCRIPTION_INTERVAL')) || 30000,
|
|
426
|
-
};
|
|
427
|
-
this.audioCapture = new audio_capture_1.AudioCaptureService(this.runtime, audioConfig);
|
|
428
|
-
return [4 /*yield*/, this.audioCapture.initialize()];
|
|
429
|
-
case 4:
|
|
430
|
-
_a.sent();
|
|
431
|
-
core_1.logger.info('[VisionService] Batch audio capture initialized');
|
|
432
|
-
_a.label = 5;
|
|
433
|
-
case 5: return [3 /*break*/, 7];
|
|
434
|
-
case 6:
|
|
435
|
-
error_3 = _a.sent();
|
|
436
|
-
core_1.logger.error('[VisionService] Failed to initialize audio capture:', error_3);
|
|
437
|
-
return [3 /*break*/, 7];
|
|
438
|
-
case 7: return [2 /*return*/];
|
|
439
|
-
}
|
|
440
|
-
});
|
|
441
|
-
});
|
|
442
|
-
};
|
|
443
|
-
VisionService.prototype.storeAudioTranscription = function (text) {
|
|
444
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
445
|
-
return __generator(this, function (_a) {
|
|
446
|
-
try {
|
|
447
|
-
// Store transcription in the current scene description
|
|
448
|
-
if (this.lastSceneDescription) {
|
|
449
|
-
this.lastSceneDescription.audioTranscription = text;
|
|
450
|
-
}
|
|
451
|
-
// You could also create a memory here if needed
|
|
452
|
-
core_1.logger.debug('[VisionService] Stored audio transcription in scene context');
|
|
453
|
-
}
|
|
454
|
-
catch (error) {
|
|
455
|
-
core_1.logger.error('[VisionService] Failed to store audio transcription:', error);
|
|
456
|
-
}
|
|
457
|
-
return [2 /*return*/];
|
|
458
|
-
});
|
|
459
|
-
});
|
|
460
|
-
};
|
|
461
|
-
VisionService.prototype.startProcessing = function () {
|
|
462
|
-
// Start camera processing if enabled
|
|
463
|
-
if ((this.visionConfig.visionMode === types_1.VisionMode.CAMERA ||
|
|
464
|
-
this.visionConfig.visionMode === types_1.VisionMode.BOTH) &&
|
|
465
|
-
this.camera) {
|
|
466
|
-
this.startFrameProcessing();
|
|
467
|
-
}
|
|
468
|
-
// Start screen processing if enabled
|
|
469
|
-
if (this.visionConfig.visionMode === types_1.VisionMode.SCREEN ||
|
|
470
|
-
this.visionConfig.visionMode === types_1.VisionMode.BOTH) {
|
|
471
|
-
this.startScreenProcessing();
|
|
472
|
-
}
|
|
473
|
-
};
|
|
474
|
-
VisionService.prototype.startFrameProcessing = function () {
|
|
475
|
-
var _this = this;
|
|
476
|
-
if (this.frameProcessingInterval) {
|
|
477
|
-
return;
|
|
478
|
-
}
|
|
479
|
-
this.frameProcessingInterval = setInterval(function () { return __awaiter(_this, void 0, void 0, function () {
|
|
480
|
-
var error_4;
|
|
481
|
-
return __generator(this, function (_a) {
|
|
482
|
-
switch (_a.label) {
|
|
483
|
-
case 0:
|
|
484
|
-
if (!(!this.isProcessing && this.camera)) return [3 /*break*/, 5];
|
|
485
|
-
this.isProcessing = true;
|
|
486
|
-
_a.label = 1;
|
|
487
|
-
case 1:
|
|
488
|
-
_a.trys.push([1, 3, , 4]);
|
|
489
|
-
return [4 /*yield*/, this.captureAndProcessFrame()];
|
|
490
|
-
case 2:
|
|
491
|
-
_a.sent();
|
|
492
|
-
return [3 /*break*/, 4];
|
|
493
|
-
case 3:
|
|
494
|
-
error_4 = _a.sent();
|
|
495
|
-
core_1.logger.error('[VisionService] Frame processing error:', error_4);
|
|
496
|
-
return [3 /*break*/, 4];
|
|
497
|
-
case 4:
|
|
498
|
-
this.isProcessing = false;
|
|
499
|
-
_a.label = 5;
|
|
500
|
-
case 5: return [2 /*return*/];
|
|
501
|
-
}
|
|
502
|
-
});
|
|
503
|
-
}); }, this.visionConfig.updateInterval || 100);
|
|
504
|
-
core_1.logger.debug('[VisionService] Started frame processing loop');
|
|
505
|
-
};
|
|
506
|
-
/**
 * Capture one camera frame, decode it, measure how far it differs from the
 * previously stored frame and pass both to updateSceneDescription().
 *
 * Returns a promise resolving to undefined. Errors raised while capturing or
 * processing are logged and not propagated to the caller.
 */
VisionService.prototype.captureAndProcessFrame = function () {
    return __awaiter(this, void 0, void 0, function () {
        var rawFrame, decoded, changePct, captureErr;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    // No camera attached: nothing to capture.
                    if (!this.camera) {
                        return [2 /*return*/];
                    }
                    state.label = 1;
                case 1:
                    // try { ... } region; case 8 is the catch handler.
                    state.trys.push([1, 8, , 9]);
                    return [4 /*yield*/, this.camera.capture()];
                case 2:
                    rawFrame = state.sent();
                    if (!rawFrame || rawFrame.length === 0) {
                        core_1.logger.debug('[VisionService] Camera returned empty frame, skipping');
                        return [2 /*return*/];
                    }
                    return [4 /*yield*/, this.processFrameData(rawFrame)];
                case 3:
                    decoded = state.sent();
                    if (!decoded || decoded.width === 0 || decoded.height === 0) {
                        core_1.logger.warn('[VisionService] Invalid frame dimensions, skipping');
                        return [2 /*return*/];
                    }
                    // The very first frame has nothing to be compared against.
                    if (!this.lastFrame) return [3 /*break*/, 5];
                    return [4 /*yield*/, this.calculatePixelChange(this.lastFrame, decoded)];
                case 4:
                    changePct = state.sent();
                    return [3 /*break*/, 6];
                case 5:
                    // No previous frame: report a complete change.
                    changePct = 100;
                    state.label = 6;
                case 6:
                    // updateSceneDescription is always invoked; it applies its own
                    // time/change thresholds to decide what gets refreshed.
                    return [4 /*yield*/, this.updateSceneDescription(decoded, changePct)];
                case 7:
                    state.sent();
                    this.lastFrame = decoded;
                    return [3 /*break*/, 9];
                case 8:
                    captureErr = state.sent();
                    core_1.logger.error('[VisionService] Error capturing frame:', captureErr);
                    return [3 /*break*/, 9];
                case 9: return [2 /*return*/];
            }
        });
    });
};
|
|
562
|
-
/**
 * Decode an encoded camera image into a raw 4-channel frame record.
 *
 * @param data Encoded image bytes handed to sharp.
 * @returns Promise of `{ timestamp, width, height, data, format: 'rgba' }`
 *          where `data` is the buffer produced by `ensureAlpha().raw()`.
 * @throws Error when `data` is empty or sharp reports no usable dimensions
 *         (surfaced as a rejected promise).
 */
VisionService.prototype.processFrameData = function (data) {
    return __awaiter(this, void 0, void 0, function () {
        var pipeline, info, pixels;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    if (!data || data.length === 0) {
                        throw new Error('Empty frame data received from camera');
                    }
                    pipeline = (0, sharp_1.default)(data);
                    return [4 /*yield*/, pipeline.metadata()];
                case 1:
                    info = state.sent();
                    // A missing or zero dimension is falsy either way.
                    if (!info.width || !info.height) {
                        throw new Error("Invalid image dimensions: ".concat(info.width, "x").concat(info.height));
                    }
                    return [4 /*yield*/, pipeline.ensureAlpha().raw().toBuffer()];
                case 2:
                    pixels = state.sent();
                    return [2 /*return*/, {
                            timestamp: Date.now(),
                            width: info.width,
                            height: info.height,
                            data: pixels,
                            format: 'rgba',
                        }];
            }
        });
    });
};
|
|
594
|
-
/**
 * Percentage of pixels whose colour differs noticeably between two frames.
 *
 * A pixel counts as changed when the summed absolute difference of its first
 * three channels exceeds 30. The fourth byte of every 4-byte pixel is ignored.
 *
 * @param frame1 Earlier frame (`width`, `height`, `data` with 4 bytes/pixel).
 * @param frame2 Later frame, same layout.
 * @returns Promise of a number: 100 when the dimensions differ, 0 when the
 *          frames contain no pixels, otherwise changed / (width * height) * 100.
 */
VisionService.prototype.calculatePixelChange = function (frame1, frame2) {
    return __awaiter(this, void 0, void 0, function () {
        var before, after, changedPixels, totalPixels, threshold, limit, i, diff;
        return __generator(this, function (_a) {
            if (frame1.width !== frame2.width || frame1.height !== frame2.height) {
                return [2 /*return*/, 100]; // Different dimensions = complete change
            }
            totalPixels = frame1.width * frame1.height;
            if (totalPixels === 0) {
                // Avoid 0 / 0 = NaN for degenerate frames.
                return [2 /*return*/, 0];
            }
            before = frame1.data;
            after = frame2.data;
            changedPixels = 0;
            threshold = 30;
            // Only walk pixels that exist in BOTH buffers so a short buffer is
            // never indexed past its end.
            limit = Math.min(before.length, after.length);
            for (i = 0; i + 3 < limit; i += 4) {
                diff = Math.abs(before[i] - after[i]) +
                    Math.abs(before[i + 1] - after[i + 1]) +
                    Math.abs(before[i + 2] - after[i + 2]);
                if (diff > threshold) {
                    changedPixels++;
                }
            }
            return [2 /*return*/, (changedPixels / totalPixels) * 100];
        });
    });
};
|
|
622
|
-
/**
 * Rebuild `this.lastSceneDescription` for a freshly decoded frame.
 *
 * Steps, in order:
 *   1. encode the raw frame as JPEG and wrap it in a data: URL;
 *   2. refresh the textual description through describeSceneWithVLM() when
 *      the VLM interval elapsed or the change threshold was reached;
 *   3. run object / pose detection when the TF interval elapsed or its change
 *      threshold was reached, otherwise reuse the previous results or fall
 *      back to motion-based detection;
 *   4. optionally match detected faces to people (ENABLE_FACE_RECOGNITION);
 *   5. feed everything to the entity tracker and store the new description.
 *
 * @param frame            Decoded frame (`data`, `width`, `height`, `timestamp`).
 * @param changePercentage Change relative to the previous frame, in percent.
 * @returns Promise resolving to undefined; failures are logged, not rethrown.
 */
VisionService.prototype.updateSceneDescription = function (frame, changePercentage) {
    return __awaiter(this, void 0, void 0, function () {
        var now, jpeg, imageUrl, vlmAge, refreshVlm, description, tfAge, refreshTf, objects, people, faceProfiles, faceRecognitionOn, faces, faceIdx, face, faceBox, personIdx, person, overlap, match, profileId, faceErr, typeCounts, sceneErr;
        var agForAge, agForGender;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    // Outer try; case 27 is its catch handler.
                    state.trys.push([0, 27, , 28]);
                    now = Date.now();
                    return [4 /*yield*/, (0, sharp_1.default)(frame.data, {
                            raw: {
                                width: frame.width,
                                height: frame.height,
                                channels: 4,
                            },
                        })
                            .jpeg()
                            .toBuffer()];
                case 1:
                    jpeg = state.sent();
                    imageUrl = "data:image/jpeg;base64,".concat(jpeg.toString('base64'));
                    vlmAge = now - this.lastVlmUpdateTime;
                    refreshVlm =
                        vlmAge >= this.visionConfig.vlmUpdateInterval || // time threshold
                            changePercentage >= this.visionConfig.vlmChangeThreshold;
                    // Start from the cached description; replaced below if refreshed.
                    description = this.lastTfDescription;
                    if (!refreshVlm) return [3 /*break*/, 3];
                    return [4 /*yield*/, this.describeSceneWithVLM(imageUrl)];
                case 2:
                    description = state.sent();
                    this.lastVlmUpdateTime = now;
                    this.lastTfDescription = description;
                    core_1.logger.debug("[VisionService] VLM updated: ".concat(vlmAge, "ms since last update, ").concat(changePercentage.toFixed(1), "% change"));
                    state.label = 3;
                case 3:
                    tfAge = now - this.lastTfUpdateTime;
                    refreshTf =
                        tfAge >= this.visionConfig.tfUpdateInterval || // time threshold
                            changePercentage >= this.visionConfig.tfChangeThreshold;
                    objects = [];
                    people = [];
                    if (!(refreshTf &&
                        (this.visionConfig.enableObjectDetection || this.visionConfig.enablePoseDetection))) return [3 /*break*/, 8];
                    this.lastTfUpdateTime = now;
                    core_1.logger.debug("[VisionService] TF updating: ".concat(tfAge, "ms since last update, ").concat(changePercentage.toFixed(1), "% change"));
                    if (!this.visionConfig.enableObjectDetection) return [3 /*break*/, 5];
                    if (!this.visionModels.hasObjectDetection()) return [3 /*break*/, 5];
                    return [4 /*yield*/, this.visionModels.detectObjects(frame.data, frame.width, frame.height)];
                case 4:
                    objects = state.sent();
                    core_1.logger.debug("[VisionService] VisionModels detected ".concat(objects.length, " objects"));
                    state.label = 5;
                case 5:
                    if (!this.visionConfig.enablePoseDetection) return [3 /*break*/, 7];
                    if (!this.visionModels.hasPoseDetection()) return [3 /*break*/, 7];
                    return [4 /*yield*/, this.visionModels.detectPoses(frame.data, frame.width, frame.height)];
                case 6:
                    people = state.sent();
                    core_1.logger.debug("[VisionService] VisionModels detected ".concat(people.length, " people with poses"));
                    state.label = 7;
                case 7:
                    // Pose detection found nobody: promote 'person' objects instead.
                    if (people.length === 0 && objects.length > 0) {
                        people = objects
                            .filter(function (obj) { return obj.type === 'person'; })
                            .map(function (obj) { return ({
                            id: "person-".concat(obj.id),
                            pose: 'unknown',
                            facing: 'unknown',
                            confidence: obj.confidence,
                            boundingBox: obj.boundingBox,
                        }); });
                    }
                    return [3 /*break*/, 12];
                case 8:
                    if (!(!refreshTf && this.lastSceneDescription)) return [3 /*break*/, 9];
                    // Not due for a refresh: carry the previous detections over.
                    objects = this.lastSceneDescription.objects;
                    people = this.lastSceneDescription.people;
                    return [3 /*break*/, 12];
                case 9: return [4 /*yield*/, this.detectMotionObjects(frame)];
                case 10:
                    // Motion-based fallback.
                    objects = state.sent();
                    return [4 /*yield*/, this.detectPeopleFromMotion(frame, objects)];
                case 11:
                    people = state.sent();
                    state.label = 12;
                case 12:
                    faceProfiles = new Map();
                    faceRecognitionOn = this.runtime.getSetting('ENABLE_FACE_RECOGNITION') === 'true';
                    if (!(faceRecognitionOn && people.length > 0 && frame.width > 0 && frame.height > 0)) return [3 /*break*/, 25];
                    state.label = 13;
                case 13:
                    // Inner try around face recognition; case 24 is its catch handler.
                    state.trys.push([13, 24, , 25]);
                    if (!frame.data || frame.data.length === 0) {
                        core_1.logger.warn('[VisionService] Invalid frame data for face recognition');
                        return [2 /*return*/];
                    }
                    return [4 /*yield*/, this.faceRecognition.detectFaces(frame.data, frame.width, frame.height)];
                case 14:
                    faces = state.sent();
                    faceIdx = 0;
                    state.label = 15;
                case 15:
                    // for each detected face ...
                    if (!(faceIdx < faces.length)) return [3 /*break*/, 23];
                    face = faces[faceIdx];
                    faceBox = face.detection.box;
                    personIdx = 0;
                    state.label = 16;
                case 16:
                    // ... find the first person whose box overlaps it by more than 0.5
                    if (!(personIdx < people.length)) return [3 /*break*/, 22];
                    person = people[personIdx];
                    overlap = this.calculateBoxOverlap(person.boundingBox, {
                        x: Math.round(faceBox.x),
                        y: Math.round(faceBox.y),
                        width: Math.round(faceBox.width),
                        height: Math.round(faceBox.height),
                    });
                    if (!(overlap > 0.5)) return [3 /*break*/, 21];
                    return [4 /*yield*/, this.faceRecognition.recognizeFace(face.descriptor)];
                case 17:
                    match = state.sent();
                    profileId = void 0;
                    if (!match) return [3 /*break*/, 18];
                    profileId = match.profileId;
                    core_1.logger.debug("[VisionService] Recognized face: ".concat(profileId, " (distance: ").concat(match.distance, ")"));
                    return [3 /*break*/, 20];
                case 18: return [4 /*yield*/, this.faceRecognition.addOrUpdateFace(face.descriptor, {
                        attributes: {
                            age: (agForAge = face.ageGender) === null || agForAge === void 0 ? void 0 : agForAge.age.toString(),
                            gender: (agForGender = face.ageGender) === null || agForGender === void 0 ? void 0 : agForGender.gender,
                            emotion: face.expressions
                                ? this.getDominantExpression(face.expressions)
                                : undefined,
                        },
                    })];
                case 19:
                    // Unknown face: it has just been registered.
                    profileId = state.sent();
                    core_1.logger.info("[VisionService] New face registered: ".concat(profileId));
                    state.label = 20;
                case 20:
                    faceProfiles.set(person.id, profileId);
                    // Stop scanning people for this face.
                    return [3 /*break*/, 22];
                case 21:
                    personIdx++;
                    return [3 /*break*/, 16];
                case 22:
                    faceIdx++;
                    return [3 /*break*/, 15];
                case 23: return [3 /*break*/, 25];
                case 24:
                    faceErr = state.sent();
                    core_1.logger.error('[VisionService] Face recognition error:', faceErr);
                    return [3 /*break*/, 25];
                case 25: return [4 /*yield*/, this.entityTracker.updateEntities(objects, people, faceProfiles, this.runtime)];
                case 26:
                    // The tracker's return value is not used here.
                    state.sent();
                    this.lastSceneDescription = {
                        timestamp: frame.timestamp,
                        description: description,
                        objects: objects,
                        people: people,
                        sceneChanged: refreshVlm || refreshTf,
                        changePercentage: changePercentage,
                    };
                    if (refreshVlm || refreshTf) {
                        core_1.logger.info('[VisionService] Scene Analysis Complete:');
                        core_1.logger.info(" VLM Description: ".concat(description.substring(0, 100), "..."));
                        core_1.logger.info(" Change: ".concat(changePercentage.toFixed(1), "%"));
                        core_1.logger.info(" Updates: ".concat(refreshVlm ? 'VLM' : '').concat(refreshVlm && refreshTf ? ' + ' : '').concat(refreshTf ? 'TF' : ''));
                        core_1.logger.info(" Detection Mode: ".concat(this.visionConfig.enableObjectDetection ? 'Advanced CV' : 'Motion-based'));
                        if (objects.length > 0) {
                            core_1.logger.info(" Objects: ".concat(objects.length, " detected"));
                            // Tally objects per type for the log.
                            typeCounts = objects.reduce(function (acc, obj) {
                                acc[obj.type] = (acc[obj.type] || 0) + 1;
                                return acc;
                            }, {});
                            Object.entries(typeCounts).forEach(function (entry) {
                                core_1.logger.info(" - ".concat(entry[1], " ").concat(entry[0], "(s)"));
                            });
                        }
                        if (people.length > 0) {
                            core_1.logger.info(" People: ".concat(people.length, " detected"));
                            people.forEach(function (p) {
                                core_1.logger.info(" - Person: ".concat(p.pose, " pose, facing ").concat(p.facing, ", confidence: ").concat(p.confidence.toFixed(2)));
                            });
                        }
                    }
                    return [3 /*break*/, 28];
                case 27:
                    sceneErr = state.sent();
                    core_1.logger.error('[VisionService] Failed to update scene description:', sceneErr);
                    return [3 /*break*/, 28];
                case 28: return [2 /*return*/];
            }
        });
    });
};
|
|
827
|
-
/**
 * Produce a textual description for an image.
 *
 * Sources are tried in order; the first one yielding usable text wins:
 *   1. Florence-2 (only for data: URLs and when it is initialized);
 *   2. the runtime's IMAGE_DESCRIPTION model, unless it answers with one of
 *      the known "unable to analyze images" phrases;
 *   3. a sentence assembled from the previous scene's people and objects;
 *   4. the fixed string 'Visual scene captured'.
 *
 * @param imageUrl Image reference, typically a base64 data: URL.
 * @returns Promise of the description; 'Unable to describe scene' if an
 *          unexpected error escapes the steps above.
 */
VisionService.prototype.describeSceneWithVLM = function (imageUrl) {
    return __awaiter(this, void 0, void 0, function () {
        var imageBytes, florenceResult, florenceErr, modelResult, modelText, modelErr, previous, objects, people, summary, knownPoses, distinctTypes, fatalErr;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    // Outer try; case 8 is its catch handler.
                    state.trys.push([0, 8, , 9]);
                    if (!imageUrl.startsWith('data:image/')) return [3 /*break*/, 4];
                    // Payload follows the first comma of the data: URL.
                    imageBytes = Buffer.from(imageUrl.split(',')[1], 'base64');
                    if (!this.florence2.isInitialized()) return [3 /*break*/, 4];
                    state.label = 1;
                case 1:
                    state.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.florence2.analyzeImage(imageBytes)];
                case 2:
                    florenceResult = state.sent();
                    if (florenceResult.caption) {
                        core_1.logger.debug('[VisionService] Florence-2 description:', florenceResult.caption);
                        return [2 /*return*/, florenceResult.caption];
                    }
                    return [3 /*break*/, 4];
                case 3:
                    florenceErr = state.sent();
                    core_1.logger.warn('[VisionService] Florence-2 analysis failed, falling back:', florenceErr);
                    return [3 /*break*/, 4];
                case 4:
                    state.trys.push([4, 6, , 7]);
                    return [4 /*yield*/, this.runtime.useModel(core_1.ModelType.IMAGE_DESCRIPTION, imageUrl)];
                case 5:
                    modelResult = state.sent();
                    if (modelResult && typeof modelResult === 'object' && 'description' in modelResult) {
                        modelText = modelResult.description;
                        // Reject the model's canned refusal.
                        if (!modelText.includes("I'm unable to analyze images") &&
                            !modelText.includes("I can't analyze images")) {
                            return [2 /*return*/, modelText];
                        }
                    }
                    else if (typeof modelResult === 'string') {
                        modelText = modelResult;
                        if (modelText.length > 0 &&
                            !modelText.includes("I'm unable to analyze images") &&
                            !modelText.includes("I can't analyze images")) {
                            return [2 /*return*/, modelText];
                        }
                    }
                    return [3 /*break*/, 7];
                case 6:
                    modelErr = state.sent();
                    core_1.logger.warn('[VisionService] Runtime IMAGE_DESCRIPTION model failed:', modelErr);
                    return [3 /*break*/, 7];
                case 7:
                    // Neither model helped: describe what the last scene contained.
                    previous = this.lastSceneDescription;
                    if (previous) {
                        objects = previous.objects;
                        people = previous.people;
                        if (people.length === 0 && objects.length === 0) {
                            return [2 /*return*/, 'Scene appears to be empty or static'];
                        }
                        summary = 'Scene contains';
                        if (people.length > 0) {
                            summary += " ".concat(people.length, " person").concat(people.length > 1 ? 's' : '');
                            knownPoses = people.map(function (p) { return p.pose; }).filter(function (p) { return p !== 'unknown'; });
                            if (knownPoses.length > 0) {
                                summary += " (".concat(knownPoses.join(', '), ")");
                            }
                            if (objects.length > 0) {
                                summary += ' and';
                            }
                        }
                        if (objects.length > 0) {
                            distinctTypes = __spreadArray([], new Set(objects.map(function (o) { return o.type; })), true);
                            summary += " ".concat(distinctTypes.join(', '));
                        }
                        return [2 /*return*/, summary];
                    }
                    // Last resort.
                    return [2 /*return*/, 'Visual scene captured'];
                case 8:
                    fatalErr = state.sent();
                    core_1.logger.error('[VisionService] VLM description failed:', fatalErr);
                    return [2 /*return*/, 'Unable to describe scene'];
                case 9: return [2 /*return*/];
            }
        });
    });
};
|
|
914
|
-
/**
 * Motion-based object detection by comparing `frame` with `this.lastFrame`.
 *
 * The frame is scanned with 64x64 blocks that overlap by half a block. Inside
 * each block every other pixel is sampled; a sample counts as "moving" when the
 * summed absolute difference of its first three channels exceeds 50. A block in
 * which more than 30% of the samples moved becomes a 'motion-object'. Blocks are
 * then combined via mergeAdjacentObjects() and results with an area of 2000 or
 * less are discarded.
 *
 * Fixes relative to the previous version:
 *   - the scan now includes the last block that fits entirely inside the frame
 *     (`<=` instead of `<`); previously the bottom/right edge was never
 *     examined and a frame exactly one block in size was never scanned at all;
 *   - when the previous frame has different dimensions, an empty list is
 *     returned instead of comparing pixels at mismatched strides.
 *
 * @param frame Decoded frame (`width`, `height`, `timestamp`, 4-byte-per-pixel `data`).
 * @returns Promise of the detected objects (empty when no previous frame exists).
 */
VisionService.prototype.detectMotionObjects = function (frame) {
    return __awaiter(this, void 0, void 0, function () {
        var previous, objects, blockSize, stride, motionThreshold, lastX, lastY, y, x, blockMotion, pixelCount, by, bx, idx, diff, motionPercentage, merged;
        return __generator(this, function (_a) {
            previous = this.lastFrame;
            if (!previous) {
                return [2 /*return*/, []];
            }
            if (previous.width !== frame.width || previous.height !== frame.height) {
                // Same byte offset would address different pixels in the two frames.
                return [2 /*return*/, []];
            }
            objects = [];
            blockSize = 64;
            stride = blockSize / 2; // blocks overlap by half
            motionThreshold = 50;
            // Top-left corner of the last block that still fits in the frame.
            lastX = frame.width - blockSize;
            lastY = frame.height - blockSize;
            for (y = 0; y <= lastY; y += stride) {
                for (x = 0; x <= lastX; x += stride) {
                    blockMotion = 0;
                    pixelCount = 0;
                    // Sample every other pixel for speed.
                    for (by = 0; by < blockSize; by += 2) {
                        for (bx = 0; bx < blockSize; bx += 2) {
                            idx = ((y + by) * frame.width + (x + bx)) * 4;
                            if (idx < frame.data.length && idx < previous.data.length) {
                                diff = Math.abs(frame.data[idx] - previous.data[idx]) +
                                    Math.abs(frame.data[idx + 1] - previous.data[idx + 1]) +
                                    Math.abs(frame.data[idx + 2] - previous.data[idx + 2]);
                                if (diff > motionThreshold) {
                                    blockMotion++;
                                }
                                pixelCount++;
                            }
                        }
                    }
                    if (pixelCount === 0) {
                        // Nothing sampled (truncated buffer): no evidence of motion.
                        continue;
                    }
                    motionPercentage = (blockMotion / pixelCount) * 100;
                    if (motionPercentage > 30) {
                        objects.push({
                            id: "motion-".concat(x, "-").concat(y, "-").concat(frame.timestamp),
                            type: 'motion-object',
                            confidence: Math.min(motionPercentage / 100, 1),
                            boundingBox: {
                                x: x,
                                y: y,
                                width: blockSize,
                                height: blockSize,
                            },
                        });
                    }
                }
            }
            merged = this.mergeAdjacentObjects(objects);
            // Drop results that are too small to be meaningful.
            return [2 /*return*/, merged.filter(function (obj) {
                    return obj.boundingBox.width * obj.boundingBox.height > 2000;
                })];
        });
    });
};
|
|
978
|
-
/**
 * Group objects whose bounding-box origins lie within 80 units of each other
 * on both axes (transitively) and replace every group by one object.
 *
 * Each merged object spans the union of its members' boxes, carries the mean
 * member confidence, gets its type from classifyObjectBySize() and an id of the
 * form `merged-<minX>-<minY>-<Date.now()>`.
 *
 * @param objects Detected objects with `boundingBox` and `confidence`.
 * @returns New array of merged objects (empty for empty input).
 */
VisionService.prototype.mergeAdjacentObjects = function (objects) {
    var merged = [];
    var claimed = new Set();
    var mergeDistance = 80; // max origin distance for two objects to be neighbours
    var areNeighbours = function (a, b) {
        return Math.abs(a.boundingBox.x - b.boundingBox.x) <= mergeDistance &&
            Math.abs(a.boundingBox.y - b.boundingBox.y) <= mergeDistance;
    };
    for (var seed = 0; seed < objects.length; seed++) {
        if (claimed.has(seed)) {
            continue;
        }
        var cluster = [objects[seed]];
        claimed.add(seed);
        // Keep sweeping until a full pass adds no new member.
        var grew = void 0;
        do {
            grew = false;
            for (var j = 0; j < objects.length; j++) {
                if (claimed.has(j)) {
                    continue;
                }
                var candidate = objects[j];
                var joins = cluster.some(function (member) { return areNeighbours(member, candidate); });
                if (joins) {
                    cluster.push(candidate);
                    claimed.add(j);
                    grew = true;
                }
            }
        } while (grew);
        // Collapse the cluster into its bounding union.
        var lefts = cluster.map(function (o) { return o.boundingBox.x; });
        var tops = cluster.map(function (o) { return o.boundingBox.y; });
        var rights = cluster.map(function (o) { return o.boundingBox.x + o.boundingBox.width; });
        var bottoms = cluster.map(function (o) { return o.boundingBox.y + o.boundingBox.height; });
        var minX = Math.min.apply(Math, lefts);
        var minY = Math.min.apply(Math, tops);
        var spanX = Math.max.apply(Math, rights) - minX;
        var spanY = Math.max.apply(Math, bottoms) - minY;
        var confidenceSum = cluster.reduce(function (sum, o) { return sum + o.confidence; }, 0);
        merged.push({
            id: "merged-".concat(minX, "-").concat(minY, "-").concat(Date.now()),
            type: this.classifyObjectBySize(spanX, spanY),
            confidence: confidenceSum / cluster.length,
            boundingBox: {
                x: minX,
                y: minY,
                width: spanX,
                height: spanY,
            },
        });
    }
    return merged;
};
|
|
1037
|
-
/**
 * Heuristically label a bounding box by its area and aspect ratio.
 *
 * @param width  Box width.
 * @param height Box height.
 * @returns 'person-candidate' (area > 30000 and width/height strictly between
 *          0.4 and 0.8), 'large-object' (area > 20000), 'medium-object'
 *          (area > 8000) or 'small-object'.
 */
VisionService.prototype.classifyObjectBySize = function (width, height) {
    var area = width * height;
    var aspectRatio = width / height;
    var looksUpright = aspectRatio > 0.4 && aspectRatio < 0.8;
    if (area > 30000 && looksUpright) {
        return 'person-candidate';
    }
    if (area > 20000) {
        return 'large-object';
    }
    return area > 8000 ? 'medium-object' : 'small-object';
};
|
|
1054
|
-
/**
 * Turn 'person-candidate' motion objects into person records.
 *
 * Pose is guessed from the box aspect ratio (width / height): below 0.6 is
 * 'standing', above 1.2 is 'lying', anything else is 'sitting'. Facing is
 * 'camera' whenever a previous frame exists and 'unknown' otherwise.
 *
 * @param frame   Current frame; only its `timestamp` is used (for ids).
 * @param objects Objects produced by motion detection.
 * @returns Promise of an array of `{ id, confidence, pose, facing, boundingBox }`.
 */
VisionService.prototype.detectPeopleFromMotion = function (frame, objects) {
    return __awaiter(this, void 0, void 0, function () {
        var facing, people;
        return __generator(this, function (state) {
            // Placeholder assumption until real pose detection supplies a direction.
            facing = this.lastFrame ? 'camera' : 'unknown';
            people = objects
                .filter(function (o) { return o.type === 'person-candidate'; })
                .map(function (candidate, index) {
                var box = candidate.boundingBox;
                var aspectRatio = box.width / box.height;
                var pose = 'sitting';
                if (aspectRatio < 0.6) {
                    pose = 'standing';
                }
                else if (aspectRatio > 1.2) {
                    pose = 'lying';
                }
                return {
                    id: "person-".concat(index, "-").concat(frame.timestamp),
                    confidence: candidate.confidence,
                    pose: pose,
                    facing: facing,
                    boundingBox: box,
                };
            });
            return [2 /*return*/, people];
        });
    });
};
|
|
1092
|
-
/**
 * Start the periodic screen-capture loop (no-op if it is already running).
 *
 * Every `visionConfig.screenCaptureInterval` ms (2000 when unset/falsy) one
 * captureAndProcessScreen() pass is started, unless the previous pass is still
 * in flight. Errors from a pass are logged and do not stop the loop.
 */
VisionService.prototype.startScreenProcessing = function () {
    var _this = this;
    if (this.screenProcessingInterval) {
        return;
    }
    var tick = function () { return __awaiter(_this, void 0, void 0, function () {
        var tickErr;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    // Skip this tick while an earlier pass is still running.
                    if (this.isProcessingScreen) {
                        return [2 /*return*/];
                    }
                    this.isProcessingScreen = true;
                    state.label = 1;
                case 1:
                    state.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.captureAndProcessScreen()];
                case 2:
                    state.sent();
                    return [3 /*break*/, 4];
                case 3:
                    tickErr = state.sent();
                    core_1.logger.error('[VisionService] Screen processing error:', tickErr);
                    return [3 /*break*/, 4];
                case 4:
                    // Always release the busy flag, success or failure.
                    this.isProcessingScreen = false;
                    return [2 /*return*/];
            }
        });
    }); };
    this.screenProcessingInterval = setInterval(tick, this.visionConfig.screenCaptureInterval || 2000);
    core_1.logger.debug('[VisionService] Started screen processing loop');
};
|
|
1124
|
-
/**
 * Capture the screen, analyze the active tile (when it carries data) and
 * refresh the enhanced scene description.
 *
 * Stores the capture in `this.lastScreenCapture` and the tile analysis on
 * `activeTile.analysis`. Errors are logged, not rethrown.
 */
VisionService.prototype.captureAndProcessScreen = function () {
    return __awaiter(this, void 0, void 0, function () {
        var focusTile, screenErr;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    state.trys.push([0, 5, , 6]);
                    return [4 /*yield*/, this.screenCapture.captureScreen()];
                case 1:
                    this.lastScreenCapture = state.sent();
                    focusTile = this.screenCapture.getActiveTile();
                    if (!(focusTile && focusTile.data)) return [3 /*break*/, 3];
                    return [4 /*yield*/, this.analyzeTile(focusTile)];
                case 2:
                    focusTile.analysis = state.sent();
                    state.label = 3;
                case 3:
                    // Fold the new capture into the enhanced scene.
                    return [4 /*yield*/, this.updateEnhancedSceneDescription()];
                case 4:
                    state.sent();
                    return [3 /*break*/, 6];
                case 5:
                    screenErr = state.sent();
                    core_1.logger.error('[VisionService] Error capturing screen:', screenErr);
                    return [3 /*break*/, 6];
                case 6: return [2 /*return*/];
            }
        });
    });
};
|
|
1158
|
-
/**
 * Analyze one screen tile with the enabled analyzers.
 *
 * - Florence-2 (`visionConfig.florence2Enabled`): fills `florence2` and `summary`.
 * - OCR (`visionConfig.ocrEnabled`): fills `ocr` and `text`.
 * - Florence-2 objects, when present, are mapped into `objects`.
 *
 * If any step throws, the error is logged and whatever was collected so far
 * is returned.
 *
 * @param tile Screen tile; analyzers only run when `tile.data` is truthy.
 * @returns Promise of the analysis record (always has `timestamp`).
 */
VisionService.prototype.analyzeTile = function (tile) {
    return __awaiter(this, void 0, void 0, function () {
        var analysis, florence, tileErr;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    analysis = {
                        timestamp: Date.now(),
                    };
                    state.label = 1;
                case 1:
                    state.trys.push([1, 6, , 7]);
                    if (!(this.visionConfig.florence2Enabled && tile.data)) return [3 /*break*/, 3];
                    return [4 /*yield*/, this.florence2.analyzeTile(tile)];
                case 2:
                    analysis.florence2 = state.sent();
                    analysis.summary = analysis.florence2.caption;
                    state.label = 3;
                case 3:
                    if (!(this.visionConfig.ocrEnabled && tile.data)) return [3 /*break*/, 5];
                    return [4 /*yield*/, this.ocrService.extractFromTile(tile)];
                case 4:
                    analysis.ocr = state.sent();
                    analysis.text = analysis.ocr.fullText;
                    state.label = 5;
                case 5:
                    // Re-shape Florence-2 detections into the common object format.
                    florence = analysis.florence2;
                    if (florence === null || florence === void 0 ? void 0 : florence.objects) {
                        analysis.objects = analysis.florence2.objects.map(function (obj) { return ({
                            id: "screen-obj-".concat(Date.now(), "-").concat(Math.random()),
                            type: obj.label,
                            confidence: obj.confidence,
                            boundingBox: obj.bbox,
                        }); });
                    }
                    return [3 /*break*/, 7];
                case 6:
                    tileErr = state.sent();
                    core_1.logger.error('[VisionService] Error analyzing tile:', tileErr);
                    return [3 /*break*/, 7];
                case 7: return [2 /*return*/, analysis];
            }
        });
    });
};
|
|
1206
|
-
/**
 * Combine the latest camera scene with the latest screen capture and store
 * the result in `this.lastEnhancedScene`.
 *
 * Does nothing when no screen capture exists yet. When no camera scene exists,
 * an empty placeholder scene is used as the base. The `screenAnalysis` part
 * holds the concatenated OCR text of all tiles with OCR results, a tile-count
 * summary, the active tile's analysis and a coarse focused-app guess.
 *
 * @returns Promise resolving to undefined.
 */
VisionService.prototype.updateEnhancedSceneDescription = function () {
    return __awaiter(this, void 0, void 0, function () {
        var scene, screenInfo, ocrTiles, analyzedTiles, windowHits;
        var activeTile, activeAnalysis, activeFlorence;
        return __generator(this, function (state) {
            if (!this.lastScreenCapture) {
                return [2 /*return*/];
            }
            scene = __assign(__assign({}, (this.lastSceneDescription || {
                timestamp: Date.now(),
                description: '',
                objects: [],
                people: [],
                sceneChanged: false,
                changePercentage: 0,
            })), { screenCapture: this.lastScreenCapture, screenAnalysis: {
                    fullScreenOCR: '',
                    activeTile: (activeTile = this.screenCapture.getActiveTile()) === null || activeTile === void 0 ? void 0 : activeTile.analysis,
                    gridSummary: '',
                    focusedApp: '',
                    uiElements: [],
                } });
            screenInfo = scene.screenAnalysis;
            // Join OCR text from every tile that has it.
            ocrTiles = this.lastScreenCapture.tiles.filter(function (t) {
                var analysis = t.analysis;
                return analysis === null || analysis === void 0 ? void 0 : analysis.ocr;
            });
            if (ocrTiles.length > 0) {
                screenInfo.fullScreenOCR = ocrTiles
                    .map(function (t) { return t.analysis.ocr.fullText; })
                    .join('\n');
            }
            // Summarize how many tiles exist and how many were analyzed.
            if (this.lastScreenCapture.tiles.length > 0) {
                analyzedTiles = this.lastScreenCapture.tiles.filter(function (t) { return t.analysis; });
                screenInfo.gridSummary = "Screen divided into ".concat(this.lastScreenCapture.tiles.length, " tiles, ").concat(analyzedTiles.length, " analyzed");
            }
            // Heuristic: a detected 'window' in the active tile means a desktop app.
            if ((activeFlorence = (activeAnalysis = screenInfo.activeTile) === null || activeAnalysis === void 0 ? void 0 : activeAnalysis.florence2) === null || activeFlorence === void 0 ? void 0 : activeFlorence.objects) {
                windowHits = screenInfo.activeTile.florence2.objects.filter(function (obj) { return obj.label === 'window'; });
                if (windowHits.length > 0) {
                    screenInfo.focusedApp = 'Desktop Application';
                }
            }
            this.lastEnhancedScene = scene;
            return [2 /*return*/];
        });
    });
};
|
|
1251
|
-
// Public API methods
|
|
1252
|
-
/**
 * Returns the most recently stored frame.
 *
 * @returns {Promise<*>} Resolves with the current value of `this.lastFrame`.
 */
VisionService.prototype.getCurrentFrame = function () {
    return __awaiter(this, void 0, void 0, function () {
        var service = this;
        return __generator(service, function () {
            return [2 /*return*/, service.lastFrame];
        });
    });
};
|
|
1259
|
-
/**
 * Returns the most recently stored scene description.
 *
 * @returns {Promise<*>} Resolves with the current value of `this.lastSceneDescription`.
 */
VisionService.prototype.getSceneDescription = function () {
    return __awaiter(this, void 0, void 0, function () {
        var service = this;
        return __generator(service, function () {
            return [2 /*return*/, service.lastSceneDescription];
        });
    });
};
|
|
1266
|
-
/**
 * Returns the richest scene description currently available.
 *
 * When a worker manager is attached its latest enhanced scene is returned;
 * otherwise the locally stored enhanced scene is used, falling back to the
 * plain scene description.
 *
 * @returns {Promise<*>} Resolves with the selected scene.
 */
VisionService.prototype.getEnhancedSceneDescription = function () {
    return __awaiter(this, void 0, void 0, function () {
        var service = this;
        return __generator(service, function () {
            var scene = service.workerManager
                ? service.workerManager.getLatestEnhancedScene()
                : service.lastEnhancedScene || service.lastSceneDescription;
            return [2 /*return*/, scene];
        });
    });
};
|
|
1278
|
-
/**
 * Returns the most recently stored screen capture.
 *
 * @returns {Promise<*>} Resolves with the current value of `this.lastScreenCapture`.
 */
VisionService.prototype.getScreenCapture = function () {
    return __awaiter(this, void 0, void 0, function () {
        var service = this;
        return __generator(service, function () {
            return [2 /*return*/, service.lastScreenCapture];
        });
    });
};
|
|
1285
|
-
/**
 * Reports the configured vision mode.
 *
 * @returns {*} `this.visionConfig.visionMode`, or `VisionMode.CAMERA` when that
 *   value is falsy.
 */
VisionService.prototype.getVisionMode = function () {
    var configuredMode = this.visionConfig.visionMode;
    if (configuredMode) {
        return configuredMode;
    }
    return types_1.VisionMode.CAMERA;
};
|
|
1288
|
-
/**
 * Switches the service to a different vision mode.
 *
 * Processing is stopped, the new mode is written to `this.visionConfig`, and
 * any camera / screen components the mode needs are initialized before
 * processing is started again. For `VisionMode.OFF` processing stays stopped.
 *
 * @param {*} mode One of the `VisionMode` values.
 * @returns {Promise<void>} Resolves once the switch has completed.
 */
VisionService.prototype.setVisionMode = function (mode) {
    return __awaiter(this, void 0, void 0, function () {
        var wantsCamera, wantsScreen;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    core_1.logger.info("[VisionService] Changing vision mode from ".concat(this.visionConfig.visionMode, " to ").concat(mode));
                    this.stopProcessing();
                    this.visionConfig.visionMode = mode;
                    if (mode === types_1.VisionMode.OFF) {
                        core_1.logger.info('[VisionService] Vision disabled');
                        return [2 /*return*/];
                    }
                    wantsCamera = mode === types_1.VisionMode.CAMERA || mode === types_1.VisionMode.BOTH;
                    // Skip camera setup when it is not needed or a camera already exists.
                    if (!wantsCamera || this.camera) {
                        return [3 /*break*/, 2];
                    }
                    return [4 /*yield*/, this.initializeCameraVision()];
                case 1:
                    state.sent();
                    state.label = 2;
                case 2:
                    wantsScreen = mode === types_1.VisionMode.SCREEN || mode === types_1.VisionMode.BOTH;
                    // Skip screen setup when it is not needed or both models are ready.
                    if (!wantsScreen || (this.florence2.isInitialized() && this.ocrService.isInitialized())) {
                        return [3 /*break*/, 4];
                    }
                    return [4 /*yield*/, this.initializeScreenVision()];
                case 3:
                    state.sent();
                    state.label = 4;
                case 4:
                    // Resume processing under the new mode.
                    this.startProcessing();
                    return [2 /*return*/];
            }
        });
    });
};
|
|
1323
|
-
/**
 * Cancels the frame and screen processing timers, if they are set, and clears
 * the corresponding fields to `null`.
 */
VisionService.prototype.stopProcessing = function () {
    var service = this;
    ['frameProcessingInterval', 'screenProcessingInterval'].forEach(function (timerField) {
        if (service[timerField]) {
            clearInterval(service[timerField]);
            service[timerField] = null;
        }
    });
};
|
|
1333
|
-
/**
 * Describes the camera currently held by the service.
 *
 * @returns {?{id: *, name: *, connected: boolean}} The camera's id and name
 *   with `connected: true`, or `null` when no camera is set.
 */
VisionService.prototype.getCameraInfo = function () {
    var activeCamera = this.camera;
    return activeCamera
        ? { id: activeCamera.id, name: activeCamera.name, connected: true }
        : null;
};
|
|
1343
|
-
/**
 * Reports whether camera processing is running.
 *
 * @returns {boolean} `true` only when both `this.camera` and
 *   `this.frameProcessingInterval` are not `null`.
 */
VisionService.prototype.isActive = function () {
    if (this.camera === null) {
        return false;
    }
    return this.frameProcessingInterval !== null;
};
|
|
1346
|
-
// Helper methods for face recognition
|
|
1347
|
-
/**
 * Computes the intersection-over-union of two axis-aligned boxes.
 *
 * @param {{x: number, y: number, width: number, height: number}} box1 First box.
 * @param {{x: number, y: number, width: number, height: number}} box2 Second box.
 * @returns {number} Intersection area divided by union area; 0 when the boxes
 *   do not intersect or when the union area is not a positive number.
 */
VisionService.prototype.calculateBoxOverlap = function (box1, box2) {
    var left = Math.max(box1.x, box2.x);
    var top = Math.max(box1.y, box2.y);
    var right = Math.min(box1.x + box1.width, box2.x + box2.width);
    var bottom = Math.min(box1.y + box1.height, box2.y + box2.height);
    if (right < left || bottom < top) {
        return 0;
    }
    var intersection = (right - left) * (bottom - top);
    var union = box1.width * box1.height + box2.width * box2.height - intersection;
    // Two zero-area boxes at the same point give 0 / 0; a missing field gives NaN.
    // Report "no overlap" instead of leaking NaN into threshold comparisons.
    if (!(union > 0)) {
        return 0;
    }
    return intersection / union;
};
|
|
1361
|
-
/**
 * Picks the expression with the highest numeric score.
 *
 * @param {?Object<string, number>} expressions Map of expression name to score.
 * @returns {string} Name of the highest-scoring expression, or `'neutral'` when
 *   the map is missing, empty, or has no numeric score above 0.
 */
VisionService.prototype.getDominantExpression = function (expressions) {
    var dominantExpression = 'neutral';
    // Object.entries throws on null/undefined; treat missing data as neutral.
    if (expressions === null || expressions === undefined) {
        return dominantExpression;
    }
    var maxValue = 0;
    Object.entries(expressions).forEach(function (entry) {
        var expression = entry[0];
        var value = entry[1];
        if (typeof value === 'number' && value > maxValue) {
            maxValue = value;
            dominantExpression = expression;
        }
    });
    return dominantExpression;
};
|
|
1373
|
-
// Public methods for entity tracking
|
|
1374
|
-
/**
 * Exposes the entity tracker held by this service.
 *
 * @returns {*} The current value of `this.entityTracker`.
 */
VisionService.prototype.getEntityTracker = function () {
    var tracker = this.entityTracker;
    return tracker;
};
|
|
1377
|
-
/**
 * Exposes the face recognition component held by this service.
 *
 * @returns {*} The current value of `this.faceRecognition`.
 */
VisionService.prototype.getFaceRecognition = function () {
    var recognizer = this.faceRecognition;
    return recognizer;
};
|
|
1380
|
-
/**
 * Shuts the service down.
 *
 * Stops the processing timers, then in order: stops both audio captures,
 * disposes the vision models, stops the worker manager, clears cached state,
 * and finally disposes the Florence-2 and OCR components. Each step is awaited
 * before the next begins, so a rejection skips the remaining steps.
 *
 * @returns {Promise<void>} Resolves once every component has been released.
 */
VisionService.prototype.stop = function () {
    return __awaiter(this, void 0, void 0, function () {
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    core_1.logger.info('[VisionService] Stopping vision service...');
                    this.stopProcessing();
                    if (this.audioCapture) {
                        return [4 /*yield*/, this.audioCapture.stop()];
                    }
                    return [3 /*break*/, 2];
                case 1:
                    state.sent();
                    this.audioCapture = null;
                    state.label = 2;
                case 2:
                    if (this.streamingAudioCapture) {
                        return [4 /*yield*/, this.streamingAudioCapture.stop()];
                    }
                    return [3 /*break*/, 4];
                case 3:
                    state.sent();
                    this.streamingAudioCapture = null;
                    state.label = 4;
                case 4:
                    if (this.visionModels) {
                        return [4 /*yield*/, this.visionModels.dispose()];
                    }
                    return [3 /*break*/, 6];
                case 5:
                    state.sent();
                    state.label = 6;
                case 6:
                    if (this.workerManager) {
                        return [4 /*yield*/, this.workerManager.stop()];
                    }
                    return [3 /*break*/, 8];
                case 7:
                    state.sent();
                    this.workerManager = null;
                    state.label = 8;
                case 8:
                    // Drop cached captures and reset the busy flags.
                    this.isProcessingScreen = false;
                    this.isProcessing = false;
                    this.lastEnhancedScene = null;
                    this.lastScreenCapture = null;
                    this.lastSceneDescription = null;
                    this.lastFrame = null;
                    this.camera = null;
                    // Release the screen-analysis models last.
                    return [4 /*yield*/, this.florence2.dispose()];
                case 9:
                    state.sent();
                    return [4 /*yield*/, this.ocrService.dispose()];
                case 10:
                    state.sent();
                    core_1.logger.info('[VisionService] Stopped.');
                    return [2 /*return*/];
            }
        });
    });
};
|
|
1435
|
-
/**
 * Chooses a camera and wraps it in a capture-capable device object.
 *
 * When `this.visionConfig.cameraName` is set, the first listed camera whose
 * name contains it (case-insensitive) wins; otherwise, or when nothing
 * matches, the first listed camera is used.
 *
 * @returns {Promise<?Object>} The device from `createCameraDevice`, or `null`
 *   when no camera is listed or the lookup throws.
 */
VisionService.prototype.findCamera = function () {
    return __awaiter(this, void 0, void 0, function () {
        var available, configuredName, needle, preferred, lookupError;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    state.trys.push([0, 2, , 3]);
                    return [4 /*yield*/, this.listCameras()];
                case 1:
                    available = state.sent();
                    if (available.length === 0) {
                        core_1.logger.warn('[VisionService] No cameras detected');
                        return [2 /*return*/, null];
                    }
                    configuredName = this.visionConfig.cameraName;
                    if (configuredName) {
                        needle = configuredName.toLowerCase();
                        preferred = available.find(function (candidate) {
                            return candidate.name.toLowerCase().includes(needle);
                        });
                        if (preferred) {
                            return [2 /*return*/, this.createCameraDevice(preferred)];
                        }
                        core_1.logger.warn("[VisionService] Camera \"".concat(configuredName, "\" not found, using default"));
                    }
                    // No preference, or no match: take the first camera listed.
                    return [2 /*return*/, this.createCameraDevice(available[0])];
                case 2:
                    lookupError = state.sent();
                    core_1.logger.error('[VisionService] Error finding camera:', lookupError);
                    return [2 /*return*/, null];
                case 3: return [2 /*return*/];
            }
        });
    });
};
|
|
1469
|
-
/**
 * Enumerates the cameras visible to the operating system.
 *
 * Uses `system_profiler` on darwin, `v4l2-ctl --list-devices` on linux and
 * PowerShell `Get-PnpDevice -Class Camera` on win32. Any other platform yields
 * an empty list, as does any failure (which is logged).
 *
 * @returns {Promise<Array<{id: *, name: *, connected: boolean}>>} One entry per
 *   detected camera.
 */
VisionService.prototype.listCameras = function () {
    return __awaiter(this, void 0, void 0, function () {
        var platform, stdout, profile, parsed, cameras, currentName, listError;
        return __generator(this, function (_d) {
            switch (_d.label) {
                case 0:
                    platform = process.platform;
                    _d.label = 1;
                case 1:
                    _d.trys.push([1, 8, , 9]);
                    if (!(platform === 'darwin')) return [3 /*break*/, 3];
                    return [4 /*yield*/, execAsync('system_profiler SPCameraDataType -json')];
                case 2:
                    stdout = (_d.sent()).stdout;
                    profile = JSON.parse(stdout);
                    cameras = Array.isArray(profile.SPCameraDataType)
                        ? profile.SPCameraDataType.map(function (camera) {
                            return {
                                id: camera.unique_id || camera._name,
                                name: camera._name,
                                connected: true,
                            };
                        })
                        : [];
                    return [2 /*return*/, cameras];
                case 3:
                    if (!(platform === 'linux')) return [3 /*break*/, 5];
                    return [4 /*yield*/, execAsync('v4l2-ctl --list-devices')];
                case 4:
                    stdout = (_d.sent()).stdout;
                    cameras = [];
                    currentName = '';
                    stdout.split('\n').forEach(function (line) {
                        if (line && !line.startsWith('\t')) {
                            // Header lines end with ':'. Strip only that trailing colon;
                            // colons inside the device name or bus id are part of the name.
                            currentName = line.replace(/:\s*$/, '').trim();
                        }
                        else if (line.trim().startsWith('/dev/video')) {
                            cameras.push({
                                id: line.trim().replace('/dev/video', ''),
                                name: currentName,
                                connected: true,
                            });
                        }
                    });
                    return [2 /*return*/, cameras];
                case 5:
                    if (!(platform === 'win32')) return [3 /*break*/, 7];
                    return [4 /*yield*/, execAsync('powershell -Command "Get-PnpDevice -Class Camera | Select-Object FriendlyName, InstanceId | ConvertTo-Json"')];
                case 6:
                    stdout = (_d.sent()).stdout;
                    // Empty output means nothing was reported; that is not a parse error.
                    parsed = stdout.trim() ? JSON.parse(stdout) : [];
                    // ConvertTo-Json emits a bare object (not a one-element array) when
                    // exactly one device is piped in, so normalize to an array.
                    if (!Array.isArray(parsed)) {
                        parsed = parsed ? [parsed] : [];
                    }
                    cameras = parsed.map(function (device) {
                        return {
                            id: device.InstanceId,
                            name: device.FriendlyName,
                            connected: true,
                        };
                    });
                    return [2 /*return*/, cameras];
                case 7: return [2 /*return*/, []];
                case 8:
                    listError = _d.sent();
                    core_1.logger.error('[VisionService] Error listing cameras:', listError);
                    return [2 /*return*/, []];
                case 9: return [2 /*return*/];
            }
        });
    });
};
|
|
1548
|
-
/**
 * Wraps a camera listing entry in a device object that can grab still frames.
 *
 * `capture()` runs a platform-specific command (imagesnap on darwin, fswebcam
 * on linux, ffmpeg with dshow on win32) that writes a JPEG into the current
 * working directory, reads the file back, deletes it, and resolves with what
 * `fs.readFile` returned. On any failure the temp file is removed (errors from
 * the removal are ignored) and the original error is rethrown.
 *
 * NOTE(review): `info.name` and the temp path are interpolated into a shell
 * command line without escaping.
 *
 * @param {Object} info Camera entry providing `id` and `name`.
 * @returns {{id: *, name: *, capture: function(): Promise<*>}} The device.
 */
VisionService.prototype.createCameraDevice = function (info) {
    var service = this;
    var hostPlatform = process.platform;
    var captureFrame = function () {
        return __awaiter(service, void 0, void 0, function () {
            var snapshotPath, macError, linuxError, windowsError, frame, captureError;
            return __generator(this, function (state) {
                switch (state.label) {
                    case 0:
                        snapshotPath = path.join(process.cwd(), "temp_capture_".concat(Date.now(), ".jpg"));
                        state.label = 1;
                    case 1:
                        // Outer try: covers the capture command and the read-back.
                        state.trys.push([1, 20, , 22]);
                        if (!(hostPlatform === 'darwin')) return [3 /*break*/, 6];
                        state.label = 2;
                    case 2:
                        state.trys.push([2, 4, , 5]);
                        return [4 /*yield*/, execAsync("imagesnap -d \"".concat(info.name, "\" \"").concat(snapshotPath, "\""))];
                    case 3:
                        state.sent();
                        return [3 /*break*/, 5];
                    case 4:
                        macError = state.sent();
                        // Turn a missing binary into an actionable install hint.
                        if (macError.message.includes('command not found')) {
                            throw new Error('imagesnap not installed. Run: brew install imagesnap');
                        }
                        throw macError;
                    case 5: return [3 /*break*/, 17];
                    case 6:
                        if (!(hostPlatform === 'linux')) return [3 /*break*/, 11];
                        state.label = 7;
                    case 7:
                        state.trys.push([7, 9, , 10]);
                        return [4 /*yield*/, execAsync("fswebcam -d /dev/video".concat(info.id, " -r 1280x720 --jpeg 85 \"").concat(snapshotPath, "\""))];
                    case 8:
                        state.sent();
                        return [3 /*break*/, 10];
                    case 9:
                        linuxError = state.sent();
                        if (linuxError.message.includes('command not found')) {
                            throw new Error('fswebcam not installed. Run: sudo apt-get install fswebcam');
                        }
                        throw linuxError;
                    case 10: return [3 /*break*/, 17];
                    case 11:
                        if (!(hostPlatform === 'win32')) return [3 /*break*/, 16];
                        state.label = 12;
                    case 12:
                        state.trys.push([12, 14, , 15]);
                        return [4 /*yield*/, execAsync("ffmpeg -f dshow -i video=\"".concat(info.name, "\" -frames:v 1 -q:v 2 \"").concat(snapshotPath, "\" -y"))];
                    case 13:
                        state.sent();
                        return [3 /*break*/, 15];
                    case 14:
                        windowsError = state.sent();
                        if (windowsError.message.includes('not recognized') || windowsError.message.includes('not found')) {
                            throw new Error('ffmpeg not installed. Download from ffmpeg.org and add to PATH');
                        }
                        throw windowsError;
                    case 15: return [3 /*break*/, 17];
                    case 16: throw new Error("Unsupported platform: ".concat(hostPlatform));
                    case 17: return [4 /*yield*/, fs.readFile(snapshotPath)];
                    case 18:
                        frame = state.sent();
                        // The frame is in memory now; remove the file, ignoring failures.
                        return [4 /*yield*/, fs.unlink(snapshotPath).catch(function () { })];
                    case 19:
                        state.sent();
                        return [2 /*return*/, frame];
                    case 20:
                        captureError = state.sent();
                        // Remove any partial file before reporting the failure.
                        return [4 /*yield*/, fs.unlink(snapshotPath).catch(function () { })];
                    case 21:
                        state.sent();
                        throw captureError;
                    case 22: return [2 /*return*/];
                }
            });
        });
    };
    return {
        id: info.id,
        name: info.name,
        capture: captureFrame,
    };
};
|
|
1634
|
-
/**
 * Grabs a single frame from the active camera.
 *
 * @returns {Promise<*>} Resolves with the result of `this.camera.capture()`, or
 *   `null` when no camera is set or the capture throws (both cases are logged).
 */
VisionService.prototype.captureImage = function () {
    return __awaiter(this, void 0, void 0, function () {
        var captureError;
        return __generator(this, function (state) {
            switch (state.label) {
                case 0:
                    if (this.camera) {
                        state.label = 1;
                        return [3 /*break*/, 1];
                    }
                    core_1.logger.warn('[VisionService] No camera available for capture');
                    return [2 /*return*/, null];
                case 1:
                    state.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.camera.capture()];
                case 2: return [2 /*return*/, state.sent()];
                case 3:
                    captureError = state.sent();
                    core_1.logger.error('[VisionService] Failed to capture image:', captureError);
                    return [2 /*return*/, null];
                case 4: return [2 /*return*/];
            }
        });
    });
};
|
|
1658
|
-
VisionService.serviceType = types_1.VisionServiceType.VISION;
|
|
1659
|
-
return VisionService;
|
|
1660
|
-
}(core_1.Service));
|
|
1661
|
-
exports.VisionService = VisionService;
|
|
1662
|
-
//# sourceMappingURL=service.js.map
|