@elizaos/plugin-vision 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +5 -0
- package/README.md +270 -0
- package/build.config.ts +70 -0
- package/dist/action.d.ts +8 -0
- package/dist/action.js +1212 -0
- package/dist/action.js.map +1 -0
- package/dist/audio-capture-stream.d.ts +42 -0
- package/dist/audio-capture-stream.js +516 -0
- package/dist/audio-capture-stream.js.map +1 -0
- package/dist/audio-capture.d.ts +25 -0
- package/dist/audio-capture.js +412 -0
- package/dist/audio-capture.js.map +1 -0
- package/dist/basic.test.d.ts +1 -0
- package/dist/basic.test.js +97 -0
- package/dist/basic.test.js.map +1 -0
- package/dist/config.d.ts +73 -0
- package/dist/config.js +254 -0
- package/dist/config.js.map +1 -0
- package/dist/entity-tracker.d.ts +32 -0
- package/dist/entity-tracker.js +361 -0
- package/dist/entity-tracker.js.map +1 -0
- package/dist/errors.d.ts +67 -0
- package/dist/errors.js +395 -0
- package/dist/errors.js.map +1 -0
- package/dist/face-recognition.d.ts +31 -0
- package/dist/face-recognition.js +332 -0
- package/dist/face-recognition.js.map +1 -0
- package/dist/florence2-local.d.ts +25 -0
- package/dist/florence2-local.js +280 -0
- package/dist/florence2-local.js.map +1 -0
- package/dist/florence2-model.d.ts +36 -0
- package/dist/florence2-model.js +503 -0
- package/dist/florence2-model.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/ocr-service-real.d.ts +32 -0
- package/dist/ocr-service-real.js +396 -0
- package/dist/ocr-service-real.js.map +1 -0
- package/dist/ocr-service.d.ts +28 -0
- package/dist/ocr-service.js +216 -0
- package/dist/ocr-service.js.map +1 -0
- package/dist/provider.d.ts +2 -0
- package/dist/provider.js +285 -0
- package/dist/provider.js.map +1 -0
- package/dist/screen-capture.d.ts +16 -0
- package/dist/screen-capture.js +302 -0
- package/dist/screen-capture.js.map +1 -0
- package/dist/service.d.ts +73 -0
- package/dist/service.js +1662 -0
- package/dist/service.js.map +1 -0
- package/dist/tests/e2e/index.d.ts +8 -0
- package/dist/tests/e2e/index.js +33 -0
- package/dist/tests/e2e/index.js.map +1 -0
- package/dist/tests/e2e/run-local.d.ts +2 -0
- package/dist/tests/e2e/run-local.js +166 -0
- package/dist/tests/e2e/run-local.js.map +1 -0
- package/dist/tests/e2e/screen-vision.d.ts +11 -0
- package/dist/tests/e2e/screen-vision.js +384 -0
- package/dist/tests/e2e/screen-vision.js.map +1 -0
- package/dist/tests/e2e/vision-autonomy.d.ts +11 -0
- package/dist/tests/e2e/vision-autonomy.js +375 -0
- package/dist/tests/e2e/vision-autonomy.js.map +1 -0
- package/dist/tests/e2e/vision-basic.d.ts +11 -0
- package/dist/tests/e2e/vision-basic.js +434 -0
- package/dist/tests/e2e/vision-basic.js.map +1 -0
- package/dist/tests/e2e/vision-capture-log.d.ts +11 -0
- package/dist/tests/e2e/vision-capture-log.js +302 -0
- package/dist/tests/e2e/vision-capture-log.js.map +1 -0
- package/dist/tests/e2e/vision-runtime.d.ts +11 -0
- package/dist/tests/e2e/vision-runtime.js +357 -0
- package/dist/tests/e2e/vision-runtime.js.map +1 -0
- package/dist/tests/e2e/vision-worker-tests.d.ts +11 -0
- package/dist/tests/e2e/vision-worker-tests.js +466 -0
- package/dist/tests/e2e/vision-worker-tests.js.map +1 -0
- package/dist/tests/test-pattern-generator.d.ts +40 -0
- package/dist/tests/test-pattern-generator.js +191 -0
- package/dist/tests/test-pattern-generator.js.map +1 -0
- package/dist/tests.d.ts +3 -0
- package/dist/tests.js +11 -0
- package/dist/tests.js.map +1 -0
- package/dist/types.d.ts +222 -0
- package/dist/types.js +16 -0
- package/dist/types.js.map +1 -0
- package/dist/vision-models.d.ts +47 -0
- package/dist/vision-models.js +501 -0
- package/dist/vision-models.js.map +1 -0
- package/dist/vision-worker-manager.d.ts +61 -0
- package/dist/vision-worker-manager.js +668 -0
- package/dist/vision-worker-manager.js.map +1 -0
- package/dist/workers/florence2-worker-simple.d.ts +13 -0
- package/dist/workers/florence2-worker-simple.js +121 -0
- package/dist/workers/florence2-worker-simple.js.map +1 -0
- package/dist/workers/florence2-worker.d.ts +1 -0
- package/dist/workers/florence2-worker.js +328 -0
- package/dist/workers/florence2-worker.js.map +1 -0
- package/dist/workers/ocr-worker.d.ts +1 -0
- package/dist/workers/ocr-worker.js +354 -0
- package/dist/workers/ocr-worker.js.map +1 -0
- package/dist/workers/screen-capture-worker.d.ts +1 -0
- package/dist/workers/screen-capture-worker.js +427 -0
- package/dist/workers/screen-capture-worker.js.map +1 -0
- package/dist/workers/worker-logger.d.ts +9 -0
- package/dist/workers/worker-logger.js +95 -0
- package/dist/workers/worker-logger.js.map +1 -0
- package/package.json +100 -0
package/dist/action.js
ADDED
|
@@ -0,0 +1,1212 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Down-level async helper: runs a generator function to completion and
 * exposes the outcome as a promise.
 *
 * Every value the generator yields is wrapped in a promise (unless it already
 * is an instance of the promise constructor in use); fulfilment resumes the
 * generator with the value, rejection is thrown back into it.
 *
 * @param {*} thisArg - `this` binding applied to the generator function.
 * @param {ArrayLike<*>} [_arguments] - Arguments applied to the generator function.
 * @param {Function} [P] - Promise constructor to use; defaults to the global `Promise`.
 * @param {Function} generator - Generator function holding the async body.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects
 *   with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    var PromiseCtor = P || (P = Promise);
    // Leaves instances of the chosen promise constructor alone, wraps the rest.
    function toPromise(candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) { settle(candidate); });
    }
    return new PromiseCtor(function (resolve, reject) {
        function advance(iteration) {
            if (iteration.done) {
                resolve(iteration.value);
                return;
            }
            toPromise(iteration.value).then(onFulfilled, onRejected);
        }
        function onFulfilled(settledValue) {
            try {
                advance(generator.next(settledValue));
            }
            catch (failure) {
                reject(failure);
            }
        }
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (failure) {
                reject(failure);
            }
        }
        // Instantiate the generator and kick off the first step synchronously.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
|
|
11
|
+
// Down-level generator helper (the shape the TypeScript compiler emits when
// targeting runtimes without native generators). `body` is a state machine:
// it is called repeatedly with the state object `_` and returns an
// instruction pair [opcode, operand] that `step` interprets.
// Opcodes as used in this file: 0 = next, 1 = throw, 2 = return, 3 = break
// (jump to label), 4 = yield, 5 = delegate to an inner iterator, 6 = error
// caught while running `body`, 7 = end of a finally region.
// Returns an iterator-like object exposing next / throw / return.
var __generator = (this && this.__generator) || function (thisArg, body) {
|
|
12
|
+
// `_` is the state handed to `body`: current label, the last sent value (or
// pending error), the stack of active try regions and the pending operations.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
|
|
13
|
+
return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
|
|
14
|
+
function verb(n) { return function (v) { return step([n, v]); }; }
|
|
15
|
+
function step(op) {
|
|
16
|
+
// `f` is the re-entrancy flag: it is set while a step is in progress.
if (f) throw new TypeError("Generator is already executing.");
|
|
17
|
+
while (g && (g = 0, op[0] && (_ = 0)), _) try {
|
|
18
|
+
// When an inner iterator `y` is active (opcode 5), forward the request to it first.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
|
|
19
|
+
if (y = 0, t) op = [op[0] & 2, t.value];
|
|
20
|
+
switch (op[0]) {
|
|
21
|
+
case 0: case 1: t = op; break;
|
|
22
|
+
// Yield: advance the label and hand the value to the consumer.
case 4: _.label++; return { value: op[1], done: false };
|
|
23
|
+
case 5: _.label++; y = op[1]; op = [0]; continue;
|
|
24
|
+
case 7: op = _.ops.pop(); _.trys.pop(); continue;
|
|
25
|
+
default:
|
|
26
|
+
// Remaining opcodes (2, 3, 6) are resolved against the innermost try region, if any.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
|
|
27
|
+
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
|
|
28
|
+
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
|
|
29
|
+
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
|
|
30
|
+
if (t[2]) _.ops.pop();
|
|
31
|
+
_.trys.pop(); continue;
|
|
32
|
+
}
|
|
33
|
+
op = body.call(thisArg, _);
|
|
34
|
+
// An exception escaping `body` becomes opcode 6 and is routed on the next loop pass.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
|
|
35
|
+
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
|
|
36
|
+
}
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.trackEntityAction = exports.identifyPersonAction = exports.nameEntityAction = exports.setVisionModeAction = exports.killAutonomousAction = exports.captureImageAction = exports.describeSceneAction = void 0;
|
|
40
|
+
// Vision actions for scene analysis and image capture
|
|
41
|
+
var core_1 = require("@elizaos/core");
|
|
42
|
+
var types_1 = require("./types");
|
|
43
|
+
// Helper function to save execution record to message feed
|
|
44
|
+
/**
 * Writes a record of an executed vision action to the runtime's 'messages'
 * memory table.
 *
 * @param {object} runtime - Agent runtime; `agentId` and `createMemory` are used.
 * @param {object} messageContext - Triggering message; its `roomId` and
 *   `worldId` are copied onto the record.
 * @param {string} thought - Internal reasoning stored on the record.
 * @param {string} text - User-facing text stored on the record.
 * @param {string[]} [actions] - Action names; defaults to ['VISION_ANALYSIS'] when falsy.
 * @param {object[]} [attachments] - Optional attachments stored on the record.
 * @returns {Promise<void>} Resolves once `runtime.createMemory` has completed;
 *   rejects with whatever it throws.
 */
async function saveExecutionRecord(runtime, messageContext, thought, text, actions, attachments) {
    const { createUniqueUuid } = core_1;
    // The seed is the only per-record input handed to createUniqueUuid, and
    // Date.now() alone repeats for records written within the same millisecond.
    // A per-process sequence number keeps back-to-back records on distinct seeds.
    saveExecutionRecord.sequence = (saveExecutionRecord.sequence || 0) + 1;
    const idSeed = `vision-record-${Date.now()}-${saveExecutionRecord.sequence}`;
    const memory = {
        id: createUniqueUuid(runtime, idSeed),
        content: {
            text,
            thought,
            actions: actions || ['VISION_ANALYSIS'],
            attachments,
        },
        entityId: createUniqueUuid(runtime, runtime.agentId),
        agentId: runtime.agentId,
        roomId: messageContext.roomId,
        worldId: messageContext.worldId,
        createdAt: Date.now(),
    };
    await runtime.createMemory(memory, 'messages');
}
|
|
72
|
+
exports.describeSceneAction = {
|
|
73
|
+
name: 'DESCRIBE_SCENE',
|
|
74
|
+
similes: ['ANALYZE_SCENE', 'WHAT_DO_YOU_SEE', 'VISION_CHECK', 'LOOK_AROUND'],
|
|
75
|
+
description: 'Analyzes the current visual scene and provides a detailed description of what the agent sees through the camera. Returns scene analysis data including people count, objects, and camera info for action chaining.',
|
|
76
|
+
validate: function (runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
77
|
+
var visionService;
|
|
78
|
+
return __generator(this, function (_a) {
|
|
79
|
+
visionService = runtime.getService('VISION');
|
|
80
|
+
return [2 /*return*/, !!visionService && visionService.isActive()];
|
|
81
|
+
});
|
|
82
|
+
}); },
|
|
83
|
+
handler: function (runtime, message, _state, _options, callback, _responses) { return __awaiter(void 0, void 0, void 0, function () {
|
|
84
|
+
var visionService, thought, text, scene, cameraInfo, thought_1, text_1, peopleCount, objectCount, timestamp, description, facingData, facingDescriptions, objectTypes, objectDescriptions, thought, text, error_1, thought, text;
|
|
85
|
+
return __generator(this, function (_a) {
|
|
86
|
+
switch (_a.label) {
|
|
87
|
+
case 0:
|
|
88
|
+
visionService = runtime.getService('VISION');
|
|
89
|
+
if (!(!visionService || !visionService.isActive())) return [3 /*break*/, 4];
|
|
90
|
+
thought = 'Vision service is not available or no camera is connected.';
|
|
91
|
+
text = 'I cannot see anything right now. No camera is available.';
|
|
92
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['DESCRIBE_SCENE'])];
|
|
93
|
+
case 1:
|
|
94
|
+
_a.sent();
|
|
95
|
+
if (!callback) return [3 /*break*/, 3];
|
|
96
|
+
return [4 /*yield*/, callback({
|
|
97
|
+
thought: thought,
|
|
98
|
+
text: text,
|
|
99
|
+
actions: ['DESCRIBE_SCENE'],
|
|
100
|
+
})];
|
|
101
|
+
case 2:
|
|
102
|
+
_a.sent();
|
|
103
|
+
_a.label = 3;
|
|
104
|
+
case 3: return [2 /*return*/, {
|
|
105
|
+
text: 'Vision service unavailable - cannot analyze scene',
|
|
106
|
+
values: {
|
|
107
|
+
success: false,
|
|
108
|
+
visionAvailable: false,
|
|
109
|
+
error: 'Vision service not available',
|
|
110
|
+
},
|
|
111
|
+
data: {
|
|
112
|
+
actionName: 'DESCRIBE_SCENE',
|
|
113
|
+
error: 'Vision service not available or no camera connected',
|
|
114
|
+
},
|
|
115
|
+
}];
|
|
116
|
+
case 4:
|
|
117
|
+
_a.trys.push([4, 13, , 17]);
|
|
118
|
+
return [4 /*yield*/, visionService.getSceneDescription()];
|
|
119
|
+
case 5:
|
|
120
|
+
scene = _a.sent();
|
|
121
|
+
cameraInfo = visionService.getCameraInfo();
|
|
122
|
+
if (!!scene) return [3 /*break*/, 9];
|
|
123
|
+
thought_1 = 'Camera is connected but no scene has been analyzed yet.';
|
|
124
|
+
text_1 = "Camera \"".concat(cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name, "\" is connected, but I haven't analyzed any scenes yet. Please wait a moment.");
|
|
125
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_1, text_1, ['DESCRIBE_SCENE'])];
|
|
126
|
+
case 6:
|
|
127
|
+
_a.sent();
|
|
128
|
+
if (!callback) return [3 /*break*/, 8];
|
|
129
|
+
return [4 /*yield*/, callback({
|
|
130
|
+
thought: thought_1,
|
|
131
|
+
text: text_1,
|
|
132
|
+
actions: ['DESCRIBE_SCENE'],
|
|
133
|
+
})];
|
|
134
|
+
case 7:
|
|
135
|
+
_a.sent();
|
|
136
|
+
_a.label = 8;
|
|
137
|
+
case 8: return [2 /*return*/, {
|
|
138
|
+
text: 'Camera connected but no scene analyzed yet',
|
|
139
|
+
values: {
|
|
140
|
+
success: false,
|
|
141
|
+
visionAvailable: true,
|
|
142
|
+
sceneAnalyzed: false,
|
|
143
|
+
cameraName: cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name,
|
|
144
|
+
},
|
|
145
|
+
data: {
|
|
146
|
+
actionName: 'DESCRIBE_SCENE',
|
|
147
|
+
cameraInfo: cameraInfo,
|
|
148
|
+
sceneStatus: 'not_analyzed',
|
|
149
|
+
},
|
|
150
|
+
}];
|
|
151
|
+
case 9:
|
|
152
|
+
peopleCount = scene.people.length;
|
|
153
|
+
objectCount = scene.objects.length;
|
|
154
|
+
timestamp = new Date(scene.timestamp).toLocaleString();
|
|
155
|
+
description = "Looking through ".concat((cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name) || 'the camera', ", ");
|
|
156
|
+
description += scene.description;
|
|
157
|
+
if (peopleCount > 0) {
|
|
158
|
+
description += "\n\nI can see ".concat(peopleCount, " ").concat(peopleCount === 1 ? 'person' : 'people');
|
|
159
|
+
facingData = scene.people.reduce(function (acc, person) {
|
|
160
|
+
if (person.facing && person.facing !== 'unknown') {
|
|
161
|
+
acc[person.facing] = (acc[person.facing] || 0) + 1;
|
|
162
|
+
}
|
|
163
|
+
return acc;
|
|
164
|
+
}, {});
|
|
165
|
+
if (Object.keys(facingData).length > 0) {
|
|
166
|
+
facingDescriptions = Object.entries(facingData).map(function (_a) {
|
|
167
|
+
var direction = _a[0], count = _a[1];
|
|
168
|
+
return "".concat(count, " facing ").concat(direction);
|
|
169
|
+
});
|
|
170
|
+
description += " (".concat(facingDescriptions.join(', '), ")");
|
|
171
|
+
}
|
|
172
|
+
description += '.';
|
|
173
|
+
}
|
|
174
|
+
if (objectCount > 0) {
|
|
175
|
+
objectTypes = scene.objects.reduce(function (acc, obj) {
|
|
176
|
+
acc[obj.type] = (acc[obj.type] || 0) + 1;
|
|
177
|
+
return acc;
|
|
178
|
+
}, {});
|
|
179
|
+
objectDescriptions = Object.entries(objectTypes).map(function (_a) {
|
|
180
|
+
var type = _a[0], count = _a[1];
|
|
181
|
+
return "".concat(count, " ").concat(type).concat(count > 1 ? 's' : '');
|
|
182
|
+
});
|
|
183
|
+
description += "\n\nObjects detected: ".concat(objectDescriptions.join(', '), ".");
|
|
184
|
+
}
|
|
185
|
+
if (scene.sceneChanged && scene.changePercentage) {
|
|
186
|
+
description += "\n\n(Scene changed by ".concat(scene.changePercentage.toFixed(1), "% since last analysis)");
|
|
187
|
+
}
|
|
188
|
+
thought = "Analyzed the visual scene at ".concat(timestamp, ".");
|
|
189
|
+
text = description;
|
|
190
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['DESCRIBE_SCENE'])];
|
|
191
|
+
case 10:
|
|
192
|
+
_a.sent();
|
|
193
|
+
if (!callback) return [3 /*break*/, 12];
|
|
194
|
+
return [4 /*yield*/, callback({
|
|
195
|
+
thought: thought,
|
|
196
|
+
text: text,
|
|
197
|
+
actions: ['DESCRIBE_SCENE'],
|
|
198
|
+
})];
|
|
199
|
+
case 11:
|
|
200
|
+
_a.sent();
|
|
201
|
+
_a.label = 12;
|
|
202
|
+
case 12: return [2 /*return*/, {
|
|
203
|
+
text: description,
|
|
204
|
+
values: {
|
|
205
|
+
success: true,
|
|
206
|
+
visionAvailable: true,
|
|
207
|
+
sceneAnalyzed: true,
|
|
208
|
+
peopleCount: peopleCount,
|
|
209
|
+
objectCount: objectCount,
|
|
210
|
+
cameraName: cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name,
|
|
211
|
+
sceneChanged: scene.sceneChanged,
|
|
212
|
+
changePercentage: scene.changePercentage,
|
|
213
|
+
},
|
|
214
|
+
data: {
|
|
215
|
+
actionName: 'DESCRIBE_SCENE',
|
|
216
|
+
scene: scene,
|
|
217
|
+
cameraInfo: cameraInfo,
|
|
218
|
+
timestamp: timestamp,
|
|
219
|
+
description: description,
|
|
220
|
+
},
|
|
221
|
+
}];
|
|
222
|
+
case 13:
|
|
223
|
+
error_1 = _a.sent();
|
|
224
|
+
core_1.logger.error('[describeSceneAction] Error analyzing scene:', error_1);
|
|
225
|
+
thought = 'An error occurred while trying to analyze the visual scene.';
|
|
226
|
+
text = "Error analyzing scene: ".concat(error_1.message);
|
|
227
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['DESCRIBE_SCENE'])];
|
|
228
|
+
case 14:
|
|
229
|
+
_a.sent();
|
|
230
|
+
if (!callback) return [3 /*break*/, 16];
|
|
231
|
+
return [4 /*yield*/, callback({
|
|
232
|
+
thought: thought,
|
|
233
|
+
text: text,
|
|
234
|
+
actions: ['DESCRIBE_SCENE'],
|
|
235
|
+
})];
|
|
236
|
+
case 15:
|
|
237
|
+
_a.sent();
|
|
238
|
+
_a.label = 16;
|
|
239
|
+
case 16: return [2 /*return*/, {
|
|
240
|
+
text: 'Error analyzing scene',
|
|
241
|
+
values: {
|
|
242
|
+
success: false,
|
|
243
|
+
visionAvailable: true,
|
|
244
|
+
error: true,
|
|
245
|
+
errorMessage: error_1.message,
|
|
246
|
+
},
|
|
247
|
+
data: {
|
|
248
|
+
actionName: 'DESCRIBE_SCENE',
|
|
249
|
+
error: error_1.message,
|
|
250
|
+
errorType: 'analysis_error',
|
|
251
|
+
},
|
|
252
|
+
}];
|
|
253
|
+
case 17: return [2 /*return*/];
|
|
254
|
+
}
|
|
255
|
+
});
|
|
256
|
+
}); },
|
|
257
|
+
examples: [
|
|
258
|
+
[
|
|
259
|
+
{ name: '{{user}}', content: { text: 'what do you see?' } },
|
|
260
|
+
{
|
|
261
|
+
name: '{{agent}}',
|
|
262
|
+
content: {
|
|
263
|
+
actions: ['DESCRIBE_SCENE'],
|
|
264
|
+
thought: 'The user wants to know what I can see through my camera.',
|
|
265
|
+
text: 'I see a room with a desk and computer setup. There are 2 people, one is sitting and one is standing.',
|
|
266
|
+
},
|
|
267
|
+
},
|
|
268
|
+
],
|
|
269
|
+
[
|
|
270
|
+
{ name: '{{user}}', content: { text: 'describe the scene and then take a photo' } },
|
|
271
|
+
{
|
|
272
|
+
name: '{{agent}}',
|
|
273
|
+
content: {
|
|
274
|
+
actions: ['DESCRIBE_SCENE', 'CAPTURE_IMAGE'],
|
|
275
|
+
thought: 'I should first analyze the scene, then capture an image for the user.',
|
|
276
|
+
text: 'I can see 3 people in an office setting. Let me capture this scene for you.',
|
|
277
|
+
},
|
|
278
|
+
},
|
|
279
|
+
],
|
|
280
|
+
],
|
|
281
|
+
};
|
|
282
|
+
exports.captureImageAction = {
|
|
283
|
+
name: 'CAPTURE_IMAGE',
|
|
284
|
+
similes: ['TAKE_PHOTO', 'SCREENSHOT', 'CAPTURE_FRAME', 'TAKE_PICTURE'],
|
|
285
|
+
description: 'Captures the current frame from the camera and saves it as an image attachment. Returns image data with camera info and timestamp for action chaining. Can be combined with DESCRIBE_SCENE for analysis or NAME_ENTITY for identification workflows.',
|
|
286
|
+
enabled: false, // Disabled by default - privacy-sensitive, can capture images
|
|
287
|
+
validate: function (runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
288
|
+
var visionService;
|
|
289
|
+
return __generator(this, function (_a) {
|
|
290
|
+
visionService = runtime.getService('VISION');
|
|
291
|
+
return [2 /*return*/, !!visionService && visionService.isActive()];
|
|
292
|
+
});
|
|
293
|
+
}); },
|
|
294
|
+
handler: function (runtime, message, _state, _options, callback, _responses) { return __awaiter(void 0, void 0, void 0, function () {
|
|
295
|
+
var visionService, thought, text, imageBuffer, cameraInfo, thought_2, text_2, attachmentId, timestamp, imageAttachment, thought, text, error_2, thought, text;
|
|
296
|
+
return __generator(this, function (_a) {
|
|
297
|
+
switch (_a.label) {
|
|
298
|
+
case 0:
|
|
299
|
+
visionService = runtime.getService('VISION');
|
|
300
|
+
if (!(!visionService || !visionService.isActive())) return [3 /*break*/, 4];
|
|
301
|
+
thought = 'Vision service is not available or no camera is connected.';
|
|
302
|
+
text = 'I cannot capture an image right now. No camera is available.';
|
|
303
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['CAPTURE_IMAGE'])];
|
|
304
|
+
case 1:
|
|
305
|
+
_a.sent();
|
|
306
|
+
if (!callback) return [3 /*break*/, 3];
|
|
307
|
+
return [4 /*yield*/, callback({
|
|
308
|
+
thought: thought,
|
|
309
|
+
text: text,
|
|
310
|
+
actions: ['CAPTURE_IMAGE'],
|
|
311
|
+
})];
|
|
312
|
+
case 2:
|
|
313
|
+
_a.sent();
|
|
314
|
+
_a.label = 3;
|
|
315
|
+
case 3: return [2 /*return*/, {
|
|
316
|
+
text: 'Vision service unavailable - cannot capture image',
|
|
317
|
+
values: {
|
|
318
|
+
success: false,
|
|
319
|
+
visionAvailable: false,
|
|
320
|
+
error: 'Vision service not available',
|
|
321
|
+
},
|
|
322
|
+
data: {
|
|
323
|
+
actionName: 'CAPTURE_IMAGE',
|
|
324
|
+
error: 'Vision service not available or no camera connected',
|
|
325
|
+
},
|
|
326
|
+
}];
|
|
327
|
+
case 4:
|
|
328
|
+
_a.trys.push([4, 13, , 17]);
|
|
329
|
+
return [4 /*yield*/, visionService.captureImage()];
|
|
330
|
+
case 5:
|
|
331
|
+
imageBuffer = _a.sent();
|
|
332
|
+
cameraInfo = visionService.getCameraInfo();
|
|
333
|
+
if (!!imageBuffer) return [3 /*break*/, 9];
|
|
334
|
+
thought_2 = 'Failed to capture image from camera.';
|
|
335
|
+
text_2 = 'I could not capture an image from the camera. Please try again.';
|
|
336
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_2, text_2, ['CAPTURE_IMAGE'])];
|
|
337
|
+
case 6:
|
|
338
|
+
_a.sent();
|
|
339
|
+
if (!callback) return [3 /*break*/, 8];
|
|
340
|
+
return [4 /*yield*/, callback({
|
|
341
|
+
thought: thought_2,
|
|
342
|
+
text: text_2,
|
|
343
|
+
actions: ['CAPTURE_IMAGE'],
|
|
344
|
+
})];
|
|
345
|
+
case 7:
|
|
346
|
+
_a.sent();
|
|
347
|
+
_a.label = 8;
|
|
348
|
+
case 8: return [2 /*return*/, {
|
|
349
|
+
text: 'Failed to capture image from camera',
|
|
350
|
+
values: {
|
|
351
|
+
success: false,
|
|
352
|
+
visionAvailable: true,
|
|
353
|
+
captureSuccess: false,
|
|
354
|
+
},
|
|
355
|
+
data: {
|
|
356
|
+
actionName: 'CAPTURE_IMAGE',
|
|
357
|
+
error: 'Camera capture failed',
|
|
358
|
+
cameraInfo: cameraInfo,
|
|
359
|
+
},
|
|
360
|
+
}];
|
|
361
|
+
case 9:
|
|
362
|
+
attachmentId = (0, core_1.createUniqueUuid)(runtime, "capture-".concat(Date.now()));
|
|
363
|
+
timestamp = new Date().toISOString();
|
|
364
|
+
imageAttachment = {
|
|
365
|
+
id: attachmentId,
|
|
366
|
+
title: "Camera Capture - ".concat(timestamp),
|
|
367
|
+
contentType: core_1.ContentType.IMAGE,
|
|
368
|
+
source: "camera:".concat((cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name) || 'unknown'),
|
|
369
|
+
url: "data:image/jpeg;base64,".concat(imageBuffer.toString('base64')),
|
|
370
|
+
};
|
|
371
|
+
thought = "Captured an image from camera \"".concat(cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name, "\".");
|
|
372
|
+
text = "I've captured an image from the camera at ".concat(timestamp, ".");
|
|
373
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['CAPTURE_IMAGE'], [imageAttachment])];
|
|
374
|
+
case 10:
|
|
375
|
+
_a.sent();
|
|
376
|
+
if (!callback) return [3 /*break*/, 12];
|
|
377
|
+
return [4 /*yield*/, callback({
|
|
378
|
+
thought: thought,
|
|
379
|
+
text: text,
|
|
380
|
+
actions: ['CAPTURE_IMAGE'],
|
|
381
|
+
attachments: [imageAttachment],
|
|
382
|
+
})];
|
|
383
|
+
case 11:
|
|
384
|
+
_a.sent();
|
|
385
|
+
_a.label = 12;
|
|
386
|
+
case 12: return [2 /*return*/, {
|
|
387
|
+
text: "I've captured an image from the camera at ".concat(timestamp, "."),
|
|
388
|
+
values: {
|
|
389
|
+
success: true,
|
|
390
|
+
visionAvailable: true,
|
|
391
|
+
captureSuccess: true,
|
|
392
|
+
cameraName: cameraInfo === null || cameraInfo === void 0 ? void 0 : cameraInfo.name,
|
|
393
|
+
timestamp: timestamp,
|
|
394
|
+
},
|
|
395
|
+
data: {
|
|
396
|
+
actionName: 'CAPTURE_IMAGE',
|
|
397
|
+
imageAttachment: imageAttachment,
|
|
398
|
+
cameraInfo: cameraInfo,
|
|
399
|
+
timestamp: timestamp,
|
|
400
|
+
},
|
|
401
|
+
}];
|
|
402
|
+
case 13:
|
|
403
|
+
error_2 = _a.sent();
|
|
404
|
+
core_1.logger.error('[captureImageAction] Error capturing image:', error_2);
|
|
405
|
+
thought = 'An error occurred while trying to capture an image.';
|
|
406
|
+
text = "Error capturing image: ".concat(error_2.message);
|
|
407
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['CAPTURE_IMAGE'])];
|
|
408
|
+
case 14:
|
|
409
|
+
_a.sent();
|
|
410
|
+
if (!callback) return [3 /*break*/, 16];
|
|
411
|
+
return [4 /*yield*/, callback({
|
|
412
|
+
thought: thought,
|
|
413
|
+
text: text,
|
|
414
|
+
actions: ['CAPTURE_IMAGE'],
|
|
415
|
+
})];
|
|
416
|
+
case 15:
|
|
417
|
+
_a.sent();
|
|
418
|
+
_a.label = 16;
|
|
419
|
+
case 16: return [2 /*return*/, {
|
|
420
|
+
text: 'Error capturing image',
|
|
421
|
+
values: {
|
|
422
|
+
success: false,
|
|
423
|
+
visionAvailable: true,
|
|
424
|
+
error: true,
|
|
425
|
+
errorMessage: error_2.message,
|
|
426
|
+
},
|
|
427
|
+
data: {
|
|
428
|
+
actionName: 'CAPTURE_IMAGE',
|
|
429
|
+
error: error_2.message,
|
|
430
|
+
errorType: 'capture_error',
|
|
431
|
+
},
|
|
432
|
+
}];
|
|
433
|
+
case 17: return [2 /*return*/];
|
|
434
|
+
}
|
|
435
|
+
});
|
|
436
|
+
}); },
|
|
437
|
+
examples: [
|
|
438
|
+
// Multi-action: Describe scene then capture image
|
|
439
|
+
[
|
|
440
|
+
{ name: '{{user}}', content: { text: 'describe what you see and take a photo' } },
|
|
441
|
+
{
|
|
442
|
+
name: '{{agent}}',
|
|
443
|
+
content: {
|
|
444
|
+
actions: ['DESCRIBE_SCENE', 'CAPTURE_IMAGE'],
|
|
445
|
+
thought: 'User wants scene analysis followed by image capture.',
|
|
446
|
+
text: 'I can see 3 people in an office setting. Let me capture this scene for you.',
|
|
447
|
+
},
|
|
448
|
+
},
|
|
449
|
+
],
|
|
450
|
+
[
|
|
451
|
+
{ name: '{{user}}', content: { text: 'take a photo' } },
|
|
452
|
+
{
|
|
453
|
+
name: '{{agent}}',
|
|
454
|
+
content: {
|
|
455
|
+
actions: ['CAPTURE_IMAGE'],
|
|
456
|
+
thought: 'The user wants me to capture an image from the camera.',
|
|
457
|
+
text: "I've captured an image from the camera.",
|
|
458
|
+
},
|
|
459
|
+
},
|
|
460
|
+
],
|
|
461
|
+
[
|
|
462
|
+
{ name: '{{user}}', content: { text: 'capture the current scene' } },
|
|
463
|
+
{
|
|
464
|
+
name: '{{agent}}',
|
|
465
|
+
content: {
|
|
466
|
+
actions: ['CAPTURE_IMAGE'],
|
|
467
|
+
},
|
|
468
|
+
},
|
|
469
|
+
],
|
|
470
|
+
],
|
|
471
|
+
};
|
|
472
|
+
exports.killAutonomousAction = {
|
|
473
|
+
name: 'KILL_AUTONOMOUS',
|
|
474
|
+
similes: ['STOP_AUTONOMOUS', 'HALT_AUTONOMOUS', 'KILL_AUTO_LOOP'],
|
|
475
|
+
description: 'Stops the autonomous agent loop for debugging purposes.',
|
|
476
|
+
enabled: false, // Disabled by default - potentially dangerous, can halt autonomous operations
|
|
477
|
+
validate: function (_runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
478
|
+
return __generator(this, function (_a) {
|
|
479
|
+
// Always allow this action for debugging
|
|
480
|
+
return [2 /*return*/, true];
|
|
481
|
+
});
|
|
482
|
+
}); },
|
|
483
|
+
handler: function (runtime, message, _state, _options, callback, _responses) { return __awaiter(void 0, void 0, void 0, function () {
|
|
484
|
+
var autonomousService, thought, text, thought, text, error_3, thought, text;
|
|
485
|
+
return __generator(this, function (_a) {
|
|
486
|
+
switch (_a.label) {
|
|
487
|
+
case 0:
|
|
488
|
+
_a.trys.push([0, 9, , 13]);
|
|
489
|
+
autonomousService = runtime.getService('AUTONOMOUS');
|
|
490
|
+
if (!(autonomousService && 'stop' in autonomousService)) return [3 /*break*/, 5];
|
|
491
|
+
return [4 /*yield*/, autonomousService.stop()];
|
|
492
|
+
case 1:
|
|
493
|
+
_a.sent();
|
|
494
|
+
thought = 'Successfully stopped the autonomous agent loop.';
|
|
495
|
+
text = 'Autonomous loop has been killed. The agent will no longer run autonomously until restarted.';
|
|
496
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['KILL_AUTONOMOUS'])];
|
|
497
|
+
case 2:
|
|
498
|
+
_a.sent();
|
|
499
|
+
if (!callback) return [3 /*break*/, 4];
|
|
500
|
+
return [4 /*yield*/, callback({
|
|
501
|
+
thought: thought,
|
|
502
|
+
text: text,
|
|
503
|
+
actions: ['KILL_AUTONOMOUS'],
|
|
504
|
+
})];
|
|
505
|
+
case 3:
|
|
506
|
+
_a.sent();
|
|
507
|
+
_a.label = 4;
|
|
508
|
+
case 4: return [3 /*break*/, 8];
|
|
509
|
+
case 5:
|
|
510
|
+
thought = 'Autonomous service not found or already stopped.';
|
|
511
|
+
text = 'No autonomous loop was running or the service could not be found.';
|
|
512
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['KILL_AUTONOMOUS'])];
|
|
513
|
+
case 6:
|
|
514
|
+
_a.sent();
|
|
515
|
+
if (!callback) return [3 /*break*/, 8];
|
|
516
|
+
return [4 /*yield*/, callback({
|
|
517
|
+
thought: thought,
|
|
518
|
+
text: text,
|
|
519
|
+
actions: ['KILL_AUTONOMOUS'],
|
|
520
|
+
})];
|
|
521
|
+
case 7:
|
|
522
|
+
_a.sent();
|
|
523
|
+
_a.label = 8;
|
|
524
|
+
case 8: return [3 /*break*/, 13];
|
|
525
|
+
case 9:
|
|
526
|
+
error_3 = _a.sent();
|
|
527
|
+
core_1.logger.error('[killAutonomousAction] Error stopping autonomous service:', error_3);
|
|
528
|
+
thought = 'An error occurred while trying to stop the autonomous loop.';
|
|
529
|
+
text = "Error stopping autonomous loop: ".concat(error_3.message);
|
|
530
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['KILL_AUTONOMOUS'])];
|
|
531
|
+
case 10:
|
|
532
|
+
_a.sent();
|
|
533
|
+
if (!callback) return [3 /*break*/, 12];
|
|
534
|
+
return [4 /*yield*/, callback({
|
|
535
|
+
thought: thought,
|
|
536
|
+
text: text,
|
|
537
|
+
actions: ['KILL_AUTONOMOUS'],
|
|
538
|
+
})];
|
|
539
|
+
case 11:
|
|
540
|
+
_a.sent();
|
|
541
|
+
_a.label = 12;
|
|
542
|
+
case 12: return [3 /*break*/, 13];
|
|
543
|
+
case 13: return [2 /*return*/];
|
|
544
|
+
}
|
|
545
|
+
});
|
|
546
|
+
}); },
|
|
547
|
+
examples: [
|
|
548
|
+
[
|
|
549
|
+
{ name: 'user', content: { text: 'kill the autonomous loop' } },
|
|
550
|
+
{
|
|
551
|
+
name: 'agent',
|
|
552
|
+
content: {
|
|
553
|
+
actions: ['KILL_AUTONOMOUS'],
|
|
554
|
+
thought: 'The user wants to stop the autonomous agent loop for debugging.',
|
|
555
|
+
text: 'Autonomous loop has been killed. The agent will no longer run autonomously until restarted.',
|
|
556
|
+
},
|
|
557
|
+
},
|
|
558
|
+
],
|
|
559
|
+
[
|
|
560
|
+
{ name: 'user', content: { text: 'stop autonomous mode' } },
|
|
561
|
+
{
|
|
562
|
+
name: 'agent',
|
|
563
|
+
content: {
|
|
564
|
+
actions: ['KILL_AUTONOMOUS'],
|
|
565
|
+
},
|
|
566
|
+
},
|
|
567
|
+
],
|
|
568
|
+
],
|
|
569
|
+
};
|
|
570
|
+
exports.setVisionModeAction = {
|
|
571
|
+
name: 'SET_VISION_MODE',
|
|
572
|
+
description: 'Set the vision mode to OFF, CAMERA, SCREEN, or BOTH',
|
|
573
|
+
similes: [
|
|
574
|
+
'change vision to {mode}',
|
|
575
|
+
'set vision mode {mode}',
|
|
576
|
+
'switch to {mode} vision',
|
|
577
|
+
'turn vision {mode}',
|
|
578
|
+
'use {mode} vision',
|
|
579
|
+
'enable {mode} vision',
|
|
580
|
+
'disable vision',
|
|
581
|
+
],
|
|
582
|
+
validate: function (runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
583
|
+
var visionService;
|
|
584
|
+
return __generator(this, function (_a) {
|
|
585
|
+
visionService = runtime.getService('VISION');
|
|
586
|
+
return [2 /*return*/, visionService !== null];
|
|
587
|
+
});
|
|
588
|
+
}); },
|
|
589
|
+
handler: function (runtime, message, _state, _options, callback, _responses) { return __awaiter(void 0, void 0, void 0, function () {
|
|
590
|
+
var visionService, thought, text, messageText, newMode, thought_3, text_3, currentMode, thought, text, error_4, thought, text;
|
|
591
|
+
var _a;
|
|
592
|
+
return __generator(this, function (_b) {
|
|
593
|
+
switch (_b.label) {
|
|
594
|
+
case 0:
|
|
595
|
+
visionService = runtime.getService('VISION');
|
|
596
|
+
if (!!visionService) return [3 /*break*/, 4];
|
|
597
|
+
thought = 'Vision service is not available.';
|
|
598
|
+
text = 'I cannot change vision mode because the vision service is not available.';
|
|
599
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['SET_VISION_MODE'])];
|
|
600
|
+
case 1:
|
|
601
|
+
_b.sent();
|
|
602
|
+
if (!callback) return [3 /*break*/, 3];
|
|
603
|
+
return [4 /*yield*/, callback({
|
|
604
|
+
thought: thought,
|
|
605
|
+
text: text,
|
|
606
|
+
actions: ['SET_VISION_MODE'],
|
|
607
|
+
})];
|
|
608
|
+
case 2:
|
|
609
|
+
_b.sent();
|
|
610
|
+
_b.label = 3;
|
|
611
|
+
case 3: return [2 /*return*/];
|
|
612
|
+
case 4:
|
|
613
|
+
_b.trys.push([4, 13, , 17]);
|
|
614
|
+
messageText = ((_a = message.content.text) === null || _a === void 0 ? void 0 : _a.toLowerCase()) || '';
|
|
615
|
+
newMode = null;
|
|
616
|
+
if (messageText.includes('off') || messageText.includes('disable')) {
|
|
617
|
+
newMode = types_1.VisionMode.OFF;
|
|
618
|
+
}
|
|
619
|
+
else if (messageText.includes('both')) {
|
|
620
|
+
newMode = types_1.VisionMode.BOTH;
|
|
621
|
+
}
|
|
622
|
+
else if (messageText.includes('screen')) {
|
|
623
|
+
newMode = types_1.VisionMode.SCREEN;
|
|
624
|
+
}
|
|
625
|
+
else if (messageText.includes('camera')) {
|
|
626
|
+
newMode = types_1.VisionMode.CAMERA;
|
|
627
|
+
}
|
|
628
|
+
if (!!newMode) return [3 /*break*/, 8];
|
|
629
|
+
thought_3 = 'Could not determine the desired vision mode from the message.';
|
|
630
|
+
text_3 = 'Please specify the vision mode: OFF, CAMERA, SCREEN, or BOTH.';
|
|
631
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_3, text_3, ['SET_VISION_MODE'])];
|
|
632
|
+
case 5:
|
|
633
|
+
_b.sent();
|
|
634
|
+
if (!callback) return [3 /*break*/, 7];
|
|
635
|
+
return [4 /*yield*/, callback({
|
|
636
|
+
thought: thought_3,
|
|
637
|
+
text: text_3,
|
|
638
|
+
actions: ['SET_VISION_MODE'],
|
|
639
|
+
})];
|
|
640
|
+
case 6:
|
|
641
|
+
_b.sent();
|
|
642
|
+
_b.label = 7;
|
|
643
|
+
case 7: return [2 /*return*/];
|
|
644
|
+
case 8:
|
|
645
|
+
currentMode = visionService.getVisionMode();
|
|
646
|
+
return [4 /*yield*/, visionService.setVisionMode(newMode)];
|
|
647
|
+
case 9:
|
|
648
|
+
_b.sent();
|
|
649
|
+
thought = "Changed vision mode from ".concat(currentMode, " to ").concat(newMode, ".");
|
|
650
|
+
text = '';
|
|
651
|
+
switch (newMode) {
|
|
652
|
+
case types_1.VisionMode.OFF:
|
|
653
|
+
text = 'Vision has been disabled. I will no longer process visual input.';
|
|
654
|
+
break;
|
|
655
|
+
case types_1.VisionMode.CAMERA:
|
|
656
|
+
text = 'Vision mode set to CAMERA only. I will process input from the camera.';
|
|
657
|
+
break;
|
|
658
|
+
case types_1.VisionMode.SCREEN:
|
|
659
|
+
text = "Vision mode set to SCREEN only. I will analyze what's on your screen.";
|
|
660
|
+
break;
|
|
661
|
+
case types_1.VisionMode.BOTH:
|
|
662
|
+
text = 'Vision mode set to BOTH. I will process input from both camera and screen.';
|
|
663
|
+
break;
|
|
664
|
+
}
|
|
665
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['SET_VISION_MODE'])];
|
|
666
|
+
case 10:
|
|
667
|
+
_b.sent();
|
|
668
|
+
if (!callback) return [3 /*break*/, 12];
|
|
669
|
+
return [4 /*yield*/, callback({
|
|
670
|
+
thought: thought,
|
|
671
|
+
text: text,
|
|
672
|
+
actions: ['SET_VISION_MODE'],
|
|
673
|
+
})];
|
|
674
|
+
case 11:
|
|
675
|
+
_b.sent();
|
|
676
|
+
_b.label = 12;
|
|
677
|
+
case 12: return [3 /*break*/, 17];
|
|
678
|
+
case 13:
|
|
679
|
+
error_4 = _b.sent();
|
|
680
|
+
core_1.logger.error('[setVisionModeAction] Error changing vision mode:', error_4);
|
|
681
|
+
thought = 'An error occurred while trying to change the vision mode.';
|
|
682
|
+
text = "Error changing vision mode: ".concat(error_4.message);
|
|
683
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['SET_VISION_MODE'])];
|
|
684
|
+
case 14:
|
|
685
|
+
_b.sent();
|
|
686
|
+
if (!callback) return [3 /*break*/, 16];
|
|
687
|
+
return [4 /*yield*/, callback({
|
|
688
|
+
thought: thought,
|
|
689
|
+
text: text,
|
|
690
|
+
actions: ['SET_VISION_MODE'],
|
|
691
|
+
})];
|
|
692
|
+
case 15:
|
|
693
|
+
_b.sent();
|
|
694
|
+
_b.label = 16;
|
|
695
|
+
case 16: return [3 /*break*/, 17];
|
|
696
|
+
case 17: return [2 /*return*/];
|
|
697
|
+
}
|
|
698
|
+
});
|
|
699
|
+
}); },
|
|
700
|
+
examples: [
|
|
701
|
+
[
|
|
702
|
+
{ name: 'user', content: { text: 'set vision mode to screen' } },
|
|
703
|
+
{
|
|
704
|
+
name: 'agent',
|
|
705
|
+
content: {
|
|
706
|
+
actions: ['SET_VISION_MODE'],
|
|
707
|
+
thought: 'The user wants to switch to screen vision mode.',
|
|
708
|
+
text: "Vision mode set to SCREEN only. I will analyze what's on your screen.",
|
|
709
|
+
},
|
|
710
|
+
},
|
|
711
|
+
],
|
|
712
|
+
[
|
|
713
|
+
{ name: 'user', content: { text: 'enable both camera and screen vision' } },
|
|
714
|
+
{
|
|
715
|
+
name: 'agent',
|
|
716
|
+
content: {
|
|
717
|
+
actions: ['SET_VISION_MODE'],
|
|
718
|
+
thought: 'The user wants to enable both vision inputs.',
|
|
719
|
+
text: 'Vision mode set to BOTH. I will process input from both camera and screen.',
|
|
720
|
+
},
|
|
721
|
+
},
|
|
722
|
+
],
|
|
723
|
+
[
|
|
724
|
+
{ name: 'user', content: { text: 'turn off vision' } },
|
|
725
|
+
{
|
|
726
|
+
name: 'agent',
|
|
727
|
+
content: {
|
|
728
|
+
actions: ['SET_VISION_MODE'],
|
|
729
|
+
thought: 'The user wants to disable vision.',
|
|
730
|
+
text: 'Vision has been disabled. I will no longer process visual input.',
|
|
731
|
+
},
|
|
732
|
+
},
|
|
733
|
+
],
|
|
734
|
+
],
|
|
735
|
+
};
|
|
736
|
+
// Enhanced actions for entity tracking and face recognition
|
|
737
|
+
exports.nameEntityAction = {
|
|
738
|
+
name: 'NAME_ENTITY',
|
|
739
|
+
description: 'Assign a name to a person or object currently visible in the camera view',
|
|
740
|
+
similes: [
|
|
741
|
+
'call the person {name}',
|
|
742
|
+
'the person in front is {name}',
|
|
743
|
+
'name the person {name}',
|
|
744
|
+
'that person is {name}',
|
|
745
|
+
'the object is a {name}',
|
|
746
|
+
'call that {name}',
|
|
747
|
+
],
|
|
748
|
+
examples: [
|
|
749
|
+
[
|
|
750
|
+
{
|
|
751
|
+
name: 'user',
|
|
752
|
+
content: {
|
|
753
|
+
text: 'The person wearing the blue shirt is named Alice',
|
|
754
|
+
},
|
|
755
|
+
},
|
|
756
|
+
{
|
|
757
|
+
name: 'agent',
|
|
758
|
+
content: {
|
|
759
|
+
text: "I've identified the person in the blue shirt as Alice. I'll remember them for future interactions.",
|
|
760
|
+
actions: ['NAME_ENTITY'],
|
|
761
|
+
},
|
|
762
|
+
},
|
|
763
|
+
],
|
|
764
|
+
[
|
|
765
|
+
{
|
|
766
|
+
name: 'user',
|
|
767
|
+
content: {
|
|
768
|
+
text: 'Call the person on the left Bob',
|
|
769
|
+
},
|
|
770
|
+
},
|
|
771
|
+
{
|
|
772
|
+
name: 'agent',
|
|
773
|
+
content: {
|
|
774
|
+
text: "I've named the person on the left as Bob. Their face profile has been updated.",
|
|
775
|
+
actions: ['NAME_ENTITY'],
|
|
776
|
+
},
|
|
777
|
+
},
|
|
778
|
+
],
|
|
779
|
+
],
|
|
780
|
+
validate: function (runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
781
|
+
var visionService;
|
|
782
|
+
return __generator(this, function (_a) {
|
|
783
|
+
visionService = runtime.getService('VISION');
|
|
784
|
+
return [2 /*return*/, (visionService === null || visionService === void 0 ? void 0 : visionService.isActive()) || false];
|
|
785
|
+
});
|
|
786
|
+
}); },
|
|
787
|
+
handler: function (runtime, message, _state, _options, callback) { return __awaiter(void 0, void 0, void 0, function () {
|
|
788
|
+
var visionService, thought, text_4, scene, thought, text_5, text, nameMatch, thought, text_6, name_1, _worldId, entityTracker, activeEntities, people, thought, text_7, targetPerson, success, thought, text_8, thought, text_9, error_5, thought, text;
|
|
789
|
+
var _a;
|
|
790
|
+
return __generator(this, function (_b) {
|
|
791
|
+
switch (_b.label) {
|
|
792
|
+
case 0:
|
|
793
|
+
_b.trys.push([0, 26, , 30]);
|
|
794
|
+
visionService = runtime.getService('VISION');
|
|
795
|
+
if (!!visionService) return [3 /*break*/, 4];
|
|
796
|
+
thought = 'Vision service is not available.';
|
|
797
|
+
text_4 = 'I cannot name entities because the vision service is not available.';
|
|
798
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text_4, ['NAME_ENTITY'])];
|
|
799
|
+
case 1:
|
|
800
|
+
_b.sent();
|
|
801
|
+
if (!callback) return [3 /*break*/, 3];
|
|
802
|
+
return [4 /*yield*/, callback({ thought: thought, text: text_4, actions: ['NAME_ENTITY'] })];
|
|
803
|
+
case 2:
|
|
804
|
+
_b.sent();
|
|
805
|
+
_b.label = 3;
|
|
806
|
+
case 3: return [2 /*return*/];
|
|
807
|
+
case 4: return [4 /*yield*/, visionService.getSceneDescription()];
|
|
808
|
+
case 5:
|
|
809
|
+
scene = _b.sent();
|
|
810
|
+
if (!(!scene || scene.people.length === 0)) return [3 /*break*/, 9];
|
|
811
|
+
thought = 'No people visible to name.';
|
|
812
|
+
text_5 = "I don't see any people in the current scene to name.";
|
|
813
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text_5, ['NAME_ENTITY'])];
|
|
814
|
+
case 6:
|
|
815
|
+
_b.sent();
|
|
816
|
+
if (!callback) return [3 /*break*/, 8];
|
|
817
|
+
return [4 /*yield*/, callback({ thought: thought, text: text_5, actions: ['NAME_ENTITY'] })];
|
|
818
|
+
case 7:
|
|
819
|
+
_b.sent();
|
|
820
|
+
_b.label = 8;
|
|
821
|
+
case 8: return [2 /*return*/];
|
|
822
|
+
case 9:
|
|
823
|
+
text = ((_a = message.content.text) === null || _a === void 0 ? void 0 : _a.toLowerCase()) || '';
|
|
824
|
+
nameMatch = text.match(/(?:named?|call(?:ed)?|is)\s+(\w+)/i);
|
|
825
|
+
if (!!nameMatch) return [3 /*break*/, 13];
|
|
826
|
+
thought = 'Could not extract name from message.';
|
|
827
|
+
text_6 = 'I couldn\'t understand what name to assign. Please say something like "The person is named Alice".';
|
|
828
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text_6, ['NAME_ENTITY'])];
|
|
829
|
+
case 10:
|
|
830
|
+
_b.sent();
|
|
831
|
+
if (!callback) return [3 /*break*/, 12];
|
|
832
|
+
return [4 /*yield*/, callback({ thought: thought, text: text_6, actions: ['NAME_ENTITY'] })];
|
|
833
|
+
case 11:
|
|
834
|
+
_b.sent();
|
|
835
|
+
_b.label = 12;
|
|
836
|
+
case 12: return [2 /*return*/];
|
|
837
|
+
case 13:
|
|
838
|
+
name_1 = nameMatch[1];
|
|
839
|
+
_worldId = message.worldId || 'default-world';
|
|
840
|
+
entityTracker = visionService.getEntityTracker();
|
|
841
|
+
// Update entities
|
|
842
|
+
return [4 /*yield*/, entityTracker.updateEntities(scene.objects, scene.people, undefined, runtime)];
|
|
843
|
+
case 14:
|
|
844
|
+
// Update entities
|
|
845
|
+
_b.sent();
|
|
846
|
+
activeEntities = entityTracker.getActiveEntities();
|
|
847
|
+
people = activeEntities.filter(function (e) { return e.entityType === 'person'; });
|
|
848
|
+
if (!(people.length === 0)) return [3 /*break*/, 18];
|
|
849
|
+
thought = 'No tracked people found.';
|
|
850
|
+
text_7 = "I can see someone but haven't established tracking yet. Please try again in a moment.";
|
|
851
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text_7, ['NAME_ENTITY'])];
|
|
852
|
+
case 15:
|
|
853
|
+
_b.sent();
|
|
854
|
+
if (!callback) return [3 /*break*/, 17];
|
|
855
|
+
return [4 /*yield*/, callback({ thought: thought, text: text_7, actions: ['NAME_ENTITY'] })];
|
|
856
|
+
case 16:
|
|
857
|
+
_b.sent();
|
|
858
|
+
_b.label = 17;
|
|
859
|
+
case 17: return [2 /*return*/];
|
|
860
|
+
case 18:
|
|
861
|
+
targetPerson = people[0];
|
|
862
|
+
if (people.length > 1) {
|
|
863
|
+
targetPerson = people.reduce(function (prev, curr) {
|
|
864
|
+
var prevArea = prev.lastPosition.width * prev.lastPosition.height;
|
|
865
|
+
var currArea = curr.lastPosition.width * curr.lastPosition.height;
|
|
866
|
+
return currArea > prevArea ? curr : prev;
|
|
867
|
+
});
|
|
868
|
+
}
|
|
869
|
+
success = entityTracker.assignNameToEntity(targetPerson.id, name_1);
|
|
870
|
+
if (!success) return [3 /*break*/, 22];
|
|
871
|
+
thought = "Named entity \"".concat(name_1, "\" and associated with person in scene.");
|
|
872
|
+
text_8 = "I've identified the person as ".concat(name_1, ". I'll remember them for future interactions.");
|
|
873
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text_8, ['NAME_ENTITY'], undefined)];
|
|
874
|
+
case 19:
|
|
875
|
+
_b.sent();
|
|
876
|
+
if (!callback) return [3 /*break*/, 21];
|
|
877
|
+
return [4 /*yield*/, callback({
|
|
878
|
+
thought: thought,
|
|
879
|
+
text: text_8,
|
|
880
|
+
actions: ['NAME_ENTITY'],
|
|
881
|
+
data: { entityId: targetPerson.id, name: name_1 },
|
|
882
|
+
})];
|
|
883
|
+
case 20:
|
|
884
|
+
_b.sent();
|
|
885
|
+
_b.label = 21;
|
|
886
|
+
case 21:
|
|
887
|
+
core_1.logger.info("[NameEntityAction] Assigned name \"".concat(name_1, "\" to entity ").concat(targetPerson.id));
|
|
888
|
+
return [3 /*break*/, 25];
|
|
889
|
+
case 22:
|
|
890
|
+
thought = 'Failed to assign name to entity.';
|
|
891
|
+
text_9 = 'There was an error assigning the name. Please try again.';
|
|
892
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text_9, ['NAME_ENTITY'])];
|
|
893
|
+
case 23:
|
|
894
|
+
_b.sent();
|
|
895
|
+
if (!callback) return [3 /*break*/, 25];
|
|
896
|
+
return [4 /*yield*/, callback({ thought: thought, text: text_9, actions: ['NAME_ENTITY'] })];
|
|
897
|
+
case 24:
|
|
898
|
+
_b.sent();
|
|
899
|
+
_b.label = 25;
|
|
900
|
+
case 25: return [3 /*break*/, 30];
|
|
901
|
+
case 26:
|
|
902
|
+
error_5 = _b.sent();
|
|
903
|
+
core_1.logger.error('[NameEntityAction] Error:', error_5);
|
|
904
|
+
thought = 'Failed to name entity.';
|
|
905
|
+
text = "Sorry, I couldn't name the entity: ".concat(error_5 instanceof Error ? error_5.message : 'Unknown error');
|
|
906
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['NAME_ENTITY'])];
|
|
907
|
+
case 27:
|
|
908
|
+
_b.sent();
|
|
909
|
+
if (!callback) return [3 /*break*/, 29];
|
|
910
|
+
return [4 /*yield*/, callback({ thought: thought, text: text, actions: ['NAME_ENTITY'] })];
|
|
911
|
+
case 28:
|
|
912
|
+
_b.sent();
|
|
913
|
+
_b.label = 29;
|
|
914
|
+
case 29: return [3 /*break*/, 30];
|
|
915
|
+
case 30: return [2 /*return*/];
|
|
916
|
+
}
|
|
917
|
+
});
|
|
918
|
+
}); },
|
|
919
|
+
};
|
|
920
|
+
exports.identifyPersonAction = {
|
|
921
|
+
name: 'IDENTIFY_PERSON',
|
|
922
|
+
description: 'Identify a person in view if they have been seen before',
|
|
923
|
+
enabled: false, // Disabled by default - privacy-sensitive, can identify and recognize people
|
|
924
|
+
similes: [
|
|
925
|
+
'who is that',
|
|
926
|
+
'who is the person',
|
|
927
|
+
'identify the person',
|
|
928
|
+
'do you recognize them',
|
|
929
|
+
'have you seen them before',
|
|
930
|
+
],
|
|
931
|
+
examples: [
|
|
932
|
+
[
|
|
933
|
+
{
|
|
934
|
+
name: 'user',
|
|
935
|
+
content: {
|
|
936
|
+
text: 'Who is the person in front of you?',
|
|
937
|
+
},
|
|
938
|
+
},
|
|
939
|
+
{
|
|
940
|
+
name: 'agent',
|
|
941
|
+
content: {
|
|
942
|
+
text: "That's Alice. I last saw her about 5 minutes ago. She's been here for the past 20 minutes.",
|
|
943
|
+
actions: ['IDENTIFY_PERSON'],
|
|
944
|
+
},
|
|
945
|
+
},
|
|
946
|
+
],
|
|
947
|
+
],
|
|
948
|
+
validate: function (runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
949
|
+
var visionService;
|
|
950
|
+
return __generator(this, function (_a) {
|
|
951
|
+
visionService = runtime.getService('VISION');
|
|
952
|
+
return [2 /*return*/, (visionService === null || visionService === void 0 ? void 0 : visionService.isActive()) || false];
|
|
953
|
+
});
|
|
954
|
+
}); },
|
|
955
|
+
handler: function (runtime, message, _state, _options, callback) { return __awaiter(void 0, void 0, void 0, function () {
|
|
956
|
+
var visionService, thought_4, text_10, scene, thought_5, text_11, _worldId, entityTracker, activeEntities, people, thought_6, text_12, _responseText, recognizedCount, unknownCount, identifications, _i, people_1, person, name_2, duration, durationStr, personInfo, personInfo, recentlyLeft, _a, recentlyLeft_1, _b, entity, leftAt, timeAgo, timeStr, thought, text, error_6, thought, text;
|
|
957
|
+
return __generator(this, function (_c) {
|
|
958
|
+
switch (_c.label) {
|
|
959
|
+
case 0:
|
|
960
|
+
_c.trys.push([0, 18, , 22]);
|
|
961
|
+
visionService = runtime.getService('VISION');
|
|
962
|
+
if (!!visionService) return [3 /*break*/, 4];
|
|
963
|
+
thought_4 = 'Vision service is not available.';
|
|
964
|
+
text_10 = 'I cannot identify people because the vision service is not available.';
|
|
965
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_4, text_10, ['IDENTIFY_PERSON'])];
|
|
966
|
+
case 1:
|
|
967
|
+
_c.sent();
|
|
968
|
+
if (!callback) return [3 /*break*/, 3];
|
|
969
|
+
return [4 /*yield*/, callback({ thought: thought_4, text: text_10, actions: ['IDENTIFY_PERSON'] })];
|
|
970
|
+
case 2:
|
|
971
|
+
_c.sent();
|
|
972
|
+
_c.label = 3;
|
|
973
|
+
case 3: return [2 /*return*/];
|
|
974
|
+
case 4: return [4 /*yield*/, visionService.getSceneDescription()];
|
|
975
|
+
case 5:
|
|
976
|
+
scene = _c.sent();
|
|
977
|
+
if (!(!scene || scene.people.length === 0)) return [3 /*break*/, 9];
|
|
978
|
+
thought_5 = 'No people visible to identify.';
|
|
979
|
+
text_11 = "I don't see any people in the current scene.";
|
|
980
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_5, text_11, ['IDENTIFY_PERSON'])];
|
|
981
|
+
case 6:
|
|
982
|
+
_c.sent();
|
|
983
|
+
if (!callback) return [3 /*break*/, 8];
|
|
984
|
+
return [4 /*yield*/, callback({ thought: thought_5, text: text_11, actions: ['IDENTIFY_PERSON'] })];
|
|
985
|
+
case 7:
|
|
986
|
+
_c.sent();
|
|
987
|
+
_c.label = 8;
|
|
988
|
+
case 8: return [2 /*return*/];
|
|
989
|
+
case 9:
|
|
990
|
+
_worldId = message.worldId || 'default-world';
|
|
991
|
+
entityTracker = visionService.getEntityTracker();
|
|
992
|
+
// Update entities
|
|
993
|
+
return [4 /*yield*/, entityTracker.updateEntities(scene.objects, scene.people, undefined, runtime)];
|
|
994
|
+
case 10:
|
|
995
|
+
// Update entities
|
|
996
|
+
_c.sent();
|
|
997
|
+
activeEntities = entityTracker.getActiveEntities();
|
|
998
|
+
people = activeEntities.filter(function (e) { return e.entityType === 'person'; });
|
|
999
|
+
if (!(people.length === 0)) return [3 /*break*/, 14];
|
|
1000
|
+
thought_6 = 'No tracked people found.';
|
|
1001
|
+
text_12 = "I can see someone but I'm still processing their identity.";
|
|
1002
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_6, text_12, ['IDENTIFY_PERSON'])];
|
|
1003
|
+
case 11:
|
|
1004
|
+
_c.sent();
|
|
1005
|
+
if (!callback) return [3 /*break*/, 13];
|
|
1006
|
+
return [4 /*yield*/, callback({ thought: thought_6, text: text_12, actions: ['IDENTIFY_PERSON'] })];
|
|
1007
|
+
case 12:
|
|
1008
|
+
_c.sent();
|
|
1009
|
+
_c.label = 13;
|
|
1010
|
+
case 13: return [2 /*return*/];
|
|
1011
|
+
case 14:
|
|
1012
|
+
_responseText = '';
|
|
1013
|
+
recognizedCount = 0;
|
|
1014
|
+
unknownCount = 0;
|
|
1015
|
+
identifications = [];
|
|
1016
|
+
for (_i = 0, people_1 = people; _i < people_1.length; _i++) {
|
|
1017
|
+
person = people_1[_i];
|
|
1018
|
+
name_2 = person.attributes.name;
|
|
1019
|
+
duration = Date.now() - person.firstSeen;
|
|
1020
|
+
durationStr = duration < 60000
|
|
1021
|
+
? "".concat(Math.round(duration / 1000), " seconds")
|
|
1022
|
+
: "".concat(Math.round(duration / 60000), " minutes");
|
|
1023
|
+
if (name_2) {
|
|
1024
|
+
recognizedCount++;
|
|
1025
|
+
personInfo = "I can see ".concat(name_2, ". They've been here for ").concat(durationStr, ".");
|
|
1026
|
+
identifications.push(personInfo);
|
|
1027
|
+
// Add more context if available
|
|
1028
|
+
if (person.appearances.length > 5) {
|
|
1029
|
+
identifications.push("I've been tracking them consistently.");
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
else {
|
|
1033
|
+
unknownCount++;
|
|
1034
|
+
personInfo = "I see an unidentified person who has been here for ".concat(durationStr, ".");
|
|
1035
|
+
identifications.push(personInfo);
|
|
1036
|
+
if (person.attributes.faceId) {
|
|
1037
|
+
identifications.push("I've captured their face profile but they haven't been named yet.");
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
1040
|
+
}
|
|
1041
|
+
recentlyLeft = entityTracker.getRecentlyLeft();
|
|
1042
|
+
if (recentlyLeft.length > 0) {
|
|
1043
|
+
identifications.push('\nRecently departed:');
|
|
1044
|
+
for (_a = 0, recentlyLeft_1 = recentlyLeft; _a < recentlyLeft_1.length; _a++) {
|
|
1045
|
+
_b = recentlyLeft_1[_a], entity = _b.entity, leftAt = _b.leftAt;
|
|
1046
|
+
if (entity.entityType === 'person' && entity.attributes.name) {
|
|
1047
|
+
timeAgo = Date.now() - leftAt;
|
|
1048
|
+
timeStr = timeAgo < 60000
|
|
1049
|
+
? "".concat(Math.round(timeAgo / 1000), " seconds ago")
|
|
1050
|
+
: "".concat(Math.round(timeAgo / 60000), " minutes ago");
|
|
1051
|
+
identifications.push("".concat(entity.attributes.name, " left ").concat(timeStr, "."));
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
thought = "Identified ".concat(recognizedCount, " known people and ").concat(unknownCount, " unknown people.");
|
|
1056
|
+
text = identifications.join(' ');
|
|
1057
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['IDENTIFY_PERSON'])];
|
|
1058
|
+
case 15:
|
|
1059
|
+
_c.sent();
|
|
1060
|
+
if (!callback) return [3 /*break*/, 17];
|
|
1061
|
+
return [4 /*yield*/, callback({
|
|
1062
|
+
thought: thought,
|
|
1063
|
+
text: text,
|
|
1064
|
+
actions: ['IDENTIFY_PERSON'],
|
|
1065
|
+
data: { identifications: people },
|
|
1066
|
+
})];
|
|
1067
|
+
case 16:
|
|
1068
|
+
_c.sent();
|
|
1069
|
+
_c.label = 17;
|
|
1070
|
+
case 17: return [3 /*break*/, 22];
|
|
1071
|
+
case 18:
|
|
1072
|
+
error_6 = _c.sent();
|
|
1073
|
+
core_1.logger.error('[identifyPersonAction] Error:', error_6);
|
|
1074
|
+
thought = 'Failed to identify people.';
|
|
1075
|
+
text = "Sorry, I couldn't identify people: ".concat(error_6 instanceof Error ? error_6.message : 'Unknown error');
|
|
1076
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['IDENTIFY_PERSON'])];
|
|
1077
|
+
case 19:
|
|
1078
|
+
_c.sent();
|
|
1079
|
+
if (!callback) return [3 /*break*/, 21];
|
|
1080
|
+
return [4 /*yield*/, callback({ thought: thought, text: text, actions: ['IDENTIFY_PERSON'] })];
|
|
1081
|
+
case 20:
|
|
1082
|
+
_c.sent();
|
|
1083
|
+
_c.label = 21;
|
|
1084
|
+
case 21: return [3 /*break*/, 22];
|
|
1085
|
+
case 22: return [2 /*return*/];
|
|
1086
|
+
}
|
|
1087
|
+
});
|
|
1088
|
+
}); },
|
|
1089
|
+
};
|
|
1090
|
+
exports.trackEntityAction = {
|
|
1091
|
+
name: 'TRACK_ENTITY',
|
|
1092
|
+
description: 'Start tracking a specific person or object in view',
|
|
1093
|
+
enabled: false, // Disabled by default - privacy-sensitive, can track and monitor people
|
|
1094
|
+
similes: [
|
|
1095
|
+
'track the {description}',
|
|
1096
|
+
'follow the {description}',
|
|
1097
|
+
'keep an eye on the {description}',
|
|
1098
|
+
'watch the {description}',
|
|
1099
|
+
],
|
|
1100
|
+
examples: [
|
|
1101
|
+
[
|
|
1102
|
+
{
|
|
1103
|
+
name: 'user',
|
|
1104
|
+
content: {
|
|
1105
|
+
text: 'Track the person wearing the red shirt',
|
|
1106
|
+
},
|
|
1107
|
+
},
|
|
1108
|
+
{
|
|
1109
|
+
name: 'agent',
|
|
1110
|
+
content: {
|
|
1111
|
+
text: "I'm now tracking the person in the red shirt. I'll notify you of any significant movements or if they leave the scene.",
|
|
1112
|
+
actions: ['TRACK_ENTITY'],
|
|
1113
|
+
},
|
|
1114
|
+
},
|
|
1115
|
+
],
|
|
1116
|
+
],
|
|
1117
|
+
validate: function (runtime, _message, _state) { return __awaiter(void 0, void 0, void 0, function () {
|
|
1118
|
+
var visionService;
|
|
1119
|
+
return __generator(this, function (_a) {
|
|
1120
|
+
visionService = runtime.getService('VISION');
|
|
1121
|
+
return [2 /*return*/, (visionService === null || visionService === void 0 ? void 0 : visionService.isActive()) || false];
|
|
1122
|
+
});
|
|
1123
|
+
}); },
|
|
1124
|
+
handler: function (runtime, message, _state, _options, callback) { return __awaiter(void 0, void 0, void 0, function () {
|
|
1125
|
+
var visionService, thought_7, text, scene, thought_8, text, _text, _worldId, entityTracker, stats, thought, summary, responseText, error_7, thought, text;
|
|
1126
|
+
var _a;
|
|
1127
|
+
return __generator(this, function (_b) {
|
|
1128
|
+
switch (_b.label) {
|
|
1129
|
+
case 0:
|
|
1130
|
+
_b.trys.push([0, 14, , 18]);
|
|
1131
|
+
visionService = runtime.getService('VISION');
|
|
1132
|
+
if (!!visionService) return [3 /*break*/, 4];
|
|
1133
|
+
thought_7 = 'Vision service is not available.';
|
|
1134
|
+
text = 'I cannot track entities because the vision service is not available.';
|
|
1135
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_7, text, ['TRACK_ENTITY'])];
|
|
1136
|
+
case 1:
|
|
1137
|
+
_b.sent();
|
|
1138
|
+
if (!callback) return [3 /*break*/, 3];
|
|
1139
|
+
return [4 /*yield*/, callback({ thought: thought_7, text: text, actions: ['TRACK_ENTITY'] })];
|
|
1140
|
+
case 2:
|
|
1141
|
+
_b.sent();
|
|
1142
|
+
_b.label = 3;
|
|
1143
|
+
case 3: return [2 /*return*/];
|
|
1144
|
+
case 4: return [4 /*yield*/, visionService.getSceneDescription()];
|
|
1145
|
+
case 5:
|
|
1146
|
+
scene = _b.sent();
|
|
1147
|
+
if (!!scene) return [3 /*break*/, 9];
|
|
1148
|
+
thought_8 = 'No scene available for tracking.';
|
|
1149
|
+
text = 'I need a moment to process the visual scene before I can track entities.';
|
|
1150
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought_8, text, ['TRACK_ENTITY'])];
|
|
1151
|
+
case 6:
|
|
1152
|
+
_b.sent();
|
|
1153
|
+
if (!callback) return [3 /*break*/, 8];
|
|
1154
|
+
return [4 /*yield*/, callback({ thought: thought_8, text: text, actions: ['TRACK_ENTITY'] })];
|
|
1155
|
+
case 7:
|
|
1156
|
+
_b.sent();
|
|
1157
|
+
_b.label = 8;
|
|
1158
|
+
case 8: return [2 /*return*/];
|
|
1159
|
+
case 9:
|
|
1160
|
+
_text = ((_a = message.content.text) === null || _a === void 0 ? void 0 : _a.toLowerCase()) || '';
|
|
1161
|
+
_worldId = message.worldId || 'default-world';
|
|
1162
|
+
entityTracker = visionService.getEntityTracker();
|
|
1163
|
+
// Update entities
|
|
1164
|
+
return [4 /*yield*/, entityTracker.updateEntities(scene.objects, scene.people, undefined, runtime)];
|
|
1165
|
+
case 10:
|
|
1166
|
+
// Update entities
|
|
1167
|
+
_b.sent();
|
|
1168
|
+
stats = entityTracker.getStatistics();
|
|
1169
|
+
thought = "Tracking ".concat(stats.activeEntities, " entities in the scene.");
|
|
1170
|
+
summary = [
|
|
1171
|
+
"I'm now tracking ".concat(stats.activeEntities, " entities in the scene"),
|
|
1172
|
+
"(".concat(stats.people, " people, ").concat(stats.objects, " objects)."),
|
|
1173
|
+
'The visual tracking system will maintain persistent IDs for all entities',
|
|
1174
|
+
'and notify you of significant changes.',
|
|
1175
|
+
];
|
|
1176
|
+
responseText = summary.join(' ');
|
|
1177
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, responseText, ['TRACK_ENTITY'])];
|
|
1178
|
+
case 11:
|
|
1179
|
+
_b.sent();
|
|
1180
|
+
if (!callback) return [3 /*break*/, 13];
|
|
1181
|
+
return [4 /*yield*/, callback({
|
|
1182
|
+
thought: thought,
|
|
1183
|
+
text: responseText,
|
|
1184
|
+
actions: ['TRACK_ENTITY'],
|
|
1185
|
+
data: { entities: stats.activeEntities },
|
|
1186
|
+
})];
|
|
1187
|
+
case 12:
|
|
1188
|
+
_b.sent();
|
|
1189
|
+
_b.label = 13;
|
|
1190
|
+
case 13:
|
|
1191
|
+
core_1.logger.info("[TrackEntityAction] Tracking ".concat(stats.activeEntities, " entities"));
|
|
1192
|
+
return [3 /*break*/, 18];
|
|
1193
|
+
case 14:
|
|
1194
|
+
error_7 = _b.sent();
|
|
1195
|
+
core_1.logger.error('[trackEntityAction] Error:', error_7);
|
|
1196
|
+
thought = 'Failed to track entities.';
|
|
1197
|
+
text = "Sorry, I couldn't track entities: ".concat(error_7 instanceof Error ? error_7.message : 'Unknown error');
|
|
1198
|
+
return [4 /*yield*/, saveExecutionRecord(runtime, message, thought, text, ['TRACK_ENTITY'])];
|
|
1199
|
+
case 15:
|
|
1200
|
+
_b.sent();
|
|
1201
|
+
if (!callback) return [3 /*break*/, 17];
|
|
1202
|
+
return [4 /*yield*/, callback({ thought: thought, text: text, actions: ['TRACK_ENTITY'] })];
|
|
1203
|
+
case 16:
|
|
1204
|
+
_b.sent();
|
|
1205
|
+
_b.label = 17;
|
|
1206
|
+
case 17: return [3 /*break*/, 18];
|
|
1207
|
+
case 18: return [2 /*return*/];
|
|
1208
|
+
}
|
|
1209
|
+
});
|
|
1210
|
+
}); },
|
|
1211
|
+
};
|
|
1212
|
+
//# sourceMappingURL=action.js.map
|