@elizaos/plugin-vision 2.0.0-alpha.7 → 2.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +164 -354
- package/dist/index.js.map +7 -8
- package/dist/workers/florence2-worker.js +114850 -0
- package/dist/workers/florence2-worker.js.map +92 -0
- package/dist/workers/ocr-worker.js +122242 -0
- package/dist/workers/ocr-worker.js.map +137 -0
- package/dist/workers/screen-capture-worker.js +359 -0
- package/dist/workers/screen-capture-worker.js.map +11 -0
- package/package.json +5 -5
package/dist/index.js
CHANGED
|
@@ -51,9 +51,32 @@ var describeSceneAction = {
|
|
|
51
51
|
name: "DESCRIBE_SCENE",
|
|
52
52
|
similes: ["ANALYZE_SCENE", "WHAT_DO_YOU_SEE", "VISION_CHECK", "LOOK_AROUND"],
|
|
53
53
|
description: "Analyzes the current visual scene and provides a detailed description of what the agent sees through the camera. Returns scene analysis data including people count, objects, and camera info for action chaining.",
|
|
54
|
-
validate: async (runtime,
|
|
55
|
-
const
|
|
56
|
-
|
|
54
|
+
validate: async (runtime, message, state, options) => {
|
|
55
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
56
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
57
|
+
const __avVisionService = runtime?.getService?.("VISION");
|
|
58
|
+
const __avLegacyContextOk = Boolean(__avVisionService && typeof __avVisionService.isActive === "function" && __avVisionService.isActive());
|
|
59
|
+
const __avKeywords = ["describe", "scene"];
|
|
60
|
+
const __avKeywordOk = __avKeywords.length > 0 && (__avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw)) || __avLegacyContextOk);
|
|
61
|
+
const __avRegex = new RegExp("\\b(?:describe|scene)\\b", "i");
|
|
62
|
+
const __avRegexOk = __avRegex.test(__avText) || __avLegacyContextOk;
|
|
63
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
64
|
+
const __avExpectedSource = "";
|
|
65
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
66
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
67
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
68
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
71
|
+
const __avLegacyValidate = async (runtime2, _message, _state) => {
|
|
72
|
+
const visionService = runtime2.getService("VISION");
|
|
73
|
+
return !!visionService && visionService.isActive();
|
|
74
|
+
};
|
|
75
|
+
try {
|
|
76
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
77
|
+
} catch {
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
57
80
|
},
|
|
58
81
|
handler: async (runtime, message, _state, _options, callback, _responses) => {
|
|
59
82
|
const visionService = runtime.getService("VISION");
|
|
@@ -252,9 +275,32 @@ var captureImageAction = {
|
|
|
252
275
|
name: "CAPTURE_IMAGE",
|
|
253
276
|
similes: ["TAKE_PHOTO", "SCREENSHOT", "CAPTURE_FRAME", "TAKE_PICTURE"],
|
|
254
277
|
description: "Captures the current frame from the camera and saves it as an image attachment. Returns image data with camera info and timestamp for action chaining. Can be combined with DESCRIBE_SCENE for analysis or NAME_ENTITY for identification workflows.",
|
|
255
|
-
validate: async (runtime,
|
|
256
|
-
const
|
|
257
|
-
|
|
278
|
+
validate: async (runtime, message, state, options) => {
|
|
279
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
280
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
281
|
+
const __avVisionService = runtime?.getService?.("VISION");
|
|
282
|
+
const __avLegacyContextOk = Boolean(__avVisionService && typeof __avVisionService.isActive === "function" && __avVisionService.isActive());
|
|
283
|
+
const __avKeywords = ["capture", "image"];
|
|
284
|
+
const __avKeywordOk = __avKeywords.length > 0 && (__avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw)) || __avLegacyContextOk);
|
|
285
|
+
const __avRegex = new RegExp("\\b(?:capture|image)\\b", "i");
|
|
286
|
+
const __avRegexOk = __avRegex.test(__avText) || __avLegacyContextOk;
|
|
287
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
288
|
+
const __avExpectedSource = "";
|
|
289
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
290
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
291
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
292
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
293
|
+
return false;
|
|
294
|
+
}
|
|
295
|
+
const __avLegacyValidate = async (runtime2, _message, _state) => {
|
|
296
|
+
const visionService = runtime2.getService("VISION");
|
|
297
|
+
return !!visionService && visionService.isActive();
|
|
298
|
+
};
|
|
299
|
+
try {
|
|
300
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
301
|
+
} catch {
|
|
302
|
+
return false;
|
|
303
|
+
}
|
|
258
304
|
},
|
|
259
305
|
handler: async (runtime, message, _state, _options, callback, _responses) => {
|
|
260
306
|
const visionService = runtime.getService("VISION");
|
|
@@ -427,91 +473,6 @@ var captureImageAction = {
|
|
|
427
473
|
]
|
|
428
474
|
]
|
|
429
475
|
};
|
|
430
|
-
var killAutonomousAction = {
|
|
431
|
-
name: "KILL_AUTONOMOUS",
|
|
432
|
-
similes: ["STOP_AUTONOMOUS", "HALT_AUTONOMOUS", "KILL_AUTO_LOOP"],
|
|
433
|
-
description: "Stops the autonomous agent loop for debugging purposes.",
|
|
434
|
-
validate: async (_runtime, _message, _state) => {
|
|
435
|
-
return true;
|
|
436
|
-
},
|
|
437
|
-
handler: async (runtime, message, _state, _options, callback, _responses) => {
|
|
438
|
-
try {
|
|
439
|
-
const autonomousService = runtime.getService("AUTONOMOUS");
|
|
440
|
-
if (autonomousService && "stop" in autonomousService && typeof autonomousService.stop === "function") {
|
|
441
|
-
await autonomousService.stop();
|
|
442
|
-
const thought = "Successfully stopped the autonomous agent loop.";
|
|
443
|
-
const text = "Autonomous loop has been killed. The agent will no longer run autonomously until restarted.";
|
|
444
|
-
await saveExecutionRecord(runtime, message, thought, text, ["KILL_AUTONOMOUS"]);
|
|
445
|
-
if (callback) {
|
|
446
|
-
await callback({
|
|
447
|
-
thought,
|
|
448
|
-
text,
|
|
449
|
-
actions: ["KILL_AUTONOMOUS"]
|
|
450
|
-
});
|
|
451
|
-
}
|
|
452
|
-
return {
|
|
453
|
-
success: true,
|
|
454
|
-
text
|
|
455
|
-
};
|
|
456
|
-
} else {
|
|
457
|
-
const thought = "Autonomous service not found or already stopped.";
|
|
458
|
-
const text = "No autonomous loop was running or the service could not be found.";
|
|
459
|
-
await saveExecutionRecord(runtime, message, thought, text, ["KILL_AUTONOMOUS"]);
|
|
460
|
-
if (callback) {
|
|
461
|
-
await callback({
|
|
462
|
-
thought,
|
|
463
|
-
text,
|
|
464
|
-
actions: ["KILL_AUTONOMOUS"]
|
|
465
|
-
});
|
|
466
|
-
}
|
|
467
|
-
return {
|
|
468
|
-
success: false,
|
|
469
|
-
text
|
|
470
|
-
};
|
|
471
|
-
}
|
|
472
|
-
} catch (error) {
|
|
473
|
-
logger.error("[killAutonomousAction] Error stopping autonomous service:", error);
|
|
474
|
-
const thought = "An error occurred while trying to stop the autonomous loop.";
|
|
475
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
476
|
-
const text = `Error stopping autonomous loop: ${errorMessage}`;
|
|
477
|
-
await saveExecutionRecord(runtime, message, thought, text, ["KILL_AUTONOMOUS"]);
|
|
478
|
-
if (callback) {
|
|
479
|
-
await callback({
|
|
480
|
-
thought,
|
|
481
|
-
text,
|
|
482
|
-
actions: ["KILL_AUTONOMOUS"]
|
|
483
|
-
});
|
|
484
|
-
}
|
|
485
|
-
return {
|
|
486
|
-
success: false,
|
|
487
|
-
text,
|
|
488
|
-
error: errorMessage
|
|
489
|
-
};
|
|
490
|
-
}
|
|
491
|
-
},
|
|
492
|
-
examples: [
|
|
493
|
-
[
|
|
494
|
-
{ name: "user", content: { text: "kill the autonomous loop" } },
|
|
495
|
-
{
|
|
496
|
-
name: "agent",
|
|
497
|
-
content: {
|
|
498
|
-
actions: ["KILL_AUTONOMOUS"],
|
|
499
|
-
thought: "The user wants to stop the autonomous agent loop for debugging.",
|
|
500
|
-
text: "Autonomous loop has been killed. The agent will no longer run autonomously until restarted."
|
|
501
|
-
}
|
|
502
|
-
}
|
|
503
|
-
],
|
|
504
|
-
[
|
|
505
|
-
{ name: "user", content: { text: "stop autonomous mode" } },
|
|
506
|
-
{
|
|
507
|
-
name: "agent",
|
|
508
|
-
content: {
|
|
509
|
-
actions: ["KILL_AUTONOMOUS"]
|
|
510
|
-
}
|
|
511
|
-
}
|
|
512
|
-
]
|
|
513
|
-
]
|
|
514
|
-
};
|
|
515
476
|
var setVisionModeAction = {
|
|
516
477
|
name: "SET_VISION_MODE",
|
|
517
478
|
description: "Set the vision mode to OFF, CAMERA, SCREEN, or BOTH",
|
|
@@ -524,9 +485,31 @@ var setVisionModeAction = {
|
|
|
524
485
|
"enable {mode} vision",
|
|
525
486
|
"disable vision"
|
|
526
487
|
],
|
|
527
|
-
validate: async (runtime,
|
|
528
|
-
const
|
|
529
|
-
|
|
488
|
+
validate: async (runtime, message, state, options) => {
|
|
489
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
490
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
491
|
+
const __avLegacyContextOk = Boolean(runtime?.getService?.("VISION"));
|
|
492
|
+
const __avKeywords = ["set", "vision", "mode"];
|
|
493
|
+
const __avKeywordOk = __avKeywords.length > 0 && (__avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw)) || __avLegacyContextOk);
|
|
494
|
+
const __avRegex = new RegExp("\\b(?:set|vision|mode)\\b", "i");
|
|
495
|
+
const __avRegexOk = __avRegex.test(__avText) || __avLegacyContextOk;
|
|
496
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
497
|
+
const __avExpectedSource = "";
|
|
498
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
499
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
500
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
501
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
502
|
+
return false;
|
|
503
|
+
}
|
|
504
|
+
const __avLegacyValidate = async (runtime2, _message, _state) => {
|
|
505
|
+
const visionService = runtime2.getService("VISION");
|
|
506
|
+
return visionService !== null;
|
|
507
|
+
};
|
|
508
|
+
try {
|
|
509
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
510
|
+
} catch {
|
|
511
|
+
return false;
|
|
512
|
+
}
|
|
530
513
|
},
|
|
531
514
|
handler: async (runtime, message, _state, _options, callback, _responses) => {
|
|
532
515
|
const visionService = runtime.getService("VISION");
|
|
@@ -706,9 +689,32 @@ var nameEntityAction = {
|
|
|
706
689
|
}
|
|
707
690
|
]
|
|
708
691
|
],
|
|
709
|
-
validate: async (runtime,
|
|
710
|
-
const
|
|
711
|
-
|
|
692
|
+
validate: async (runtime, message, state, options) => {
|
|
693
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
694
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
695
|
+
const __avVisionService = runtime?.getService?.("VISION");
|
|
696
|
+
const __avLegacyContextOk = Boolean(__avVisionService && typeof __avVisionService.isActive === "function" && __avVisionService.isActive());
|
|
697
|
+
const __avKeywords = ["name", "entity"];
|
|
698
|
+
const __avKeywordOk = __avKeywords.length > 0 && (__avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw)) || __avLegacyContextOk);
|
|
699
|
+
const __avRegex = new RegExp("\\b(?:name|entity)\\b", "i");
|
|
700
|
+
const __avRegexOk = __avRegex.test(__avText) || __avLegacyContextOk;
|
|
701
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
702
|
+
const __avExpectedSource = "";
|
|
703
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
704
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
705
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
706
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
707
|
+
return false;
|
|
708
|
+
}
|
|
709
|
+
const __avLegacyValidate = async (runtime2, _message, _state) => {
|
|
710
|
+
const visionService = runtime2.getService("VISION");
|
|
711
|
+
return visionService?.isActive() || false;
|
|
712
|
+
};
|
|
713
|
+
try {
|
|
714
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
715
|
+
} catch {
|
|
716
|
+
return false;
|
|
717
|
+
}
|
|
712
718
|
},
|
|
713
719
|
handler: async (runtime, message, _state, _options, callback) => {
|
|
714
720
|
try {
|
|
@@ -855,9 +861,32 @@ var identifyPersonAction = {
|
|
|
855
861
|
}
|
|
856
862
|
]
|
|
857
863
|
],
|
|
858
|
-
validate: async (runtime,
|
|
859
|
-
const
|
|
860
|
-
|
|
864
|
+
validate: async (runtime, message, state, options) => {
|
|
865
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
866
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
867
|
+
const __avVisionService = runtime?.getService?.("VISION");
|
|
868
|
+
const __avLegacyContextOk = Boolean(__avVisionService && typeof __avVisionService.isActive === "function" && __avVisionService.isActive());
|
|
869
|
+
const __avKeywords = ["identify", "person"];
|
|
870
|
+
const __avKeywordOk = __avKeywords.length > 0 && (__avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw)) || __avLegacyContextOk);
|
|
871
|
+
const __avRegex = new RegExp("\\b(?:identify|person)\\b", "i");
|
|
872
|
+
const __avRegexOk = __avRegex.test(__avText) || __avLegacyContextOk;
|
|
873
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
874
|
+
const __avExpectedSource = "";
|
|
875
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
876
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
877
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
878
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
879
|
+
return false;
|
|
880
|
+
}
|
|
881
|
+
const __avLegacyValidate = async (runtime2, _message, _state) => {
|
|
882
|
+
const visionService = runtime2.getService("VISION");
|
|
883
|
+
return visionService?.isActive() || false;
|
|
884
|
+
};
|
|
885
|
+
try {
|
|
886
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
887
|
+
} catch {
|
|
888
|
+
return false;
|
|
889
|
+
}
|
|
861
890
|
},
|
|
862
891
|
handler: async (runtime, message, _state, _options, callback) => {
|
|
863
892
|
try {
|
|
@@ -1007,9 +1036,32 @@ var trackEntityAction = {
|
|
|
1007
1036
|
}
|
|
1008
1037
|
]
|
|
1009
1038
|
],
|
|
1010
|
-
validate: async (runtime,
|
|
1011
|
-
const
|
|
1012
|
-
|
|
1039
|
+
validate: async (runtime, message, state, options) => {
|
|
1040
|
+
const __avTextRaw = typeof message?.content?.text === "string" ? message.content.text : "";
|
|
1041
|
+
const __avText = __avTextRaw.toLowerCase();
|
|
1042
|
+
const __avVisionService = runtime?.getService?.("VISION");
|
|
1043
|
+
const __avLegacyContextOk = Boolean(__avVisionService && typeof __avVisionService.isActive === "function" && __avVisionService.isActive());
|
|
1044
|
+
const __avKeywords = ["track", "entity"];
|
|
1045
|
+
const __avKeywordOk = __avKeywords.length > 0 && (__avKeywords.some((kw) => kw.length > 0 && __avText.includes(kw)) || __avLegacyContextOk);
|
|
1046
|
+
const __avRegex = new RegExp("\\b(?:track|entity)\\b", "i");
|
|
1047
|
+
const __avRegexOk = __avRegex.test(__avText) || __avLegacyContextOk;
|
|
1048
|
+
const __avSource = String(message?.content?.source ?? message?.source ?? "");
|
|
1049
|
+
const __avExpectedSource = "";
|
|
1050
|
+
const __avSourceOk = __avExpectedSource ? __avSource === __avExpectedSource : Boolean(__avSource || state || runtime?.agentId || runtime?.getService);
|
|
1051
|
+
const __avOptions = options && typeof options === "object" ? options : {};
|
|
1052
|
+
const __avInputOk = __avText.trim().length > 0 || Object.keys(__avOptions).length > 0 || Boolean(message?.content && typeof message.content === "object");
|
|
1053
|
+
if (!(__avKeywordOk && __avRegexOk && __avSourceOk && __avInputOk)) {
|
|
1054
|
+
return false;
|
|
1055
|
+
}
|
|
1056
|
+
const __avLegacyValidate = async (runtime2, _message, _state) => {
|
|
1057
|
+
const visionService = runtime2.getService("VISION");
|
|
1058
|
+
return visionService?.isActive() || false;
|
|
1059
|
+
};
|
|
1060
|
+
try {
|
|
1061
|
+
return Boolean(await __avLegacyValidate(runtime, message, state, options));
|
|
1062
|
+
} catch {
|
|
1063
|
+
return false;
|
|
1064
|
+
}
|
|
1013
1065
|
},
|
|
1014
1066
|
handler: async (runtime, message, _state, _options, callback) => {
|
|
1015
1067
|
try {
|
|
@@ -3654,7 +3706,7 @@ import * as path4 from "node:path";
|
|
|
3654
3706
|
import { TextDecoder } from "node:util";
|
|
3655
3707
|
import { Worker } from "node:worker_threads";
|
|
3656
3708
|
import { logger as logger13 } from "@elizaos/core";
|
|
3657
|
-
var __dirname = "/Users/shawwalters/eliza-workspace/plugins/plugin-vision/typescript/src";
|
|
3709
|
+
var __dirname = "/Users/shawwalters/eliza-workspace/milady/plugins/plugin-vision/typescript/src";
|
|
3658
3710
|
|
|
3659
3711
|
class VisionWorkerManager {
|
|
3660
3712
|
config;
|
|
@@ -5395,248 +5447,8 @@ class ScreenVisionE2ETestSuite {
|
|
|
5395
5447
|
];
|
|
5396
5448
|
}
|
|
5397
5449
|
var screen_vision_default = new ScreenVisionE2ETestSuite;
|
|
5398
|
-
// src/tests/e2e/vision-autonomy.ts
|
|
5399
|
-
import { createUniqueUuid as createUniqueUuid4 } from "@elizaos/core";
|
|
5400
|
-
class VisionAutonomyE2ETestSuite {
|
|
5401
|
-
name = "plugin-vision-autonomy-e2e";
|
|
5402
|
-
description = "Tests for vision plugin integration with autonomy plugin";
|
|
5403
|
-
tests = [
|
|
5404
|
-
{
|
|
5405
|
-
name: "Should stop autonomous loop with kill command",
|
|
5406
|
-
fn: async (runtime) => {
|
|
5407
|
-
console.log("Testing kill autonomous action...");
|
|
5408
|
-
const roomId = createUniqueUuid4(runtime, "test-room");
|
|
5409
|
-
const message = {
|
|
5410
|
-
id: createUniqueUuid4(runtime, "test-msg-kill"),
|
|
5411
|
-
entityId: runtime.agentId,
|
|
5412
|
-
content: { text: "kill the autonomous loop" },
|
|
5413
|
-
agentId: runtime.agentId,
|
|
5414
|
-
roomId,
|
|
5415
|
-
createdAt: Date.now()
|
|
5416
|
-
};
|
|
5417
|
-
let callbackCalled = false;
|
|
5418
|
-
let callbackResponse = null;
|
|
5419
|
-
const isValid = await killAutonomousAction.validate(runtime, message, {
|
|
5420
|
-
values: {},
|
|
5421
|
-
data: {},
|
|
5422
|
-
text: ""
|
|
5423
|
-
});
|
|
5424
|
-
if (!isValid) {
|
|
5425
|
-
throw new Error("killAutonomousAction validation failed");
|
|
5426
|
-
}
|
|
5427
|
-
await killAutonomousAction.handler(runtime, message, { values: {}, data: {}, text: "" }, {}, async (response) => {
|
|
5428
|
-
callbackCalled = true;
|
|
5429
|
-
callbackResponse = response;
|
|
5430
|
-
return [];
|
|
5431
|
-
});
|
|
5432
|
-
if (!callbackCalled) {
|
|
5433
|
-
throw new Error("Callback was not called - action handler failed");
|
|
5434
|
-
}
|
|
5435
|
-
if (!callbackResponse || !callbackResponse.text) {
|
|
5436
|
-
throw new Error("No response text returned from kill action");
|
|
5437
|
-
}
|
|
5438
|
-
console.log("✓ Kill autonomous action executed");
|
|
5439
|
-
console.log(` Response: ${callbackResponse.text}`);
|
|
5440
|
-
if (callbackResponse.thought) {
|
|
5441
|
-
console.log(` Thought: ${callbackResponse.thought}`);
|
|
5442
|
-
}
|
|
5443
|
-
if (!callbackResponse.actions || !callbackResponse.actions.includes("KILL_AUTONOMOUS")) {
|
|
5444
|
-
throw new Error("Response does not include KILL_AUTONOMOUS action");
|
|
5445
|
-
}
|
|
5446
|
-
}
|
|
5447
|
-
},
|
|
5448
|
-
{
|
|
5449
|
-
name: "Should provide continuous vision updates for autonomous agent",
|
|
5450
|
-
fn: async (runtime) => {
|
|
5451
|
-
console.log("Testing vision updates for autonomous behavior...");
|
|
5452
|
-
const visionService = runtime.getService("VISION");
|
|
5453
|
-
if (!visionService) {
|
|
5454
|
-
throw new Error("Vision service not available");
|
|
5455
|
-
}
|
|
5456
|
-
if (!visionService.isActive()) {
|
|
5457
|
-
console.warn("⚠️ Vision service not active - skipping continuous update test");
|
|
5458
|
-
console.log(" This is acceptable in environments without cameras");
|
|
5459
|
-
return;
|
|
5460
|
-
}
|
|
5461
|
-
const updateIntervals = [];
|
|
5462
|
-
let lastTimestamp = 0;
|
|
5463
|
-
let updateCount = 0;
|
|
5464
|
-
const testDuration = 5000;
|
|
5465
|
-
const startTime = Date.now();
|
|
5466
|
-
console.log(" Monitoring scene updates for 5 seconds...");
|
|
5467
|
-
while (Date.now() - startTime < testDuration) {
|
|
5468
|
-
const scene = await visionService.getSceneDescription();
|
|
5469
|
-
if (scene && scene.timestamp !== lastTimestamp) {
|
|
5470
|
-
if (lastTimestamp > 0) {
|
|
5471
|
-
updateIntervals.push(scene.timestamp - lastTimestamp);
|
|
5472
|
-
}
|
|
5473
|
-
lastTimestamp = scene.timestamp;
|
|
5474
|
-
updateCount++;
|
|
5475
|
-
console.log(` Scene update ${updateCount}: ${scene.description.substring(0, 50)}...`);
|
|
5476
|
-
}
|
|
5477
|
-
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
5478
|
-
}
|
|
5479
|
-
console.log("✓ Vision monitoring complete");
|
|
5480
|
-
console.log(` Total updates: ${updateCount}`);
|
|
5481
|
-
if (updateCount === 0) {
|
|
5482
|
-
throw new Error("No scene updates detected during 5 second monitoring period");
|
|
5483
|
-
}
|
|
5484
|
-
if (updateIntervals.length > 0) {
|
|
5485
|
-
const avgInterval = updateIntervals.reduce((a, b) => a + b, 0) / updateIntervals.length;
|
|
5486
|
-
console.log(` Average update interval: ${Math.round(avgInterval)}ms`);
|
|
5487
|
-
if (avgInterval > 5000) {
|
|
5488
|
-
throw new Error("Scene updates too infrequent for autonomous operation");
|
|
5489
|
-
}
|
|
5490
|
-
}
|
|
5491
|
-
}
|
|
5492
|
-
},
|
|
5493
|
-
{
|
|
5494
|
-
name: "Should maintain vision memory across interactions",
|
|
5495
|
-
fn: async (runtime) => {
|
|
5496
|
-
console.log("Testing vision memory persistence...");
|
|
5497
|
-
const visionService = runtime.getService("VISION");
|
|
5498
|
-
if (!visionService) {
|
|
5499
|
-
throw new Error("Vision service not available");
|
|
5500
|
-
}
|
|
5501
|
-
const roomId = createUniqueUuid4(runtime, "test-room");
|
|
5502
|
-
const firstMessage = {
|
|
5503
|
-
id: createUniqueUuid4(runtime, "test-msg-1"),
|
|
5504
|
-
entityId: runtime.agentId,
|
|
5505
|
-
content: { text: "what do you see?" },
|
|
5506
|
-
agentId: runtime.agentId,
|
|
5507
|
-
roomId,
|
|
5508
|
-
createdAt: Date.now()
|
|
5509
|
-
};
|
|
5510
|
-
await runtime.createMemory(firstMessage, "messages");
|
|
5511
|
-
const firstResponse = {
|
|
5512
|
-
id: createUniqueUuid4(runtime, "test-response-1"),
|
|
5513
|
-
entityId: runtime.agentId,
|
|
5514
|
-
content: {
|
|
5515
|
-
text: "I see a test scene",
|
|
5516
|
-
actions: ["DESCRIBE_SCENE"]
|
|
5517
|
-
},
|
|
5518
|
-
agentId: runtime.agentId,
|
|
5519
|
-
roomId,
|
|
5520
|
-
createdAt: Date.now() + 100
|
|
5521
|
-
};
|
|
5522
|
-
await runtime.createMemory(firstResponse, "messages");
|
|
5523
|
-
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
5524
|
-
const secondMessage = {
|
|
5525
|
-
id: createUniqueUuid4(runtime, "test-msg-2"),
|
|
5526
|
-
entityId: runtime.agentId,
|
|
5527
|
-
content: { text: "what did you see before?" },
|
|
5528
|
-
agentId: runtime.agentId,
|
|
5529
|
-
roomId,
|
|
5530
|
-
createdAt: Date.now() + 1000
|
|
5531
|
-
};
|
|
5532
|
-
await runtime.createMemory(secondMessage, "messages");
|
|
5533
|
-
const secondResponse = {
|
|
5534
|
-
id: createUniqueUuid4(runtime, "test-response-2"),
|
|
5535
|
-
entityId: runtime.agentId,
|
|
5536
|
-
content: {
|
|
5537
|
-
text: "Previously, I saw a test scene",
|
|
5538
|
-
actions: ["DESCRIBE_SCENE"]
|
|
5539
|
-
},
|
|
5540
|
-
agentId: runtime.agentId,
|
|
5541
|
-
roomId,
|
|
5542
|
-
createdAt: Date.now() + 1100
|
|
5543
|
-
};
|
|
5544
|
-
await runtime.createMemory(secondResponse, "messages");
|
|
5545
|
-
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
5546
|
-
const messages = await runtime.getMemories({
|
|
5547
|
-
roomId,
|
|
5548
|
-
count: 10,
|
|
5549
|
-
tableName: "messages"
|
|
5550
|
-
});
|
|
5551
|
-
console.log("✓ Vision memory test complete");
|
|
5552
|
-
console.log(` Total messages: ${messages ? messages.length : 0}`);
|
|
5553
|
-
if (!visionService.isActive()) {
|
|
5554
|
-
console.log(" Vision service not active, simulated memory test only");
|
|
5555
|
-
}
|
|
5556
|
-
}
|
|
5557
|
-
},
|
|
5558
|
-
{
|
|
5559
|
-
name: "Should integrate vision data with agent decision making",
|
|
5560
|
-
fn: async (runtime) => {
|
|
5561
|
-
console.log("Testing vision-based decision making...");
|
|
5562
|
-
const roomId = createUniqueUuid4(runtime, "test-room");
|
|
5563
|
-
const scenarios = [
|
|
5564
|
-
{ text: "Is anyone in the room?", expectedContext: "people" },
|
|
5565
|
-
{ text: "Should I turn on the lights?", expectedContext: "scene" },
|
|
5566
|
-
{ text: "What objects are nearby?", expectedContext: "objects" }
|
|
5567
|
-
];
|
|
5568
|
-
let scenariosWithVision = 0;
|
|
5569
|
-
for (const scenario of scenarios) {
|
|
5570
|
-
const message = {
|
|
5571
|
-
id: createUniqueUuid4(runtime, `test-msg-${Date.now()}`),
|
|
5572
|
-
entityId: runtime.agentId,
|
|
5573
|
-
content: { text: scenario.text },
|
|
5574
|
-
agentId: runtime.agentId,
|
|
5575
|
-
roomId,
|
|
5576
|
-
createdAt: Date.now()
|
|
5577
|
-
};
|
|
5578
|
-
const state = await runtime.composeState(message);
|
|
5579
|
-
const hasVisionContext = state.text.includes("Visual Perception") || state.values.visionAvailable !== undefined;
|
|
5580
|
-
console.log(` Scenario: "${scenario.text}"`);
|
|
5581
|
-
console.log(` Has vision context: ${hasVisionContext}`);
|
|
5582
|
-
if (hasVisionContext) {
|
|
5583
|
-
scenariosWithVision++;
|
|
5584
|
-
if (state.values.sceneDescription && typeof state.values.sceneDescription === "string") {
|
|
5585
|
-
console.log(` Scene info available: ${state.values.sceneDescription.substring(0, 50)}...`);
|
|
5586
|
-
}
|
|
5587
|
-
}
|
|
5588
|
-
}
|
|
5589
|
-
console.log("✓ Vision-based decision making test complete");
|
|
5590
|
-
if (scenariosWithVision !== scenarios.length) {
|
|
5591
|
-
throw new Error(`Vision context missing in ${scenarios.length - scenariosWithVision} scenarios`);
|
|
5592
|
-
}
|
|
5593
|
-
}
|
|
5594
|
-
},
|
|
5595
|
-
{
|
|
5596
|
-
name: "Should handle autonomy gracefully when vision is unavailable",
|
|
5597
|
-
fn: async (runtime) => {
|
|
5598
|
-
console.log("Testing autonomy behavior without vision...");
|
|
5599
|
-
const visionService = runtime.getService("VISION");
|
|
5600
|
-
if (!visionService) {
|
|
5601
|
-
throw new Error("Vision service not registered - cannot test graceful handling");
|
|
5602
|
-
}
|
|
5603
|
-
const isActive = visionService.isActive();
|
|
5604
|
-
console.log(` Vision service active: ${isActive}`);
|
|
5605
|
-
const message = {
|
|
5606
|
-
id: createUniqueUuid4(runtime, "test-msg-no-vision"),
|
|
5607
|
-
entityId: runtime.agentId,
|
|
5608
|
-
content: { text: "test without vision" },
|
|
5609
|
-
agentId: runtime.agentId,
|
|
5610
|
-
roomId: createUniqueUuid4(runtime, "test-room"),
|
|
5611
|
-
createdAt: Date.now()
|
|
5612
|
-
};
|
|
5613
|
-
const state = await runtime.composeState(message);
|
|
5614
|
-
if (state.values.visionAvailable === undefined) {
|
|
5615
|
-
throw new Error("Vision provider did not report availability status");
|
|
5616
|
-
}
|
|
5617
|
-
if (!isActive) {
|
|
5618
|
-
if (state.values.visionAvailable !== false) {
|
|
5619
|
-
throw new Error("Vision incorrectly reports as available when service is not active");
|
|
5620
|
-
}
|
|
5621
|
-
if (!state.values.cameraStatus || typeof state.values.cameraStatus !== "string" || !state.values.cameraStatus.includes("not connected")) {
|
|
5622
|
-
throw new Error("Camera status does not indicate disconnection");
|
|
5623
|
-
}
|
|
5624
|
-
console.log("✓ Vision correctly reports unavailable state");
|
|
5625
|
-
console.log(` Status: ${state.values.cameraStatus}`);
|
|
5626
|
-
} else {
|
|
5627
|
-
if (state.values.visionAvailable !== true) {
|
|
5628
|
-
throw new Error("Vision incorrectly reports as unavailable when service is active");
|
|
5629
|
-
}
|
|
5630
|
-
console.log("✓ Vision correctly reports available state");
|
|
5631
|
-
console.log(` Status: ${state.values.cameraStatus}`);
|
|
5632
|
-
}
|
|
5633
|
-
}
|
|
5634
|
-
}
|
|
5635
|
-
];
|
|
5636
|
-
}
|
|
5637
|
-
var vision_autonomy_default = new VisionAutonomyE2ETestSuite;
|
|
5638
5450
|
// src/tests/e2e/vision-basic.ts
|
|
5639
|
-
import { createUniqueUuid as
|
|
5451
|
+
import { createUniqueUuid as createUniqueUuid4 } from "@elizaos/core";
|
|
5640
5452
|
class VisionBasicE2ETestSuite {
|
|
5641
5453
|
name = "plugin-vision-basic-e2e";
|
|
5642
5454
|
description = "Basic end-to-end tests for vision plugin functionality";
|
|
@@ -5664,9 +5476,9 @@ class VisionBasicE2ETestSuite {
|
|
|
5664
5476
|
name: "Should describe scene when requested",
|
|
5665
5477
|
fn: async (runtime) => {
|
|
5666
5478
|
console.log("Testing scene description action...");
|
|
5667
|
-
const roomId =
|
|
5479
|
+
const roomId = createUniqueUuid4(runtime, "test-room");
|
|
5668
5480
|
const message = {
|
|
5669
|
-
id:
|
|
5481
|
+
id: createUniqueUuid4(runtime, "test-msg-describe"),
|
|
5670
5482
|
entityId: runtime.agentId,
|
|
5671
5483
|
content: { text: "what do you see?" },
|
|
5672
5484
|
agentId: runtime.agentId,
|
|
@@ -5730,9 +5542,9 @@ class VisionBasicE2ETestSuite {
|
|
|
5730
5542
|
name: "Should capture image when requested",
|
|
5731
5543
|
fn: async (runtime) => {
|
|
5732
5544
|
console.log("Testing image capture action...");
|
|
5733
|
-
const roomId =
|
|
5545
|
+
const roomId = createUniqueUuid4(runtime, "test-room");
|
|
5734
5546
|
const message = {
|
|
5735
|
-
id:
|
|
5547
|
+
id: createUniqueUuid4(runtime, "test-msg-capture"),
|
|
5736
5548
|
entityId: runtime.agentId,
|
|
5737
5549
|
content: { text: "take a photo" },
|
|
5738
5550
|
agentId: runtime.agentId,
|
|
@@ -5802,11 +5614,11 @@ class VisionBasicE2ETestSuite {
|
|
|
5802
5614
|
fn: async (runtime) => {
|
|
5803
5615
|
console.log("Testing vision provider...");
|
|
5804
5616
|
const message = {
|
|
5805
|
-
id:
|
|
5617
|
+
id: createUniqueUuid4(runtime, "test-msg-provider"),
|
|
5806
5618
|
entityId: runtime.agentId,
|
|
5807
5619
|
content: { text: "test provider" },
|
|
5808
5620
|
agentId: runtime.agentId,
|
|
5809
|
-
roomId:
|
|
5621
|
+
roomId: createUniqueUuid4(runtime, "test-room"),
|
|
5810
5622
|
createdAt: Date.now()
|
|
5811
5623
|
};
|
|
5812
5624
|
const state = await runtime.composeState(message);
|
|
@@ -6730,7 +6542,6 @@ var vision_worker_tests_default = new VisionWorkerE2ETestSuite;
|
|
|
6730
6542
|
var testSuites = [
|
|
6731
6543
|
vision_runtime_default,
|
|
6732
6544
|
vision_basic_default,
|
|
6733
|
-
vision_autonomy_default,
|
|
6734
6545
|
vision_capture_log_default,
|
|
6735
6546
|
screen_vision_default,
|
|
6736
6547
|
vision_worker_tests_default
|
|
@@ -6745,7 +6556,6 @@ var visionPlugin = {
|
|
|
6745
6556
|
actions: [
|
|
6746
6557
|
describeSceneAction,
|
|
6747
6558
|
captureImageAction,
|
|
6748
|
-
killAutonomousAction,
|
|
6749
6559
|
setVisionModeAction,
|
|
6750
6560
|
nameEntityAction,
|
|
6751
6561
|
identifyPersonAction,
|
|
@@ -6760,4 +6570,4 @@ export {
|
|
|
6760
6570
|
src_default as default
|
|
6761
6571
|
};
|
|
6762
6572
|
|
|
6763
|
-
//# debugId=
|
|
6573
|
+
//# debugId=08C3A6DFE8F9170A64756E2164756E21
|