@elizaos/plugin-vision 2.0.0-alpha.3 → 2.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,4 +1,20 @@
1
1
  import { createRequire } from "node:module";
2
+ var __create = Object.create;
3
+ var __getProtoOf = Object.getPrototypeOf;
4
+ var __defProp = Object.defineProperty;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ var __toESM = (mod, isNodeMode, target) => {
8
+ target = mod != null ? __create(__getProtoOf(mod)) : {};
9
+ const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
10
+ for (let key of __getOwnPropNames(mod))
11
+ if (!__hasOwnProp.call(to, key))
12
+ __defProp(to, key, {
13
+ get: () => mod[key],
14
+ enumerable: true
15
+ });
16
+ return to;
17
+ };
2
18
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
3
19
 
4
20
  // src/action.ts
@@ -2264,8 +2280,19 @@ import { logger as logger8 } from "@elizaos/core";
2264
2280
 
2265
2281
  // src/florence2-local.ts
2266
2282
  import { logger as logger7 } from "@elizaos/core";
2267
- import * as tf from "@tensorflow/tfjs-node";
2268
2283
  import sharp from "sharp";
2284
+ var tf = null;
2285
+ async function ensureTf() {
2286
+ if (tf)
2287
+ return true;
2288
+ try {
2289
+ tf = await import("@tensorflow/tfjs-node");
2290
+ return true;
2291
+ } catch {
2292
+ logger7.warn("[Florence2Local] TensorFlow.js native addon not available — using heuristic analysis.");
2293
+ return false;
2294
+ }
2295
+ }
2269
2296
 
2270
2297
  class Florence2Local {
2271
2298
  model = null;
@@ -2281,6 +2308,11 @@ class Florence2Local {
2281
2308
  if (this.initialized) {
2282
2309
  return;
2283
2310
  }
2311
+ const tfAvailable = await ensureTf();
2312
+ if (!tfAvailable || !tf) {
2313
+ this.initialized = true;
2314
+ return;
2315
+ }
2284
2316
  try {
2285
2317
  logger7.info("[VisionModel] Initializing MobileNet model for image analysis...");
2286
2318
  this.model = await tf.loadGraphModel(this.config.modelUrl);
@@ -2295,22 +2327,22 @@ class Florence2Local {
2295
2327
  if (!this.initialized) {
2296
2328
  await this.initialize();
2297
2329
  }
2330
+ if (!tf || !this.model) {
2331
+ return await this.enhancedFallback(imageBuffer);
2332
+ }
2298
2333
  try {
2299
2334
  const preprocessed = await this.preprocessImage(imageBuffer);
2300
- if (this.model) {
2301
- const predictions = await this.runInference(preprocessed);
2302
- preprocessed.dispose();
2303
- return this.parseModelOutput(predictions);
2304
- } else {
2305
- preprocessed.dispose();
2306
- return await this.enhancedFallback(imageBuffer);
2307
- }
2335
+ const predictions = await this.runInference(preprocessed);
2336
+ preprocessed.dispose?.();
2337
+ return this.parseModelOutput(predictions);
2308
2338
  } catch (error) {
2309
2339
  logger7.error("[VisionModel] Analysis failed:", error);
2310
2340
  return await this.enhancedFallback(imageBuffer);
2311
2341
  }
2312
2342
  }
2313
2343
  async preprocessImage(imageBuffer) {
2344
+ if (!tf)
2345
+ throw new Error("TensorFlow.js not available");
2314
2346
  const resized = await sharp(imageBuffer).resize(224, 224).raw().toBuffer();
2315
2347
  const tensor = tf.node.decodeImage(resized, 3);
2316
2348
  const normalized = tf.div(tensor, 255);
@@ -3252,19 +3284,39 @@ class ScreenCaptureService {
3252
3284
 
3253
3285
  // src/vision-models.ts
3254
3286
  import { logger as logger12 } from "@elizaos/core";
3255
- import * as tf2 from "@tensorflow/tfjs-node";
3256
- import * as cocoSsd from "@tensorflow-models/coco-ssd";
3257
- import * as poseDetection from "@tensorflow-models/pose-detection";
3287
+ var tf2 = null;
3288
+ var cocoSsd = null;
3289
+ var poseDetection = null;
3290
+ async function loadTfModules() {
3291
+ if (tf2)
3292
+ return true;
3293
+ try {
3294
+ tf2 = await import("@tensorflow/tfjs-node");
3295
+ cocoSsd = await import("@tensorflow-models/coco-ssd");
3296
+ poseDetection = await import("@tensorflow-models/pose-detection");
3297
+ return true;
3298
+ } catch (err) {
3299
+ logger12.warn("[VisionModels] TensorFlow.js native addon not available — " + "falling back to description-based detection. Run `npm rebuild @tensorflow/tfjs-node --build-addon-from-source` to enable hardware-accelerated vision.");
3300
+ return false;
3301
+ }
3302
+ }
3258
3303
 
3259
3304
  class VisionModels {
3260
3305
  objectDetectionModel = null;
3261
3306
  poseDetector = null;
3262
3307
  initialized = false;
3308
+ tfAvailable = false;
3263
3309
  async initialize(config) {
3264
3310
  if (this.initialized) {
3265
3311
  return;
3266
3312
  }
3267
3313
  logger12.info("[VisionModels] Initializing vision models...");
3314
+ this.tfAvailable = await loadTfModules();
3315
+ if (!this.tfAvailable || !tf2 || !cocoSsd || !poseDetection) {
3316
+ this.initialized = true;
3317
+ logger12.info("[VisionModels] Initialized without TensorFlow (fallback mode)");
3318
+ return;
3319
+ }
3268
3320
  try {
3269
3321
  await tf2.ready();
3270
3322
  logger12.info("[VisionModels] TensorFlow.js backend ready");
@@ -3308,7 +3360,7 @@ class VisionModels {
3308
3360
  return this.poseDetector !== null;
3309
3361
  }
3310
3362
  async detectObjects(imageData, _width, _height, description) {
3311
- if (!this.objectDetectionModel) {
3363
+ if (!this.objectDetectionModel || !tf2) {
3312
3364
  logger12.warn("[VisionModels] Object detection model not loaded");
3313
3365
  return this.enhancedObjectDetection(description);
3314
3366
  }
@@ -3394,7 +3446,7 @@ class VisionModels {
3394
3446
  };
3395
3447
  }
3396
3448
  async detectPoses(imageData, width, height, description) {
3397
- if (!this.poseDetector) {
3449
+ if (!this.poseDetector || !tf2) {
3398
3450
  logger12.warn("[VisionModels] Pose detection model not loaded");
3399
3451
  return this.enhancedPoseDetection(description);
3400
3452
  }
@@ -3601,7 +3653,7 @@ import * as path4 from "node:path";
3601
3653
  import { TextDecoder } from "node:util";
3602
3654
  import { Worker } from "node:worker_threads";
3603
3655
  import { logger as logger13 } from "@elizaos/core";
3604
- var __dirname = "/Users/shawwalters/eliza-ok/plugins/plugin-vision/typescript/src";
3656
+ var __dirname = "/Users/shawwalters/eliza-workspace/plugins/plugin-vision/typescript/src";
3605
3657
 
3606
3658
  class VisionWorkerManager {
3607
3659
  config;
@@ -6041,11 +6093,11 @@ ${Object.entries(captureData.statistics.poseCounts).map(([pose, count]) => `- **
6041
6093
  `) || "- No people detected"}
6042
6094
 
6043
6095
  ## Sample Scene Descriptions
6044
- ${captureData.captures.filter((c) => c.scene?.description).slice(0, 5).map((c, _i) => `### Capture ${c.index} (${c.elapsedMs}ms)
6045
- "${c.scene.description}"
6046
- - Change: ${c.scene.changePercentage?.toFixed(1)}%
6047
- - Objects: ${c.scene.objectCount}
6048
- - People: ${c.scene.peopleCount}`).join(`
6096
+ ${captureData.captures.filter((c) => c.scene?.description).slice(0, 5).map((c, _i) => `### Capture ${c.index} (${c.elapsedMs ?? 0}ms)
6097
+ "${c.scene?.description ?? ""}"
6098
+ - Change: ${c.scene?.changePercentage?.toFixed(1) ?? "0"}%
6099
+ - Objects: ${c.scene?.objectCount ?? 0}
6100
+ - People: ${c.scene?.peopleCount ?? 0}`).join(`
6049
6101
 
6050
6102
  `)}
6051
6103
 
@@ -6078,7 +6130,6 @@ ${captureData.captures.filter((c) => c.scene?.description).slice(0, 5).map((c, _
6078
6130
  var vision_capture_log_default = new VisionCaptureLogTestSuite;
6079
6131
  // src/tests/e2e/vision-runtime.ts
6080
6132
  import { logger as logger15 } from "@elizaos/core";
6081
-
6082
6133
  class VisionRuntimeTestSuite {
6083
6134
  name = "vision-runtime-tests";
6084
6135
  description = "Real runtime tests for vision plugin functionality";
@@ -6087,7 +6138,7 @@ class VisionRuntimeTestSuite {
6087
6138
  name: "Vision service initialization",
6088
6139
  fn: async (runtime) => {
6089
6140
  logger15.info("[Test] Testing vision service initialization...");
6090
- const visionService = runtime.getService("VISION");
6141
+ const visionService = runtime.getService(VisionServiceType.VISION);
6091
6142
  if (!visionService) {
6092
6143
  throw new Error("Vision service not found in runtime");
6093
6144
  }
@@ -6096,7 +6147,7 @@ class VisionRuntimeTestSuite {
6096
6147
  }
6097
6148
  const isActive = visionService.isActive();
6098
6149
  logger15.info(`[Test] Vision service active: ${isActive}`);
6099
- if (!isActive && runtime.getSetting("VISION_MODE") !== "OFF") {
6150
+ if (!isActive && runtime.getSetting("VISION_MODE") !== "OFF" /* OFF */) {
6100
6151
  throw new Error("Vision service should be active but is not");
6101
6152
  }
6102
6153
  logger15.info("[Test] ✅ Vision service initialization test passed");
@@ -6106,7 +6157,7 @@ class VisionRuntimeTestSuite {
6106
6157
  name: "Scene description functionality",
6107
6158
  fn: async (runtime) => {
6108
6159
  logger15.info("[Test] Testing scene description...");
6109
- const visionService = runtime.getService("VISION");
6160
+ const visionService = runtime.getService(VisionServiceType.VISION);
6110
6161
  if (!visionService) {
6111
6162
  throw new Error("Vision service not found");
6112
6163
  }
@@ -6136,13 +6187,18 @@ class VisionRuntimeTestSuite {
6136
6187
  name: "Vision mode switching",
6137
6188
  fn: async (runtime) => {
6138
6189
  logger15.info("[Test] Testing vision mode switching...");
6139
- const visionService = runtime.getService("VISION");
6190
+ const visionService = runtime.getService(VisionServiceType.VISION);
6140
6191
  if (!visionService) {
6141
6192
  throw new Error("Vision service not found");
6142
6193
  }
6143
6194
  const originalMode = visionService.getVisionMode();
6144
6195
  logger15.info(`[Test] Original mode: ${originalMode}`);
6145
- const testModes = ["CAMERA", "SCREEN", "BOTH", "OFF"];
6196
+ const testModes = [
6197
+ "CAMERA" /* CAMERA */,
6198
+ "SCREEN" /* SCREEN */,
6199
+ "BOTH" /* BOTH */,
6200
+ "OFF" /* OFF */
6201
+ ];
6146
6202
  for (const mode of testModes) {
6147
6203
  logger15.info(`[Test] Switching to mode: ${mode}`);
6148
6204
  await visionService.setVisionMode(mode);
@@ -6185,7 +6241,7 @@ class VisionRuntimeTestSuite {
6185
6241
  }
6186
6242
  return [];
6187
6243
  };
6188
- await action.handler(runtime, message, {}, {}, callback);
6244
+ await action.handler(runtime, message, { values: {}, data: {}, text: "" }, {}, callback);
6189
6245
  if (!responseReceived) {
6190
6246
  throw new Error("DESCRIBE_SCENE action did not produce a response");
6191
6247
  }
@@ -6226,7 +6282,7 @@ class VisionRuntimeTestSuite {
6226
6282
  name: "Florence-2 model initialization",
6227
6283
  fn: async (runtime) => {
6228
6284
  logger15.info("[Test] Testing Florence-2 model...");
6229
- const visionService = runtime.getService("VISION");
6285
+ const visionService = runtime.getService(VisionServiceType.VISION);
6230
6286
  if (!visionService) {
6231
6287
  throw new Error("Vision service not found");
6232
6288
  }
@@ -6236,7 +6292,7 @@ class VisionRuntimeTestSuite {
6236
6292
  return;
6237
6293
  }
6238
6294
  const mode = visionService.getVisionMode();
6239
- if (mode === "SCREEN" || mode === "BOTH") {
6295
+ if (mode === "SCREEN" /* SCREEN */ || mode === "BOTH" /* BOTH */) {
6240
6296
  const screenCapture = await visionService.getScreenCapture();
6241
6297
  if (screenCapture) {
6242
6298
  logger15.info("[Test] Screen capture available");
@@ -6251,7 +6307,7 @@ class VisionRuntimeTestSuite {
6251
6307
  name: "OCR service functionality",
6252
6308
  fn: async (runtime) => {
6253
6309
  logger15.info("[Test] Testing OCR service...");
6254
- const visionService = runtime.getService("VISION");
6310
+ const visionService = runtime.getService(VisionServiceType.VISION);
6255
6311
  if (!visionService) {
6256
6312
  throw new Error("Vision service not found");
6257
6313
  }
@@ -6261,7 +6317,7 @@ class VisionRuntimeTestSuite {
6261
6317
  return;
6262
6318
  }
6263
6319
  const mode = visionService.getVisionMode();
6264
- if (mode === "SCREEN" || mode === "BOTH") {
6320
+ if (mode === "SCREEN" /* SCREEN */ || mode === "BOTH" /* BOTH */) {
6265
6321
  const enhancedScene = await visionService.getEnhancedSceneDescription();
6266
6322
  if (enhancedScene?.screenAnalysis) {
6267
6323
  const ocrText = enhancedScene.screenAnalysis.fullScreenOCR;
@@ -6278,7 +6334,7 @@ class VisionRuntimeTestSuite {
6278
6334
  name: "Entity tracking system",
6279
6335
  fn: async (runtime) => {
6280
6336
  logger15.info("[Test] Testing entity tracking...");
6281
- const visionService = runtime.getService("VISION");
6337
+ const visionService = runtime.getService(VisionServiceType.VISION);
6282
6338
  if (!visionService) {
6283
6339
  throw new Error("Vision service not found");
6284
6340
  }
@@ -6289,10 +6345,10 @@ class VisionRuntimeTestSuite {
6289
6345
  const entities = entityTracker.getActiveEntities();
6290
6346
  logger15.info(`[Test] Active entities: ${entities.length}`);
6291
6347
  for (const entity of entities) {
6292
- if (!entity.id || !entity.type || !entity.lastSeen) {
6348
+ if (!entity.id || !entity.entityType || !entity.lastSeen) {
6293
6349
  throw new Error("Entity missing required fields");
6294
6350
  }
6295
- logger15.info(`[Test] Entity ${entity.id}: type=${entity.type}, tracked=${entity.trackingDuration}ms`);
6351
+ logger15.info(`[Test] Entity ${entity.id}: type=${entity.entityType}, tracked=${entity.lastSeen - entity.firstSeen}ms`);
6296
6352
  }
6297
6353
  logger15.info("[Test] ✅ Entity tracking test passed");
6298
6354
  }
@@ -6703,4 +6759,4 @@ export {
6703
6759
  src_default as default
6704
6760
  };
6705
6761
 
6706
- //# debugId=F5001EC904380B0D64756E2164756E21
6762
+ //# debugId=4F137AC378EA73BB64756E2164756E21