@elizaos/plugin-vision 2.0.0-alpha.4 → 2.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +68 -16
- package/dist/index.js.map +4 -4
- package/package.json +8 -6
- package/dist/workers/florence2-worker.js +0 -121102
- package/dist/workers/florence2-worker.js.map +0 -92
- package/dist/workers/ocr-worker.js +0 -128510
- package/dist/workers/ocr-worker.js.map +0 -137
- package/dist/workers/screen-capture-worker.js +0 -359
- package/dist/workers/screen-capture-worker.js.map +0 -11
package/dist/index.js
CHANGED
|
@@ -1,4 +1,20 @@
|
|
|
1
1
|
import { createRequire } from "node:module";
|
|
2
|
+
// Short aliases for the Object built-ins used by the interop helper below.
var __create = Object.create;
var __getProtoOf = Object.getPrototypeOf;
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Builds a namespace-like wrapper object around a module value.
 *
 * The wrapper inherits from the prototype of `mod` and exposes each own
 * property of `mod` as an enumerable getter, so later mutations of `mod`
 * stay visible through the wrapper. A `default` property holding `mod`
 * itself is added when `isNodeMode` is truthy, when `mod` is falsy, or when
 * `mod` does not carry an `__esModule` flag.
 *
 * @param {*} mod - The module value to wrap.
 * @param {boolean} [isNodeMode] - Forces `default` to be `mod` itself.
 * @param {object} [target] - Ignored: it is overwritten before first use and
 *   is kept only so the signature stays unchanged.
 * @returns {object} The wrapper object.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const to = isNodeMode || !mod || !mod.__esModule
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  // Object.getOwnPropertyNames throws a TypeError for null/undefined, so a
  // nullish module has nothing to copy and yields just `{ default: mod }`.
  if (mod == null) {
    return to;
  }
  for (const key of __getOwnPropNames(mod)) {
    // Never shadow a property already on the wrapper (e.g. `default`).
    if (!__hasOwnProp.call(to, key)) {
      __defProp(to, key, {
        get: () => mod[key],
        enumerable: true
      });
    }
  }
  return to;
};
|
|
2
18
|
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
3
19
|
|
|
4
20
|
// src/action.ts
|
|
@@ -2264,8 +2280,19 @@ import { logger as logger8 } from "@elizaos/core";
|
|
|
2264
2280
|
|
|
2265
2281
|
// src/florence2-local.ts
|
|
2266
2282
|
import { logger as logger7 } from "@elizaos/core";
|
|
2267
|
-
import * as tf from "@tensorflow/tfjs-node";
|
|
2268
2283
|
import sharp from "sharp";
|
|
2284
|
+
// Namespace object of "@tensorflow/tfjs-node"; stays null until ensureTf()
// has imported it successfully.
var tf = null;

/**
 * Imports "@tensorflow/tfjs-node" on first use and stores it in `tf`.
 *
 * A failed import is not fatal: a warning is logged together with the
 * underlying error (so the cause of the failure is visible in the logs),
 * and `false` is returned.
 *
 * @returns {Promise<boolean>} `true` when `tf` is populated, `false` when the
 *   import failed.
 */
async function ensureTf() {
  if (tf) {
    return true;
  }
  try {
    tf = await import("@tensorflow/tfjs-node");
    return true;
  } catch (error) {
    logger7.warn("[Florence2Local] TensorFlow.js native addon not available — using heuristic analysis.", error);
    return false;
  }
}
|
|
2269
2296
|
|
|
2270
2297
|
class Florence2Local {
|
|
2271
2298
|
model = null;
|
|
@@ -2281,6 +2308,11 @@ class Florence2Local {
|
|
|
2281
2308
|
if (this.initialized) {
|
|
2282
2309
|
return;
|
|
2283
2310
|
}
|
|
2311
|
+
const tfAvailable = await ensureTf();
|
|
2312
|
+
if (!tfAvailable || !tf) {
|
|
2313
|
+
this.initialized = true;
|
|
2314
|
+
return;
|
|
2315
|
+
}
|
|
2284
2316
|
try {
|
|
2285
2317
|
logger7.info("[VisionModel] Initializing MobileNet model for image analysis...");
|
|
2286
2318
|
this.model = await tf.loadGraphModel(this.config.modelUrl);
|
|
@@ -2295,22 +2327,22 @@ class Florence2Local {
|
|
|
2295
2327
|
if (!this.initialized) {
|
|
2296
2328
|
await this.initialize();
|
|
2297
2329
|
}
|
|
2330
|
+
if (!tf || !this.model) {
|
|
2331
|
+
return await this.enhancedFallback(imageBuffer);
|
|
2332
|
+
}
|
|
2298
2333
|
try {
|
|
2299
2334
|
const preprocessed = await this.preprocessImage(imageBuffer);
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
return this.parseModelOutput(predictions);
|
|
2304
|
-
} else {
|
|
2305
|
-
preprocessed.dispose();
|
|
2306
|
-
return await this.enhancedFallback(imageBuffer);
|
|
2307
|
-
}
|
|
2335
|
+
const predictions = await this.runInference(preprocessed);
|
|
2336
|
+
preprocessed.dispose?.();
|
|
2337
|
+
return this.parseModelOutput(predictions);
|
|
2308
2338
|
} catch (error) {
|
|
2309
2339
|
logger7.error("[VisionModel] Analysis failed:", error);
|
|
2310
2340
|
return await this.enhancedFallback(imageBuffer);
|
|
2311
2341
|
}
|
|
2312
2342
|
}
|
|
2313
2343
|
async preprocessImage(imageBuffer) {
|
|
2344
|
+
if (!tf)
|
|
2345
|
+
throw new Error("TensorFlow.js not available");
|
|
2314
2346
|
const resized = await sharp(imageBuffer).resize(224, 224).raw().toBuffer();
|
|
2315
2347
|
const tensor = tf.node.decodeImage(resized, 3);
|
|
2316
2348
|
const normalized = tf.div(tensor, 255);
|
|
@@ -3252,19 +3284,39 @@ class ScreenCaptureService {
|
|
|
3252
3284
|
|
|
3253
3285
|
// src/vision-models.ts
|
|
3254
3286
|
import { logger as logger12 } from "@elizaos/core";
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
|
|
3287
|
+
// Lazily imported module namespaces. All three are populated together by
// loadTfModules(), or all remain null.
var tf2 = null;
var cocoSsd = null;
var poseDetection = null;

/**
 * Imports "@tensorflow/tfjs-node", "@tensorflow-models/coco-ssd" and
 * "@tensorflow-models/pose-detection" on first use.
 *
 * The module-level bindings are only assigned once every import has
 * succeeded, so a failure part-way through cannot leave `tf2` set while
 * `cocoSsd` or `poseDetection` is still null (which would make a later call
 * report success from the cache check). On failure a warning is logged
 * together with the underlying error and `false` is returned.
 *
 * @returns {Promise<boolean>} `true` when all three modules are available,
 *   `false` otherwise.
 */
async function loadTfModules() {
  if (tf2 && cocoSsd && poseDetection) {
    return true;
  }
  try {
    // Imports are awaited one after another, in the original order.
    const tfModule = await import("@tensorflow/tfjs-node");
    const cocoSsdModule = await import("@tensorflow-models/coco-ssd");
    const poseDetectionModule = await import("@tensorflow-models/pose-detection");
    tf2 = tfModule;
    cocoSsd = cocoSsdModule;
    poseDetection = poseDetectionModule;
    return true;
  } catch (err) {
    logger12.warn("[VisionModels] TensorFlow.js native addon not available — " + "falling back to description-based detection. Run `npm rebuild @tensorflow/tfjs-node --build-addon-from-source` to enable hardware-accelerated vision.", err);
    return false;
  }
}
|
|
3258
3303
|
|
|
3259
3304
|
class VisionModels {
|
|
3260
3305
|
objectDetectionModel = null;
|
|
3261
3306
|
poseDetector = null;
|
|
3262
3307
|
initialized = false;
|
|
3308
|
+
tfAvailable = false;
|
|
3263
3309
|
async initialize(config) {
|
|
3264
3310
|
if (this.initialized) {
|
|
3265
3311
|
return;
|
|
3266
3312
|
}
|
|
3267
3313
|
logger12.info("[VisionModels] Initializing vision models...");
|
|
3314
|
+
this.tfAvailable = await loadTfModules();
|
|
3315
|
+
if (!this.tfAvailable || !tf2 || !cocoSsd || !poseDetection) {
|
|
3316
|
+
this.initialized = true;
|
|
3317
|
+
logger12.info("[VisionModels] Initialized without TensorFlow (fallback mode)");
|
|
3318
|
+
return;
|
|
3319
|
+
}
|
|
3268
3320
|
try {
|
|
3269
3321
|
await tf2.ready();
|
|
3270
3322
|
logger12.info("[VisionModels] TensorFlow.js backend ready");
|
|
@@ -3308,7 +3360,7 @@ class VisionModels {
|
|
|
3308
3360
|
return this.poseDetector !== null;
|
|
3309
3361
|
}
|
|
3310
3362
|
async detectObjects(imageData, _width, _height, description) {
|
|
3311
|
-
if (!this.objectDetectionModel) {
|
|
3363
|
+
if (!this.objectDetectionModel || !tf2) {
|
|
3312
3364
|
logger12.warn("[VisionModels] Object detection model not loaded");
|
|
3313
3365
|
return this.enhancedObjectDetection(description);
|
|
3314
3366
|
}
|
|
@@ -3394,7 +3446,7 @@ class VisionModels {
|
|
|
3394
3446
|
};
|
|
3395
3447
|
}
|
|
3396
3448
|
async detectPoses(imageData, width, height, description) {
|
|
3397
|
-
if (!this.poseDetector) {
|
|
3449
|
+
if (!this.poseDetector || !tf2) {
|
|
3398
3450
|
logger12.warn("[VisionModels] Pose detection model not loaded");
|
|
3399
3451
|
return this.enhancedPoseDetection(description);
|
|
3400
3452
|
}
|
|
@@ -3601,7 +3653,7 @@ import * as path4 from "node:path";
|
|
|
3601
3653
|
import { TextDecoder } from "node:util";
|
|
3602
3654
|
import { Worker } from "node:worker_threads";
|
|
3603
3655
|
import { logger as logger13 } from "@elizaos/core";
|
|
3604
|
-
var __dirname = "/Users/shawwalters/eliza-
|
|
3656
|
+
// NOTE(review): this is a hard-coded absolute path on a developer machine
// rather than a location derived at runtime (e.g. from import.meta.url).
// Presumably anything resolved relative to it will not exist on other
// installs — confirm how __dirname is consumed further down before relying on it.
var __dirname = "/Users/shawwalters/eliza-workspace/plugins/plugin-vision/typescript/src";
|
|
3605
3657
|
|
|
3606
3658
|
class VisionWorkerManager {
|
|
3607
3659
|
config;
|
|
@@ -6707,4 +6759,4 @@ export {
|
|
|
6707
6759
|
src_default as default
|
|
6708
6760
|
};
|
|
6709
6761
|
|
|
6710
|
-
//# debugId=
|
|
6762
|
+
//# debugId=4F137AC378EA73BB64756E2164756E21
|