@elizaos/plugin-vision 2.0.0-beta.1 → 2.0.3-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +73 -301
- package/dist/action.d.ts +3 -0
- package/dist/action.d.ts.map +1 -0
- package/dist/audio-capture-stream.d.ts +42 -0
- package/dist/audio-capture-stream.d.ts.map +1 -0
- package/dist/audio-capture.d.ts +25 -0
- package/dist/audio-capture.d.ts.map +1 -0
- package/dist/computeruse-ocr-bridge.d.ts +50 -0
- package/dist/computeruse-ocr-bridge.d.ts.map +1 -0
- package/dist/config.d.ts +68 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/describe-backpressure.d.ts +90 -0
- package/dist/describe-backpressure.d.ts.map +1 -0
- package/dist/dirty-tile-describer.d.ts +102 -0
- package/dist/dirty-tile-describer.d.ts.map +1 -0
- package/dist/dirty-tile-scene.d.ts +56 -0
- package/dist/dirty-tile-scene.d.ts.map +1 -0
- package/dist/entity-tracker.d.ts +33 -0
- package/dist/entity-tracker.d.ts.map +1 -0
- package/dist/face-detector-ggml.d.ts +60 -0
- package/dist/face-detector-ggml.d.ts.map +1 -0
- package/dist/face-detector-mediapipe.d.ts +25 -0
- package/dist/face-detector-mediapipe.d.ts.map +1 -0
- package/dist/face-recognition-ggml.d.ts +94 -0
- package/dist/face-recognition-ggml.d.ts.map +1 -0
- package/dist/get-screen-elements.d.ts +90 -0
- package/dist/get-screen-elements.d.ts.map +1 -0
- package/dist/get-screen.d.ts +60 -0
- package/dist/get-screen.d.ts.map +1 -0
- package/dist/image/sharp-compat.d.ts +89 -0
- package/dist/image/sharp-compat.d.ts.map +1 -0
- package/dist/image-input.d.ts +15 -0
- package/dist/image-input.d.ts.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +7957 -6238
- package/dist/index.js.map +41 -26
- package/dist/lifecycle.d.ts +94 -0
- package/dist/lifecycle.d.ts.map +1 -0
- package/dist/mobile/capacitor-camera.d.ts +85 -0
- package/dist/mobile/capacitor-camera.d.ts.map +1 -0
- package/dist/native/doctr-ffi.d.ts +40 -0
- package/dist/native/doctr-ffi.d.ts.map +1 -0
- package/dist/native/yolo-ffi.d.ts +21 -0
- package/dist/native/yolo-ffi.d.ts.map +1 -0
- package/dist/ocr-host-windows.d.ts +34 -0
- package/dist/ocr-host-windows.d.ts.map +1 -0
- package/dist/ocr-service-apple-vision-macos.d.ts +51 -0
- package/dist/ocr-service-apple-vision-macos.d.ts.map +1 -0
- package/dist/ocr-service-doctr.d.ts +61 -0
- package/dist/ocr-service-doctr.d.ts.map +1 -0
- package/dist/ocr-service-linux-tesseract.d.ts +85 -0
- package/dist/ocr-service-linux-tesseract.d.ts.map +1 -0
- package/dist/ocr-service-paddleocr.d.ts +59 -0
- package/dist/ocr-service-paddleocr.d.ts.map +1 -0
- package/dist/ocr-service-windows.d.ts +41 -0
- package/dist/ocr-service-windows.d.ts.map +1 -0
- package/dist/ocr-service.d.ts +91 -0
- package/dist/ocr-service.d.ts.map +1 -0
- package/dist/ocr-with-coords.d.ts +103 -0
- package/dist/ocr-with-coords.d.ts.map +1 -0
- package/dist/person-detector.d.ts +17 -0
- package/dist/person-detector.d.ts.map +1 -0
- package/dist/provider.d.ts +3 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes.d.ts +7 -0
- package/dist/routes.d.ts.map +1 -0
- package/dist/screen-capture-bridge.d.ts +51 -0
- package/dist/screen-capture-bridge.d.ts.map +1 -0
- package/dist/screen-capture.d.ts +17 -0
- package/dist/screen-capture.d.ts.map +1 -0
- package/dist/screen-tiler.d.ts +75 -0
- package/dist/screen-tiler.d.ts.map +1 -0
- package/dist/service.d.ts +176 -0
- package/dist/service.d.ts.map +1 -0
- package/dist/set-of-marks-provider.d.ts +64 -0
- package/dist/set-of-marks-provider.d.ts.map +1 -0
- package/dist/som.d.ts +135 -0
- package/dist/som.d.ts.map +1 -0
- package/dist/som.js +184 -0
- package/dist/som.js.map +11 -0
- package/dist/test-input.d.ts +25 -0
- package/dist/test-input.d.ts.map +1 -0
- package/dist/types.d.ts +241 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/vision-context-augmenter.d.ts +93 -0
- package/dist/vision-context-augmenter.d.ts.map +1 -0
- package/dist/vision-worker-manager.d.ts +51 -0
- package/dist/vision-worker-manager.d.ts.map +1 -0
- package/dist/workers/ocr-worker.d.ts +2 -0
- package/dist/workers/ocr-worker.d.ts.map +1 -0
- package/dist/workers/ocr-worker.js +1075 -7821
- package/dist/workers/ocr-worker.js.map +10 -51
- package/dist/workers/screen-capture-worker.d.ts +2 -0
- package/dist/workers/screen-capture-worker.d.ts.map +1 -0
- package/dist/workers/screen-capture-worker.js +364 -6
- package/dist/workers/screen-capture-worker.js.map +5 -4
- package/dist/workers/worker-logger.d.ts +10 -0
- package/dist/workers/worker-logger.d.ts.map +1 -0
- package/dist/yolo-detector.d.ts +37 -0
- package/dist/yolo-detector.d.ts.map +1 -0
- package/native/doctr.cpp/CMakeLists.txt +58 -0
- package/native/doctr.cpp/README.md +62 -0
- package/native/doctr.cpp/include/doctr.h +91 -0
- package/native/doctr.cpp/scripts/convert.py +98 -0
- package/native/doctr.cpp/src/doctr_det.cpp +112 -0
- package/native/doctr.cpp/src/doctr_rec.cpp +103 -0
- package/native/macos-vision-ocr.swift +113 -0
- package/native/mobilefacenet.cpp/README.md +13 -0
- package/native/movenet.cpp/README.md +10 -0
- package/native/retinaface.cpp/README.md +12 -0
- package/native/yolo.cpp/CMakeLists.txt +57 -0
- package/native/yolo.cpp/README.md +64 -0
- package/native/yolo.cpp/build.mjs +76 -0
- package/native/yolo.cpp/include/yolo.h +62 -0
- package/native/yolo.cpp/scripts/convert.py +248 -0
- package/native/yolo.cpp/src/yolo.cpp +425 -0
- package/native/yolo.cpp/verify/compare.py +99 -0
- package/native/yolo.cpp/verify/make_ref.py +75 -0
- package/native/yolo.cpp/verify/run_ggml.mjs +78 -0
- package/native/yolo.cpp/verify/run_ts.mjs +26 -0
- package/package.json +39 -21
- package/registry-entry.json +43 -0
- package/scripts/vendor-tesseract-linux.mjs +177 -0
- package/build.config.ts +0 -89
- package/dist/workers/florence2-worker.js +0 -779
- package/dist/workers/florence2-worker.js.map +0 -13
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEnE,eAAO,MAAM,iBAAiB;;CAE7B,CAAC;AAEF,OAAO,QAAQ,eAAe,CAAC;IAC7B,UAAU,mBAAmB;QAC3B,MAAM,EAAE,QAAQ,CAAC;KAClB;CACF;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,2EAA2E;IAC3E,SAAS,EAAE,MAAM,CAAC;IAClB,iFAAiF;IACjF,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,YAAY,EAAE,OAAO,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gEAAgE;IAChE,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,2EAA2E;IAC3E,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,mBAAmB,CAAC,EAAE,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC,CAAC;IACzD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,cAAc;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,WAAW,CAAC;CAC1B;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,SAAS,GAAG,UAAU,GAAG,OAAO,GAAG,SAAS,CAAC;IACnD,MAAM,EAAE,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC;IACzD,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,WAAW,CAAC;IACzB,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE;YAAE,CAAC,EAAE,MAAM,CAAC;YAAC,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;QACnC,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,KAAK,CAAC;CACzC;AAED,oBAAY,UAAU;IACpB,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,IAAI,SAAS;CACd;AAGD,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,sEAAsE;IACtE,CAAC,EAAE,MAAM,CAAC;IACV,sEAAsE;IACtE,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gFAAgF;IAChF,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,GAAG,CAAC,EAAE,SAAS,CAAC;IAChB,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,WAAW,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,CAAC,EAAE,KAAK,CAAC;YACZ,IAAI,EAAE,MAAM,CAAC;YACb,IAAI,EAAE,WAAW,CAAC;YAClB,UAAU,EAAE,MAAM,CAAC;SACpB,CAAC,CAAC;KACJ,CAAC,CAAC;IACH,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,wBAAyB,SAAQ,gBAAgB;IAChE,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,cAAc,CAAC,EAAE;QACf,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,UAAU,CAAC,EAAE,KAAK,CAAC;YACjB,IAAI,EAAE,MAAM,CAAC;YACb,IAAI,EAAE,MAAM,CAAC;YACb,QAAQ,EAAE,WAAW,CAAC;SACvB,CAAC,CAAC;KACJ,CAAC;CACH;AAED,MAAM,WAAW,YAAY;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mBAAmB,CAAC,EAAE,YAAY,GAAG,UAAU,GAAG,QAAQ,CAAC;IAC3D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,YAAY,CAAC,EAAE;QACb,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,KAAK,CAAC;QAClB,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,QAAQ,GAAG,QAAQ,GAAG,KAAK,CAAC;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,WAAW,CAAC;IAC1B,WAAW,EAAE,gBAAgB,EAAE,CAAC;IAChC,UAAU,EAAE,gBAAgB,CAAC;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,gBAAgB;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,WAAW,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE;YAAE,CAAC,EAAE,MAAM,CAAC;YAAC,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;QACnC,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,gBAAgB;IAC/B,CAAC,GAAG,EAAE,MAAM,GACR,MAAM,GACN,MAAM,GACN,OAAO,GACP,IAAI,GACJ,SAAS,GACT,MAAM,EAAE,GACR,MAAM,EAAE,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAGvB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,CAAC;IACpC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IAChC,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC;CACrC;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,EAAE,EAAE,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE;QACX,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,KAAK,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,WAAW,CAAC;KAC3B,CAAC,CAAC;CACJ"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision-context augmenter (issue #9105).
|
|
3
|
+
*
|
|
4
|
+
* Runs the token-free pre-vision detectors plugin-vision owns — OCR
|
|
5
|
+
* (tesseract / native OS engines), object detection (YOLO), face detection —
|
|
6
|
+
* over an image and folds their results into the describe prompt as structured
|
|
7
|
+
* text. The on-device Gemma-4 VL model then grounds its description in real
|
|
8
|
+
* extracted signals (small text it would otherwise misread, object identity,
|
|
9
|
+
* presence of people) instead of guessing.
|
|
10
|
+
*
|
|
11
|
+
* This is the *provider* half of the seam. plugin-local-inference owns the
|
|
12
|
+
* registry (`registerVisionContextAugmenter`); this class is registered into it
|
|
13
|
+
* at boot via a best-effort dynamic import (see `index.ts`), mirroring the
|
|
14
|
+
* coord-OCR bridge into plugin-computeruse. Every detector is optional and
|
|
15
|
+
* best-effort: an unavailable or failing detector contributes nothing rather
|
|
16
|
+
* than failing the describe.
|
|
17
|
+
*/
|
|
18
|
+
import { Buffer } from "node:buffer";
|
|
19
|
+
import { type OcrWithCoordsService } from "./ocr-with-coords.js";
|
|
20
|
+
/** Raw signals extracted from an image, joined into prompt-ready strings. */
|
|
21
|
+
export interface FusedVisionSignals {
|
|
22
|
+
ocrText?: string;
|
|
23
|
+
objects?: string;
|
|
24
|
+
faces?: string;
|
|
25
|
+
}
|
|
26
|
+
/** Image wrapper accepted by the augmenter — matches the handler's request shape. */
|
|
27
|
+
export type AugmenterImageInput = {
|
|
28
|
+
kind: "bytes";
|
|
29
|
+
bytes: Uint8Array;
|
|
30
|
+
mimeType?: string;
|
|
31
|
+
} | {
|
|
32
|
+
kind: "base64";
|
|
33
|
+
base64: string;
|
|
34
|
+
mimeType?: string;
|
|
35
|
+
} | {
|
|
36
|
+
kind: "dataUrl";
|
|
37
|
+
dataUrl: string;
|
|
38
|
+
} | {
|
|
39
|
+
kind: "url";
|
|
40
|
+
url: string;
|
|
41
|
+
mimeType?: string;
|
|
42
|
+
};
|
|
43
|
+
export interface VisionAugmentInput {
|
|
44
|
+
image: AugmenterImageInput;
|
|
45
|
+
basePrompt?: string;
|
|
46
|
+
}
|
|
47
|
+
export interface VisionAugmentOutput {
|
|
48
|
+
prompt: string;
|
|
49
|
+
fused: FusedVisionSignals;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Injectable detector hooks. OCR defaults to the registered coord-OCR service;
|
|
53
|
+
* object/face detection are wired only when their native artifacts are present.
|
|
54
|
+
* Exposed so tests can drive the fusion with deterministic fakes (no native
|
|
55
|
+
* libs, no real tesseract).
|
|
56
|
+
*/
|
|
57
|
+
export interface VisionAugmenterDetectors {
|
|
58
|
+
getOcr?: () => OcrWithCoordsService | null;
|
|
59
|
+
detectObjects?: (imageBytes: Buffer) => Promise<ReadonlyArray<{
|
|
60
|
+
type: string;
|
|
61
|
+
confidence: number;
|
|
62
|
+
}>>;
|
|
63
|
+
detectFaces?: (imageBytes: Buffer) => Promise<number>;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Keep only OCR fragments that carry real signal. Tesseract run over a natural
|
|
67
|
+
* photo (vs a clean document) emits per-glyph noise — `|`, `=`, `—`, stray
|
|
68
|
+
* single letters — that would pollute the prompt and waste tokens. Require at
|
|
69
|
+
* least two alphanumeric characters so genuine words/labels survive and noise
|
|
70
|
+
* is dropped. Pure — exported for tests.
|
|
71
|
+
*/
|
|
72
|
+
export declare function isMeaningfulOcrText(text: string): boolean;
|
|
73
|
+
export declare class FusedVisionContextAugmenter {
|
|
74
|
+
private readonly deps;
|
|
75
|
+
readonly name = "vision-fused-context";
|
|
76
|
+
constructor(deps?: VisionAugmenterDetectors);
|
|
77
|
+
augmentImagePrompt(input: VisionAugmentInput): Promise<VisionAugmentOutput | null>;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Compose the final prompt: the caller's prompt (or a default) followed by a
|
|
81
|
+
* clearly-delimited block of detected signals. Pure — exported for tests so the
|
|
82
|
+
* prompt contract has a single source of truth.
|
|
83
|
+
*/
|
|
84
|
+
export declare function buildAugmentedPrompt(basePrompt: string | undefined, fused: FusedVisionSignals): string;
|
|
85
|
+
/**
|
|
86
|
+
* The production augmenter: OCR via the registered coord-OCR service, plus YOLO
|
|
87
|
+
* object detection and BlazeFace face detection wired lazily and gated on their
|
|
88
|
+
* `isAvailable()` probe. Until those native artifacts ship, the gate is false
|
|
89
|
+
* and they contribute nothing — OCR alone augments the prompt. When the
|
|
90
|
+
* artifacts land the same fusion point activates them with no further wiring.
|
|
91
|
+
*/
|
|
92
|
+
export declare function createDefaultVisionAugmenter(): FusedVisionContextAugmenter;
|
|
93
|
+
//# sourceMappingURL=vision-context-augmenter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vision-context-augmenter.d.ts","sourceRoot":"","sources":["../src/vision-context-augmenter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,OAAO,EAEL,KAAK,oBAAoB,EAC1B,MAAM,sBAAsB,CAAC;AAE9B,6EAA6E;AAC7E,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qFAAqF;AACrF,MAAM,MAAM,mBAAmB,GAC3B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,UAAU,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACvD;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEpD,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,mBAAmB,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,kBAAkB,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,oBAAoB,GAAG,IAAI,CAAC;IAC3C,aAAa,CAAC,EAAE,CACd,UAAU,EAAE,MAAM,KACf,OAAO,CAAC,aAAa,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC,CAAC;IAClE,WAAW,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACvD;AAMD;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAEzD;AASD,qBAAa,2BAA2B;IAG1B,OAAO,CAAC,QAAQ,CAAC,IAAI;IAFjC,QAAQ,CAAC,IAAI,0BAA0B;gBAEV,IAAI,GAAE,wBAA6B;IAE1D,kBAAkB,CACtB,KAAK,EAAE,kBAAkB,GACxB,OAAO,CAAC,mBAAmB,GAAG,IAAI,CAAC;CA6DvC;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,UAAU,EAAE,MAAM,GAAG,SAAS,EAC9B,KAAK,EAAE,kBAAkB,GACxB,MAAM,CAaR;AAiCD;;;;;;GAMG;AACH,wBAAgB,4BAA4B,IAAI,2BAA2B,CAK1E"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { EnhancedSceneDescription, ScreenCapture, VisionConfig } from "./types";
|
|
2
|
+
interface WorkerStats {
|
|
3
|
+
fps: number;
|
|
4
|
+
frameCount: number;
|
|
5
|
+
lastUpdate: number;
|
|
6
|
+
}
|
|
7
|
+
export declare class VisionWorkerManager {
|
|
8
|
+
private config;
|
|
9
|
+
private screenCaptureWorker;
|
|
10
|
+
private ocrWorker;
|
|
11
|
+
private screenBuffer;
|
|
12
|
+
private ocrResultsBuffer;
|
|
13
|
+
private screenAtomicState;
|
|
14
|
+
private ocrResultsView;
|
|
15
|
+
private readonly SCREEN_BUFFER_SIZE;
|
|
16
|
+
private readonly OCR_RESULTS_SIZE;
|
|
17
|
+
private readonly FRAME_ID_INDEX;
|
|
18
|
+
private readonly WIDTH_INDEX;
|
|
19
|
+
private readonly HEIGHT_INDEX;
|
|
20
|
+
private readonly DISPLAY_INDEX;
|
|
21
|
+
private readonly TIMESTAMP_INDEX;
|
|
22
|
+
private workerStats;
|
|
23
|
+
private latestScreenCapture;
|
|
24
|
+
private latestOCRResult;
|
|
25
|
+
private lastProcessedFrameId;
|
|
26
|
+
private restartAttempts;
|
|
27
|
+
private readonly MAX_RESTART_ATTEMPTS;
|
|
28
|
+
constructor(config: VisionConfig);
|
|
29
|
+
initialize(): Promise<void>;
|
|
30
|
+
private startScreenCaptureWorker;
|
|
31
|
+
private startOCRWorker;
|
|
32
|
+
private updateOCRCache;
|
|
33
|
+
private readOCRResult;
|
|
34
|
+
getLatestScreenCapture(): ScreenCapture | null;
|
|
35
|
+
getLatestEnhancedScene(): EnhancedSceneDescription;
|
|
36
|
+
private generateTiles;
|
|
37
|
+
getWorkerStats(): Map<string, WorkerStats>;
|
|
38
|
+
setDisplayIndex(index: number): Promise<void>;
|
|
39
|
+
setTextRegions(regions: Array<{
|
|
40
|
+
x: number;
|
|
41
|
+
y: number;
|
|
42
|
+
width: number;
|
|
43
|
+
height: number;
|
|
44
|
+
}>): Promise<void>;
|
|
45
|
+
stop(): Promise<void>;
|
|
46
|
+
private handleWorkerLog;
|
|
47
|
+
private restartScreenCaptureWorker;
|
|
48
|
+
private restartOCRWorker;
|
|
49
|
+
}
|
|
50
|
+
export {};
|
|
51
|
+
//# sourceMappingURL=vision-worker-manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vision-worker-manager.d.ts","sourceRoot":"","sources":["../src/vision-worker-manager.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,wBAAwB,EAExB,aAAa,EACb,YAAY,EACb,MAAM,SAAS,CAAC;AAEjB,UAAU,WAAW;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,MAAM,CAAe;IAE7B,OAAO,CAAC,mBAAmB,CAAuB;IAClD,OAAO,CAAC,SAAS,CAAuB;IAExC,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,gBAAgB,CAAoB;IAE5C,OAAO,CAAC,iBAAiB,CAAa;IACtC,OAAO,CAAC,cAAc,CAAW;IAEjC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAoB;IACvD,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IAEpD,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAK;IACpC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAK;IACjC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAK;IAClC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAK;IACnC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAK;IAErC,OAAO,CAAC,WAAW,CAAkC;IAErD,OAAO,CAAC,mBAAmB,CAA8B;IACzD,OAAO,CAAC,eAAe,CAA0B;IACjD,OAAO,CAAC,oBAAoB,CAAM;IAElC,OAAO,CAAC,eAAe,CAA6B;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAK;gBAE9B,MAAM,EAAE,YAAY;IAU1B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBnB,wBAAwB;YAmDxB,cAAc;IA+C5B,OAAO,CAAC,cAAc;IActB,OAAO,CAAC,aAAa;IA8BrB,sBAAsB,IAAI,aAAa,GAAG,IAAI;IAsC9C,sBAAsB,IAAI,wBAAwB;IAuBlD,OAAO,CAAC,aAAa;IA0CrB,cAAc,IAAI,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC;IAIpC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAS7C,cAAc,CAClB,OAAO,EAAE,KAAK,CAAC;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,GACtE,OAAO,CAAC,IAAI,CAAC;IASV,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IA2B3B,OAAO,CAAC,eAAe;YAwBT,0BAA0B;YAgC1B,gBAAgB;CA+B/B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-worker.d.ts","sourceRoot":"","sources":["../../src/workers/ocr-worker.ts"],"names":[],"mappings":""}
|