@elizaos/plugin-vision 2.0.0-alpha.9 → 2.0.3-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/LICENSE +21 -0
  2. package/README.md +112 -0
  3. package/auto-enable.ts +29 -0
  4. package/dist/action.d.ts +3 -0
  5. package/dist/action.d.ts.map +1 -0
  6. package/dist/audio-capture-stream.d.ts +42 -0
  7. package/dist/audio-capture-stream.d.ts.map +1 -0
  8. package/dist/audio-capture.d.ts +25 -0
  9. package/dist/audio-capture.d.ts.map +1 -0
  10. package/dist/computeruse-ocr-bridge.d.ts +50 -0
  11. package/dist/computeruse-ocr-bridge.d.ts.map +1 -0
  12. package/dist/config.d.ts +68 -0
  13. package/dist/config.d.ts.map +1 -0
  14. package/dist/describe-backpressure.d.ts +90 -0
  15. package/dist/describe-backpressure.d.ts.map +1 -0
  16. package/dist/dirty-tile-describer.d.ts +102 -0
  17. package/dist/dirty-tile-describer.d.ts.map +1 -0
  18. package/dist/dirty-tile-scene.d.ts +56 -0
  19. package/dist/dirty-tile-scene.d.ts.map +1 -0
  20. package/dist/entity-tracker.d.ts +33 -0
  21. package/dist/entity-tracker.d.ts.map +1 -0
  22. package/dist/face-detector-ggml.d.ts +60 -0
  23. package/dist/face-detector-ggml.d.ts.map +1 -0
  24. package/dist/face-detector-mediapipe.d.ts +25 -0
  25. package/dist/face-detector-mediapipe.d.ts.map +1 -0
  26. package/dist/face-recognition-ggml.d.ts +94 -0
  27. package/dist/face-recognition-ggml.d.ts.map +1 -0
  28. package/dist/get-screen-elements.d.ts +90 -0
  29. package/dist/get-screen-elements.d.ts.map +1 -0
  30. package/dist/get-screen.d.ts +60 -0
  31. package/dist/get-screen.d.ts.map +1 -0
  32. package/dist/image/sharp-compat.d.ts +89 -0
  33. package/dist/image/sharp-compat.d.ts.map +1 -0
  34. package/dist/image-input.d.ts +15 -0
  35. package/dist/image-input.d.ts.map +1 -0
  36. package/dist/index.d.ts +4 -0
  37. package/dist/index.d.ts.map +1 -0
  38. package/dist/index.js +7992 -6026
  39. package/dist/index.js.map +42 -26
  40. package/dist/lifecycle.d.ts +94 -0
  41. package/dist/lifecycle.d.ts.map +1 -0
  42. package/dist/mobile/capacitor-camera.d.ts +85 -0
  43. package/dist/mobile/capacitor-camera.d.ts.map +1 -0
  44. package/dist/native/doctr-ffi.d.ts +40 -0
  45. package/dist/native/doctr-ffi.d.ts.map +1 -0
  46. package/dist/native/yolo-ffi.d.ts +21 -0
  47. package/dist/native/yolo-ffi.d.ts.map +1 -0
  48. package/dist/ocr-host-windows.d.ts +34 -0
  49. package/dist/ocr-host-windows.d.ts.map +1 -0
  50. package/dist/ocr-service-apple-vision-macos.d.ts +51 -0
  51. package/dist/ocr-service-apple-vision-macos.d.ts.map +1 -0
  52. package/dist/ocr-service-doctr.d.ts +61 -0
  53. package/dist/ocr-service-doctr.d.ts.map +1 -0
  54. package/dist/ocr-service-linux-tesseract.d.ts +85 -0
  55. package/dist/ocr-service-linux-tesseract.d.ts.map +1 -0
  56. package/dist/ocr-service-paddleocr.d.ts +59 -0
  57. package/dist/ocr-service-paddleocr.d.ts.map +1 -0
  58. package/dist/ocr-service-windows.d.ts +41 -0
  59. package/dist/ocr-service-windows.d.ts.map +1 -0
  60. package/dist/ocr-service.d.ts +91 -0
  61. package/dist/ocr-service.d.ts.map +1 -0
  62. package/dist/ocr-with-coords.d.ts +103 -0
  63. package/dist/ocr-with-coords.d.ts.map +1 -0
  64. package/dist/person-detector.d.ts +17 -0
  65. package/dist/person-detector.d.ts.map +1 -0
  66. package/dist/provider.d.ts +3 -0
  67. package/dist/provider.d.ts.map +1 -0
  68. package/dist/routes.d.ts +7 -0
  69. package/dist/routes.d.ts.map +1 -0
  70. package/dist/screen-capture-bridge.d.ts +51 -0
  71. package/dist/screen-capture-bridge.d.ts.map +1 -0
  72. package/dist/screen-capture.d.ts +17 -0
  73. package/dist/screen-capture.d.ts.map +1 -0
  74. package/dist/screen-tiler.d.ts +75 -0
  75. package/dist/screen-tiler.d.ts.map +1 -0
  76. package/dist/service.d.ts +176 -0
  77. package/dist/service.d.ts.map +1 -0
  78. package/dist/set-of-marks-provider.d.ts +64 -0
  79. package/dist/set-of-marks-provider.d.ts.map +1 -0
  80. package/dist/som.d.ts +135 -0
  81. package/dist/som.d.ts.map +1 -0
  82. package/dist/som.js +184 -0
  83. package/dist/som.js.map +11 -0
  84. package/dist/test-input.d.ts +25 -0
  85. package/dist/test-input.d.ts.map +1 -0
  86. package/dist/types.d.ts +241 -0
  87. package/dist/types.d.ts.map +1 -0
  88. package/dist/vision-context-augmenter.d.ts +93 -0
  89. package/dist/vision-context-augmenter.d.ts.map +1 -0
  90. package/dist/vision-worker-manager.d.ts +51 -0
  91. package/dist/vision-worker-manager.d.ts.map +1 -0
  92. package/dist/workers/ocr-worker.d.ts +2 -0
  93. package/dist/workers/ocr-worker.d.ts.map +1 -0
  94. package/dist/workers/ocr-worker.js +1066 -121865
  95. package/dist/workers/ocr-worker.js.map +10 -130
  96. package/dist/workers/screen-capture-worker.d.ts +2 -0
  97. package/dist/workers/screen-capture-worker.d.ts.map +1 -0
  98. package/dist/workers/screen-capture-worker.js +371 -8
  99. package/dist/workers/screen-capture-worker.js.map +5 -4
  100. package/dist/workers/worker-logger.d.ts +10 -0
  101. package/dist/workers/worker-logger.d.ts.map +1 -0
  102. package/dist/yolo-detector.d.ts +37 -0
  103. package/dist/yolo-detector.d.ts.map +1 -0
  104. package/native/doctr.cpp/CMakeLists.txt +58 -0
  105. package/native/doctr.cpp/README.md +62 -0
  106. package/native/doctr.cpp/include/doctr.h +91 -0
  107. package/native/doctr.cpp/scripts/convert.py +98 -0
  108. package/native/doctr.cpp/src/doctr_det.cpp +112 -0
  109. package/native/doctr.cpp/src/doctr_rec.cpp +103 -0
  110. package/native/macos-vision-ocr.swift +113 -0
  111. package/native/mobilefacenet.cpp/README.md +13 -0
  112. package/native/movenet.cpp/README.md +10 -0
  113. package/native/retinaface.cpp/README.md +12 -0
  114. package/native/yolo.cpp/CMakeLists.txt +57 -0
  115. package/native/yolo.cpp/README.md +64 -0
  116. package/native/yolo.cpp/build.mjs +76 -0
  117. package/native/yolo.cpp/include/yolo.h +62 -0
  118. package/native/yolo.cpp/scripts/convert.py +248 -0
  119. package/native/yolo.cpp/src/yolo.cpp +425 -0
  120. package/native/yolo.cpp/verify/compare.py +99 -0
  121. package/native/yolo.cpp/verify/make_ref.py +75 -0
  122. package/native/yolo.cpp/verify/run_ggml.mjs +78 -0
  123. package/native/yolo.cpp/verify/run_ts.mjs +26 -0
  124. package/package.json +50 -24
  125. package/registry-entry.json +43 -0
  126. package/scripts/vendor-tesseract-linux.mjs +177 -0
  127. package/build.config.ts +0 -70
  128. package/dist/workers/florence2-worker.js +0 -114850
  129. package/dist/workers/florence2-worker.js.map +0 -92
@@ -0,0 +1,33 @@
1
+ import { type IAgentRuntime } from "@elizaos/core";
2
+ import type { DetectedObject, PersonInfo, TrackedEntity, WorldState } from "./types";
3
+ export declare class EntityTracker {
4
+ private worldState;
5
+ private readonly POSITION_THRESHOLD;
6
+ private readonly MISSING_THRESHOLD;
7
+ private readonly CLEANUP_THRESHOLD;
8
+ constructor(worldId: string);
9
+ updateEntities(detectedObjects: DetectedObject[], people: PersonInfo[], faceProfiles?: Map<string, string>, // Maps person ID to face profile ID
10
+ runtime?: IAgentRuntime): Promise<TrackedEntity[]>;
11
+ private trackPerson;
12
+ private trackObject;
13
+ private findMatchingEntity;
14
+ private calculateDistance;
15
+ private updateWorldState;
16
+ private syncWithRuntime;
17
+ getWorldState(): WorldState;
18
+ getActiveEntities(): TrackedEntity[];
19
+ getEntity(entityId: string): TrackedEntity | undefined;
20
+ getRecentlyLeft(): Array<{
21
+ entity: TrackedEntity;
22
+ leftAt: number;
23
+ }>;
24
+ assignNameToEntity(entityId: string, name: string): boolean;
25
+ getStatistics(): {
26
+ totalEntities: number;
27
+ activeEntities: number;
28
+ recentlyLeft: number;
29
+ people: number;
30
+ objects: number;
31
+ };
32
+ }
33
+ //# sourceMappingURL=entity-tracker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-tracker.d.ts","sourceRoot":"","sources":["../src/entity-tracker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAoB,KAAK,aAAa,EAAU,MAAM,eAAe,CAAC;AAC7E,OAAO,KAAK,EAEV,cAAc,EACd,UAAU,EACV,aAAa,EACb,UAAU,EACX,MAAM,SAAS,CAAC;AAEjB,qBAAa,aAAa;IACxB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAQ;IAC1C,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAS;gBAE/B,OAAO,EAAE,MAAM;IAUrB,cAAc,CAClB,eAAe,EAAE,cAAc,EAAE,EACjC,MAAM,EAAE,UAAU,EAAE,EACpB,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,oCAAoC;IACxE,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,aAAa,EAAE,CAAC;YAoCb,WAAW;YA8DX,WAAW;IAsDzB,OAAO,CAAC,kBAAkB;IA2C1B,OAAO,CAAC,iBAAiB;IAezB,OAAO,CAAC,gBAAgB;YAuCV,eAAe;IAqD7B,aAAa,IAAI,UAAU;IAI3B,iBAAiB,IAAI,aAAa,EAAE;IAMpC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS;IAItD,eAAe,IAAI,KAAK,CAAC;QAAE,MAAM,EAAE,aAAa,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAanE,kBAAkB,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO;IAa3D,aAAa,IAAI;QACf,aAAa,EAAE,MAAM,CAAC;QACtB,cAAc,EAAE,MAAM,CAAC;QACvB,YAAY,EAAE,MAAM,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;KACjB;CAUF"}
@@ -0,0 +1,60 @@
1
+ import type { BoundingBox } from "./types";
2
+ /**
3
+ * Same shape the removed ONNX detector exported. Kept identical so callers can
4
+ * select the native backend without reshaping results.
5
+ */
6
+ export interface MediaPipeFaceConfig {
7
+ modelUrl?: string;
8
+ modelSha256?: string | null;
9
+ modelDir?: string;
10
+ scoreThreshold?: number;
11
+ trusted?: boolean;
12
+ }
13
+ export interface MediaPipeFaceDetection {
14
+ bbox: BoundingBox;
15
+ confidence: number;
16
+ /**
17
+ * BlazeFace's 6 keypoints in canonical order:
18
+ * 0: left eye 1: right eye 2: nose tip
19
+ * 3: mouth 4: left ear 5: right ear
20
+ * Coordinates are in source-image absolute pixels.
21
+ */
22
+ keypoints?: Array<{
23
+ x: number;
24
+ y: number;
25
+ }>;
26
+ }
27
+ /**
28
+ * ggml-backed BlazeFace face detector. Mirrors the
29
+ * `MediaPipeFaceDetector` compatibility surface — same constructor config,
30
+ * same `MediaPipeFaceDetection` output shape.
31
+ *
32
+ * Currently disabled (`isAvailable()` returns `false`) until the
33
+ * face-cpp model entries gain runtime implementations and a BlazeFace GGUF
34
+ * artifact lands.
35
+ */
36
+ export declare class BlazeFaceGgmlDetector {
37
+ private readonly cfg;
38
+ private bindings;
39
+ private handle;
40
+ private initialized;
41
+ private initPromise;
42
+ constructor(config?: MediaPipeFaceConfig);
43
+ /**
44
+ * `true` only when both the native library AND the GGUF weights are
45
+ * on disk. Loading them happens lazily in `initialize()`.
46
+ */
47
+ static isAvailable(): Promise<boolean>;
48
+ isInitialized(): boolean;
49
+ initialize(): Promise<void>;
50
+ private _initialize;
51
+ /**
52
+ * Detect faces in the given image buffer. The buffer can be any
53
+ * sharp-supported format (PNG, JPEG, raw); we resize/letterbox to
54
+ * the BlazeFace 128x128 input via sharp, run the native detector,
55
+ * then return source-pixel bboxes + 6 keypoints.
56
+ */
57
+ detect(imageBuffer: Buffer): Promise<MediaPipeFaceDetection[]>;
58
+ dispose(): Promise<void>;
59
+ }
60
+ //# sourceMappingURL=face-detector-ggml.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"face-detector-ggml.d.ts","sourceRoot":"","sources":["../src/face-detector-ggml.ts"],"names":[],"mappings":"AAwBA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAO3C;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,WAAW,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC7C;AA4MD;;;;;;;;GAQG;AACH,qBAAa,qBAAqB;IAChC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAGlB;IACF,OAAO,CAAC,QAAQ,CAAmC;IACnD,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;gBAErC,MAAM,GAAE,mBAAwB;IAQ5C;;;OAGG;WACU,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB5C,aAAa,IAAI,OAAO;IAIlB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAOnB,WAAW;IAsBzB;;;;;OAKG;IACG,MAAM,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,sBAAsB,EAAE,CAAC;IAsD9D,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAS/B"}
@@ -0,0 +1,25 @@
1
+ import type { BoundingBox } from "./types";
2
+ export interface MediaPipeFaceConfig {
3
+ modelUrl?: string;
4
+ modelSha256?: string | null;
5
+ modelDir?: string;
6
+ scoreThreshold?: number;
7
+ trusted?: boolean;
8
+ }
9
+ export interface MediaPipeFaceDetection {
10
+ bbox: BoundingBox;
11
+ confidence: number;
12
+ keypoints?: Array<{
13
+ x: number;
14
+ y: number;
15
+ }>;
16
+ }
17
+ export declare class MediaPipeFaceDetector {
18
+ constructor(_config?: MediaPipeFaceConfig);
19
+ static isAvailable(): Promise<boolean>;
20
+ isInitialized(): boolean;
21
+ initialize(): Promise<void>;
22
+ detect(_imageBuffer: Buffer): Promise<MediaPipeFaceDetection[]>;
23
+ dispose(): Promise<void>;
24
+ }
25
+ //# sourceMappingURL=face-detector-mediapipe.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"face-detector-mediapipe.d.ts","sourceRoot":"","sources":["../src/face-detector-mediapipe.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAE3C,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,WAAW,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC7C;AAED,qBAAa,qBAAqB;gBAEpB,OAAO,GAAE,mBAAwB;WAIhC,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;IAI5C,aAAa,IAAI,OAAO;IAIlB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAM3B,MAAM,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,sBAAsB,EAAE,CAAC;IAM/D,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -0,0 +1,94 @@
1
+ import { type MediaPipeFaceDetection } from "./face-detector-ggml";
2
+ import type { BoundingBox, FaceProfile } from "./types";
3
+ /**
4
+ * Configuration for the ggml-backed face embedder.
5
+ */
6
+ export interface FaceEmbedGgmlConfig {
7
+ modelPath?: string;
8
+ modelDir?: string;
9
+ }
10
+ /**
11
+ * Cosine distance between two 128-d unit-norm embeddings. Matches
12
+ * `face_embed_distance` in the C library: 0 for identical, 1 for
13
+ * orthogonal, 2 for antipodal.
14
+ */
15
+ export declare function cosineDistance(a: Float32Array, b: Float32Array): number;
16
+ /**
17
+ * L2 distance between two 128-d embeddings. For unit-norm inputs this
18
+ * is sqrt(2 - 2*dot(a, b)), in [0, 2].
19
+ */
20
+ export declare function l2Distance(a: Float32Array, b: Float32Array): number;
21
+ /**
22
+ * ggml-backed 128-d face embedder: a 128-d L2-normalized descriptor per
23
+ * detected face, consumed by the `FaceRecognition` class below.
24
+ */
25
+ export declare class FaceEmbedGgmlRecognizer {
26
+ private readonly cfg;
27
+ private bindings;
28
+ private handle;
29
+ private initialized;
30
+ private initPromise;
31
+ constructor(config?: FaceEmbedGgmlConfig);
32
+ /**
33
+ * `true` only when both the native library AND the GGUF weights are
34
+ * on disk.
35
+ */
36
+ static isAvailable(): Promise<boolean>;
37
+ isInitialized(): boolean;
38
+ initialize(): Promise<void>;
39
+ private _initialize;
40
+ /**
41
+ * Compute a 128-d L2-normalized face embedding from an RGB(A) image
42
+ * buffer plus a detection record (bbox + BlazeFace landmarks).
43
+ *
44
+ * The image is decoded via sharp; pass any sharp-supported format
45
+ * (PNG, JPEG, raw). `detection` should come from
46
+ * `BlazeFaceGgmlDetector` so the keypoints already match the
47
+ * BlazeFace order.
48
+ */
49
+ embed(imageBuffer: Buffer, detection: MediaPipeFaceDetection): Promise<Float32Array>;
50
+ dispose(): Promise<void>;
51
+ }
52
+ /**
53
+ * A detected face: a native BlazeFace detection plus its 128-d ggml
54
+ * embedding. Mirrors the fields `VisionService` reads off each result.
55
+ * The native backend produces no expression / age-gender estimates, so
56
+ * those attributes are left to higher layers.
57
+ */
58
+ export interface DetectedFace {
59
+ detection: {
60
+ box: BoundingBox;
61
+ };
62
+ descriptor: Float32Array;
63
+ }
64
+ /**
65
+ * Native ggml face recognition: BlazeFace detection + 128-d embedding +
66
+ * in-memory matching and persistence. When the native `libface` library
67
+ * or its GGUF weights are not on disk, detection returns an empty list
68
+ * (recognition is disabled, never faked). Matching and storage are pure
69
+ * JS and always available.
70
+ */
71
+ export declare class FaceRecognition {
72
+ private readonly detector;
73
+ private readonly embedder;
74
+ private detectorAvailable;
75
+ private readonly faceLibrary;
76
+ private readonly FACE_MATCH_THRESHOLD;
77
+ private readonly MIN_FACE_SIZE;
78
+ /**
79
+ * Detect faces in a raw RGBA frame and compute an embedding for each.
80
+ * Returns an empty list when the native face backend is unavailable.
81
+ */
82
+ detectFaces(imageData: Buffer, width: number, height: number): Promise<DetectedFace[]>;
83
+ recognizeFace(descriptor: Float32Array): Promise<{
84
+ profileId: string;
85
+ distance: number;
86
+ } | null>;
87
+ addOrUpdateFace(descriptor: Float32Array, attributes?: Partial<FaceProfile>): Promise<string>;
88
+ getFaceProfile(profileId: string): FaceProfile | undefined;
89
+ getAllProfiles(): FaceProfile[];
90
+ saveFaceLibrary(filePath: string): Promise<void>;
91
+ loadFaceLibrary(filePath: string): Promise<void>;
92
+ dispose(): Promise<void>;
93
+ }
94
+ //# sourceMappingURL=face-recognition-ggml.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"face-recognition-ggml.d.ts","sourceRoot":"","sources":["../src/face-recognition-ggml.ts"],"names":[],"mappings":"AAyBA,OAAO,EAEL,KAAK,sBAAsB,EAC5B,MAAM,sBAAsB,CAAC;AAE9B,OAAO,KAAK,EAAE,WAAW,EAAe,WAAW,EAAE,MAAM,SAAS,CAAC;AAkMrE;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CASvE;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAUnE;AAED;;;GAGG;AACH,qBAAa,uBAAuB;IAClC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAA6C;IACjE,OAAO,CAAC,QAAQ,CAAkC;IAClD,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;gBAErC,MAAM,GAAE,mBAAwB;IAO5C;;;OAGG;WACU,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB5C,aAAa,IAAI,OAAO;IAIlB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAOnB,WAAW;IAoBzB;;;;;;;;OAQG;IACG,KAAK,CACT,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,sBAAsB,GAChC,OAAO,CAAC,YAAY,CAAC;IAsBlB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAS/B;AAID;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE;QAAE,GAAG,EAAE,WAAW,CAAA;KAAE,CAAC;IAChC,UAAU,EAAE,YAAY,CAAC;CAC1B;AAED;;;;;;GAMG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA+B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAiC;IAC1D,OAAO,CAAC,iBAAiB,CAAwB;IACjD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAG1B;IAGF,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAO;IAE5C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAM;IAEpC;;;OAGG;IACG,WAAW,CACf,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,EAAE,CAAC;IA+CpB,aAAa,CACjB,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAiBpD,eAAe,CACnB,UAAU,EAAE,YAAY,EACxB,UAAU,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GAChC,OAAO,CAAC,MAAM,CAAC;IAgDlB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS;IAI1D,cAAc,IAAI,WAAW,EAAE;IAIzB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAShD,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAahD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAI/B"}
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Pure element-merge core for the GET_SCREEN action (#9105 Slice 2 / M2).
3
+ *
4
+ * GET_SCREEN returns a cheap, token-frugal list of grounded, clickable screen
5
+ * elements unified from three sources: OCR text boxes, accessibility (AX)
6
+ * clickables, and (optionally) VLM-detected elements. This module is the
7
+ * deterministic heart of that envelope — it collapses the three sources into a
8
+ * single deduplicated, stably-ordered element list, recording each element's
9
+ * `groundingSources` provenance.
10
+ *
11
+ * Like the M1 OCR bridge (`computeruse-ocr-bridge.ts`), this is intentionally
12
+ * dependency-free and pure: the source types live in `@elizaos/plugin-computeruse`
13
+ * (`SceneOcrBox` / `SceneAxNode` / `SceneVlmElement`), but we describe their
14
+ * shapes STRUCTURALLY here rather than importing them, to keep the no-hard-dep
15
+ * rule. That also makes the merge engine fully unit-testable with zero
16
+ * environment, decoupled from the runtime/native/model wiring (Slice 3).
17
+ */
18
+ /** Display-local bounding box `[x, y, w, h]`. */
19
+ export type Bbox = readonly [number, number, number, number];
20
+ /** Structural shape of computeruse's `SceneOcrBox`. */
21
+ export interface OcrBoxLike {
22
+ readonly id: string;
23
+ readonly text: string;
24
+ readonly bbox: Bbox;
25
+ readonly conf?: number;
26
+ readonly displayId: number;
27
+ }
28
+ /** Structural shape of computeruse's `SceneAxNode`. */
29
+ export interface AxNodeLike {
30
+ readonly id: string;
31
+ readonly role: string;
32
+ readonly label?: string;
33
+ readonly bbox: Bbox;
34
+ readonly actions?: readonly string[];
35
+ readonly displayId: number;
36
+ }
37
+ /** Structural shape of computeruse's `SceneVlmElement`. */
38
+ export interface VlmElementLike {
39
+ readonly id: string;
40
+ readonly kind: string;
41
+ readonly desc: string;
42
+ readonly bbox: Bbox;
43
+ readonly displayId: number;
44
+ }
45
+ export type GroundingSource = "ocr" | "ax" | "vlm";
46
+ /** A single unified, grounded screen element in the GET_SCREEN envelope. */
47
+ export interface GetScreenElement {
48
+ /** Stable id, preferring the AX id, then OCR, then VLM. */
49
+ id: string;
50
+ /** Display-local bbox `[x, y, w, h]` of the representative (highest-priority) source. */
51
+ bbox: [number, number, number, number];
52
+ /** User-facing text/label: AX label, else OCR text, else VLM description. */
53
+ text: string;
54
+ /** Element kind/role when known: AX role, else VLM kind. */
55
+ kind?: string;
56
+ displayId: number;
57
+ /** AX actions when the element is accessibility-grounded. */
58
+ actions?: string[];
59
+ /** Provenance — every source that contributed to this element, in fixed
60
+ * `ocr < ax < vlm` order for stability. Always non-empty. */
61
+ groundingSources: GroundingSource[];
62
+ }
63
+ export interface MergeScreenInput {
64
+ readonly ocr?: readonly OcrBoxLike[];
65
+ readonly ax?: readonly AxNodeLike[];
66
+ readonly vlm?: readonly VlmElementLike[];
67
+ }
68
+ export interface MergeScreenOptions {
69
+ /** Boxes whose IoU exceeds this collapse into one element (default 0.6). */
70
+ readonly iouThreshold?: number;
71
+ }
72
+ /** Intersection-over-union of two `[x, y, w, h]` boxes. 0 when either is empty
73
+ * or they don't overlap. */
74
+ export declare function bboxIou(a: Bbox, b: Bbox): number;
75
+ /**
76
+ * Merge OCR boxes + AX clickables + VLM elements into one deduplicated,
77
+ * deterministically-ordered element list.
78
+ *
79
+ * - Elements from different sources whose bboxes overlap above `iouThreshold`
80
+ * (and share a `displayId`) collapse into one element that records all
81
+ * contributing sources in `groundingSources`.
82
+ * - Field precedence is AX > OCR > VLM (AX wins id/label/role; OCR text fills
83
+ * in when AX has no label; VLM desc is the last resort).
84
+ * - Output order is top-to-bottom, then left-to-right, so the envelope is
85
+ * stable across turns regardless of input ordering.
86
+ * - Degrades gracefully: any source may be absent/empty (e.g. accessibility off)
87
+ * and the function never throws.
88
+ */
89
+ export declare function mergeScreenElements(input: MergeScreenInput, options?: MergeScreenOptions): GetScreenElement[];
90
+ //# sourceMappingURL=get-screen-elements.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"get-screen-elements.d.ts","sourceRoot":"","sources":["../src/get-screen-elements.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,iDAAiD;AACjD,MAAM,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAE7D,uDAAuD;AACvD,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,uDAAuD;AACvD,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,OAAO,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,2DAA2D;AAC3D,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,CAAC;AAEnD,4EAA4E;AAC5E,MAAM,WAAW,gBAAgB;IAC/B,2DAA2D;IAC3D,EAAE,EAAE,MAAM,CAAC;IACX,yFAAyF;IACzF,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,6EAA6E;IAC7E,IAAI,EAAE,MAAM,CAAC;IACb,4DAA4D;IAC5D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,6DAA6D;IAC7D,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB;iEAC6D;IAC7D,gBAAgB,EAAE,eAAe,EAAE,CAAC;CACrC;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,GAAG,CAAC,EAAE,SAAS,UAAU,EAAE,CAAC;IACrC,QAAQ,CAAC,EAAE,CAAC,EAAE,SAAS,UAAU,EAAE,CAAC;IACpC,QAAQ,CAAC,GAAG,CAAC,EAAE,SAAS,cAAc,EAAE,CAAC;CAC1C;AAED,MAAM,WAAW,kBAAkB;IACjC,4EAA4E;IAC5E,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAID;4BAC4B;AAC5B,wBAAgB,OAAO,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,GAAG,MAAM,CAchD;AAoBD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,mBAAmB,CACjC,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,kBAAuB,GAC/B,gBAAgB,EAAE,CAkEpB"}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * GET_SCREEN core (issue #9105 / M2) — token-frugal structured screen readout.
3
+ *
4
+ * Returns OCR text + grounded elements (id/text/bbox/semantic position) from a
5
+ * captured frame using the registered coord-OCR service (native Windows OCR /
6
+ * docTR / Apple Vision — zero LLM tokens). The raw image is OMITTED by default
7
+ * (`includeImage:false`) so a CUA loop can read the screen each tick without
8
+ * spending image tokens; it is only base64-attached when explicitly requested.
9
+ *
10
+ * Pure + injectable (the OCR service can be passed in) so it is unit-testable
11
+ * without a real capture or a registered provider.
12
+ */
13
+ import { type OcrWithCoordsService } from "./ocr-with-coords.js";
14
+ export interface GetScreenElement {
15
+ /**
16
+ * Monotonic 1-based Set-of-Marks number (reading order). A model can pick
17
+ * `[3]` instead of regressing raw coordinates.
18
+ */
19
+ index: number;
20
+ /** Stable per-result id. */
21
+ id: string;
22
+ text: string;
23
+ /** Display-absolute [x, y, width, height]. */
24
+ bbox: [number, number, number, number];
25
+ /** Click target — the integer center of `bbox`, the point `index` resolves to. */
26
+ center: {
27
+ x: number;
28
+ y: number;
29
+ };
30
+ semantic_position: string;
31
+ displayId: number;
32
+ }
33
+ export interface GetScreenResult {
34
+ op: "get_screen";
35
+ displayId: number;
36
+ width: number;
37
+ height: number;
38
+ /** When the source frame was captured (ms epoch). */
39
+ lastChangeTime: number;
40
+ /** True when a coord-OCR provider was available and ran. */
41
+ ocrAvailable: boolean;
42
+ ocrText: string;
43
+ elements: GetScreenElement[];
44
+ elementCount: number;
45
+ /** Base64 PNG — only present when `includeImage` was requested. */
46
+ image?: string;
47
+ }
48
+ export interface BuildGetScreenOptions {
49
+ pngBytes: Uint8Array;
50
+ displayId?: number;
51
+ includeImage?: boolean;
52
+ includeOcr?: boolean;
53
+ capturedAt?: number;
54
+ /** Override for tests; defaults to the registered coord-OCR service. */
55
+ ocrService?: OcrWithCoordsService | null;
56
+ }
57
+ export declare function buildGetScreen(opts: BuildGetScreenOptions): Promise<GetScreenResult>;
58
+ /** Human-readable one-line summary for the agent reply. */
59
+ export declare function summarizeGetScreen(r: GetScreenResult): string;
60
+ //# sourceMappingURL=get-screen.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"get-screen.d.ts","sourceRoot":"","sources":["../src/get-screen.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAEL,KAAK,oBAAoB,EAE1B,MAAM,sBAAsB,CAAC;AAE9B,MAAM,WAAW,gBAAgB;IAC/B;;;OAGG;IACH,KAAK,EAAE,MAAM,CAAC;IACd,4BAA4B;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,8CAA8C;IAC9C,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,kFAAkF;IAClF,MAAM,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACjC,iBAAiB,EAAE,MAAM,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,YAAY,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,qDAAqD;IACrD,cAAc,EAAE,MAAM,CAAC;IACvB,4DAA4D;IAC5D,YAAY,EAAE,OAAO,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,mEAAmE;IACnE,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,UAAU,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,UAAU,CAAC,EAAE,oBAAoB,GAAG,IAAI,CAAC;CAC1C;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC,eAAe,CAAC,CAwD1B;AAED,2DAA2D;AAC3D,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,eAAe,GAAG,MAAM,CAa7D"}
@@ -0,0 +1,89 @@
1
+ /** Raw-pixel input descriptor (mirrors `sharp.SharpOptions["raw"]`). */
2
+ export interface SharpRawInput {
3
+ width: number;
4
+ height: number;
5
+ channels: 1 | 2 | 3 | 4;
6
+ }
7
+ /** Constructor options subset used by the codebase. */
8
+ export interface SharpFactoryOptions {
9
+ raw?: SharpRawInput;
10
+ limitInputPixels?: number | boolean;
11
+ failOnError?: boolean;
12
+ }
13
+ /** `resize` options subset used by the codebase. */
14
+ export interface SharpResizeOptions {
15
+ fit?: "fill" | "contain" | "cover" | "inside" | "outside";
16
+ }
17
+ /** RGBA background for `extend` (alpha is accepted, but only for encoded outputs). */
18
+ export interface SharpColor {
19
+ r: number;
20
+ g: number;
21
+ b: number;
22
+ alpha?: number;
23
+ }
24
+ export interface SharpExtendOptions {
25
+ top?: number;
26
+ bottom?: number;
27
+ left?: number;
28
+ right?: number;
29
+ background?: SharpColor;
30
+ }
31
+ export interface SharpExtractRegion {
32
+ left: number;
33
+ top: number;
34
+ width: number;
35
+ height: number;
36
+ }
37
+ /** Subset of `sharp.Metadata` the codebase reads. */
38
+ export interface SharpMetadata {
39
+ width?: number;
40
+ height?: number;
41
+ channels?: number;
42
+ format?: string;
43
+ }
44
+ export interface SharpRawInfo {
45
+ width: number;
46
+ height: number;
47
+ channels: number;
48
+ }
49
+ export interface SharpResolveWithObject {
50
+ data: Buffer;
51
+ info: SharpRawInfo;
52
+ }
53
+ /**
54
+ * The chainable instance surface. This is structurally the subset of
55
+ * `sharp.Sharp` the codebase touches, so a real `sharp` instance satisfies it
56
+ * and call sites need no per-backend typing.
57
+ */
58
+ export interface SharpInstance {
59
+ metadata(): Promise<SharpMetadata>;
60
+ resize(width: number, height: number, options?: SharpResizeOptions): SharpInstance;
61
+ removeAlpha(): SharpInstance;
62
+ ensureAlpha(): SharpInstance;
63
+ extract(region: SharpExtractRegion): SharpInstance;
64
+ extend(options: SharpExtendOptions): SharpInstance;
65
+ trim(): SharpInstance;
66
+ clone(): SharpInstance;
67
+ png(): SharpInstance;
68
+ jpeg(): SharpInstance;
69
+ raw(): SharpInstance;
70
+ toBuffer(): Promise<Buffer>;
71
+ toBuffer(options: {
72
+ resolveWithObject: true;
73
+ }): Promise<SharpResolveWithObject>;
74
+ }
75
+ /** The callable factory surface (`sharp(input, options?)`). */
76
+ export type SharpFactory = (input?: Buffer | Uint8Array, options?: SharpFactoryOptions) => SharpInstance;
77
+ /**
78
+ * Resolve the image backend. Tries native `sharp` first (dynamic import so the
79
+ * native addon is never touched at module-eval); on any failure falls back to
80
+ * the pure-JS jimp shim. The result is cached for the process lifetime.
81
+ */
82
+ export declare function getSharp(): Promise<SharpFactory>;
83
+ /**
84
+ * Construct the pure-JS shim factory directly. Exposed so the compat test can
85
+ * diff the shim against native `sharp` without depending on which backend
86
+ * `getSharp()` happens to resolve on the host.
87
+ */
88
+ export declare function createJimpShim(): SharpFactory;
89
+ //# sourceMappingURL=sharp-compat.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sharp-compat.d.ts","sourceRoot":"","sources":["../../src/image/sharp-compat.ts"],"names":[],"mappings":"AAmBA,wEAAwE;AACxE,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,mBAAmB;IAClC,GAAG,CAAC,EAAE,aAAa,CAAC;IACpB,gBAAgB,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IACpC,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,oDAAoD;AACpD,MAAM,WAAW,kBAAkB;IACjC,GAAG,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,QAAQ,GAAG,SAAS,CAAC;CAC3D;AAED,iFAAiF;AACjF,MAAM,WAAW,UAAU;IACzB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,qDAAqD;AACrD,MAAM,WAAW,aAAa;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,YAAY,CAAC;CACpB;AAED;;;;GAIG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,IAAI,OAAO,CAAC,aAAa,CAAC,CAAC;IACnC,MAAM,CACJ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,kBAAkB,GAC3B,aAAa,CAAC;IACjB,WAAW,IAAI,aAAa,CAAC;IAC7B,WAAW,IAAI,aAAa,CAAC;IAC7B,OAAO,CAAC,MAAM,EAAE,kBAAkB,GAAG,aAAa,CAAC;IACnD,MAAM,CAAC,OAAO,EAAE,kBAAkB,GAAG,aAAa,CAAC;IACnD,IAAI,IAAI,aAAa,CAAC;IACtB,KAAK,IAAI,aAAa,CAAC;IACvB,GAAG,IAAI,aAAa,CAAC;IACrB,IAAI,IAAI,aAAa,CAAC;IACtB,GAAG,IAAI,aAAa,CAAC;IACrB,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE;QAChB,iBAAiB,EAAE,IAAI,CAAC;KACzB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACrC;AAED,+DAA+D;AAC/D,MAAM,MAAM,YAAY,GAAG,CACzB,KAAK,CAAC,EAAE,MAAM,GAAG,UAAU,EAC3B,OAAO,CA
AC,EAAE,mBAAmB,KAC1B,aAAa,CAAC;AAInB;;;;GAIG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,YAAY,CAAC,CAStD;AAyKD;;;;GAIG;AACH,wBAAgB,cAAc,IAAI,YAAY,CAuB7C"}
@@ -0,0 +1,15 @@
1
+ export declare const MAX_VISION_IMAGE_BYTES: number;
2
+ export interface ValidatedVisionImage {
3
+ readonly width: number;
4
+ readonly height: number;
5
+ readonly format: "jpeg" | "png" | "webp";
6
+ readonly contentType: "image/jpeg" | "image/png" | "image/webp";
7
+ }
8
+ export declare function estimateBase64DecodedBytes(base64: string): number;
9
+ export declare function parseVisionDataImageUrl(value: string): {
10
+ mimeType: string;
11
+ base64: string;
12
+ };
13
+ export declare function assertValidVisionImageBuffer(data: Buffer): Promise<ValidatedVisionImage>;
14
+ export declare function assertSafeVisionDataImageUrl(imageUrl: string): Promise<ValidatedVisionImage>;
15
+ //# sourceMappingURL=image-input.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image-input.d.ts","sourceRoot":"","sources":["../src/image-input.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,sBAAsB,QAAmB,CAAC;AASvD,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;IACzC,QAAQ,CAAC,WAAW,EAAE,YAAY,GAAG,WAAW,GAAG,YAAY,CAAC;CACjE;AAED,wBAAgB,0BAA0B,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAKjE;AAED,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,MAAM,GAAG;IACtD,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB,CA8BA;AAED,wBAAsB,4BAA4B,CAChD,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,oBAAoB,CAAC,CAkC/B;AAED,wBAAsB,4BAA4B,CAChD,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,oBAAoB,CAAC,CAG/B"}
@@ -0,0 +1,4 @@
1
+ import type { Plugin } from "@elizaos/core";
2
+ export declare const visionPlugin: Plugin;
3
+ export default visionPlugin;
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAwB5C,eAAO,MAAM,YAAY,EAAE,MAuH1B,CAAC;AAEF,eAAe,YAAY,CAAC"}