stelo 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +184 -0
- package/README.md +853 -0
- package/dist/accessibility.d.ts +227 -0
- package/dist/accessibility.d.ts.map +1 -0
- package/dist/accessibility.js +602 -0
- package/dist/accessibility.js.map +1 -0
- package/dist/agent.d.ts +870 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +1107 -0
- package/dist/agent.js.map +1 -0
- package/dist/audio-stream.d.ts +114 -0
- package/dist/audio-stream.d.ts.map +1 -0
- package/dist/audio-stream.js +167 -0
- package/dist/audio-stream.js.map +1 -0
- package/dist/clipboard.d.ts +99 -0
- package/dist/clipboard.d.ts.map +1 -0
- package/dist/clipboard.js +352 -0
- package/dist/clipboard.js.map +1 -0
- package/dist/config.d.ts +183 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +477 -0
- package/dist/config.js.map +1 -0
- package/dist/context.d.ts +213 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +387 -0
- package/dist/context.js.map +1 -0
- package/dist/cortex.d.ts +548 -0
- package/dist/cortex.d.ts.map +1 -0
- package/dist/cortex.js +1479 -0
- package/dist/cortex.js.map +1 -0
- package/dist/errors.d.ts +133 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +278 -0
- package/dist/errors.js.map +1 -0
- package/dist/events.d.ts +227 -0
- package/dist/events.d.ts.map +1 -0
- package/dist/events.js +429 -0
- package/dist/events.js.map +1 -0
- package/dist/executor.d.ts +212 -0
- package/dist/executor.d.ts.map +1 -0
- package/dist/executor.js +545 -0
- package/dist/executor.js.map +1 -0
- package/dist/index.d.ts +69 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +167 -0
- package/dist/index.js.map +1 -0
- package/dist/integration.d.ts +159 -0
- package/dist/integration.d.ts.map +1 -0
- package/dist/integration.js +533 -0
- package/dist/integration.js.map +1 -0
- package/dist/keyboard.d.ts +276 -0
- package/dist/keyboard.d.ts.map +1 -0
- package/dist/keyboard.js +404 -0
- package/dist/keyboard.js.map +1 -0
- package/dist/logger.d.ts +198 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +516 -0
- package/dist/logger.js.map +1 -0
- package/dist/middleware.d.ts +183 -0
- package/dist/middleware.d.ts.map +1 -0
- package/dist/middleware.js +493 -0
- package/dist/middleware.js.map +1 -0
- package/dist/monitor.d.ts +136 -0
- package/dist/monitor.d.ts.map +1 -0
- package/dist/monitor.js +341 -0
- package/dist/monitor.js.map +1 -0
- package/dist/mouse.d.ts +290 -0
- package/dist/mouse.d.ts.map +1 -0
- package/dist/mouse.js +466 -0
- package/dist/mouse.js.map +1 -0
- package/dist/plugin.d.ts +157 -0
- package/dist/plugin.d.ts.map +1 -0
- package/dist/plugin.js +409 -0
- package/dist/plugin.js.map +1 -0
- package/dist/process.d.ts +106 -0
- package/dist/process.d.ts.map +1 -0
- package/dist/process.js +326 -0
- package/dist/process.js.map +1 -0
- package/dist/recorder.d.ts +100 -0
- package/dist/recorder.d.ts.map +1 -0
- package/dist/recorder.js +258 -0
- package/dist/recorder.js.map +1 -0
- package/dist/safety.d.ts +59 -0
- package/dist/safety.d.ts.map +1 -0
- package/dist/safety.js +98 -0
- package/dist/safety.js.map +1 -0
- package/dist/scheduler.d.ts +152 -0
- package/dist/scheduler.d.ts.map +1 -0
- package/dist/scheduler.js +615 -0
- package/dist/scheduler.js.map +1 -0
- package/dist/screen.d.ts +96 -0
- package/dist/screen.d.ts.map +1 -0
- package/dist/screen.js +154 -0
- package/dist/screen.js.map +1 -0
- package/dist/session.d.ts +209 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +479 -0
- package/dist/session.js.map +1 -0
- package/dist/stream.d.ts +168 -0
- package/dist/stream.d.ts.map +1 -0
- package/dist/stream.js +298 -0
- package/dist/stream.js.map +1 -0
- package/dist/telemetry.d.ts +223 -0
- package/dist/telemetry.d.ts.map +1 -0
- package/dist/telemetry.js +433 -0
- package/dist/telemetry.js.map +1 -0
- package/dist/types.d.ts +165 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/bezier.d.ts +51 -0
- package/dist/utils/bezier.d.ts.map +1 -0
- package/dist/utils/bezier.js +117 -0
- package/dist/utils/bezier.js.map +1 -0
- package/dist/utils/helpers.d.ts +90 -0
- package/dist/utils/helpers.d.ts.map +1 -0
- package/dist/utils/helpers.js +143 -0
- package/dist/utils/helpers.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +18 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/validation.d.ts +254 -0
- package/dist/validation.d.ts.map +1 -0
- package/dist/validation.js +478 -0
- package/dist/validation.js.map +1 -0
- package/dist/vision.d.ts +719 -0
- package/dist/vision.d.ts.map +1 -0
- package/dist/vision.js +1197 -0
- package/dist/vision.js.map +1 -0
- package/dist/window.d.ts +80 -0
- package/dist/window.d.ts.map +1 -0
- package/dist/window.js +170 -0
- package/dist/window.js.map +1 -0
- package/dist/workflow.d.ts +224 -0
- package/dist/workflow.d.ts.map +1 -0
- package/dist/workflow.js +578 -0
- package/dist/workflow.js.map +1 -0
- package/index.d.ts +840 -0
- package/index.js +495 -0
- package/package.json +91 -0
package/dist/vision.js
ADDED
|
@@ -0,0 +1,1197 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================================
|
|
3
|
+
// Stelo — Vision & Change Detection Module
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// Advanced screen analysis for desktop automation. Provides visual grounding,
|
|
6
|
+
// change detection, action verification, and state tracking primitives.
|
|
7
|
+
// ============================================================================
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.AgentVision = exports.vision = void 0;
|
|
10
|
+
exports.createAgentVision = createAgentVision;
|
|
11
|
+
const native = require('../index.js');
|
|
12
|
+
// ── Vision Module ───────────────────────────────────────────────────────────
|
|
13
|
+
/**
|
|
14
|
+
* Vision and change detection utilities for automation workflows.
|
|
15
|
+
*
|
|
16
|
+
* These primitives enable visual grounding, action verification, and
|
|
17
|
+
* state tracking - essential building blocks for robust automation flows.
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* ```typescript
|
|
21
|
+
* import { vision, screen } from 'stelo';
|
|
22
|
+
*
|
|
23
|
+
* // Take a reference screenshot
|
|
24
|
+
* const before = vision.captureReference();
|
|
25
|
+
*
|
|
26
|
+
* // Perform some action
|
|
27
|
+
* await mouse.click();
|
|
28
|
+
*
|
|
29
|
+
* // Verify the screen changed
|
|
30
|
+
* const diff = vision.diff(before);
|
|
31
|
+
* console.log(`${diff.changePercentage}% of screen changed`);
|
|
32
|
+
*
|
|
33
|
+
* // Wait for UI to stabilize after action
|
|
34
|
+
* await vision.waitForStable({ stabilityThreshold: 0.5 });
|
|
35
|
+
*
|
|
36
|
+
* // Analyze screen as a grid for vision model
|
|
37
|
+
* const grid = vision.analyzeGrid(16, 9);
|
|
38
|
+
* const textCells = grid.cells.filter(c => c.likelyText);
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
exports.vision = {
|
|
42
|
+
/**
|
|
43
|
+
* Capture a reference screenshot for later comparison.
|
|
44
|
+
* Use this before triggering an action to verify it had an effect.
|
|
45
|
+
*
|
|
46
|
+
* @param region - Optional region to capture (full screen if not specified)
|
|
47
|
+
* @returns Reference object to pass to diff()
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* ```typescript
|
|
51
|
+
* const before = vision.captureReference();
|
|
52
|
+
* await mouse.click();
|
|
53
|
+
* const diff = vision.diff(before);
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
captureReference(region) {
|
|
57
|
+
const cap = region
|
|
58
|
+
? native.screenCapture(region.x, region.y, region.width, region.height)
|
|
59
|
+
: native.screenCapture();
|
|
60
|
+
return {
|
|
61
|
+
data: cap.data,
|
|
62
|
+
width: cap.width,
|
|
63
|
+
height: cap.height,
|
|
64
|
+
x: region?.x ?? 0,
|
|
65
|
+
y: region?.y ?? 0,
|
|
66
|
+
};
|
|
67
|
+
},
|
|
68
|
+
/**
|
|
69
|
+
* Compare current screen to a reference capture.
|
|
70
|
+
* Returns detailed diff statistics including change percentage and bounds.
|
|
71
|
+
*
|
|
72
|
+
* @param reference - Reference from captureReference()
|
|
73
|
+
* @param options - Diff options (tolerance, sample rate)
|
|
74
|
+
* @returns Diff result with change statistics
|
|
75
|
+
*
|
|
76
|
+
* @example
|
|
77
|
+
* ```typescript
|
|
78
|
+
* const diff = vision.diff(before, { tolerance: 15 });
|
|
79
|
+
* if (diff.changePercentage > 1) {
|
|
80
|
+
* console.log('Screen changed!', diff.changedBounds);
|
|
81
|
+
* }
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
84
|
+
diff(reference, options) {
|
|
85
|
+
const result = native.visionDiff(reference.data, reference.width, reference.height, reference.x, reference.y, reference.width, reference.height, options?.tolerance, options?.sampleRate);
|
|
86
|
+
return {
|
|
87
|
+
changePercentage: result.changePercentage ?? result.change_percentage,
|
|
88
|
+
changedPixelCount: result.changedPixelCount ?? result.changed_pixel_count,
|
|
89
|
+
totalPixelCount: result.totalPixelCount ?? result.total_pixel_count,
|
|
90
|
+
changedBounds: (result.hasChanges ?? result.has_changes)
|
|
91
|
+
? {
|
|
92
|
+
x: result.changedX ?? result.changed_x,
|
|
93
|
+
y: result.changedY ?? result.changed_y,
|
|
94
|
+
width: result.changedWidth ?? result.changed_width,
|
|
95
|
+
height: result.changedHeight ?? result.changed_height,
|
|
96
|
+
}
|
|
97
|
+
: undefined,
|
|
98
|
+
averageDiff: result.averageDiff ?? result.average_diff,
|
|
99
|
+
maxDiff: result.maxDiff ?? result.max_diff,
|
|
100
|
+
hasChanges: result.hasChanges ?? result.has_changes,
|
|
101
|
+
};
|
|
102
|
+
},
|
|
103
|
+
/**
|
|
104
|
+
* Analyze screen as a grid of cells.
|
|
105
|
+
* Each cell includes statistics useful for vision model region selection.
|
|
106
|
+
*
|
|
107
|
+
* This enables efficient visual grounding - instead of sending the entire
|
|
108
|
+
* screen to a vision model, you can identify regions of interest first.
|
|
109
|
+
*
|
|
110
|
+
* @param cols - Number of columns in grid
|
|
111
|
+
* @param rows - Number of rows in grid
|
|
112
|
+
* @param region - Optional region to analyze (full screen if not specified)
|
|
113
|
+
* @returns Grid analysis with cell statistics
|
|
114
|
+
*
|
|
115
|
+
* @example
|
|
116
|
+
* ```typescript
|
|
117
|
+
* // Analyze screen as 16x9 grid
|
|
118
|
+
* const grid = vision.analyzeGrid(16, 9);
|
|
119
|
+
*
|
|
120
|
+
* // Find cells likely containing text
|
|
121
|
+
* const textCells = grid.cells.filter(c => c.likelyText);
|
|
122
|
+
*
|
|
123
|
+
* // Get center of cell [3, 2]
|
|
124
|
+
* const center = vision.gridCellCenter(grid, 3, 2);
|
|
125
|
+
* await mouse.click(center.x, center.y);
|
|
126
|
+
* ```
|
|
127
|
+
*/
|
|
128
|
+
analyzeGrid(cols, rows, region) {
|
|
129
|
+
const result = native.visionAnalyzeGrid(cols, rows, region?.x, region?.y, region?.width, region?.height);
|
|
130
|
+
const cells = result.cells.map((c) => ({
|
|
131
|
+
gridX: c.gridX ?? c.grid_x,
|
|
132
|
+
gridY: c.gridY ?? c.grid_y,
|
|
133
|
+
screenX: c.screenX ?? c.screen_x,
|
|
134
|
+
screenY: c.screenY ?? c.screen_y,
|
|
135
|
+
width: c.width,
|
|
136
|
+
height: c.height,
|
|
137
|
+
avgColor: { r: c.avgR ?? c.avg_r, g: c.avgG ?? c.avg_g, b: c.avgB ?? c.avg_b },
|
|
138
|
+
variance: c.variance,
|
|
139
|
+
likelyText: c.likelyText ?? c.likely_text,
|
|
140
|
+
likelyUI: c.likelyUi ?? c.likely_ui,
|
|
141
|
+
}));
|
|
142
|
+
const activeCells = [];
|
|
143
|
+
const activeCellsX = result.activeCellsX ?? result.active_cells_x;
|
|
144
|
+
const activeCellsY = result.activeCellsY ?? result.active_cells_y;
|
|
145
|
+
for (let i = 0; i < activeCellsX.length; i++) {
|
|
146
|
+
activeCells.push({
|
|
147
|
+
gridX: activeCellsX[i],
|
|
148
|
+
gridY: activeCellsY[i],
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
return {
|
|
152
|
+
cols: result.cols,
|
|
153
|
+
rows: result.rows,
|
|
154
|
+
cellWidth: result.cellWidth ?? result.cell_width,
|
|
155
|
+
cellHeight: result.cellHeight ?? result.cell_height,
|
|
156
|
+
cells,
|
|
157
|
+
activeCells,
|
|
158
|
+
};
|
|
159
|
+
},
|
|
160
|
+
/**
|
|
161
|
+
* Get the screen center point of a grid cell.
|
|
162
|
+
*
|
|
163
|
+
* @param grid - Grid analysis from analyzeGrid()
|
|
164
|
+
* @param gridX - Cell column index
|
|
165
|
+
* @param gridY - Cell row index
|
|
166
|
+
* @returns Screen coordinates of cell center, or undefined if out of bounds
|
|
167
|
+
*/
|
|
168
|
+
gridCellCenter(grid, gridX, gridY) {
|
|
169
|
+
const cell = grid.cells.find((c) => c.gridX === gridX && c.gridY === gridY);
|
|
170
|
+
if (!cell)
|
|
171
|
+
return undefined;
|
|
172
|
+
return {
|
|
173
|
+
x: cell.screenX + Math.floor(cell.width / 2),
|
|
174
|
+
y: cell.screenY + Math.floor(cell.height / 2),
|
|
175
|
+
};
|
|
176
|
+
},
|
|
177
|
+
/**
|
|
178
|
+
* Wait until the screen changes beyond a threshold.
|
|
179
|
+
* Useful for detecting when an action triggers a visual response.
|
|
180
|
+
*
|
|
181
|
+
* @param thresholdPercent - Minimum change percentage to trigger (0-100)
|
|
182
|
+
* @param timeoutMs - Maximum time to wait
|
|
183
|
+
* @param options - Wait options (region, poll interval)
|
|
184
|
+
* @returns true if change detected, false if timed out
|
|
185
|
+
*
|
|
186
|
+
* @example
|
|
187
|
+
* ```typescript
|
|
188
|
+
* // Click and wait for visual feedback
|
|
189
|
+
* mouse.click();
|
|
190
|
+
* const changed = await vision.waitForChange(0.5, 3000);
|
|
191
|
+
* if (!changed) console.log('Button may not have responded');
|
|
192
|
+
* ```
|
|
193
|
+
*/
|
|
194
|
+
async waitForChange(thresholdPercent, timeoutMs, options) {
|
|
195
|
+
return native.visionWaitForChange(thresholdPercent, timeoutMs, options?.pollIntervalMs, options?.region?.x, options?.region?.y, options?.region?.width, options?.region?.height);
|
|
196
|
+
},
|
|
197
|
+
/**
|
|
198
|
+
* Wait until the screen stabilizes (stops changing).
|
|
199
|
+
* Essential for waiting for animations, loading spinners, or transitions.
|
|
200
|
+
*
|
|
201
|
+
* @param stabilityThreshold - Maximum change % to consider "stable" (0-100)
|
|
202
|
+
* @param stableDurationMs - How long screen must remain stable
|
|
203
|
+
* @param timeoutMs - Maximum time to wait
|
|
204
|
+
* @param options - Wait options (region, poll interval)
|
|
205
|
+
* @returns true if stabilized, false if timed out
|
|
206
|
+
*
|
|
207
|
+
* @example
|
|
208
|
+
* ```typescript
|
|
209
|
+
* // Click a button and wait for animation to complete
|
|
210
|
+
* mouse.click();
|
|
211
|
+
* await vision.waitForStable(0.1, 200, 5000);
|
|
212
|
+
* // Screen is now stable - safe to read or continue
|
|
213
|
+
* ```
|
|
214
|
+
*/
|
|
215
|
+
async waitForStable(stabilityThreshold, stableDurationMs, timeoutMs, options) {
|
|
216
|
+
return native.visionWaitForStable(stabilityThreshold, stableDurationMs, timeoutMs, options?.pollIntervalMs, options?.region?.x, options?.region?.y, options?.region?.width, options?.region?.height);
|
|
217
|
+
},
|
|
218
|
+
/**
|
|
219
|
+
* Compute a perceptual hash of a screen region.
|
|
220
|
+
* Two visually similar images will have hashes with low Hamming distance.
|
|
221
|
+
*
|
|
222
|
+
* Use this for fast "has the screen changed significantly?" checks
|
|
223
|
+
* without doing full pixel comparison.
|
|
224
|
+
*
|
|
225
|
+
* @param region - Optional region to hash (full screen if not specified)
|
|
226
|
+
* @returns 64-bit perceptual hash
|
|
227
|
+
*
|
|
228
|
+
* @example
|
|
229
|
+
* ```typescript
|
|
230
|
+
* const hash1 = vision.perceptualHash();
|
|
231
|
+
* await performSomeAction();
|
|
232
|
+
* const hash2 = vision.perceptualHash();
|
|
233
|
+
* const distance = vision.hashDistance(hash1, hash2);
|
|
234
|
+
* if (distance < 5) console.log('Screen looks similar');
|
|
235
|
+
* ```
|
|
236
|
+
*/
|
|
237
|
+
perceptualHash(region) {
|
|
238
|
+
return native.visionPerceptualHash(region?.x, region?.y, region?.width, region?.height);
|
|
239
|
+
},
|
|
240
|
+
/**
|
|
241
|
+
* Compute Hamming distance between two perceptual hashes.
|
|
242
|
+
* Lower distance = more visually similar. 0 = identical.
|
|
243
|
+
*
|
|
244
|
+
* Rules of thumb:
|
|
245
|
+
* - 0-5: Very similar (minor changes)
|
|
246
|
+
* - 5-10: Moderately similar
|
|
247
|
+
* - 10-20: Significant differences
|
|
248
|
+
* - 20+: Completely different
|
|
249
|
+
*
|
|
250
|
+
* @param hash1 - First perceptual hash
|
|
251
|
+
* @param hash2 - Second perceptual hash
|
|
252
|
+
* @returns Hamming distance (0-64)
|
|
253
|
+
*/
|
|
254
|
+
hashDistance(hash1, hash2) {
|
|
255
|
+
return native.visionHashDistance(hash1, hash2);
|
|
256
|
+
},
|
|
257
|
+
/**
|
|
258
|
+
* Find all pixels matching a color within a region.
|
|
259
|
+
* Returns all matching points up to a maximum count.
|
|
260
|
+
*
|
|
261
|
+
* @param color - Color to search for (RGB)
|
|
262
|
+
* @param tolerance - Color distance tolerance (0-441)
|
|
263
|
+
* @param options - Search options
|
|
264
|
+
* @returns Array of matching screen coordinates
|
|
265
|
+
*
|
|
266
|
+
* @example
|
|
267
|
+
* ```typescript
|
|
268
|
+
* // Find all red pixels
|
|
269
|
+
* const redPixels = vision.findAllColors(
|
|
270
|
+
* { r: 255, g: 0, b: 0 },
|
|
271
|
+
* 30,
|
|
272
|
+
* { maxResults: 100 }
|
|
273
|
+
* );
|
|
274
|
+
* ```
|
|
275
|
+
*/
|
|
276
|
+
findAllColors(color, tolerance, options) {
|
|
277
|
+
const result = native.visionFindAllColors(color.r, color.g, color.b, tolerance, options?.maxResults, options?.region?.x, options?.region?.y, options?.region?.width, options?.region?.height);
|
|
278
|
+
return result.map((p) => ({ x: p.x, y: p.y }));
|
|
279
|
+
},
|
|
280
|
+
/**
|
|
281
|
+
* Find clusters of similar colors (potential UI elements).
|
|
282
|
+
* Clusters are groups of nearby pixels with similar colors.
|
|
283
|
+
*
|
|
284
|
+
* @param color - Color to search for (RGB)
|
|
285
|
+
* @param tolerance - Color distance tolerance (0-441)
|
|
286
|
+
* @param minClusterSize - Minimum pixels to form a cluster
|
|
287
|
+
* @param region - Optional region to search (full screen if not specified)
|
|
288
|
+
* @returns Array of bounding rectangles for each cluster
|
|
289
|
+
*
|
|
290
|
+
* @example
|
|
291
|
+
* ```typescript
|
|
292
|
+
* // Find blue button-like regions
|
|
293
|
+
* const clusters = vision.findColorClusters(
|
|
294
|
+
* { r: 0, g: 120, b: 215 }, // Windows blue
|
|
295
|
+
* 40,
|
|
296
|
+
* 50 // At least 50 pixels
|
|
297
|
+
* );
|
|
298
|
+
* if (clusters.length > 0) {
|
|
299
|
+
* // Click center of first cluster
|
|
300
|
+
* const btn = clusters[0];
|
|
301
|
+
* mouse.click(btn.x + btn.width / 2, btn.y + btn.height / 2);
|
|
302
|
+
* }
|
|
303
|
+
* ```
|
|
304
|
+
*/
|
|
305
|
+
findColorClusters(color, tolerance, minClusterSize, region) {
|
|
306
|
+
const result = native.visionFindColorClusters(color.r, color.g, color.b, tolerance, minClusterSize, region?.x, region?.y, region?.width, region?.height);
|
|
307
|
+
return result.map((r) => ({
|
|
308
|
+
x: r.x,
|
|
309
|
+
y: r.y,
|
|
310
|
+
width: r.width,
|
|
311
|
+
height: r.height,
|
|
312
|
+
}));
|
|
313
|
+
},
|
|
314
|
+
/**
|
|
315
|
+
* Verify an action caused a visual change.
|
|
316
|
+
* High-level primitive that captures before/after and compares.
|
|
317
|
+
*
|
|
318
|
+
* @param action - Async action to execute and verify
|
|
319
|
+
* @param minChangePercent - Minimum change to consider verified
|
|
320
|
+
* @param timeoutMs - Maximum time to wait for change
|
|
321
|
+
* @param region - Optional region to monitor
|
|
322
|
+
* @returns Verification result with diff statistics
|
|
323
|
+
*
|
|
324
|
+
* @example
|
|
325
|
+
* ```typescript
|
|
326
|
+
* const result = await vision.verifyAction(
|
|
327
|
+
* async () => { mouse.click(); },
|
|
328
|
+
* 0.5, // At least 0.5% change
|
|
329
|
+
* 2000
|
|
330
|
+
* );
|
|
331
|
+
* if (!result.verified) {
|
|
332
|
+
* // Click didn't cause visual change - might need to retry
|
|
333
|
+
* }
|
|
334
|
+
* ```
|
|
335
|
+
*/
|
|
336
|
+
async verifyAction(action, minChangePercent, timeoutMs, region) {
|
|
337
|
+
const before = this.captureReference(region);
|
|
338
|
+
const startTime = Date.now();
|
|
339
|
+
await action();
|
|
340
|
+
// Poll for change
|
|
341
|
+
const pollInterval = 50;
|
|
342
|
+
const deadline = startTime + timeoutMs;
|
|
343
|
+
while (Date.now() < deadline) {
|
|
344
|
+
const diff = this.diff(before);
|
|
345
|
+
if (diff.changePercentage >= minChangePercent) {
|
|
346
|
+
return {
|
|
347
|
+
verified: true,
|
|
348
|
+
diff,
|
|
349
|
+
durationMs: Date.now() - startTime,
|
|
350
|
+
};
|
|
351
|
+
}
|
|
352
|
+
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
|
353
|
+
}
|
|
354
|
+
// Final check
|
|
355
|
+
const finalDiff = this.diff(before);
|
|
356
|
+
return {
|
|
357
|
+
verified: finalDiff.changePercentage >= minChangePercent,
|
|
358
|
+
diff: finalDiff,
|
|
359
|
+
durationMs: Date.now() - startTime,
|
|
360
|
+
};
|
|
361
|
+
},
|
|
362
|
+
/**
|
|
363
|
+
* Take a reference, perform action, wait for stability.
|
|
364
|
+
* Combines action execution with waiting for the UI to settle.
|
|
365
|
+
*
|
|
366
|
+
* @param action - Action to execute
|
|
367
|
+
* @param options - Wait and verification options
|
|
368
|
+
* @returns true if action completed and screen stabilized
|
|
369
|
+
*
|
|
370
|
+
* @example
|
|
371
|
+
* ```typescript
|
|
372
|
+
* // Click and wait for any animation to complete
|
|
373
|
+
* await vision.doAndWaitStable(async () => {
|
|
374
|
+
* await mouse.click(100, 200);
|
|
375
|
+
* });
|
|
376
|
+
* // Screen is now stable
|
|
377
|
+
* ```
|
|
378
|
+
*/
|
|
379
|
+
async doAndWaitStable(action, options) {
|
|
380
|
+
const { stabilityThreshold = 0.1, stableDurationMs = 150, timeoutMs = 5000, region } = options ?? {};
|
|
381
|
+
await action();
|
|
382
|
+
return this.waitForStable(stabilityThreshold, stableDurationMs, timeoutMs, { region });
|
|
383
|
+
},
|
|
384
|
+
};
|
|
385
|
+
// ─── Core Fingerprinting Functions ──────────────────────────────────────────
/**
 * Compute a compact visual fingerprint of a rectangular region inside an
 * RGBA pixel buffer.
 *
 * Pixels are sampled on a sparse lattice (every 2nd pixel per axis) and
 * folded into:
 *  - a normalized 16-bin brightness histogram
 *  - a normalized 8-bin hue histogram over sufficiently colorful pixels
 *  - horizontal edge density, average brightness, contrast, and variance
 *  - a 16-bit mini perceptual hash (4x4 brighter-than-average bitmap)
 *
 * @param data - RGBA pixel buffer, 4 bytes per pixel, row-major
 * @param width - Buffer width in pixels
 * @param height - Buffer height in pixels
 * @param startX - Region left edge (pixels)
 * @param startY - Region top edge (pixels)
 * @param cellW - Region width (pixels)
 * @param cellH - Region height (pixels)
 * @returns Fingerprint object consumed by fingerprintSimilarity()
 */
function computeFingerprint(data, width, height, startX, startY, cellW, cellH) {
    const brightnessHist = new Float64Array(16);
    const hueHist = new Float64Array(8);
    let sumBright = 0, sumBrightSq = 0;
    let edges = 0;
    let minBright = 255, maxBright = 0;
    let pixelCount = 0;
    const step = 2; // sample every 2nd pixel for speed
    for (let py = startY; py < startY + cellH && py < height; py += step) {
        for (let px = startX; px < startX + cellW && px < width; px += step) {
            const offset = (py * width + px) * 4;
            const r = data[offset];
            const g = data[offset + 1];
            const b = data[offset + 2];
            // Integer-weight luma (Rec.601 coefficients x1000)
            const bright = (r * 299 + g * 587 + b * 114) / 1000;
            sumBright += bright;
            sumBrightSq += bright * bright;
            pixelCount++;
            if (bright < minBright)
                minBright = bright;
            if (bright > maxBright)
                maxBright = bright;
            const binIdx = Math.min(15, Math.floor(bright / 16));
            brightnessHist[binIdx]++;
            const maxC = Math.max(r, g, b);
            const minC = Math.min(r, g, b);
            // Only accumulate hue for pixels with meaningful saturation.
            if (maxC - minC > 20) {
                let hue = 0;
                if (maxC === r)
                    hue = ((g - b) / (maxC - minC)) % 6;
                else if (maxC === g)
                    hue = (b - r) / (maxC - minC) + 2;
                else
                    hue = (r - g) / (maxC - minC) + 4;
                if (hue < 0)
                    hue += 6;
                const hueBin = Math.min(7, Math.floor(hue / 6 * 8));
                hueHist[hueBin]++;
            }
            // Horizontal edge: large luma jump to the next sampled pixel.
            if (px + step < startX + cellW && px + step < width) {
                const rOff = (py * width + px + step) * 4;
                const rBright = (data[rOff] * 299 + data[rOff + 1] * 587 + data[rOff + 2] * 114) / 1000;
                if (Math.abs(bright - rBright) > 25)
                    edges++;
            }
        }
    }
    // Normalize histograms into probability distributions.
    if (pixelCount > 0) {
        for (let i = 0; i < 16; i++)
            brightnessHist[i] /= pixelCount;
        let hueTotal = 0;
        for (let i = 0; i < 8; i++)
            hueTotal += hueHist[i];
        if (hueTotal > 0) {
            for (let i = 0; i < 8; i++)
                hueHist[i] /= hueTotal;
        }
    }
    if (pixelCount === 0) {
        // Fix: a region lying entirely off-buffer samples no pixels, which
        // previously left minBright=255 / maxBright=0 and produced a bogus
        // contrast of -255 that skewed fingerprintSimilarity()'s contrast
        // term. Report neutral brightness bounds instead.
        minBright = 0;
        maxBright = 0;
        pixelCount = 1;
    }
    const avgBright = sumBright / pixelCount;
    // max(0, ...) guards against tiny negative values from FP rounding.
    const variance = Math.max(0, sumBrightSq / pixelCount - avgBright * avgBright);
    // 4x4 mini perceptual hash: one bit per subcell, set when the subcell's
    // top-left sample is brighter than the region average.
    const miniW = Math.floor(cellW / 4) || 1;
    const miniH = Math.floor(cellH / 4) || 1;
    let phash = 0;
    for (let mr = 0; mr < 4; mr++) {
        for (let mc = 0; mc < 4; mc++) {
            const sx = startX + mc * miniW;
            const sy = startY + mr * miniH;
            if (sx < width && sy < height) {
                const off = (sy * width + sx) * 4;
                const b = (data[off] * 299 + data[off + 1] * 587 + data[off + 2] * 114) / 1000;
                if (b > avgBright)
                    phash |= (1 << (mr * 4 + mc));
            }
        }
    }
    return {
        brightnessHist, hueHist,
        edgeDensity: edges / Math.max(1, pixelCount),
        avgBrightness: avgBright,
        contrast: maxBright - minBright,
        variance,
        aspect: cellW / Math.max(1, cellH),
        phash,
    };
}
|
|
473
|
+
/**
 * Weighted similarity score between two fingerprints, in [0, 1].
 * 1 means visually identical; lower means more different.
 */
function fingerprintSimilarity(a, b) {
    // Histogram intersection for brightness (16 bins).
    let histSim = 0;
    for (let i = 0; i < 16; i++) {
        histSim += Math.min(a.brightnessHist[i], b.brightnessHist[i]);
    }
    // Histogram intersection for hue (8 bins), tracking total hue mass.
    let hueSim = 0;
    let aHueSum = 0;
    let bHueSum = 0;
    for (let i = 0; i < 8; i++) {
        hueSim += Math.min(a.hueHist[i], b.hueHist[i]);
        aHueSum += a.hueHist[i];
        bHueSum += b.hueHist[i];
    }
    // If both lack color, hue is irrelevant — treat as identical.
    if (aHueSum < 0.001 && bHueSum < 0.001) {
        hueSim = 1;
    }
    const edgeSim = 1 - Math.abs(a.edgeDensity - b.edgeDensity);
    const contrastSim = 1 - Math.abs(a.contrast - b.contrast) / 255;
    const brightSim = 1 - Math.abs(a.avgBrightness - b.avgBrightness) / 255;
    // Popcount of the XOR is the perceptual-hash Hamming distance (16 bits).
    let bits = a.phash ^ b.phash;
    let hamming = 0;
    while (bits !== 0) {
        bits &= bits - 1; // clear the lowest set bit
        hamming++;
    }
    const phashSim = 1 - hamming / 16;
    return histSim * 0.25
        + hueSim * 0.10
        + edgeSim * 0.15
        + contrastSim * 0.15
        + brightSim * 0.10
        + phashSim * 0.25;
}
|
|
504
|
+
// ─── Helper to create LocatedElement ────────────────────────────────────────
/**
 * Build a LocatedElement: a matched screen region bundled with convenience
 * actions (click, type, hover) that target the region's center point.
 */
function makeLocatedElement(x, y, w, h, confidence, similarity, fp, label, supportCount = 1) {
    const centerX = () => x + Math.floor(w / 2);
    const centerY = () => y + Math.floor(h / 2);
    return {
        x, y, w, h, confidence, similarity, label, supportCount, fingerprint: fp,
        /** Click the element center (default: left button). */
        click(button) {
            native.mouseClickAt(centerX(), centerY(), button ?? 'left', false);
        },
        /** Double-click the element center with the left button. */
        doubleClick() {
            native.mouseMove(centerX(), centerY());
            native.mouseDoubleClick('left');
        },
        /** Right-click the element center. */
        rightClick() {
            native.mouseClickAt(centerX(), centerY(), 'right', false);
        },
        /** Move the cursor to the element center, optionally smoothly. */
        moveTo(options) {
            if (options?.smooth) {
                native.mouseMoveSmooth(centerX(), centerY(), options.duration ?? 300, 'easeInOut');
            }
            else {
                native.mouseMove(centerX(), centerY());
            }
        },
        /** Click the element, then type the given text into it. */
        type(text) {
            native.mouseClickAt(centerX(), centerY(), 'left', false);
            native.keyboardType(text);
        },
        /** Re-fingerprint the region and compare against the stored print. */
        isStillPresent(threshold) {
            const cap = native.screenCapture();
            const current = computeFingerprint(cap.data, cap.width, cap.height, x, y, w, h);
            return fingerprintSimilarity(fp, current) >= (threshold ?? 0.7);
        },
    };
}
|
|
543
|
+
// ─── AgentVision Class ──────────────────────────────────────────────────────
|
|
544
|
+
class AgentVision {
|
|
545
|
+
cols;
|
|
546
|
+
rows;
|
|
547
|
+
maxMemories;
|
|
548
|
+
defaultRegionSize;
|
|
549
|
+
matchThreshold;
|
|
550
|
+
searchRadius;
|
|
551
|
+
appContext;
|
|
552
|
+
memories = [];
|
|
553
|
+
sequenceCounter = 0;
|
|
554
|
+
recentActions = [];
|
|
555
|
+
temporalPatterns = new Map();
|
|
556
|
+
// Grid cache
|
|
557
|
+
gridFingerprints = [];
|
|
558
|
+
gridDirty = true;
|
|
559
|
+
lastGridTime = 0;
|
|
560
|
+
lastCapture = null;
|
|
561
|
+
constructor(config = {}) {
|
|
562
|
+
this.cols = config.cols ?? 32;
|
|
563
|
+
this.rows = config.rows ?? 18;
|
|
564
|
+
this.maxMemories = config.maxMemories ?? 2000;
|
|
565
|
+
this.defaultRegionSize = config.defaultRegionSize ?? { w: 100, h: 60 };
|
|
566
|
+
this.matchThreshold = config.matchThreshold ?? 0.65;
|
|
567
|
+
this.searchRadius = config.searchRadius ?? 400;
|
|
568
|
+
this.appContext = config.appContext;
|
|
569
|
+
}
|
|
570
|
+
// ═══ FINGERPRINT — Capture visual identity of any region ══════════════
|
|
571
|
+
/**
|
|
572
|
+
* Fingerprint a screen region. Returns a compact visual descriptor
|
|
573
|
+
* that can be compared against other fingerprints for similarity.
|
|
574
|
+
*
|
|
575
|
+
* @example
|
|
576
|
+
* ```typescript
|
|
577
|
+
* const fp = agentVision.fingerprint(100, 200, 150, 40);
|
|
578
|
+
* // Later: check if the same element is somewhere else
|
|
579
|
+
* const match = agentVision.locate(fp);
|
|
580
|
+
* ```
|
|
581
|
+
*/
|
|
582
|
+
fingerprint(x, y, w, h) {
|
|
583
|
+
const cap = native.screenCapture();
|
|
584
|
+
return computeFingerprint(cap.data, cap.width, cap.height, x, y, w, h);
|
|
585
|
+
}
|
|
586
|
+
/**
|
|
587
|
+
* Compute similarity between two fingerprints (0-1, 1 = identical).
|
|
588
|
+
*/
|
|
589
|
+
similarity(a, b) {
|
|
590
|
+
return fingerprintSimilarity(a, b);
|
|
591
|
+
}
|
|
592
|
+
// ═══ REMEMBER — Teach the agent about screen elements ═════════════════
|
|
593
|
+
/**
|
|
594
|
+
* Remember an element at a position with a label.
|
|
595
|
+
* The agent fingerprints the region and stores it for later recognition.
|
|
596
|
+
*
|
|
597
|
+
* @example
|
|
598
|
+
* ```typescript
|
|
599
|
+
* // Agent clicked Save, now remembers what it looks like
|
|
600
|
+
* agentVision.remember('save-button', 350, 15, { w: 80, h: 30 });
|
|
601
|
+
*
|
|
602
|
+
* // Later, find it again even if it moved
|
|
603
|
+
* const saveBtn = agentVision.find('save-button');
|
|
604
|
+
* if (saveBtn) saveBtn.click();
|
|
605
|
+
* ```
|
|
606
|
+
*/
|
|
607
|
+
remember(label, x, y, regionSize, action = 'click') {
|
|
608
|
+
const size = regionSize ?? this.defaultRegionSize;
|
|
609
|
+
const rx = Math.max(0, x - Math.floor(size.w / 2));
|
|
610
|
+
const ry = Math.max(0, y - Math.floor(size.h / 2));
|
|
611
|
+
const cap = native.screenCapture();
|
|
612
|
+
const fp = computeFingerprint(cap.data, cap.width, cap.height, rx, ry, size.w, size.h);
|
|
613
|
+
const memory = {
|
|
614
|
+
fingerprint: fp,
|
|
615
|
+
action,
|
|
616
|
+
label,
|
|
617
|
+
position: { x, y },
|
|
618
|
+
regionSize: size,
|
|
619
|
+
timestamp: Date.now(),
|
|
620
|
+
appContext: this.appContext,
|
|
621
|
+
sequenceIndex: this.sequenceCounter++,
|
|
622
|
+
};
|
|
623
|
+
this.memories.push(memory);
|
|
624
|
+
this.evictOldMemories();
|
|
625
|
+
this.gridDirty = true;
|
|
626
|
+
// Track temporal patterns
|
|
627
|
+
this.trackAction(action, label, x, y);
|
|
628
|
+
return memory;
|
|
629
|
+
}
|
|
630
|
+
/**
 * Remember after performing a click — records the action AND verifies it.
 * Captures before/after fingerprints and records whether the click did anything.
 *
 * @param {string} label - Name to store the memory under.
 * @param {number} x - Click X coordinate (also the remembered region's center).
 * @param {number} y - Click Y coordinate.
 * @param {{w: number, h: number}} [regionSize] - Region to fingerprint; defaults to `defaultRegionSize`.
 * @returns The stored memory plus a `success` flag (true when the screen changed > 0.3%).
 *
 * @example
 * ```typescript
 * const result = await agentVision.rememberClick('file-menu', 44, 12);
 * console.log(`Click ${result.success ? 'worked' : 'had no effect'}`);
 * ```
 */
async rememberClick(label, x, y, regionSize) {
    const size = regionSize ?? this.defaultRegionSize;
    // Convert center point to clamped top-left of the fingerprint region.
    const rx = Math.max(0, x - Math.floor(size.w / 2));
    const ry = Math.max(0, y - Math.floor(size.h / 2));
    // Capture before the click so the fingerprint reflects the pre-click state.
    const capBefore = native.screenCapture();
    const fp = computeFingerprint(capBefore.data, capBefore.width, capBefore.height, rx, ry, size.w, size.h);
    // Perform click
    native.mouseClickAt(x, y, 'left', false);
    // Wait a bit for UI response (400 ms is also recorded as responseTimeMs below).
    await new Promise(r => setTimeout(r, 400));
    // Capture after and measure change.
    // NOTE(review): capAfter is never used — visionDiff only receives the *before*
    // frame, so it presumably captures the current screen internally. Confirm
    // against the native API; if so, this extra screenCapture() is dead work.
    const capAfter = native.screenCapture();
    const diff = native.visionDiff(capBefore.data, capBefore.width, capBefore.height, undefined, undefined, undefined, undefined, 10, 2);
    // Native layer may report either camelCase or snake_case; accept both.
    const changePercent = diff.changePercentage ?? diff.change_percentage ?? 0;
    // Heuristic: any change above 0.3% of the screen counts as "the click worked".
    const success = changePercent > 0.3;
    // Property order preserved: memories are serialized verbatim by save().
    const memory = {
        fingerprint: fp,
        action: 'click',
        label,
        position: { x, y },
        regionSize: size,
        timestamp: Date.now(),
        outcome: { screenChangePercent: changePercent, responseTimeMs: 400 },
        success,
        appContext: this.appContext,
        sequenceIndex: this.sequenceCounter++,
    };
    this.memories.push(memory);
    this.evictOldMemories();
    // Invalidate the cached grid — the click likely changed the screen.
    this.gridDirty = true;
    this.trackAction('click', label, x, y);
    return { ...memory, success };
}
|
|
674
|
+
// ═══ FIND — Locate remembered elements on screen ══════════════════════
|
|
675
|
+
/**
|
|
676
|
+
* Find a previously remembered element on the current screen.
|
|
677
|
+
* Uses visual fingerprint matching — works even if the element moved.
|
|
678
|
+
*
|
|
679
|
+
* @returns The best match, or null if nothing above threshold
|
|
680
|
+
*
|
|
681
|
+
* @example
|
|
682
|
+
* ```typescript
|
|
683
|
+
* const saveBtn = agentVision.find('save-button');
|
|
684
|
+
* if (saveBtn) {
|
|
685
|
+
* saveBtn.click();
|
|
686
|
+
* } else {
|
|
687
|
+
* console.log('Save button not visible');
|
|
688
|
+
* }
|
|
689
|
+
* ```
|
|
690
|
+
*/
|
|
691
|
+
find(label) {
|
|
692
|
+
const results = this.findAll(label, 1);
|
|
693
|
+
return results.length > 0 ? results[0] : null;
|
|
694
|
+
}
|
|
695
|
+
/**
|
|
696
|
+
* Find all instances of a remembered element on screen.
|
|
697
|
+
*
|
|
698
|
+
* @example
|
|
699
|
+
* ```typescript
|
|
700
|
+
* // Find all things that look like "close-button"
|
|
701
|
+
* const closeButtons = agentVision.findAll('close-button');
|
|
702
|
+
* console.log(`Found ${closeButtons.length} close buttons`);
|
|
703
|
+
* ```
|
|
704
|
+
*/
|
|
705
|
+
findAll(label, maxResults = 20) {
|
|
706
|
+
const labeledMemories = this.memories.filter(m => m.label === label && (!this.appContext || m.appContext === this.appContext));
|
|
707
|
+
if (labeledMemories.length === 0)
|
|
708
|
+
return [];
|
|
709
|
+
this.refreshGrid();
|
|
710
|
+
const cap = this.lastCapture;
|
|
711
|
+
const cellW = Math.floor(cap.width / this.cols);
|
|
712
|
+
const cellH = Math.floor(cap.height / this.rows);
|
|
713
|
+
const results = [];
|
|
714
|
+
for (let row = 0; row < this.rows; row++) {
|
|
715
|
+
for (let col = 0; col < this.cols; col++) {
|
|
716
|
+
const idx = row * this.cols + col;
|
|
717
|
+
const fp = this.gridFingerprints[idx];
|
|
718
|
+
if (!fp)
|
|
719
|
+
continue;
|
|
720
|
+
let bestSim = 0;
|
|
721
|
+
for (const mem of labeledMemories) {
|
|
722
|
+
const sim = fingerprintSimilarity(fp, mem.fingerprint);
|
|
723
|
+
if (sim > bestSim)
|
|
724
|
+
bestSim = sim;
|
|
725
|
+
}
|
|
726
|
+
if (bestSim < this.matchThreshold)
|
|
727
|
+
continue;
|
|
728
|
+
const x = col * cellW;
|
|
729
|
+
const y = row * cellH;
|
|
730
|
+
results.push(makeLocatedElement(x, y, cellW, cellH, bestSim, bestSim, fp, label, labeledMemories.length));
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
results.sort((a, b) => b.confidence - a.confidence);
|
|
734
|
+
return results.slice(0, maxResults);
|
|
735
|
+
}
|
|
736
|
+
// ═══ LOCATE — Find element by fingerprint anywhere on screen ══════════
|
|
737
|
+
/**
 * Locate a specific visual fingerprint on the current screen.
 * Does a focused scan: first near the expected position, then widens.
 *
 * @param target - Fingerprint to search for (its `aspect` fixes the search window's width).
 * @param [options.threshold] - Minimum similarity to accept; defaults to `matchThreshold`.
 * @param [options.searchRadius] - Half-size of the focused search box; defaults to `searchRadius`.
 * @param [options.near] - Hint position; when given, a dense sliding-window scan runs there first.
 * @returns A located element, or null when no region clears the threshold.
 *
 * @example
 * ```typescript
 * const fp = agentVision.fingerprint(100, 200, 80, 30);
 * // ... some time later, UI may have reorganized ...
 * const found = agentVision.locate(fp, { near: { x: 100, y: 200 } });
 * if (found) found.click();
 * ```
 */
locate(target, options) {
    const threshold = options?.threshold ?? this.matchThreshold;
    const radius = options?.searchRadius ?? this.searchRadius;
    const cap = native.screenCapture();
    // Determine region size from fingerprint aspect ratio: height comes from
    // the default region, width is derived so the window matches the target's shape.
    const h = this.defaultRegionSize.h;
    const w = Math.round(h * target.aspect);
    let bestSim = 0;
    let bestX = 0;
    let bestY = 0;
    // Slide by ~40% of the window per step (min 8 px) so neighboring windows overlap.
    const stepX = Math.max(8, Math.floor(w * 0.4));
    const stepY = Math.max(8, Math.floor(h * 0.4));
    // If we have a hint, search near it first with a dense sliding window.
    if (options?.near) {
        // Search box clamped to the screen, keeping the window fully on-screen.
        const sX = Math.max(0, options.near.x - radius);
        const eX = Math.min(cap.width - w, options.near.x + radius);
        const sY = Math.max(0, options.near.y - radius);
        const eY = Math.min(cap.height - h, options.near.y + radius);
        for (let sy = sY; sy < eY; sy += stepY) {
            for (let sx = sX; sx < eX; sx += stepX) {
                const candidate = computeFingerprint(cap.data, cap.width, cap.height, sx, sy, w, h);
                const sim = fingerprintSimilarity(target, candidate);
                if (sim > bestSim) {
                    bestSim = sim;
                    bestX = sx;
                    bestY = sy;
                }
            }
        }
        if (bestSim >= threshold) {
            return makeLocatedElement(bestX, bestY, w, h, bestSim, bestSim, target);
        }
    }
    // Full screen scan with grid. Note: bestSim (and bestX/bestY) carry over
    // from the focused search, so a grid cell must beat the best near-match
    // to replace it.
    this.refreshGrid();
    const cellW = Math.floor(cap.width / this.cols);
    const cellH = Math.floor(cap.height / this.rows);
    for (let row = 0; row < this.rows; row++) {
        for (let col = 0; col < this.cols; col++) {
            const idx = row * this.cols + col;
            const fp = this.gridFingerprints[idx];
            if (!fp)
                continue;
            const sim = fingerprintSimilarity(target, fp);
            if (sim > bestSim) {
                bestSim = sim;
                bestX = col * cellW;
                bestY = row * cellH;
            }
        }
    }
    if (bestSim >= threshold) {
        return makeLocatedElement(bestX, bestY, cellW, cellH, bestSim, bestSim, target);
    }
    return null;
}
|
|
805
|
+
// ═══ FIND BY TEXT — Combine OCR with visual memory ════════════════════
|
|
806
|
+
/**
|
|
807
|
+
* Find an element by text using OCR, then fingerprint it for future recognition.
|
|
808
|
+
* First time: uses OCR. After that: can find it visually even without OCR.
|
|
809
|
+
*
|
|
810
|
+
* @example
|
|
811
|
+
* ```typescript
|
|
812
|
+
* // First call uses OCR to find "Save" text
|
|
813
|
+
* const save = agentVision.findByText('Save');
|
|
814
|
+
* if (save) save.click();
|
|
815
|
+
* // Now agentVision remembers what "Save" looks like visually
|
|
816
|
+
* ```
|
|
817
|
+
*/
|
|
818
|
+
findByText(text, options) {
|
|
819
|
+
const shouldRemember = options?.remember !== false;
|
|
820
|
+
// First try: recall from memory (fast, no OCR needed)
|
|
821
|
+
const recalled = this.find(`text:${text}`);
|
|
822
|
+
if (recalled && recalled.isStillPresent()) {
|
|
823
|
+
return recalled;
|
|
824
|
+
}
|
|
825
|
+
// Second try: OCR
|
|
826
|
+
try {
|
|
827
|
+
const found = native.ocrFindText(text);
|
|
828
|
+
if (found) {
|
|
829
|
+
const x = found.x;
|
|
830
|
+
const y = found.y;
|
|
831
|
+
const w = found.width || this.defaultRegionSize.w;
|
|
832
|
+
const h = found.height || this.defaultRegionSize.h;
|
|
833
|
+
const cap = native.screenCapture();
|
|
834
|
+
const fp = computeFingerprint(cap.data, cap.width, cap.height, x, y, w, h);
|
|
835
|
+
if (shouldRemember) {
|
|
836
|
+
this.remember(`text:${text}`, x + Math.floor(w / 2), y + Math.floor(h / 2), { w, h });
|
|
837
|
+
}
|
|
838
|
+
return makeLocatedElement(x, y, w, h, found.confidence ?? 1.0, 1.0, fp, `text:${text}`);
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
catch { /* OCR unavailable */ }
|
|
842
|
+
return null;
|
|
843
|
+
}
|
|
844
|
+
// ═══ VERIFY — Check if actions had an effect ══════════════════════════
|
|
845
|
+
/**
|
|
846
|
+
* Click an element and verify the screen changed.
|
|
847
|
+
* Returns the located element with success/failure status.
|
|
848
|
+
*
|
|
849
|
+
* @example
|
|
850
|
+
* ```typescript
|
|
851
|
+
* const result = await agentVision.clickAndVerify('submit-button');
|
|
852
|
+
* if (!result.verified) {
|
|
853
|
+
* // Button didn't respond — try again or escalate
|
|
854
|
+
* }
|
|
855
|
+
* ```
|
|
856
|
+
*/
|
|
857
|
+
async clickAndVerify(label, options) {
|
|
858
|
+
const el = this.find(label);
|
|
859
|
+
if (!el)
|
|
860
|
+
return { element: null, verified: false, changePercent: 0 };
|
|
861
|
+
const before = exports.vision.captureReference();
|
|
862
|
+
el.click();
|
|
863
|
+
await new Promise(r => setTimeout(r, options?.timeout ?? 500));
|
|
864
|
+
const diff = exports.vision.diff(before);
|
|
865
|
+
const minChange = options?.minChange ?? 0.3;
|
|
866
|
+
const verified = diff.changePercentage >= minChange;
|
|
867
|
+
// Update memory with outcome
|
|
868
|
+
const mem = [...this.memories].reverse().find(m => m.label === label);
|
|
869
|
+
if (mem) {
|
|
870
|
+
mem.outcome = { screenChangePercent: diff.changePercentage, responseTimeMs: options?.timeout ?? 500 };
|
|
871
|
+
mem.success = verified;
|
|
872
|
+
}
|
|
873
|
+
return { element: el, verified, changePercent: diff.changePercentage };
|
|
874
|
+
}
|
|
875
|
+
/**
|
|
876
|
+
* Check if a specific screen region visually changed since a fingerprint was taken.
|
|
877
|
+
*
|
|
878
|
+
* @example
|
|
879
|
+
* ```typescript
|
|
880
|
+
* const before = agentVision.fingerprint(100, 200, 80, 30);
|
|
881
|
+
* await agent.doSomething();
|
|
882
|
+
* const changed = agentVision.hasChanged(before, 100, 200, 80, 30);
|
|
883
|
+
* ```
|
|
884
|
+
*/
|
|
885
|
+
hasChanged(previousFingerprint, x, y, w, h, changeThreshold = 0.15) {
|
|
886
|
+
const current = this.fingerprint(x, y, w, h);
|
|
887
|
+
const sim = fingerprintSimilarity(previousFingerprint, current);
|
|
888
|
+
return sim < (1 - changeThreshold);
|
|
889
|
+
}
|
|
890
|
+
// ═══ PREDICT — Temporal pattern learning & prediction ═════════════════
|
|
891
|
+
/**
|
|
892
|
+
* Predict what will happen next based on learned temporal patterns.
|
|
893
|
+
*
|
|
894
|
+
* @example
|
|
895
|
+
* ```typescript
|
|
896
|
+
* // After clicking "File" many times, the agent learns:
|
|
897
|
+
* // "After clicking file-menu, a dropdown appears"
|
|
898
|
+
* const next = agentVision.predictNext();
|
|
899
|
+
* if (next) {
|
|
900
|
+
* console.log(`Expected: ${next.nextAction} in ${next.expectedDelayMs}ms`);
|
|
901
|
+
* }
|
|
902
|
+
* ```
|
|
903
|
+
*/
|
|
904
|
+
predictNext(filterLabel) {
|
|
905
|
+
if (this.recentActions.length === 0)
|
|
906
|
+
return null;
|
|
907
|
+
const last = this.recentActions[this.recentActions.length - 1];
|
|
908
|
+
const prefix = `${last.action}:${last.label || ''}→`;
|
|
909
|
+
let bestKey = null;
|
|
910
|
+
let bestPattern = null;
|
|
911
|
+
let bestCount = 0;
|
|
912
|
+
const entries = Array.from(this.temporalPatterns.entries());
|
|
913
|
+
for (let i = 0; i < entries.length; i++) {
|
|
914
|
+
const key = entries[i][0];
|
|
915
|
+
const pattern = entries[i][1];
|
|
916
|
+
if (!key.startsWith(prefix))
|
|
917
|
+
continue;
|
|
918
|
+
if (filterLabel && !key.includes(filterLabel))
|
|
919
|
+
continue;
|
|
920
|
+
if (pattern.count > bestCount) {
|
|
921
|
+
bestKey = key;
|
|
922
|
+
bestPattern = pattern;
|
|
923
|
+
bestCount = pattern.count;
|
|
924
|
+
}
|
|
925
|
+
}
|
|
926
|
+
if (!bestKey || !bestPattern)
|
|
927
|
+
return null;
|
|
928
|
+
const nextPart = bestKey.split('→')[1] || '';
|
|
929
|
+
const parts = nextPart.split(':');
|
|
930
|
+
return {
|
|
931
|
+
nextAction: parts[0] || nextPart,
|
|
932
|
+
label: parts[1] || undefined,
|
|
933
|
+
confidence: Math.min(1, bestPattern.count / 5),
|
|
934
|
+
expectedDelayMs: bestPattern.avgDelayMs,
|
|
935
|
+
predictedRegion: bestPattern.predictedRegion,
|
|
936
|
+
};
|
|
937
|
+
}
|
|
938
|
+
/**
 * Wait for a predicted event to happen.
 * Uses temporal patterns to know WHEN and WHERE to look.
 *
 * @param {string} [label] - Remembered label to look for once a change is seen.
 * @param {number} [timeoutMs=5000] - Give up after this long.
 * @returns The located element, a located stand-in for the changed region, or null on timeout.
 *
 * @example
 * ```typescript
 * // Click File menu
 * agentVision.find('file-menu')?.click();
 * // Wait for the dropdown the agent learned usually appears
 * const appeared = await agentVision.waitForPredicted('dropdown', 3000);
 * ```
 */
async waitForPredicted(label, timeoutMs = 5000) {
    const prediction = this.predictNext(label);
    const pollMs = 100;
    const deadline = Date.now() + timeoutMs;
    // Take baseline for change detection. When a predicted region is known,
    // restrict the baseline to it so unrelated screen activity is ignored.
    const baseline = exports.vision.captureReference(prediction?.predictedRegion
        ? { x: prediction.predictedRegion.x, y: prediction.predictedRegion.y, width: prediction.predictedRegion.w, height: prediction.predictedRegion.h }
        : undefined);
    while (Date.now() < deadline) {
        // Check if something changed in the predicted region (>0.5% of pixels).
        const diff = exports.vision.diff(baseline);
        if (diff.hasChanges && diff.changePercentage > 0.5) {
            // Something appeared — try to locate the expected element
            if (label) {
                const found = this.find(label);
                if (found)
                    return found;
            }
            // Or just return the changed region, fingerprinted so the caller
            // can track it. Confidence 0.8 marks this as an inferred match.
            if (diff.changedBounds) {
                const b = diff.changedBounds;
                const cap = native.screenCapture();
                const fp = computeFingerprint(cap.data, cap.width, cap.height, b.x, b.y, b.width, b.height);
                return makeLocatedElement(b.x, b.y, b.width, b.height, 0.8, 0.8, fp, label);
            }
        }
        await new Promise(r => setTimeout(r, pollMs));
    }
    return null;
}
|
|
980
|
+
/**
|
|
981
|
+
* Get all learned temporal patterns.
|
|
982
|
+
*/
|
|
983
|
+
getPatterns() {
|
|
984
|
+
const result = [];
|
|
985
|
+
const entries = Array.from(this.temporalPatterns.entries());
|
|
986
|
+
for (let i = 0; i < entries.length; i++) {
|
|
987
|
+
result.push({ pattern: entries[i][0], count: entries[i][1].count, avgDelayMs: entries[i][1].avgDelayMs });
|
|
988
|
+
}
|
|
989
|
+
return result.sort((a, b) => b.count - a.count);
|
|
990
|
+
}
|
|
991
|
+
// ═══ SCAN — Full screen understanding ═════════════════════════════════
|
|
992
|
+
/**
 * Scan the full screen and return all regions that match ANY remembered element.
 * Gives the agent a complete understanding of "what's on screen that I recognize."
 *
 * @param {number} [threshold] - Minimum similarity to report; defaults to `matchThreshold`.
 * @returns Recognized elements, deduplicated per label per ~200px area, best-first.
 *
 * @example
 * ```typescript
 * const recognized = agentVision.scan();
 * for (const el of recognized) {
 *   console.log(`Found "${el.label}" at (${el.x},${el.y}) confidence=${el.confidence}`);
 * }
 * ```
 */
scan(threshold) {
    const minSim = threshold ?? this.matchThreshold;
    this.refreshGrid();
    const cap = this.lastCapture;
    const cellW = Math.floor(cap.width / this.cols);
    const cellH = Math.floor(cap.height / this.rows);
    // Group memories by label so each grid cell is compared label-by-label.
    const labelGroups = new Map();
    for (const mem of this.memories) {
        if (!mem.label)
            continue;
        const group = labelGroups.get(mem.label) ?? [];
        group.push(mem);
        labelGroups.set(mem.label, group);
    }
    const results = [];
    for (let row = 0; row < this.rows; row++) {
        for (let col = 0; col < this.cols; col++) {
            const idx = row * this.cols + col;
            const fp = this.gridFingerprints[idx];
            if (!fp)
                continue;
            // Best-matching label for this cell across all stored memories.
            let bestLabel;
            let bestSim = 0;
            let bestCount = 0;
            const labelEntries = Array.from(labelGroups.entries());
            for (let li = 0; li < labelEntries.length; li++) {
                const label = labelEntries[li][0];
                const mems = labelEntries[li][1];
                for (const mem of mems) {
                    const sim = fingerprintSimilarity(fp, mem.fingerprint);
                    if (sim > bestSim) {
                        bestSim = sim;
                        bestLabel = label;
                        // bestCount = number of memories backing the winning label.
                        bestCount = mems.length;
                    }
                }
            }
            if (bestSim < minSim || !bestLabel)
                continue;
            results.push(makeLocatedElement(col * cellW, row * cellH, cellW, cellH, bestSim, bestSim, fp, bestLabel, bestCount));
        }
    }
    // Deduplicate: keep only the best match per label per rough area
    // (200x200 px buckets); sorting first guarantees the kept one is the best.
    const deduped = [];
    const seen = new Set();
    results.sort((a, b) => b.confidence - a.confidence);
    for (const el of results) {
        const key = `${el.label}:${Math.floor(el.x / 200)}:${Math.floor(el.y / 200)}`;
        if (!seen.has(key)) {
            seen.add(key);
            deduped.push(el);
        }
    }
    return deduped;
}
|
|
1060
|
+
// ═══ PERSISTENCE — Save/load memory across sessions ═══════════════════
|
|
1061
|
+
/**
|
|
1062
|
+
* Export all visual memories as a JSON string.
|
|
1063
|
+
*
|
|
1064
|
+
* @example
|
|
1065
|
+
* ```typescript
|
|
1066
|
+
* const data = agentVision.save();
|
|
1067
|
+
* fs.writeFileSync('agent-memory.json', data);
|
|
1068
|
+
* ```
|
|
1069
|
+
*/
|
|
1070
|
+
save() {
|
|
1071
|
+
return JSON.stringify({
|
|
1072
|
+
version: 1,
|
|
1073
|
+
appContext: this.appContext,
|
|
1074
|
+
memories: this.memories.map(m => ({
|
|
1075
|
+
...m,
|
|
1076
|
+
fingerprint: {
|
|
1077
|
+
...m.fingerprint,
|
|
1078
|
+
brightnessHist: Array.from(m.fingerprint.brightnessHist),
|
|
1079
|
+
hueHist: Array.from(m.fingerprint.hueHist),
|
|
1080
|
+
},
|
|
1081
|
+
})),
|
|
1082
|
+
temporalPatterns: Array.from(this.temporalPatterns.entries()),
|
|
1083
|
+
});
|
|
1084
|
+
}
|
|
1085
|
+
/**
|
|
1086
|
+
* Load visual memories from a JSON string.
|
|
1087
|
+
*
|
|
1088
|
+
* @example
|
|
1089
|
+
* ```typescript
|
|
1090
|
+
* const data = fs.readFileSync('agent-memory.json', 'utf8');
|
|
1091
|
+
* agentVision.load(data);
|
|
1092
|
+
* // Agent now remembers everything from last session
|
|
1093
|
+
* ```
|
|
1094
|
+
*/
|
|
1095
|
+
load(json) {
|
|
1096
|
+
const data = JSON.parse(json);
|
|
1097
|
+
if (data.memories) {
|
|
1098
|
+
for (const m of data.memories) {
|
|
1099
|
+
m.fingerprint.brightnessHist = new Float64Array(m.fingerprint.brightnessHist);
|
|
1100
|
+
m.fingerprint.hueHist = new Float64Array(m.fingerprint.hueHist);
|
|
1101
|
+
this.memories.push(m);
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
if (data.temporalPatterns) {
|
|
1105
|
+
for (const [key, val] of data.temporalPatterns) {
|
|
1106
|
+
this.temporalPatterns.set(key, val);
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
// ═══ STATE ════════════════════════════════════════════════════════════
|
|
1111
|
+
/** Number of stored memories. */
|
|
1112
|
+
get memoryCount() { return this.memories.length; }
|
|
1113
|
+
/** All unique labels the agent has learned. */
|
|
1114
|
+
get knownLabels() {
|
|
1115
|
+
const labels = new Set();
|
|
1116
|
+
for (const m of this.memories) {
|
|
1117
|
+
if (m.label)
|
|
1118
|
+
labels.add(m.label);
|
|
1119
|
+
}
|
|
1120
|
+
return Array.from(labels);
|
|
1121
|
+
}
|
|
1122
|
+
/** Clear all memories. */
|
|
1123
|
+
reset() {
|
|
1124
|
+
this.memories = [];
|
|
1125
|
+
this.temporalPatterns.clear();
|
|
1126
|
+
this.recentActions = [];
|
|
1127
|
+
this.sequenceCounter = 0;
|
|
1128
|
+
this.gridDirty = true;
|
|
1129
|
+
}
|
|
1130
|
+
/** Set the app context (filters memories by app). */
|
|
1131
|
+
setContext(appContext) {
|
|
1132
|
+
this.appContext = appContext;
|
|
1133
|
+
this.gridDirty = true;
|
|
1134
|
+
}
|
|
1135
|
+
// ═══ INTERNALS ════════════════════════════════════════════════════════
|
|
1136
|
+
refreshGrid() {
|
|
1137
|
+
const now = Date.now();
|
|
1138
|
+
if (!this.gridDirty && now - this.lastGridTime < 200)
|
|
1139
|
+
return;
|
|
1140
|
+
const cap = native.screenCapture();
|
|
1141
|
+
this.lastCapture = cap;
|
|
1142
|
+
const cellW = Math.floor(cap.width / this.cols);
|
|
1143
|
+
const cellH = Math.floor(cap.height / this.rows);
|
|
1144
|
+
this.gridFingerprints = [];
|
|
1145
|
+
for (let row = 0; row < this.rows; row++) {
|
|
1146
|
+
for (let col = 0; col < this.cols; col++) {
|
|
1147
|
+
this.gridFingerprints.push(computeFingerprint(cap.data, cap.width, cap.height, col * cellW, row * cellH, cellW, cellH));
|
|
1148
|
+
}
|
|
1149
|
+
}
|
|
1150
|
+
this.gridDirty = false;
|
|
1151
|
+
this.lastGridTime = now;
|
|
1152
|
+
}
|
|
1153
|
+
trackAction(action, label, x, y) {
|
|
1154
|
+
this.recentActions.push({ action, label, time: Date.now(), pos: { x, y } });
|
|
1155
|
+
// Keep only last 30s
|
|
1156
|
+
const cutoff = Date.now() - 30000;
|
|
1157
|
+
this.recentActions = this.recentActions.filter(a => a.time > cutoff);
|
|
1158
|
+
// Learn temporal patterns from consecutive actions
|
|
1159
|
+
for (let i = 1; i < this.recentActions.length; i++) {
|
|
1160
|
+
const prev = this.recentActions[i - 1];
|
|
1161
|
+
const curr = this.recentActions[i];
|
|
1162
|
+
const key = `${prev.action}:${prev.label || ''}→${curr.action}:${curr.label || ''}`;
|
|
1163
|
+
const delay = curr.time - prev.time;
|
|
1164
|
+
const existing = this.temporalPatterns.get(key);
|
|
1165
|
+
if (existing) {
|
|
1166
|
+
existing.avgDelayMs = (existing.avgDelayMs * existing.count + delay) / (existing.count + 1);
|
|
1167
|
+
existing.count++;
|
|
1168
|
+
existing.predictedRegion = { x: curr.pos.x - 50, y: curr.pos.y - 50, w: 100, h: 100 };
|
|
1169
|
+
}
|
|
1170
|
+
else {
|
|
1171
|
+
this.temporalPatterns.set(key, {
|
|
1172
|
+
avgDelayMs: delay,
|
|
1173
|
+
count: 1,
|
|
1174
|
+
predictedRegion: { x: curr.pos.x - 50, y: curr.pos.y - 50, w: 100, h: 100 },
|
|
1175
|
+
});
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1179
|
+
evictOldMemories() {
|
|
1180
|
+
while (this.memories.length > this.maxMemories) {
|
|
1181
|
+
// Remove oldest non-labeled memory first, then oldest overall
|
|
1182
|
+
const unlabeledIdx = this.memories.findIndex(m => !m.label);
|
|
1183
|
+
if (unlabeledIdx >= 0) {
|
|
1184
|
+
this.memories.splice(unlabeledIdx, 1);
|
|
1185
|
+
}
|
|
1186
|
+
else {
|
|
1187
|
+
this.memories.shift();
|
|
1188
|
+
}
|
|
1189
|
+
}
|
|
1190
|
+
}
|
|
1191
|
+
}
|
|
1192
|
+
// Public CommonJS export of the AgentVision class.
exports.AgentVision = AgentVision;
|
|
1193
|
+
/**
 * Create a new AgentVision instance.
 *
 * Convenience factory — equivalent to `new AgentVision(config)`.
 */
function createAgentVision(config) {
    const vision = new AgentVision(config);
    return vision;
}
|
|
1197
|
+
//# sourceMappingURL=vision.js.map
|