stelo 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +184 -0
- package/README.md +853 -0
- package/dist/accessibility.d.ts +227 -0
- package/dist/accessibility.d.ts.map +1 -0
- package/dist/accessibility.js +602 -0
- package/dist/accessibility.js.map +1 -0
- package/dist/agent.d.ts +870 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +1107 -0
- package/dist/agent.js.map +1 -0
- package/dist/audio-stream.d.ts +114 -0
- package/dist/audio-stream.d.ts.map +1 -0
- package/dist/audio-stream.js +167 -0
- package/dist/audio-stream.js.map +1 -0
- package/dist/clipboard.d.ts +99 -0
- package/dist/clipboard.d.ts.map +1 -0
- package/dist/clipboard.js +352 -0
- package/dist/clipboard.js.map +1 -0
- package/dist/config.d.ts +183 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +477 -0
- package/dist/config.js.map +1 -0
- package/dist/context.d.ts +213 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +387 -0
- package/dist/context.js.map +1 -0
- package/dist/cortex.d.ts +548 -0
- package/dist/cortex.d.ts.map +1 -0
- package/dist/cortex.js +1479 -0
- package/dist/cortex.js.map +1 -0
- package/dist/errors.d.ts +133 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +278 -0
- package/dist/errors.js.map +1 -0
- package/dist/events.d.ts +227 -0
- package/dist/events.d.ts.map +1 -0
- package/dist/events.js +429 -0
- package/dist/events.js.map +1 -0
- package/dist/executor.d.ts +212 -0
- package/dist/executor.d.ts.map +1 -0
- package/dist/executor.js +545 -0
- package/dist/executor.js.map +1 -0
- package/dist/index.d.ts +69 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +167 -0
- package/dist/index.js.map +1 -0
- package/dist/integration.d.ts +159 -0
- package/dist/integration.d.ts.map +1 -0
- package/dist/integration.js +533 -0
- package/dist/integration.js.map +1 -0
- package/dist/keyboard.d.ts +276 -0
- package/dist/keyboard.d.ts.map +1 -0
- package/dist/keyboard.js +404 -0
- package/dist/keyboard.js.map +1 -0
- package/dist/logger.d.ts +198 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +516 -0
- package/dist/logger.js.map +1 -0
- package/dist/middleware.d.ts +183 -0
- package/dist/middleware.d.ts.map +1 -0
- package/dist/middleware.js +493 -0
- package/dist/middleware.js.map +1 -0
- package/dist/monitor.d.ts +136 -0
- package/dist/monitor.d.ts.map +1 -0
- package/dist/monitor.js +341 -0
- package/dist/monitor.js.map +1 -0
- package/dist/mouse.d.ts +290 -0
- package/dist/mouse.d.ts.map +1 -0
- package/dist/mouse.js +466 -0
- package/dist/mouse.js.map +1 -0
- package/dist/plugin.d.ts +157 -0
- package/dist/plugin.d.ts.map +1 -0
- package/dist/plugin.js +409 -0
- package/dist/plugin.js.map +1 -0
- package/dist/process.d.ts +106 -0
- package/dist/process.d.ts.map +1 -0
- package/dist/process.js +326 -0
- package/dist/process.js.map +1 -0
- package/dist/recorder.d.ts +100 -0
- package/dist/recorder.d.ts.map +1 -0
- package/dist/recorder.js +258 -0
- package/dist/recorder.js.map +1 -0
- package/dist/safety.d.ts +59 -0
- package/dist/safety.d.ts.map +1 -0
- package/dist/safety.js +98 -0
- package/dist/safety.js.map +1 -0
- package/dist/scheduler.d.ts +152 -0
- package/dist/scheduler.d.ts.map +1 -0
- package/dist/scheduler.js +615 -0
- package/dist/scheduler.js.map +1 -0
- package/dist/screen.d.ts +96 -0
- package/dist/screen.d.ts.map +1 -0
- package/dist/screen.js +154 -0
- package/dist/screen.js.map +1 -0
- package/dist/session.d.ts +209 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +479 -0
- package/dist/session.js.map +1 -0
- package/dist/stream.d.ts +168 -0
- package/dist/stream.d.ts.map +1 -0
- package/dist/stream.js +298 -0
- package/dist/stream.js.map +1 -0
- package/dist/telemetry.d.ts +223 -0
- package/dist/telemetry.d.ts.map +1 -0
- package/dist/telemetry.js +433 -0
- package/dist/telemetry.js.map +1 -0
- package/dist/types.d.ts +165 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/bezier.d.ts +51 -0
- package/dist/utils/bezier.d.ts.map +1 -0
- package/dist/utils/bezier.js +117 -0
- package/dist/utils/bezier.js.map +1 -0
- package/dist/utils/helpers.d.ts +90 -0
- package/dist/utils/helpers.d.ts.map +1 -0
- package/dist/utils/helpers.js +143 -0
- package/dist/utils/helpers.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +18 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/validation.d.ts +254 -0
- package/dist/validation.d.ts.map +1 -0
- package/dist/validation.js +478 -0
- package/dist/validation.js.map +1 -0
- package/dist/vision.d.ts +719 -0
- package/dist/vision.d.ts.map +1 -0
- package/dist/vision.js +1197 -0
- package/dist/vision.js.map +1 -0
- package/dist/window.d.ts +80 -0
- package/dist/window.d.ts.map +1 -0
- package/dist/window.js +170 -0
- package/dist/window.js.map +1 -0
- package/dist/workflow.d.ts +224 -0
- package/dist/workflow.d.ts.map +1 -0
- package/dist/workflow.js +578 -0
- package/dist/workflow.js.map +1 -0
- package/index.d.ts +840 -0
- package/index.js +495 -0
- package/package.json +91 -0
package/dist/vision.d.ts
ADDED
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Result of comparing two screen captures
|
|
3
|
+
*/
|
|
4
|
+
export interface ScreenDiff {
|
|
5
|
+
/** Percentage of pixels that changed (0.0 - 100.0) */
|
|
6
|
+
changePercentage: number;
|
|
7
|
+
/** Total number of pixels that changed */
|
|
8
|
+
changedPixelCount: number;
|
|
9
|
+
/** Total number of pixels compared */
|
|
10
|
+
totalPixelCount: number;
|
|
11
|
+
/** Bounding box of changed region (if any) */
|
|
12
|
+
changedBounds?: {
|
|
13
|
+
x: number;
|
|
14
|
+
y: number;
|
|
15
|
+
width: number;
|
|
16
|
+
height: number;
|
|
17
|
+
};
|
|
18
|
+
/** Average color difference across all pixels */
|
|
19
|
+
averageDiff: number;
|
|
20
|
+
/** Maximum single-pixel color difference */
|
|
21
|
+
maxDiff: number;
|
|
22
|
+
/** True if any change was detected */
|
|
23
|
+
hasChanges: boolean;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Grid cell analysis result
|
|
27
|
+
*/
|
|
28
|
+
export interface GridCell {
|
|
29
|
+
/** Cell position in grid (0-indexed) */
|
|
30
|
+
gridX: number;
|
|
31
|
+
gridY: number;
|
|
32
|
+
/** Screen coordinates of cell */
|
|
33
|
+
screenX: number;
|
|
34
|
+
screenY: number;
|
|
35
|
+
/** Cell dimensions */
|
|
36
|
+
width: number;
|
|
37
|
+
height: number;
|
|
38
|
+
/** Average color of the cell */
|
|
39
|
+
avgColor: {
|
|
40
|
+
r: number;
|
|
41
|
+
g: number;
|
|
42
|
+
b: number;
|
|
43
|
+
};
|
|
44
|
+
/** Color variance (higher = more complex content) */
|
|
45
|
+
variance: number;
|
|
46
|
+
/** Whether this cell likely contains text (high contrast) */
|
|
47
|
+
likelyText: boolean;
|
|
48
|
+
/** Whether this cell likely contains UI elements */
|
|
49
|
+
likelyUI: boolean;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Grid analysis result
|
|
53
|
+
*/
|
|
54
|
+
export interface GridAnalysis {
|
|
55
|
+
/** Grid dimensions */
|
|
56
|
+
cols: number;
|
|
57
|
+
rows: number;
|
|
58
|
+
/** Cell size in pixels */
|
|
59
|
+
cellWidth: number;
|
|
60
|
+
cellHeight: number;
|
|
61
|
+
/** All grid cells */
|
|
62
|
+
cells: GridCell[];
|
|
63
|
+
/** Cells with high activity (likely UI elements) */
|
|
64
|
+
activeCells: Array<{
|
|
65
|
+
gridX: number;
|
|
66
|
+
gridY: number;
|
|
67
|
+
}>;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Region on screen
|
|
71
|
+
*/
|
|
72
|
+
export interface Region {
|
|
73
|
+
x: number;
|
|
74
|
+
y: number;
|
|
75
|
+
width: number;
|
|
76
|
+
height: number;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Reference capture for diff operations
|
|
80
|
+
*/
|
|
81
|
+
export interface CaptureReference {
|
|
82
|
+
data: Buffer;
|
|
83
|
+
width: number;
|
|
84
|
+
height: number;
|
|
85
|
+
x: number;
|
|
86
|
+
y: number;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Options for diff operations
|
|
90
|
+
*/
|
|
91
|
+
export interface DiffOptions {
|
|
92
|
+
/** Minimum color distance to consider a difference (0-441). Default: 10 */
|
|
93
|
+
tolerance?: number;
|
|
94
|
+
/** Only check every Nth pixel (1 = all). Higher = faster but less accurate */
|
|
95
|
+
sampleRate?: number;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Options for waiting operations
|
|
99
|
+
*/
|
|
100
|
+
export interface WaitOptions {
|
|
101
|
+
/** Region to monitor (full screen if not specified) */
|
|
102
|
+
region?: Region;
|
|
103
|
+
/** Polling interval in ms. Default: 50 */
|
|
104
|
+
pollIntervalMs?: number;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* Vision and change detection utilities for automation workflows.
|
|
108
|
+
*
|
|
109
|
+
* These primitives enable visual grounding, action verification, and
|
|
110
|
+
* state tracking - essential building blocks for robust automation flows.
|
|
111
|
+
*
|
|
112
|
+
* @example
|
|
113
|
+
* ```typescript
|
|
114
|
+
* import { vision, mouse } from 'stelo';
|
|
115
|
+
*
|
|
116
|
+
* // Take a reference screenshot
|
|
117
|
+
* const before = vision.captureReference();
|
|
118
|
+
*
|
|
119
|
+
* // Perform some action
|
|
120
|
+
* await mouse.click();
|
|
121
|
+
*
|
|
122
|
+
* // Verify the screen changed
|
|
123
|
+
* const diff = vision.diff(before);
|
|
124
|
+
* console.log(`${diff.changePercentage}% of screen changed`);
|
|
125
|
+
*
|
|
126
|
+
* // Wait for UI to stabilize after action
|
|
127
|
+
* await vision.waitForStable(0.5, 200, 5000);
|
|
128
|
+
*
|
|
129
|
+
* // Analyze screen as a grid for vision model
|
|
130
|
+
* const grid = vision.analyzeGrid(16, 9);
|
|
131
|
+
* const textCells = grid.cells.filter(c => c.likelyText);
|
|
132
|
+
* ```
|
|
133
|
+
*/
|
|
134
|
+
export declare const vision: {
|
|
135
|
+
/**
|
|
136
|
+
* Capture a reference screenshot for later comparison.
|
|
137
|
+
* Use this before triggering an action to verify it had an effect.
|
|
138
|
+
*
|
|
139
|
+
* @param region - Optional region to capture (full screen if not specified)
|
|
140
|
+
* @returns Reference object to pass to diff()
|
|
141
|
+
*
|
|
142
|
+
* @example
|
|
143
|
+
* ```typescript
|
|
144
|
+
* const before = vision.captureReference();
|
|
145
|
+
* await mouse.click();
|
|
146
|
+
* const diff = vision.diff(before);
|
|
147
|
+
* ```
|
|
148
|
+
*/
|
|
149
|
+
captureReference(region?: Region): CaptureReference;
|
|
150
|
+
/**
|
|
151
|
+
* Compare current screen to a reference capture.
|
|
152
|
+
* Returns detailed diff statistics including change percentage and bounds.
|
|
153
|
+
*
|
|
154
|
+
* @param reference - Reference from captureReference()
|
|
155
|
+
* @param options - Diff options (tolerance, sample rate)
|
|
156
|
+
* @returns Diff result with change statistics
|
|
157
|
+
*
|
|
158
|
+
* @example
|
|
159
|
+
* ```typescript
|
|
160
|
+
* const diff = vision.diff(before, { tolerance: 15 });
|
|
161
|
+
* if (diff.changePercentage > 1) {
|
|
162
|
+
* console.log('Screen changed!', diff.changedBounds);
|
|
163
|
+
* }
|
|
164
|
+
* ```
|
|
165
|
+
*/
|
|
166
|
+
diff(reference: CaptureReference, options?: DiffOptions): ScreenDiff;
|
|
167
|
+
/**
|
|
168
|
+
* Analyze screen as a grid of cells.
|
|
169
|
+
* Each cell includes statistics useful for vision model region selection.
|
|
170
|
+
*
|
|
171
|
+
* This enables efficient visual grounding - instead of sending the entire
|
|
172
|
+
* screen to a vision model, you can identify regions of interest first.
|
|
173
|
+
*
|
|
174
|
+
* @param cols - Number of columns in grid
|
|
175
|
+
* @param rows - Number of rows in grid
|
|
176
|
+
* @param region - Optional region to analyze (full screen if not specified)
|
|
177
|
+
* @returns Grid analysis with cell statistics
|
|
178
|
+
*
|
|
179
|
+
* @example
|
|
180
|
+
* ```typescript
|
|
181
|
+
* // Analyze screen as 16x9 grid
|
|
182
|
+
* const grid = vision.analyzeGrid(16, 9);
|
|
183
|
+
*
|
|
184
|
+
* // Find cells likely containing text
|
|
185
|
+
* const textCells = grid.cells.filter(c => c.likelyText);
|
|
186
|
+
*
|
|
187
|
+
* // Get center of cell [3, 2]
|
|
188
|
+
* const center = vision.gridCellCenter(grid, 3, 2);
|
|
189
|
+
* if (center) await mouse.click(center.x, center.y);
|
|
190
|
+
* ```
|
|
191
|
+
*/
|
|
192
|
+
analyzeGrid(cols: number, rows: number, region?: Region): GridAnalysis;
|
|
193
|
+
/**
|
|
194
|
+
* Get the screen center point of a grid cell.
|
|
195
|
+
*
|
|
196
|
+
* @param grid - Grid analysis from analyzeGrid()
|
|
197
|
+
* @param gridX - Cell column index
|
|
198
|
+
* @param gridY - Cell row index
|
|
199
|
+
* @returns Screen coordinates of cell center, or undefined if out of bounds
|
|
200
|
+
*/
|
|
201
|
+
gridCellCenter(grid: GridAnalysis, gridX: number, gridY: number): {
|
|
202
|
+
x: number;
|
|
203
|
+
y: number;
|
|
204
|
+
} | undefined;
|
|
205
|
+
/**
|
|
206
|
+
* Wait until the screen changes beyond a threshold.
|
|
207
|
+
* Useful for detecting when an action triggers a visual response.
|
|
208
|
+
*
|
|
209
|
+
* @param thresholdPercent - Minimum change percentage to trigger (0-100)
|
|
210
|
+
* @param timeoutMs - Maximum time to wait
|
|
211
|
+
* @param options - Wait options (region, poll interval)
|
|
212
|
+
* @returns true if change detected, false if timed out
|
|
213
|
+
*
|
|
214
|
+
* @example
|
|
215
|
+
* ```typescript
|
|
216
|
+
* // Click and wait for visual feedback
|
|
217
|
+
* mouse.click();
|
|
218
|
+
* const changed = await vision.waitForChange(0.5, 3000);
|
|
219
|
+
* if (!changed) console.log('Button may not have responded');
|
|
220
|
+
* ```
|
|
221
|
+
*/
|
|
222
|
+
waitForChange(thresholdPercent: number, timeoutMs: number, options?: WaitOptions): Promise<boolean>;
|
|
223
|
+
/**
|
|
224
|
+
* Wait until the screen stabilizes (stops changing).
|
|
225
|
+
* Essential for waiting for animations, loading spinners, or transitions.
|
|
226
|
+
*
|
|
227
|
+
* @param stabilityThreshold - Maximum change % to consider "stable" (0-100)
|
|
228
|
+
* @param stableDurationMs - How long screen must remain stable
|
|
229
|
+
* @param timeoutMs - Maximum time to wait
|
|
230
|
+
* @param options - Wait options (region, poll interval)
|
|
231
|
+
* @returns true if stabilized, false if timed out
|
|
232
|
+
*
|
|
233
|
+
* @example
|
|
234
|
+
* ```typescript
|
|
235
|
+
* // Click a button and wait for animation to complete
|
|
236
|
+
* mouse.click();
|
|
237
|
+
* await vision.waitForStable(0.1, 200, 5000);
|
|
238
|
+
* // Screen is now stable - safe to read or continue
|
|
239
|
+
* ```
|
|
240
|
+
*/
|
|
241
|
+
waitForStable(stabilityThreshold: number, stableDurationMs: number, timeoutMs: number, options?: WaitOptions): Promise<boolean>;
|
|
242
|
+
/**
|
|
243
|
+
* Compute a perceptual hash of a screen region.
|
|
244
|
+
* Two visually similar images will have hashes with low Hamming distance.
|
|
245
|
+
*
|
|
246
|
+
* Use this for fast "has the screen changed significantly?" checks
|
|
247
|
+
* without doing full pixel comparison.
|
|
248
|
+
*
|
|
249
|
+
* @param region - Optional region to hash (full screen if not specified)
|
|
250
|
+
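* @returns 64-bit perceptual hash. NOTE(review): the declared type is `number`, which is only integer-exact up to 2^53 - confirm how the 64 bits are encoded.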
|
|
251
|
+
*
|
|
252
|
+
* @example
|
|
253
|
+
* ```typescript
|
|
254
|
+
* const hash1 = vision.perceptualHash();
|
|
255
|
+
* await performSomeAction();
|
|
256
|
+
* const hash2 = vision.perceptualHash();
|
|
257
|
+
* const distance = vision.hashDistance(hash1, hash2);
|
|
258
|
+
* if (distance < 5) console.log('Screen looks similar');
|
|
259
|
+
* ```
|
|
260
|
+
*/
|
|
261
|
+
perceptualHash(region?: Region): number;
|
|
262
|
+
/**
|
|
263
|
+
* Compute Hamming distance between two perceptual hashes.
|
|
264
|
+
* Lower distance = more visually similar. 0 = identical.
|
|
265
|
+
*
|
|
266
|
+
* Rules of thumb:
|
|
267
|
+
* - 0-5: Very similar (minor changes)
|
|
268
|
+
* - 5-10: Moderately similar
|
|
269
|
+
* - 10-20: Significant differences
|
|
270
|
+
* - 20+: Completely different
|
|
271
|
+
*
|
|
272
|
+
* @param hash1 - First perceptual hash
|
|
273
|
+
* @param hash2 - Second perceptual hash
|
|
274
|
+
* @returns Hamming distance (0-64)
|
|
275
|
+
*/
|
|
276
|
+
hashDistance(hash1: number, hash2: number): number;
|
|
277
|
+
/**
|
|
278
|
+
* Find all pixels matching a color within a region.
|
|
279
|
+
* Returns all matching points up to a maximum count.
|
|
280
|
+
*
|
|
281
|
+
* @param color - Color to search for (RGB)
|
|
282
|
+
* @param tolerance - Color distance tolerance (0-441)
|
|
283
|
+
* @param options - Search options
|
|
284
|
+
* @returns Array of matching screen coordinates
|
|
285
|
+
*
|
|
286
|
+
* @example
|
|
287
|
+
* ```typescript
|
|
288
|
+
* // Find all red pixels
|
|
289
|
+
* const redPixels = vision.findAllColors(
|
|
290
|
+
* { r: 255, g: 0, b: 0 },
|
|
291
|
+
* 30,
|
|
292
|
+
* { maxResults: 100 }
|
|
293
|
+
* );
|
|
294
|
+
* ```
|
|
295
|
+
*/
|
|
296
|
+
findAllColors(color: {
|
|
297
|
+
r: number;
|
|
298
|
+
g: number;
|
|
299
|
+
b: number;
|
|
300
|
+
}, tolerance: number, options?: {
|
|
301
|
+
maxResults?: number;
|
|
302
|
+
region?: Region;
|
|
303
|
+
}): Array<{
|
|
304
|
+
x: number;
|
|
305
|
+
y: number;
|
|
306
|
+
}>;
|
|
307
|
+
/**
|
|
308
|
+
* Find clusters of similar colors (potential UI elements).
|
|
309
|
+
* Clusters are groups of nearby pixels with similar colors.
|
|
310
|
+
*
|
|
311
|
+
* @param color - Color to search for (RGB)
|
|
312
|
+
* @param tolerance - Color distance tolerance (0-441)
|
|
313
|
+
* @param minClusterSize - Minimum pixels to form a cluster
|
|
314
|
+
* @param region - Optional region to search (full screen if not specified)
|
|
315
|
+
* @returns Array of bounding rectangles for each cluster
|
|
316
|
+
*
|
|
317
|
+
* @example
|
|
318
|
+
* ```typescript
|
|
319
|
+
* // Find blue button-like regions
|
|
320
|
+
* const clusters = vision.findColorClusters(
|
|
321
|
+
* { r: 0, g: 120, b: 215 }, // Windows blue
|
|
322
|
+
* 40,
|
|
323
|
+
* 50 // At least 50 pixels
|
|
324
|
+
* );
|
|
325
|
+
* if (clusters.length > 0) {
|
|
326
|
+
* // Click center of first cluster
|
|
327
|
+
* const btn = clusters[0];
|
|
328
|
+
* mouse.click(btn.x + btn.width / 2, btn.y + btn.height / 2);
|
|
329
|
+
* }
|
|
330
|
+
* ```
|
|
331
|
+
*/
|
|
332
|
+
findColorClusters(color: {
|
|
333
|
+
r: number;
|
|
334
|
+
g: number;
|
|
335
|
+
b: number;
|
|
336
|
+
}, tolerance: number, minClusterSize: number, region?: Region): Region[];
|
|
337
|
+
/**
|
|
338
|
+
* Verify an action caused a visual change.
|
|
339
|
+
* High-level primitive that captures before/after and compares.
|
|
340
|
+
*
|
|
341
|
+
* @param action - Async action to execute and verify
|
|
342
|
+
* @param minChangePercent - Minimum change to consider verified
|
|
343
|
+
* @param timeoutMs - Maximum time to wait for change
|
|
344
|
+
* @param region - Optional region to monitor
|
|
345
|
+
* @returns Verification result with diff statistics
|
|
346
|
+
*
|
|
347
|
+
* @example
|
|
348
|
+
* ```typescript
|
|
349
|
+
* const result = await vision.verifyAction(
|
|
350
|
+
* async () => { mouse.click(); },
|
|
351
|
+
* 0.5, // At least 0.5% change
|
|
352
|
+
* 2000
|
|
353
|
+
* );
|
|
354
|
+
* if (!result.verified) {
|
|
355
|
+
* // Click didn't cause visual change - might need to retry
|
|
356
|
+
* }
|
|
357
|
+
* ```
|
|
358
|
+
*/
|
|
359
|
+
verifyAction(action: () => Promise<void> | void, minChangePercent: number, timeoutMs: number, region?: Region): Promise<{
|
|
360
|
+
verified: boolean;
|
|
361
|
+
diff: ScreenDiff;
|
|
362
|
+
durationMs: number;
|
|
363
|
+
}>;
|
|
364
|
+
/**
|
|
365
|
+
* Take a reference, perform action, wait for stability.
|
|
366
|
+
* Combines action execution with waiting for the UI to settle.
|
|
367
|
+
*
|
|
368
|
+
* @param action - Action to execute
|
|
369
|
+
* @param options - Wait and verification options
|
|
370
|
+
* @returns true if action completed and screen stabilized
|
|
371
|
+
*
|
|
372
|
+
* @example
|
|
373
|
+
* ```typescript
|
|
374
|
+
* // Click and wait for any animation to complete
|
|
375
|
+
* await vision.doAndWaitStable(async () => {
|
|
376
|
+
* await mouse.click(100, 200);
|
|
377
|
+
* });
|
|
378
|
+
* // Screen is now stable
|
|
379
|
+
* ```
|
|
380
|
+
*/
|
|
381
|
+
doAndWaitStable(action: () => Promise<void> | void, options?: {
|
|
382
|
+
stabilityThreshold?: number;
|
|
383
|
+
stableDurationMs?: number;
|
|
384
|
+
timeoutMs?: number;
|
|
385
|
+
region?: Region;
|
|
386
|
+
}): Promise<boolean>;
|
|
387
|
+
};
|
|
388
|
+
/** Compact visual descriptor for a screen region. */
|
|
389
|
+
export interface VisualFingerprint {
|
|
390
|
+
brightnessHist: Float64Array;
|
|
391
|
+
hueHist: Float64Array;
|
|
392
|
+
edgeDensity: number;
|
|
393
|
+
avgBrightness: number;
|
|
394
|
+
contrast: number;
|
|
395
|
+
variance: number;
|
|
396
|
+
aspect: number;
|
|
397
|
+
phash: number;
|
|
398
|
+
}
|
|
399
|
+
/** A remembered interaction with a screen element. */
|
|
400
|
+
export interface VisualMemory {
|
|
401
|
+
fingerprint: VisualFingerprint;
|
|
402
|
+
action: string;
|
|
403
|
+
label?: string;
|
|
404
|
+
position: {
|
|
405
|
+
x: number;
|
|
406
|
+
y: number;
|
|
407
|
+
};
|
|
408
|
+
regionSize: {
|
|
409
|
+
w: number;
|
|
410
|
+
h: number;
|
|
411
|
+
};
|
|
412
|
+
timestamp: number;
|
|
413
|
+
outcome?: {
|
|
414
|
+
screenChangePercent: number;
|
|
415
|
+
responseTimeMs: number;
|
|
416
|
+
};
|
|
417
|
+
success?: boolean;
|
|
418
|
+
appContext?: string;
|
|
419
|
+
sequenceIndex: number;
|
|
420
|
+
}
|
|
421
|
+
/** A located element on screen. */
|
|
422
|
+
export interface LocatedElement {
|
|
423
|
+
x: number;
|
|
424
|
+
y: number;
|
|
425
|
+
w: number;
|
|
426
|
+
h: number;
|
|
427
|
+
confidence: number;
|
|
428
|
+
similarity: number;
|
|
429
|
+
label?: string;
|
|
430
|
+
supportCount: number;
|
|
431
|
+
fingerprint: VisualFingerprint;
|
|
432
|
+
click(button?: string): void;
|
|
433
|
+
doubleClick(): void;
|
|
434
|
+
rightClick(): void;
|
|
435
|
+
moveTo(options?: {
|
|
436
|
+
smooth?: boolean;
|
|
437
|
+
duration?: number;
|
|
438
|
+
}): void;
|
|
439
|
+
type(text: string): void;
|
|
440
|
+
/** Verify this element is still visually present */
|
|
441
|
+
isStillPresent(threshold?: number): boolean;
|
|
442
|
+
}
|
|
443
|
+
/** Temporal prediction result. */
|
|
444
|
+
export interface TemporalPrediction {
|
|
445
|
+
nextAction: string;
|
|
446
|
+
confidence: number;
|
|
447
|
+
expectedDelayMs: number;
|
|
448
|
+
predictedRegion?: {
|
|
449
|
+
x: number;
|
|
450
|
+
y: number;
|
|
451
|
+
w: number;
|
|
452
|
+
h: number;
|
|
453
|
+
};
|
|
454
|
+
label?: string;
|
|
455
|
+
}
|
|
456
|
+
/** Configuration for the AgentVision system. */
|
|
457
|
+
export interface AgentVisionConfig {
|
|
458
|
+
/** Grid resolution for screen scanning. Default: 32x18 */
|
|
459
|
+
cols?: number;
|
|
460
|
+
rows?: number;
|
|
461
|
+
/** Maximum memories to keep. Default: 2000 */
|
|
462
|
+
maxMemories?: number;
|
|
463
|
+
/** Fingerprint region around a point. Default: 100x60 */
|
|
464
|
+
defaultRegionSize?: {
|
|
465
|
+
w: number;
|
|
466
|
+
h: number;
|
|
467
|
+
};
|
|
468
|
+
/** Minimum similarity to consider a match. Default: 0.65 */
|
|
469
|
+
matchThreshold?: number;
|
|
470
|
+
/** How far to search around original position. Default: 400px */
|
|
471
|
+
searchRadius?: number;
|
|
472
|
+
/** Application context (e.g. "notepad", "chrome"). Helps filter memories. */
|
|
473
|
+
appContext?: string;
|
|
474
|
+
}
|
|
475
|
+
export declare class AgentVision {
|
|
476
|
+
private cols;
|
|
477
|
+
private rows;
|
|
478
|
+
private maxMemories;
|
|
479
|
+
private defaultRegionSize;
|
|
480
|
+
private matchThreshold;
|
|
481
|
+
private searchRadius;
|
|
482
|
+
private appContext?;
|
|
483
|
+
private memories;
|
|
484
|
+
private sequenceCounter;
|
|
485
|
+
private recentActions;
|
|
486
|
+
private temporalPatterns;
|
|
487
|
+
private gridFingerprints;
|
|
488
|
+
private gridDirty;
|
|
489
|
+
private lastGridTime;
|
|
490
|
+
private lastCapture;
|
|
491
|
+
constructor(config?: AgentVisionConfig);
|
|
492
|
+
/**
|
|
493
|
+
* Fingerprint a screen region. Returns a compact visual descriptor
|
|
494
|
+
* that can be compared against other fingerprints for similarity.
|
|
495
|
+
*
|
|
496
|
+
* @example
|
|
497
|
+
* ```typescript
|
|
498
|
+
* const fp = agentVision.fingerprint(100, 200, 150, 40);
|
|
499
|
+
* // Later: check if the same element is somewhere else
|
|
500
|
+
* const match = agentVision.locate(fp);
|
|
501
|
+
* ```
|
|
502
|
+
*/
|
|
503
|
+
fingerprint(x: number, y: number, w: number, h: number): VisualFingerprint;
|
|
504
|
+
/**
|
|
505
|
+
* Compute similarity between two fingerprints (0-1, 1 = identical).
|
|
506
|
+
*/
|
|
507
|
+
similarity(a: VisualFingerprint, b: VisualFingerprint): number;
|
|
508
|
+
/**
|
|
509
|
+
* Remember an element at a position with a label.
|
|
510
|
+
* The agent fingerprints the region and stores it for later recognition.
|
|
511
|
+
*
|
|
512
|
+
* @example
|
|
513
|
+
* ```typescript
|
|
514
|
+
* // Agent clicked Save, now remembers what it looks like
|
|
515
|
+
* agentVision.remember('save-button', 350, 15, { w: 80, h: 30 });
|
|
516
|
+
*
|
|
517
|
+
* // Later, find it again even if it moved
|
|
518
|
+
* const saveBtn = agentVision.find('save-button');
|
|
519
|
+
* if (saveBtn) saveBtn.click();
|
|
520
|
+
* ```
|
|
521
|
+
*/
|
|
522
|
+
remember(label: string, x: number, y: number, regionSize?: {
|
|
523
|
+
w: number;
|
|
524
|
+
h: number;
|
|
525
|
+
}, action?: string): VisualMemory;
|
|
526
|
+
/**
|
|
527
|
+
* Remember after performing a click — records the action AND verifies it.
|
|
528
|
+
* Captures before/after fingerprints and records whether the click did anything.
|
|
529
|
+
*
|
|
530
|
+
* @example
|
|
531
|
+
* ```typescript
|
|
532
|
+
* const result = await agentVision.rememberClick('file-menu', 44, 12);
|
|
533
|
+
* console.log(`Click ${result.success ? 'worked' : 'had no effect'}`);
|
|
534
|
+
* ```
|
|
535
|
+
*/
|
|
536
|
+
rememberClick(label: string, x: number, y: number, regionSize?: {
|
|
537
|
+
w: number;
|
|
538
|
+
h: number;
|
|
539
|
+
}): Promise<VisualMemory & {
|
|
540
|
+
success: boolean;
|
|
541
|
+
}>;
|
|
542
|
+
/**
|
|
543
|
+
* Find a previously remembered element on the current screen.
|
|
544
|
+
* Uses visual fingerprint matching — works even if the element moved.
|
|
545
|
+
*
|
|
546
|
+
* @returns The best match, or null if nothing above threshold
|
|
547
|
+
*
|
|
548
|
+
* @example
|
|
549
|
+
* ```typescript
|
|
550
|
+
* const saveBtn = agentVision.find('save-button');
|
|
551
|
+
* if (saveBtn) {
|
|
552
|
+
* saveBtn.click();
|
|
553
|
+
* } else {
|
|
554
|
+
* console.log('Save button not visible');
|
|
555
|
+
* }
|
|
556
|
+
* ```
|
|
557
|
+
*/
|
|
558
|
+
find(label: string): LocatedElement | null;
|
|
559
|
+
/**
|
|
560
|
+
* Find all instances of a remembered element on screen.
|
|
561
|
+
*
|
|
562
|
+
* @example
|
|
563
|
+
* ```typescript
|
|
564
|
+
* // Find all things that look like "close-button"
|
|
565
|
+
* const closeButtons = agentVision.findAll('close-button');
|
|
566
|
+
* console.log(`Found ${closeButtons.length} close buttons`);
|
|
567
|
+
* ```
|
|
568
|
+
*/
|
|
569
|
+
findAll(label: string, maxResults?: number): LocatedElement[];
|
|
570
|
+
/**
|
|
571
|
+
* Locate a specific visual fingerprint on the current screen.
|
|
572
|
+
* Does a focused scan: first near the expected position, then widens.
|
|
573
|
+
*
|
|
574
|
+
* @example
|
|
575
|
+
* ```typescript
|
|
576
|
+
* const fp = agentVision.fingerprint(100, 200, 80, 30);
|
|
577
|
+
* // ... some time later, UI may have reorganized ...
|
|
578
|
+
* const found = agentVision.locate(fp, { near: { x: 100, y: 200 } });
|
|
579
|
+
* if (found) found.click();
|
|
580
|
+
* ```
|
|
581
|
+
*/
|
|
582
|
+
locate(target: VisualFingerprint, options?: {
|
|
583
|
+
near?: {
|
|
584
|
+
x: number;
|
|
585
|
+
y: number;
|
|
586
|
+
};
|
|
587
|
+
threshold?: number;
|
|
588
|
+
searchRadius?: number;
|
|
589
|
+
}): LocatedElement | null;
|
|
590
|
+
/**
|
|
591
|
+
* Find an element by text using OCR, then fingerprint it for future recognition.
|
|
592
|
+
* First time: uses OCR. After that: can find it visually even without OCR.
|
|
593
|
+
*
|
|
594
|
+
* @example
|
|
595
|
+
* ```typescript
|
|
596
|
+
* // First call uses OCR to find "Save" text
|
|
597
|
+
* const save = agentVision.findByText('Save');
|
|
598
|
+
* if (save) save.click();
|
|
599
|
+
* // Now agentVision remembers what "Save" looks like visually
|
|
600
|
+
* ```
|
|
601
|
+
*/
|
|
602
|
+
findByText(text: string, options?: {
|
|
603
|
+
remember?: boolean;
|
|
604
|
+
}): LocatedElement | null;
|
|
605
|
+
/**
|
|
606
|
+
* Click an element and verify the screen changed.
|
|
607
|
+
* Returns the located element with success/failure status.
|
|
608
|
+
*
|
|
609
|
+
* @example
|
|
610
|
+
* ```typescript
|
|
611
|
+
* const result = await agentVision.clickAndVerify('submit-button');
|
|
612
|
+
* if (!result.verified) {
|
|
613
|
+
* // Button didn't respond — try again or escalate
|
|
614
|
+
* }
|
|
615
|
+
* ```
|
|
616
|
+
*/
|
|
617
|
+
clickAndVerify(label: string, options?: {
|
|
618
|
+
timeout?: number;
|
|
619
|
+
minChange?: number;
|
|
620
|
+
}): Promise<{
|
|
621
|
+
element: LocatedElement | null;
|
|
622
|
+
verified: boolean;
|
|
623
|
+
changePercent: number;
|
|
624
|
+
}>;
|
|
625
|
+
/**
|
|
626
|
+
* Check if a specific screen region visually changed since a fingerprint was taken.
|
|
627
|
+
*
|
|
628
|
+
* @example
|
|
629
|
+
* ```typescript
|
|
630
|
+
* const before = agentVision.fingerprint(100, 200, 80, 30);
|
|
631
|
+
* await agent.doSomething();
|
|
632
|
+
* const changed = agentVision.hasChanged(before, 100, 200, 80, 30);
|
|
633
|
+
* ```
|
|
634
|
+
*/
|
|
635
|
+
hasChanged(previousFingerprint: VisualFingerprint, x: number, y: number, w: number, h: number, changeThreshold?: number): boolean;
|
|
636
|
+
/**
|
|
637
|
+
* Predict what will happen next based on learned temporal patterns.
|
|
638
|
+
*
|
|
639
|
+
* @example
|
|
640
|
+
* ```typescript
|
|
641
|
+
* // After clicking "File" many times, the agent learns:
|
|
642
|
+
* // "After clicking file-menu, a dropdown appears"
|
|
643
|
+
* const next = agentVision.predictNext();
|
|
644
|
+
* if (next) {
|
|
645
|
+
* console.log(`Expected: ${next.nextAction} in ${next.expectedDelayMs}ms`);
|
|
646
|
+
* }
|
|
647
|
+
* ```
|
|
648
|
+
*/
|
|
649
|
+
predictNext(filterLabel?: string): TemporalPrediction | null;
|
|
650
|
+
/**
|
|
651
|
+
* Wait for a predicted event to happen.
|
|
652
|
+
* Uses temporal patterns to know WHEN and WHERE to look.
|
|
653
|
+
*
|
|
654
|
+
* @example
|
|
655
|
+
* ```typescript
|
|
656
|
+
* // Click File menu
|
|
657
|
+
* agentVision.find('file-menu')?.click();
|
|
658
|
+
* // Wait for the dropdown the agent learned usually appears
|
|
659
|
+
* const appeared = await agentVision.waitForPredicted('dropdown', 3000);
|
|
660
|
+
* ```
|
|
661
|
+
*/
|
|
662
|
+
waitForPredicted(label?: string, timeoutMs?: number): Promise<LocatedElement | null>;
|
|
663
|
+
/**
|
|
664
|
+
* Get all learned temporal patterns.
|
|
665
|
+
*/
|
|
666
|
+
getPatterns(): Array<{
|
|
667
|
+
pattern: string;
|
|
668
|
+
count: number;
|
|
669
|
+
avgDelayMs: number;
|
|
670
|
+
}>;
|
|
671
|
+
/**
|
|
672
|
+
* Scan the full screen and return all regions that match ANY remembered element.
|
|
673
|
+
* Gives the agent a complete understanding of "what's on screen that I recognize."
|
|
674
|
+
*
|
|
675
|
+
* @example
|
|
676
|
+
* ```typescript
|
|
677
|
+
* const recognized = agentVision.scan();
|
|
678
|
+
* for (const el of recognized) {
|
|
679
|
+
* console.log(`Found "${el.label}" at (${el.x},${el.y}) confidence=${el.confidence}`);
|
|
680
|
+
* }
|
|
681
|
+
* ```
|
|
682
|
+
*/
|
|
683
|
+
scan(threshold?: number): LocatedElement[];
|
|
684
|
+
/**
|
|
685
|
+
* Export all visual memories as a JSON string.
|
|
686
|
+
*
|
|
687
|
+
* @example
|
|
688
|
+
* ```typescript
|
|
689
|
+
* const data = agentVision.save();
|
|
690
|
+
* fs.writeFileSync('agent-memory.json', data);
|
|
691
|
+
* ```
|
|
692
|
+
*/
|
|
693
|
+
save(): string;
|
|
694
|
+
/**
|
|
695
|
+
* Load visual memories from a JSON string.
|
|
696
|
+
*
|
|
697
|
+
* @example
|
|
698
|
+
* ```typescript
|
|
699
|
+
* const data = fs.readFileSync('agent-memory.json', 'utf8');
|
|
700
|
+
* agentVision.load(data);
|
|
701
|
+
* // Agent now remembers everything from last session
|
|
702
|
+
* ```
|
|
703
|
+
*/
|
|
704
|
+
load(json: string): void;
|
|
705
|
+
/** Number of stored memories. */
|
|
706
|
+
get memoryCount(): number;
|
|
707
|
+
/** All unique labels the agent has learned. */
|
|
708
|
+
get knownLabels(): string[];
|
|
709
|
+
/** Clear all memories. */
|
|
710
|
+
reset(): void;
|
|
711
|
+
/** Set the app context (filters memories by app). */
|
|
712
|
+
setContext(appContext: string): void;
|
|
713
|
+
private refreshGrid;
|
|
714
|
+
private trackAction;
|
|
715
|
+
private evictOldMemories;
|
|
716
|
+
}
|
|
717
|
+
/** Create a new AgentVision instance. */
|
|
718
|
+
export declare function createAgentVision(config?: AgentVisionConfig): AgentVision;
|
|
719
|
+
//# sourceMappingURL=vision.d.ts.map
|