stelo 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/LICENSE +184 -0
  2. package/README.md +853 -0
  3. package/dist/accessibility.d.ts +227 -0
  4. package/dist/accessibility.d.ts.map +1 -0
  5. package/dist/accessibility.js +602 -0
  6. package/dist/accessibility.js.map +1 -0
  7. package/dist/agent.d.ts +870 -0
  8. package/dist/agent.d.ts.map +1 -0
  9. package/dist/agent.js +1107 -0
  10. package/dist/agent.js.map +1 -0
  11. package/dist/audio-stream.d.ts +114 -0
  12. package/dist/audio-stream.d.ts.map +1 -0
  13. package/dist/audio-stream.js +167 -0
  14. package/dist/audio-stream.js.map +1 -0
  15. package/dist/clipboard.d.ts +99 -0
  16. package/dist/clipboard.d.ts.map +1 -0
  17. package/dist/clipboard.js +352 -0
  18. package/dist/clipboard.js.map +1 -0
  19. package/dist/config.d.ts +183 -0
  20. package/dist/config.d.ts.map +1 -0
  21. package/dist/config.js +477 -0
  22. package/dist/config.js.map +1 -0
  23. package/dist/context.d.ts +213 -0
  24. package/dist/context.d.ts.map +1 -0
  25. package/dist/context.js +387 -0
  26. package/dist/context.js.map +1 -0
  27. package/dist/cortex.d.ts +548 -0
  28. package/dist/cortex.d.ts.map +1 -0
  29. package/dist/cortex.js +1479 -0
  30. package/dist/cortex.js.map +1 -0
  31. package/dist/errors.d.ts +133 -0
  32. package/dist/errors.d.ts.map +1 -0
  33. package/dist/errors.js +278 -0
  34. package/dist/errors.js.map +1 -0
  35. package/dist/events.d.ts +227 -0
  36. package/dist/events.d.ts.map +1 -0
  37. package/dist/events.js +429 -0
  38. package/dist/events.js.map +1 -0
  39. package/dist/executor.d.ts +212 -0
  40. package/dist/executor.d.ts.map +1 -0
  41. package/dist/executor.js +545 -0
  42. package/dist/executor.js.map +1 -0
  43. package/dist/index.d.ts +69 -0
  44. package/dist/index.d.ts.map +1 -0
  45. package/dist/index.js +167 -0
  46. package/dist/index.js.map +1 -0
  47. package/dist/integration.d.ts +159 -0
  48. package/dist/integration.d.ts.map +1 -0
  49. package/dist/integration.js +533 -0
  50. package/dist/integration.js.map +1 -0
  51. package/dist/keyboard.d.ts +276 -0
  52. package/dist/keyboard.d.ts.map +1 -0
  53. package/dist/keyboard.js +404 -0
  54. package/dist/keyboard.js.map +1 -0
  55. package/dist/logger.d.ts +198 -0
  56. package/dist/logger.d.ts.map +1 -0
  57. package/dist/logger.js +516 -0
  58. package/dist/logger.js.map +1 -0
  59. package/dist/middleware.d.ts +183 -0
  60. package/dist/middleware.d.ts.map +1 -0
  61. package/dist/middleware.js +493 -0
  62. package/dist/middleware.js.map +1 -0
  63. package/dist/monitor.d.ts +136 -0
  64. package/dist/monitor.d.ts.map +1 -0
  65. package/dist/monitor.js +341 -0
  66. package/dist/monitor.js.map +1 -0
  67. package/dist/mouse.d.ts +290 -0
  68. package/dist/mouse.d.ts.map +1 -0
  69. package/dist/mouse.js +466 -0
  70. package/dist/mouse.js.map +1 -0
  71. package/dist/plugin.d.ts +157 -0
  72. package/dist/plugin.d.ts.map +1 -0
  73. package/dist/plugin.js +409 -0
  74. package/dist/plugin.js.map +1 -0
  75. package/dist/process.d.ts +106 -0
  76. package/dist/process.d.ts.map +1 -0
  77. package/dist/process.js +326 -0
  78. package/dist/process.js.map +1 -0
  79. package/dist/recorder.d.ts +100 -0
  80. package/dist/recorder.d.ts.map +1 -0
  81. package/dist/recorder.js +258 -0
  82. package/dist/recorder.js.map +1 -0
  83. package/dist/safety.d.ts +59 -0
  84. package/dist/safety.d.ts.map +1 -0
  85. package/dist/safety.js +98 -0
  86. package/dist/safety.js.map +1 -0
  87. package/dist/scheduler.d.ts +152 -0
  88. package/dist/scheduler.d.ts.map +1 -0
  89. package/dist/scheduler.js +615 -0
  90. package/dist/scheduler.js.map +1 -0
  91. package/dist/screen.d.ts +96 -0
  92. package/dist/screen.d.ts.map +1 -0
  93. package/dist/screen.js +154 -0
  94. package/dist/screen.js.map +1 -0
  95. package/dist/session.d.ts +209 -0
  96. package/dist/session.d.ts.map +1 -0
  97. package/dist/session.js +479 -0
  98. package/dist/session.js.map +1 -0
  99. package/dist/stream.d.ts +168 -0
  100. package/dist/stream.d.ts.map +1 -0
  101. package/dist/stream.js +298 -0
  102. package/dist/stream.js.map +1 -0
  103. package/dist/telemetry.d.ts +223 -0
  104. package/dist/telemetry.d.ts.map +1 -0
  105. package/dist/telemetry.js +433 -0
  106. package/dist/telemetry.js.map +1 -0
  107. package/dist/types.d.ts +165 -0
  108. package/dist/types.d.ts.map +1 -0
  109. package/dist/types.js +8 -0
  110. package/dist/types.js.map +1 -0
  111. package/dist/utils/bezier.d.ts +51 -0
  112. package/dist/utils/bezier.d.ts.map +1 -0
  113. package/dist/utils/bezier.js +117 -0
  114. package/dist/utils/bezier.js.map +1 -0
  115. package/dist/utils/helpers.d.ts +90 -0
  116. package/dist/utils/helpers.d.ts.map +1 -0
  117. package/dist/utils/helpers.js +143 -0
  118. package/dist/utils/helpers.js.map +1 -0
  119. package/dist/utils/index.d.ts +4 -0
  120. package/dist/utils/index.d.ts.map +1 -0
  121. package/dist/utils/index.js +18 -0
  122. package/dist/utils/index.js.map +1 -0
  123. package/dist/validation.d.ts +254 -0
  124. package/dist/validation.d.ts.map +1 -0
  125. package/dist/validation.js +478 -0
  126. package/dist/validation.js.map +1 -0
  127. package/dist/vision.d.ts +719 -0
  128. package/dist/vision.d.ts.map +1 -0
  129. package/dist/vision.js +1197 -0
  130. package/dist/vision.js.map +1 -0
  131. package/dist/window.d.ts +80 -0
  132. package/dist/window.d.ts.map +1 -0
  133. package/dist/window.js +170 -0
  134. package/dist/window.js.map +1 -0
  135. package/dist/workflow.d.ts +224 -0
  136. package/dist/workflow.d.ts.map +1 -0
  137. package/dist/workflow.js +578 -0
  138. package/dist/workflow.js.map +1 -0
  139. package/index.d.ts +840 -0
  140. package/index.js +495 -0
  141. package/package.json +91 -0
@@ -0,0 +1,719 @@
1
+ /**
2
+ * Statistics produced by comparing the current screen against a reference capture (the return type of `vision.diff()`).
3
+ */
4
+ export interface ScreenDiff {
5
+ /** Percentage of pixels that changed (0.0 - 100.0). NOTE(review): presumably changedPixelCount relative to totalPixelCount — confirm */
6
+ changePercentage: number;
7
+ /** Total number of pixels that changed */
8
+ changedPixelCount: number;
9
+ /** Total number of pixels compared. NOTE(review): presumably fewer than the capture size when DiffOptions.sampleRate is above 1 — confirm */
10
+ totalPixelCount: number;
11
+ /** Bounding box of the changed region; the property is optional and only present if there is such a region */
12
+ changedBounds?: {
13
+ x: number;
14
+ y: number;
15
+ width: number;
16
+ height: number;
17
+ };
18
+ /** Average color difference across all pixels. NOTE(review): presumably on the same color-distance scale as DiffOptions.tolerance — confirm */
19
+ averageDiff: number;
20
+ /** Maximum single-pixel color difference */
21
+ maxDiff: number;
22
+ /** True if any change was detected */
23
+ hasChanges: boolean;
24
+ }
25
+ /**
26
+ * Per-cell statistics for one cell of a grid analysis (the element type of `GridAnalysis.cells`).
27
+ */
28
+ export interface GridCell {
29
+ /** Cell position in grid (0-indexed): gridX is the column index, gridY the row index */
30
+ gridX: number;
31
+ gridY: number;
32
+ /** Screen coordinates of cell. NOTE(review): presumably the cell's top-left corner, since gridCellCenter() exists separately — confirm */
33
+ screenX: number;
34
+ screenY: number;
35
+ /** Cell dimensions */
36
+ width: number;
37
+ height: number;
38
+ /** Average color of the cell, as separate red, green and blue components */
39
+ avgColor: {
40
+ r: number;
41
+ g: number;
42
+ b: number;
43
+ };
44
+ /** Color variance (higher = more complex content) */
45
+ variance: number;
46
+ /** Whether this cell likely contains text (high contrast) */
47
+ likelyText: boolean;
48
+ /** Whether this cell likely contains UI elements */
49
+ likelyUI: boolean;
50
+ }
51
+ /**
52
+ * Grid analysis result returned by `vision.analyzeGrid()`: the grid layout plus statistics for each cell.
53
+ */
54
+ export interface GridAnalysis {
55
+ /** Grid dimensions: number of columns and number of rows */
56
+ cols: number;
57
+ rows: number;
58
+ /** Cell size in pixels */
59
+ cellWidth: number;
60
+ cellHeight: number;
61
+ /** All grid cells */
62
+ cells: GridCell[];
63
+ /** Cells with high activity (likely UI elements), identified by their grid position */
64
+ activeCells: Array<{
65
+ gridX: number;
66
+ gridY: number;
67
+ }>;
68
+ }
69
+ /**
70
+ * Rectangular region on screen: a position (x, y) plus a size (width, height). NOTE(review): presumably screen pixels with (x, y) as the top-left corner — confirm.
71
+ */
72
+ export interface Region {
73
+ x: number;
74
+ y: number;
75
+ width: number;
76
+ height: number;
77
+ }
78
+ /**
79
+ * Reference capture for diff operations: returned by `vision.captureReference()` and later passed to `vision.diff()`. NOTE(review): width/height/x/y presumably describe the captured region and `data` its pixels; the pixel format is not visible here — confirm.
80
+ */
81
+ export interface CaptureReference {
82
+ data: Buffer;
83
+ width: number;
84
+ height: number;
85
+ x: number;
86
+ y: number;
87
+ }
88
+ /**
89
+ * Options for diff operations (the optional second argument of `vision.diff()`).
90
+ */
91
+ export interface DiffOptions {
92
+ /** Minimum color distance to consider a difference (0-441). Default: 10. NOTE(review): 441 is about sqrt(3) x 255, so this is presumably Euclidean RGB distance — confirm */
93
+ tolerance?: number;
94
+ /** Only check every Nth pixel (1 = all). Higher = faster but less accurate */
95
+ sampleRate?: number;
96
+ }
97
+ /**
98
+ * Options for waiting operations (accepted by `vision.waitForChange()` and `vision.waitForStable()`).
99
+ */
100
+ export interface WaitOptions {
101
+ /** Region to monitor (full screen if not specified) */
102
+ region?: Region;
103
+ /** Polling interval in ms. Default: 50 */
104
+ pollIntervalMs?: number;
105
+ }
106
+ /**
107
+ * Vision and change detection utilities for automation workflows.
108
+ *
109
+ * These primitives enable visual grounding, action verification, and
110
+ * state tracking - essential building blocks for robust automation flows.
111
+ *
112
+ * @example
113
+ * ```typescript
114
+ * import { vision, mouse } from 'stelo';
115
+ *
116
+ * // Take a reference screenshot
117
+ * const before = vision.captureReference();
118
+ *
119
+ * // Perform some action
120
+ * await mouse.click();
121
+ *
122
+ * // Verify the screen changed
123
+ * const diff = vision.diff(before);
124
+ * console.log(`${diff.changePercentage}% of screen changed`);
125
+ *
126
+ * // Wait for UI to stabilize after action (threshold %, stable duration ms, timeout ms)
127
+ * await vision.waitForStable(0.5, 200, 5000);
128
+ *
129
+ * // Analyze screen as a grid for vision model
130
+ * const grid = vision.analyzeGrid(16, 9);
131
+ * const textCells = grid.cells.filter(c => c.likelyText);
132
+ * ```
133
+ */
134
+ export declare const vision: {
135
+ /**
136
+ * Capture a reference screenshot for later comparison.
137
+ * Use this before triggering an action to verify it had an effect.
138
+ *
139
+ * @param region - Optional region to capture (full screen if not specified)
140
+ * @returns Reference object to pass to diff()
141
+ *
142
+ * @example
143
+ * ```typescript
144
+ * const before = vision.captureReference();
145
+ * await mouse.click();
146
+ * const diff = vision.diff(before);
147
+ * ```
148
+ */
149
+ captureReference(region?: Region): CaptureReference;
150
+ /**
151
+ * Compare current screen to a reference capture.
152
+ * Returns detailed diff statistics including change percentage and bounds.
153
+ *
154
+ * @param reference - Reference from captureReference()
155
+ * @param options - Diff options (tolerance, sample rate)
156
+ * @returns Diff result with change statistics
157
+ *
158
+ * @example
159
+ * ```typescript
160
+ * const diff = vision.diff(before, { tolerance: 15 });
161
+ * if (diff.changePercentage > 1) {
162
+ * console.log('Screen changed!', diff.changedBounds);
163
+ * }
164
+ * ```
165
+ */
166
+ diff(reference: CaptureReference, options?: DiffOptions): ScreenDiff;
167
+ /**
168
+ * Analyze screen as a grid of cells.
169
+ * Each cell includes statistics useful for vision model region selection.
170
+ *
171
+ * This enables efficient visual grounding - instead of sending the entire
172
+ * screen to a vision model, you can identify regions of interest first.
173
+ *
174
+ * @param cols - Number of columns in grid
175
+ * @param rows - Number of rows in grid
176
+ * @param region - Optional region to analyze (full screen if not specified)
177
+ * @returns Grid analysis with cell statistics
178
+ *
179
+ * @example
180
+ * ```typescript
181
+ * // Analyze screen as 16x9 grid
182
+ * const grid = vision.analyzeGrid(16, 9);
183
+ *
184
+ * // Find cells likely containing text
185
+ * const textCells = grid.cells.filter(c => c.likelyText);
186
+ *
187
+ * // Get center of cell [3, 2] (undefined if the indices are out of bounds)
188
+ * const center = vision.gridCellCenter(grid, 3, 2);
189
+ * if (center) await mouse.click(center.x, center.y);
190
+ * ```
191
+ */
192
+ analyzeGrid(cols: number, rows: number, region?: Region): GridAnalysis;
193
+ /**
194
+ * Get the screen center point of a grid cell.
195
+ *
196
+ * @param grid - Grid analysis from analyzeGrid()
197
+ * @param gridX - Cell column index
198
+ * @param gridY - Cell row index
199
+ * @returns Screen coordinates of cell center, or undefined if out of bounds
200
+ */
201
+ gridCellCenter(grid: GridAnalysis, gridX: number, gridY: number): {
202
+ x: number;
203
+ y: number;
204
+ } | undefined;
205
+ /**
206
+ * Wait until the screen changes beyond a threshold.
207
+ * Useful for detecting when an action triggers a visual response.
208
+ *
209
+ * @param thresholdPercent - Minimum change percentage to trigger (0-100)
210
+ * @param timeoutMs - Maximum time to wait
211
+ * @param options - Wait options (region, poll interval)
212
+ * @returns true if change detected, false if timed out
213
+ *
214
+ * @example
215
+ * ```typescript
216
+ * // Click and wait for visual feedback
217
+ * mouse.click();
218
+ * const changed = await vision.waitForChange(0.5, 3000);
219
+ * if (!changed) console.log('Button may not have responded');
220
+ * ```
221
+ */
222
+ waitForChange(thresholdPercent: number, timeoutMs: number, options?: WaitOptions): Promise<boolean>;
223
+ /**
224
+ * Wait until the screen stabilizes (stops changing).
225
+ * Essential for waiting for animations, loading spinners, or transitions.
226
+ *
227
+ * @param stabilityThreshold - Maximum change % to consider "stable" (0-100)
228
+ * @param stableDurationMs - How long screen must remain stable
229
+ * @param timeoutMs - Maximum time to wait
230
+ * @param options - Wait options (region, poll interval)
231
+ * @returns true if stabilized, false if timed out
232
+ *
233
+ * @example
234
+ * ```typescript
235
+ * // Click a button and wait for animation to complete
236
+ * mouse.click();
237
+ * await vision.waitForStable(0.1, 200, 5000);
238
+ * // Screen is now stable - safe to read or continue
239
+ * ```
240
+ */
241
+ waitForStable(stabilityThreshold: number, stableDurationMs: number, timeoutMs: number, options?: WaitOptions): Promise<boolean>;
242
+ /**
243
+ * Compute a perceptual hash of a screen region.
244
+ * Two visually similar images will have hashes with low Hamming distance.
245
+ *
246
+ * Use this for fast "has the screen changed significantly?" checks
247
+ * without doing full pixel comparison.
248
+ *
249
+ * @param region - Optional region to hash (full screen if not specified)
250
+ * @returns 64-bit perceptual hash (NOTE(review): typed as a JS number, which represents integers exactly only up to 2^53 — confirm how the 64 bits are encoded)
251
+ *
252
+ * @example
253
+ * ```typescript
254
+ * const hash1 = vision.perceptualHash();
255
+ * await performSomeAction();
256
+ * const hash2 = vision.perceptualHash();
257
+ * const distance = vision.hashDistance(hash1, hash2);
258
+ * if (distance < 5) console.log('Screen looks similar');
259
+ * ```
260
+ */
261
+ perceptualHash(region?: Region): number;
262
+ /**
263
+ * Compute Hamming distance between two perceptual hashes.
264
+ * Lower distance = more visually similar. 0 = identical.
265
+ *
266
+ * Rules of thumb:
267
+ * - 0-5: Very similar (minor changes)
268
+ * - 5-10: Moderately similar
269
+ * - 10-20: Significant differences
270
+ * - 20+: Completely different
271
+ *
272
+ * @param hash1 - First perceptual hash
273
+ * @param hash2 - Second perceptual hash
274
+ * @returns Hamming distance (0-64)
275
+ */
276
+ hashDistance(hash1: number, hash2: number): number;
277
+ /**
278
+ * Find all pixels matching a color within a region.
279
+ * Returns all matching points up to a maximum count.
280
+ *
281
+ * @param color - Color to search for (RGB)
282
+ * @param tolerance - Color distance tolerance (0-441)
283
+ * @param options - Search options: `maxResults` (maximum count of points) and `region` (area to search), both optional
284
+ * @returns Array of matching screen coordinates
285
+ *
286
+ * @example
287
+ * ```typescript
288
+ * // Find all red pixels
289
+ * const redPixels = vision.findAllColors(
290
+ * { r: 255, g: 0, b: 0 },
291
+ * 30,
292
+ * { maxResults: 100 }
293
+ * );
294
+ * ```
295
+ */
296
+ findAllColors(color: {
297
+ r: number;
298
+ g: number;
299
+ b: number;
300
+ }, tolerance: number, options?: {
301
+ maxResults?: number;
302
+ region?: Region;
303
+ }): Array<{
304
+ x: number;
305
+ y: number;
306
+ }>;
307
+ /**
308
+ * Find clusters of similar colors (potential UI elements).
309
+ * Clusters are groups of nearby pixels with similar colors.
310
+ *
311
+ * @param color - Color to search for (RGB)
312
+ * @param tolerance - Color distance tolerance (0-441)
313
+ * @param minClusterSize - Minimum pixels to form a cluster
314
+ * @param region - Optional region to search (full screen if not specified)
315
+ * @returns Array of bounding rectangles for each cluster
316
+ *
317
+ * @example
318
+ * ```typescript
319
+ * // Find blue button-like regions
320
+ * const clusters = vision.findColorClusters(
321
+ * { r: 0, g: 120, b: 215 }, // Windows blue
322
+ * 40,
323
+ * 50 // At least 50 pixels
324
+ * );
325
+ * if (clusters.length > 0) {
326
+ * // Click center of first cluster
327
+ * const btn = clusters[0];
328
+ * mouse.click(btn.x + btn.width / 2, btn.y + btn.height / 2);
329
+ * }
330
+ * ```
331
+ */
332
+ findColorClusters(color: {
333
+ r: number;
334
+ g: number;
335
+ b: number;
336
+ }, tolerance: number, minClusterSize: number, region?: Region): Region[];
337
+ /**
338
+ * Verify an action caused a visual change.
339
+ * High-level primitive that captures before/after and compares.
340
+ *
341
+ * @param action - Async action to execute and verify
342
+ * @param minChangePercent - Minimum change to consider verified
343
+ * @param timeoutMs - Maximum time to wait for change
344
+ * @param region - Optional region to monitor
345
+ * @returns Verification result with diff statistics: `verified`, the `diff` itself, and `durationMs`
346
+ *
347
+ * @example
348
+ * ```typescript
349
+ * const result = await vision.verifyAction(
350
+ * async () => { mouse.click(); },
351
+ * 0.5, // At least 0.5% change
352
+ * 2000
353
+ * );
354
+ * if (!result.verified) {
355
+ * // Click didn't cause visual change - might need to retry
356
+ * }
357
+ * ```
358
+ */
359
+ verifyAction(action: () => Promise<void> | void, minChangePercent: number, timeoutMs: number, region?: Region): Promise<{
360
+ verified: boolean;
361
+ diff: ScreenDiff;
362
+ durationMs: number;
363
+ }>;
364
+ /**
365
+ * Take a reference, perform action, wait for stability.
366
+ * Combines action execution with waiting for the UI to settle.
367
+ *
368
+ * @param action - Action to execute
369
+ * @param options - Wait and verification options: `stabilityThreshold`, `stableDurationMs`, `timeoutMs`, `region` (all optional; defaults are not visible in this declaration)
370
+ * @returns true if action completed and screen stabilized
371
+ *
372
+ * @example
373
+ * ```typescript
374
+ * // Click and wait for any animation to complete
375
+ * await vision.doAndWaitStable(async () => {
376
+ * await mouse.click(100, 200);
377
+ * });
378
+ * // Screen is now stable
379
+ * ```
380
+ */
381
+ doAndWaitStable(action: () => Promise<void> | void, options?: {
382
+ stabilityThreshold?: number;
383
+ stableDurationMs?: number;
384
+ timeoutMs?: number;
385
+ region?: Region;
386
+ }): Promise<boolean>;
387
+ };
388
+ /** Compact visual descriptor for a screen region, as produced by AgentVision.fingerprint() and compared with AgentVision.similarity(). */
389
+ export interface VisualFingerprint {
390
+ brightnessHist: Float64Array;
391
+ hueHist: Float64Array;
392
+ edgeDensity: number;
393
+ avgBrightness: number;
394
+ contrast: number;
395
+ variance: number;
396
+ aspect: number;
397
+ phash: number;
398
+ }
399
+ /** A remembered interaction with a screen element, as returned by AgentVision.remember() and AgentVision.rememberClick(). */
400
+ export interface VisualMemory {
401
+ fingerprint: VisualFingerprint;
402
+ action: string;
403
+ label?: string;
404
+ position: {
405
+ x: number;
406
+ y: number;
407
+ };
408
+ regionSize: {
409
+ w: number;
410
+ h: number;
411
+ };
412
+ timestamp: number;
413
+ outcome?: {
414
+ screenChangePercent: number;
415
+ responseTimeMs: number;
416
+ };
417
+ success?: boolean;
418
+ appContext?: string;
419
+ sequenceIndex: number;
420
+ }
421
+ /** A located element on screen, as returned by AgentVision lookups such as find(), findAll(), locate(), findByText() and scan(); also carries interaction helpers (click, doubleClick, rightClick, moveTo, type). */
422
+ export interface LocatedElement {
423
+ x: number;
424
+ y: number;
425
+ w: number;
426
+ h: number;
427
+ confidence: number;
428
+ similarity: number;
429
+ label?: string;
430
+ supportCount: number;
431
+ fingerprint: VisualFingerprint;
432
+ click(button?: string): void;
433
+ doubleClick(): void;
434
+ rightClick(): void;
435
+ moveTo(options?: {
436
+ smooth?: boolean;
437
+ duration?: number;
438
+ }): void;
439
+ type(text: string): void;
440
+ /** Verify this element is still visually present (NOTE(review): the optional threshold is presumably a similarity cutoff — confirm) */
441
+ isStillPresent(threshold?: number): boolean;
442
+ }
443
+ /** Temporal prediction result, as returned by AgentVision.predictNext(). */
444
+ export interface TemporalPrediction {
445
+ nextAction: string;
446
+ confidence: number;
447
+ expectedDelayMs: number;
448
+ predictedRegion?: {
449
+ x: number;
450
+ y: number;
451
+ w: number;
452
+ h: number;
453
+ };
454
+ label?: string;
455
+ }
456
+ /** Configuration for the AgentVision system; every field is optional. Accepted by the AgentVision constructor and by createAgentVision(). */
457
+ export interface AgentVisionConfig {
458
+ /** Grid resolution for screen scanning (columns and rows). Default: 32x18 */
459
+ cols?: number;
460
+ rows?: number;
461
+ /** Maximum memories to keep. Default: 2000 */
462
+ maxMemories?: number;
463
+ /** Fingerprint region around a point. Default: 100x60 */
464
+ defaultRegionSize?: {
465
+ w: number;
466
+ h: number;
467
+ };
468
+ /** Minimum similarity to consider a match. Default: 0.65 */
469
+ matchThreshold?: number;
470
+ /** How far to search around original position. Default: 400px */
471
+ searchRadius?: number;
472
+ /** Application context (e.g. "notepad", "chrome"). Helps filter memories. */
473
+ appContext?: string;
474
+ }
475
+ export declare class AgentVision {
476
+ private cols;
477
+ private rows;
478
+ private maxMemories;
479
+ private defaultRegionSize;
480
+ private matchThreshold;
481
+ private searchRadius;
482
+ private appContext?;
483
+ private memories;
484
+ private sequenceCounter;
485
+ private recentActions;
486
+ private temporalPatterns;
487
+ private gridFingerprints;
488
+ private gridDirty;
489
+ private lastGridTime;
490
+ private lastCapture;
491
+ constructor(config?: AgentVisionConfig);
492
+ /**
493
+ * Fingerprint a screen region. Returns a compact visual descriptor
494
+ * that can be compared against other fingerprints for similarity.
495
+ *
496
+ * @example
497
+ * ```typescript
498
+ * const fp = agentVision.fingerprint(100, 200, 150, 40);
499
+ * // Later: check if the same element is somewhere else
500
+ * const match = agentVision.locate(fp);
501
+ * ```
502
+ */
503
+ fingerprint(x: number, y: number, w: number, h: number): VisualFingerprint;
504
+ /**
505
+ * Compute similarity between two fingerprints (0-1, 1 = identical).
506
+ */
507
+ similarity(a: VisualFingerprint, b: VisualFingerprint): number;
508
+ /**
509
+ * Remember an element at a position with a label.
510
+ * The agent fingerprints the region and stores it for later recognition.
511
+ *
512
+ * @example
513
+ * ```typescript
514
+ * // Agent clicked Save, now remembers what it looks like
515
+ * agentVision.remember('save-button', 350, 15, { w: 80, h: 30 });
516
+ *
517
+ * // Later, find it again even if it moved
518
+ * const saveBtn = agentVision.find('save-button');
519
+ * if (saveBtn) saveBtn.click();
520
+ * ```
521
+ */
522
+ remember(label: string, x: number, y: number, regionSize?: {
523
+ w: number;
524
+ h: number;
525
+ }, action?: string): VisualMemory;
526
+ /**
527
+ * Remember after performing a click — records the action AND verifies it.
528
+ * Captures before/after fingerprints and records whether the click did anything.
529
+ *
530
+ * @example
531
+ * ```typescript
532
+ * const result = await agentVision.rememberClick('file-menu', 44, 12);
533
+ * console.log(`Click ${result.success ? 'worked' : 'had no effect'}`);
534
+ * ```
535
+ */
536
+ rememberClick(label: string, x: number, y: number, regionSize?: {
537
+ w: number;
538
+ h: number;
539
+ }): Promise<VisualMemory & {
540
+ success: boolean;
541
+ }>;
542
+ /**
543
+ * Find a previously remembered element on the current screen.
544
+ * Uses visual fingerprint matching — works even if the element moved.
545
+ *
546
+ * @returns The best match, or null if nothing above threshold
547
+ *
548
+ * @example
549
+ * ```typescript
550
+ * const saveBtn = agentVision.find('save-button');
551
+ * if (saveBtn) {
552
+ * saveBtn.click();
553
+ * } else {
554
+ * console.log('Save button not visible');
555
+ * }
556
+ * ```
557
+ */
558
+ find(label: string): LocatedElement | null;
559
+ /**
560
+ * Find all instances of a remembered element on screen (NOTE(review): the optional `maxResults` presumably caps how many are returned — confirm).
561
+ *
562
+ * @example
563
+ * ```typescript
564
+ * // Find all things that look like "close-button"
565
+ * const closeButtons = agentVision.findAll('close-button');
566
+ * console.log(`Found ${closeButtons.length} close buttons`);
567
+ * ```
568
+ */
569
+ findAll(label: string, maxResults?: number): LocatedElement[];
570
+ /**
571
+ * Locate a specific visual fingerprint on the current screen.
572
+ * Does a focused scan: first near the expected position, then widens.
573
+ *
574
+ * @example
575
+ * ```typescript
576
+ * const fp = agentVision.fingerprint(100, 200, 80, 30);
577
+ * // ... some time later, UI may have reorganized ...
578
+ * const found = agentVision.locate(fp, { near: { x: 100, y: 200 } });
579
+ * if (found) found.click();
580
+ * ```
581
+ */
582
+ locate(target: VisualFingerprint, options?: {
583
+ near?: {
584
+ x: number;
585
+ y: number;
586
+ };
587
+ threshold?: number;
588
+ searchRadius?: number;
589
+ }): LocatedElement | null;
590
+ /**
591
+ * Find an element by text using OCR, then fingerprint it for future recognition.
592
+ * First time: uses OCR. After that: can find it visually even without OCR.
593
+ *
594
+ * @example
595
+ * ```typescript
596
+ * // First call uses OCR to find "Save" text
597
+ * const save = agentVision.findByText('Save');
598
+ * if (save) save.click();
599
+ * // Now agentVision remembers what "Save" looks like visually
600
+ * ```
601
+ */
602
+ findByText(text: string, options?: {
603
+ remember?: boolean;
604
+ }): LocatedElement | null;
605
+ /**
606
+ * Click an element and verify the screen changed.
607
+ * Returns the located element with success/failure status.
608
+ *
609
+ * @example
610
+ * ```typescript
611
+ * const result = await agentVision.clickAndVerify('submit-button');
612
+ * if (!result.verified) {
613
+ * // Button didn't respond — try again or escalate
614
+ * }
615
+ * ```
616
+ */
617
+ clickAndVerify(label: string, options?: {
618
+ timeout?: number;
619
+ minChange?: number;
620
+ }): Promise<{
621
+ element: LocatedElement | null;
622
+ verified: boolean;
623
+ changePercent: number;
624
+ }>;
625
+ /**
626
+ * Check if a specific screen region (x, y, w, h) visually changed since a fingerprint was taken. NOTE(review): the scale and default of the optional `changeThreshold` are not visible in this declaration — confirm.
627
+ *
628
+ * @example
629
+ * ```typescript
630
+ * const before = agentVision.fingerprint(100, 200, 80, 30);
631
+ * await agent.doSomething();
632
+ * const changed = agentVision.hasChanged(before, 100, 200, 80, 30);
633
+ * ```
634
+ */
635
+ hasChanged(previousFingerprint: VisualFingerprint, x: number, y: number, w: number, h: number, changeThreshold?: number): boolean;
636
+ /**
637
+ * Predict what will happen next based on learned temporal patterns. Returns null when no prediction is available (NOTE(review): exact effect of the optional `filterLabel` is not visible here — confirm).
638
+ *
639
+ * @example
640
+ * ```typescript
641
+ * // After clicking "File" many times, the agent learns:
642
+ * // "After clicking file-menu, a dropdown appears"
643
+ * const next = agentVision.predictNext();
644
+ * if (next) {
645
+ * console.log(`Expected: ${next.nextAction} in ${next.expectedDelayMs}ms`);
646
+ * }
647
+ * ```
648
+ */
649
+ predictNext(filterLabel?: string): TemporalPrediction | null;
650
+ /**
651
+ * Wait for a predicted event to happen.
652
+ * Uses temporal patterns to know WHEN and WHERE to look. Resolves to the located element, or null (NOTE(review): presumably when nothing is found before `timeoutMs` elapses — confirm).
653
+ *
654
+ * @example
655
+ * ```typescript
656
+ * // Click File menu
657
+ * agentVision.find('file-menu')?.click();
658
+ * // Wait for the dropdown the agent learned usually appears
659
+ * const appeared = await agentVision.waitForPredicted('dropdown', 3000);
660
+ * ```
661
+ */
662
+ waitForPredicted(label?: string, timeoutMs?: number): Promise<LocatedElement | null>;
663
+ /**
664
+ * Get all learned temporal patterns, each with its occurrence count and average delay in milliseconds.
665
+ */
666
+ getPatterns(): Array<{
667
+ pattern: string;
668
+ count: number;
669
+ avgDelayMs: number;
670
+ }>;
671
+ /**
672
+ * Scan the full screen and return all regions that match ANY remembered element.
673
+ * Gives the agent a complete understanding of "what's on screen that I recognize."
674
+ *
675
+ * @example
676
+ * ```typescript
677
+ * const recognized = agentVision.scan();
678
+ * for (const el of recognized) {
679
+ * console.log(`Found "${el.label}" at (${el.x},${el.y}) confidence=${el.confidence}`);
680
+ * }
681
+ * ```
682
+ */
683
+ scan(threshold?: number): LocatedElement[];
684
+ /**
685
+ * Export all visual memories as a JSON string.
686
+ *
687
+ * @example
688
+ * ```typescript
689
+ * const data = agentVision.save();
690
+ * fs.writeFileSync('agent-memory.json', data);
691
+ * ```
692
+ */
693
+ save(): string;
694
+ /**
695
+ * Load visual memories from a JSON string (for example one previously produced by save()).
696
+ *
697
+ * @example
698
+ * ```typescript
699
+ * const data = fs.readFileSync('agent-memory.json', 'utf8');
700
+ * agentVision.load(data);
701
+ * // Agent now remembers everything from last session
702
+ * ```
703
+ */
704
+ load(json: string): void;
705
+ /** Number of stored memories. */
706
+ get memoryCount(): number;
707
+ /** All unique labels the agent has learned. */
708
+ get knownLabels(): string[];
709
+ /** Clear all memories. */
710
+ reset(): void;
711
+ /** Set the app context (filters memories by app). */
712
+ setContext(appContext: string): void;
713
+ private refreshGrid;
714
+ private trackAction;
715
+ private evictOldMemories;
716
+ }
717
+ /** Create a new AgentVision instance, optionally configured through `config`. */
718
+ export declare function createAgentVision(config?: AgentVisionConfig): AgentVision;
719
+ //# sourceMappingURL=vision.d.ts.map