@aspiresys/visor 1.1.6 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/matcher.js +4 -0
- package/dist/ocr.js +7 -1
- package/package.json +1 -2
- package/readme.md +171 -203
package/dist/matcher.js
CHANGED
package/dist/ocr.js
CHANGED
|
@@ -76,5 +76,11 @@ async function extractTextFromRegion(region) {
|
|
|
76
76
|
});
|
|
77
77
|
(0, logger_1.log)("[OCR] Extracted Text:");
|
|
78
78
|
(0, logger_1.log)(result.data.text);
|
|
79
|
-
return result.data;
|
|
79
|
+
return result.data.text;
|
|
80
80
|
}
|
|
81
|
+
(async () => {
|
|
82
|
+
const start = Date.now();
|
|
83
|
+
const text = await extractTextFromRegion();
|
|
84
|
+
console.log(`OCR Time: ${Date.now() - start} ms`);
|
|
85
|
+
await terminateOCR();
|
|
86
|
+
})();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aspiresys/visor",
|
|
3
|
-
"version": "1.1.6",
|
|
3
|
+
"version": "1.1.8",
|
|
4
4
|
"main": "dist/index.js",
|
|
5
5
|
"types": "dist/index.d.ts",
|
|
6
6
|
"scripts": {
|
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
"pngjs": "^7.0.0",
|
|
14
14
|
"screenshot-desktop": "^1.15.1",
|
|
15
15
|
"sharp": "^0.34.5",
|
|
16
|
-
"systeminformation": "^5.31.7",
|
|
17
16
|
"tesseract.js": "^7.0.0"
|
|
18
17
|
},
|
|
19
18
|
"devDependencies": {
|
package/readme.md
CHANGED
|
@@ -4,10 +4,10 @@ Desktop Visual Automation Framework for Node.js and TypeScript.
|
|
|
4
4
|
|
|
5
5
|
Visor is a visual desktop automation framework that combines:
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
* OpenCV image matching
|
|
8
|
+
* OCR text recognition
|
|
9
|
+
* Mouse & keyboard automation
|
|
10
|
+
* Desktop application automation
|
|
11
11
|
|
|
12
12
|
Visor is designed for automating desktop workflows using visual interactions instead of traditional DOM/browser automation.
|
|
13
13
|
|
|
@@ -15,26 +15,28 @@ Visor is designed for automating desktop workflows using visual interactions ins
|
|
|
15
15
|
|
|
16
16
|
# Features
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
18
|
+
* OpenCV-based image matching
|
|
19
|
+
* Multi-scale image matching
|
|
20
|
+
* OCR automation using Tesseract
|
|
21
|
+
* OCR occurrence indexing
|
|
22
|
+
* Region OCR support
|
|
23
|
+
* Automatic display scaling detection
|
|
24
|
+
* Mouse automation
|
|
25
|
+
* Keyboard automation
|
|
26
|
+
* Drag & drop support
|
|
27
|
+
* Multi-theme image handling
|
|
28
|
+
* Screenshot capture
|
|
29
|
+
* Desktop application automation
|
|
30
|
+
* OCR text searching
|
|
31
|
+
* Wait APIs
|
|
32
|
+
* Multi-image matching
|
|
33
|
+
* Config-driven initialization
|
|
34
|
+
* High-DPI display scaling support
|
|
31
35
|
|
|
32
36
|
---
|
|
33
37
|
|
|
34
38
|
# Installation
|
|
35
39
|
|
|
36
|
-
## Install from npm
|
|
37
|
-
|
|
38
40
|
```bash
|
|
39
41
|
npm install @aspiresys/visor
|
|
40
42
|
```
|
|
@@ -43,23 +45,20 @@ npm install @aspiresys/visor
|
|
|
43
45
|
|
|
44
46
|
# Requirements
|
|
45
47
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
- Node.js 18+
|
|
50
|
-
- TypeScript
|
|
48
|
+
* Windows
|
|
49
|
+
* Node.js 18+
|
|
50
|
+
* TypeScript
|
|
51
51
|
|
|
52
52
|
---
|
|
53
53
|
|
|
54
54
|
# Quick Start
|
|
55
55
|
|
|
56
56
|
```ts
|
|
57
|
-
import { visor } from "visor";
|
|
57
|
+
import { visor } from "@aspiresys/visor";
|
|
58
58
|
|
|
59
59
|
async function main() {
|
|
60
60
|
|
|
61
61
|
visor.loadConfig({
|
|
62
|
-
scaleFactor: 1.5,
|
|
63
62
|
imagePath: "./images",
|
|
64
63
|
debug: true
|
|
65
64
|
});
|
|
@@ -73,7 +72,6 @@ async function main() {
|
|
|
73
72
|
await visor.type(
|
|
74
73
|
"Hello from Visor"
|
|
75
74
|
);
|
|
76
|
-
|
|
77
75
|
}
|
|
78
76
|
|
|
79
77
|
main();
|
|
@@ -83,11 +81,8 @@ main();
|
|
|
83
81
|
|
|
84
82
|
# Configuration
|
|
85
83
|
|
|
86
|
-
Visor supports centralized framework configuration.
|
|
87
|
-
|
|
88
84
|
```ts
|
|
89
85
|
visor.loadConfig({
|
|
90
|
-
scaleFactor: 1.5,
|
|
91
86
|
imagePath: "./images",
|
|
92
87
|
debug: true
|
|
93
88
|
});
|
|
@@ -97,38 +92,59 @@ visor.loadConfig({
|
|
|
97
92
|
|
|
98
93
|
## Configuration Options
|
|
99
94
|
|
|
100
|
-
| Option
|
|
101
|
-
|
|
102
|
-
|
|
|
103
|
-
|
|
|
104
|
-
|
|
|
95
|
+
| Option | Description |
|
|
96
|
+
| ------------ | ---------------------------------------- |
|
|
97
|
+
| scaleFactor | Optional manual display scaling override |
|
|
98
|
+
| imagePath | Default image directory |
|
|
99
|
+
| ssOutputPath | Screenshot output directory |
|
|
100
|
+
| debug | Enable debug logging |
|
|
105
101
|
|
|
106
102
|
---
|
|
107
103
|
|
|
108
104
|
# Display Scaling
|
|
109
105
|
|
|
110
|
-
|
|
106
|
+
Visor automatically detects Windows display scaling and adjusts mouse coordinates accordingly.
|
|
111
107
|
|
|
112
|
-
Common values:
|
|
108
|
+
Common scaling values:
|
|
113
109
|
|
|
114
110
|
| Scaling | Value |
|
|
115
|
-
|
|
116
|
-
| 100%
|
|
117
|
-
| 125%
|
|
118
|
-
| 150%
|
|
119
|
-
|
|
|
111
|
+
| ------- | ----- |
|
|
112
|
+
| 100% | 1.0 |
|
|
113
|
+
| 125% | 1.25 |
|
|
114
|
+
| 150% | 1.5 |
|
|
115
|
+
| 175% | 1.75 |
|
|
116
|
+
| 200% | 2.0 |
|
|
120
117
|
|
|
121
|
-
|
|
118
|
+
Manual override is still supported:
|
|
122
119
|
|
|
123
120
|
```ts
|
|
124
|
-
visor.
|
|
121
|
+
visor.loadConfig({
|
|
122
|
+
scaleFactor: 1.5
|
|
123
|
+
});
|
|
125
124
|
```
|
|
126
125
|
|
|
127
|
-
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
# Multi-Scale Image Matching
|
|
129
|
+
|
|
130
|
+
Visor automatically performs multi-scale template matching to support:
|
|
131
|
+
|
|
132
|
+
* Different Windows scaling settings
|
|
133
|
+
* Different screen resolutions
|
|
134
|
+
* High-DPI displays
|
|
135
|
+
* Cross-machine execution
|
|
136
|
+
|
|
137
|
+
By default Visor evaluates templates across multiple scale levels and automatically selects the best match.
|
|
138
|
+
|
|
139
|
+
Supported environments include:
|
|
140
|
+
|
|
141
|
+
* 100% scaling
|
|
142
|
+
* 125% scaling
|
|
143
|
+
* 150% scaling
|
|
144
|
+
* 175% scaling
|
|
145
|
+
* 200% scaling
|
|
128
146
|
|
|
129
|
-
|
|
130
|
-
- incorrect mouse positioning
|
|
131
|
-
- OCR region issues
|
|
147
|
+
This significantly improves image matching reliability when automation is executed across different machines.
|
|
132
148
|
|
|
133
149
|
---
|
|
134
150
|
|
|
@@ -163,7 +179,12 @@ const exists =
|
|
|
163
179
|
## Wait For Image
|
|
164
180
|
|
|
165
181
|
```ts
|
|
166
|
-
await visor.wait("loading-complete.png");
|
|
182
|
+
await visor.wait("save.png");
|
|
183
|
+
|
|
184
|
+
await visor.wait("save.png", {
|
|
185
|
+
confidence: 0.9,
|
|
186
|
+
timeout: 10000
|
|
187
|
+
});
|
|
167
188
|
```
|
|
168
189
|
|
|
169
190
|
---
|
|
@@ -172,8 +193,8 @@ await visor.wait("loading-complete.png");
|
|
|
172
193
|
|
|
173
194
|
```ts
|
|
174
195
|
await visor.waitAny([
|
|
175
|
-
"
|
|
176
|
-
"
|
|
196
|
+
"light-theme.png",
|
|
197
|
+
"dark-theme.png"
|
|
177
198
|
]);
|
|
178
199
|
```
|
|
179
200
|
|
|
@@ -194,8 +215,8 @@ await visor.clickAny([
|
|
|
194
215
|
|
|
195
216
|
```ts
|
|
196
217
|
await visor.dragDrop(
|
|
197
|
-
"
|
|
198
|
-
"
|
|
218
|
+
"source.png",
|
|
219
|
+
"target.png"
|
|
199
220
|
);
|
|
200
221
|
```
|
|
201
222
|
|
|
@@ -211,14 +232,16 @@ await visor.hover("menu.png");
|
|
|
211
232
|
|
|
212
233
|
# OCR Automation
|
|
213
234
|
|
|
214
|
-
Visor includes OCR automation
|
|
235
|
+
Visor includes OCR automation powered by Tesseract.js.
|
|
215
236
|
|
|
216
237
|
OCR supports:
|
|
217
238
|
|
|
218
|
-
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
239
|
+
* Full-screen OCR
|
|
240
|
+
* Region OCR
|
|
241
|
+
* Text search
|
|
242
|
+
* Text clicking
|
|
243
|
+
* Text waiting
|
|
244
|
+
* OCR occurrence indexing
|
|
222
245
|
|
|
223
246
|
---
|
|
224
247
|
|
|
@@ -233,6 +256,22 @@ console.log(result.text);
|
|
|
233
256
|
|
|
234
257
|
---
|
|
235
258
|
|
|
259
|
+
## Read Region
|
|
260
|
+
|
|
261
|
+
```ts
|
|
262
|
+
const result =
|
|
263
|
+
await visor.readRegion({
|
|
264
|
+
x: 100,
|
|
265
|
+
y: 100,
|
|
266
|
+
width: 500,
|
|
267
|
+
height: 300
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
console.log(result.text);
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
236
275
|
## Find Text
|
|
237
276
|
|
|
238
277
|
```ts
|
|
@@ -258,21 +297,44 @@ await visor.waitText("Success");
|
|
|
258
297
|
|
|
259
298
|
---
|
|
260
299
|
|
|
300
|
+
# OCR Occurrence Indexing
|
|
301
|
+
|
|
302
|
+
When the same text appears multiple times on screen, Visor allows selecting a specific occurrence.
|
|
303
|
+
|
|
304
|
+
```ts
|
|
305
|
+
await visor.clickText("Inbox", 0);
|
|
306
|
+
await visor.clickText("Inbox", 1);
|
|
307
|
+
await visor.clickText("Inbox", 2);
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
OCR elements are processed from:
|
|
311
|
+
|
|
312
|
+
```text
|
|
313
|
+
Top → Bottom
|
|
314
|
+
Left → Right
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
This improves automation stability when multiple matching text elements exist on screen.
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
261
321
|
# OCR Optimizations
|
|
262
322
|
|
|
263
323
|
Visor includes:
|
|
264
324
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
325
|
+
* Shared OCR worker reuse
|
|
326
|
+
* OCR preprocessing
|
|
327
|
+
* Grayscale normalization
|
|
328
|
+
* Image sharpening
|
|
329
|
+
* Confidence filtering
|
|
330
|
+
* OCR occurrence indexing
|
|
270
331
|
|
|
271
|
-
|
|
332
|
+
Benefits:
|
|
272
333
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
334
|
+
* Faster OCR execution
|
|
335
|
+
* Improved OCR accuracy
|
|
336
|
+
* Lower memory usage
|
|
337
|
+
* Improved framework stability
|
|
276
338
|
|
|
277
339
|
---
|
|
278
340
|
|
|
@@ -339,8 +401,6 @@ await visor.press(
|
|
|
339
401
|
|
|
340
402
|
# Screenshot Automation
|
|
341
403
|
|
|
342
|
-
## Capture Screenshot
|
|
343
|
-
|
|
344
404
|
```ts
|
|
345
405
|
await visor.captureScreenshot(
|
|
346
406
|
"./screenshots/home.png"
|
|
@@ -367,128 +427,34 @@ await visor.closeApp("notepad.exe");
|
|
|
367
427
|
|
|
368
428
|
---
|
|
369
429
|
|
|
370
|
-
# Teams Automation Example
|
|
371
|
-
|
|
372
|
-
```ts
|
|
373
|
-
import { visor } from "visor";
|
|
374
|
-
|
|
375
|
-
async function teamsDemo() {
|
|
376
|
-
|
|
377
|
-
visor.loadConfig({
|
|
378
|
-
scaleFactor: 1.5,
|
|
379
|
-
imagePath: "./images",
|
|
380
|
-
debug: true
|
|
381
|
-
});
|
|
382
|
-
|
|
383
|
-
await visor.openApp(
|
|
384
|
-
"ms-teams.exe"
|
|
385
|
-
);
|
|
386
|
-
|
|
387
|
-
await visor.waitAny([
|
|
388
|
-
"teams-light.png",
|
|
389
|
-
"teams-dark.png"
|
|
390
|
-
]);
|
|
391
|
-
|
|
392
|
-
await visor.clickAny([
|
|
393
|
-
"chat-light.png",
|
|
394
|
-
"chat-dark.png"
|
|
395
|
-
]);
|
|
396
|
-
|
|
397
|
-
await visor.clickText("Search");
|
|
398
|
-
|
|
399
|
-
await visor.type("John");
|
|
400
|
-
|
|
401
|
-
}
|
|
402
|
-
```
|
|
403
|
-
|
|
404
|
-
---
|
|
405
|
-
|
|
406
430
|
# Confidence Thresholds
|
|
407
431
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
Accepted range:
|
|
432
|
+
Supported range:
|
|
411
433
|
|
|
412
|
-
```
|
|
413
|
-
0.0
|
|
434
|
+
```text
|
|
435
|
+
0.0 - 1.0
|
|
414
436
|
```
|
|
415
437
|
|
|
416
438
|
Recommended values:
|
|
417
439
|
|
|
418
|
-
| Confidence | Usage
|
|
419
|
-
|
|
420
|
-
|
|
|
421
|
-
|
|
|
422
|
-
|
|
|
423
|
-
|
|
424
|
-
Lower confidence increases flexibility but may increase false positives.
|
|
425
|
-
|
|
426
|
-
---
|
|
427
|
-
|
|
428
|
-
# Performance Notes
|
|
429
|
-
|
|
430
|
-
## OCR
|
|
431
|
-
|
|
432
|
-
OCR operations are computationally expensive.
|
|
433
|
-
|
|
434
|
-
OCR speed depends on:
|
|
435
|
-
|
|
436
|
-
- display resolution
|
|
437
|
-
- text density
|
|
438
|
-
- screen complexity
|
|
439
|
-
- hardware performance
|
|
440
|
-
|
|
441
|
-
---
|
|
442
|
-
|
|
443
|
-
# Debug Logging
|
|
444
|
-
|
|
445
|
-
Enable debug logs:
|
|
446
|
-
|
|
447
|
-
```ts
|
|
448
|
-
visor.setDebug(true);
|
|
449
|
-
```
|
|
450
|
-
|
|
451
|
-
---
|
|
452
|
-
|
|
453
|
-
# Best Practices
|
|
454
|
-
|
|
455
|
-
## Use Stable Images
|
|
456
|
-
|
|
457
|
-
Prefer:
|
|
458
|
-
|
|
459
|
-
- high contrast images
|
|
460
|
-
- unique UI elements
|
|
461
|
-
- properly cropped templates
|
|
462
|
-
|
|
463
|
-
Avoid:
|
|
464
|
-
|
|
465
|
-
- blurry screenshots
|
|
466
|
-
- partially hidden elements
|
|
467
|
-
- scaled screenshots
|
|
468
|
-
|
|
469
|
-
---
|
|
470
|
-
|
|
471
|
-
## Use Proper Scaling
|
|
472
|
-
|
|
473
|
-
Always configure:
|
|
474
|
-
|
|
475
|
-
```ts
|
|
476
|
-
visor.setScaleFactor(...);
|
|
477
|
-
```
|
|
478
|
-
|
|
479
|
-
before automation begins.
|
|
440
|
+
| Confidence | Usage |
|
|
441
|
+
| ---------- | --------------- |
|
|
442
|
+
| 0.7 | Dynamic UI |
|
|
443
|
+
| 0.8 | General usage |
|
|
444
|
+
| 0.9 | Strict matching |
|
|
480
445
|
|
|
481
446
|
---
|
|
482
447
|
|
|
483
|
-
|
|
448
|
+
# Performance Improvements
|
|
484
449
|
|
|
485
|
-
|
|
450
|
+
Visor includes:
|
|
486
451
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
452
|
+
* Shared OCR worker reuse
|
|
453
|
+
* Multi-scale image matching
|
|
454
|
+
* OCR preprocessing pipeline
|
|
455
|
+
* Automatic display scaling detection
|
|
490
456
|
|
|
491
|
-
|
|
457
|
+
These improvements increase reliability across varying display configurations and reduce OCR initialization overhead.
|
|
492
458
|
|
|
493
459
|
---
|
|
494
460
|
|
|
@@ -498,10 +464,10 @@ instead of excessive hard sleeps.
|
|
|
498
464
|
|
|
499
465
|
Possible causes:
|
|
500
466
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
467
|
+
* Incorrect image path
|
|
468
|
+
* Low confidence threshold
|
|
469
|
+
* Theme mismatch
|
|
470
|
+
* Poor template quality
|
|
505
471
|
|
|
506
472
|
---
|
|
507
473
|
|
|
@@ -509,40 +475,42 @@ Possible causes:
|
|
|
509
475
|
|
|
510
476
|
Possible causes:
|
|
511
477
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
478
|
+
* Small fonts
|
|
479
|
+
* Low contrast text
|
|
480
|
+
* Blurry UI elements
|
|
515
481
|
|
|
516
482
|
---
|
|
517
483
|
|
|
518
484
|
## Mouse Clicking Incorrect Position
|
|
519
485
|
|
|
520
|
-
|
|
486
|
+
Visor automatically detects Windows display scaling.
|
|
487
|
+
|
|
488
|
+
If required, manually override:
|
|
521
489
|
|
|
522
|
-
|
|
523
|
-
|
|
490
|
+
```ts
|
|
491
|
+
visor.loadConfig({
|
|
492
|
+
scaleFactor: 1.5
|
|
493
|
+
});
|
|
494
|
+
```
|
|
524
495
|
|
|
525
496
|
---
|
|
526
497
|
|
|
527
498
|
# Roadmap
|
|
528
499
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
500
|
+
* Template cache
|
|
501
|
+
* OCR cache
|
|
502
|
+
* Scale cache
|
|
503
|
+
* Parallel image matching
|
|
504
|
+
* Advanced OCR tuning
|
|
505
|
+
* Electron recorder
|
|
506
|
+
* AI-assisted automation
|
|
535
507
|
|
|
536
508
|
---
|
|
537
509
|
|
|
538
510
|
# Tech Stack
|
|
539
511
|
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
- sharp
|
|
546
|
-
- nut.js
|
|
547
|
-
|
|
548
|
-
---
|
|
512
|
+
* OpenCV
|
|
513
|
+
* Tesseract.js
|
|
514
|
+
* screenshot-desktop
|
|
515
|
+
* sharp
|
|
516
|
+
* nut.js
|