testdriverai 7.3.9 → 7.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/agent/lib/commands.js +3 -2
- package/docs/v7/assert.mdx +31 -0
- package/docs/v7/client.mdx +29 -0
- package/docs/v7/find.mdx +24 -0
- package/mcp-server/dist/server.mjs +2 -2
- package/mcp-server/src/server.ts +2 -2
- package/package.json +1 -1
- package/sdk.d.ts +19 -2
- package/sdk.js +130 -59
package/CHANGELOG.md
CHANGED
package/agent/lib/commands.js
CHANGED
|
@@ -226,8 +226,8 @@ const createCommands = (
|
|
|
226
226
|
const assertTimestamp = Date.now();
|
|
227
227
|
const assertStartTime = assertTimestamp;
|
|
228
228
|
|
|
229
|
-
// Extract cache options
|
|
230
|
-
const { threshold = -1, cacheKey, os, resolution } = options;
|
|
229
|
+
// Extract cache and AI options
|
|
230
|
+
const { threshold = -1, cacheKey, os, resolution, ai } = options;
|
|
231
231
|
|
|
232
232
|
// Debug log cache settings
|
|
233
233
|
emitter.emit(
|
|
@@ -243,6 +243,7 @@ const createCommands = (
|
|
|
243
243
|
cacheKey,
|
|
244
244
|
os,
|
|
245
245
|
resolution,
|
|
246
|
+
ai,
|
|
246
247
|
});
|
|
247
248
|
|
|
248
249
|
const assertDuration = Date.now() - assertStartTime;
|
package/docs/v7/assert.mdx
CHANGED
|
@@ -13,6 +13,7 @@ Make AI-powered assertions about the current screen state using natural language
|
|
|
13
13
|
|
|
14
14
|
```javascript
|
|
15
15
|
await testdriver.assert(assertion)
|
|
16
|
+
await testdriver.assert(assertion, options)
|
|
16
17
|
```
|
|
17
18
|
|
|
18
19
|
## Parameters
|
|
@@ -21,6 +22,36 @@ await testdriver.assert(assertion)
|
|
|
21
22
|
Natural language description of what should be true
|
|
22
23
|
</ParamField>
|
|
23
24
|
|
|
25
|
+
<ParamField path="options" type="object">
|
|
26
|
+
Optional configuration
|
|
27
|
+
|
|
28
|
+
<Expandable title="properties">
|
|
29
|
+
<ParamField path="ai" type="object">
|
|
30
|
+
AI sampling configuration for this assert call (overrides global `ai` config from constructor).
|
|
31
|
+
|
|
32
|
+
<Expandable title="properties">
|
|
33
|
+
<ParamField path="temperature" type="number">
|
|
34
|
+
Controls randomness. `0` = deterministic, higher = more creative. Default: model default.
|
|
35
|
+
</ParamField>
|
|
36
|
+
|
|
37
|
+
<ParamField path="top" type="object">
|
|
38
|
+
Sampling parameters
|
|
39
|
+
|
|
40
|
+
<Expandable title="properties">
|
|
41
|
+
<ParamField path="p" type="number">
|
|
42
|
+
Top-P (nucleus sampling). Range: 0-1.
|
|
43
|
+
</ParamField>
|
|
44
|
+
|
|
45
|
+
<ParamField path="k" type="number">
|
|
46
|
+
Top-K sampling. `1` = most deterministic.
|
|
47
|
+
</ParamField>
|
|
48
|
+
</Expandable>
|
|
49
|
+
</ParamField>
|
|
50
|
+
</Expandable>
|
|
51
|
+
</ParamField>
|
|
52
|
+
</Expandable>
|
|
53
|
+
</ParamField>
|
|
54
|
+
|
|
24
55
|
## Returns
|
|
25
56
|
|
|
26
57
|
`Promise<boolean>` - `true` if assertion passes, throws error if assertion fails
|
package/docs/v7/client.mdx
CHANGED
|
@@ -52,6 +52,30 @@ const testdriver = new TestDriver(apiKey, options)
|
|
|
52
52
|
<ParamField path="environment" type="object">
|
|
53
53
|
Additional environment variables to pass to the sandbox
|
|
54
54
|
</ParamField>
|
|
55
|
+
|
|
56
|
+
<ParamField path="ai" type="object">
|
|
57
|
+
Global AI sampling configuration. Controls how the AI model generates responses for `find()` verification and `assert()` calls. Can be overridden per call.
|
|
58
|
+
|
|
59
|
+
<Expandable title="properties">
|
|
60
|
+
<ParamField path="temperature" type="number">
|
|
61
|
+
Controls randomness in AI responses. `0` = deterministic (best for verification), higher values = more creative. Default: `0` for find verification, model default for assert.
|
|
62
|
+
</ParamField>
|
|
63
|
+
|
|
64
|
+
<ParamField path="top" type="object">
|
|
65
|
+
Nucleus and top-k sampling parameters
|
|
66
|
+
|
|
67
|
+
<Expandable title="properties">
|
|
68
|
+
<ParamField path="p" type="number">
|
|
69
|
+
Top-P (nucleus sampling). Limits token choices to the smallest set whose cumulative probability exceeds P. Lower values = more focused responses. Range: 0-1.
|
|
70
|
+
</ParamField>
|
|
71
|
+
|
|
72
|
+
<ParamField path="k" type="number">
|
|
73
|
+
Top-K sampling. Limits token choices to the top K most likely tokens. `1` = always pick the most likely token. `0` = disabled (consider all tokens).
|
|
74
|
+
</ParamField>
|
|
75
|
+
</Expandable>
|
|
76
|
+
</ParamField>
|
|
77
|
+
</Expandable>
|
|
78
|
+
</ParamField>
|
|
55
79
|
</Expandable>
|
|
56
80
|
</ParamField>
|
|
57
81
|
|
|
@@ -68,6 +92,11 @@ const testdriver = new TestDriver({
|
|
|
68
92
|
analytics: true
|
|
69
93
|
});
|
|
70
94
|
|
|
95
|
+
// With AI config for stricter verification
|
|
96
|
+
const testdriver = new TestDriver({
|
|
97
|
+
ai: { temperature: 0, top: { p: 0.9, k: 40 } }
|
|
98
|
+
});
|
|
99
|
+
|
|
71
100
|
// Or pass API key explicitly
|
|
72
101
|
const testdriver = new TestDriver('your-api-key', {
|
|
73
102
|
os: 'windows'
|
package/docs/v7/find.mdx
CHANGED
|
@@ -41,6 +41,30 @@ const element = await testdriver.find(description, options)
|
|
|
41
41
|
<ParamField path="zoom" type="boolean" default={false}>
|
|
42
42
|
Enable two-phase zoom mode for better precision in crowded UIs with many similar elements.
|
|
43
43
|
</ParamField>
|
|
44
|
+
|
|
45
|
+
<ParamField path="ai" type="object">
|
|
46
|
+
AI sampling configuration for this find call (overrides global `ai` config from constructor).
|
|
47
|
+
|
|
48
|
+
<Expandable title="properties">
|
|
49
|
+
<ParamField path="temperature" type="number">
|
|
50
|
+
Controls randomness. `0` = deterministic. Default: `0` for find verification.
|
|
51
|
+
</ParamField>
|
|
52
|
+
|
|
53
|
+
<ParamField path="top" type="object">
|
|
54
|
+
Sampling parameters
|
|
55
|
+
|
|
56
|
+
<Expandable title="properties">
|
|
57
|
+
<ParamField path="p" type="number">
|
|
58
|
+
Top-P (nucleus sampling). Range: 0-1.
|
|
59
|
+
</ParamField>
|
|
60
|
+
|
|
61
|
+
<ParamField path="k" type="number">
|
|
62
|
+
Top-K sampling. `1` = most deterministic.
|
|
63
|
+
</ParamField>
|
|
64
|
+
</Expandable>
|
|
65
|
+
</ParamField>
|
|
66
|
+
</Expandable>
|
|
67
|
+
</ParamField>
|
|
44
68
|
</Expandable>
|
|
45
69
|
</ParamField>
|
|
46
70
|
|
|
@@ -401,8 +401,8 @@ Debug mode (connect to existing sandbox):
|
|
|
401
401
|
const TestDriverSDK = (await import("../../sdk.js")).default;
|
|
402
402
|
// Determine preview mode from environment variable
|
|
403
403
|
// TD_PREVIEW can be "ide", "browser", or "none"
|
|
404
|
-
// Default to "
|
|
405
|
-
const previewMode = process.env.TD_PREVIEW || "
|
|
404
|
+
// Default to "ide" so the live preview shows within the IDE
|
|
405
|
+
const previewMode = process.env.TD_PREVIEW || "ide";
|
|
406
406
|
logger.debug("session_start: Preview mode", { preview: previewMode });
|
|
407
407
|
// Get IP from params or environment (for self-hosted instances)
|
|
408
408
|
const instanceIp = params.ip || process.env.TD_IP;
|
package/mcp-server/src/server.ts
CHANGED
|
@@ -509,8 +509,8 @@ Debug mode (connect to existing sandbox):
|
|
|
509
509
|
|
|
510
510
|
// Determine preview mode from environment variable
|
|
511
511
|
// TD_PREVIEW can be "ide", "browser", or "none"
|
|
512
|
-
// Default to "
|
|
513
|
-
const previewMode = process.env.TD_PREVIEW || "
|
|
512
|
+
// Default to "ide" so the live preview shows within the IDE
|
|
513
|
+
const previewMode = process.env.TD_PREVIEW || "ide";
|
|
514
514
|
logger.debug("session_start: Preview mode", { preview: previewMode });
|
|
515
515
|
|
|
516
516
|
// Get IP from params or environment (for self-hosted instances)
|
package/package.json
CHANGED
package/sdk.d.ts
CHANGED
|
@@ -230,6 +230,8 @@ export interface TestDriverOptions {
|
|
|
230
230
|
logging?: boolean;
|
|
231
231
|
/** Enable/disable cache (default: true). Set to false to force regeneration on all find operations */
|
|
232
232
|
cache?: boolean;
|
|
233
|
+
/** Global AI sampling configuration. Can be overridden per find() or assert() call. */
|
|
234
|
+
ai?: AIConfig;
|
|
233
235
|
/** Cache threshold configuration for different methods */
|
|
234
236
|
cacheThreshold?: {
|
|
235
237
|
/** Threshold for find operations (default: 0.05 = 5% difference, 95% similarity) */
|
|
@@ -546,6 +548,19 @@ export interface FocusApplicationOptions {
|
|
|
546
548
|
name: string;
|
|
547
549
|
}
|
|
548
550
|
|
|
551
|
+
/** AI sampling configuration for controlling model behavior */
|
|
552
|
+
export interface AIConfig {
|
|
553
|
+
/** Temperature for AI sampling (0 = deterministic, higher = more creative). Default: 0 for find verification, model default for assert. */
|
|
554
|
+
temperature?: number;
|
|
555
|
+
/** Top-P and Top-K sampling parameters */
|
|
556
|
+
top?: {
|
|
557
|
+
/** Top-P (nucleus sampling). Controls diversity by limiting to top P probability mass. Range: 0-1. */
|
|
558
|
+
p?: number;
|
|
559
|
+
/** Top-K sampling. Limits choices to top K tokens. 1 = always pick most likely. 0 = disabled. */
|
|
560
|
+
k?: number;
|
|
561
|
+
};
|
|
562
|
+
}
|
|
563
|
+
|
|
549
564
|
/** Options for extract command */
|
|
550
565
|
export interface ExtractOptions {
|
|
551
566
|
/** What to extract */
|
|
@@ -564,6 +579,8 @@ export interface AssertOptions {
|
|
|
564
579
|
os?: string;
|
|
565
580
|
/** Screen resolution for cache partitioning */
|
|
566
581
|
resolution?: string;
|
|
582
|
+
/** AI sampling configuration (overrides global ai config) */
|
|
583
|
+
ai?: AIConfig;
|
|
567
584
|
}
|
|
568
585
|
|
|
569
586
|
/** Options for exec command */
|
|
@@ -1028,7 +1045,7 @@ export default class TestDriverSDK {
|
|
|
1028
1045
|
find(description: string, cacheThreshold?: number): ChainableElementPromise;
|
|
1029
1046
|
find(
|
|
1030
1047
|
description: string,
|
|
1031
|
-
options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number },
|
|
1048
|
+
options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number; ai?: AIConfig },
|
|
1032
1049
|
): ChainableElementPromise;
|
|
1033
1050
|
|
|
1034
1051
|
/**
|
|
@@ -1267,7 +1284,7 @@ export default class TestDriverSDK {
|
|
|
1267
1284
|
* // With custom threshold
|
|
1268
1285
|
* await client.assert('the page loaded', { threshold: 0.01, cacheKey: 'login-test' });
|
|
1269
1286
|
*/
|
|
1270
|
-
assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string }): Promise<boolean>;
|
|
1287
|
+
assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string; ai?: AIConfig }): Promise<boolean>;
|
|
1271
1288
|
|
|
1272
1289
|
/**
|
|
1273
1290
|
* Extract information from the screen using AI
|
package/sdk.js
CHANGED
|
@@ -430,8 +430,9 @@ class Element {
|
|
|
430
430
|
/**
|
|
431
431
|
* Find the element on screen
|
|
432
432
|
* @param {string} [newDescription] - Optional new description to search for
|
|
433
|
-
* @param {Object} [options] - Optional options object with
|
|
433
|
+
* @param {Object} [options] - Optional options object with cache thresholds, cacheKey, and/or timeout
|
|
434
434
|
* @param {number} [options.timeout] - Max time in ms to poll for element (polls every 5 seconds)
|
|
435
|
+
* @param {Object} [options.cache] - Cache configuration { thresholds: { screen, element } }
|
|
435
436
|
* @returns {Promise<Element>} This element instance
|
|
436
437
|
*/
|
|
437
438
|
async find(newDescription, options) {
|
|
@@ -468,10 +469,12 @@ class Element {
|
|
|
468
469
|
this._screenshot = screenshot;
|
|
469
470
|
}
|
|
470
471
|
|
|
471
|
-
// Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold
|
|
472
|
+
// Handle options - can be a number (legacy cacheThreshold) or an object with cacheKey/cacheThreshold/cache/zoom
|
|
472
473
|
let cacheKey = null;
|
|
473
474
|
let cacheThreshold = null;
|
|
475
|
+
let perCommandThresholds = null; // Per-command { screen, element } override
|
|
474
476
|
let zoom = false; // Default to disabled, enable with zoom: true
|
|
477
|
+
let perCommandAi = null; // Per-command AI config override
|
|
475
478
|
|
|
476
479
|
if (typeof options === "number") {
|
|
477
480
|
// Legacy: options is just a number threshold
|
|
@@ -482,6 +485,10 @@ class Element {
|
|
|
482
485
|
cacheThreshold = options.cacheThreshold ?? null;
|
|
483
486
|
// zoom defaults to false unless explicitly set to true
|
|
484
487
|
zoom = options.zoom === true;
|
|
488
|
+
// Per-command cache thresholds: { cache: { thresholds: { screen: 0.1, element: 0.2 } } }
|
|
489
|
+
if (typeof options.cache === "object" && options.cache?.thresholds) {
|
|
490
|
+
perCommandThresholds = options.cache.thresholds;
|
|
491
|
+
}
|
|
485
492
|
}
|
|
486
493
|
|
|
487
494
|
// Use default cacheKey from SDK constructor if not provided in find() options
|
|
@@ -499,19 +506,25 @@ class Element {
|
|
|
499
506
|
// - If cacheKey is provided, enable cache with threshold
|
|
500
507
|
// - If no cacheKey, disable cache
|
|
501
508
|
let threshold;
|
|
509
|
+
let elementSimilarity;
|
|
502
510
|
if (this.sdk._cacheExplicitlyDisabled) {
|
|
503
511
|
// Cache explicitly disabled via cache: false option or TD_NO_CACHE env
|
|
504
512
|
threshold = -1;
|
|
513
|
+
elementSimilarity = -1;
|
|
505
514
|
cacheKey = null; // Clear any cacheKey to ensure cache is truly disabled
|
|
506
515
|
} else if (cacheKey) {
|
|
507
516
|
// cacheKey provided - enable cache with threshold
|
|
508
|
-
|
|
517
|
+
// Per-command thresholds > legacy cacheThreshold > global config
|
|
518
|
+
threshold = perCommandThresholds?.screen ?? cacheThreshold ?? this.sdk.cacheConfig?.thresholds?.find?.screen ?? 0.01;
|
|
519
|
+
elementSimilarity = perCommandThresholds?.element ?? this.sdk.cacheConfig?.thresholds?.find?.element ?? 0.8;
|
|
509
520
|
} else if (cacheThreshold !== null) {
|
|
510
521
|
// Explicit threshold provided without cacheKey
|
|
511
|
-
threshold = cacheThreshold;
|
|
522
|
+
threshold = perCommandThresholds?.screen ?? cacheThreshold;
|
|
523
|
+
elementSimilarity = perCommandThresholds?.element ?? this.sdk.cacheConfig?.thresholds?.find?.element ?? 0.8;
|
|
512
524
|
} else {
|
|
513
525
|
// No cacheKey, no explicit threshold - disable cache
|
|
514
526
|
threshold = -1;
|
|
527
|
+
elementSimilarity = -1;
|
|
515
528
|
}
|
|
516
529
|
|
|
517
530
|
// Store the threshold for debugging
|
|
@@ -536,10 +549,16 @@ class Element {
|
|
|
536
549
|
element: description,
|
|
537
550
|
image: screenshot,
|
|
538
551
|
threshold: threshold,
|
|
552
|
+
elementSimilarity: elementSimilarity,
|
|
539
553
|
cacheKey: cacheKey,
|
|
540
554
|
os: this.sdk.os,
|
|
541
555
|
resolution: this.sdk.resolution,
|
|
542
556
|
zoom: zoom,
|
|
557
|
+
ai: {
|
|
558
|
+
...this.sdk.aiConfig,
|
|
559
|
+
...(perCommandAi || {}),
|
|
560
|
+
top: { ...this.sdk.aiConfig?.top, ...(perCommandAi?.top || {}) },
|
|
561
|
+
},
|
|
543
562
|
});
|
|
544
563
|
|
|
545
564
|
const duration = Date.now() - startTime;
|
|
@@ -736,6 +755,9 @@ class Element {
|
|
|
736
755
|
cacheHit: debugInfo.cacheHit,
|
|
737
756
|
selectorId: this._response?.selector,
|
|
738
757
|
consoleUrl: consoleUrl,
|
|
758
|
+
validated: response.validated ?? null,
|
|
759
|
+
validationConfidence: response.validationConfidence ?? null,
|
|
760
|
+
coordsUpdated: response.coordsUpdated ?? null,
|
|
739
761
|
};
|
|
740
762
|
if (!debugInfo.cacheHit) {
|
|
741
763
|
meta.confidence = debugInfo.confidence;
|
|
@@ -1441,15 +1463,49 @@ class TestDriverSDK {
|
|
|
1441
1463
|
findAll: -1,
|
|
1442
1464
|
assert: -1,
|
|
1443
1465
|
};
|
|
1466
|
+
this.cacheConfig = {
|
|
1467
|
+
enabled: false,
|
|
1468
|
+
thresholds: {
|
|
1469
|
+
find: { screen: -1, element: -1 },
|
|
1470
|
+
assert: -1,
|
|
1471
|
+
},
|
|
1472
|
+
};
|
|
1444
1473
|
} else {
|
|
1445
|
-
//
|
|
1474
|
+
// Support cache object format: { cache: { thresholds: { find: { screen: 0.01, element: 0.8 }, assert: 0.05 } } }
|
|
1475
|
+
const cacheOpts = typeof options.cache === "object" ? options.cache : {};
|
|
1476
|
+
const thresholds = cacheOpts.thresholds || {};
|
|
1477
|
+
const findThresholds = typeof thresholds.find === "object" ? thresholds.find : {};
|
|
1478
|
+
|
|
1479
|
+
this.cacheConfig = {
|
|
1480
|
+
enabled: cacheOpts.enabled !== false,
|
|
1481
|
+
thresholds: {
|
|
1482
|
+
find: {
|
|
1483
|
+
screen: findThresholds.screen ?? 0.01, // Default: 1% pixel diff allowed
|
|
1484
|
+
element: findThresholds.element ?? 0.8, // Default: 80% OpenCV correlation
|
|
1485
|
+
},
|
|
1486
|
+
assert: thresholds.assert ?? 0.05, // Default: 5% pixel diff for assertions
|
|
1487
|
+
},
|
|
1488
|
+
};
|
|
1489
|
+
|
|
1490
|
+
// Legacy cacheThresholds - keep for backwards compatibility
|
|
1446
1491
|
this.cacheThresholds = {
|
|
1447
|
-
find: options.cacheThreshold?.find ??
|
|
1448
|
-
findAll: options.cacheThreshold?.findAll ??
|
|
1449
|
-
assert: options.cacheThreshold?.assert ??
|
|
1492
|
+
find: options.cacheThreshold?.find ?? this.cacheConfig.thresholds.find.screen,
|
|
1493
|
+
findAll: options.cacheThreshold?.findAll ?? this.cacheConfig.thresholds.find.screen,
|
|
1494
|
+
assert: options.cacheThreshold?.assert ?? this.cacheConfig.thresholds.assert,
|
|
1450
1495
|
};
|
|
1451
1496
|
}
|
|
1452
1497
|
|
|
1498
|
+
// AI sampling configuration
|
|
1499
|
+
// Supports: { ai: { temperature: 0, top: { p: 1, k: 0 } } }
|
|
1500
|
+
// Can be overridden per find() or assert() call
|
|
1501
|
+
this.aiConfig = typeof options.ai === "object" ? {
|
|
1502
|
+
temperature: options.ai.temperature,
|
|
1503
|
+
top: {
|
|
1504
|
+
p: options.ai.top?.p,
|
|
1505
|
+
k: options.ai.top?.k,
|
|
1506
|
+
},
|
|
1507
|
+
} : {};
|
|
1508
|
+
|
|
1453
1509
|
// Redraw configuration
|
|
1454
1510
|
// Supports both:
|
|
1455
1511
|
// - redraw: { enabled: true, diffThreshold: 0.1, screenRedraw: true, networkMonitor: true }
|
|
@@ -2791,7 +2847,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2791
2847
|
* Automatically locates the element and returns it
|
|
2792
2848
|
*
|
|
2793
2849
|
* @param {string} description - Description of the element to find
|
|
2794
|
-
* @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey,
|
|
2850
|
+
* @param {number | Object} [options] - Options: a number (legacy cache threshold), or an object with {cacheKey, cacheThreshold, timeout, ai, cache: { thresholds: { screen, element } }}
|
|
2795
2851
|
* @returns {Promise<Element> & ChainableElement} Element instance that has been located, with chainable methods
|
|
2796
2852
|
*
|
|
2797
2853
|
* @example
|
|
@@ -2880,7 +2936,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2880
2936
|
* Automatically locates all matching elements and returns them as an array
|
|
2881
2937
|
*
|
|
2882
2938
|
* @param {string} description - Description of the elements to find
|
|
2883
|
-
* @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey,
|
|
2939
|
+
* @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey, cache: { thresholds: { screen } }}
|
|
2884
2940
|
* @returns {Promise<Element[]>} Array of Element instances that have been located
|
|
2885
2941
|
*
|
|
2886
2942
|
* @example
|
|
@@ -2936,9 +2992,10 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2936
2992
|
try {
|
|
2937
2993
|
const screenshot = await this.system.captureScreenBase64();
|
|
2938
2994
|
|
|
2939
|
-
// Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold
|
|
2995
|
+
// Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold/cache
|
|
2940
2996
|
let cacheKey = null;
|
|
2941
2997
|
let cacheThreshold = null;
|
|
2998
|
+
let perCommandThresholds = null; // Per-command { screen } override (findAll has no element threshold)
|
|
2942
2999
|
|
|
2943
3000
|
if (typeof options === "number") {
|
|
2944
3001
|
// Legacy: options is just a number threshold
|
|
@@ -2947,6 +3004,10 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2947
3004
|
// New: options is an object with cacheKey and/or cacheThreshold
|
|
2948
3005
|
cacheKey = options.cacheKey || null;
|
|
2949
3006
|
cacheThreshold = options.cacheThreshold ?? null;
|
|
3007
|
+
// Per-command cache thresholds: { cache: { thresholds: { screen: 0.1 } } }
|
|
3008
|
+
if (typeof options.cache === "object" && options.cache?.thresholds) {
|
|
3009
|
+
perCommandThresholds = options.cache.thresholds;
|
|
3010
|
+
}
|
|
2950
3011
|
}
|
|
2951
3012
|
|
|
2952
3013
|
// Use default cacheKey from SDK constructor if not provided in findAll() options
|
|
@@ -2969,11 +3030,11 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2969
3030
|
threshold = -1;
|
|
2970
3031
|
cacheKey = null; // Clear any cacheKey to ensure cache is truly disabled
|
|
2971
3032
|
} else if (cacheKey) {
|
|
2972
|
-
// cacheKey provided - enable cache with threshold
|
|
2973
|
-
threshold = cacheThreshold ?? this.
|
|
3033
|
+
// cacheKey provided - enable cache with threshold (findAll only uses screen, no element)
|
|
3034
|
+
threshold = perCommandThresholds?.screen ?? cacheThreshold ?? this.cacheConfig?.thresholds?.find?.screen ?? 0.01;
|
|
2974
3035
|
} else if (cacheThreshold !== null) {
|
|
2975
3036
|
// Explicit threshold provided without cacheKey
|
|
2976
|
-
threshold = cacheThreshold;
|
|
3037
|
+
threshold = perCommandThresholds?.screen ?? cacheThreshold;
|
|
2977
3038
|
} else {
|
|
2978
3039
|
// No cacheKey, no explicit threshold - disable cache
|
|
2979
3040
|
threshold = -1;
|
|
@@ -2994,7 +3055,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2994
3055
|
}
|
|
2995
3056
|
|
|
2996
3057
|
const response = await this.apiClient.req(
|
|
2997
|
-
"/api/v7.0.0/testdriver
|
|
3058
|
+
"/api/v7.0.0/testdriver/find-all",
|
|
2998
3059
|
{
|
|
2999
3060
|
session: this.getSessionId(),
|
|
3000
3061
|
element: description,
|
|
@@ -3010,7 +3071,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3010
3071
|
|
|
3011
3072
|
if (response && response.elements && response.elements.length > 0) {
|
|
3012
3073
|
// Single log at the end - found elements
|
|
3013
|
-
const formattedMessage = formatter.
|
|
3074
|
+
const formattedMessage = formatter.formatElementsFound(
|
|
3014
3075
|
description,
|
|
3015
3076
|
response.elements.length,
|
|
3016
3077
|
{
|
|
@@ -3093,7 +3154,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3093
3154
|
const duration = Date.now() - startTime;
|
|
3094
3155
|
|
|
3095
3156
|
// Single log at the end - no elements found
|
|
3096
|
-
const formattedMessage = formatter.
|
|
3157
|
+
const formattedMessage = formatter.formatElementsFound(
|
|
3097
3158
|
description,
|
|
3098
3159
|
0,
|
|
3099
3160
|
{
|
|
@@ -3139,7 +3200,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3139
3200
|
const duration = Date.now() - startTime;
|
|
3140
3201
|
|
|
3141
3202
|
// Single log at the end - error
|
|
3142
|
-
const formattedMessage = formatter.
|
|
3203
|
+
const formattedMessage = formatter.formatElementsFound(
|
|
3143
3204
|
description,
|
|
3144
3205
|
0,
|
|
3145
3206
|
{
|
|
@@ -3334,16 +3395,30 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3334
3395
|
let result;
|
|
3335
3396
|
// Special handling for assert to inject SDK options (cacheKey, os, resolution, threshold, ai)
|
|
3336
3397
|
// similar to how find() handles these in the Element class
|
|
3398
|
+
// Note: assert does NOT use elementSimilarity (template matching not relevant for assertions)
|
|
3337
3399
|
if (commandName === 'assert') {
|
|
3338
3400
|
const assertion = args[0];
|
|
3339
3401
|
const userOptions = args[1] || {};
|
|
3340
3402
|
|
|
3403
|
+
// Support per-command cache threshold override: { cache: { threshold: 0.05 } }
|
|
3404
|
+
const perCommandThreshold = typeof userOptions.cache === "object"
|
|
3405
|
+
? userOptions.cache.threshold
|
|
3406
|
+
: undefined;
|
|
3407
|
+
|
|
3341
3408
|
// Merge SDK defaults with user options (user options take precedence)
|
|
3342
3409
|
const mergedOptions = {
|
|
3343
3410
|
cacheKey: userOptions.cacheKey ?? sdk.options.cacheKey,
|
|
3344
3411
|
os: userOptions.os ?? sdk.os,
|
|
3345
3412
|
resolution: userOptions.resolution ?? sdk.resolution,
|
|
3346
|
-
threshold:
|
|
3413
|
+
threshold: perCommandThreshold ?? userOptions.threshold ?? (sdk.cacheConfig?.thresholds?.assert ?? sdk.cacheThresholds?.assert ?? 0.05),
|
|
3414
|
+
ai: {
|
|
3415
|
+
...sdk.aiConfig,
|
|
3416
|
+
...(typeof userOptions.ai === "object" ? userOptions.ai : {}),
|
|
3417
|
+
top: {
|
|
3418
|
+
...sdk.aiConfig?.top,
|
|
3419
|
+
...(typeof userOptions.ai === "object" ? userOptions.ai?.top : {}),
|
|
3420
|
+
},
|
|
3421
|
+
},
|
|
3347
3422
|
};
|
|
3348
3423
|
|
|
3349
3424
|
// Note: commands.assert takes (assertion, options), shouldThrow is determined internally
|
|
@@ -3451,74 +3526,70 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3451
3526
|
}
|
|
3452
3527
|
|
|
3453
3528
|
/**
|
|
3454
|
-
*
|
|
3455
|
-
* Returns structured data with
|
|
3529
|
+
* Parse the current screen using OmniParser v2 to detect all UI elements
|
|
3530
|
+
* Returns structured data with element types, bounding boxes, and content
|
|
3531
|
+
* Requires enterprise or self-hosted plan.
|
|
3456
3532
|
*
|
|
3457
|
-
* @returns {Promise<
|
|
3533
|
+
* @returns {Promise<ParseResult>} Parsed screen elements
|
|
3458
3534
|
*
|
|
3459
|
-
* @typedef {Object}
|
|
3460
|
-
* @property {
|
|
3461
|
-
* @property {string}
|
|
3462
|
-
* @property {number} confidence - Overall OCR confidence (0-100)
|
|
3535
|
+
* @typedef {Object} ParseResult
|
|
3536
|
+
* @property {ParsedElement[]} elements - Array of detected UI elements
|
|
3537
|
+
* @property {string} annotatedImageUrl - URL of the annotated screenshot
|
|
3463
3538
|
* @property {number} imageWidth - Width of the analyzed image
|
|
3464
3539
|
* @property {number} imageHeight - Height of the analyzed image
|
|
3465
3540
|
*
|
|
3466
|
-
* @typedef {Object}
|
|
3467
|
-
* @property {
|
|
3468
|
-
* @property {
|
|
3469
|
-
* @property {
|
|
3541
|
+
* @typedef {Object} ParsedElement
|
|
3542
|
+
* @property {number} index - Element index
|
|
3543
|
+
* @property {string} type - Element type (e.g. "text", "icon", "button")
|
|
3544
|
+
* @property {string} content - Text content or description
|
|
3545
|
+
* @property {string} interactivity - Interactivity level (e.g. "clickable", "non-interactive")
|
|
3546
|
+
* @property {Object} bbox - Bounding box in pixel coordinates
|
|
3470
3547
|
* @property {number} bbox.x0 - Left edge X coordinate
|
|
3471
3548
|
* @property {number} bbox.y0 - Top edge Y coordinate
|
|
3472
3549
|
* @property {number} bbox.x1 - Right edge X coordinate
|
|
3473
3550
|
* @property {number} bbox.y1 - Bottom edge Y coordinate
|
|
3551
|
+
* @property {Object} boundingBox - Bounding box as {left, top, width, height}
|
|
3552
|
+
* @property {number} boundingBox.left - Left position
|
|
3553
|
+
* @property {number} boundingBox.top - Top position
|
|
3554
|
+
* @property {number} boundingBox.width - Element width
|
|
3555
|
+
* @property {number} boundingBox.height - Element height
|
|
3474
3556
|
*
|
|
3475
3557
|
* @example
|
|
3476
|
-
* // Get all
|
|
3477
|
-
* const result = await testdriver.
|
|
3478
|
-
* console.log(result.
|
|
3479
|
-
* // "Welcome to TestDriver Sign In Email Password Submit"
|
|
3558
|
+
* // Get all elements on screen
|
|
3559
|
+
* const result = await testdriver.parse();
|
|
3560
|
+
* console.log(`Found ${result.elements.length} elements`);
|
|
3480
3561
|
*
|
|
3481
3562
|
* @example
|
|
3482
|
-
* // Find
|
|
3483
|
-
* const result = await testdriver.
|
|
3484
|
-
* const
|
|
3485
|
-
* w.content.toLowerCase().includes('button')
|
|
3486
|
-
* );
|
|
3563
|
+
* // Find clickable elements
|
|
3564
|
+
* const result = await testdriver.parse();
|
|
3565
|
+
* const clickable = result.elements.filter(e => e.interactivity === 'clickable');
|
|
3487
3566
|
*
|
|
3488
3567
|
* @example
|
|
3489
|
-
* //
|
|
3490
|
-
* const result = await testdriver.
|
|
3491
|
-
* const
|
|
3492
|
-
*
|
|
3493
|
-
* // Calculate center of the word
|
|
3494
|
-
* const x = (submitWord.bbox.x0 + submitWord.bbox.x1) / 2;
|
|
3495
|
-
* const y = (submitWord.bbox.y0 + submitWord.bbox.y1) / 2;
|
|
3496
|
-
* await testdriver.click({ x, y });
|
|
3497
|
-
* }
|
|
3498
|
-
*
|
|
3499
|
-
* @example
|
|
3500
|
-
* // Check if specific text exists on screen
|
|
3501
|
-
* const result = await testdriver.ocr();
|
|
3502
|
-
* const hasError = result.words.some(w =>
|
|
3503
|
-
* w.content.toLowerCase().includes('error')
|
|
3504
|
-
* );
|
|
3568
|
+
* // Find text content
|
|
3569
|
+
* const result = await testdriver.parse();
|
|
3570
|
+
* const textElements = result.elements.filter(e => e.type === 'text');
|
|
3571
|
+
* textElements.forEach(e => console.log(e.content));
|
|
3505
3572
|
*/
|
|
3506
|
-
async
|
|
3573
|
+
async parse() {
|
|
3507
3574
|
this._ensureConnected();
|
|
3508
3575
|
|
|
3509
3576
|
const { events } = require("./agent/events.js");
|
|
3510
|
-
this.emitter.emit(events.log.log, "🔍 Running
|
|
3577
|
+
this.emitter.emit(events.log.log, "🔍 Running OmniParser screen analysis...");
|
|
3511
3578
|
|
|
3512
3579
|
const screenshot = await this.system.captureScreenBase64();
|
|
3513
3580
|
|
|
3514
|
-
const response = await this.apiClient.req("
|
|
3581
|
+
const response = await this.apiClient.req("parse", {
|
|
3515
3582
|
session: this.getSessionId(),
|
|
3516
3583
|
image: screenshot,
|
|
3517
3584
|
});
|
|
3518
3585
|
|
|
3586
|
+
if (response.error) {
|
|
3587
|
+
throw new Error(response.error);
|
|
3588
|
+
}
|
|
3589
|
+
|
|
3519
3590
|
this.emitter.emit(
|
|
3520
3591
|
events.log.log,
|
|
3521
|
-
`✅
|
|
3592
|
+
`✅ Parse complete: ${response.elements?.length || 0} elements detected`,
|
|
3522
3593
|
);
|
|
3523
3594
|
|
|
3524
3595
|
return response;
|