@nuanu-ai/agentbrowse 0.2.29 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. package/README.md +3 -0
  2. package/dist/commands/extract-snapshot-sanitizer.d.ts +3 -0
  3. package/dist/commands/extract-snapshot-sanitizer.d.ts.map +1 -1
  4. package/dist/commands/extract-snapshot-sanitizer.js +33 -0
  5. package/dist/commands/extract-stagehand-executor.d.ts.map +1 -1
  6. package/dist/commands/extract-stagehand-executor.js +84 -20
  7. package/dist/commands/observe-inventory.d.ts +10 -0
  8. package/dist/commands/observe-inventory.d.ts.map +1 -1
  9. package/dist/commands/observe-inventory.js +388 -1
  10. package/dist/commands/observe.d.ts +1 -0
  11. package/dist/commands/observe.d.ts.map +1 -1
  12. package/dist/commands/observe.js +1 -1
  13. package/dist/commands/observe.test-harness.d.ts +1 -0
  14. package/dist/commands/observe.test-harness.d.ts.map +1 -1
  15. package/dist/commands/semantic-observe.d.ts +7 -1
  16. package/dist/commands/semantic-observe.d.ts.map +1 -1
  17. package/dist/commands/semantic-observe.js +609 -83
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +9 -2
  20. package/dist/runtime-state.d.ts +47 -1
  21. package/dist/runtime-state.d.ts.map +1 -1
  22. package/dist/runtime-state.js +88 -2
  23. package/dist/secrets/protected-field-values.d.ts +2 -0
  24. package/dist/secrets/protected-field-values.d.ts.map +1 -1
  25. package/dist/secrets/protected-field-values.js +50 -17
  26. package/dist/secrets/protected-fill.d.ts.map +1 -1
  27. package/dist/secrets/protected-fill.js +6 -0
  28. package/dist/secrets/protected-value-adapters.d.ts +3 -0
  29. package/dist/secrets/protected-value-adapters.d.ts.map +1 -0
  30. package/dist/secrets/protected-value-adapters.js +39 -0
  31. package/dist/update-check.d.ts +14 -0
  32. package/dist/update-check.d.ts.map +1 -0
  33. package/dist/update-check.js +182 -0
  34. package/package.json +1 -1
@@ -1,6 +1,7 @@
1
1
  import { z } from 'zod';
2
2
  import { AgentpayStagehandLlmClient } from '../agentpay-stagehand-llm.js';
3
3
  import { resolveAgentpayGatewayConfig } from '../agentpay-gateway.js';
4
+ import { recordLlmUsage, recordPayloadBudget } from '../runtime-state.js';
4
5
  const rerankSchema = z.object({
5
6
  matches: z
6
7
  .array(z.object({
@@ -9,6 +10,7 @@ const rerankSchema = z.object({
9
10
  .max(8),
10
11
  });
11
12
  const RERANK_CANDIDATE_LIMIT = 120;
13
+ const GOAL_RETRIEVAL_ENTITY_LIMIT = 64;
12
14
  const FORM_BUCKET_RESERVE_LIMIT = 48;
13
15
  const FORM_BUCKET_RESERVE_PER_BUCKET = 8;
14
16
  const SCOPE_BUCKET_RESERVE_LIMIT = 24;
@@ -27,12 +29,60 @@ const HIGH_SIGNAL_SCOPE_KINDS = new Set([
27
29
  'card',
28
30
  'form',
29
31
  ]);
32
// Filler words (English first, then Russian) that are dropped from goal tokens
// before lexical scoring of retrieval entities.
const GOAL_TEXT_STOPWORDS = new Set([
    ...['a', 'an', 'and', 'at', 'for', 'from', 'in', 'of', 'on', 'or', 'the', 'to', 'with'],
    ...['в', 'для', 'и', 'или', 'на', 'найти'],
]);
30
53
  function isFieldLikeTarget(target) {
31
54
  const kind = (target.kind ?? '').trim().toLowerCase();
32
55
  const role = (target.role ?? '').trim().toLowerCase();
33
56
  return (['input', 'textarea', 'select', 'combobox'].includes(kind) ||
34
57
  ['textbox', 'combobox'].includes(role));
35
58
  }
59
/**
 * Reports whether a candidate is a scope: its `goalInventoryType` or its
 * `capability` equals 'scope' after trimming and lower-casing.
 * A missing field counts as ''.
 */
function isScopeLikeCandidate(target) {
    const lowered = (raw) => (raw ?? '').trim().toLowerCase();
    if (lowered(target.goalInventoryType) === 'scope') {
        return true;
    }
    return lowered(target.capability) === 'scope';
}
63
/**
 * Reports whether a target behaves like a clickable action.
 *
 * Scope-like and field-like targets are never actions. Otherwise the target
 * qualifies when it allows 'click' or 'press', or when its kind or role
 * (trimmed, lower-cased) is 'button' or 'link'.
 */
function isActionLikeTargetCandidate(target) {
    if (isScopeLikeCandidate(target)) {
        return false;
    }
    if (isFieldLikeTarget(target)) {
        return false;
    }
    const kind = (target.kind ?? '').trim().toLowerCase();
    const role = (target.role ?? '').trim().toLowerCase();
    const actions = target.allowedActions;
    if (actions?.includes('click') || actions?.includes('press')) {
        return true;
    }
    return ['button', 'link'].some((name) => kind === name || role === name);
}
76
/**
 * Canonicalizes text for lexical comparison: whitespace runs become a single
 * space, the ends are trimmed, and the result is lower-cased.
 *
 * @returns the normalized string, or `undefined` when the input is
 *   null/undefined or normalizes to an empty string.
 */
function normalizeRetrievalText(value) {
    if (value == null) {
        return undefined;
    }
    const collapsed = value.replace(/\s+/g, ' ').trim().toLowerCase();
    return collapsed === '' ? undefined : collapsed;
}
80
/**
 * Splits text into lower-cased tokens, where a token is a maximal run of
 * Unicode letters and/or digits. Falsy input yields an empty array.
 */
function tokenizeRetrievalText(value) {
    const tokens = [];
    if (!value) {
        return tokens;
    }
    for (const match of value.matchAll(/[\p{L}\p{N}]+/gu)) {
        tokens.push(match[0].toLowerCase());
    }
    return tokens;
}
36
86
  function semanticFormContextKey(context) {
37
87
  for (const node of [context?.landmark, context?.container, context?.group, context?.item]) {
38
88
  const kind = (node?.kind ?? '').trim().toLowerCase();
@@ -58,6 +108,17 @@ function candidateBucketKey(target, options = {}) {
58
108
  const surfaceKey = (options.preferFormBucket ? formKey : undefined) ?? target.surfaceRef ?? formKey ?? 'page-root';
59
109
  return `${frameKey}|${surfaceKey}`;
60
110
  }
111
/**
 * Resolves the identity of the surface a target belongs to.
 *
 * A non-empty trimmed `goalSurfaceId` wins. Otherwise the trimmed
 * `surfaceRef` is used with one leading 'scope:' prefix removed.
 *
 * @returns the identity string, or `undefined` when neither field is usable.
 */
function surfaceIdentityOf(target) {
    const explicitId = target.goalSurfaceId?.trim();
    if (explicitId) {
        return explicitId;
    }
    const ref = target.surfaceRef?.trim();
    return ref ? ref.replace(/^scope:/, '') : undefined;
}
61
122
  function isPrimaryFormControlTarget(target) {
62
123
  const kind = (target.kind ?? '').trim().toLowerCase();
63
124
  const role = (target.role ?? '').trim().toLowerCase();
@@ -102,6 +163,31 @@ function isHighSignalScopeCandidate(target) {
102
163
  const kind = (target.kind ?? '').trim().toLowerCase();
103
164
  return HIGH_SIGNAL_SCOPE_KINDS.has(kind);
104
165
  }
166
/**
 * Computes the key used to group an action-like target with its content item.
 *
 * Only action-like, non-scope targets with no form bucket that are not
 * primary form controls qualify, and only when their surface kind is not
 * form/dialog/listbox/menu/popover/dropdown/datepicker.
 *
 * @returns `surface:<id>`, else `item:<text>`, else `container:<text>`
 *   (first available), or `undefined` when the target does not qualify or
 *   has none of these.
 */
function contentItemBucketKey(target) {
    const qualifies = !isScopeLikeCandidate(target) &&
        isActionLikeTargetCandidate(target) &&
        !formBucketKey(target) &&
        !isPrimaryFormControlTarget(target);
    if (!qualifies) {
        return undefined;
    }
    const excludedSurfaceKinds = ['form', 'dialog', 'listbox', 'menu', 'popover', 'dropdown', 'datepicker'];
    const surfaceKind = (target.surfaceKind ?? '').trim().toLowerCase();
    if (excludedSurfaceKinds.includes(surfaceKind)) {
        return undefined;
    }
    const surfaceIdentity = surfaceIdentityOf(target);
    if (surfaceIdentity) {
        return `surface:${surfaceIdentity}`;
    }
    // Fall back to the nearest described ancestor: item first, then container.
    const fallbacks = [
        ['item', target.context?.item],
        ['container', target.context?.container],
    ];
    for (const [prefix, node] of fallbacks) {
        const key = normalizeRetrievalText(node?.label ?? node?.text);
        if (key) {
            return `${prefix}:${key}`;
        }
    }
    return undefined;
}
105
191
  function scopeCandidatePriority(target) {
106
192
  const kind = (target.kind ?? '').trim().toLowerCase();
107
193
  if (kind === 'dialog' || kind === 'listbox' || kind === 'menu') {
@@ -118,6 +204,343 @@ function scopeCandidatePriority(target) {
118
204
  }
119
205
  return 4;
120
206
  }
207
/**
 * Scores how well a target can stand in for a whole entity.
 *
 * Starts from `surfacePriority * 10`, then adds one tier bonus (primary form
 * control, field, action, or scope, checked in that order), bonuses for
 * allowed actions, acceptance policy and link-ness, and a label bonus equal to
 * the normalized label length (capped at 100). Generic or boilerplate labels
 * are penalized.
 */
function representativeCandidateScore(target) {
    const label = normalizeRetrievalText(target.label);
    const kind = (target.kind ?? '').trim().toLowerCase();
    const role = (target.role ?? '').trim().toLowerCase();
    const allowsAny = (...actions) => actions.some((action) => target.allowedActions?.includes(action));
    // Adjustments are collected in application order and summed left to right.
    const deltas = [];
    if (isPrimaryFormControlTarget(target)) {
        deltas.push(1_500 - primaryFormTargetPriority(target) * 100);
    }
    else if (isFieldLikeTarget(target)) {
        deltas.push(1_250);
    }
    else if (isActionLikeTargetCandidate(target)) {
        deltas.push(1_000);
    }
    else if (isScopeLikeCandidate(target)) {
        deltas.push(700);
    }
    if (allowsAny('fill', 'select')) {
        deltas.push(120);
    }
    if (allowsAny('click', 'press')) {
        deltas.push(80);
    }
    if (target.acceptancePolicy === 'submit') {
        deltas.push(120);
    }
    if (target.acceptancePolicy === 'navigation') {
        deltas.push(60);
    }
    if (kind === 'link' || role === 'link') {
        deltas.push(40);
    }
    if (label) {
        deltas.push(Math.min(label.length, 100));
        if (label === 'button' || label === 'link') {
            deltas.push(-300);
        }
        if (label.includes('opens in new window')) {
            deltas.push(-120);
        }
        if (label.includes('save this item')) {
            deltas.push(-80);
        }
    }
    return deltas.reduce((total, delta) => total + delta, (target.surfacePriority ?? 0) * 10);
}
253
/**
 * Ranks a member inside an entity of the given kind.
 *
 * In a 'form' entity, primary form controls get 5000 minus 100 times their
 * primary-form priority. In a 'scope' entity, scope-like members get their
 * representative score minus 200. Every other member gets its plain
 * representative score.
 */
function entityMemberPriority(entityKind, target) {
    switch (entityKind) {
        case 'form':
            if (isPrimaryFormControlTarget(target)) {
                return 5_000 - primaryFormTargetPriority(target) * 100;
            }
            break;
        case 'scope':
            if (isScopeLikeCandidate(target)) {
                return representativeCandidateScore(target) - 200;
            }
            break;
        default:
            break;
    }
    return representativeCandidateScore(target);
}
262
/**
 * Sort comparator for `{ index, target }` entries: higher member priority
 * first, ties broken by the lower original index.
 */
function compareEntityMembers(entityKind, left, right) {
    const priorityGap = entityMemberPriority(entityKind, right.target) - entityMemberPriority(entityKind, left.target);
    return priorityGap !== 0 ? priorityGap : left.index - right.index;
}
269
/**
 * Chooses the display label for an entity from its representative target.
 *
 * Form entities prefer enclosing context (landmark, container, group), then
 * the surface label, then the target's own label. All other entities prefer
 * the target's own label, then container, item, surface, group, landmark.
 * The first non-nullish candidate wins; if none exists, the last candidate's
 * value (null or undefined) is returned.
 */
function pickEntityLabel(entityKind, representative) {
    const ctx = representative.context;
    const candidates = entityKind === 'form'
        ? [
            ctx?.landmark?.label,
            ctx?.container?.label,
            ctx?.group?.label,
            representative.surfaceLabel,
            representative.label,
        ]
        : [
            representative.label,
            ctx?.container?.label,
            ctx?.item?.label,
            representative.surfaceLabel,
            ctx?.group?.label,
            ctx?.landmark?.label,
        ];
    for (const candidate of candidates) {
        if (candidate != null) {
            return candidate;
        }
    }
    return candidates[candidates.length - 1];
}
284
/**
 * Collects up to four distinct, informative labels from the given targets,
 * in iteration order.
 *
 * Labels are whitespace-collapsed and trimmed. Empty labels, duplicates, and
 * the generic placeholders "button" / "link" are skipped. The placeholder
 * check is case-insensitive so it agrees with the lower-cased comparison used
 * when scoring representative candidates; otherwise 'button' or 'LINK' would
 * slip through and pollute the entity's labels and search text.
 *
 * @param targets iterable of targets with an optional string `label`.
 * @returns array of at most four cleaned labels.
 */
function collectRepresentativeLabels(targets) {
    const MAX_LABELS = 4;
    const labels = [];
    for (const target of targets) {
        const label = target.label?.replace(/\s+/g, ' ').trim();
        if (!label) {
            continue;
        }
        const lowered = label.toLowerCase();
        if (lowered === 'button' || lowered === 'link') {
            continue;
        }
        if (labels.includes(label)) {
            continue;
        }
        labels.push(label);
        if (labels.length >= MAX_LABELS) {
            break;
        }
    }
    return labels;
}
298
/**
 * Builds the lower-cased text blob an entity is lexically matched against.
 *
 * Concatenates, in order: the entity label, the representative's own label,
 * the representative labels, the surface label, label and text of the
 * item/group/container/landmark context nodes, and the context hint text.
 * Each fragment is normalized; empty fragments are dropped; the rest are
 * joined with single spaces.
 */
function buildEntitySearchText(entityLabel, representative, representativeLabels) {
    const ctx = representative.context;
    const fragments = [entityLabel, representative.label, ...representativeLabels, representative.surfaceLabel];
    for (const node of [ctx?.item, ctx?.group, ctx?.container, ctx?.landmark]) {
        fragments.push(node?.label, node?.text);
    }
    fragments.push(ctx?.hintText);
    const normalizedFragments = [];
    for (const fragment of fragments) {
        const normalized = normalizeRetrievalText(fragment);
        if (normalized) {
            normalizedFragments.push(normalized);
        }
    }
    return normalizedFragments.join(' ');
}
318
/**
 * Assembles one retrieval entity from a group of target indexes.
 *
 * Members are ranked with `compareEntityMembers`; the top-ranked member
 * becomes the representative, whose surface, frame, context and structure
 * fields are copied onto the entity.
 *
 * @param entityKind 'form' | 'item' | 'scope' | 'standalone' (as used by callers in this file).
 * @param entityKey unique key for the entity.
 * @param memberIndexes non-empty iterable of indexes into `targets`.
 * @param targets the full target list.
 */
function buildGoalRetrievalEntity(entityKind, entityKey, memberIndexes, targets) {
    const rankedMembers = Array.from(memberIndexes, (index) => ({ index, target: targets[index] }));
    rankedMembers.sort((left, right) => compareEntityMembers(entityKind, left, right));
    const representative = rankedMembers[0].target;
    const { surfaceKind, surfaceLabel, surfacePriority, framePath, frameUrl, context, structure } = representative;
    const representativeLabels = collectRepresentativeLabels(rankedMembers.map((member) => member.target));
    const label = pickEntityLabel(entityKind, representative);
    let kind;
    if (entityKind === 'form') {
        kind = 'form';
    }
    else if (entityKind === 'item') {
        // Items are described by the surface they live on when it is known.
        kind = surfaceKind ?? representative.kind;
    }
    else {
        kind = representative.kind;
    }
    return {
        entityKind,
        entityKey,
        firstIndex: Math.min(...memberIndexes),
        memberIndexes: rankedMembers.map((member) => member.index),
        representative,
        label,
        kind,
        surfaceKind,
        surfaceLabel,
        surfacePriority,
        framePath,
        frameUrl,
        context,
        structure,
        representativeLabels,
        searchText: buildEntitySearchText(label, representative, representativeLabels),
    };
}
348
/**
 * Groups targets into retrieval entities and returns them ordered by the
 * lowest member index.
 *
 * Entities are emitted in this order (which breaks ties in the final stable
 * sort): form groups, content-item groups, high-signal scopes together with
 * the non-scope targets on the same surface, and finally standalone
 * interactive targets that were not placed in any group.
 */
function buildGoalRetrievalEntities(targets) {
    const entities = [];
    const groupedTargetIndexes = new Set();
    const nonScopeTargetsBySurface = new Map();
    const formGroups = new Map();
    const itemGroups = new Map();
    const appendIndex = (groups, key, index) => {
        const members = groups.get(key);
        if (members) {
            members.push(index);
        }
        else {
            groups.set(key, [index]);
        }
    };
    // Single bucketing pass; each map still sees keys in first-occurrence order.
    for (let index = 0; index < targets.length; index += 1) {
        const target = targets[index];
        if (!isScopeLikeCandidate(target)) {
            const surfaceIdentity = surfaceIdentityOf(target);
            if (surfaceIdentity) {
                appendIndex(nonScopeTargetsBySurface, surfaceIdentity, index);
            }
            const formKey = formBucketKey(target);
            if (formKey) {
                appendIndex(formGroups, formKey, index);
            }
        }
        const itemKey = contentItemBucketKey(target);
        if (itemKey) {
            appendIndex(itemGroups, itemKey, index);
        }
    }
    const emitGroups = (entityKind, groups) => {
        for (const [key, memberIndexes] of groups) {
            entities.push(buildGoalRetrievalEntity(entityKind, `${entityKind}:${key}`, memberIndexes, targets));
            for (const memberIndex of memberIndexes) {
                groupedTargetIndexes.add(memberIndex);
            }
        }
    };
    emitGroups('form', formGroups);
    emitGroups('item', itemGroups);
    for (let index = 0; index < targets.length; index += 1) {
        const target = targets[index];
        if (!isHighSignalScopeCandidate(target)) {
            continue;
        }
        const kind = (target.kind ?? '').trim().toLowerCase();
        const surfaceIdentity = surfaceIdentityOf(target);
        // Forms are covered by form groups; cards already bucketed as items are not repeated.
        const alreadyCovered = !surfaceIdentity ||
            kind === 'form' ||
            (kind === 'card' && itemGroups.has(`surface:${surfaceIdentity}`));
        if (alreadyCovered) {
            continue;
        }
        const linkedTargets = nonScopeTargetsBySurface.get(surfaceIdentity) ?? [];
        entities.push(buildGoalRetrievalEntity('scope', `scope:${surfaceIdentity}`, [index, ...linkedTargets], targets));
        for (const linkedIndex of linkedTargets) {
            groupedTargetIndexes.add(linkedIndex);
        }
    }
    for (let index = 0; index < targets.length; index += 1) {
        const target = targets[index];
        if (isScopeLikeCandidate(target) || groupedTargetIndexes.has(index)) {
            continue;
        }
        const isInteractive = isActionLikeTargetCandidate(target) ||
            isFieldLikeTarget(target) ||
            isPrimaryFormControlTarget(target);
        if (!isInteractive) {
            continue;
        }
        entities.push(buildGoalRetrievalEntity('standalone', `target:${index}`, [index], targets));
    }
    return entities.sort((left, right) => left.firstIndex - right.firstIndex);
}
428
/**
 * Goal-independent priority of an entity.
 *
 * Sum of: `surfacePriority * 10`, a per-kind base (form 900, item 820,
 * scope 720, standalone 600, nothing for other kinds), 25 per member for up
 * to six members, and the representative's candidate score.
 */
function retrievalEntityPriority(entity) {
    const baseByKind = new Map([
        ['form', 900],
        ['item', 820],
        ['scope', 720],
        ['standalone', 600],
    ]);
    let score = (entity.surfacePriority ?? 0) * 10;
    const kindBase = baseByKind.get(entity.entityKind);
    if (kindBase !== undefined) {
        score += kindBase;
    }
    score += Math.min(entity.memberIndexes.length, 6) * 25;
    return score + representativeCandidateScore(entity.representative);
}
448
/**
 * Lexical relevance of an entity to the goal text. Returns 0 when either the
 * goal or the entity's search text is empty.
 *
 * Components:
 *  - phrase bonuses: search text contains the goal (+220), goal contains the
 *    entity label (+180), entity label contains the goal (+140);
 *  - per matched goal token: 8 + min(token length, 12), where goal tokens
 *    shorter than 2 characters and stopwords are ignored;
 *  - coverage bonus: +120 when every goal token matched, +70 at >= 75%,
 *    +35 at >= 50%.
 */
function scoreRetrievalEntityAgainstGoal(goal, entity) {
    const normalizedGoal = normalizeRetrievalText(goal);
    if (!normalizedGoal) {
        return 0;
    }
    const entityText = entity.searchText;
    if (!entityText) {
        return 0;
    }
    const goalTokens = tokenizeRetrievalText(normalizedGoal).filter((token) => token.length >= 2 && !GOAL_TEXT_STOPWORDS.has(token));
    const entityTokens = new Set(tokenizeRetrievalText(entityText));
    const entityLabel = normalizeRetrievalText(entity.label);
    const phraseScore = (entityText.includes(normalizedGoal) ? 220 : 0) +
        (entityLabel && normalizedGoal.includes(entityLabel) ? 180 : 0) +
        (entityLabel && entityLabel.includes(normalizedGoal) ? 140 : 0);
    const matchedTokens = goalTokens.filter((token) => entityTokens.has(token));
    const tokenScore = matchedTokens.reduce((sum, token) => sum + 8 + Math.min(token.length, 12), 0);
    let coverageScore = 0;
    if (goalTokens.length > 0) {
        const coverage = matchedTokens.length / goalTokens.length;
        if (coverage === 1) {
            coverageScore = 120;
        }
        else if (coverage >= 0.75) {
            coverageScore = 70;
        }
        else if (coverage >= 0.5) {
            coverageScore = 35;
        }
    }
    return phraseScore + tokenScore + coverageScore;
}
493
/**
 * Narrows the entity list to at most GOAL_RETRIEVAL_ENTITY_LIMIT entries.
 *
 * When the list already fits, a shallow copy is returned unchanged.
 * Otherwise entities with a positive lexical score are taken first (best
 * lexical score, then priority, then earliest first index), and remaining
 * slots are filled by priority alone. Entities sharing an `entityKey` are
 * taken once.
 */
function preselectRetrievalEntities(goal, entities) {
    if (entities.length <= GOAL_RETRIEVAL_ENTITY_LIMIT) {
        return Array.from(entities);
    }
    const scored = entities.map((entity) => ({
        entity,
        lexicalScore: scoreRetrievalEntityAgainstGoal(goal, entity),
        priority: retrievalEntityPriority(entity),
    }));
    const byPriority = (left, right) => right.priority - left.priority || left.entity.firstIndex - right.entity.firstIndex;
    const byLexicalThenPriority = (left, right) => right.lexicalScore - left.lexicalScore || byPriority(left, right);
    const lexicalMatches = scored.filter((candidate) => candidate.lexicalScore > 0).sort(byLexicalThenPriority);
    const priorityOrder = [...scored].sort(byPriority);
    const selected = [];
    const seenKeys = new Set();
    for (const { entity } of [...lexicalMatches, ...priorityOrder]) {
        if (seenKeys.has(entity.entityKey)) {
            continue;
        }
        seenKeys.add(entity.entityKey);
        selected.push(entity);
        if (selected.length >= GOAL_RETRIEVAL_ENTITY_LIMIT) {
            break;
        }
    }
    return selected;
}
530
/**
 * Flattens entities back into their member targets.
 *
 * Member indexes are visited entity by entity; each index is used once, at
 * its first occurrence. Indexes that resolve to a falsy entry in `targets`
 * are dropped.
 */
function expandRetrievalEntitiesToCandidates(targets, entities) {
    // A Set keeps first-insertion order, which gives de-duplication and ordering at once.
    const uniqueIndexes = new Set();
    for (const { memberIndexes } of entities) {
        for (const memberIndex of memberIndexes) {
            uniqueIndexes.add(memberIndex);
        }
    }
    return Array.from(uniqueIndexes, (memberIndex) => targets[memberIndex]).filter(Boolean);
}
121
544
  function collectBucketedCandidateIndexes(targets, options) {
122
545
  const bucketEntries = new Map();
123
546
  for (const [index, target] of targets.entries()) {
@@ -211,124 +634,220 @@ function diversifyCandidates(targets) {
211
634
  }
212
635
  return orderedIndexes.map((index) => targets[index]).slice(0, RERANK_CANDIDATE_LIMIT);
213
636
  }
214
- function buildCandidateSummary(target, index) {
637
// Builds the prompt line for one candidate by delegating, with the same
// arguments, to buildCompactCandidateSummary.
+ function buildCandidateSummary(target, index, surfaceSummaryIdBySurfaceKey) {
638
+ return buildCompactCandidateSummary(target, index, surfaceSummaryIdBySurfaceKey);
639
+ }
640
/**
 * Collapses whitespace in a context string and truncates it for prompt use.
 *
 * Whitespace runs become a single space and the ends are trimmed. Strings
 * longer than `maxLength` UTF-16 code units are cut and terminated with '…',
 * so the result never exceeds `maxLength` units when `maxLength >= 1`.
 *
 * The cut never lands between the two halves of a surrogate pair: a plain
 * `slice` could leave a lone high surrogate (half an emoji or astral
 * character) before the ellipsis. A `maxLength` below 1 is treated as "no
 * room for text" and yields just '…'.
 *
 * @param value text to compact; null/undefined yields `undefined`.
 * @param maxLength maximum length in UTF-16 code units (default 80).
 * @returns the compacted string, or `undefined` when nothing remains.
 */
function compactContextValue(value, maxLength = 80) {
    const normalized = value?.replace(/\s+/g, ' ').trim();
    if (!normalized) {
        return undefined;
    }
    if (normalized.length <= maxLength) {
        return normalized;
    }
    let cut = Math.max(maxLength - 1, 0);
    if (cut > 0) {
        const lastKept = normalized.charCodeAt(cut - 1);
        const isHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
        if (isHighSurrogate) {
            // Drop the orphaned first half instead of emitting a broken character.
            cut -= 1;
        }
    }
    return `${normalized.slice(0, cut)}…`;
}
647
/**
 * Reduces a target's context to short strings for the prompt.
 *
 * For item, group, container and landmark the node's label (or, failing
 * that, its text) is compacted; the hint text is compacted under `hint`.
 * Keys whose compacted value is empty are omitted.
 *
 * @returns an object with any of `item`, `group`, `container`, `landmark`,
 *   `hint` (in that key order), or `undefined` when the target has no context
 *   or nothing survives compaction.
 */
function compactCandidateContext(target) {
    const context = target.context;
    if (!context) {
        return undefined;
    }
    const describe = (node) => node?.label ?? node?.text;
    const entries = [
        ['item', describe(context.item)],
        ['group', describe(context.group)],
        ['container', describe(context.container)],
        ['landmark', describe(context.landmark)],
        ['hint', context.hintText],
    ]
        .map(([key, raw]) => [key, compactContextValue(raw)])
        .filter(([, compacted]) => Boolean(compacted));
    return entries.length > 0 ? Object.fromEntries(entries) : undefined;
}
667
/**
 * Lower-cases the goal and collapses every whitespace run to one space, with
 * no leading or trailing whitespace. Always returns a string (possibly '').
 */
function normalizedGoalText(value) {
    const words = value.trim().split(/\s+/);
    return words.join(' ').toLowerCase();
}
670
/**
 * Decides whether scope candidates should be kept for this goal.
 *
 * Returns true for an empty goal, or when the normalized goal contains any
 * hint as a substring. The hints are English words and Russian words or word
 * stems naming forms, fields, dialogs, menus, cards, regions and similar.
 */
function goalNeedsScopeCandidates(goal) {
    const normalizedGoal = normalizedGoalText(goal);
    if (normalizedGoal === '') {
        return true;
    }
    const englishHints = [
        'form', 'forms', 'field', 'fields', 'control', 'controls',
        'dialog', 'modal', 'popup', 'widget', 'section', 'panel',
        'menu', 'listbox', 'dropdown', 'calendar', 'datepicker',
        'card', 'results', 'scope', 'region', 'surface',
    ];
    const russianHints = [
        'форма', 'форму', 'поля', 'поля ввода', 'контрол',
        'диалог', 'модал', 'попап', 'виджет', 'секци', 'панел',
        'меню', 'список', 'выпада', 'календар', 'карточ',
        'результат', 'область', 'регион',
    ];
    for (const hint of [...englishHints, ...russianHints]) {
        if (normalizedGoal.includes(hint)) {
            return true;
        }
    }
    return false;
}
720
/**
 * Selects the targets worth considering for a goal.
 *
 * Goals that need scopes keep every target. Other goals keep only non-scope
 * targets, unless that would leave nothing, in which case every target is
 * kept. The result is always a new array.
 */
function relevantTargetsForGoal(goal, targets) {
    const directTargets = goalNeedsScopeCandidates(goal)
        ? []
        : targets.filter((target) => !isScopeLikeCandidate(target));
    return directTargets.length > 0 ? directTargets : [...targets];
}
727
/**
 * Formats one " | "-separated prompt line describing a surface.
 *
 * The line starts with the summary id, followed by whichever of `kind`,
 * `label`, `priority`, `framePath` and `context` are available, each rendered
 * as `name=<JSON>`. Kind and label fall back from the target's surface fields
 * to its context nodes and finally to the target's own kind/label.
 */
function buildSurfaceSummaryLine(target, id) {
    const ctx = target.context;
    const compactContext = compactCandidateContext(target);
    const surfaceKind = target.surfaceKind ?? ctx?.container?.kind ?? ctx?.group?.kind ?? target.kind;
    const surfaceLabel = target.surfaceLabel ??
        ctx?.container?.label ??
        ctx?.group?.label ??
        ctx?.landmark?.label ??
        target.label;
    // [name, value, include?] in output order.
    const fields = [
        ['kind', surfaceKind, Boolean(surfaceKind)],
        ['label', surfaceLabel, Boolean(surfaceLabel)],
        ['priority', target.surfacePriority, typeof target.surfacePriority === 'number'],
        ['framePath', target.framePath, Boolean(target.framePath?.length)],
        ['context', compactContext, Boolean(compactContext)],
    ];
    const parts = [`${id}`];
    for (const [name, value, include] of fields) {
        if (include) {
            parts.push(`${name}=${JSON.stringify(value)}`);
        }
    }
    return parts.join(' | ');
}
756
/**
 * Produces one summary per distinct surface among the targets.
 *
 * Surfaces are numbered `s1`, `s2`, … in order of first appearance, and each
 * is described using the first target seen on it. Targets without a surface
 * identity are ignored.
 *
 * @returns `{ summaries, summaryIdBySurfaceKey }`, where `summaries` is a
 *   list of `{ id, line }` and the map goes from surface identity to id.
 */
function buildSurfaceSummaries(targets) {
    const summaryIdBySurfaceKey = new Map();
    const summaries = [];
    for (const target of targets) {
        const surfaceKey = surfaceIdentityOf(target);
        const isNewSurface = Boolean(surfaceKey) && !summaryIdBySurfaceKey.has(surfaceKey);
        if (isNewSurface) {
            const id = `s${summaries.length + 1}`;
            summaryIdBySurfaceKey.set(surfaceKey, id);
            summaries.push({ id, line: buildSurfaceSummaryLine(target, id) });
        }
    }
    return { summaries, summaryIdBySurfaceKey };
}
773
+ function buildCompactCandidateSummary(target, index, surfaceSummaryIdBySurfaceKey) {
215
774
  const parts = [`id=c${index + 1}`];
216
- const container = target.context?.container;
217
- const item = target.context?.item;
218
- const group = target.context?.group;
219
- const landmark = target.context?.landmark;
220
- const layout = target.context?.layout;
221
- const visual = target.context?.visual;
775
+ const compactContext = compactCandidateContext(target);
776
+ const surfaceSummaryId = surfaceIdentityOf(target)
777
+ ? surfaceSummaryIdBySurfaceKey?.get(surfaceIdentityOf(target))
778
+ : undefined;
222
779
  if (target.kind)
223
780
  parts.push(`kind=${JSON.stringify(target.kind)}`);
224
781
  if (target.role)
225
782
  parts.push(`role=${JSON.stringify(target.role)}`);
226
783
  if (target.label)
227
784
  parts.push(`label=${JSON.stringify(target.label)}`);
228
- if (target.interactionHint) {
229
- parts.push(`interactionHint=${JSON.stringify(target.interactionHint)}`);
230
- }
231
- if (target.capability)
232
- parts.push(`capability=${JSON.stringify(target.capability)}`);
233
785
  if (target.allowedActions?.length) {
234
- parts.push(`allowedActions=${JSON.stringify(target.allowedActions)}`);
786
+ parts.push(`actions=${JSON.stringify(target.allowedActions)}`);
235
787
  }
236
788
  if (target.acceptancePolicy) {
237
- parts.push(`acceptancePolicy=${JSON.stringify(target.acceptancePolicy)}`);
789
+ parts.push(`policy=${JSON.stringify(target.acceptancePolicy)}`);
238
790
  }
239
791
  if (target.controlFamily) {
240
- parts.push(`controlFamily=${JSON.stringify(target.controlFamily)}`);
241
- }
242
- if (target.surfaceRef)
243
- parts.push(`surfaceRef=${JSON.stringify(target.surfaceRef)}`);
244
- if (target.surfaceKind)
245
- parts.push(`surfaceKind=${JSON.stringify(target.surfaceKind)}`);
246
- if (target.surfaceLabel)
247
- parts.push(`surfaceLabel=${JSON.stringify(target.surfaceLabel)}`);
248
- if (typeof target.surfacePriority === 'number') {
249
- parts.push(`surfacePriority=${JSON.stringify(target.surfacePriority)}`);
792
+ parts.push(`family=${JSON.stringify(target.controlFamily)}`);
793
+ }
794
+ if (target.capability && target.capability !== 'actionable') {
795
+ parts.push(`capability=${JSON.stringify(target.capability)}`);
796
+ }
797
+ if (surfaceSummaryId) {
798
+ parts.push(`surface=${surfaceSummaryId}`);
250
799
  }
251
- if (target.framePath?.length)
252
- parts.push(`framePath=${JSON.stringify(target.framePath)}`);
253
- if (target.frameUrl)
254
- parts.push(`frameUrl=${JSON.stringify(target.frameUrl)}`);
255
- if (target.formSelector)
256
- parts.push(`formSelector=${JSON.stringify(target.formSelector)}`);
257
800
  if (target.structure)
258
801
  parts.push(`structure=${JSON.stringify(target.structure)}`);
259
802
  if (target.placeholder)
260
803
  parts.push(`placeholder=${JSON.stringify(target.placeholder)}`);
261
- if (target.title)
262
- parts.push(`title=${JSON.stringify(target.title)}`);
263
804
  if (target.states)
264
805
  parts.push(`state=${JSON.stringify(target.states)}`);
265
- if (item?.kind)
266
- parts.push(`itemKind=${JSON.stringify(item.kind)}`);
267
- if (item?.label)
268
- parts.push(`itemLabel=${JSON.stringify(item.label)}`);
269
- if (item?.text)
270
- parts.push(`itemText=${JSON.stringify(item.text)}`);
271
- if (group?.kind)
272
- parts.push(`groupKind=${JSON.stringify(group.kind)}`);
273
- if (group?.label)
274
- parts.push(`groupLabel=${JSON.stringify(group.label)}`);
275
- if (group?.text)
276
- parts.push(`groupText=${JSON.stringify(group.text)}`);
277
- if (container?.kind)
278
- parts.push(`containerKind=${JSON.stringify(container.kind)}`);
279
- if (container?.label)
280
- parts.push(`containerLabel=${JSON.stringify(container.label)}`);
281
- if (container?.text)
282
- parts.push(`containerText=${JSON.stringify(container.text)}`);
283
- if (landmark?.kind)
284
- parts.push(`landmarkKind=${JSON.stringify(landmark.kind)}`);
285
- if (landmark?.label)
286
- parts.push(`landmarkLabel=${JSON.stringify(landmark.label)}`);
287
- if (landmark?.text)
288
- parts.push(`landmarkText=${JSON.stringify(landmark.text)}`);
289
- if (layout?.lane)
290
- parts.push(`lane=${JSON.stringify(layout.lane)}`);
291
- if (layout?.band)
292
- parts.push(`band=${JSON.stringify(layout.band)}`);
293
- if (visual)
294
- parts.push(`visual=${JSON.stringify(visual)}`);
295
- if (target.context?.hintText)
296
- parts.push(`hintText=${JSON.stringify(target.context.hintText)}`);
806
+ if (target.framePath?.length)
807
+ parts.push(`framePath=${JSON.stringify(target.framePath)}`);
808
+ if (compactContext)
809
+ parts.push(`context=${JSON.stringify(compactContext)}`);
297
810
  return parts.join(' | ');
298
811
  }
299
812
  function normalizeCandidateId(value) {
300
813
  return value.trim().toLowerCase();
301
814
  }
302
- export async function rerankDomTargetsForGoal(instruction, targets) {
815
+ export async function rerankDomTargetsForGoal(instruction, targets, options = {}) {
303
816
  if (targets.length === 0) {
304
817
  return [];
305
818
  }
819
+ const relevantTargets = relevantTargetsForGoal(instruction, targets);
820
+ options.session &&
821
+ recordPayloadBudget(options.session, {
822
+ observeRerankCandidatesSeen: targets.length,
823
+ });
306
824
  const gateway = resolveAgentpayGatewayConfig();
307
825
  const client = new AgentpayStagehandLlmClient(gateway);
308
- const candidates = diversifyCandidates(targets);
826
+ const retrievalEntities = buildGoalRetrievalEntities(relevantTargets);
827
+ const retrievedCandidates = relevantTargets.length > RERANK_CANDIDATE_LIMIT
828
+ ? expandRetrievalEntitiesToCandidates(relevantTargets, preselectRetrievalEntities(instruction, retrievalEntities))
829
+ : [...relevantTargets];
830
+ const candidates = diversifyCandidates(retrievedCandidates.length > 0 ? retrievedCandidates : [...relevantTargets]);
831
+ const { summaries: surfaceSummaries, summaryIdBySurfaceKey } = buildSurfaceSummaries(candidates);
832
+ options.session &&
833
+ recordPayloadBudget(options.session, {
834
+ observeRerankCandidatesSent: candidates.length,
835
+ });
309
836
  const prompt = [
310
- 'You are choosing from already discovered visible actionable candidates on a webpage.',
311
- 'Select only the candidate IDs that directly satisfy the user goal.',
312
- 'Use container/landmark/layout/state information to disambiguate similar labels.',
313
- 'Prefer candidates whose surrounding container clearly corroborates the goal.',
314
- 'Prefer actionable candidates over scope or informational candidates unless the goal explicitly asks for a container or region.',
315
- 'Prefer direct visible targets over indirect controls when the direct visible target is present.',
316
- 'Prefer candidates that belong to an active local surface (dialog/listbox/popover/card container) over unrelated page-root candidates when both are plausible matches.',
317
- 'For date cells, seat cells, and other structured-grid targets, use row/column/zone/cell metadata and state to distinguish plausible matches.',
318
- 'When the goal refers to an input field or typed value, prefer the directly editable input/textarea/select over a non-editable wrapper such as a combobox shell.',
319
- 'When the goal refers to a specific field or control, an exact disabled/readonly match is still relevant. Prefer the exact field over nearby prerequisite controls and use state/context to indicate that it is gated.',
320
- 'Prefer candidates whose explicit state matches the goal and avoid candidates whose state contradicts the goal.',
321
- 'Use visual cues only as supporting evidence when they help distinguish active vs muted targets in the same group.',
322
- 'When the goal is about setting or choosing a form/search/filter value, prefer the primary form control or open picker inside a form/dialog/combobox over lower informational charts, summaries, or content collections that merely mention similar values.',
323
- 'When the goal asks for a form or a set of related controls, return the direct controls from the relevant form instead of nearby navigation links or surrounding section headings.',
324
- 'If a secondary chart/list mirrors the same options as a primary form control, do not select the secondary surface unless the goal explicitly asks for that chart/list.',
325
- 'Avoid navigation, filter, and summary controls when the goal refers to a concrete content item unless the goal explicitly asks for those controls.',
837
+ 'You are choosing from already discovered visible webpage candidates.',
838
+ 'Select only candidate IDs that directly satisfy the goal.',
839
+ 'Prefer direct actionable controls over surrounding regions unless the goal explicitly asks for a form, region, widget, or set of controls.',
840
+ 'Use owning surface, compact context, explicit state, and structure metadata to disambiguate similar labels.',
841
+ 'For input/value goals, prefer the directly editable field or primary picker trigger over wrappers or mirrored summary content.',
842
+ 'For structured-grid targets such as dates or seats, use row/column/zone/cell metadata and state.',
843
+ 'An exact disabled or readonly field can still be relevant when the goal refers to that specific field.',
326
844
  'Do not invent IDs. Return an empty list if nothing clearly matches.',
327
845
  '',
328
846
  `Goal: ${instruction}`,
329
847
  '',
848
+ ...(surfaceSummaries.length > 0 ? ['Surfaces:', ...surfaceSummaries.map((entry) => entry.line), ''] : []),
330
849
  'Candidates:',
331
- ...candidates.map((target, index) => buildCandidateSummary(target, index)),
850
+ ...candidates.map((target, index) => buildCandidateSummary(target, index, summaryIdBySurfaceKey)),
332
851
  ].join('\n');
333
852
  const result = await client.createChatCompletion({
334
853
  logger: () => { },
@@ -340,6 +859,13 @@ export async function rerankDomTargetsForGoal(instruction, targets) {
340
859
  },
341
860
  },
342
861
  });
862
+ if (options.session) {
863
+ recordLlmUsage(options.session, {
864
+ purpose: 'browse.observe',
865
+ usage: result.usage,
866
+ inputChars: prompt.length,
867
+ });
868
+ }
343
869
  const selectedIds = new Set(result.data.matches.map((match) => normalizeCandidateId(match.candidateId)));
344
870
  return candidates.filter((_, index) => selectedIds.has(`c${index + 1}`));
345
871
  }