@exodus/xqa 5.4.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/xqa.cjs +208 -36
  2. package/package.json +2 -2
package/dist/xqa.cjs CHANGED
@@ -22262,10 +22262,10 @@ var require_array = __commonJS({
22262
22262
  "use strict";
22263
22263
  Object.defineProperty(exports2, "__esModule", { value: true });
22264
22264
  exports2.splitWhen = exports2.flatten = void 0;
22265
- function flatten(items) {
22265
+ function flatten2(items) {
22266
22266
  return items.reduce((collection, item) => [].concat(collection, item), []);
22267
22267
  }
22268
- exports2.flatten = flatten;
22268
+ exports2.flatten = flatten2;
22269
22269
  function splitWhen(items, predicate) {
22270
22270
  const result = [[]];
22271
22271
  let groupIndex = 0;
@@ -63484,6 +63484,104 @@ function collectElements(elements, screen) {
63484
63484
  walk(elements);
63485
63485
  return into;
63486
63486
  }
63487
+ var OCCLUDED_BY_OVERLAP_TAG = "[occluded-by-overlap]";
63488
+ var FULL_BBOX_CONTAINMENT_RATIO = 0.85;
63489
+ function frameContainsPoint(frame, point) {
63490
+ return point.x >= frame.x && point.x < frame.x + frame.width && point.y >= frame.y && point.y < frame.y + frame.height;
63491
+ }
63492
+ function frameCenter(frame) {
63493
+ return { x: frame.x + frame.width / 2, y: frame.y + frame.height / 2 };
63494
+ }
63495
+ function frameArea(frame) {
63496
+ return Math.max(0, frame.width) * Math.max(0, frame.height);
63497
+ }
63498
+ function intersectionArea(left, right) {
63499
+ const x1 = Math.max(left.x, right.x);
63500
+ const y12 = Math.max(left.y, right.y);
63501
+ const x22 = Math.min(left.x + left.width, right.x + right.width);
63502
+ const y22 = Math.min(left.y + left.height, right.y + right.height);
63503
+ if (x22 <= x1 || y22 <= y12) {
63504
+ return 0;
63505
+ }
63506
+ return (x22 - x1) * (y22 - y12);
63507
+ }
63508
+ function selfNode(element, input) {
63509
+ if (!element.frame || !isInViewport(element.frame, input.screen)) {
63510
+ return void 0;
63511
+ }
63512
+ return {
63513
+ element,
63514
+ frame: element.frame,
63515
+ ancestors: input.ancestors,
63516
+ treeOrder: input.startOrder
63517
+ };
63518
+ }
63519
+ function flattenList(list, input) {
63520
+ let nextOrder = input.startOrder;
63521
+ const collected = [];
63522
+ for (const element of list) {
63523
+ const subtree = flattenSubtree(element, { ...input, startOrder: nextOrder });
63524
+ collected.push(...subtree.nodes);
63525
+ nextOrder = subtree.nextOrder;
63526
+ }
63527
+ return { nodes: collected, nextOrder };
63528
+ }
63529
+ function flattenSubtree(element, input) {
63530
+ const self2 = selfNode(element, input);
63531
+ const selfNodes = self2 ? [self2] : [];
63532
+ const orderAfterSelf = self2 ? input.startOrder + 1 : input.startOrder;
63533
+ if (!element.children || element.children.length === 0) {
63534
+ return { nodes: selfNodes, nextOrder: orderAfterSelf };
63535
+ }
63536
+ const childAncestors = new Set(input.ancestors);
63537
+ childAncestors.add(element);
63538
+ const childOutput = flattenList(element.children, {
63539
+ ancestors: childAncestors,
63540
+ screen: input.screen,
63541
+ startOrder: orderAfterSelf
63542
+ });
63543
+ return {
63544
+ nodes: [...selfNodes, ...childOutput.nodes],
63545
+ nextOrder: childOutput.nextOrder
63546
+ };
63547
+ }
63548
+ function flatten(elements, screen) {
63549
+ return flattenList(elements, {
63550
+ ancestors: /* @__PURE__ */ new Set(),
63551
+ screen,
63552
+ startOrder: 0
63553
+ }).nodes;
63554
+ }
63555
+ function blocksTapPoint(target, candidate) {
63556
+ return frameContainsPoint(candidate.frame, frameCenter(target.frame));
63557
+ }
63558
+ function fullyCoversBoundingBox(target, candidate) {
63559
+ const targetArea = frameArea(target.frame);
63560
+ if (targetArea <= 0) {
63561
+ return false;
63562
+ }
63563
+ return intersectionArea(target.frame, candidate.frame) / targetArea >= FULL_BBOX_CONTAINMENT_RATIO;
63564
+ }
63565
+ function isOccluder(target, candidate) {
63566
+ if (candidate.treeOrder <= target.treeOrder) {
63567
+ return false;
63568
+ }
63569
+ if (candidate.ancestors.has(target.element)) {
63570
+ return false;
63571
+ }
63572
+ if (target.ancestors.has(candidate.element)) {
63573
+ return false;
63574
+ }
63575
+ return blocksTapPoint(target, candidate) || fullyCoversBoundingBox(target, candidate);
63576
+ }
63577
+ function isOccluded(target, nodes) {
63578
+ return nodes.some((candidate) => isOccluder(target, candidate));
63579
+ }
63580
+ function detectOccludedElements(elements, screen) {
63581
+ const nodes = flatten(elements, screen);
63582
+ const occludedElements = nodes.filter((target) => isOccluded(target, nodes)).map((node) => node.element);
63583
+ return new Set(occludedElements);
63584
+ }
63487
63585
  function resolveLabel(element) {
63488
63586
  return element.AXLabel ?? element.AXValue ?? "";
63489
63587
  }
@@ -63509,15 +63607,27 @@ function resolveClippingTags(frame, screen) {
63509
63607
  }
63510
63608
  return tags.length > 0 ? ` ${tags.join(" ")}` : "";
63511
63609
  }
63512
- function formatElement(element, screen) {
63610
+ function formatElement(element, context) {
63513
63611
  const type2 = resolveType(element);
63514
63612
  const label = resolveLabel(element);
63515
63613
  const frame = element.frame ?? { x: 0, y: 0, width: 0, height: 0 };
63516
63614
  const cx = Math.round(frame.x + frame.width / 2);
63517
63615
  const cy = Math.round(frame.y + frame.height / 2);
63518
63616
  const state = element.enabled === false ? " [disabled]" : "";
63519
- const clipping = resolveClippingTags(frame, screen);
63520
- return `[${type2}] "${label}" at (${String(cx)}, ${String(cy)}) size ${String(Math.round(frame.width))}x${String(Math.round(frame.height))}${state}${clipping}`;
63617
+ const clipping = resolveClippingTags(frame, context.screen);
63618
+ const occluded = context.occluded.has(element) ? ` ${OCCLUDED_BY_OVERLAP_TAG}` : "";
63619
+ return `[${type2}] "${label}" at (${String(cx)}, ${String(cy)}) size ${String(Math.round(frame.width))}x${String(Math.round(frame.height))}${state}${clipping}${occluded}`;
63620
+ }
63621
+ function collectPrunedOccluded(list, query) {
63622
+ return list.flatMap((element) => {
63623
+ const inViewport = element.frame !== void 0 && isInViewport(element.frame, query.screen);
63624
+ const self2 = inViewport && query.occluded.has(element) && !query.visible.has(element) ? [element] : [];
63625
+ const children = element.children ? collectPrunedOccluded(element.children, query) : [];
63626
+ return [...self2, ...children];
63627
+ });
63628
+ }
63629
+ function findPrunedOccluded(query) {
63630
+ return collectPrunedOccluded(query.elements, query);
63521
63631
  }
63522
63632
  function formatAccessibilityElements(elements) {
63523
63633
  const app = elements.find((element) => element.type === "Application");
@@ -63525,7 +63635,12 @@ function formatAccessibilityElements(elements) {
63525
63635
  const screenHeight = app?.frame?.height ?? DEFAULT_SCREEN_HEIGHT;
63526
63636
  const screen = { width: screenWidth, height: screenHeight };
63527
63637
  const visible = collectElements(elements, screen);
63528
- const elementList = visible.length === 0 ? "No elements found." : visible.map((element) => formatElement(element, screen)).join("\n");
63638
+ const visibleSet = new Set(visible);
63639
+ const occluded = detectOccludedElements(elements, screen);
63640
+ const context = { screen, occluded };
63641
+ const prunedOccluded = findPrunedOccluded({ elements, visible: visibleSet, occluded, screen });
63642
+ const renderable = [...visible, ...prunedOccluded];
63643
+ const elementList = renderable.length === 0 ? "No elements found." : renderable.map((element) => formatElement(element, context)).join("\n");
63529
63644
  const appName = app?.AXLabel;
63530
63645
  return appName ? `Running app: ${appName}
63531
63646
 
@@ -63713,7 +63828,7 @@ function createListAppsTool(udid = "booted") {
63713
63828
  }
63714
63829
  var DEFAULT_LONG_PRESS_DURATION_MS = 500;
63715
63830
  var MIN_PLAUSIBLE_LONG_PRESS_DURATION_MS = 100;
63716
- var DEFAULT_SWIPE_DURATION_MS = 100;
63831
+ var DEFAULT_SWIPE_DURATION_MS = 300;
63717
63832
  var MIN_PLAUSIBLE_SWIPE_DURATION_MS = 50;
63718
63833
  var MS_PER_SECOND = 1e3;
63719
63834
  var ENTER_KEY_CODE = "0x28";
@@ -63836,13 +63951,13 @@ function createLongPressTool(udid = "booted") {
63836
63951
  }
63837
63952
  var DURATION_DESCRIPTION2 = `Gesture duration in milliseconds. Default ${String(
63838
63953
  DEFAULT_SWIPE_DURATION_MS
63839
- )}ms (flick) works for short lists. Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Velocity = distance / duration - raise duration at fixed distance to slow the gesture and reduce momentum. Raise to 400-800ms for controlled scrolling on long lists where the flick overshoots. Values under ${String(
63954
+ )}ms (controlled flick) works for most lists and avoids overshoot on medium-density content. Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Velocity = distance / duration - raise duration at fixed distance to slow the gesture and reduce momentum. Raise to 500-800ms for slow controlled scrolling on long lists; lower to 100-150ms for a fast flick when long-distance scroll is desired. Values under ${String(
63840
63955
  MIN_PLAUSIBLE_SWIPE_DURATION_MS
63841
63956
  )}ms almost always indicate a unit mistake (seconds passed instead of milliseconds).`;
63842
63957
  var DELTA_DESCRIPTION = "Pixel distance between interpolated touch points along the swipe path. Smaller values (e.g. 5) produce a denser event stream - smoother motion and more controllable stop-velocity, recommended when combining with a raised duration to tame long-list overshoot. Larger values produce coarser strokes. Omit to use idb defaults.";
63843
63958
  var TOOL_DESCRIPTION2 = `Swipe on the screen from one point to another. Duration is in milliseconds (default ${String(
63844
63959
  DEFAULT_SWIPE_DURATION_MS
63845
- )}ms, a flick). Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Use the default flick for scrolling lists, dismissing sheets, triggering paging. Use duration 500+ for slow drag (reorder, pan). For long lists where default flick overshoots: shorten swipe distance AND raise duration to 400-800ms to lower velocity; optionally lower delta for denser touch events and a more controllable stop. Do not pass seconds (e.g. 0.5) - that would swipe for less than a millisecond.`;
63960
+ )}ms, a controlled flick). Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. The default duration suits most scrolling, sheet dismissal, and paging; shorten to 100-150ms when you need a long-distance fast flick; raise to 500+ for slow controlled drag (reorder, pan). For long lists where the default overshoots: shorten swipe distance AND raise duration; optionally lower delta for denser touch events and a more controllable stop. Do not pass seconds (e.g. 0.5) - that would swipe for less than a millisecond.`;
63846
63961
  var SWIPE_SCHEMA = {
63847
63962
  x_start: external_exports.number(),
63848
63963
  y_start: external_exports.number(),
@@ -63887,7 +64002,7 @@ function buildSuccessText2(input) {
63887
64002
  MIN_PLAUSIBLE_SWIPE_DURATION_MS
63888
64003
  )}ms - this is almost certainly a unit mistake. The duration parameter is in milliseconds; use ${String(
63889
64004
  DEFAULT_SWIPE_DURATION_MS
63890
- )}ms for a flick and 400-800ms for slow drag (e.g. duration 500 = 0.5 seconds).`;
64005
+ )}ms for the default controlled flick and 500-800ms for slow drag (e.g. duration 500 = 0.5 seconds).`;
63891
64006
  }
63892
64007
  return base;
63893
64008
  }
@@ -74240,11 +74355,13 @@ async function runViewUiCapture(context, state) {
74240
74355
  state
74241
74356
  });
74242
74357
  }
74243
- var VIEW_UI_DESCRIPTION = `Capture current screen state: accessibility tree (element labels, positions, attributes) and screenshot in one call. Use when you need to tap an element, assert element presence or labels, check attributes, or track screen identity via <screen_id>.
74358
+ var VIEW_UI_DESCRIPTION = `Capture current screen state. This is your sole observation tool: returns a screenshot (your visual perception of the app) and an accessibility tree (interactability metadata and tap coordinates) in a single call. Use for all state observation, navigation decisions, element verification, and pre-interaction checks.
74359
+
74360
+ The screenshot is the ground truth for what screen you are on, what state the app is in, what content is visible, and what UX is happening. The a11y tree is authoritative for two questions only: "is this element interactable?" and "what tap coordinates should I use?" \u2014 never derive coordinates from the screenshot.
74244
74361
 
74245
74362
  The result begins with a <screen_id> tag containing the current screen identifier. Use this to detect screen changes and track navigation history.
74246
74363
 
74247
- Do not call \`screenshot\` immediately before or after this tool for the same state \u2014 this tool already includes the screenshot.
74364
+ The \`screenshot\` tool is reserved exclusively for polling during a transient loading state to avoid incrementing the stuck-loop counter. Do not use \`screenshot\` for any other observation purpose.
74248
74365
 
74249
74366
  IMPORTANT: Snapshot coordinates and screenshot pixels are in the same logical point space. Do not apply any scaling factor (no 2x retina adjustment).`;
74250
74367
  var VIEW_UI_TOOL_NAME = "mcp__mobile-ios__view_ui";
@@ -74693,12 +74810,19 @@ function startAndRun(params) {
74693
74810
  });
74694
74811
  });
74695
74812
  }
74813
+ var PERCEPTION_MODEL_SECTION = `## Perception Model
74814
+
74815
+ Every \`view_ui\` call returns two artifacts simultaneously:
74816
+
74817
+ - **Screenshot** \u2014 your visual perception of the app. This is the ground truth for what screen you are on, what state the app is in, what content is visible, and what UX is happening. Reason from the screenshot first when answering "what is the app showing me right now?"
74818
+ - **A11y tree** \u2014 metadata about that visual reality. It is authoritative for two questions only: "is this element interactable?" and "what tap coordinates should I use?" Never derive coordinates from the screenshot, even when the screenshot appears to show an element clearly.
74819
+
74820
+ Precedence: the screenshot governs comprehension of screen identity, state, and content. The a11y tree governs interactability and coordinates. These domains do not overlap \u2014 there is no scenario where the screenshot overrides a11y-sourced coordinates, and there is no scenario where the a11y tree overrides screenshot-sourced understanding of what the app is showing.`;
74696
74821
  var TOOL_SELECTION_SECTION = `## Tool Selection
74697
74822
 
74698
- - \`view_ui\` \u2014 returns accessibility tree (element labels, positions, attributes) AND screenshot; use when you need to tap, assert element presence, or read labels
74699
- - \`screenshot\` \u2014 returns screenshot only; use for passive visual verification (confirm transition occurred, loading finished, outcome visible) when you do not need element data
74700
- - Never call \`screenshot\` immediately before or after \`view_ui\` for the same state \u2014 \`view_ui\` already includes the screenshot
74701
- - \`screenshot\` calls do not emit a \`<screen_id>\` and do not advance the stuck-loop counter; if screen identity tracking matters, use \`view_ui\``;
74823
+ - \`view_ui\` \u2014 your sole observation tool; returns a screenshot (visual ground truth) and an a11y tree (interactability metadata and tap coordinates) in one call; use for all state observation, navigation decisions, element verification, and pre-interaction checks
74824
+ - \`screenshot\` \u2014 loading polls only; use exclusively while waiting for a transient loading state to resolve, to avoid false stuck-loop counter increments; do not use \`screenshot\` for any other observation purpose \u2014 see LOADING_STATE_RULE
74825
+ - \`screenshot\` calls do not emit a \`<screen_id>\` and do not advance the stuck-loop counter`;
74702
74826
  var DEV_ENVIRONMENT_SECTION = `## Environment
74703
74827
 
74704
74828
  This is a development build. Debug overlays and internal messages are expected artifacts \u2014 do not report them as findings.`;
@@ -74711,9 +74835,8 @@ At every reasoning step, maintain a mental ledger:
74711
74835
 
74712
74836
  Consult the ledger before every action. Always prefer navigating to a QUEUE screen over a VISITED one.`;
74713
74837
  var SESSION_START_RULE = `Before taking any other action \u2014 including initializing the Working State ledger or emitting findings \u2014 call \`view_ui\` once to observe the starting screen`;
74714
- var POST_ACTION_OBSERVE_RULE = `After any action, observe the screen before deciding next step \u2014 use \`screenshot\` when confirming a purely visual outcome (transition occurred, element disappeared, content appeared, loading finished); use \`view_ui\` when you need element labels, tap coordinates, or accessibility attributes`;
74715
- var NO_REDUNDANT_CAPTURE_RULE = `Never call both \`screenshot\` and \`view_ui\` back-to-back for the same observation \u2014 \`view_ui\` includes a screenshot; if you need both tree and image, one \`view_ui\` call suffices`;
74716
- var BACK_NAV_RULE = `After navigating forward to any new screen: attempt to return to the expected parent in PATH \u2014 consult App Knowledge first for the correct exit gesture on this screen, then try in order: (1) any visible back/close button, (2) OS back gesture, (3) swipe up, (4) swipe down, (5) swipe left, (6) swipe right \u2014 confirm return via \`screenshot\` if the parent is visually unambiguous, \`view_ui\` otherwise \u2014 only after ALL attempts fail emit a \`back-nav-failure\` finding, then navigate forward again to continue`;
74838
+ var POST_ACTION_OBSERVE_RULE = `After any action, call \`view_ui\` to observe the resulting screen state before deciding the next step. Exception: if the screen is in a transient loading state, use \`screenshot\` to poll \u2014 see LOADING_STATE_RULE.`;
74839
+ var BACK_NAV_RULE = `After navigating forward to any new screen: attempt to return to the expected parent in PATH \u2014 consult App Knowledge first for the correct exit gesture on this screen, then try in order: (1) any visible back/close button, (2) OS back gesture, (3) swipe up, (4) swipe down, (5) swipe left, (6) swipe right \u2014 confirm return via \`view_ui\` \u2014 only after ALL attempts fail emit a \`back-nav-failure\` finding, then navigate forward again to continue`;
74717
74840
  var QUEUE_FIRST_RULE = `Before selecting any action, prefer navigating to a QUEUE screen over re-exploring a VISITED one`;
74718
74841
  var STUCK_LOOP_RULE = `Stuck loop \u2014 emit a \`stuck-loop\` finding when any of these signals occur:
74719
74842
  (1) \`view_ui\` returns the same \`<screen_id>\` across 3 or more consecutive \`view_ui\` calls
@@ -74736,34 +74859,49 @@ Example: if tab bar positions \`Tokens(-31) ETH(65) ... Tron(352)\` are unchange
74736
74859
  Notes:
74737
74860
  - \`screenshot\`-only calls do not update the stuck-loop counter; only \`view_ui\` calls count
74738
74861
  - Zero-delta scroll stall is not a separate finding type \u2014 report as \`stuck-loop\``;
74739
- var LOADING_STATE_RULE = `Transient loading state: when the screen shows spinners, skeleton screens, progress bars, "Loading..." text, or placeholder content NOT described in spec or app context \u2014 use \`screenshot\` to poll for resolution (up to 3 retries); switch to \`view_ui\` only on the final check or when you need element data to act \u2014 if loading persists after 3 retries, proceed with what is visible; if spec or app context explicitly describes a loading screen as a step, do not retry \u2014 call \`view_ui\` and assert normally`;
74862
+ var LOADING_STATE_RULE = `Transient loading state: when the screen shows spinners, skeleton screens, progress bars, "Loading..." text, or placeholder content NOT described in spec or app context \u2014 use \`screenshot\` to poll for resolution (up to 3 retries); \`screenshot\` is used here specifically to avoid incrementing the stuck-loop counter during intentional wait cycles, not because it provides different visual information. Call \`view_ui\` on the final check or whenever you are ready to act. If loading persists after 3 retries, proceed with what is visible. If spec or app context explicitly describes a loading screen as a step, skip polling \u2014 call \`view_ui\` and assert normally.`;
74740
74863
  var EXPECTED_CONTENT_MISSING_RULE = `Expected content missing: when \`view_ui\` shows no loading indicator yet omits an element named or strongly implied by spec or app context \u2014 and its absence is not semantically consistent with the current screen \u2014 call \`wait_seconds\` with 2\u20135 seconds and retry \`view_ui\` up to 2 times; if element remains absent, emit a \`missing-content\` finding stating what was expected and what was observed`;
74741
- var CLIPPED_ELEMENT_RULE = `Never tap an element tagged \`[clipped-top]\`, \`[clipped-bottom]\`, \`[clipped-left]\`, or \`[clipped-right]\` \u2014 scroll to fully reveal it first, then re-call \`view_ui\` before tapping`;
74864
+ var CLIPPED_ELEMENT_RULE = `Never tap an element tagged \`[clipped-top]\`, \`[clipped-bottom]\`, \`[clipped-left]\`, or \`[clipped-right]\` \u2014 scroll to fully reveal it first, then re-call \`view_ui\` before tapping. Only the explicit \`[clipped-*]\` tag in the a11y tree triggers this rule. Do NOT infer clipping from coordinate proximity to viewport edges (a low y-coord does not imply \`[clipped-top]\`).`;
74742
74865
  var SCROLL_FOLD_RULE = `Scrollable lists: elements outside the visible viewport are absent from the a11y tree by design \u2014 this applies to elements below the fold in vertical lists AND elements clipped off-left or off-right in horizontal lists \u2014 scroll or swipe in the appropriate axis to reveal before asserting presence or absence; never emit a finding solely because list items, rows, or tabs are missing from the tree on a scrollable screen; if swipe attempts yield no position change across 2+ cycles, apply the scroll-stall path in STUCK_LOOP_RULE.`;
74866
+ var COORDINATE_SOURCE_RULE = `Never derive tap coordinates from the screenshot. Coordinates are authoritative only from the a11y tree returned by \`view_ui\`. Snapshot coordinates and screenshot pixels are in the same logical point space \u2014 no scaling factor is required. If an element is visible in the screenshot but absent from the a11y tree, apply A11Y_FALLBACK_RULE \u2014 do not estimate its position from visual layout.`;
74867
+ var GHOST_A11Y_ELEMENT_RULE = `Ghost a11y element: an element is a ghost when EITHER of these holds:
74868
+ (1) it is tagged \`${OCCLUDED_BY_OVERLAP_TAG}\` in the a11y tree (deterministic detector flagged it as covered by a later-z-order non-ancestor element whose frame either contains the target's tap-point center or fully covers its bbox). The detector is conservative \u2014 absence of the tag does NOT prove the element is not occluded; criterion (2) still applies, OR
74869
+ (2) the a11y tree reports an element AND the screenshot at that element's coordinates shows visibly different UI (a different layer, a different screen, no visible element at all). This includes the case where the a11y tree contains elements from two contradictory layers (e.g. a "USDC on ETH Network" modal AND a "USDC on SOL Network" modal at the same time, when only one can be visually present).
74870
+
74871
+ CRITICAL \u2014 finding-emission is mandatory and must happen FIRST:
74872
+ - The instant you observe ANY a11y/screenshot mismatch (criterion 1 or 2), STOP planning gestures.
74873
+ - Emit a \`ghost-a11y-element\` finding via \`report_finding\` BEFORE attempting any recovery. The finding must state: (a) what you intended to tap, (b) the a11y element's reported coordinates and label, (c) whether the \`${OCCLUDED_BY_OVERLAP_TAG}\` tag was present, (d) what the screenshot shows at those coordinates instead.
74874
+ - Recovery attempts WITHOUT first emitting the finding are a rule violation. The mismatch IS the bug \u2014 silently working around it loses the signal.
74875
+ - Do NOT tap a ghost element's reported coordinates. Even if the a11y label matches your intent, tapping at those coordinates will hit the visible layer's element at the same point, not the ghost.
74876
+
74877
+ After the finding is emitted, attempt to surface the correct layer in this order: (1) any visible close/X button on the blocking layer, (2) swipe down (sheet dismiss), (3) OS back gesture, (4) swipe up. Call a fresh \`view_ui\` after each recovery attempt before retrying the original tap. If the same overlap recurs after all recovery attempts, emit a separate \`stuck-modal\` finding for the visible layer that is blocking access.`;
74743
74878
  var A11Y_FALLBACK_RULE = `Missing a11y element \u2014 if you intend to tap or interact with a UI element and that element is absent from the most recent \`view_ui\` a11y tree, emit a \`missing-a11y-element\` finding immediately, then continue: in freestyle mode keep exploring other reachable screens; in spec mode advance to the next step.
74744
74879
 
74745
74880
  The finding must state:
74746
- (1) your intent (what you were trying to do)
74747
- (2) the approximate visual region where the element appeared (coords/size from the screenshot)
74748
- (3) nearby labeled elements from the a11y tree that serve as landmarks
74881
+ (1) your intent (what you were trying to do), in user-visible terms (e.g. "tap the Send button on the Portfolio screen")
74882
+ (2) the approximate visual region where the element appeared \u2014 name the screen and describe its location in words (e.g. "top-right of the Receive sheet"), not pixel coordinates; any coordinates referenced here are descriptive only and must NOT be used as a tap target (see COORDINATE_SOURCE_RULE)
74883
+ (3) nearby labeled elements that serve as landmarks \u2014 use their on-screen labels
74884
+
74885
+ When writing the \`description\` field, follow Description Style: never paste raw coordinates, hex addresses, screen IDs, or accessibility tree excerpts.
74749
74886
 
74750
74887
  Rules:
74751
- - Visible in the screenshot does NOT imply interactable; the a11y tree is authoritative
74752
- - do NOT estimate its coordinates from the screenshot
74753
- - do NOT attempt any pixel-based tap
74888
+ - Visible in the screenshot does NOT imply interactable; the a11y tree is authoritative for interactability and coordinates
74889
+ - COORDINATE_SOURCE_RULE applies; do NOT attempt any pixel-based tap
74754
74890
  - do NOT retry at different coordinates
74755
74891
  - do NOT long-press or swipe in the element's visual region as a fallback
74756
74892
  - a failed pixel tap is never an \`interaction-regression\` \u2014 it is a \`missing-a11y-element\``;
74893
+ var FREESTYLE_ANTI_RATIONALIZATION_RULE = `Reframe-by-substitution check: triggers ONLY when you have stated an explicit prior intent \u2014 verbatim in your reasoning \u2014 to interact with element X to achieve goal Y, and the observed UI does NOT contain X performing Y. If in that situation you find yourself reasoning "the [different element] is functioning as the [intended Y]" or "this is just a different way of doing [intended action]" rather than observing the literal X performing Y, do NOT mark the goal achieved. Emit a \`spec-deviation\` finding stating: (a) your prior intent verbatim, (b) what the screenshot shows instead, (c) the reframing reasoning verbatim. This rule does NOT trigger on benign UI variation (button label "Continue" vs "Next" with the same effect) \u2014 only on substituting a different element/affordance for the one originally intended. Lighter than spec-mode ANTI_RATIONALIZATION_RULE because freestyle has no spec outcome text; the trigger is the agent's own prior intent statement.`;
74757
74894
  var PLATFORM_FIRST_RUN_RULE = `OS permission and platform dialogs on fresh install are normal platform behavior, not app bugs \u2014 this includes: iOS notification permission ("Would Like to Send You Notifications"), iOS Face ID / Touch ID enrollment, iOS App Tracking Transparency, iOS "Allow Paste" prompts, Android runtime permission dialogs (camera, microphone, contacts, location, storage), and Android biometric prompts \u2014 when such a dialog appears while executing a step, dismiss it via the appropriate button (Allow, Don't Allow, OK, or OS back), then retry the action that triggered it; only emit \`spec-deviation\` if, after dismissing the dialog AND retrying the action, the expected screen or outcome still does not appear \u2014 do NOT emit any finding on the dialog itself.`;
74758
74895
  var COMMON_RULE_BULLETS = [
74759
74896
  SESSION_START_RULE,
74760
74897
  POST_ACTION_OBSERVE_RULE,
74761
- NO_REDUNDANT_CAPTURE_RULE,
74762
74898
  BACK_NAV_RULE,
74763
74899
  QUEUE_FIRST_RULE,
74764
74900
  STUCK_LOOP_RULE,
74765
74901
  LOADING_STATE_RULE,
74766
74902
  EXPECTED_CONTENT_MISSING_RULE,
74903
+ COORDINATE_SOURCE_RULE,
74904
+ GHOST_A11Y_ELEMENT_RULE,
74767
74905
  A11Y_FALLBACK_RULE,
74768
74906
  CLIPPED_ELEMENT_RULE,
74769
74907
  SCROLL_FOLD_RULE
@@ -74777,12 +74915,38 @@ Write the description (what you saw vs. expected, where, when) before committing
74777
74915
  - LOW \u2014 speculative; only include when freestyle/low-confidence triggers require it`;
74778
74916
  var FINDING_TAXONOMY_SECTION = `## Finding Types
74779
74917
 
74780
- You may emit only these trigger types: \`back-nav-failure\`, \`dead-end\`, \`stuck-modal\`, \`stuck-loop\`, \`missing-a11y-element\`, \`missing-content\`, \`spec-deviation\`, \`destructive-only-exit\`. Do NOT emit \`design-system-violation\`, \`motion-regression\`, \`continuity-regression\`, \`interaction-regression\`, or \`loading-regression\` \u2014 those belong to other agents.
74918
+ You may emit only these trigger types: \`back-nav-failure\`, \`dead-end\`, \`stuck-modal\`, \`stuck-loop\`, \`missing-a11y-element\`, \`ghost-a11y-element\`, \`missing-content\`, \`spec-deviation\`, \`destructive-only-exit\`. Do NOT emit \`design-system-violation\`, \`motion-regression\`, \`continuity-regression\`, \`interaction-regression\`, or \`loading-regression\` \u2014 those belong to other agents.
74781
74919
 
74782
74920
  ${CONFIDENCE_RUBRIC_SECTION}`;
74921
+ var DESCRIPTION_STYLE_SECTION = `## Description Style
74922
+
74923
+ The \`description\` field is read by a QA tester who has not seen your reasoning. Write so they can reproduce and triage without internal context.
74924
+
74925
+ Required:
74926
+ - One short sentence first: the user action plus the observed problem, in product terms (use the on-screen labels of buttons, screens, and modals)
74927
+ - Add a second sentence if you can state the probable cause in product terms without implementation guessing (e.g. "modal stacked behind another modal", "address did not change after picking the network")
74928
+ - Past tense, declarative, plain English
74929
+ - Keep to 1\u20132 sentences
74930
+
74931
+ Forbidden:
74932
+ - Internal jargon: \`a11y tree\`, \`view_ui\`, \`screen_id\`, \`ghost element\`, \`occluded-by-overlap\`, \`clipped-*\`, \`stuck-loop\`, tool names
74933
+ - PATH notation (e.g. \`Home > Settings > Privacy\`), screen IDs, internal element tags
74934
+ - Pixel coordinates, element positions, hex offsets, raw addresses, technical IDs
74935
+ - First-person narration ("I tapped\u2026", "Let me try\u2026"), tool-call traces, reasoning steps
74936
+ - Speculation about implementation ("rendering issue", "z-index", "layering bug") \u2014 describe the user-visible effect instead
74937
+
74938
+ Example 1 \u2014 overlapping modals:
74939
+ - Bad: \`After selecting Solana from the network picker while viewing the ETH Receive screen, the a11y tree shows SOL Network confirmation elements ("SOL NETWORK" text at (201,319) ...) but the screenshot still visually displays the ETH Network Receive screen with address 0xb30...\`
74940
+ - Good: \`Picking SOL in the network selector did not change the receive address. The SOL confirmation modal appeared behind the network picker modal instead of replacing the ETH receive screen.\`
74941
+
74942
+ Example 2 \u2014 dead end:
74943
+ - Bad: \`view_ui shows no elements matching 'Back' or 'Close' on screen_id=privacy_settings_0; PATH is Home > Settings > Privacy; OS back gesture and swipe down/up/left/right all returned the same screen_id\`
74944
+ - Good: \`The Privacy Settings screen had no way to exit. Tapping the back area and swiping in every direction did not navigate away.\``;
74783
74945
  var REPORTING_FINDINGS_BASE = `## Reporting Findings
74784
74946
 
74785
- CRITICAL: When you observe a finding, call \`report_finding\` IMMEDIATELY \u2014 before taking any further actions. Do not batch findings. Do not wait until the end of the run. Each \`report_finding\` call atomically records one finding with the current screen attached; the server captures the screenshot. Do not pass screenshot paths or step indices. If you are uncertain whether something warrants a finding, do not report it \u2014 \`report_finding\` is for confirmed observations only.`;
74947
+ CRITICAL: When you observe a finding, call \`report_finding\` IMMEDIATELY \u2014 before taking any further actions. Do not batch findings. Do not wait until the end of the run. Each \`report_finding\` call atomically records one finding with the current screen attached; the server captures the screenshot. Do not pass screenshot paths or step indices. If you are uncertain whether something warrants a finding, do not report it \u2014 \`report_finding\` is for confirmed observations only.
74948
+
74949
+ ${DESCRIPTION_STYLE_SECTION}`;
74786
74950
  function buildReportFindingSection(scenarioId) {
74787
74951
  if (scenarioId === void 0) {
74788
74952
  return REPORTING_FINDINGS_BASE;
@@ -74812,7 +74976,11 @@ function buildEnvSection(buildEnv3) {
74812
74976
 
74813
74977
  ${DEV_ENVIRONMENT_SECTION}` : "";
74814
74978
  }
74815
- var FREESTYLE_RULE_BULLETS = [...COMMON_RULE_BULLETS, PLATFORM_FIRST_RUN_RULE];
74979
+ var FREESTYLE_RULE_BULLETS = [
74980
+ ...COMMON_RULE_BULLETS,
74981
+ PLATFORM_FIRST_RUN_RULE,
74982
+ FREESTYLE_ANTI_RATIONALIZATION_RULE
74983
+ ];
74816
74984
  var FREESTYLE_RULES_SECTION = buildRulesSection2(FREESTYLE_RULE_BULLETS);
74817
74985
  var WHAT_TO_TEST_SECTION = `## What to Test
74818
74986
 
@@ -74856,6 +75024,8 @@ function buildFreestyleBody({
74856
75024
 
74857
75025
  ${contextBlock}
74858
75026
 
75027
+ ${PERCEPTION_MODEL_SECTION}
75028
+
74859
75029
  ${TOOL_SELECTION_SECTION}
74860
75030
 
74861
75031
  ${FREESTYLE_RULES_SECTION}
@@ -74881,7 +75051,7 @@ var FREESTYLE_TEMPLATE = (options2) => {
74881
75051
  const reportingSection = buildReportFindingSection(scenarioId);
74882
75052
  return buildFreestyleBody({ contextBlock, environmentSection, reportingSection });
74883
75053
  };
74884
- var OUTCOME_LITERAL_RULE = `When verifying a step outcome or assertion, interpret all quantifiers literally and apply them exhaustively. Any keyword that imposes a universal or count-bound constraint \u2014 including but not limited to \`only\`, \`all\`, \`every\`, \`each\`, \`both\`, \`no\`, \`none\`, \`neither\`, \`exactly N\`, \`at least N\`, \`fewer than N\`, \`more than N\` \u2014 a single counter-example observed in \`view_ui\` or \`screenshot\` constitutes a failed constraint.
75054
+ var OUTCOME_LITERAL_RULE = `When verifying a step outcome or assertion, interpret all quantifiers literally and apply them exhaustively. Any keyword that imposes a universal or count-bound constraint \u2014 including but not limited to \`only\`, \`all\`, \`every\`, \`each\`, \`both\`, \`no\`, \`none\`, \`neither\`, \`exactly N\`, \`at least N\`, \`fewer than N\`, \`more than N\` \u2014 a single counter-example observed via \`view_ui\` constitutes a failed constraint.
74885
75055
 
74886
75056
  Scope:
74887
75057
  - Applies only when the outcome text contains a universal or count-bound quantifier
@@ -74897,10 +75067,10 @@ On violation: if one item violates the constraint, emit \`spec-deviation\` immed
74897
75067
  Precedence: when the counter-evidence is an element absent from the a11y tree, A11Y_FALLBACK_RULE determines the finding type (\`missing-a11y-element\`). OUTCOME_LITERAL_RULE applies only to observed-but-unwanted elements.`;
74898
75068
  var ANTI_RATIONALIZATION_RULE = `During outcome verification, monitor your own reasoning for reconciliation hypotheses. A reconciliation hypothesis is any reasoning that re-frames, redefines, or reinterprets the observed counter-example or target class in order to produce agreement with the spec outcome \u2014 regardless of phrasing. Treat such reasoning as a deviation signal, not a resolution: stop, do NOT mark the step complete, and emit \`spec-deviation\` with: (a) the literal outcome text, (b) the specific observation that triggered the hypothesis, (c) the reconciliation reasoning itself verbatim.
74899
75069
 
74900
- Attestation: before marking any quantifier-bearing outcome complete, state explicitly in your reasoning: \`No reconciliation hypothesis generated. Counter-examples found: [list or none].\` If you cannot make that statement honestly, a hypothesis exists \u2014 emit \`spec-deviation\`.
75070
+ Attestation: before marking any step complete where an explicit \`\u2192 outcome\` is present in the spec step, state explicitly in your reasoning: \`No reconciliation hypothesis generated. Counter-examples found: [list or none].\` If you cannot make that statement honestly, a hypothesis exists \u2014 emit \`spec-deviation\`.
74901
75071
 
74902
75072
  Ambiguity: when outcome verification is ambiguous, first re-verify via a fresh \`view_ui\` and re-evaluate against the outcome text. If still ambiguous after re-verification, emit \`spec-deviation\` citing the ambiguity \u2014 silence is not a pass, and marking the step complete without explicit evaluation does not qualify.`;
74903
- var SPEC_ASSERTION_RULE = `Each item in \`**Assertions**\` is a mandatory pass/fail check \u2014 verify using \`view_ui\` when the assertion targets an element attribute, label, or presence in the tree; use \`screenshot\` when the assertion is purely visual; if neither can confirm, emit a \`spec-deviation\` finding based on what is observable`;
75073
+ var SPEC_ASSERTION_RULE = `Each item in \`**Assertions**\` is a mandatory pass/fail check \u2014 verify using \`view_ui\`; the screenshot embedded in the response is your visual evidence and the a11y tree confirms element presence and attributes. If the result cannot confirm the assertion, emit a \`spec-deviation\` finding based on what is observable.`;
74904
75074
  var SPEC_PASSIVE_BREAKAGE_RULE = `Flag crash dialogs, unexpected system errors, or navigation failures that occur as a direct result of executing a spec step; if you observe a visibly broken element in passing while navigating, note it without interacting with it`;
74905
75075
  var SPEC_RULE_BULLETS = [
74906
75076
  ...COMMON_RULE_BULLETS,
@@ -74927,7 +75097,7 @@ Each step has this shape:
74927
75097
  <intent> [\u2192 <outcome>] [hint: <advisory>]
74928
75098
 
74929
75099
  - The intent phrase is your goal. Achieve it by any reasonable UI path.
74930
- - If an outcome state is present, it is your verification target. After acting, confirm the outcome is met before marking the step complete \u2014 use \`screenshot\` when the outcome is purely visual (screen transition visible, element gone, content appeared); use \`view_ui\` when the outcome requires asserting element labels, attributes, or coordinates. If no outcome is given, proceed when the action succeeds.
75100
+ - If an outcome state is present, it is your verification target. After acting, call \`view_ui\` to confirm the outcome \u2014 the embedded screenshot verifies visual transitions and the a11y tree verifies element state. If no outcome is given, proceed when the action succeeds.
74931
75101
  - A hint is advisory only. Prefer an element matching the hint, but if no literal match exists, use intent and visual context to select the best candidate. Never fail a step solely because a hint label is absent.
74932
75102
  - Infer element role (primary action, secondary action, dismissal) from visual hierarchy, position, and hint text. Authors do not specify role.
74933
75103
  - If no element satisfies the intent after exhausting visible UI, emit a \`spec-deviation\` finding and halt that step.`;
@@ -74956,6 +75126,8 @@ function buildSpecModeBody({
74956
75126
 
74957
75127
  ${contextBlock}
74958
75128
 
75129
+ ${PERCEPTION_MODEL_SECTION}
75130
+
74959
75131
  ${TOOL_SELECTION_SECTION}
74960
75132
 
74961
75133
  ${SPEC_RULES_SECTION}
@@ -94353,7 +94525,7 @@ function buildProgram(options2) {
94353
94525
 
94354
94526
  // src/index.ts
94355
94527
  process.title = "xqa";
94356
- var version2 = `${"5.4.0"}${false ? ` (dev build +${"fb81480"})` : ""}`;
94528
+ var version2 = `${"5.5.0"}${false ? ` (dev build +${"432b4b3"})` : ""}`;
94357
94529
  var program2 = buildProgram({ version: version2 });
94358
94530
  void program2.parseAsync(process.argv);
94359
94531
  /*! Bundled license information:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/xqa",
3
- "version": "5.4.0",
3
+ "version": "5.5.0",
4
4
  "type": "module",
5
5
  "engines": {
6
6
  "node": ">=22"
@@ -36,8 +36,8 @@
36
36
  "@qa-agents/mobile-ios": "0.0.0",
37
37
  "@qa-agents/pipeline": "0.0.0",
38
38
  "@qa-agents/planner": "0.0.0",
39
- "@qa-agents/shared": "0.0.0",
40
39
  "@qa-agents/triager": "0.0.0",
40
+ "@qa-agents/shared": "0.0.0",
41
41
  "@qa-agents/typescript-config": "0.0.0"
42
42
  },
43
43
  "dependencies": {