npm - @exodus/xqa - Versions diffs - 5.4.0 → 5.5.0 - Mend

@exodus/xqa 5.4.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/xqa.cjs +208 -36
package/package.json +2 -2

package/dist/xqa.cjs CHANGED Viewed

@@ -22262,10 +22262,10 @@ var require_array = __commonJS({
     "use strict";
     Object.defineProperty(exports2, "__esModule", { value: true });
     exports2.splitWhen = exports2.flatten = void 0;
-    function flatten(items) {
+    function flatten2(items) {
       return items.reduce((collection, item) => [].concat(collection, item), []);
     }
-    exports2.flatten = flatten;
+    exports2.flatten = flatten2;
     function splitWhen(items, predicate) {
       const result = [[]];
       let groupIndex = 0;
@@ -63484,6 +63484,104 @@ function collectElements(elements, screen) {
   walk(elements);
   return into;
 }
+var OCCLUDED_BY_OVERLAP_TAG = "[occluded-by-overlap]"; // suffix formatElement appends to elements found in the occluded set
+var FULL_BBOX_CONTAINMENT_RATIO = 0.85; // minimum share of a target's area a candidate must overlap for fullyCoversBoundingBox to return true
+function frameContainsPoint(frame, point) { // true when point lies inside frame; the origin edges are inclusive, the far edges (x + width, y + height) are exclusive
+  return point.x >= frame.x && point.x < frame.x + frame.width && point.y >= frame.y && point.y < frame.y + frame.height;
+}
+function frameCenter(frame) { // returns the midpoint { x, y } of the frame; the values are not rounded
+  return { x: frame.x + frame.width / 2, y: frame.y + frame.height / 2 };
+}
+function frameArea(frame) { // width * height, with a negative width or height treated as 0
+  return Math.max(0, frame.width) * Math.max(0, frame.height);
+}
+function intersectionArea(left, right) { // area of the rectangle shared by both frames, or 0 when they do not overlap
+  const x1 = Math.max(left.x, right.x); // start of the overlap on the x axis
+  const y12 = Math.max(left.y, right.y); // start of the overlap on the y axis
+  const x22 = Math.min(left.x + left.width, right.x + right.width); // end of the overlap on the x axis
+  const y22 = Math.min(left.y + left.height, right.y + right.height); // end of the overlap on the y axis
+  if (x22 <= x1 || y22 <= y12) { // frames that only touch along an edge count as non-overlapping
+    return 0;
+  }
+  return (x22 - x1) * (y22 - y12);
+}
+function selfNode(element, input) { // node record for element itself, or undefined when it has no frame or isInViewport rejects the frame
+  if (!element.frame || !isInViewport(element.frame, input.screen)) {
+    return void 0;
+  }
+  return {
+    element,
+    frame: element.frame,
+    ancestors: input.ancestors, // same Set instance the caller passed in, not a copy
+    treeOrder: input.startOrder // position of this node in the flattened traversal
+  };
+}
+function flattenList(list, input) { // flattens sibling elements in order; returns { nodes, nextOrder }
+  let nextOrder = input.startOrder;
+  const collected = [];
+  for (const element of list) {
+    const subtree = flattenSubtree(element, { ...input, startOrder: nextOrder });
+    collected.push(...subtree.nodes);
+    nextOrder = subtree.nextOrder; // each subtree continues numbering where the previous one stopped
+  }
+  return { nodes: collected, nextOrder };
+}
+function flattenSubtree(element, input) { // flattens element followed by its descendants; returns { nodes, nextOrder }
+  const self2 = selfNode(element, input);
+  const selfNodes = self2 ? [self2] : [];
+  const orderAfterSelf = self2 ? input.startOrder + 1 : input.startOrder; // the counter advances only when the element itself produced a node
+  if (!element.children || element.children.length === 0) {
+    return { nodes: selfNodes, nextOrder: orderAfterSelf };
+  }
+  const childAncestors = new Set(input.ancestors); // copy, so the caller's ancestor set is not mutated
+  childAncestors.add(element);
+  const childOutput = flattenList(element.children, { // children are visited even when the element itself yielded no node
+    ancestors: childAncestors,
+    screen: input.screen,
+    startOrder: orderAfterSelf
+  });
+  return {
+    nodes: [...selfNodes, ...childOutput.nodes],
+    nextOrder: childOutput.nextOrder
+  };
+}
+function flatten(elements, screen) { // entry point: flattens the element forest starting with no ancestors and order 0; returns only the node array
+  return flattenList(elements, {
+    ancestors: /* @__PURE__ */ new Set(),
+    screen,
+    startOrder: 0
+  }).nodes;
+}
+function blocksTapPoint(target, candidate) { // true when candidate's frame contains the center point of target's frame
+  return frameContainsPoint(candidate.frame, frameCenter(target.frame));
+}
+function fullyCoversBoundingBox(target, candidate) { // true when candidate overlaps at least FULL_BBOX_CONTAINMENT_RATIO of target's area
+  const targetArea = frameArea(target.frame);
+  if (targetArea <= 0) { // a zero-area target is never reported as covered; also avoids dividing by zero below
+    return false;
+  }
+  return intersectionArea(target.frame, candidate.frame) / targetArea >= FULL_BBOX_CONTAINMENT_RATIO;
+}
+function isOccluder(target, candidate) { // true when candidate counts as covering target
+  if (candidate.treeOrder <= target.treeOrder) { // only nodes later in traversal order qualify; this also rules out target itself
+    return false;
+  }
+  if (candidate.ancestors.has(target.element)) { // candidate is a descendant of target
+    return false;
+  }
+  if (target.ancestors.has(candidate.element)) { // candidate is an ancestor of target
+    return false;
+  }
+  return blocksTapPoint(target, candidate) || fullyCoversBoundingBox(target, candidate); // NOTE(review): traversal order presumably stands in for z-order — confirm
+}
+function isOccluded(target, nodes) { // true when at least one node in nodes satisfies isOccluder for target
+  return nodes.some((candidate) => isOccluder(target, candidate));
+}
+function detectOccludedElements(elements, screen) { // returns a Set of the element objects whose flattened node is occluded by another node
+  const nodes = flatten(elements, screen);
+  const occludedElements = nodes.filter((target) => isOccluded(target, nodes)).map((node) => node.element); // each node is checked against the full node list
+  return new Set(occludedElements);
+}
 function resolveLabel(element) { // label text: AXLabel, else AXValue, else ""
   return element.AXLabel ?? element.AXValue ?? ""; // ?? falls through only on null/undefined, so an empty-string AXLabel is kept
 }
@@ -63509,15 +63607,27 @@ function resolveClippingTags(frame, screen) {
   }
   return tags.length > 0 ? ` ${tags.join(" ")}` : "";
 }
-function formatElement(element, screen) {
+function formatElement(element, context) {
   const type2 = resolveType(element);
   const label = resolveLabel(element);
   const frame = element.frame ?? { x: 0, y: 0, width: 0, height: 0 };
   const cx = Math.round(frame.x + frame.width / 2);
   const cy = Math.round(frame.y + frame.height / 2);
   const state = element.enabled === false ? " [disabled]" : "";
-  const clipping = resolveClippingTags(frame, screen);
-  return `[${type2}] "${label}" at (${String(cx)}, ${String(cy)}) size ${String(Math.round(frame.width))}x${String(Math.round(frame.height))}${state}${clipping}`;
+  const clipping = resolveClippingTags(frame, context.screen);
+  const occluded = context.occluded.has(element) ? ` ${OCCLUDED_BY_OVERLAP_TAG}` : "";
+  return `[${type2}] "${label}" at (${String(cx)}, ${String(cy)}) size ${String(Math.round(frame.width))}x${String(Math.round(frame.height))}${state}${clipping}${occluded}`;
+}
+function collectPrunedOccluded(list, query) { // depth-first list of elements that are in the viewport and in query.occluded but not in query.visible
+  return list.flatMap((element) => {
+    const inViewport = element.frame !== void 0 && isInViewport(element.frame, query.screen);
+    const self2 = inViewport && query.occluded.has(element) && !query.visible.has(element) ? [element] : []; // the element itself, when it qualifies
+    const children = element.children ? collectPrunedOccluded(element.children, query) : []; // descendants are searched whether or not the element qualified
+    return [...self2, ...children];
+  });
+}
+function findPrunedOccluded(query) { // runs collectPrunedOccluded over query.elements, passing query through for its screen/occluded/visible fields
+  return collectPrunedOccluded(query.elements, query);
 }
 function formatAccessibilityElements(elements) {
   const app = elements.find((element) => element.type === "Application");
@@ -63525,7 +63635,12 @@ function formatAccessibilityElements(elements) {
   const screenHeight = app?.frame?.height ?? DEFAULT_SCREEN_HEIGHT;
   const screen = { width: screenWidth, height: screenHeight };
   const visible = collectElements(elements, screen);
-  const elementList = visible.length === 0 ? "No elements found." : visible.map((element) => formatElement(element, screen)).join("\n");
+  const visibleSet = new Set(visible);
+  const occluded = detectOccludedElements(elements, screen);
+  const context = { screen, occluded };
+  const prunedOccluded = findPrunedOccluded({ elements, visible: visibleSet, occluded, screen });
+  const renderable = [...visible, ...prunedOccluded];
+  const elementList = renderable.length === 0 ? "No elements found." : renderable.map((element) => formatElement(element, context)).join("\n");
   const appName = app?.AXLabel;
   return appName ? `Running app: ${appName}
@@ -63713,7 +63828,7 @@ function createListAppsTool(udid = "booted") {
 }
 var DEFAULT_LONG_PRESS_DURATION_MS = 500;
 var MIN_PLAUSIBLE_LONG_PRESS_DURATION_MS = 100;
-var DEFAULT_SWIPE_DURATION_MS = 100;
+var DEFAULT_SWIPE_DURATION_MS = 300;
 var MIN_PLAUSIBLE_SWIPE_DURATION_MS = 50;
 var MS_PER_SECOND = 1e3;
 var ENTER_KEY_CODE = "0x28";
@@ -63836,13 +63951,13 @@ function createLongPressTool(udid = "booted") {
 }
 var DURATION_DESCRIPTION2 = `Gesture duration in milliseconds. Default ${String(
   DEFAULT_SWIPE_DURATION_MS
-)}ms (flick) works for short lists. Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Velocity = distance / duration - raise duration at fixed distance to slow the gesture and reduce momentum. Raise to 400-800ms for controlled scrolling on long lists where the flick overshoots. Values under ${String(
+)}ms (controlled flick) works for most lists and avoids overshoot on medium-density content. Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Velocity = distance / duration - raise duration at fixed distance to slow the gesture and reduce momentum. Raise to 500-800ms for slow controlled scrolling on long lists; lower to 100-150ms for a fast flick when long-distance scroll is desired. Values under ${String(
   MIN_PLAUSIBLE_SWIPE_DURATION_MS
 )}ms almost always indicate a unit mistake (seconds passed instead of milliseconds).`;
 var DELTA_DESCRIPTION = "Pixel distance between interpolated touch points along the swipe path. Smaller values (e.g. 5) produce a denser event stream - smoother motion and more controllable stop-velocity, recommended when combining with a raised duration to tame long-list overshoot. Larger values produce coarser strokes. Omit to use idb defaults.";
 var TOOL_DESCRIPTION2 = `Swipe on the screen from one point to another. Duration is in milliseconds (default ${String(
   DEFAULT_SWIPE_DURATION_MS
-)}ms, a flick). Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Use the default flick for scrolling lists, dismissing sheets, triggering paging. Use duration 500+ for slow drag (reorder, pan). For long lists where default flick overshoots: shorten swipe distance AND raise duration to 400-800ms to lower velocity; optionally lower delta for denser touch events and a more controllable stop. Do not pass seconds (e.g. 0.5) - that would swipe for less than a millisecond.`;
+)}ms, a controlled flick). Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. The default duration suits most scrolling, sheet dismissal, and paging; shorten to 100-150ms when you need a long-distance fast flick; raise to 500+ for slow controlled drag (reorder, pan). For long lists where the default overshoots: shorten swipe distance AND raise duration; optionally lower delta for denser touch events and a more controllable stop. Do not pass seconds (e.g. 0.5) - that would swipe for less than a millisecond.`;
 var SWIPE_SCHEMA = {
   x_start: external_exports.number(),
   y_start: external_exports.number(),
@@ -63887,7 +64002,7 @@ function buildSuccessText2(input) {
       MIN_PLAUSIBLE_SWIPE_DURATION_MS
     )}ms - this is almost certainly a unit mistake. The duration parameter is in milliseconds; use ${String(
       DEFAULT_SWIPE_DURATION_MS
-    )}ms for a flick and 400-800ms for slow drag (e.g. duration 500 = 0.5 seconds).`;
+    )}ms for the default controlled flick and 500-800ms for slow drag (e.g. duration 500 = 0.5 seconds).`;
   }
   return base;
 }
@@ -74240,11 +74355,13 @@ async function runViewUiCapture(context, state) {
     state
   });
 }
-var VIEW_UI_DESCRIPTION = `Capture current screen state: accessibility tree (element labels, positions, attributes) and screenshot in one call. Use when you need to tap an element, assert element presence or labels, check attributes, or track screen identity via <screen_id>.
+var VIEW_UI_DESCRIPTION = `Capture current screen state. This is your sole observation tool: returns a screenshot (your visual perception of the app) and an accessibility tree (interactability metadata and tap coordinates) in a single call. Use for all state observation, navigation decisions, element verification, and pre-interaction checks.
+The screenshot is the ground truth for what screen you are on, what state the app is in, what content is visible, and what UX is happening. The a11y tree is authoritative for two questions only: "is this element interactable?" and "what tap coordinates should I use?" \u2014 never derive coordinates from the screenshot.
 The result begins with a <screen_id> tag containing the current screen identifier. Use this to detect screen changes and track navigation history.
-Do not call \`screenshot\` immediately before or after this tool for the same state \u2014 this tool already includes the screenshot.
+The \`screenshot\` tool is reserved exclusively for polling during a transient loading state to avoid incrementing the stuck-loop counter. Do not use \`screenshot\` for any other observation purpose.
 IMPORTANT: Snapshot coordinates and screenshot pixels are in the same logical point space. Do not apply any scaling factor (no 2x retina adjustment).`;
 var VIEW_UI_TOOL_NAME = "mcp__mobile-ios__view_ui";
@@ -74693,12 +74810,19 @@ function startAndRun(params) {
     });
   });
 }
+var PERCEPTION_MODEL_SECTION = `## Perception Model
+Every \`view_ui\` call returns two artifacts simultaneously:
+- **Screenshot** \u2014 your visual perception of the app. This is the ground truth for what screen you are on, what state the app is in, what content is visible, and what UX is happening. Reason from the screenshot first when answering "what is the app showing me right now?"
+- **A11y tree** \u2014 metadata about that visual reality. It is authoritative for two questions only: "is this element interactable?" and "what tap coordinates should I use?" Never derive coordinates from the screenshot, even when the screenshot appears to show an element clearly.
+Precedence: the screenshot governs comprehension of screen identity, state, and content. The a11y tree governs interactability and coordinates. These domains do not overlap \u2014 there is no scenario where the screenshot overrides a11y-sourced coordinates, and there is no scenario where the a11y tree overrides screenshot-sourced understanding of what the app is showing.`;
 var TOOL_SELECTION_SECTION = `## Tool Selection
-- \`view_ui\` \u2014 returns accessibility tree (element labels, positions, attributes) AND screenshot; use when you need to tap, assert element presence, or read labels
-- \`screenshot\` \u2014 returns screenshot only; use for passive visual verification (confirm transition occurred, loading finished, outcome visible) when you do not need element data
-- Never call \`screenshot\` immediately before or after \`view_ui\` for the same state \u2014 \`view_ui\` already includes the screenshot
-- \`screenshot\` calls do not emit a \`<screen_id>\` and do not advance the stuck-loop counter; if screen identity tracking matters, use \`view_ui\``;
+- \`view_ui\` \u2014 your sole observation tool; returns a screenshot (visual ground truth) and an a11y tree (interactability metadata and tap coordinates) in one call; use for all state observation, navigation decisions, element verification, and pre-interaction checks
+- \`screenshot\` \u2014 loading polls only; use exclusively while waiting for a transient loading state to resolve, to avoid false stuck-loop counter increments; do not use \`screenshot\` for any other observation purpose \u2014 see LOADING_STATE_RULE
+- \`screenshot\` calls do not emit a \`<screen_id>\` and do not advance the stuck-loop counter`;
 var DEV_ENVIRONMENT_SECTION = `## Environment
 This is a development build. Debug overlays and internal messages are expected artifacts \u2014 do not report them as findings.`;
@@ -74711,9 +74835,8 @@ At every reasoning step, maintain a mental ledger:
 Consult the ledger before every action. Always prefer navigating to a QUEUE screen over a VISITED one.`;
 var SESSION_START_RULE = `Before taking any other action \u2014 including initializing the Working State ledger or emitting findings \u2014 call \`view_ui\` once to observe the starting screen`;
-var POST_ACTION_OBSERVE_RULE = `After any action, observe the screen before deciding next step \u2014 use \`screenshot\` when confirming a purely visual outcome (transition occurred, element disappeared, content appeared, loading finished); use \`view_ui\` when you need element labels, tap coordinates, or accessibility attributes`;
-var NO_REDUNDANT_CAPTURE_RULE = `Never call both \`screenshot\` and \`view_ui\` back-to-back for the same observation \u2014 \`view_ui\` includes a screenshot; if you need both tree and image, one \`view_ui\` call suffices`;
-var BACK_NAV_RULE = `After navigating forward to any new screen: attempt to return to the expected parent in PATH \u2014 consult App Knowledge first for the correct exit gesture on this screen, then try in order: (1) any visible back/close button, (2) OS back gesture, (3) swipe up, (4) swipe down, (5) swipe left, (6) swipe right \u2014 confirm return via \`screenshot\` if the parent is visually unambiguous, \`view_ui\` otherwise \u2014 only after ALL attempts fail emit a \`back-nav-failure\` finding, then navigate forward again to continue`;
+var POST_ACTION_OBSERVE_RULE = `After any action, call \`view_ui\` to observe the resulting screen state before deciding the next step. Exception: if the screen is in a transient loading state, use \`screenshot\` to poll \u2014 see LOADING_STATE_RULE.`;
+var BACK_NAV_RULE = `After navigating forward to any new screen: attempt to return to the expected parent in PATH \u2014 consult App Knowledge first for the correct exit gesture on this screen, then try in order: (1) any visible back/close button, (2) OS back gesture, (3) swipe up, (4) swipe down, (5) swipe left, (6) swipe right \u2014 confirm return via \`view_ui\` \u2014 only after ALL attempts fail emit a \`back-nav-failure\` finding, then navigate forward again to continue`;
 var QUEUE_FIRST_RULE = `Before selecting any action, prefer navigating to a QUEUE screen over re-exploring a VISITED one`;
 var STUCK_LOOP_RULE = `Stuck loop \u2014 emit a \`stuck-loop\` finding when any of these signals occur:
 (1) \`view_ui\` returns the same \`<screen_id>\` across 3 or more consecutive \`view_ui\` calls
@@ -74736,34 +74859,49 @@ Example: if tab bar positions \`Tokens(-31) ETH(65) ... Tron(352)\` are unchange
 Notes:
 - \`screenshot\`-only calls do not update the stuck-loop counter; only \`view_ui\` calls count
 - Zero-delta scroll stall is not a separate finding type \u2014 report as \`stuck-loop\``;
-var LOADING_STATE_RULE = `Transient loading state: when the screen shows spinners, skeleton screens, progress bars, "Loading..." text, or placeholder content NOT described in spec or app context \u2014 use \`screenshot\` to poll for resolution (up to 3 retries); switch to \`view_ui\` only on the final check or when you need element data to act \u2014 if loading persists after 3 retries, proceed with what is visible; if spec or app context explicitly describes a loading screen as a step, do not retry \u2014 call \`view_ui\` and assert normally`;
+var LOADING_STATE_RULE = `Transient loading state: when the screen shows spinners, skeleton screens, progress bars, "Loading..." text, or placeholder content NOT described in spec or app context \u2014 use \`screenshot\` to poll for resolution (up to 3 retries); \`screenshot\` is used here specifically to avoid incrementing the stuck-loop counter during intentional wait cycles, not because it provides different visual information. Call \`view_ui\` on the final check or whenever you are ready to act. If loading persists after 3 retries, proceed with what is visible. If spec or app context explicitly describes a loading screen as a step, skip polling \u2014 call \`view_ui\` and assert normally.`;
 var EXPECTED_CONTENT_MISSING_RULE = `Expected content missing: when \`view_ui\` shows no loading indicator yet omits an element named or strongly implied by spec or app context \u2014 and its absence is not semantically consistent with the current screen \u2014 call \`wait_seconds\` with 2\u20135 seconds and retry \`view_ui\` up to 2 times; if element remains absent, emit a \`missing-content\` finding stating what was expected and what was observed`;
-var CLIPPED_ELEMENT_RULE = `Never tap an element tagged \`[clipped-top]\`, \`[clipped-bottom]\`, \`[clipped-left]\`, or \`[clipped-right]\` \u2014 scroll to fully reveal it first, then re-call \`view_ui\` before tapping`;
+var CLIPPED_ELEMENT_RULE = `Never tap an element tagged \`[clipped-top]\`, \`[clipped-bottom]\`, \`[clipped-left]\`, or \`[clipped-right]\` \u2014 scroll to fully reveal it first, then re-call \`view_ui\` before tapping. Only the explicit \`[clipped-*]\` tag in the a11y tree triggers this rule. Do NOT infer clipping from coordinate proximity to viewport edges (a low y-coord does not imply \`[clipped-top]\`).`;
 var SCROLL_FOLD_RULE = `Scrollable lists: elements outside the visible viewport are absent from the a11y tree by design \u2014 this applies to elements below the fold in vertical lists AND elements clipped off-left or off-right in horizontal lists \u2014 scroll or swipe in the appropriate axis to reveal before asserting presence or absence; never emit a finding solely because list items, rows, or tabs are missing from the tree on a scrollable screen; if swipe attempts yield no position change across 2+ cycles, apply the scroll-stall path in STUCK_LOOP_RULE.`;
+var COORDINATE_SOURCE_RULE = `Never derive tap coordinates from the screenshot. Coordinates are authoritative only from the a11y tree returned by \`view_ui\`. Snapshot coordinates and screenshot pixels are in the same logical point space \u2014 no scaling factor is required. If an element is visible in the screenshot but absent from the a11y tree, apply A11Y_FALLBACK_RULE \u2014 do not estimate its position from visual layout.`;
+var GHOST_A11Y_ELEMENT_RULE = `Ghost a11y element: an element is a ghost when EITHER of these holds:
+(1) it is tagged \`${OCCLUDED_BY_OVERLAP_TAG}\` in the a11y tree (deterministic detector flagged it as covered by a later-z-order non-ancestor element whose frame either contains the target's tap-point center or fully covers its bbox). The detector is conservative \u2014 absence of the tag does NOT prove the element is not occluded; criterion (2) still applies, OR
+(2) the a11y tree reports an element AND the screenshot at that element's coordinates shows visibly different UI (a different layer, a different screen, no visible element at all). This includes the case where the a11y tree contains elements from two contradictory layers (e.g. a "USDC on ETH Network" modal AND a "USDC on SOL Network" modal at the same time, when only one can be visually present).
+CRITICAL \u2014 finding-emission is mandatory and must happen FIRST:
+- The instant you observe ANY a11y/screenshot mismatch (criterion 1 or 2), STOP planning gestures.
+- Emit a \`ghost-a11y-element\` finding via \`report_finding\` BEFORE attempting any recovery. The finding must state: (a) what you intended to tap, (b) the a11y element's reported coordinates and label, (c) whether the \`${OCCLUDED_BY_OVERLAP_TAG}\` tag was present, (d) what the screenshot shows at those coordinates instead.
+- Recovery attempts WITHOUT first emitting the finding are a rule violation. The mismatch IS the bug \u2014 silently working around it loses the signal.
+- Do NOT tap a ghost element's reported coordinates. Even if the a11y label matches your intent, tapping at those coordinates will hit the visible layer's element at the same point, not the ghost.
+After the finding is emitted, attempt to surface the correct layer in this order: (1) any visible close/X button on the blocking layer, (2) swipe down (sheet dismiss), (3) OS back gesture, (4) swipe up. Call a fresh \`view_ui\` after each recovery attempt before retrying the original tap. If the same overlap recurs after all recovery attempts, emit a separate \`stuck-modal\` finding for the visible layer that is blocking access.`;
 var A11Y_FALLBACK_RULE = `Missing a11y element \u2014 if you intend to tap or interact with a UI element and that element is absent from the most recent \`view_ui\` a11y tree, emit a \`missing-a11y-element\` finding immediately, then continue: in freestyle mode keep exploring other reachable screens; in spec mode advance to the next step.
 The finding must state:
-(1) your intent (what you were trying to do)
-(2) the approximate visual region where the element appeared (coords/size from the screenshot)
-(3) nearby labeled elements from the a11y tree that serve as landmarks
+(1) your intent (what you were trying to do), in user-visible terms (e.g. "tap the Send button on the Portfolio screen")
+(2) the approximate visual region where the element appeared \u2014 name the screen and describe its location in words (e.g. "top-right of the Receive sheet"), not pixel coordinates; any coordinates referenced here are descriptive only and must NOT be used as a tap target (see COORDINATE_SOURCE_RULE)
+(3) nearby labeled elements that serve as landmarks \u2014 use their on-screen labels
+When writing the \`description\` field, follow Description Style: never paste raw coordinates, hex addresses, screen IDs, or accessibility tree excerpts.
 Rules:
-- Visible in the screenshot does NOT imply interactable; the a11y tree is authoritative
-- do NOT estimate its coordinates from the screenshot
-- do NOT attempt any pixel-based tap
+- Visible in the screenshot does NOT imply interactable; the a11y tree is authoritative for interactability and coordinates
+- COORDINATE_SOURCE_RULE applies; do NOT attempt any pixel-based tap
 - do NOT retry at different coordinates
 - do NOT long-press or swipe in the element's visual region as a fallback
 - a failed pixel tap is never an \`interaction-regression\` \u2014 it is a \`missing-a11y-element\``;
+var FREESTYLE_ANTI_RATIONALIZATION_RULE = `Reframe-by-substitution check: triggers ONLY when you have stated an explicit prior intent \u2014 verbatim in your reasoning \u2014 to interact with element X to achieve goal Y, and the observed UI does NOT contain X performing Y. If in that situation you find yourself reasoning "the [different element] is functioning as the [intended Y]" or "this is just a different way of doing [intended action]" rather than observing the literal X performing Y, do NOT mark the goal achieved. Emit a \`spec-deviation\` finding stating: (a) your prior intent verbatim, (b) what the screenshot shows instead, (c) the reframing reasoning verbatim. This rule does NOT trigger on benign UI variation (button label "Continue" vs "Next" with the same effect) \u2014 only on substituting a different element/affordance for the one originally intended. Lighter than spec-mode ANTI_RATIONALIZATION_RULE because freestyle has no spec outcome text; the trigger is the agent's own prior intent statement.`;
 var PLATFORM_FIRST_RUN_RULE = `OS permission and platform dialogs on fresh install are normal platform behavior, not app bugs \u2014 this includes: iOS notification permission ("Would Like to Send You Notifications"), iOS Face ID / Touch ID enrollment, iOS App Tracking Transparency, iOS "Allow Paste" prompts, Android runtime permission dialogs (camera, microphone, contacts, location, storage), and Android biometric prompts \u2014 when such a dialog appears while executing a step, dismiss it via the appropriate button (Allow, Don't Allow, OK, or OS back), then retry the action that triggered it; only emit \`spec-deviation\` if, after dismissing the dialog AND retrying the action, the expected screen or outcome still does not appear \u2014 do NOT emit any finding on the dialog itself.`;
 var COMMON_RULE_BULLETS = [
   SESSION_START_RULE,
   POST_ACTION_OBSERVE_RULE,
-  NO_REDUNDANT_CAPTURE_RULE,
   BACK_NAV_RULE,
   QUEUE_FIRST_RULE,
   STUCK_LOOP_RULE,
   LOADING_STATE_RULE,
   EXPECTED_CONTENT_MISSING_RULE,
+  COORDINATE_SOURCE_RULE,
+  GHOST_A11Y_ELEMENT_RULE,
   A11Y_FALLBACK_RULE,
   CLIPPED_ELEMENT_RULE,
   SCROLL_FOLD_RULE
@@ -74777,12 +74915,38 @@ Write the description (what you saw vs. expected, where, when) before committing
 - LOW \u2014 speculative; only include when freestyle/low-confidence triggers require it`;
 var FINDING_TAXONOMY_SECTION = `## Finding Types
-You may emit only these trigger types: \`back-nav-failure\`, \`dead-end\`, \`stuck-modal\`, \`stuck-loop\`, \`missing-a11y-element\`, \`missing-content\`, \`spec-deviation\`, \`destructive-only-exit\`. Do NOT emit \`design-system-violation\`, \`motion-regression\`, \`continuity-regression\`, \`interaction-regression\`, or \`loading-regression\` \u2014 those belong to other agents.
+You may emit only these trigger types: \`back-nav-failure\`, \`dead-end\`, \`stuck-modal\`, \`stuck-loop\`, \`missing-a11y-element\`, \`ghost-a11y-element\`, \`missing-content\`, \`spec-deviation\`, \`destructive-only-exit\`. Do NOT emit \`design-system-violation\`, \`motion-regression\`, \`continuity-regression\`, \`interaction-regression\`, or \`loading-regression\` \u2014 those belong to other agents.
 ${CONFIDENCE_RUBRIC_SECTION}`;
+var DESCRIPTION_STYLE_SECTION = `## Description Style
+The \`description\` field is read by a QA tester who has not seen your reasoning. Write so they can reproduce and triage without internal context.
+Required:
+- One short sentence first: the user action plus the observed problem, in product terms (use the on-screen labels of buttons, screens, and modals)
+- Add a second sentence if you can state the probable cause in product terms without implementation guessing (e.g. "modal stacked behind another modal", "address did not change after picking the network")
+- Past tense, declarative, plain English
+- Keep to 1\u20132 sentences
+Forbidden:
+- Internal jargon: \`a11y tree\`, \`view_ui\`, \`screen_id\`, \`ghost element\`, \`occluded-by-overlap\`, \`clipped-*\`, \`stuck-loop\`, tool names
+- PATH notation (e.g. \`Home > Settings > Privacy\`), screen IDs, internal element tags
+- Pixel coordinates, element positions, hex offsets, raw addresses, technical IDs
+- First-person narration ("I tapped\u2026", "Let me try\u2026"), tool-call traces, reasoning steps
+- Speculation about implementation ("rendering issue", "z-index", "layering bug") \u2014 describe the user-visible effect instead
+Example 1 \u2014 overlapping modals:
+- Bad: \`After selecting Solana from the network picker while viewing the ETH Receive screen, the a11y tree shows SOL Network confirmation elements ("SOL NETWORK" text at (201,319) ...) but the screenshot still visually displays the ETH Network Receive screen with address 0xb30...\`
+- Good: \`Picking SOL in the network selector did not change the receive address. The SOL confirmation modal appeared behind the network picker modal instead of replacing the ETH receive screen.\`
+Example 2 \u2014 dead end:
+- Bad: \`view_ui shows no elements matching 'Back' or 'Close' on screen_id=privacy_settings_0; PATH is Home > Settings > Privacy; OS back gesture and swipe down/up/left/right all returned the same screen_id\`
+- Good: \`The Privacy Settings screen had no way to exit. Tapping the back area and swiping in every direction did not navigate away.\``;
 var REPORTING_FINDINGS_BASE = `## Reporting Findings
-CRITICAL: When you observe a finding, call \`report_finding\` IMMEDIATELY \u2014 before taking any further actions. Do not batch findings. Do not wait until the end of the run. Each \`report_finding\` call atomically records one finding with the current screen attached; the server captures the screenshot. Do not pass screenshot paths or step indices. If you are uncertain whether something warrants a finding, do not report it \u2014 \`report_finding\` is for confirmed observations only.`;
+CRITICAL: When you observe a finding, call \`report_finding\` IMMEDIATELY \u2014 before taking any further actions. Do not batch findings. Do not wait until the end of the run. Each \`report_finding\` call atomically records one finding with the current screen attached; the server captures the screenshot. Do not pass screenshot paths or step indices. If you are uncertain whether something warrants a finding, do not report it \u2014 \`report_finding\` is for confirmed observations only.
+${DESCRIPTION_STYLE_SECTION}`;
 function buildReportFindingSection(scenarioId) {
   if (scenarioId === void 0) {
     return REPORTING_FINDINGS_BASE;
@@ -74812,7 +74976,11 @@ function buildEnvSection(buildEnv3) {
 ${DEV_ENVIRONMENT_SECTION}` : "";
 }
-var FREESTYLE_RULE_BULLETS = [...COMMON_RULE_BULLETS, PLATFORM_FIRST_RUN_RULE];
+var FREESTYLE_RULE_BULLETS = [
+  ...COMMON_RULE_BULLETS,
+  PLATFORM_FIRST_RUN_RULE,
+  FREESTYLE_ANTI_RATIONALIZATION_RULE
+];
 var FREESTYLE_RULES_SECTION = buildRulesSection2(FREESTYLE_RULE_BULLETS);
 var WHAT_TO_TEST_SECTION = `## What to Test
@@ -74856,6 +75024,8 @@ function buildFreestyleBody({
 ${contextBlock}
+${PERCEPTION_MODEL_SECTION}
 ${TOOL_SELECTION_SECTION}
 ${FREESTYLE_RULES_SECTION}
@@ -74881,7 +75051,7 @@ var FREESTYLE_TEMPLATE = (options2) => {
   const reportingSection = buildReportFindingSection(scenarioId);
   return buildFreestyleBody({ contextBlock, environmentSection, reportingSection });
 };
-var OUTCOME_LITERAL_RULE = `When verifying a step outcome or assertion, interpret all quantifiers literally and apply them exhaustively. Any keyword that imposes a universal or count-bound constraint \u2014 including but not limited to \`only\`, \`all\`, \`every\`, \`each\`, \`both\`, \`no\`, \`none\`, \`neither\`, \`exactly N\`, \`at least N\`, \`fewer than N\`, \`more than N\` \u2014 a single counter-example observed in \`view_ui\` or \`screenshot\` constitutes a failed constraint.
+var OUTCOME_LITERAL_RULE = `When verifying a step outcome or assertion, interpret all quantifiers literally and apply them exhaustively. Any keyword that imposes a universal or count-bound constraint \u2014 including but not limited to \`only\`, \`all\`, \`every\`, \`each\`, \`both\`, \`no\`, \`none\`, \`neither\`, \`exactly N\`, \`at least N\`, \`fewer than N\`, \`more than N\` \u2014 a single counter-example observed via \`view_ui\` constitutes a failed constraint.
 Scope:
 - Applies only when the outcome text contains a universal or count-bound quantifier
@@ -74897,10 +75067,10 @@ On violation: if one item violates the constraint, emit \`spec-deviation\` immed
 Precedence: when the counter-evidence is an element absent from the a11y tree, A11Y_FALLBACK_RULE determines the finding type (\`missing-a11y-element\`). OUTCOME_LITERAL_RULE applies only to observed-but-unwanted elements.`;
 var ANTI_RATIONALIZATION_RULE = `During outcome verification, monitor your own reasoning for reconciliation hypotheses. A reconciliation hypothesis is any reasoning that re-frames, redefines, or reinterprets the observed counter-example or target class in order to produce agreement with the spec outcome \u2014 regardless of phrasing. Treat such reasoning as a deviation signal, not a resolution: stop, do NOT mark the step complete, and emit \`spec-deviation\` with: (a) the literal outcome text, (b) the specific observation that triggered the hypothesis, (c) the reconciliation reasoning itself verbatim.
-Attestation: before marking any quantifier-bearing outcome complete, state explicitly in your reasoning: \`No reconciliation hypothesis generated. Counter-examples found: [list or none].\` If you cannot make that statement honestly, a hypothesis exists \u2014 emit \`spec-deviation\`.
+Attestation: before marking any step complete where an explicit \`\u2192 outcome\` is present in the spec step, state explicitly in your reasoning: \`No reconciliation hypothesis generated. Counter-examples found: [list or none].\` If you cannot make that statement honestly, a hypothesis exists \u2014 emit \`spec-deviation\`.
 Ambiguity: when outcome verification is ambiguous, first re-verify via a fresh \`view_ui\` and re-evaluate against the outcome text. If still ambiguous after re-verification, emit \`spec-deviation\` citing the ambiguity \u2014 silence is not a pass, and marking the step complete without explicit evaluation does not qualify.`;
-var SPEC_ASSERTION_RULE = `Each item in \`**Assertions**\` is a mandatory pass/fail check \u2014 verify using \`view_ui\` when the assertion targets an element attribute, label, or presence in the tree; use \`screenshot\` when the assertion is purely visual; if neither can confirm, emit a \`spec-deviation\` finding based on what is observable`;
+var SPEC_ASSERTION_RULE = `Each item in \`**Assertions**\` is a mandatory pass/fail check \u2014 verify using \`view_ui\`; the screenshot embedded in the response is your visual evidence and the a11y tree confirms element presence and attributes. If the result cannot confirm the assertion, emit a \`spec-deviation\` finding based on what is observable.`;
 var SPEC_PASSIVE_BREAKAGE_RULE = `Flag crash dialogs, unexpected system errors, or navigation failures that occur as a direct result of executing a spec step; if you observe a visibly broken element in passing while navigating, note it without interacting with it`;
 var SPEC_RULE_BULLETS = [
   ...COMMON_RULE_BULLETS,
@@ -74927,7 +75097,7 @@ Each step has this shape:
   <intent> [\u2192 <outcome>] [hint: <advisory>]
 - The intent phrase is your goal. Achieve it by any reasonable UI path.
-- If an outcome state is present, it is your verification target. After acting, confirm the outcome is met before marking the step complete \u2014 use \`screenshot\` when the outcome is purely visual (screen transition visible, element gone, content appeared); use \`view_ui\` when the outcome requires asserting element labels, attributes, or coordinates. If no outcome is given, proceed when the action succeeds.
+- If an outcome state is present, it is your verification target. After acting, call \`view_ui\` to confirm the outcome \u2014 the embedded screenshot verifies visual transitions and the a11y tree verifies element state. If no outcome is given, proceed when the action succeeds.
 - A hint is advisory only. Prefer an element matching the hint, but if no literal match exists, use intent and visual context to select the best candidate. Never fail a step solely because a hint label is absent.
 - Infer element role (primary action, secondary action, dismissal) from visual hierarchy, position, and hint text. Authors do not specify role.
 - If no element satisfies the intent after exhausting visible UI, emit a \`spec-deviation\` finding and halt that step.`;
@@ -74956,6 +75126,8 @@ function buildSpecModeBody({
 ${contextBlock}
+${PERCEPTION_MODEL_SECTION}
 ${TOOL_SELECTION_SECTION}
 ${SPEC_RULES_SECTION}
@@ -94353,7 +94525,7 @@ function buildProgram(options2) {
 // src/index.ts
 process.title = "xqa";
-var version2 = `${"5.4.0"}${false ? ` (dev build +${"fb81480"})` : ""}`;
+var version2 = `${"5.5.0"}${false ? ` (dev build +${"432b4b3"})` : ""}`;
 var program2 = buildProgram({ version: version2 });
 void program2.parseAsync(process.argv);
 /*! Bundled license information:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@exodus/xqa",
-  "version": "5.4.0",
+  "version": "5.5.0",
   "type": "module",
   "engines": {
     "node": ">=22"
@@ -36,8 +36,8 @@
     "@qa-agents/mobile-ios": "0.0.0",
     "@qa-agents/pipeline": "0.0.0",
     "@qa-agents/planner": "0.0.0",
-    "@qa-agents/shared": "0.0.0",
     "@qa-agents/triager": "0.0.0",
+    "@qa-agents/shared": "0.0.0",
     "@qa-agents/typescript-config": "0.0.0"
   },
   "dependencies": {