@exodus/xqa 5.4.0 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/xqa.cjs +208 -36
- package/package.json +2 -2
package/dist/xqa.cjs
CHANGED
|
@@ -22262,10 +22262,10 @@ var require_array = __commonJS({
|
|
|
22262
22262
|
"use strict";
|
|
22263
22263
|
Object.defineProperty(exports2, "__esModule", { value: true });
|
|
22264
22264
|
exports2.splitWhen = exports2.flatten = void 0;
|
|
22265
|
-
function
|
|
22265
|
+
/**
 * Flattens a list by exactly one level.
 *
 * Each item is appended with `Array.prototype.concat`, so array items
 * contribute their elements while non-array items are appended as-is.
 *
 * @param {Array} items - Values and/or arrays of values.
 * @returns {Array} A new array; `items` is not modified.
 */
function flatten2(items) {
  return items.reduce((collection, item) => [].concat(collection, item), []);
}
|
|
22268
|
-
exports2.flatten =
|
|
22268
|
+
exports2.flatten = flatten2;
|
|
22269
22269
|
function splitWhen(items, predicate) {
|
|
22270
22270
|
const result = [[]];
|
|
22271
22271
|
let groupIndex = 0;
|
|
@@ -63484,6 +63484,104 @@ function collectElements(elements, screen) {
|
|
|
63484
63484
|
walk(elements);
|
|
63485
63485
|
return into;
|
|
63486
63486
|
}
|
|
63487
|
+
// Tag appended by formatElement to the rendered line of any element found in the occluded set.
var OCCLUDED_BY_OVERLAP_TAG = "[occluded-by-overlap]";
|
|
63488
|
+
// Minimum fraction of a target's frame area that a candidate must overlap for fullyCoversBoundingBox to report coverage.
var FULL_BBOX_CONTAINMENT_RATIO = 0.85;
|
|
63489
|
+
/**
 * Reports whether a point lies inside a frame.
 *
 * The test is half-open: the left and top edges are inside the frame, the
 * right and bottom edges are outside it.
 *
 * @param {{x: number, y: number, width: number, height: number}} frame
 * @param {{x: number, y: number}} point
 * @returns {boolean} `true` when the point is within the frame.
 */
function frameContainsPoint(frame, point) {
  const withinX = point.x >= frame.x && point.x < frame.x + frame.width;
  const withinY = point.y >= frame.y && point.y < frame.y + frame.height;
  return withinX && withinY;
}
|
|
63492
|
+
/**
 * Computes the center point of a frame.
 *
 * @param {{x: number, y: number, width: number, height: number}} frame
 * @returns {{x: number, y: number}} The (unrounded) midpoint of the frame.
 */
function frameCenter(frame) {
  const x = frame.x + frame.width / 2;
  const y = frame.y + frame.height / 2;
  return { x, y };
}
|
|
63495
|
+
/**
 * Computes the area of a frame.
 *
 * Negative width or height is clamped to zero before multiplying, so the
 * result is never negative.
 *
 * @param {{width: number, height: number}} frame
 * @returns {number} The clamped area.
 */
function frameArea(frame) {
  const width = Math.max(0, frame.width);
  const height = Math.max(0, frame.height);
  return width * height;
}
|
|
63498
|
+
/**
 * Computes the area of the overlap between two axis-aligned frames.
 *
 * @param {{x: number, y: number, width: number, height: number}} left
 * @param {{x: number, y: number, width: number, height: number}} right
 * @returns {number} The overlapping area, or 0 when the frames are disjoint
 *   or merely touch along an edge.
 */
function intersectionArea(left, right) {
  const overlapMinX = Math.max(left.x, right.x);
  const overlapMinY = Math.max(left.y, right.y);
  const overlapMaxX = Math.min(left.x + left.width, right.x + right.width);
  const overlapMaxY = Math.min(left.y + left.height, right.y + right.height);
  // An empty or inverted overlap rectangle means the frames do not intersect.
  if (overlapMaxX <= overlapMinX || overlapMaxY <= overlapMinY) {
    return 0;
  }
  return (overlapMaxX - overlapMinX) * (overlapMaxY - overlapMinY);
}
|
|
63508
|
+
/**
 * Builds the flat node record for a single element.
 *
 * @param {object} element - Accessibility element; its `frame` is read.
 * @param {{screen: object, ancestors: Set<object>, startOrder: number}} input
 *   Traversal state: the screen passed to `isInViewport`, the set of ancestor
 *   elements, and the tree-order index to assign to this element.
 * @returns {{element: object, frame: object, ancestors: Set<object>, treeOrder: number}|undefined}
 *   The node, or `undefined` when the element has no frame or its frame does
 *   not pass the `isInViewport` check.
 */
function selfNode(element, input) {
  const { frame } = element;
  if (!frame || !isInViewport(frame, input.screen)) {
    return void 0;
  }
  return {
    element,
    frame,
    ancestors: input.ancestors,
    treeOrder: input.startOrder
  };
}
|
|
63519
|
+
/**
 * Flattens a list of sibling elements (and their subtrees) into node records.
 *
 * Siblings are processed in order; the tree-order counter returned by one
 * subtree becomes the starting counter of the next.
 *
 * @param {object[]} list - Sibling elements.
 * @param {{screen: object, ancestors: Set<object>, startOrder: number}} input
 *   Traversal state shared by every sibling, apart from `startOrder`.
 * @returns {{nodes: object[], nextOrder: number}} All collected nodes and the
 *   next unused tree-order index.
 */
function flattenList(list, input) {
  const collected = [];
  let nextOrder = input.startOrder;
  for (const element of list) {
    const subtree = flattenSubtree(element, { ...input, startOrder: nextOrder });
    // Append one by one: spreading a very large array into push() can exceed
    // the engine's argument-count limit.
    for (const node of subtree.nodes) {
      collected.push(node);
    }
    nextOrder = subtree.nextOrder;
  }
  return { nodes: collected, nextOrder };
}
|
|
63529
|
+
/**
 * Flattens one element and its descendants into node records, in pre-order.
 *
 * The element's own node (if `selfNode` produces one) comes first and consumes
 * one tree-order index; an element that yields no node consumes none. Children
 * are flattened with an ancestor set extended by this element, whether or not
 * the element itself produced a node.
 *
 * @param {object} element - Element whose `children` are traversed.
 * @param {{screen: object, ancestors: Set<object>, startOrder: number}} input
 * @returns {{nodes: object[], nextOrder: number}} Collected nodes and the next
 *   unused tree-order index.
 */
function flattenSubtree(element, input) {
  const own = selfNode(element, input);
  const ownNodes = own ? [own] : [];
  const orderAfterSelf = own ? input.startOrder + 1 : input.startOrder;
  const { children } = element;
  if (!children || children.length === 0) {
    return { nodes: ownNodes, nextOrder: orderAfterSelf };
  }
  // Copy so sibling subtrees never observe each other's ancestors.
  const childAncestors = new Set(input.ancestors);
  childAncestors.add(element);
  const childOutput = flattenList(children, {
    ancestors: childAncestors,
    screen: input.screen,
    startOrder: orderAfterSelf
  });
  return {
    nodes: [...ownNodes, ...childOutput.nodes],
    nextOrder: childOutput.nextOrder
  };
}
|
|
63548
|
+
/**
 * Flattens an element tree into a list of node records.
 *
 * Traversal starts with an empty ancestor set and tree order 0.
 *
 * @param {object[]} elements - Root elements of the tree.
 * @param {object} screen - Screen passed through to the viewport check.
 * @returns {object[]} The node records collected by `flattenList`.
 */
function flatten(elements, screen) {
  const rootInput = {
    ancestors: /* @__PURE__ */ new Set(),
    screen,
    startOrder: 0
  };
  return flattenList(elements, rootInput).nodes;
}
|
|
63555
|
+
/**
 * Reports whether the candidate's frame contains the center of the target's
 * frame.
 *
 * @param {{frame: object}} target
 * @param {{frame: object}} candidate
 * @returns {boolean}
 */
function blocksTapPoint(target, candidate) {
  const tapPoint = frameCenter(target.frame);
  return frameContainsPoint(candidate.frame, tapPoint);
}
|
|
63558
|
+
/**
 * Reports whether the candidate's frame overlaps at least
 * `FULL_BBOX_CONTAINMENT_RATIO` of the target's frame area.
 *
 * @param {{frame: object}} target
 * @param {{frame: object}} candidate
 * @returns {boolean} `false` when the target has no area; otherwise whether
 *   the covered fraction reaches the threshold.
 */
function fullyCoversBoundingBox(target, candidate) {
  const targetArea = frameArea(target.frame);
  // A zero-area target cannot be "covered"; this also avoids dividing by zero.
  if (targetArea <= 0) {
    return false;
  }
  const coveredFraction = intersectionArea(target.frame, candidate.frame) / targetArea;
  return coveredFraction >= FULL_BBOX_CONTAINMENT_RATIO;
}
|
|
63565
|
+
/**
 * Decides whether `candidate` occludes `target`.
 *
 * A candidate is rejected when it does not come strictly later in tree order,
 * when it is a descendant of the target, or when it is an ancestor of the
 * target. Otherwise it occludes if its frame contains the target's center or
 * covers enough of the target's bounding box.
 *
 * @param {{element: object, frame: object, ancestors: Set<object>, treeOrder: number}} target
 * @param {{element: object, frame: object, ancestors: Set<object>, treeOrder: number}} candidate
 * @returns {boolean}
 */
function isOccluder(target, candidate) {
  const comesLater = candidate.treeOrder > target.treeOrder;
  if (!comesLater) {
    return false;
  }
  const isDescendantOfTarget = candidate.ancestors.has(target.element);
  if (isDescendantOfTarget) {
    return false;
  }
  const isAncestorOfTarget = target.ancestors.has(candidate.element);
  if (isAncestorOfTarget) {
    return false;
  }
  return blocksTapPoint(target, candidate) || fullyCoversBoundingBox(target, candidate);
}
|
|
63577
|
+
/**
 * Reports whether any node in `nodes` occludes `target`.
 *
 * @param {object} target - Node record to test.
 * @param {object[]} nodes - Candidate node records (may include `target`).
 * @returns {boolean}
 */
function isOccluded(target, nodes) {
  return nodes.some((candidate) => isOccluder(target, candidate));
}
|
|
63580
|
+
/**
 * Finds every element that is occluded by another element in the tree.
 *
 * The tree is flattened once, and each node is then tested against all nodes
 * (quadratic in the number of flattened nodes).
 *
 * @param {object[]} elements - Root elements of the tree.
 * @param {object} screen - Screen passed through to the viewport check.
 * @returns {Set<object>} The original element objects that are occluded.
 */
function detectOccludedElements(elements, screen) {
  const nodes = flatten(elements, screen);
  const occludedElements = nodes
    .filter((target) => isOccluded(target, nodes))
    .map((node) => node.element);
  return new Set(occludedElements);
}
|
|
63487
63585
|
/**
 * Picks the display label for an element.
 *
 * Uses `AXLabel`, falling back to `AXValue` and then to the empty string. The
 * fallback applies only to `null`/`undefined`, so an empty-string `AXLabel` is
 * kept as-is.
 *
 * @param {{AXLabel?: *, AXValue?: *}} element
 * @returns {*} The resolved label.
 */
function resolveLabel(element) {
  return element.AXLabel ?? element.AXValue ?? "";
}
|
|
@@ -63509,15 +63607,27 @@ function resolveClippingTags(frame, screen) {
|
|
|
63509
63607
|
}
|
|
63510
63608
|
return tags.length > 0 ? ` ${tags.join(" ")}` : "";
|
|
63511
63609
|
}
|
|
63512
|
-
function formatElement(element,
|
|
63610
|
+
/**
 * Renders one element as a single descriptive line.
 *
 * The line contains the resolved type and label, the rounded center point and
 * size of the frame, and optional suffixes: ` [disabled]` when `enabled` is
 * exactly `false`, the clipping tags, and the occlusion tag when the element
 * is in `context.occluded`. A missing frame is treated as a zero frame at the
 * origin.
 *
 * @param {object} element - Accessibility element to render.
 * @param {{screen: object, occluded: Set<object>}} context - Screen used for
 *   clipping tags and the set of occluded elements.
 * @returns {string} The formatted line.
 */
function formatElement(element, context) {
  const type = resolveType(element);
  const label = resolveLabel(element);
  const frame = element.frame ?? { x: 0, y: 0, width: 0, height: 0 };
  const cx = Math.round(frame.x + frame.width / 2);
  const cy = Math.round(frame.y + frame.height / 2);
  const state = element.enabled === false ? " [disabled]" : "";
  const clipping = resolveClippingTags(frame, context.screen);
  const occluded = context.occluded.has(element) ? ` ${OCCLUDED_BY_OVERLAP_TAG}` : "";
  return `[${type}] "${label}" at (${String(cx)}, ${String(cy)}) size ${String(Math.round(frame.width))}x${String(Math.round(frame.height))}${state}${clipping}${occluded}`;
}
|
|
63621
|
+
/**
 * Recursively collects elements that are occluded and in the viewport but
 * absent from the visible set.
 *
 * Results are in pre-order: an element precedes its descendants. Elements
 * without a frame are never collected, but their children are still visited.
 *
 * @param {object[]} list - Elements to scan.
 * @param {{screen: object, occluded: Set<object>, visible: Set<object>}} query
 * @returns {object[]} The matching elements.
 */
function collectPrunedOccluded(list, query) {
  return list.flatMap((element) => {
    const inViewport = element.frame !== void 0 && isInViewport(element.frame, query.screen);
    const isPruned = inViewport && query.occluded.has(element) && !query.visible.has(element);
    const own = isPruned ? [element] : [];
    const nested = element.children ? collectPrunedOccluded(element.children, query) : [];
    return [...own, ...nested];
  });
}
|
|
63629
|
+
/**
 * Collects the pruned occluded elements for a whole tree.
 *
 * @param {{elements: object[], screen: object, occluded: Set<object>, visible: Set<object>}} query
 *   The root elements plus the sets and screen used for filtering.
 * @returns {object[]} Result of `collectPrunedOccluded` over `query.elements`.
 */
function findPrunedOccluded(query) {
  return collectPrunedOccluded(query.elements, query);
}
|
|
63522
63632
|
function formatAccessibilityElements(elements) {
|
|
63523
63633
|
const app = elements.find((element) => element.type === "Application");
|
|
@@ -63525,7 +63635,12 @@ function formatAccessibilityElements(elements) {
|
|
|
63525
63635
|
const screenHeight = app?.frame?.height ?? DEFAULT_SCREEN_HEIGHT;
|
|
63526
63636
|
const screen = { width: screenWidth, height: screenHeight };
|
|
63527
63637
|
const visible = collectElements(elements, screen);
|
|
63528
|
-
const
|
|
63638
|
+
const visibleSet = new Set(visible);
|
|
63639
|
+
const occluded = detectOccludedElements(elements, screen);
|
|
63640
|
+
const context = { screen, occluded };
|
|
63641
|
+
const prunedOccluded = findPrunedOccluded({ elements, visible: visibleSet, occluded, screen });
|
|
63642
|
+
const renderable = [...visible, ...prunedOccluded];
|
|
63643
|
+
const elementList = renderable.length === 0 ? "No elements found." : renderable.map((element) => formatElement(element, context)).join("\n");
|
|
63529
63644
|
const appName = app?.AXLabel;
|
|
63530
63645
|
return appName ? `Running app: ${appName}
|
|
63531
63646
|
|
|
@@ -63713,7 +63828,7 @@ function createListAppsTool(udid = "booted") {
|
|
|
63713
63828
|
}
|
|
63714
63829
|
var DEFAULT_LONG_PRESS_DURATION_MS = 500;
|
|
63715
63830
|
var MIN_PLAUSIBLE_LONG_PRESS_DURATION_MS = 100;
|
|
63716
|
-
var DEFAULT_SWIPE_DURATION_MS =
|
|
63831
|
+
var DEFAULT_SWIPE_DURATION_MS = 300;
|
|
63717
63832
|
var MIN_PLAUSIBLE_SWIPE_DURATION_MS = 50;
|
|
63718
63833
|
var MS_PER_SECOND = 1e3;
|
|
63719
63834
|
var ENTER_KEY_CODE = "0x28";
|
|
@@ -63836,13 +63951,13 @@ function createLongPressTool(udid = "booted") {
|
|
|
63836
63951
|
}
|
|
63837
63952
|
var DURATION_DESCRIPTION2 = `Gesture duration in milliseconds. Default ${String(
|
|
63838
63953
|
DEFAULT_SWIPE_DURATION_MS
|
|
63839
|
-
)}ms (flick) works for
|
|
63954
|
+
)}ms (controlled flick) works for most lists and avoids overshoot on medium-density content. Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. Velocity = distance / duration - raise duration at fixed distance to slow the gesture and reduce momentum. Raise to 500-800ms for slow controlled scrolling on long lists; lower to 100-150ms for a fast flick when long-distance scroll is desired. Values under ${String(
|
|
63840
63955
|
MIN_PLAUSIBLE_SWIPE_DURATION_MS
|
|
63841
63956
|
)}ms almost always indicate a unit mistake (seconds passed instead of milliseconds).`;
|
|
63842
63957
|
var DELTA_DESCRIPTION = "Pixel distance between interpolated touch points along the swipe path. Smaller values (e.g. 5) produce a denser event stream - smoother motion and more controllable stop-velocity, recommended when combining with a raised duration to tame long-list overshoot. Larger values produce coarser strokes. Omit to use idb defaults.";
|
|
63843
63958
|
var TOOL_DESCRIPTION2 = `Swipe on the screen from one point to another. Duration is in milliseconds (default ${String(
|
|
63844
63959
|
DEFAULT_SWIPE_DURATION_MS
|
|
63845
|
-
)}ms, a flick). Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second.
|
|
63960
|
+
)}ms, a controlled flick). Examples: duration 500 = 0.5 seconds, duration 1000 = 1 second. The default duration suits most scrolling, sheet dismissal, and paging; shorten to 100-150ms when you need a long-distance fast flick; raise to 500+ for slow controlled drag (reorder, pan). For long lists where the default overshoots: shorten swipe distance AND raise duration; optionally lower delta for denser touch events and a more controllable stop. Do not pass seconds (e.g. 0.5) - that would swipe for less than a millisecond.`;
|
|
63846
63961
|
var SWIPE_SCHEMA = {
|
|
63847
63962
|
x_start: external_exports.number(),
|
|
63848
63963
|
y_start: external_exports.number(),
|
|
@@ -63887,7 +64002,7 @@ function buildSuccessText2(input) {
|
|
|
63887
64002
|
MIN_PLAUSIBLE_SWIPE_DURATION_MS
|
|
63888
64003
|
)}ms - this is almost certainly a unit mistake. The duration parameter is in milliseconds; use ${String(
|
|
63889
64004
|
DEFAULT_SWIPE_DURATION_MS
|
|
63890
|
-
)}ms for
|
|
64005
|
+
)}ms for the default controlled flick and 500-800ms for slow drag (e.g. duration 500 = 0.5 seconds).`;
|
|
63891
64006
|
}
|
|
63892
64007
|
return base;
|
|
63893
64008
|
}
|
|
@@ -74240,11 +74355,13 @@ async function runViewUiCapture(context, state) {
|
|
|
74240
74355
|
state
|
|
74241
74356
|
});
|
|
74242
74357
|
}
|
|
74243
|
-
var VIEW_UI_DESCRIPTION = `Capture current screen state:
|
|
74358
|
+
var VIEW_UI_DESCRIPTION = `Capture current screen state. This is your sole observation tool: returns a screenshot (your visual perception of the app) and an accessibility tree (interactability metadata and tap coordinates) in a single call. Use for all state observation, navigation decisions, element verification, and pre-interaction checks.
|
|
74359
|
+
|
|
74360
|
+
The screenshot is the ground truth for what screen you are on, what state the app is in, what content is visible, and what UX is happening. The a11y tree is authoritative for two questions only: "is this element interactable?" and "what tap coordinates should I use?" \u2014 never derive coordinates from the screenshot.
|
|
74244
74361
|
|
|
74245
74362
|
The result begins with a <screen_id> tag containing the current screen identifier. Use this to detect screen changes and track navigation history.
|
|
74246
74363
|
|
|
74247
|
-
|
|
74364
|
+
The \`screenshot\` tool is reserved exclusively for polling during a transient loading state to avoid incrementing the stuck-loop counter. Do not use \`screenshot\` for any other observation purpose.
|
|
74248
74365
|
|
|
74249
74366
|
IMPORTANT: Snapshot coordinates and screenshot pixels are in the same logical point space. Do not apply any scaling factor (no 2x retina adjustment).`;
|
|
74250
74367
|
var VIEW_UI_TOOL_NAME = "mcp__mobile-ios__view_ui";
|
|
@@ -74693,12 +74810,19 @@ function startAndRun(params) {
|
|
|
74693
74810
|
});
|
|
74694
74811
|
});
|
|
74695
74812
|
}
|
|
74813
|
+
var PERCEPTION_MODEL_SECTION = `## Perception Model
|
|
74814
|
+
|
|
74815
|
+
Every \`view_ui\` call returns two artifacts simultaneously:
|
|
74816
|
+
|
|
74817
|
+
- **Screenshot** \u2014 your visual perception of the app. This is the ground truth for what screen you are on, what state the app is in, what content is visible, and what UX is happening. Reason from the screenshot first when answering "what is the app showing me right now?"
|
|
74818
|
+
- **A11y tree** \u2014 metadata about that visual reality. It is authoritative for two questions only: "is this element interactable?" and "what tap coordinates should I use?" Never derive coordinates from the screenshot, even when the screenshot appears to show an element clearly.
|
|
74819
|
+
|
|
74820
|
+
Precedence: the screenshot governs comprehension of screen identity, state, and content. The a11y tree governs interactability and coordinates. These domains do not overlap \u2014 there is no scenario where the screenshot overrides a11y-sourced coordinates, and there is no scenario where the a11y tree overrides screenshot-sourced understanding of what the app is showing.`;
|
|
74696
74821
|
var TOOL_SELECTION_SECTION = `## Tool Selection
|
|
74697
74822
|
|
|
74698
|
-
- \`view_ui\` \u2014 returns
|
|
74699
|
-
- \`screenshot\` \u2014
|
|
74700
|
-
-
|
|
74701
|
-
- \`screenshot\` calls do not emit a \`<screen_id>\` and do not advance the stuck-loop counter; if screen identity tracking matters, use \`view_ui\``;
|
|
74823
|
+
- \`view_ui\` \u2014 your sole observation tool; returns a screenshot (visual ground truth) and an a11y tree (interactability metadata and tap coordinates) in one call; use for all state observation, navigation decisions, element verification, and pre-interaction checks
|
|
74824
|
+
- \`screenshot\` \u2014 loading polls only; use exclusively while waiting for a transient loading state to resolve, to avoid false stuck-loop counter increments; do not use \`screenshot\` for any other observation purpose \u2014 see LOADING_STATE_RULE
|
|
74825
|
+
- \`screenshot\` calls do not emit a \`<screen_id>\` and do not advance the stuck-loop counter`;
|
|
74702
74826
|
var DEV_ENVIRONMENT_SECTION = `## Environment
|
|
74703
74827
|
|
|
74704
74828
|
This is a development build. Debug overlays and internal messages are expected artifacts \u2014 do not report them as findings.`;
|
|
@@ -74711,9 +74835,8 @@ At every reasoning step, maintain a mental ledger:
|
|
|
74711
74835
|
|
|
74712
74836
|
Consult the ledger before every action. Always prefer navigating to a QUEUE screen over a VISITED one.`;
|
|
74713
74837
|
var SESSION_START_RULE = `Before taking any other action \u2014 including initializing the Working State ledger or emitting findings \u2014 call \`view_ui\` once to observe the starting screen`;
|
|
74714
|
-
var POST_ACTION_OBSERVE_RULE = `After any action, observe the screen before deciding next step
|
|
74715
|
-
var
|
|
74716
|
-
var BACK_NAV_RULE = `After navigating forward to any new screen: attempt to return to the expected parent in PATH \u2014 consult App Knowledge first for the correct exit gesture on this screen, then try in order: (1) any visible back/close button, (2) OS back gesture, (3) swipe up, (4) swipe down, (5) swipe left, (6) swipe right \u2014 confirm return via \`screenshot\` if the parent is visually unambiguous, \`view_ui\` otherwise \u2014 only after ALL attempts fail emit a \`back-nav-failure\` finding, then navigate forward again to continue`;
|
|
74838
|
+
var POST_ACTION_OBSERVE_RULE = `After any action, call \`view_ui\` to observe the resulting screen state before deciding the next step. Exception: if the screen is in a transient loading state, use \`screenshot\` to poll \u2014 see LOADING_STATE_RULE.`;
|
|
74839
|
+
var BACK_NAV_RULE = `After navigating forward to any new screen: attempt to return to the expected parent in PATH \u2014 consult App Knowledge first for the correct exit gesture on this screen, then try in order: (1) any visible back/close button, (2) OS back gesture, (3) swipe up, (4) swipe down, (5) swipe left, (6) swipe right \u2014 confirm return via \`view_ui\` \u2014 only after ALL attempts fail emit a \`back-nav-failure\` finding, then navigate forward again to continue`;
|
|
74717
74840
|
var QUEUE_FIRST_RULE = `Before selecting any action, prefer navigating to a QUEUE screen over re-exploring a VISITED one`;
|
|
74718
74841
|
var STUCK_LOOP_RULE = `Stuck loop \u2014 emit a \`stuck-loop\` finding when any of these signals occur:
|
|
74719
74842
|
(1) \`view_ui\` returns the same \`<screen_id>\` across 3 or more consecutive \`view_ui\` calls
|
|
@@ -74736,34 +74859,49 @@ Example: if tab bar positions \`Tokens(-31) ETH(65) ... Tron(352)\` are unchange
|
|
|
74736
74859
|
Notes:
|
|
74737
74860
|
- \`screenshot\`-only calls do not update the stuck-loop counter; only \`view_ui\` calls count
|
|
74738
74861
|
- Zero-delta scroll stall is not a separate finding type \u2014 report as \`stuck-loop\``;
|
|
74739
|
-
var LOADING_STATE_RULE = `Transient loading state: when the screen shows spinners, skeleton screens, progress bars, "Loading..." text, or placeholder content NOT described in spec or app context \u2014 use \`screenshot\` to poll for resolution (up to 3 retries);
|
|
74862
|
+
var LOADING_STATE_RULE = `Transient loading state: when the screen shows spinners, skeleton screens, progress bars, "Loading..." text, or placeholder content NOT described in spec or app context \u2014 use \`screenshot\` to poll for resolution (up to 3 retries); \`screenshot\` is used here specifically to avoid incrementing the stuck-loop counter during intentional wait cycles, not because it provides different visual information. Call \`view_ui\` on the final check or whenever you are ready to act. If loading persists after 3 retries, proceed with what is visible. If spec or app context explicitly describes a loading screen as a step, skip polling \u2014 call \`view_ui\` and assert normally.`;
|
|
74740
74863
|
var EXPECTED_CONTENT_MISSING_RULE = `Expected content missing: when \`view_ui\` shows no loading indicator yet omits an element named or strongly implied by spec or app context \u2014 and its absence is not semantically consistent with the current screen \u2014 call \`wait_seconds\` with 2\u20135 seconds and retry \`view_ui\` up to 2 times; if element remains absent, emit a \`missing-content\` finding stating what was expected and what was observed`;
|
|
74741
|
-
var CLIPPED_ELEMENT_RULE = `Never tap an element tagged \`[clipped-top]\`, \`[clipped-bottom]\`, \`[clipped-left]\`, or \`[clipped-right]\` \u2014 scroll to fully reveal it first, then re-call \`view_ui\` before tapping
|
|
74864
|
+
var CLIPPED_ELEMENT_RULE = `Never tap an element tagged \`[clipped-top]\`, \`[clipped-bottom]\`, \`[clipped-left]\`, or \`[clipped-right]\` \u2014 scroll to fully reveal it first, then re-call \`view_ui\` before tapping. Only the explicit \`[clipped-*]\` tag in the a11y tree triggers this rule. Do NOT infer clipping from coordinate proximity to viewport edges (a low y-coord does not imply \`[clipped-top]\`).`;
|
|
74742
74865
|
var SCROLL_FOLD_RULE = `Scrollable lists: elements outside the visible viewport are absent from the a11y tree by design \u2014 this applies to elements below the fold in vertical lists AND elements clipped off-left or off-right in horizontal lists \u2014 scroll or swipe in the appropriate axis to reveal before asserting presence or absence; never emit a finding solely because list items, rows, or tabs are missing from the tree on a scrollable screen; if swipe attempts yield no position change across 2+ cycles, apply the scroll-stall path in STUCK_LOOP_RULE.`;
|
|
74866
|
+
var COORDINATE_SOURCE_RULE = `Never derive tap coordinates from the screenshot. Coordinates are authoritative only from the a11y tree returned by \`view_ui\`. Snapshot coordinates and screenshot pixels are in the same logical point space \u2014 no scaling factor is required. If an element is visible in the screenshot but absent from the a11y tree, apply A11Y_FALLBACK_RULE \u2014 do not estimate its position from visual layout.`;
|
|
74867
|
+
var GHOST_A11Y_ELEMENT_RULE = `Ghost a11y element: an element is a ghost when EITHER of these holds:
|
|
74868
|
+
(1) it is tagged \`${OCCLUDED_BY_OVERLAP_TAG}\` in the a11y tree (deterministic detector flagged it as covered by a later-z-order non-ancestor element whose frame either contains the target's tap-point center or fully covers its bbox). The detector is conservative \u2014 absence of the tag does NOT prove the element is not occluded; criterion (2) still applies, OR
|
|
74869
|
+
(2) the a11y tree reports an element AND the screenshot at that element's coordinates shows visibly different UI (a different layer, a different screen, no visible element at all). This includes the case where the a11y tree contains elements from two contradictory layers (e.g. a "USDC on ETH Network" modal AND a "USDC on SOL Network" modal at the same time, when only one can be visually present).
|
|
74870
|
+
|
|
74871
|
+
CRITICAL \u2014 finding-emission is mandatory and must happen FIRST:
|
|
74872
|
+
- The instant you observe ANY a11y/screenshot mismatch (criterion 1 or 2), STOP planning gestures.
|
|
74873
|
+
- Emit a \`ghost-a11y-element\` finding via \`report_finding\` BEFORE attempting any recovery. The finding must state: (a) what you intended to tap, (b) the a11y element's reported coordinates and label, (c) whether the \`${OCCLUDED_BY_OVERLAP_TAG}\` tag was present, (d) what the screenshot shows at those coordinates instead.
|
|
74874
|
+
- Recovery attempts WITHOUT first emitting the finding are a rule violation. The mismatch IS the bug \u2014 silently working around it loses the signal.
|
|
74875
|
+
- Do NOT tap a ghost element's reported coordinates. Even if the a11y label matches your intent, tapping at those coordinates will hit the visible layer's element at the same point, not the ghost.
|
|
74876
|
+
|
|
74877
|
+
After the finding is emitted, attempt to surface the correct layer in this order: (1) any visible close/X button on the blocking layer, (2) swipe down (sheet dismiss), (3) OS back gesture, (4) swipe up. Call a fresh \`view_ui\` after each recovery attempt before retrying the original tap. If the same overlap recurs after all recovery attempts, emit a separate \`stuck-modal\` finding for the visible layer that is blocking access.`;
|
|
74743
74878
|
var A11Y_FALLBACK_RULE = `Missing a11y element \u2014 if you intend to tap or interact with a UI element and that element is absent from the most recent \`view_ui\` a11y tree, emit a \`missing-a11y-element\` finding immediately, then continue: in freestyle mode keep exploring other reachable screens; in spec mode advance to the next step.
|
|
74744
74879
|
|
|
74745
74880
|
The finding must state:
|
|
74746
|
-
(1) your intent (what you were trying to do)
|
|
74747
|
-
(2) the approximate visual region where the element appeared (
|
|
74748
|
-
(3) nearby labeled elements
|
|
74881
|
+
(1) your intent (what you were trying to do), in user-visible terms (e.g. "tap the Send button on the Portfolio screen")
|
|
74882
|
+
(2) the approximate visual region where the element appeared \u2014 name the screen and describe its location in words (e.g. "top-right of the Receive sheet"), not pixel coordinates; any coordinates referenced here are descriptive only and must NOT be used as a tap target (see COORDINATE_SOURCE_RULE)
|
|
74883
|
+
(3) nearby labeled elements that serve as landmarks \u2014 use their on-screen labels
|
|
74884
|
+
|
|
74885
|
+
When writing the \`description\` field, follow Description Style: never paste raw coordinates, hex addresses, screen IDs, or accessibility tree excerpts.
|
|
74749
74886
|
|
|
74750
74887
|
Rules:
|
|
74751
|
-
- Visible in the screenshot does NOT imply interactable; the a11y tree is authoritative
|
|
74752
|
-
- do NOT
|
|
74753
|
-
- do NOT attempt any pixel-based tap
|
|
74888
|
+
- Visible in the screenshot does NOT imply interactable; the a11y tree is authoritative for interactability and coordinates
|
|
74889
|
+
- COORDINATE_SOURCE_RULE applies; do NOT attempt any pixel-based tap
|
|
74754
74890
|
- do NOT retry at different coordinates
|
|
74755
74891
|
- do NOT long-press or swipe in the element's visual region as a fallback
|
|
74756
74892
|
- a failed pixel tap is never an \`interaction-regression\` \u2014 it is a \`missing-a11y-element\``;
|
|
74893
|
+
var FREESTYLE_ANTI_RATIONALIZATION_RULE = `Reframe-by-substitution check: triggers ONLY when you have stated an explicit prior intent \u2014 verbatim in your reasoning \u2014 to interact with element X to achieve goal Y, and the observed UI does NOT contain X performing Y. If in that situation you find yourself reasoning "the [different element] is functioning as the [intended Y]" or "this is just a different way of doing [intended action]" rather than observing the literal X performing Y, do NOT mark the goal achieved. Emit a \`spec-deviation\` finding stating: (a) your prior intent verbatim, (b) what the screenshot shows instead, (c) the reframing reasoning verbatim. This rule does NOT trigger on benign UI variation (button label "Continue" vs "Next" with the same effect) \u2014 only on substituting a different element/affordance for the one originally intended. Lighter than spec-mode ANTI_RATIONALIZATION_RULE because freestyle has no spec outcome text; the trigger is the agent's own prior intent statement.`;
|
|
74757
74894
|
var PLATFORM_FIRST_RUN_RULE = `OS permission and platform dialogs on fresh install are normal platform behavior, not app bugs \u2014 this includes: iOS notification permission ("Would Like to Send You Notifications"), iOS Face ID / Touch ID enrollment, iOS App Tracking Transparency, iOS "Allow Paste" prompts, Android runtime permission dialogs (camera, microphone, contacts, location, storage), and Android biometric prompts \u2014 when such a dialog appears while executing a step, dismiss it via the appropriate button (Allow, Don't Allow, OK, or OS back), then retry the action that triggered it; only emit \`spec-deviation\` if, after dismissing the dialog AND retrying the action, the expected screen or outcome still does not appear \u2014 do NOT emit any finding on the dialog itself.`;
|
|
74758
74895
|
var COMMON_RULE_BULLETS = [
|
|
74759
74896
|
SESSION_START_RULE,
|
|
74760
74897
|
POST_ACTION_OBSERVE_RULE,
|
|
74761
|
-
NO_REDUNDANT_CAPTURE_RULE,
|
|
74762
74898
|
BACK_NAV_RULE,
|
|
74763
74899
|
QUEUE_FIRST_RULE,
|
|
74764
74900
|
STUCK_LOOP_RULE,
|
|
74765
74901
|
LOADING_STATE_RULE,
|
|
74766
74902
|
EXPECTED_CONTENT_MISSING_RULE,
|
|
74903
|
+
COORDINATE_SOURCE_RULE,
|
|
74904
|
+
GHOST_A11Y_ELEMENT_RULE,
|
|
74767
74905
|
A11Y_FALLBACK_RULE,
|
|
74768
74906
|
CLIPPED_ELEMENT_RULE,
|
|
74769
74907
|
SCROLL_FOLD_RULE
|
|
@@ -74777,12 +74915,38 @@ Write the description (what you saw vs. expected, where, when) before committing
|
|
|
74777
74915
|
- LOW \u2014 speculative; only include when freestyle/low-confidence triggers require it`;
|
|
74778
74916
|
var FINDING_TAXONOMY_SECTION = `## Finding Types
|
|
74779
74917
|
|
|
74780
|
-
You may emit only these trigger types: \`back-nav-failure\`, \`dead-end\`, \`stuck-modal\`, \`stuck-loop\`, \`missing-a11y-element\`, \`missing-content\`, \`spec-deviation\`, \`destructive-only-exit\`. Do NOT emit \`design-system-violation\`, \`motion-regression\`, \`continuity-regression\`, \`interaction-regression\`, or \`loading-regression\` \u2014 those belong to other agents.
|
|
74918
|
+
You may emit only these trigger types: \`back-nav-failure\`, \`dead-end\`, \`stuck-modal\`, \`stuck-loop\`, \`missing-a11y-element\`, \`ghost-a11y-element\`, \`missing-content\`, \`spec-deviation\`, \`destructive-only-exit\`. Do NOT emit \`design-system-violation\`, \`motion-regression\`, \`continuity-regression\`, \`interaction-regression\`, or \`loading-regression\` \u2014 those belong to other agents.
|
|
74781
74919
|
|
|
74782
74920
|
${CONFIDENCE_RUBRIC_SECTION}`;
|
|
74921
|
+
var DESCRIPTION_STYLE_SECTION = `## Description Style
|
|
74922
|
+
|
|
74923
|
+
The \`description\` field is read by a QA tester who has not seen your reasoning. Write so they can reproduce and triage without internal context.
|
|
74924
|
+
|
|
74925
|
+
Required:
|
|
74926
|
+
- One short sentence first: the user action plus the observed problem, in product terms (use the on-screen labels of buttons, screens, and modals)
|
|
74927
|
+
- Add a second sentence if you can state the probable cause in product terms without implementation guessing (e.g. "modal stacked behind another modal", "address did not change after picking the network")
|
|
74928
|
+
- Past tense, declarative, plain English
|
|
74929
|
+
- Keep to 1\u20132 sentences
|
|
74930
|
+
|
|
74931
|
+
Forbidden:
|
|
74932
|
+
- Internal jargon: \`a11y tree\`, \`view_ui\`, \`screen_id\`, \`ghost element\`, \`occluded-by-overlap\`, \`clipped-*\`, \`stuck-loop\`, tool names
|
|
74933
|
+
- PATH notation (e.g. \`Home > Settings > Privacy\`), screen IDs, internal element tags
|
|
74934
|
+
- Pixel coordinates, element positions, hex offsets, raw addresses, technical IDs
|
|
74935
|
+
- First-person narration ("I tapped\u2026", "Let me try\u2026"), tool-call traces, reasoning steps
|
|
74936
|
+
- Speculation about implementation ("rendering issue", "z-index", "layering bug") \u2014 describe the user-visible effect instead
|
|
74937
|
+
|
|
74938
|
+
Example 1 \u2014 overlapping modals:
|
|
74939
|
+
- Bad: \`After selecting Solana from the network picker while viewing the ETH Receive screen, the a11y tree shows SOL Network confirmation elements ("SOL NETWORK" text at (201,319) ...) but the screenshot still visually displays the ETH Network Receive screen with address 0xb30...\`
|
|
74940
|
+
- Good: \`Picking SOL in the network selector did not change the receive address. The SOL confirmation modal appeared behind the network picker modal instead of replacing the ETH receive screen.\`
|
|
74941
|
+
|
|
74942
|
+
Example 2 \u2014 dead end:
|
|
74943
|
+
- Bad: \`view_ui shows no elements matching 'Back' or 'Close' on screen_id=privacy_settings_0; PATH is Home > Settings > Privacy; OS back gesture and swipe down/up/left/right all returned the same screen_id\`
|
|
74944
|
+
- Good: \`The Privacy Settings screen had no way to exit. Tapping the back area and swiping in every direction did not navigate away.\``;
|
|
74783
74945
|
var REPORTING_FINDINGS_BASE = `## Reporting Findings
|
|
74784
74946
|
|
|
74785
|
-
CRITICAL: When you observe a finding, call \`report_finding\` IMMEDIATELY \u2014 before taking any further actions. Do not batch findings. Do not wait until the end of the run. Each \`report_finding\` call atomically records one finding with the current screen attached; the server captures the screenshot. Do not pass screenshot paths or step indices. If you are uncertain whether something warrants a finding, do not report it \u2014 \`report_finding\` is for confirmed observations only
|
|
74947
|
+
CRITICAL: When you observe a finding, call \`report_finding\` IMMEDIATELY \u2014 before taking any further actions. Do not batch findings. Do not wait until the end of the run. Each \`report_finding\` call atomically records one finding with the current screen attached; the server captures the screenshot. Do not pass screenshot paths or step indices. If you are uncertain whether something warrants a finding, do not report it \u2014 \`report_finding\` is for confirmed observations only.
|
|
74948
|
+
|
|
74949
|
+
${DESCRIPTION_STYLE_SECTION}`;
|
|
74786
74950
|
function buildReportFindingSection(scenarioId) {
|
|
74787
74951
|
if (scenarioId === void 0) {
|
|
74788
74952
|
return REPORTING_FINDINGS_BASE;
|
|
@@ -74812,7 +74976,11 @@ function buildEnvSection(buildEnv3) {
|
|
|
74812
74976
|
|
|
74813
74977
|
${DEV_ENVIRONMENT_SECTION}` : "";
|
|
74814
74978
|
}
|
|
74815
|
-
var FREESTYLE_RULE_BULLETS = [
|
|
74979
|
+
var FREESTYLE_RULE_BULLETS = [
|
|
74980
|
+
...COMMON_RULE_BULLETS,
|
|
74981
|
+
PLATFORM_FIRST_RUN_RULE,
|
|
74982
|
+
FREESTYLE_ANTI_RATIONALIZATION_RULE
|
|
74983
|
+
];
|
|
74816
74984
|
var FREESTYLE_RULES_SECTION = buildRulesSection2(FREESTYLE_RULE_BULLETS);
|
|
74817
74985
|
var WHAT_TO_TEST_SECTION = `## What to Test
|
|
74818
74986
|
|
|
@@ -74856,6 +75024,8 @@ function buildFreestyleBody({
|
|
|
74856
75024
|
|
|
74857
75025
|
${contextBlock}
|
|
74858
75026
|
|
|
75027
|
+
${PERCEPTION_MODEL_SECTION}
|
|
75028
|
+
|
|
74859
75029
|
${TOOL_SELECTION_SECTION}
|
|
74860
75030
|
|
|
74861
75031
|
${FREESTYLE_RULES_SECTION}
|
|
@@ -74881,7 +75051,7 @@ var FREESTYLE_TEMPLATE = (options2) => {
|
|
|
74881
75051
|
const reportingSection = buildReportFindingSection(scenarioId);
|
|
74882
75052
|
return buildFreestyleBody({ contextBlock, environmentSection, reportingSection });
|
|
74883
75053
|
};
|
|
74884
|
-
var OUTCOME_LITERAL_RULE = `When verifying a step outcome or assertion, interpret all quantifiers literally and apply them exhaustively. Any keyword that imposes a universal or count-bound constraint \u2014 including but not limited to \`only\`, \`all\`, \`every\`, \`each\`, \`both\`, \`no\`, \`none\`, \`neither\`, \`exactly N\`, \`at least N\`, \`fewer than N\`, \`more than N\` \u2014 a single counter-example observed
|
|
75054
|
+
var OUTCOME_LITERAL_RULE = `When verifying a step outcome or assertion, interpret all quantifiers literally and apply them exhaustively. Any keyword that imposes a universal or count-bound constraint \u2014 including but not limited to \`only\`, \`all\`, \`every\`, \`each\`, \`both\`, \`no\`, \`none\`, \`neither\`, \`exactly N\`, \`at least N\`, \`fewer than N\`, \`more than N\` \u2014 a single counter-example observed via \`view_ui\` constitutes a failed constraint.
|
|
74885
75055
|
|
|
74886
75056
|
Scope:
|
|
74887
75057
|
- Applies only when the outcome text contains a universal or count-bound quantifier
|
|
@@ -74897,10 +75067,10 @@ On violation: if one item violates the constraint, emit \`spec-deviation\` immed
|
|
|
74897
75067
|
Precedence: when the counter-evidence is an element absent from the a11y tree, A11Y_FALLBACK_RULE determines the finding type (\`missing-a11y-element\`). OUTCOME_LITERAL_RULE applies only to observed-but-unwanted elements.`;
|
|
74898
75068
|
var ANTI_RATIONALIZATION_RULE = `During outcome verification, monitor your own reasoning for reconciliation hypotheses. A reconciliation hypothesis is any reasoning that re-frames, redefines, or reinterprets the observed counter-example or target class in order to produce agreement with the spec outcome \u2014 regardless of phrasing. Treat such reasoning as a deviation signal, not a resolution: stop, do NOT mark the step complete, and emit \`spec-deviation\` with: (a) the literal outcome text, (b) the specific observation that triggered the hypothesis, (c) the reconciliation reasoning itself verbatim.
|
|
74899
75069
|
|
|
74900
|
-
Attestation: before marking any
|
|
75070
|
+
Attestation: before marking any step complete where an explicit \`\u2192 outcome\` is present in the spec step, state explicitly in your reasoning: \`No reconciliation hypothesis generated. Counter-examples found: [list or none].\` If you cannot make that statement honestly, a hypothesis exists \u2014 emit \`spec-deviation\`.
|
|
74901
75071
|
|
|
74902
75072
|
Ambiguity: when outcome verification is ambiguous, first re-verify via a fresh \`view_ui\` and re-evaluate against the outcome text. If still ambiguous after re-verification, emit \`spec-deviation\` citing the ambiguity \u2014 silence is not a pass, and marking the step complete without explicit evaluation does not qualify.`;
|
|
74903
|
-
var SPEC_ASSERTION_RULE = `Each item in \`**Assertions**\` is a mandatory pass/fail check \u2014 verify using \`view_ui
|
|
75073
|
+
var SPEC_ASSERTION_RULE = `Each item in \`**Assertions**\` is a mandatory pass/fail check \u2014 verify using \`view_ui\`; the screenshot embedded in the response is your visual evidence and the a11y tree confirms element presence and attributes. If the result cannot confirm the assertion, emit a \`spec-deviation\` finding based on what is observable.`;
|
|
74904
75074
|
var SPEC_PASSIVE_BREAKAGE_RULE = `Flag crash dialogs, unexpected system errors, or navigation failures that occur as a direct result of executing a spec step; if you observe a visibly broken element in passing while navigating, note it without interacting with it`;
|
|
74905
75075
|
var SPEC_RULE_BULLETS = [
|
|
74906
75076
|
...COMMON_RULE_BULLETS,
|
|
@@ -74927,7 +75097,7 @@ Each step has this shape:
|
|
|
74927
75097
|
<intent> [\u2192 <outcome>] [hint: <advisory>]
|
|
74928
75098
|
|
|
74929
75099
|
- The intent phrase is your goal. Achieve it by any reasonable UI path.
|
|
74930
|
-
- If an outcome state is present, it is your verification target. After acting,
|
|
75100
|
+
- If an outcome state is present, it is your verification target. After acting, call \`view_ui\` to confirm the outcome \u2014 the embedded screenshot verifies visual transitions and the a11y tree verifies element state. If no outcome is given, proceed when the action succeeds.
|
|
74931
75101
|
- A hint is advisory only. Prefer an element matching the hint, but if no literal match exists, use intent and visual context to select the best candidate. Never fail a step solely because a hint label is absent.
|
|
74932
75102
|
- Infer element role (primary action, secondary action, dismissal) from visual hierarchy, position, and hint text. Authors do not specify role.
|
|
74933
75103
|
- If no element satisfies the intent after exhausting visible UI, emit a \`spec-deviation\` finding and halt that step.`;
|
|
@@ -74956,6 +75126,8 @@ function buildSpecModeBody({
|
|
|
74956
75126
|
|
|
74957
75127
|
${contextBlock}
|
|
74958
75128
|
|
|
75129
|
+
${PERCEPTION_MODEL_SECTION}
|
|
75130
|
+
|
|
74959
75131
|
${TOOL_SELECTION_SECTION}
|
|
74960
75132
|
|
|
74961
75133
|
${SPEC_RULES_SECTION}
|
|
@@ -94353,7 +94525,7 @@ function buildProgram(options2) {
|
|
|
94353
94525
|
|
|
94354
94526
|
// src/index.ts
|
|
94355
94527
|
process.title = "xqa";
|
|
94356
|
-
var version2 = `${"5.
|
|
94528
|
+
var version2 = `${"5.5.0"}${false ? ` (dev build +${"432b4b3"})` : ""}`;
|
|
94357
94529
|
var program2 = buildProgram({ version: version2 });
|
|
94358
94530
|
void program2.parseAsync(process.argv);
|
|
94359
94531
|
/*! Bundled license information:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/xqa",
|
|
3
|
-
"version": "5.4.0",
|
|
3
|
+
"version": "5.5.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22"
|
|
@@ -36,8 +36,8 @@
|
|
|
36
36
|
"@qa-agents/mobile-ios": "0.0.0",
|
|
37
37
|
"@qa-agents/pipeline": "0.0.0",
|
|
38
38
|
"@qa-agents/planner": "0.0.0",
|
|
39
|
-
"@qa-agents/shared": "0.0.0",
|
|
40
39
|
"@qa-agents/triager": "0.0.0",
|
|
40
|
+
"@qa-agents/shared": "0.0.0",
|
|
41
41
|
"@qa-agents/typescript-config": "0.0.0"
|
|
42
42
|
},
|
|
43
43
|
"dependencies": {
|