@wa008/ui-audit-mcp 2.2.2 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +4 -5
  2. package/dist/src/evaluation/checklist.js +67 -67
  3. package/dist/src/evaluation/checklist.js.map +1 -1
  4. package/dist/src/evaluation/state-machine.d.ts +43 -0
  5. package/dist/src/evaluation/state-machine.d.ts.map +1 -0
  6. package/dist/src/evaluation/state-machine.js +79 -0
  7. package/dist/src/evaluation/state-machine.js.map +1 -0
  8. package/dist/src/index.d.ts +6 -6
  9. package/dist/src/index.js +12 -15
  10. package/dist/src/index.js.map +1 -1
  11. package/dist/src/logger/audit-log.d.ts.map +1 -1
  12. package/dist/src/logger/audit-log.js +2 -0
  13. package/dist/src/logger/audit-log.js.map +1 -1
  14. package/dist/src/tools/evaluate.d.ts +41 -0
  15. package/dist/src/tools/evaluate.d.ts.map +1 -0
  16. package/dist/src/tools/evaluate.js +87 -0
  17. package/dist/src/tools/evaluate.js.map +1 -0
  18. package/dist/src/tools/get-audit-status.js +19 -19
  19. package/dist/src/tools/get-audit-status.js.map +1 -1
  20. package/dist/src/tools/swipe.js +1 -1
  21. package/dist/src/tools/swipe.js.map +1 -1
  22. package/dist/src/tools/take-screenshot.js +1 -1
  23. package/dist/src/tools/take-screenshot.js.map +1 -1
  24. package/dist/src/tools/tap.js +1 -1
  25. package/dist/src/tools/tap.js.map +1 -1
  26. package/dist/src/types.d.ts +2 -0
  27. package/dist/src/types.d.ts.map +1 -1
  28. package/package.json +1 -1
  29. package/dist/src/tools/get-evaluation-criteria.d.ts +0 -15
  30. package/dist/src/tools/get-evaluation-criteria.d.ts.map +0 -1
  31. package/dist/src/tools/get-evaluation-criteria.js +0 -29
  32. package/dist/src/tools/get-evaluation-criteria.js.map +0 -1
  33. package/dist/src/tools/submit-dimension-score.d.ts +0 -27
  34. package/dist/src/tools/submit-dimension-score.d.ts.map +0 -1
  35. package/dist/src/tools/submit-dimension-score.js +0 -35
  36. package/dist/src/tools/submit-dimension-score.js.map +0 -1
package/README.md CHANGED
@@ -54,16 +54,15 @@ npm install
54
54
  | `launch_app` | Launch app by Bundle ID |
55
55
  | `take_screenshot` | Capture screen & track evaluation step |
56
56
  | `tap` / `swipe` | Interact with UI (ratio coordinates), screenshot, and track step |
57
- | `get_evaluation_criteria` | Get dimensions and scoring rubrics (Prompts) |
58
- | `submit_dimension_score` | Submit score (1-10) for a single dimension on a tracked step |
57
+ | `evaluate` | Unified evaluation: get dimension prompt (initial) or submit score + get next (submit mode) |
59
58
  | `get_audit_status` | View missing evaluations dashboard or final markdown report |
60
59
 
61
60
  ## Typical Workflow
62
61
  1. `launch_app("com.example.app")`
63
62
  2. `take_screenshot("MyTestCase", 1, "Verify Initial Screen")` or `tap(0.5, 0.5, "MyTestCase", 2, "Click Login")`
64
- 3. `get_evaluation_criteria("overlap")` → Read the prompt for overlap
65
- 4. Agent analyzes UI → `submit_dimension_score("MyTestCase", 1, "overlap", 9, "No overlap")`
66
- 5. Repeat 3-4 until all dimensions are evaluated for the step
63
+ 3. `evaluate("MyTestCase", 1)` → Returns the first dimension prompt + `evaluationToken`
64
+ 4. Agent analyzes UI → `evaluate("MyTestCase", 1, token, 9, "No overlap")` → Records score, returns next dimension prompt + new token
65
+ 5. Repeat step 4 until all 5 dimensions are evaluated
67
66
  6. `get_audit_status(["MyTestCase"])` → Output the full markdown report
68
67
 
69
68
  ## Data
@@ -3,98 +3,98 @@ export const DIMENSIONS = [
3
3
  {
4
4
  id: "overlap",
5
5
  name: "Element Overlap & Safe Areas",
6
- description: "You are a user-centric QA inspector evaluating visual interference and interactive crowding.\n" +
7
- "Evaluate the screen from the perspective of a real human user trying to read and tap efficiently.\n" +
6
+ description: "You are evaluating this screen as a real user trying to read content and tap buttons efficiently.\n" +
8
7
  "Assume there ARE overlap issues until you have visually scanned the entire screen.\n\n" +
9
8
  "CHECK FOR VISUAL INTERFERENCE & COGNITIVE NOISE:\n" +
10
- "1. SYSTEM EDGE INTERFERENCE (CRITICAL): Scan the very top, bottom, and corners. Do any app elements (e.g., buttons, avatars, titles) visually mix with system indicators (time, battery, notch, home indicator)? If an app button sits awkwardly close to system icons, it creates cognitive confusion and accidental click risks. This is a CRITICAL BUG.\n" +
11
- "2. INTERACTIVE CROWDING: Are touch targets (buttons, links) so close to each other or to the screen edges that a user with a large thumb might accidentally trigger the wrong action or a system gesture?\n" +
12
- "3. TEXT & CONTENT CLASH: Do any app elements (text, images, floating buttons) visually collide with or bleed into each other, making them harder to read or recognize?\n" +
13
- "4. MODAL/POPUP OBSTRUCTION: If a popup or sheet is present, does it awkwardly obscure essential context that the user still needs to see?\n\n" +
14
- "SCORING ATTITUDE: Focus on the HUMAN visual experience. If an element 'feels' like it's stepping on another element's toes or invading system space, it is an overlap failure. Punish visual crowding heavily. If unsure, score LOW.\n" +
15
- "NOTE: Do NOT evaluate logical spacing consistency (layout), colors (style), or text meaning (info_clarity).",
16
- scoringGuide: "0-2: Severe visual interference. Elements clearly collide (e.g. app button invading system status bar area), causing immediate cognitive confusion or severe accidental click risks.\n" +
17
- "3-4: Elements don't perfectly collide, but are awkwardly close to each other or to system edges, creating noticeable visual noise or crowding.\n" +
18
- "5-6: All elements are separate, but touch targets might be a bit too close for comfort. Risk of fat-finger errors.\n" +
19
- "7-8: Clean separation. No visual interference. App UI respects system boundaries and provides clear touch targets.\n" +
20
- "9-10: Perfect logical grouping, generous touch targets, and completely unambiguous visual separation over the entire screen.",
9
+ "1. SYSTEM EDGE INTERFERENCE (CRITICAL): Scan the very top, bottom, and corners of the screen. Do any app elements (buttons, icons, avatars, titles) visually touch, overlap with, or sit uncomfortably close to system indicators (time, battery, signal, notch/Dynamic Island, home indicator bar)? If a user might confuse an app element with a system element, or accidentally tap the wrong one, this is a CRITICAL BUG.\n" +
10
+ "2. INTERACTIVE CROWDING: Are any two tappable elements (buttons, links, cards) so close together that a user with a normal-sized finger might accidentally tap the wrong one? Are touch targets dangerously close to screen edges where system gestures occur?\n" +
11
+ "3. CONTENT COLLISION: Do any visible elements (text, images, floating buttons, badges) visually overlap each other, making any of them harder to read, recognize, or tap? Two elements occupying the same visual space is always a bug.\n" +
12
+ "4. MODAL/POPUP OBSTRUCTION: If a popup, sheet, or overlay is present, does it cover essential information that the user still needs to see, such as the content they are acting upon?\n\n" +
13
+ "SCORING ATTITUDE: Focus on what a real user would EXPERIENCE. If something 'feels' too close or visually confusing, it is an overlap failure. When in doubt, score LOW.\n" +
14
+ "NOTE: Do NOT evaluate spacing consistency (layout), color choices (style), or text meaning (info_clarity).",
15
+ scoringGuide: "0-2: Severe visual collision. Elements clearly overlap each other or invade system areas (e.g., a button sitting on top of the status bar), causing immediate confusion or making the interface unusable.\n" +
16
+ "3-4: Elements don't directly collide, but are awkwardly close to each other or to system edges, creating noticeable visual noise or high accidental-tap risk.\n" +
17
+ "5-6: All elements are visually separate, but some touch targets feel uncomfortably close. A careful user would be fine, but a hurried user might mis-tap.\n" +
18
+ "7-8: Clean separation throughout. No visual interference. The app clearly respects system boundaries and provides comfortable touch targets.\n" +
19
+ "9-10: Perfect separation — generous spacing between all interactive elements, completely unambiguous visual boundaries across the entire screen.",
21
20
  },
22
21
  {
23
22
  id: "layout",
24
23
  name: "Layout & Spatial Balance",
25
- description: "You are a strict QA inspector for layout quality. Your job is to find spatial problems.\n" +
24
+ description: "You are evaluating this screen as a real user who expects a well-organized, visually balanced interface.\n" +
26
25
  "Assume the layout has issues until you verify otherwise.\n\n" +
27
26
  "CHECK EACH ASPECT:\n" +
28
- "1. LETTERBOXING: Does the app appear to run inside a smaller box with black/empty bars? This is score 0, no exceptions.\n" +
29
- "2. SCREEN UTILIZATION: Is there a large empty area that serves no purpose? Is content crammed into only half the screen while the rest is wasted?\n" +
30
- "3. ALIGNMENT: Pick any two similar elements (e.g., two cards, two labels). Are their left edges aligned? Are the gaps between them equal? Misalignment = deduction.\n" +
31
- "4. SPACING: Are gaps between elements consistent? Measure visually: if card A-to-B gap looks different from card B-to-C gap, that is a problem.\n" +
32
- "5. RESPONSIVE FIT: Does the layout feel designed for this screen size, or does it look like it was designed for a different device?\n\n" +
33
- "SCORING ATTITUDE: Do not be generous. A 'basically fine' layout is a 6, not an 8.\n" +
34
- "NOTE: Do NOT evaluate overlap (that is overlap), colors (that is style), or text meaning (that is info_clarity).",
35
- scoringGuide: "0-2: Letterboxing detected, or layout is severely broken (elements piled up, massive empty areas, content off-screen).\n" +
36
- "3-4: Major layout issues — clearly uneven spacing, significant misalignment, or large wasted areas.\n" +
37
- "5-6: Layout is functional but has noticeable imperfections — some uneven spacing or minor alignment issues. This is the 'OK but not great' range.\n" +
27
+ "1. LETTERBOXING: Does the app appear to run inside a smaller box with black or empty bars on any side? This is score 0, no exceptions.\n" +
28
+ "2. SCREEN UTILIZATION: Is there a large empty area that serves no purpose? Is content crammed into only part of the screen while the rest is wasted? A user should feel the app was designed for this exact screen size.\n" +
29
+ "3. ALIGNMENT: Pick any two similar elements (e.g., two cards, two labels). Are their edges aligned? Are the gaps between them equal? Visible misalignment = deduction.\n" +
30
+ "4. SPACING CONSISTENCY: Are gaps between elements consistent? If the space between Card A and Card B looks visibly different from the space between Card B and Card C, that is a problem.\n" +
31
+ "5. RESPONSIVE FIT: Does the layout feel natural for this screen size, or does it look stretched, squished, or designed for a different device?\n" +
32
+ "6. CONTENT OVERFLOW: Does any content look like it has outgrown its container? Look for: text that wraps unexpectedly causing one card to be noticeably taller than its neighbor in a grid, content that appears cut off at a container edge, or scrollable areas that clip content in a way that confuses the user about whether more content exists.\n\n" +
33
+ "SCORING ATTITUDE: Do not be generous. A layout that 'looks fine at a glance' but has visible imperfections upon closer inspection is a 6, not an 8.\n" +
34
+ "NOTE: Do NOT evaluate element collision (that is overlap), color choices (that is style), or text meaning (that is info_clarity).",
35
+ scoringGuide: "0-2: Letterboxing detected, or layout is severely broken (elements piled on top of each other, massive unusable empty areas, content pushed off-screen).\n" +
36
+ "3-4: Major layout issues — clearly uneven spacing, significant misalignment, or large wasted areas that make the app feel unfinished.\n" +
37
+ "5-6: Layout is functional but has noticeable imperfections — some uneven spacing, minor alignment issues, or one area that feels out of balance.\n" +
38
38
  "7-8: Well-structured layout with only very minor imperfections that require careful inspection to notice.\n" +
39
39
  "9-10: Flawless — pixel-perfect alignment, perfectly consistent spacing, optimal screen utilization. Extremely rare.",
40
40
  },
41
41
  {
42
42
  id: "info_clarity",
43
43
  name: "Information Clarity & Readability",
44
- description: "You are a strict QA inspector for information clarity. Your job is to find confusing elements.\n" +
45
- "Imagine you are a FIRST-TIME user of this app who has never seen it before.\n\n" +
44
+ description: "You are evaluating this screen as a FIRST-TIME user who has never seen this app before.\n" +
45
+ "Your goal is to determine how quickly and easily a new user could understand this screen.\n\n" +
46
46
  "CHECK EACH ASPECT:\n" +
47
- "1. FIRST IMPRESSION: Within 2 seconds of looking at this screen, can you tell what it does and what the main action is? If not, that is a problem.\n" +
48
- "2. BUTTON LABELS: Read every button/link text. Would a first-time user understand what each does? 'Submit' is clearer than a bare icon.\n" +
49
- "3. HIERARCHY: Is the most important element (title, CTA) visually the most prominent? Or is secondary info competing for attention?\n" +
50
- "4. TEXT TRUNCATION: Is any text cut off with '...' or overflowing? Even one truncated label = deduction.\n" +
51
- "5. CONTRAST: Is all text easily readable against its background? Check small/light text especially.\n" +
52
- "6. ICON MEANING: Are there any icons without labels that could be ambiguous?\n\n" +
53
- "SCORING ATTITUDE: If any single element is confusing, cap the score at 7. " +
54
- "Multiple unclear elements = score 5 or below.\n" +
55
- "NOTE: Do NOT evaluate spacing (layout), overlap (overlap), or color harmony (style).",
56
- scoringGuide: "0-2: Screen purpose is unclear, CTA is missing/hidden, or labels are severely misleading.\n" +
57
- "3-4: Core purpose is guessable but requires effort. Multiple ambiguous labels or icons.\n" +
58
- "5-6: Generally understandable but has 1-2 confusing elements (ambiguous icon, truncated text, weak hierarchy).\n" +
59
- "7-8: Clear and well-organized with only one very minor issue. First-time user would understand immediately.\n" +
60
- "9-10: Crystal clear — perfect hierarchy, unambiguous labels, no truncation, excellent contrast. Every element self-explanatory.",
47
+ "1. FIRST IMPRESSION: Within 2 seconds of looking at this screen, can you tell what it does and what the main action is? If it takes effort to figure out the screen's purpose, that is a problem.\n" +
48
+ "2. BUTTON & LABEL CLARITY: Read every button, link, and label. Would a first-time user immediately understand what each one does? Vague labels like 'Go' or unlabeled icon-only buttons are problematic.\n" +
49
+ "3. VISUAL HIERARCHY: Is the most important element (page title, primary action button) visually the most prominent? Or do secondary elements compete for the user's attention?\n" +
50
+ "4. TEXT COMPLETENESS: Is any text cut off with '...' (truncation), overflowing its container, or so small that a user would struggle to read it? Even one truncated label that hides meaningful information = deduction.\n" +
51
+ "5. READABILITY & CONTRAST: Can all text be comfortably read against its background? Pay special attention to small text, light-colored text on light backgrounds, or text placed over images.\n" +
52
+ "6. ICON MEANING: Are there any icons without text labels whose meaning might be ambiguous to a new user? A trash can icon is universally understood; a custom abstract icon is not.\n\n" +
53
+ "SCORING ATTITUDE: If any single element would make a first-time user pause and wonder 'what does this do?', cap the score at 7. Multiple confusing elements = score 5 or below.\n" +
54
+ "NOTE: Do NOT evaluate spacing (layout), visual collision (overlap), or color harmony (style).",
55
+ scoringGuide: "0-2: The screen's purpose is unclear, the primary action is missing or hidden, or labels are severely misleading.\n" +
56
+ "3-4: The core purpose is guessable but requires effort. Multiple labels or icons are ambiguous.\n" +
57
+ "5-6: Generally understandable but 1-2 elements are confusing (ambiguous icon, truncated text hiding important info, weak visual hierarchy).\n" +
58
+ "7-8: Clear and well-organized with only one very minor issue. A first-time user would understand the screen immediately.\n" +
59
+ "9-10: Crystal clear — perfect hierarchy, every label self-explanatory, no truncation, excellent contrast. A user of any background would instantly understand this screen.",
61
60
  },
62
61
  {
63
62
  id: "style",
64
63
  name: "Visual Style & Consistency",
65
- description: "You are a strict QA inspector for visual design consistency. Your job is to find inconsistencies.\n" +
66
- "Compare every element against every other element of the same type.\n\n" +
64
+ description: "You are evaluating this screen as a user who expects a polished, professionally designed app.\n" +
65
+ "Your job is to find visual inconsistencies and rendering defects that make the app look unfinished.\n\n" +
67
66
  "CHECK EACH ASPECT:\n" +
68
- "1. COLOR PALETTE: Count the number of distinct colors used. Do they feel intentional and harmonious, or random?\n" +
69
- "2. TYPOGRAPHY: Are there more than 2 font sizes that don't follow a clear scale? Mixed font weights without reason?\n" +
70
- "3. COMPONENT STYLING: Compare all buttons — same corner radius? Same padding? Compare all cards — same shadow, border, background?\n" +
71
- "4. ICONOGRAPHY: Are all icons the same style (outline vs filled, same stroke width)? Mixed styles = deduction.\n" +
72
- "5. POLISH: Does it look professionally designed or like a prototype? Placeholder content (e.g., 'Lorem ipsum', stock icons) = major deduction.\n\n" +
73
- "SCORING ATTITUDE: Consistency is binary per element — if two buttons look different, they are inconsistent, period.\n" +
74
- "NOTE: Do NOT evaluate text meaning (info_clarity), spatial layout (layout), or overlap (overlap).",
75
- scoringGuide: "0-2: No design system clashing colors, random fonts, inconsistent components everywhere.\n" +
76
- "3-4: Some design intent visible but multiple clear inconsistencies (different button styles, mixed icon sets).\n" +
77
- "5-6: Basic consistency exists but with noticeable deviations. 'Mostly OK' = 6 at best.\n" +
78
- "7-8: Strong consistency with only 1 minor deviation requiring careful inspection.\n" +
79
- "9-10: Pixel-perfect design system adherence. Every element cohesive. Extremely rare.",
67
+ "1. COLOR PALETTE: Count the number of distinct colors used. Do they feel intentional and harmonious, or random and clashing?\n" +
68
+ "2. TYPOGRAPHY: Are font sizes used consistently? Are there mixed font weights that don't follow a clear hierarchy? More than 2-3 font sizes without a clear reason is a problem.\n" +
69
+ "3. COMPONENT CONSISTENCY: Compare all elements of the same type. Do all buttons have the same shape, size, and styling? Do all cards have the same shadows, borders, and corner radius? Any difference between similar components = deduction.\n" +
70
+ "4. ICONOGRAPHY: Are all icons visually consistent (all outline style OR all filled style, consistent stroke width and sizing)? Mixing styles within the same screen = deduction.\n" +
71
+ "5. POLISH & FINISH: Does the app look professionally designed, or does it feel like an unfinished prototype? Placeholder content (e.g., 'Lorem ipsum', default system icons used inappropriately) = major deduction.\n" +
72
+ "6. RENDERING INTEGRITY: Inspect each UI element individually for visual defects. Look for: a visible ring, halo, or unintended border of a different color around an element (e.g., a white circle visible behind a colored circular button — this means the element was not built cleanly), mismatched shapes within a single component, clipping artifacts that cut off part of an icon or image, or any visual glitch that makes a component look broken. Even 1-2 pixels of visible defect = deduction.\n\n" +
73
+ "SCORING ATTITUDE: Consistency is binary — if two buttons of the same type look different, they are inconsistent, period. A single rendering defect on any element caps the score at 7.\n" +
74
+ "NOTE: Do NOT evaluate text meaning (info_clarity), spatial arrangement (layout), or element collision (overlap).",
75
+ scoringGuide: "0-2: No design system clashing colors, random fonts, inconsistent components everywhere, or multiple rendering defects.\n" +
76
+ "3-4: Some design intent visible but multiple clear inconsistencies (different button styles, mixed icon sets, visible rendering glitches).\n" +
77
+ "5-6: Basic consistency exists but with noticeable deviations or at least one obvious rendering defect.\n" +
78
+ "7-8: Strong consistency with only 1 minor deviation or subtle defect requiring careful inspection.\n" +
79
+ "9-10: Pixel-perfect design system adherence. Every element cohesive and cleanly rendered. Extremely rare.",
80
80
  },
81
81
  {
82
82
  id: "action_result",
83
83
  name: "Action Result Verification",
84
- description: "You are a strict QA inspector verifying whether the action actually worked.\n" +
85
- "The step's expectedOutcome describes what SHOULD have happened. Compare the screenshot against it.\n\n" +
84
+ description: "You are evaluating whether the action the user just performed actually produced the expected result.\n" +
85
+ "The step's expectedOutcome describes what SHOULD have happened. Compare the current screenshot against it.\n\n" +
86
86
  "CHECK EACH ASPECT:\n" +
87
- "1. SCREEN TRANSITION: If the action was a navigation tap, did the correct destination screen appear? Wrong screen = score 0.\n" +
88
- "2. STATE CHANGE: If the action should toggle/add/remove something, is that change visually confirmed in the screenshot?\n" +
89
- "3. ERROR PRESENCE: Are there any error dialogs, crash screens, or infinite loading spinners? These indicate failure.\n" +
90
- "4. NO RESPONSE: Does the screen look identical to before the action? If so, the action likely missed or failed.\n\n" +
91
- "SCORING ATTITUDE: If the expectedOutcome is not clearly achieved, score LOW. Partial success is still a problem.\n" +
92
- "If this step is a pure observation (no expectedOutcome), score 10.",
93
- scoringGuide: "0-2: Action clearly failed — wrong screen, error dialog, crash, or no UI response at all.\n" +
94
- "3-4: Action had some effect but the result does not match expectedOutcome (e.g., navigated to wrong tab).\n" +
95
- "5-6: Action partially achieved the goal but with unexpected side effects or missing expected elements.\n" +
96
- "7-8: Action succeeded — the expected screen/state is present but with minor discrepancies.\n" +
97
- "9-10: Action fully succeeded — screenshot exactly matches expectedOutcome. Or no expectedOutcome (pure observation).",
87
+ "1. CORRECT DESTINATION: If the user tapped a navigation element, did the correct screen appear? Landing on the wrong screen = score 0.\n" +
88
+ "2. VISIBLE STATE CHANGE: If the action should have added, removed, toggled, or updated something, is that change clearly visible in the screenshot? The user should be able to confirm the action worked.\n" +
89
+ "3. ERROR INDICATORS: Are there any error dialogs, crash screens, blank white screens, or infinite loading spinners? These all indicate the action failed.\n" +
90
+ "4. NO VISIBLE RESPONSE: Does the screen look identical to before the action was performed? If nothing changed, the action likely failed or missed its target.\n\n" +
91
+ "SCORING ATTITUDE: If the expectedOutcome is not clearly achieved in the screenshot, score LOW. Partial success is still a problem — the user expects the action to fully work.\n" +
92
+ "If this step is a pure observation with no expectedOutcome specified, score 10.",
93
+ scoringGuide: "0-2: Action clearly failed — wrong screen, error dialog, crash, blank screen, or no visible response at all.\n" +
94
+ "3-4: Action had some effect but the result does not match the expectedOutcome (e.g., navigated to the wrong tab, wrong data displayed).\n" +
95
+ "5-6: Action partially achieved the goal but with unexpected side effects, missing expected elements, or incomplete state changes.\n" +
96
+ "7-8: Action succeeded — the expected screen or state is present but with minor discrepancies from the expectedOutcome.\n" +
97
+ "9-10: Action fully succeeded — the screenshot clearly and completely matches the expectedOutcome. Or this is a pure observation step with no expectedOutcome.",
98
98
  },
99
99
  ];
100
100
  export const REQUIRED_DIMS = DIMENSIONS.map(d => d.id);
@@ -1 +1 @@
1
- {"version":3,"file":"checklist.js","sourceRoot":"","sources":["../../../src/evaluation/checklist.ts"],"names":[],"mappings":"AAEA,+DAA+D;AAC/D,MAAM,CAAC,MAAM,UAAU,GAAoB;IACvC;QACI,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,8BAA8B;QACpC,WAAW,EACP,gGAAgG;YAChG,qGAAqG;YACrG,wFAAwF;YACxF,oDAAoD;YACpD,8VAA8V;YAC9V,6MAA6M;YAC7M,0KAA0K;YAC1K,+IAA+I;YAC/I,wOAAwO;YACxO,6GAA6G;QACjH,YAAY,EACR,wLAAwL;YACxL,kJAAkJ;YAClJ,sHAAsH;YACtH,sHAAsH;YACtH,8HAA8H;KACrI;IACD;QACI,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,0BAA0B;QAChC,WAAW,EACP,2FAA2F;YAC3F,8DAA8D;YAC9D,sBAAsB;YACtB,2HAA2H;YAC3H,qJAAqJ;YACrJ,uKAAuK;YACvK,mJAAmJ;YACnJ,yIAAyI;YACzI,qFAAqF;YACrF,kHAAkH;QACtH,YAAY,EACR,0HAA0H;YAC1H,uGAAuG;YACvG,qJAAqJ;YACrJ,6GAA6G;YAC7G,qHAAqH;KAC5H;IACD;QACI,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,mCAAmC;QACzC,WAAW,EACP,kGAAkG;YAClG,iFAAiF;YACjF,sBAAsB;YACtB,sJAAsJ;YACtJ,2IAA2I;YAC3I,uIAAuI;YACvI,4GAA4G;YAC5G,uGAAuG;YACvG,kFAAkF;YAClF,4EAA4E;YAC5E,iDAAiD;YACjD,sFAAsF;QAC1F,YAAY,EACR,6FAA6F;YAC7F,2FAA2F;YAC3F,kHAAkH;YAClH,+GAA+G;YAC/G,iIAAiI;KACxI;IACD;QACI,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,4BAA4B;QAClC,WAAW,EACP,qGAAqG;YACrG,yEAAyE;YACzE,sBAAsB;YACtB,mHAAmH;YACnH,uHAAuH;YACvH,sIAAsI;YACtI,kHAAkH;YAClH,oJAAoJ;YACpJ,uHAAuH;YACvH,mGAAmG;QACvG,YAAY,EACR,8FAA8F;YAC9F,kHAAkH;YAClH,0FAA0F;YAC1F,qFAAqF;YACrF,sFAAsF;KAC7F;IACD;QACI,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,4BAA4B;QAClC,WAAW,EACP,+EAA+E;YAC/E,wGAAwG;YACxG,sBAAsB;YACtB,gIAAgI;YAChI,2HAA2H;YAC3H,wHAAwH;YACxH,qHAAqH;YACrH,oHAAoH;YACpH,oEAAoE;QACxE,YAAY,EACR,6FAA6F;YAC7F,6GAA6G;YAC7G,0GAA0G;YAC1G,8FAA8F;YAC9F,sHAAsH;KAC7H;CACJ,CAAC;AAEF,MAAM,CAAC,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACvD,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC"}
1
+ {"version":3,"file":"checklist.js","sourceRoot":"","sources":["../../../src/evaluation/checklist.ts"],"names":[],"mappings":"AAEA,+DAA+D;AAC/D,MAAM,CAAC,MAAM,UAAU,GAAoB;IACvC;QACI,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,8BAA8B;QACpC,WAAW,EACP,qGAAqG;YACrG,wFAAwF;YACxF,oDAAoD;YACpD,iaAAia;YACja,kQAAkQ;YAClQ,2OAA2O;YAC3O,2LAA2L;YAC3L,2KAA2K;YAC3K,4GAA4G;QAChH,YAAY,EACR,6MAA6M;YAC7M,iKAAiK;YACjK,6JAA6J;YAC7J,gJAAgJ;YAChJ,kJAAkJ;KACzJ;IACD;QACI,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,0BAA0B;QAChC,WAAW,EACP,4GAA4G;YAC5G,8DAA8D;YAC9D,sBAAsB;YACtB,0IAA0I;YAC1I,4NAA4N;YAC5N,0KAA0K;YAC1K,6LAA6L;YAC7L,kJAAkJ;YAClJ,4VAA4V;YAC5V,uJAAuJ;YACvJ,mIAAmI;QACvI,YAAY,EACR,4JAA4J;YAC5J,yIAAyI;YACzI,oJAAoJ;YACpJ,6GAA6G;YAC7G,qHAAqH;KAC5H;IACD;QACI,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,mCAAmC;QACzC,WAAW,EACP,2FAA2F;YAC3F,+FAA+F;YAC/F,sBAAsB;YACtB,qMAAqM;YACrM,4MAA4M;YAC5M,kLAAkL;YAClL,4NAA4N;YAC5N,iMAAiM;YACjM,yLAAyL;YACzL,mLAAmL;YACnL,+FAA+F;QACnG,YAAY,EACR,qHAAqH;YACrH,mGAAmG;YACnG,+IAA+I;YAC/I,4HAA4H;YAC5H,4KAA4K;KACnL;IACD;QACI,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,4BAA4B;QAClC,WAAW,EACP,iGAAiG;YACjG,yGAAyG;YACzG,sBAAsB;YACtB,gIAAgI;YAChI,oLAAoL;YACpL,kPAAkP;YAClP,oLAAoL;YACpL,wNAAwN;YACxN,ifAAif;YACjf,0LAA0L;YAC1L,kHAAkH;QACtH,YAAY,EACR,6HAA6H;YAC7H,8IAA8I;YAC9I,0GAA0G;YAC1G,sGAAsG;YACtG,2GAA2G;KAClH;IACD;QACI,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,4BAA4B;QAClC,WAAW,EACP,wGAAwG;YACxG,gHAAgH;YAChH,sBAAsB;YACtB,0IAA0I;YAC1I,6MAA6M;YAC7M,6JAA6J;YAC7J,mKAAmK;YACnK,kLAAkL;YAClL,iFAAiF;QACrF,YAAY,EACR,gHAAgH;YAChH,2IAA2I;YAC3I,qIAAqI;YACrI,0HAA0H;YAC1H,+JAA+J;KACtK;CACJ,CAAC;AAEF,MAAM,CAAC,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACvD,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC"}
@@ -0,0 +1,43 @@
1
+ export declare function getPendingTaskState(caseName: string, stepIndex: number): {
2
+ completed: boolean;
3
+ message: string;
4
+ dimensionId?: undefined;
5
+ dimensionName?: undefined;
6
+ description?: undefined;
7
+ scoringGuide?: undefined;
8
+ token?: undefined;
9
+ stepIndex?: undefined;
10
+ } | {
11
+ completed: boolean;
12
+ dimensionId: string;
13
+ dimensionName: string;
14
+ description: string;
15
+ scoringGuide: string;
16
+ token: string;
17
+ stepIndex: number;
18
+ message?: undefined;
19
+ };
20
+ export declare function validateAndAdvanceState(caseName: string, stepIndex: number, token: string, score: number, reason: string): {
21
+ completedDimId: string;
22
+ score: number;
23
+ nextState: {
24
+ completed: boolean;
25
+ message: string;
26
+ dimensionId?: undefined;
27
+ dimensionName?: undefined;
28
+ description?: undefined;
29
+ scoringGuide?: undefined;
30
+ token?: undefined;
31
+ stepIndex?: undefined;
32
+ } | {
33
+ completed: boolean;
34
+ dimensionId: string;
35
+ dimensionName: string;
36
+ description: string;
37
+ scoringGuide: string;
38
+ token: string;
39
+ stepIndex: number;
40
+ message?: undefined;
41
+ };
42
+ };
43
+ //# sourceMappingURL=state-machine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state-machine.d.ts","sourceRoot":"","sources":["../../../src/evaluation/state-machine.ts"],"names":[],"mappings":"AAQA,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;;;;;;;;;;;;;;;;;;EAwCtE;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;;;EA4CxH"}
@@ -0,0 +1,79 @@
1
+ import crypto from "crypto";
2
+ import { readLog, writeLog } from "../logger/audit-log.js";
3
+ import { DIMENSIONS, REQUIRED_DIMS } from "./checklist.js";
4
+ function generateToken() {
5
+ return "tok_" + crypto.randomBytes(4).toString("hex");
6
+ }
7
+ export function getPendingTaskState(caseName, stepIndex) {
8
+ const log = readLog(caseName);
9
+ if (!log) {
10
+ throw new Error(`Test case not found: ${caseName}`);
11
+ }
12
+ const step = log.steps[stepIndex];
13
+ if (!step) {
14
+ throw new Error(`Step ${stepIndex} not found in case ${caseName}`);
15
+ }
16
+ if (step.currentDimIndex >= REQUIRED_DIMS.length) {
17
+ return {
18
+ completed: true,
19
+ message: `All ${REQUIRED_DIMS.length} dimensions for Step ${stepIndex} are fully evaluated! You may now proceed to interact with the device further (e.g. tap, swipe) or call get_audit_status.`
20
+ };
21
+ }
22
+ // Generate a token if there isn't one
23
+ if (!step.evaluationToken) {
24
+ step.evaluationToken = generateToken();
25
+ writeLog(log);
26
+ }
27
+ const currentDimId = REQUIRED_DIMS[step.currentDimIndex];
28
+ const criteria = DIMENSIONS.find(d => d.id === currentDimId);
29
+ if (!criteria) {
30
+ throw new Error(`Dimension '${currentDimId}' not found.`);
31
+ }
32
+ return {
33
+ completed: false,
34
+ dimensionId: currentDimId,
35
+ dimensionName: criteria.name,
36
+ description: criteria.description,
37
+ scoringGuide: criteria.scoringGuide,
38
+ token: step.evaluationToken,
39
+ stepIndex: stepIndex
40
+ };
41
+ }
42
+ export function validateAndAdvanceState(caseName, stepIndex, token, score, reason) {
43
+ const log = readLog(caseName);
44
+ if (!log) {
45
+ throw new Error(`Test case not found: ${caseName}`);
46
+ }
47
+ const step = log.steps[stepIndex];
48
+ if (!step) {
49
+ throw new Error(`Step ${stepIndex} not found in case ${caseName}`);
50
+ }
51
+ if (step.currentDimIndex >= REQUIRED_DIMS.length) {
52
+ throw new Error(`Step ${stepIndex} is already fully evaluated.`);
53
+ }
54
+ if (!step.evaluationToken || step.evaluationToken !== token) {
55
+ throw new Error("Invalid token. Do not guess tokens or run multiple submissions at once.");
56
+ }
57
+ const currentDimId = REQUIRED_DIMS[step.currentDimIndex];
58
+ // Record the score
59
+ step.evaluations[currentDimId] = {
60
+ score,
61
+ reason,
62
+ };
63
+ // Advance the state
64
+ step.currentDimIndex += 1;
65
+ // Clear the old token and generate a new one if not completed
66
+ if (step.currentDimIndex < REQUIRED_DIMS.length) {
67
+ step.evaluationToken = generateToken();
68
+ }
69
+ else {
70
+ step.evaluationToken = undefined;
71
+ }
72
+ writeLog(log);
73
+ return {
74
+ completedDimId: currentDimId,
75
+ score,
76
+ nextState: getPendingTaskState(caseName, stepIndex)
77
+ };
78
+ }
79
+ //# sourceMappingURL=state-machine.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state-machine.js","sourceRoot":"","sources":["../../../src/evaluation/state-machine.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE3D,SAAS,aAAa;IAClB,OAAO,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,SAAiB;IACnE,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,IAAI,CAAC,eAAe,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC;QAC/C,OAAO;YACH,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,OAAO,aAAa,CAAC,MAAM,wBAAwB,SAAS,2HAA2H;SACnM,CAAC;IACN,CAAC;IAED,sCAAsC;IACtC,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;QACxB,IAAI,CAAC,eAAe,GAAG,aAAa,EAAE,CAAC;QACvC,QAAQ,CAAC,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IACzD,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,YAAY,CAAC,CAAC;IAE7D,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,cAAc,YAAY,cAAc,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO;QACH,SAAS,EAAE,KAAK;QAChB,WAAW,EAAE,YAAY;QACzB,aAAa,EAAE,QAAQ,CAAC,IAAI;QAC5B,WAAW,EAAE,QAAQ,CAAC,WAAW;QACjC,YAAY,EAAE,QAAQ,CAAC,YAAY;QACnC,KAAK,EAAE,IAAI,CAAC,eAAe;QAC3B,SAAS,EAAE,SAAS;KACvB,CAAC;AACN,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,QAAgB,EAAE,SAAiB,EAAE,KAAa,EAAE,KAAa,EAAE,MAAc;IACrH,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,IAAI,CAAC,eAAe,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC;QAC/C,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,8BAA8B,CAAC,CAAC;IACrE,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,eAAe
,IAAI,IAAI,CAAC,eAAe,KAAK,KAAK,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,yEAAyE,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAEzD,mBAAmB;IACnB,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,GAAG;QAC7B,KAAK;QACL,MAAM;KACT,CAAC;IAEF,oBAAoB;IACpB,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC;IAE1B,8DAA8D;IAC9D,IAAI,IAAI,CAAC,eAAe,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC;QAC9C,IAAI,CAAC,eAAe,GAAG,aAAa,EAAE,CAAC;IAC3C,CAAC;SAAM,CAAC;QACJ,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACrC,CAAC;IAED,QAAQ,CAAC,GAAG,CAAC,CAAC;IAEd,OAAO;QACH,cAAc,EAAE,YAAY;QAC5B,KAAK;QACL,SAAS,EAAE,mBAAmB,CAAC,QAAQ,EAAE,SAAS,CAAC;KACtD,CAAC;AACN,CAAC"}
@@ -7,12 +7,12 @@
7
7
  * and provides a rule-based, trackable evaluation workflow.
8
8
  *
9
9
  * Evaluation workflow for each step:
10
- * 1. Perform an action (tap / swipe / take_screenshot) — this auto-takes a screenshot and registers a step.
11
- * 2. For each required dimension (overlap, layout, info_clarity, style, action_result):
12
- * a. Call get_evaluation_criteria(dimension) to read the scoring rubric.
13
- * b. Visually inspect the screenshot.
14
- * c. Call submit_dimension_score to record your score.
15
- * 3. Call get_audit_status to confirm all dimensions are complete before proceeding to the next action.
10
+ * 1. Perform an action (tap / swipe / take_screenshot) — auto-takes a screenshot and registers a step.
11
+ * 2. Call evaluate(caseName, stepIndex) — returns the first dimension prompt + token.
12
+ * 3. Visually inspect the screenshot based on the prompt.
13
+ * 4. Call evaluate(caseName, stepIndex, token, score, reason) — records the score, returns the next dimension prompt + new token.
14
+ * 5. Repeat steps 3-4 until all 5 dimensions are evaluated.
15
+ * 6. Call get_audit_status to get the final markdown report.
16
16
  */
17
17
  export {};
18
18
  //# sourceMappingURL=index.d.ts.map
package/dist/src/index.js CHANGED
@@ -7,12 +7,12 @@
7
7
  * and provides a rule-based, trackable evaluation workflow.
8
8
  *
9
9
  * Evaluation workflow for each step:
10
- * 1. Perform an action (tap / swipe / take_screenshot) — this auto-takes a screenshot and registers a step.
11
- * 2. For each required dimension (overlap, layout, info_clarity, style, action_result):
12
- * a. Call get_evaluation_criteria(dimension) to read the scoring rubric.
13
- * b. Visually inspect the screenshot.
14
- * c. Call submit_dimension_score to record your score.
15
- * 3. Call get_audit_status to confirm all dimensions are complete before proceeding to the next action.
10
+ * 1. Perform an action (tap / swipe / take_screenshot) — auto-takes a screenshot and registers a step.
11
+ * 2. Call evaluate(caseName, stepIndex) — returns the first dimension prompt + token.
12
+ * 3. Visually inspect the screenshot based on the prompt.
13
+ * 4. Call evaluate(caseName, stepIndex, token, score, reason) — records the score, returns the next dimension prompt + new token.
14
+ * 5. Repeat steps 3-4 until all 5 dimensions are evaluated.
15
+ * 6. Call get_audit_status to get the final markdown report.
16
16
  */
17
17
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
18
18
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
@@ -20,12 +20,11 @@ import { launchAppSchema, launchApp } from "./tools/launch-app.js";
20
20
  import { takeScreenshotSchema, takeScreenshot } from "./tools/take-screenshot.js";
21
21
  import { tapSchema, tap } from "./tools/tap.js";
22
22
  import { swipeSchema, swipe } from "./tools/swipe.js";
23
- import { getEvaluationCriteriaSchema, getEvaluationCriteria } from "./tools/get-evaluation-criteria.js";
24
- import { submitDimensionScoreSchema, submitDimensionScore } from "./tools/submit-dimension-score.js";
23
+ import { evaluateSchema, evaluate } from "./tools/evaluate.js";
25
24
  import { getAuditStatusSchema, getAuditStatus } from "./tools/get-audit-status.js";
26
25
  const server = new McpServer({
27
26
  name: "ui-audit-mcp",
28
- version: "2.0.0",
27
+ version: "2.3.0",
29
28
  });
30
29
  // ─── Device operation tools ────────────────────────────────
31
30
  server.tool("launch_app", "Launch an iOS app on the simulator by its bundle ID.", launchAppSchema.shape, async (args) => launchApp(args));
@@ -36,12 +35,10 @@ server.tool("tap", "Tap at a position on screen (ratio 0-1). Automatically captu
36
35
  server.tool("swipe", "Swipe on screen (ratio 0-1). Automatically captures a screenshot and registers a tracked step. " +
37
36
  "After this call, evaluate the screenshot across all required dimensions before moving to the next action.", swipeSchema.shape, async (args) => swipe(args));
38
37
  // ─── Evaluation tools ──────────────────────────────────────
39
- server.tool("get_evaluation_criteria", "Get UI evaluation dimensions and scoring rubrics. " +
40
- "Call without arguments to list all dimension IDs. Pass a dimension ID (e.g. 'overlap') to get its detailed prompt and scoring guide. " +
41
- "Required dimensions for every step: overlap, layout, info_clarity, style, action_result.", getEvaluationCriteriaSchema.shape, async (args) => getEvaluationCriteria(args));
42
- server.tool("submit_dimension_score", "Submit a score (0-10) and reason for one UI dimension on a specific step. " +
43
- "Every step requires scores for all 5 dimensions: overlap, layout, info_clarity, style, action_result. " +
44
- "After submitting, continue with the remaining dimensions, then call get_audit_status to confirm completion.", submitDimensionScoreSchema.shape, async (args) => submitDimensionScore(args));
38
+ server.tool("evaluate", "Unified evaluation tool. Two modes:\n" +
39
+ "1) INITIAL: Call with only caseName + stepIndex (omit score/reason/token) returns the first pending dimension's prompt, scoring guide, and evaluationToken.\n" +
40
+ "2) SUBMIT: Call with caseName + stepIndex + evaluationToken + score + reason → records the score, advances to the next dimension, and returns its prompt + new token (or completion message).\n" +
41
+ "Required dimensions per step: overlap, layout, info_clarity, style, action_result.", evaluateSchema.shape, async (args) => evaluate(args));
45
42
  server.tool("get_audit_status", "Get the current dashboard or test case report. " +
46
43
  "Use this to see which dimensions are missing (Pending) or to get the final markdown report if all steps are fully evaluated.", getAuditStatusSchema.shape, async (args) => getAuditStatus(args));
47
44
  import { preflight } from "./device/adapter.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAClF,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,2BAA2B,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AACxG,OAAO,EAAE,0BAA0B,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACrG,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAEnF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IACzB,IAAI,EAAE,cAAc;IACpB,OAAO,EAAE,OAAO;CACnB,CAAC,CAAC;AAEH,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,YAAY,EACZ,sDAAsD,EACtD,eAAe,CAAC,KAAK,EACrB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAClC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,iBAAiB,EACjB,qFAAqF;IACrF,2GAA2G,EAC3G,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,KAAK,EACL,6GAA6G;IAC7G,2GAA2G,EAC3G,SAAS,CAAC,KAAK,EACf,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAC5B,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,OAAO,EACP,iGAAiG;IACjG,2GAA2G,EAC3G,WAAW,CAAC,KAAK,EACjB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAC9B,CAAC;AAEF,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,yBAAyB,EACzB,oDAAoD;IACpD,uIAAuI;IACvI,0FAA0F,EAC1F,2BAA2B,CAAC,KAAK,EACjC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAC9C,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,wBAAwB,EACxB,4EAA4E;IAC5E,wGAAwG;IACxG,6GAA6G,EAC7G,0BAA0B,CAAC,KAAK,EAChC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAC7C,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,kBAAkB,EAClB,iDAAiD;IACjD,8HAA8H,EAC9H,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAEhD,8DAA8D;AAE9D,KAAK,UAAU,IAAI;IACf,0DAA0D;IAC1D,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC1D,KAAK,MAAM,CAAC,IAAI,KAAK,C
AAC,OAAO,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;AACvE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;IAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAClF,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAEnF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IACzB,IAAI,EAAE,cAAc;IACpB,OAAO,EAAE,OAAO;CACnB,CAAC,CAAC;AAEH,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,YAAY,EACZ,sDAAsD,EACtD,eAAe,CAAC,KAAK,EACrB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAClC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,iBAAiB,EACjB,qFAAqF;IACrF,2GAA2G,EAC3G,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,KAAK,EACL,6GAA6G;IAC7G,2GAA2G,EAC3G,SAAS,CAAC,KAAK,EACf,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAC5B,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,OAAO,EACP,iGAAiG;IACjG,2GAA2G,EAC3G,WAAW,CAAC,KAAK,EACjB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAC9B,CAAC;AAEF,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,UAAU,EACV,uCAAuC;IACvC,iKAAiK;IACjK,iMAAiM;IACjM,oFAAoF,EACpF,cAAc,CAAC,KAAK,EACpB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CACjC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,kBAAkB,EAClB,iDAAiD;IACjD,8HAA8H,EAC9H,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAEhD,8DAA8D;AAE9D,KAAK,UAAU,IAAI;IACf,0DAA0D;IAC1D,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC1D,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,C
AAC;IAChC,OAAO,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;AACvE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;IAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"audit-log.d.ts","sourceRoot":"","sources":["../../../src/logger/audit-log.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,QAAQ,EAA8B,MAAM,aAAa,CAAC;AAmBnE,wBAAgB,OAAO,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAMzD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,QAAQ,GAAG,IAAI,CAG5C;AAED,wBAAgB,WAAW,IAAI,QAAQ,EAAE,CAexC;AAED,wBAAgB,UAAU,CACtB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,KAAK,GAAG,OAAO,GAAG,YAAY,EAC1C,cAAc,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,CAAA;CAAE,EACtC,eAAe,CAAC,EAAE,MAAM,GACzB,IAAI,CAuBN;AAED,wBAAgB,WAAW,CACvB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GACf,IAAI,CAiBN"}
1
+ {"version":3,"file":"audit-log.d.ts","sourceRoot":"","sources":["../../../src/logger/audit-log.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,QAAQ,EAA8B,MAAM,aAAa,CAAC;AAmBnE,wBAAgB,OAAO,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAMzD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,QAAQ,GAAG,IAAI,CAG5C;AAED,wBAAgB,WAAW,IAAI,QAAQ,EAAE,CAexC;AAED,wBAAgB,UAAU,CACtB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,KAAK,GAAG,OAAO,GAAG,YAAY,EAC1C,cAAc,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,CAAA;CAAE,EACtC,eAAe,CAAC,EAAE,MAAM,GACzB,IAAI,CAyBN;AAED,wBAAgB,WAAW,CACvB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GACf,IAAI,CAiBN"}
@@ -64,6 +64,8 @@ export function recordStep(caseName, stepIndex, description, actionType, screens
64
64
  expectedOutcome,
65
65
  timestamp: new Date().toISOString(),
66
66
  evaluations: existingStep ? existingStep.evaluations : {},
67
+ currentDimIndex: existingStep && existingStep.currentDimIndex !== undefined ? existingStep.currentDimIndex : 0,
68
+ evaluationToken: existingStep ? existingStep.evaluationToken : undefined,
67
69
  };
68
70
  writeLog(log);
69
71
  }
@@ -1 +1 @@
1
- {"version":3,"file":"audit-log.js","sourceRoot":"","sources":["../../../src/logger/audit-log.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AAGpB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,eAAe,CAAC,CAAC;AAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AAC5C,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;AAE1D,2BAA2B;AAC3B,SAAS,UAAU;IACf,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1E,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACxE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC;QAAE,EAAE,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;AAC1F,CAAC;AAED,UAAU,EAAE,CAAC;AAEb,SAAS,UAAU,CAAC,QAAgB;IAChC,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,QAAQ,OAAO,CAAC,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,QAAgB;IACpC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,CAAC;IAChB,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,GAAa;IAClC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACzC,EAAE,CAAC,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,UAAU,WAAW;IACvB,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACzC,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;YACnE,IAAI,CAAC;gBACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;YACnC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,OAAO,CAAC,KAAK,CAAC,6BAA6B,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;YAC1D,CAAC;QACL,CAAC;IACL,CAAC;IACD,OAAO,IAAI,CAAC;AAChB
,CAAC;AAED,MAAM,UAAU,UAAU,CACtB,QAAgB,EAChB,SAAiB,EACjB,WAAmB,EACnB,UAA0C,EAC1C,cAAsB,EACtB,WAAsC,EACtC,eAAwB;IAExB,IAAI,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,GAAG,GAAG;YACF,QAAQ;YACR,KAAK,EAAE,EAAE;SACZ,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAE1C,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG;QACnB,SAAS;QACT,WAAW;QACX,UAAU;QACV,cAAc;QACd,WAAW;QACX,eAAe;QACf,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE;KAC5D,CAAC;IAEF,QAAQ,CAAC,GAAG,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,WAAW,CACvB,QAAgB,EAChB,SAAiB,EACjB,SAAiB,EACjB,KAAa,EACb,MAAc;IAEd,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG;QAC1B,KAAK;QACL,MAAM;KACT,CAAC;IAEF,QAAQ,CAAC,GAAG,CAAC,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"audit-log.js","sourceRoot":"","sources":["../../../src/logger/audit-log.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AAGpB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,eAAe,CAAC,CAAC;AAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AAC5C,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;AAE1D,2BAA2B;AAC3B,SAAS,UAAU;IACf,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1E,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACxE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,cAAc,CAAC;QAAE,EAAE,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;AAC1F,CAAC;AAED,UAAU,EAAE,CAAC;AAEb,SAAS,UAAU,CAAC,QAAgB;IAChC,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,QAAQ,OAAO,CAAC,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,QAAgB;IACpC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,CAAC;IAChB,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,GAAa;IAClC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACzC,EAAE,CAAC,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,UAAU,WAAW;IACvB,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACzC,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;YACnE,IAAI,CAAC;gBACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;YACnC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,OAAO,CAAC,KAAK,CAAC,6BAA6B,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;YAC1D,CAAC;QACL,CAAC;IACL,CAAC;IACD,OAAO,IAAI,CAAC;AAChB
,CAAC;AAED,MAAM,UAAU,UAAU,CACtB,QAAgB,EAChB,SAAiB,EACjB,WAAmB,EACnB,UAA0C,EAC1C,cAAsB,EACtB,WAAsC,EACtC,eAAwB;IAExB,IAAI,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,GAAG,GAAG;YACF,QAAQ;YACR,KAAK,EAAE,EAAE;SACZ,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAE1C,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG;QACnB,SAAS;QACT,WAAW;QACX,UAAU;QACV,cAAc;QACd,WAAW;QACX,eAAe;QACf,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE;QACzD,eAAe,EAAE,YAAY,IAAI,YAAY,CAAC,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC9G,eAAe,EAAE,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS;KAC3E,CAAC;IAEF,QAAQ,CAAC,GAAG,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,WAAW,CACvB,QAAgB,EAChB,SAAiB,EACjB,SAAiB,EACjB,KAAa,EACb,MAAc;IAEd,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG;QAC1B,KAAK;QACL,MAAM;KACT,CAAC;IAEF,QAAQ,CAAC,GAAG,CAAC,CAAC;AAClB,CAAC"}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Tool: evaluate
3
+ *
4
+ * Unified evaluation tool that combines get_pending_task and submit_evaluation.
5
+ *
6
+ * Two modes:
7
+ * 1. Initial mode (score/reason/token omitted):
8
+ * Returns the first pending dimension's prompt, scoring guide, and a token.
9
+ *
10
+ * 2. Submit-and-advance mode (score + reason + token all provided):
11
+ * Validates the token, records the score, advances to the next dimension.
12
+ * If more dimensions remain, returns the next prompt + new token.
13
+ * If all done, returns a completion message.
14
+ */
15
+ import { z } from "zod";
16
+ export declare const evaluateSchema: z.ZodObject<{
17
+ caseName: z.ZodString;
18
+ stepIndex: z.ZodNumber;
19
+ evaluationToken: z.ZodOptional<z.ZodString>;
20
+ score: z.ZodOptional<z.ZodNumber>;
21
+ reason: z.ZodOptional<z.ZodString>;
22
+ }, "strip", z.ZodTypeAny, {
23
+ caseName: string;
24
+ stepIndex: number;
25
+ evaluationToken?: string | undefined;
26
+ score?: number | undefined;
27
+ reason?: string | undefined;
28
+ }, {
29
+ caseName: string;
30
+ stepIndex: number;
31
+ evaluationToken?: string | undefined;
32
+ score?: number | undefined;
33
+ reason?: string | undefined;
34
+ }>;
35
+ export declare function evaluate(input: z.infer<typeof evaluateSchema>): Promise<{
36
+ content: {
37
+ type: "text";
38
+ text: string;
39
+ }[];
40
+ }>;
41
+ //# sourceMappingURL=evaluate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluate.d.ts","sourceRoot":"","sources":["../../../src/tools/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;EAazB,CAAC;AAqBH,wBAAsB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC;;;;;GAmEnE"}
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Tool: evaluate
3
+ *
4
+ * Unified evaluation tool that combines get_pending_task and submit_evaluation.
5
+ *
6
+ * Two modes:
7
+ * 1. Initial mode (score/reason/token omitted):
8
+ * Returns the first pending dimension's prompt, scoring guide, and a token.
9
+ *
10
+ * 2. Submit-and-advance mode (score + reason + token all provided):
11
+ * Validates the token, records the score, advances to the next dimension.
12
+ * If more dimensions remain, returns the next prompt + new token.
13
+ * If all done, returns a completion message.
14
+ */
15
+ import { z } from "zod";
16
+ import { getPendingTaskState, validateAndAdvanceState } from "../evaluation/state-machine.js";
17
+ export const evaluateSchema = z.object({
18
+ caseName: z.string().describe("Name of the test case"),
19
+ stepIndex: z.number().int().positive().describe("Step index to evaluate"),
20
+ // Optional: omit all three for initial query, provide all three to submit
21
+ evaluationToken: z.string().optional().describe("The exact token from the previous evaluate call. Omit on the first call to get the initial dimension."),
22
+ score: z.number().int().min(0).max(10).optional().describe("Score from 0 (worst) to 10 (perfect). Required when submitting an evaluation."),
23
+ reason: z.string().optional().describe("Analysis and reason for this score. Required when submitting an evaluation."),
24
+ });
25
+ function renderDimensionPrompt(state, prefix) {
26
+ return `${prefix}\n\n` +
27
+ `🔴 CURRENT TASK: ${state.dimensionName}\n\n` +
28
+ `PROMPT: ${state.description}\n\n` +
29
+ `SCORING GUIDE: \n${state.scoringGuide}\n\n` +
30
+ `⚠️ ACTION REQUIRED:\n` +
31
+ `Inspect the screenshot specifically for this dimension. Then call \`evaluate\` with:\n` +
32
+ ` "evaluationToken": "${state.token}"\n` +
33
+ ` "score": <your score 0-10>\n` +
34
+ ` "reason": "<your analysis>"\n\n` +
35
+ `Do NOT evaluate other dimensions until you finish this one.`;
36
+ }
37
+ export async function evaluate(input) {
38
+ const hasScore = input.score !== undefined;
39
+ const hasReason = input.reason !== undefined;
40
+ const hasToken = input.evaluationToken !== undefined;
41
+ const submissionFieldCount = [hasScore, hasReason, hasToken].filter(Boolean).length;
42
+ // Validate: all three must be present or all absent
43
+ if (submissionFieldCount > 0 && submissionFieldCount < 3) {
44
+ return {
45
+ content: [{
46
+ type: "text",
47
+ text: "❌ Error: `evaluationToken`, `score`, and `reason` must ALL be provided together, or ALL be omitted.\n" +
48
+ "- Omit all three to get the first pending dimension.\n" +
49
+ "- Provide all three to submit an evaluation and advance."
50
+ }]
51
+ };
52
+ }
53
+ // ── Mode 1: Initial query (no score/reason/token) ──
54
+ if (submissionFieldCount === 0) {
55
+ const state = getPendingTaskState(input.caseName, input.stepIndex);
56
+ if (state.completed) {
57
+ return {
58
+ content: [{
59
+ type: "text",
60
+ text: state.message || "✅ All dimensions for this step are fully evaluated."
61
+ }]
62
+ };
63
+ }
64
+ const markdown = renderDimensionPrompt(state, `📋 Starting evaluation for Step ${input.stepIndex}:`);
65
+ return {
66
+ content: [{ type: "text", text: markdown }],
67
+ };
68
+ }
69
+ // ── Mode 2: Submit score and advance ──
70
+ const result = validateAndAdvanceState(input.caseName, input.stepIndex, input.evaluationToken, input.score, input.reason);
71
+ const nextState = result.nextState;
72
+ if (nextState.completed) {
73
+ return {
74
+ content: [{
75
+ type: "text",
76
+ text: `✅ Score ${result.score} recorded for [${result.completedDimId}].\n\n` +
77
+ `🎉 All dimensions for Step ${input.stepIndex} are fully evaluated!\n` +
78
+ `You may now proceed to interact with the device (tap, swipe) or call get_audit_status.`
79
+ }]
80
+ };
81
+ }
82
+ const markdown = renderDimensionPrompt(nextState, `✅ Score ${result.score} recorded for [${result.completedDimId}].\n\n➡️ Next dimension:`);
83
+ return {
84
+ content: [{ type: "text", text: markdown }],
85
+ };
86
+ }
87
+ //# sourceMappingURL=evaluate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluate.js","sourceRoot":"","sources":["../../../src/tools/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,MAAM,gCAAgC,CAAC;AAE9F,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC;IACzE,0EAA0E;IAC1E,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAC3C,uGAAuG,CAC1G;IACD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CACtD,+EAA+E,CAClF;IACD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAClC,6EAA6E,CAChF;CACJ,CAAC,CAAC;AAEH,SAAS,qBAAqB,CAAC,KAM9B,EAAE,MAAc;IACb,OAAO,GAAG,MAAM,MAAM;QAClB,oBAAoB,KAAK,CAAC,aAAa,MAAM;QAC7C,WAAW,KAAK,CAAC,WAAW,MAAM;QAClC,oBAAoB,KAAK,CAAC,YAAY,MAAM;QAC5C,uBAAuB;QACvB,wFAAwF;QACxF,yBAAyB,KAAK,CAAC,KAAK,KAAK;QACzC,gCAAgC;QAChC,mCAAmC;QACnC,6DAA6D,CAAC;AACtE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAqC;IAChE,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,KAAK,SAAS,CAAC;IAC3C,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC;IAC7C,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,KAAK,SAAS,CAAC;IAErD,MAAM,oBAAoB,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAEpF,oDAAoD;IACpD,IAAI,oBAAoB,GAAG,CAAC,IAAI,oBAAoB,GAAG,CAAC,EAAE,CAAC;QACvD,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,uGAAuG;wBACzG,wDAAwD;wBACxD,0DAA0D;iBACjE,CAAC;SACL,CAAC;IACN,CAAC;IAED,sDAAsD;IACtD,IAAI,oBAAoB,KAAK,CAAC,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;QAEnE,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YAClB,OAAO;gBACH,OAAO,EAAE,CAAC;wBACN,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,KAAK,CAAC,OAAO,IAAI,qDAAqD;qBAC/E,CAAC;aACL,CAAC;QACN,CAAC;QAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,mCAAmC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;QACrG,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,
CAAC;SACvD,CAAC;IACN,CAAC;IAED,yCAAyC;IACzC,MAAM,MAAM,GAAG,uBAAuB,CAClC,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,eAAgB,EACtB,KAAK,CAAC,KAAM,EACZ,KAAK,CAAC,MAAO,CAChB,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;IAEnC,IAAI,SAAS,CAAC,SAAS,EAAE,CAAC;QACtB,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,WAAW,MAAM,CAAC,KAAK,kBAAkB,MAAM,CAAC,cAAc,QAAQ;wBACxE,8BAA8B,KAAK,CAAC,SAAS,yBAAyB;wBACtE,wFAAwF;iBAC/F,CAAC;SACL,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,SAAS,EAC5C,WAAW,MAAM,CAAC,KAAK,kBAAkB,MAAM,CAAC,cAAc,0BAA0B,CAC3F,CAAC;IAEF,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;KACvD,CAAC;AACN,CAAC"}
@@ -6,38 +6,38 @@ export const getAuditStatusSchema = z.object({
6
6
  });
7
7
  // Helper for rendering incomplete template
8
8
  function renderIncompleteTemplate(log, missingItems) {
9
- let md = `# 📊 审计进度报告: ${log.caseName}\n`;
10
- md += `**状态**: 🚧 评估未完成\n\n`;
9
+ let md = `# 📊 Audit Progress Report: ${log.caseName}\n`;
10
+ md += `**Status**: 🚧 Evaluation Incomplete\n\n`;
11
11
  // Missing summary
12
- md += `## 🔴 待办评估任务 (Pending Evaluations)\n`;
13
- md += `Agent 请注意:您还需要完成以下维度的评估才能生成最终成绩:\n`;
12
+ md += `## 🔴 Pending Evaluations\n`;
13
+ md += `Attention Agent: You still need to complete the evaluations for the following dimensions to generate the final report:\n`;
14
14
  for (const item of missingItems) {
15
- md += `- Step ${item.stepIndex}: 缺失 [${item.missing.join(", ")}]\n`;
15
+ md += `- Step ${item.stepIndex}: Missing [${item.missing.join(", ")}]\n`;
16
16
  }
17
17
  md += `\n`;
18
18
  // Step details
19
- md += `## 📝 已评估记录\n`;
19
+ md += `## 📝 Evaluated Records\n`;
20
20
  md += renderStepDetails(log);
21
21
  return md;
22
22
  }
23
23
  // Helper for rendering complete template
24
24
  function renderCompleteTemplate(log, avg, passed, failedItems) {
25
- let md = `# 🏆 最终审计报告: ${log.caseName}\n`;
26
- md += `**状态**:评估已完成\n\n`;
27
- md += `## 📈 总成绩单\n`;
28
- md += `- **平均分**: ${avg.toFixed(1)} / 10\n`;
29
- md += `- **结论**: ${passed ? "✅ 通过 (Pass)" : "❌ 驳回 (Fail)"}\n`;
25
+ let md = `# 🏆 Final Audit Report: ${log.caseName}\n`;
26
+ md += `**Status**:Evaluation Complete\n\n`;
27
+ md += `## 📈 Overall Scores\n`;
28
+ md += `- **Average Score**: ${avg.toFixed(1)} / 10\n`;
29
+ md += `- **Result**: ${passed ? "✅ Pass" : "❌ Fail"}\n`;
30
30
  if (failedItems.length > 0) {
31
- md += `- **不合格项**:\n`;
31
+ md += `- **Failed Items**:\n`;
32
32
  for (const f of failedItems) {
33
- md += ` - Step ${f.stepIndex} [${f.dim}]: ${f.score} - ${f.reason}\n`;
33
+ md += ` - Step ${f.stepIndex} [${f.dim}]: Score ${f.score} - ${f.reason}\n`;
34
34
  }
35
35
  }
36
36
  else {
37
- md += `- **不合格项**: 无\n`;
37
+ md += `- **Failed Items**: None\n`;
38
38
  }
39
39
  md += `\n`;
40
- md += `## 📝 详细步骤\n`;
40
+ md += `## 📝 Detailed Steps\n`;
41
41
  md += renderStepDetails(log);
42
42
  return md;
43
43
  }
@@ -46,18 +46,18 @@ function renderStepDetails(log) {
46
46
  const steps = Object.values(log.steps).sort((a, b) => a.stepIndex - b.stepIndex);
47
47
  for (const step of steps) {
48
48
  md += `### Step ${step.stepIndex}: ${step.description}\n`;
49
- md += `- **操作**: ${step.actionType} ${step.coordinates ? `(x: ${step.coordinates.x}, y: ${step.coordinates.y})` : ""}\n`;
49
+ md += `- **Action**: ${step.actionType} ${step.coordinates ? `(x: ${step.coordinates.x}, y: ${step.coordinates.y})` : ""}\n`;
50
50
  if (step.expectedOutcome) {
51
- md += `- **预期结果**: ${step.expectedOutcome}\n`;
51
+ md += `- **Expected Outcome**: ${step.expectedOutcome}\n`;
52
52
  }
53
53
  for (const dim of REQUIRED_DIMS) {
54
54
  const ev = step.evaluations ? step.evaluations[dim] : undefined;
55
55
  if (ev) {
56
56
  const tag = ev.score >= PASSING_SCORE ? "Pass ✅" : "Fail ❌";
57
- md += `- **${dim}**: ${ev.score} ${tag} - ${ev.reason}\n`;
57
+ md += `- **${dim}**: Score ${ev.score} ${tag} - ${ev.reason}\n`;
58
58
  }
59
59
  else {
60
- md += `- **${dim}**: 🔴 缺失评估\n`;
60
+ md += `- **${dim}**: 🔴 Missing Evaluation\n`;
61
61
  }
62
62
  }
63
63
  md += `\n`;
@@ -1 +1 @@
1
- {"version":3,"file":"get-audit-status.js","sourceRoot":"","sources":["../../../src/tools/get-audit-status.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG1E,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gFAAgF,CAAC;CACvI,CAAC,CAAC;AAEH,2CAA2C;AAC3C,SAAS,wBAAwB,CAAC,GAAa,EAAE,YAAwD;IACrG,IAAI,EAAE,GAAG,gBAAgB,GAAG,CAAC,QAAQ,IAAI,CAAC;IAC1C,EAAE,IAAI,sBAAsB,CAAC;IAE7B,kBAAkB;IAClB,EAAE,IAAI,sCAAsC,CAAC;IAC7C,EAAE,IAAI,oCAAoC,CAAC;IAC3C,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAC9B,EAAE,IAAI,UAAU,IAAI,CAAC,SAAS,SAAS,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;IACxE,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,eAAe;IACf,EAAE,IAAI,eAAe,CAAC;IACtB,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,yCAAyC;AACzC,SAAS,sBAAsB,CAAC,GAAa,EAAE,GAAW,EAAE,MAAe,EAAE,WAAkB;IAC3F,IAAI,EAAE,GAAG,gBAAgB,GAAG,CAAC,QAAQ,IAAI,CAAC;IAC1C,EAAE,IAAI,qBAAqB,CAAC;IAE5B,EAAE,IAAI,cAAc,CAAC;IACrB,EAAE,IAAI,cAAc,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5C,EAAE,IAAI,aAAa,MAAM,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC;IAE9D,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,EAAE,IAAI,eAAe,CAAC;QACtB,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC1B,EAAE,IAAI,YAAY,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,KAAK,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;QAC5E,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,EAAE,IAAI,iBAAiB,CAAC;IAC5B,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,EAAE,IAAI,cAAc,CAAC;IACrB,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAa;IACpC,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IACjF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,EAAE,IAAI,YAAY,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,WAAW,IAAI,CAAC;QAC1D,EAAE,IAAI,aAAa,
IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,CAAC,QAAQ,IAAI,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QACzH,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,EAAE,IAAI,eAAe,IAAI,CAAC,eAAe,IAAI,CAAC;QAClD,CAAC;QAED,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;YAC9B,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAChE,IAAI,EAAE,EAAE,CAAC;gBACL,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,IAAI,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC5D,EAAE,IAAI,OAAO,GAAG,OAAO,EAAE,CAAC,KAAK,KAAK,GAAG,MAAM,EAAE,CAAC,MAAM,IAAI,CAAC;YAC/D,CAAC;iBAAM,CAAC;gBACJ,EAAE,IAAI,OAAO,GAAG,eAAe,CAAC;YACpC,CAAC;QACL,CAAC;QACD,EAAE,IAAI,IAAI,CAAC;IACf,CAAC;IACD,OAAO,EAAE,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA2C;IAC5E,IAAI,IAAI,GAAe,EAAE,CAAC;IAC1B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1B,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG,WAAW,EAAE,CAAC;IACzB,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,gFAAgF,EAAE,CAAC;SAC/H,CAAC;IACN,CAAC;IAED,IAAI,cAAc,GAAG,EAAE,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,WAAW,GAAG,IAAI,CAAC;QACvB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,WAAW,GAAU,EAAE,CAAC;QAC5B,IAAI,YAAY,GAA+C,EAAE,CAAC;QAElE,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YAElE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,WAAW,GAAG,KAAK,CAAC;gBACpB,YAAY,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,KAAK,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC
,EAAE,CAAC;gBAC7D,UAAU,IAAI,EAAE,CAAC,KAAK,CAAC;gBACvB,UAAU,IAAI,CAAC,CAAC;gBAChB,IAAI,EAAE,CAAC,KAAK,GAAG,aAAa,EAAE,CAAC;oBAC3B,WAAW,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC7F,CAAC;YACL,CAAC;QACL,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACf,cAAc,IAAI,wBAAwB,CAAC,GAAG,EAAE,YAAY,CAAC,GAAG,SAAS,CAAC;QAC9E,CAAC;aAAM,CAAC;YACJ,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YACzD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC;YACxC,cAAc,IAAI,sBAAsB,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,CAAC,GAAG,SAAS,CAAC;QACxF,CAAC;IACL,CAAC;IAED,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;KACpE,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"get-audit-status.js","sourceRoot":"","sources":["../../../src/tools/get-audit-status.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG1E,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gFAAgF,CAAC;CACvI,CAAC,CAAC;AAEH,2CAA2C;AAC3C,SAAS,wBAAwB,CAAC,GAAa,EAAE,YAAwD;IACrG,IAAI,EAAE,GAAG,+BAA+B,GAAG,CAAC,QAAQ,IAAI,CAAC;IACzD,EAAE,IAAI,0CAA0C,CAAC;IAEjD,kBAAkB;IAClB,EAAE,IAAI,6BAA6B,CAAC;IACpC,EAAE,IAAI,0HAA0H,CAAC;IACjI,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAC9B,EAAE,IAAI,UAAU,IAAI,CAAC,SAAS,cAAc,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;IAC7E,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,eAAe;IACf,EAAE,IAAI,2BAA2B,CAAC;IAClC,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,yCAAyC;AACzC,SAAS,sBAAsB,CAAC,GAAa,EAAE,GAAW,EAAE,MAAe,EAAE,WAAkB;IAC3F,IAAI,EAAE,GAAG,4BAA4B,GAAG,CAAC,QAAQ,IAAI,CAAC;IACtD,EAAE,IAAI,uCAAuC,CAAC;IAE9C,EAAE,IAAI,wBAAwB,CAAC;IAC/B,EAAE,IAAI,wBAAwB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IACtD,EAAE,IAAI,iBAAiB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC;IAExD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,EAAE,IAAI,uBAAuB,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC1B,EAAE,IAAI,YAAY,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,IAAI,CAAC;QACjF,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,EAAE,IAAI,4BAA4B,CAAC;IACvC,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,EAAE,IAAI,wBAAwB,CAAC;IAC/B,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAa;IACpC,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IACjF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,EAAE,IAAI,YAAY,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,WAAW,IAAI,CAAC;QAC1D,EAA
E,IAAI,iBAAiB,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,CAAC,QAAQ,IAAI,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QAC7H,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,EAAE,IAAI,2BAA2B,IAAI,CAAC,eAAe,IAAI,CAAC;QAC9D,CAAC;QAED,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;YAC9B,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAChE,IAAI,EAAE,EAAE,CAAC;gBACL,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,IAAI,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC5D,EAAE,IAAI,OAAO,GAAG,aAAa,EAAE,CAAC,KAAK,IAAI,GAAG,MAAM,EAAE,CAAC,MAAM,IAAI,CAAC;YACpE,CAAC;iBAAM,CAAC;gBACJ,EAAE,IAAI,OAAO,GAAG,6BAA6B,CAAC;YAClD,CAAC;QACL,CAAC;QACD,EAAE,IAAI,IAAI,CAAC;IACf,CAAC;IACD,OAAO,EAAE,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA2C;IAC5E,IAAI,IAAI,GAAe,EAAE,CAAC;IAC1B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1B,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG,WAAW,EAAE,CAAC;IACzB,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,gFAAgF,EAAE,CAAC;SAC/H,CAAC;IACN,CAAC;IAED,IAAI,cAAc,GAAG,EAAE,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,WAAW,GAAG,IAAI,CAAC;QACvB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,WAAW,GAAU,EAAE,CAAC;QAC5B,IAAI,YAAY,GAA+C,EAAE,CAAC;QAElE,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YAElE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,WAAW,GAAG,KAAK,CAAC;gBACpB,YAAY,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,KAAK,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,W
AAW,IAAI,EAAE,CAAC,EAAE,CAAC;gBAC7D,UAAU,IAAI,EAAE,CAAC,KAAK,CAAC;gBACvB,UAAU,IAAI,CAAC,CAAC;gBAChB,IAAI,EAAE,CAAC,KAAK,GAAG,aAAa,EAAE,CAAC;oBAC3B,WAAW,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC7F,CAAC;YACL,CAAC;QACL,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACf,cAAc,IAAI,wBAAwB,CAAC,GAAG,EAAE,YAAY,CAAC,GAAG,SAAS,CAAC;QAC9E,CAAC;aAAM,CAAC;YACJ,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YACzD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC;YACxC,cAAc,IAAI,sBAAsB,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,CAAC,GAAG,SAAS,CAAC;QACxF,CAAC;IACL,CAAC;IAED,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;KACpE,CAAC;AACN,CAAC"}
@@ -32,7 +32,7 @@ export async function swipe(input) {
32
32
  {
33
33
  type: "text",
34
34
  text: JSON.stringify({
35
- message: `Step ${input.stepIndex} recorded. Expected: "${input.expectedOutcome}". Please now evaluate this screenshot across all 5 dimensions (overlap, layout, info_clarity, style, action_result) using submit_dimension_score before proceeding.`,
35
+ message: `Step ${input.stepIndex} recorded. Expected: "${input.expectedOutcome}". Please call evaluate to retrieve the first evaluation dimension and its token before proceeding.`,
36
36
  caseName: input.caseName,
37
37
  stepIndex: input.stepIndex,
38
38
  expectedOutcome: input.expectedOutcome,
@@ -1 +1 @@
1
- {"version":3,"file":"swipe.js","sourceRoot":"","sources":["../../../src/tools/swipe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,MAAM,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,8BAA8B,CAAC;IACzE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IACvE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IACrE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,0BAA0B,CAAC;IACnE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACjF,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,2CAA2C,CAAC;IAC7E,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yFAAyF,CAAC;CAClI,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,KAAkC;IAC1D,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IACtF,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAClB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,oBAAoB,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC;SACjF,CAAC;IACN,CAAC;IAED,iDAAiD;IACjD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAExD,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IACjD,UAAU,CACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,OAAO,EACP,UAAU,CAAC,QAAQ,EACnB,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE,EACpC,KAAK,CAAC,eAAe,CACxB,CAAC;IAEF,OAAO;QACH,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,OAAgB;gBACtB,IAAI,EAAE,UAAU,CAAC,WAAW;gBAC5B,QAAQ,EAAE,WAAoB;aACjC;YACD;gBACI,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACjB,OAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,yBAAyB,KAAK,CAAC,eAAe,sKAAsK;oBACpP,QAAQ,EAAE,KAAK,CAAC,QAAQ
;oBACxB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;oBACtC,WAAW,EAAE,UAAU,CAAC,KAAK;oBAC7B,YAAY,EAAE,UAAU,CAAC,MAAM;iBAClC,CAAC;aACL;SACJ;KACJ,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"swipe.js","sourceRoot":"","sources":["../../../src/tools/swipe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,MAAM,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,8BAA8B,CAAC;IACzE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IACvE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IACrE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,0BAA0B,CAAC;IACnE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACjF,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,2CAA2C,CAAC;IAC7E,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yFAAyF,CAAC;CAClI,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,KAAkC;IAC1D,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IACtF,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAClB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,oBAAoB,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC;SACjF,CAAC;IACN,CAAC;IAED,iDAAiD;IACjD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAExD,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IACjD,UAAU,CACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,OAAO,EACP,UAAU,CAAC,QAAQ,EACnB,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE,EACpC,KAAK,CAAC,eAAe,CACxB,CAAC;IAEF,OAAO;QACH,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,OAAgB;gBACtB,IAAI,EAAE,UAAU,CAAC,WAAW;gBAC5B,QAAQ,EAAE,WAAoB;aACjC;YACD;gBACI,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACjB,OAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,yBAAyB,KAAK,CAAC,eAAe,qGAAqG;oBACnL,QAAQ,EAAE,KAAK,CAAC,QAAQ
;oBACxB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;oBACtC,WAAW,EAAE,UAAU,CAAC,KAAK;oBAC7B,YAAY,EAAE,UAAU,CAAC,MAAM;iBAClC,CAAC;aACL;SACJ;KACJ,CAAC;AACN,CAAC"}
@@ -20,7 +20,7 @@ export async function takeScreenshot(input) {
20
20
  {
21
21
  type: "text",
22
22
  text: JSON.stringify({
23
- message: `Step ${input.stepIndex} recorded. Please now evaluate this screenshot across all 5 dimensions (overlap, layout, info_clarity, style, action_result) using submit_dimension_score before proceeding.`,
23
+ message: `Step ${input.stepIndex} recorded. Please call evaluate to retrieve the first evaluation dimension and its token before proceeding.`,
24
24
  caseName: input.caseName,
25
25
  stepIndex: input.stepIndex,
26
26
  expectedOutcome: input.expectedOutcome ?? "(pure observation)",
@@ -1 +1 @@
1
- {"version":3,"file":"take-screenshot.js","sourceRoot":"","sources":["../../../src/tools/take-screenshot.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,MAAM,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACjF,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;IACrE,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8EAA8E,CAAC;CAClI,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA2C;IAC5E,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IAEjD,UAAU,CACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,YAAY,EACZ,UAAU,CAAC,QAAQ,EACnB,SAAS,EACT,KAAK,CAAC,eAAe,CACxB,CAAC;IAEF,OAAO;QACH,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,OAAgB;gBACtB,IAAI,EAAE,UAAU,CAAC,WAAW;gBAC5B,QAAQ,EAAE,WAAoB;aACjC;YACD;gBACI,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACjB,OAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,8KAA8K;oBAC9M,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,oBAAoB;oBAC9D,WAAW,EAAE,UAAU,CAAC,KAAK;oBAC7B,YAAY,EAAE,UAAU,CAAC,MAAM;iBAClC,CAAC;aACL;SACJ;KACJ,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"take-screenshot.js","sourceRoot":"","sources":["../../../src/tools/take-screenshot.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,MAAM,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACjF,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;IACrE,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8EAA8E,CAAC;CAClI,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA2C;IAC5E,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IAEjD,UAAU,CACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,YAAY,EACZ,UAAU,CAAC,QAAQ,EACnB,SAAS,EACT,KAAK,CAAC,eAAe,CACxB,CAAC;IAEF,OAAO;QACH,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,OAAgB;gBACtB,IAAI,EAAE,UAAU,CAAC,WAAW;gBAC5B,QAAQ,EAAE,WAAoB;aACjC;YACD;gBACI,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACjB,OAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,6GAA6G;oBAC7I,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,oBAAoB;oBAC9D,WAAW,EAAE,UAAU,CAAC,KAAK;oBAC7B,YAAY,EAAE,UAAU,CAAC,MAAM;iBAClC,CAAC;aACL;SACJ;KACJ,CAAC;AACN,CAAC"}
@@ -30,7 +30,7 @@ export async function tap(input) {
30
30
  {
31
31
  type: "text",
32
32
  text: JSON.stringify({
33
- message: `Step ${input.stepIndex} recorded. Expected: "${input.expectedOutcome}". Please now evaluate this screenshot across all 5 dimensions (overlap, layout, info_clarity, style, action_result) using submit_dimension_score before proceeding.`,
33
+ message: `Step ${input.stepIndex} recorded. Expected: "${input.expectedOutcome}". Please call evaluate to retrieve the first evaluation dimension and its token before proceeding.`,
34
34
  caseName: input.caseName,
35
35
  stepIndex: input.stepIndex,
36
36
  expectedOutcome: input.expectedOutcome,
@@ -1 +1 @@
1
- {"version":3,"file":"tap.js","sourceRoot":"","sources":["../../../src/tools/tap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,MAAM,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,CAAC,MAAM,CAAC;IAC9B,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,iDAAiD,CAAC;IACvF,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,+CAA+C,CAAC;IACrF,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACjF,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,2CAA2C,CAAC;IAC7E,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,4EAA4E,CAAC;CACrH,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,KAAgC;IACtD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAClD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAClB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,kBAAkB,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC;SAC/E,CAAC;IACN,CAAC;IAED,iDAAiD;IACjD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAExD,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IACjD,UAAU,CACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,KAAK,EACL,UAAU,CAAC,QAAQ,EACnB,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC,EAAE,EAC1B,KAAK,CAAC,eAAe,CACxB,CAAC;IAEF,OAAO;QACH,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,OAAgB;gBACtB,IAAI,EAAE,UAAU,CAAC,WAAW;gBAC5B,QAAQ,EAAE,WAAoB;aACjC;YACD;gBACI,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACjB,OAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,yBAAyB,KAAK,CAAC,eAAe,sKAAsK;oBACpP,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;oBACtC,WAAW,EAAE,UAAU,CAAC,KAAK;oBAC7B,YAAY,EAAE,UAAU,CAAC,MAAM;iBAClC,CAAC;aACL;SACJ;KACJ,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"tap.js","sourceRoot":"","sources":["../../../src/tools/tap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,MAAM,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,CAAC,MAAM,CAAC;IAC9B,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,iDAAiD,CAAC;IACvF,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,+CAA+C,CAAC;IACrF,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACjF,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,2CAA2C,CAAC;IAC7E,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,4EAA4E,CAAC;CACrH,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,KAAgC;IACtD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAClD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAClB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,kBAAkB,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC;SAC/E,CAAC;IACN,CAAC;IAED,iDAAiD;IACjD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAExD,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IACjD,UAAU,CACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,KAAK,EACL,UAAU,CAAC,QAAQ,EACnB,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC,EAAE,EAC1B,KAAK,CAAC,eAAe,CACxB,CAAC;IAEF,OAAO;QACH,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,OAAgB;gBACtB,IAAI,EAAE,UAAU,CAAC,WAAW;gBAC5B,QAAQ,EAAE,WAAoB;aACjC;YACD;gBACI,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACjB,OAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,yBAAyB,KAAK,CAAC,eAAe,qGAAqG;oBACnL,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;oBACtC,WAAW,EAAE,UAAU,CAAC,KAAK;oBAC7B,YAAY,EAAE,UAAU,CAAC,MAAM;iBAClC,CAAC;aACL;SACJ;KACJ,CAAC;AACN,CAAC"}
@@ -23,6 +23,8 @@ export interface StepRecord {
23
23
  screenshotPath: string;
24
24
  timestamp: string;
25
25
  evaluations: Record<string, DimensionScore>;
26
+ currentDimIndex: number;
27
+ evaluationToken?: string;
26
28
  }
27
29
  export interface AuditLog {
28
30
  caseName: string;
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,aAAa;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,KAAK,GAAG,OAAO,GAAG,YAAY,CAAC;IAC3C,WAAW,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;CAC/C;AAED,MAAM,WAAW,QAAQ;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;CACrC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,aAAa;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,KAAK,GAAG,OAAO,GAAG,YAAY,CAAC;IAC3C,WAAW,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IAC5C,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,QAAQ;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;CACrC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wa008/ui-audit-mcp",
3
- "version": "2.2.2",
3
+ "version": "2.3.1",
4
4
  "description": "MCP server for iOS app UI evaluation and testing, powered by idb + xcrun simctl",
5
5
  "type": "module",
6
6
  "main": "dist/src/index.js",
@@ -1,15 +0,0 @@
1
- import { z } from "zod";
2
- export declare const getEvaluationCriteriaSchema: z.ZodObject<{
3
- dimension: z.ZodOptional<z.ZodString>;
4
- }, "strip", z.ZodTypeAny, {
5
- dimension?: string | undefined;
6
- }, {
7
- dimension?: string | undefined;
8
- }>;
9
- export declare function getEvaluationCriteria(input: z.infer<typeof getEvaluationCriteriaSchema>): Promise<{
10
- content: {
11
- type: "text";
12
- text: string;
13
- }[];
14
- }>;
15
- //# sourceMappingURL=get-evaluation-criteria.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"get-evaluation-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/get-evaluation-criteria.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,eAAO,MAAM,2BAA2B;;;;;;EAEtC,CAAC;AAEH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC;;;;;GAyB7F"}
@@ -1,29 +0,0 @@
1
- import { z } from "zod";
2
- import { DIMENSIONS } from "../evaluation/checklist.js";
3
- export const getEvaluationCriteriaSchema = z.object({
4
- dimension: z.string().optional().describe("ID of the dimension (e.g., 'overlap'). Omit to list all available dimensions."),
5
- });
6
- export async function getEvaluationCriteria(input) {
7
- if (!input.dimension) {
8
- return {
9
- content: [{
10
- type: "text",
11
- text: JSON.stringify({
12
- availableDimensions: DIMENSIONS.map(d => ({ id: d.id, name: d.name })),
13
- tip: "Call get_evaluation_criteria with a specific dimension ID to see its scoring rubric."
14
- }, null, 2),
15
- }],
16
- };
17
- }
18
- const criteria = DIMENSIONS.find(d => d.id === input.dimension);
19
- if (!criteria) {
20
- throw new Error(`Dimension '${input.dimension}' not found. Available dimensions: ${DIMENSIONS.map(d => d.id).join(", ")}`);
21
- }
22
- const markdown = `# 审计准则:${criteria.name} [${criteria.id}]\n\n` +
23
- `## 🧠 核心考察点 (Description)\n${criteria.description}\n\n` +
24
- `## 📊 评分细则 (Scoring Guide)\n${criteria.scoringGuide}\n`;
25
- return {
26
- content: [{ type: "text", text: markdown }],
27
- };
28
- }
29
- //# sourceMappingURL=get-evaluation-criteria.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"get-evaluation-criteria.js","sourceRoot":"","sources":["../../../src/tools/get-evaluation-criteria.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAExD,MAAM,CAAC,MAAM,2BAA2B,GAAG,CAAC,CAAC,MAAM,CAAC;IAChD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+EAA+E,CAAC;CAC7H,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAkD;IAC1F,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;QACnB,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACjB,mBAAmB,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBACtE,GAAG,EAAE,sFAAsF;qBAC9F,EAAE,IAAI,EAAE,CAAC,CAAC;iBACd,CAAC;SACL,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,KAAK,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,cAAc,KAAK,CAAC,SAAS,sCAAsC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/H,CAAC;IAED,MAAM,QAAQ,GAAG,UAAU,QAAQ,CAAC,IAAI,KAAK,QAAQ,CAAC,EAAE,OAAO;QAC3D,8BAA8B,QAAQ,CAAC,WAAW,MAAM;QACxD,+BAA+B,QAAQ,CAAC,YAAY,IAAI,CAAC;IAE7D,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;KACvD,CAAC;AACN,CAAC"}
@@ -1,27 +0,0 @@
1
- import { z } from "zod";
2
- export declare const submitDimensionScoreSchema: z.ZodObject<{
3
- caseName: z.ZodString;
4
- stepIndex: z.ZodNumber;
5
- dimension: z.ZodString;
6
- score: z.ZodNumber;
7
- reason: z.ZodString;
8
- }, "strip", z.ZodTypeAny, {
9
- caseName: string;
10
- stepIndex: number;
11
- dimension: string;
12
- score: number;
13
- reason: string;
14
- }, {
15
- caseName: string;
16
- stepIndex: number;
17
- dimension: string;
18
- score: number;
19
- reason: string;
20
- }>;
21
- export declare function submitDimensionScore(input: z.infer<typeof submitDimensionScoreSchema>): Promise<{
22
- content: {
23
- type: "text";
24
- text: string;
25
- }[];
26
- }>;
27
- //# sourceMappingURL=submit-dimension-score.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"submit-dimension-score.d.ts","sourceRoot":"","sources":["../../../src/tools/submit-dimension-score.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,eAAO,MAAM,0BAA0B;;;;;;;;;;;;;;;;;;EAMrC,CAAC;AAEH,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,0BAA0B,CAAC;;;;;GA0B3F"}
@@ -1,35 +0,0 @@
1
- import { z } from "zod";
2
- import { recordScore, readLog } from "../logger/audit-log.js";
3
- import { REQUIRED_DIMS } from "../evaluation/checklist.js";
4
- export const submitDimensionScoreSchema = z.object({
5
- caseName: z.string().describe("Name of the test case"),
6
- stepIndex: z.number().int().positive().describe("Step index this evaluation belongs to"),
7
- dimension: z.string().describe(`The dimension ID (must be one of: ${REQUIRED_DIMS.join(", ")})`),
8
- score: z.number().int().min(0).max(10).describe("Score from 0 (worst) to 10 (perfect)"),
9
- reason: z.string().describe("Analysis and reason for this score"),
10
- });
11
- export async function submitDimensionScore(input) {
12
- if (!REQUIRED_DIMS.includes(input.dimension)) {
13
- throw new Error(`Invalid dimension '${input.dimension}'. Must be one of: ${REQUIRED_DIMS.join(", ")}`);
14
- }
15
- recordScore(input.caseName, input.stepIndex, input.dimension, input.score, input.reason);
16
- // After writing, calculate remaining dimensions for this step to provide a helpful hint
17
- const log = readLog(input.caseName);
18
- const step = log?.steps[input.stepIndex];
19
- const evaluated = step ? Object.keys(step.evaluations || {}) : [];
20
- const remaining = REQUIRED_DIMS.filter(d => !evaluated.includes(d));
21
- let hint;
22
- if (remaining.length === 0) {
23
- hint = `All 5 dimensions for Step ${input.stepIndex} are complete. You may call get_audit_status to review progress or proceed to the next action.`;
24
- }
25
- else {
26
- hint = `${remaining.length} dimension(s) still needed for Step ${input.stepIndex}: ${remaining.join(", ")}. Please evaluate them before moving to the next action.`;
27
- }
28
- return {
29
- content: [{
30
- type: "text",
31
- text: `Recorded [${input.dimension}] for ${input.caseName} Step ${input.stepIndex} — Score: ${input.score}. ${hint}`
32
- }]
33
- };
34
- }
35
- //# sourceMappingURL=submit-dimension-score.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"submit-dimension-score.js","sourceRoot":"","sources":["../../../src/tools/submit-dimension-score.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAE3D,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/C,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,uCAAuC,CAAC;IACxF,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qCAAqC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;IAChG,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,sCAAsC,CAAC;IACvF,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oCAAoC,CAAC;CACpE,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAiD;IACxF,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3C,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,CAAC,SAAS,sBAAsB,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3G,CAAC;IAED,WAAW,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAEzF,wFAAwF;IACxF,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAClE,MAAM,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IAEpE,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,IAAI,GAAG,6BAA6B,KAAK,CAAC,SAAS,gGAAgG,CAAC;IACxJ,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG,GAAG,SAAS,CAAC,MAAM,uCAAuC,KAAK,CAAC,SAAS,KAAK,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,0DAA0D,CAAC;IACxK,CAAC;IAED,OAAO;QACH,OAAO,EAAE,CAAC;gBACN,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,aAAa,KAAK,CAAC,SAAS,SAAS,KAAK,CAAC,QAAQ,SAAS,KAAK,CAAC,SAAS,aAAa,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE;aACvH,CAAC;KACL,CAAC;AACN,CAAC"}