@wa008/ui-audit-mcp 2.3.2 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,7 +4,7 @@ An MCP server for iOS app UI evaluation and testing.
4
4
 
5
5
  ## Features
6
6
  - **Device Control**: Launch apps, take screenshots, tap, and swipe on iOS Simulators.
7
- - **UI Evaluation**: Structured checklists for quality and style consistency.
7
+ - **UI Evaluation**: 3-dimension user-perspective evaluation (outcome, usability, aesthetics).
8
8
 
9
9
  ## Prerequisites
10
10
  - **Node.js 18+**
@@ -57,12 +57,26 @@ npm install
57
57
  | `evaluate` | Unified evaluation: get dimension prompt (initial) or submit score + get next (submit mode) |
58
58
  | `get_audit_status` | View missing evaluations dashboard or final markdown report |
59
59
 
60
+ ## Evaluation Dimensions
61
+
62
+ Each step is evaluated on up to 3 dimensions, presented in order from easiest to hardest:
63
+
64
+ | # | Dimension | User's Question | Applied When |
65
+ |---|-----------|----------------|--------------|
66
+ | 1 | **outcome** | "Did my action produce the expected result?" | Action steps (tap/swipe) with `expectedOutcome` |
67
+ | 2 | **usability** | "Can I see everything, understand it, and interact smoothly?" | All steps |
68
+ | 3 | **aesthetics** | "Does this look professional and polished?" | All steps |
69
+
70
+ - For observation steps (no `expectedOutcome`), `outcome` is skipped automatically.
71
+ - Each dimension must be completed before the next one is presented.
72
+ - Pass threshold: score ≥ 8.
73
+
60
74
  ## Typical Workflow
61
75
  1. `launch_app("com.example.app")`
62
76
  2. `take_screenshot("MyTestCase", 1, "Verify Initial Screen")` or `tap(0.5, 0.5, "MyTestCase", 2, "Click Login")`
63
77
  3. `evaluate("MyTestCase", 1)` → Returns the first dimension prompt + `evaluationToken`
64
- 4. Agent analyzes UI → `evaluate("MyTestCase", 1, token, 9, "No overlap")` → Records score, returns next dimension prompt + new token
65
- 5. Repeat step 4 until all 5 dimensions are evaluated
78
+ 4. Agent analyzes UI → `evaluate("MyTestCase", 1, token, 9, "Clean and clear")` → Records score, returns next dimension prompt + new token
79
+ 5. Repeat step 4 until all dimensions are evaluated (2 for observation, 3 for action steps)
66
80
  6. `get_audit_status(["MyTestCase"])` → Output the full markdown report
67
81
 
68
82
  ## Data
@@ -1,6 +1,14 @@
1
1
  import { ChecklistItem } from "../types.js";
2
- /** System-wide UI quality audit dimensions — strict scoring */
2
+ /**
3
+ * UI quality audit dimensions — 3 user-perspective dimensions, ordered easy → hard.
4
+ *
5
+ * Order: outcome → usability → aesthetics
6
+ * - outcome is skipped for pure observation steps (no expectedOutcome).
7
+ * - Each dimension must be completed before the next one is presented.
8
+ */
3
9
  export declare const DIMENSIONS: ChecklistItem[];
4
- export declare const REQUIRED_DIMS: string[];
10
+ export declare const ALL_DIMS: string[];
11
+ /** Get the applicable dimension IDs for a given step. */
12
+ export declare function getRequiredDimsForStep(hasExpectedOutcome: boolean): string[];
5
13
  export declare const PASSING_SCORE = 8;
6
14
  //# sourceMappingURL=checklist.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"checklist.d.ts","sourceRoot":"","sources":["../../../src/evaluation/checklist.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,+DAA+D;AAC/D,eAAO,MAAM,UAAU,EAAE,aAAa,EA2GrC,CAAC;AAEF,eAAO,MAAM,aAAa,UAA4B,CAAC;AACvD,eAAO,MAAM,aAAa,IAAI,CAAC"}
1
+ {"version":3,"file":"checklist.d.ts","sourceRoot":"","sources":["../../../src/evaluation/checklist.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C;;;;;;GAMG;AACH,eAAO,MAAM,UAAU,EAAE,aAAa,EAsFrC,CAAC;AAEF,eAAO,MAAM,QAAQ,UAA4B,CAAC;AAElD,yDAAyD;AACzD,wBAAgB,sBAAsB,CAAC,kBAAkB,EAAE,OAAO,GAAG,MAAM,EAAE,CAK5E;AAED,eAAO,MAAM,aAAa,IAAI,CAAC"}
@@ -1,102 +1,98 @@
1
- /** System-wide UI quality audit dimensions — strict scoring */
1
+ /**
2
+ * UI quality audit dimensions — 3 user-perspective dimensions, ordered easy → hard.
3
+ *
4
+ * Order: outcome → usability → aesthetics
5
+ * - outcome is skipped for pure observation steps (no expectedOutcome).
6
+ * - Each dimension must be completed before the next one is presented.
7
+ */
2
8
  export const DIMENSIONS = [
3
9
  {
4
- id: "overlap",
5
- name: "Element Overlap & Safe Areas",
6
- description: "You are evaluating this screen as a real user trying to read content and tap buttons efficiently.\n" +
7
- "Assume there ARE overlap issues until you have visually scanned the entire screen.\n\n" +
8
- "CHECK FOR VISUAL INTERFERENCE & COGNITIVE NOISE:\n" +
9
- "1. SYSTEM EDGE INTERFERENCE (CRITICAL): Scan the very top, bottom, and corners of the screen. Do any app elements (buttons, icons, avatars, titles) visually touch, overlap with, or sit uncomfortably close to system indicators (time, battery, signal, notch/Dynamic Island, home indicator bar)? If a user might confuse an app element with a system element, or accidentally tap the wrong one, this is a CRITICAL BUG.\n" +
10
- "2. INTERACTIVE CROWDING: Are any two tappable elements (buttons, links, cards) so close together that a user with a normal-sized finger might accidentally tap the wrong one? Are touch targets dangerously close to screen edges where system gestures occur?\n" +
11
- "3. CONTENT COLLISION: Do any visible elements (text, images, floating buttons, badges) visually overlap each other, making any of them harder to read, recognize, or tap? Two elements occupying the same visual space is always a bug.\n" +
12
- "4. MODAL/POPUP OBSTRUCTION: If a popup, sheet, or overlay is present, does it cover essential information that the user still needs to see, such as the content they are acting upon?\n\n" +
13
- "SCORING ATTITUDE: Focus on what a real user would EXPERIENCE. If something 'feels' too close or visually confusing, it is an overlap failure. When in doubt, score LOW.\n" +
14
- "NOTE: Do NOT evaluate spacing consistency (layout), color choices (style), or text meaning (info_clarity).",
15
- scoringGuide: "0-2: Severe visual collision. Elements clearly overlap each other or invade system areas (e.g., a button sitting on top of the status bar), causing immediate confusion or making the interface unusable.\n" +
16
- "3-4: Elements don't directly collide, but are awkwardly close to each other or to system edges, creating noticeable visual noise or high accidental-tap risk.\n" +
17
- "5-6: All elements are visually separate, but some touch targets feel uncomfortably close. A careful user would be fine, but a hurried user might mis-tap.\n" +
18
- "7-8: Clean separation throughout. No visual interference. The app clearly respects system boundaries and provides comfortable touch targets.\n" +
19
- "9-10: Perfect separation — generous spacing between all interactive elements, completely unambiguous visual boundaries across the entire screen.",
10
+ id: "outcome",
11
+ name: "Action Result",
12
+ description: "You just performed an action (tap or swipe). Now look at the screenshot and answer one question:\n" +
13
+ "Did the action produce the result you expected?\n\n" +
14
+ "CHECK:\n" +
15
+ "1. CORRECT SCREEN: Is this the screen you expected to see after the action? If you tapped a settings icon, are you on the settings page? Landing on the wrong screen is a critical failure.\n" +
16
+ "2. VISIBLE CONFIRMATION: Is there a clear visual change that confirms the action worked? For example: a new page appeared, a toggle changed state, an item was added/removed, or a form was submitted. The user should be able to tell at a glance that something happened.\n" +
17
+ "3. NO ERRORS: Are there any error messages, crash screens, blank white screens, frozen loading spinners, or other signs that something went wrong?\n" +
18
+ "4. COMPLETE RESPONSE: Did the action fully complete? Partial results (e.g., page loaded but key content is missing, or only half the expected change occurred) still count as a problem.\n\n" +
19
+ "SCORING ATTITUDE: This is mostly a yes-or-no judgment. If the screen clearly matches the expectedOutcome, score high. If it clearly doesn't, score low. Partial success is still a failure from the user's perspective.",
20
+ scoringGuide: "0-2: Action clearly failed — wrong screen, error/crash, blank screen, or zero visible response.\n" +
21
+ "3-4: Something changed, but it's not the expected result (e.g., wrong page, wrong data displayed).\n" +
22
+ "5-6: Action partially succeeded — the right screen appeared but with missing content, unexpected side effects, or incomplete state changes.\n" +
23
+ "7-8: Action succeeded — the expected result is clearly present, with only very minor discrepancies.\n" +
24
+ "9-10: Perfect — the screenshot fully and completely matches the expectedOutcome.",
20
25
  },
21
26
  {
22
- id: "layout",
23
- name: "Layout & Spatial Balance",
24
- description: "You are evaluating this screen as a real user who expects a well-organized, visually balanced interface.\n" +
25
- "Assume the layout has issues until you verify otherwise.\n\n" +
26
- "CHECK EACH ASPECT:\n" +
27
- "1. LETTERBOXING: Does the app appear to run inside a smaller box with black or empty bars on any side? This is score 0, no exceptions.\n" +
28
- "2. SCREEN UTILIZATION: Is there a large empty area that serves no purpose? Is content crammed into only part of the screen while the rest is wasted? A user should feel the app was designed for this exact screen size.\n" +
29
- "3. ALIGNMENT: Pick any two similar elements (e.g., two cards, two labels). Are their edges aligned? Are the gaps between them equal? Visible misalignment = deduction.\n" +
30
- "4. SPACING CONSISTENCY: Are gaps between elements consistent? If the space between Card A and Card B looks visibly different from the space between Card B and Card C, that is a problem.\n" +
31
- "5. RESPONSIVE FIT: Does the layout feel natural for this screen size, or does it look stretched, squished, or designed for a different device?\n" +
32
- "6. CONTENT OVERFLOW: Does any content look like it has outgrown its container? Look for: text that wraps unexpectedly causing one card to be noticeably taller than its neighbor in a grid, content that appears cut off at a container edge, or scrollable areas that clip content in a way that confuses the user about whether more content exists.\n\n" +
33
- "SCORING ATTITUDE: Do not be generous. A layout that 'looks fine at a glance' but has visible imperfections upon closer inspection is a 6, not an 8.\n" +
34
- "NOTE: Do NOT evaluate element collision (that is overlap), color choices (that is style), or text meaning (that is info_clarity).",
35
- scoringGuide: "0-2: Letterboxing detected, or layout is severely broken (elements piled on top of each other, massive unusable empty areas, content pushed off-screen).\n" +
36
- "3-4: Major layout issues — clearly uneven spacing, significant misalignment, or large wasted areas that make the app feel unfinished.\n" +
37
- "5-6: Layout is functional but has noticeable imperfections — some uneven spacing, minor alignment issues, or one area that feels out of balance.\n" +
38
- "7-8: Well-structured layout with only very minor imperfections that require careful inspection to notice.\n" +
39
- "9-10: Flawless — pixel-perfect alignment, perfectly consistent spacing, optimal screen utilization. Extremely rare.",
27
+ id: "usability",
28
+ name: "Usability",
29
+ description: "You are a first-time user who just opened this app. You have never seen it before.\n" +
30
+ "Look at this screen and ask yourself: Can I use this screen smoothly and without confusion?\n\n" +
31
+ "CHECK:\n" +
32
+ "1. CONTENT VISIBILITY: Can you see ALL the content on screen completely? Look for:\n" +
33
+ " - Text that is cut off with '...' or overflows its container\n" +
34
+ " - Elements hidden behind other elements (a popup covering needed info, a floating button sitting on top of text)\n" +
35
+ " - Content pushed under the system status bar or behind the bottom home indicator\n" +
36
+ " - If a keyboard is visible, does it cover any input field or button the user needs right now?\n" +
37
+ "2. COMPREHENSION: Within 2 seconds, can you tell what this screen is for and what you should do next?\n" +
38
+ " - Is the page title or header clear about the screen's purpose?\n" +
39
+ " - Is the most important element (primary action button, main content) the most visually prominent thing on screen?\n" +
40
+ "3. LABEL CLARITY: Read every button, link, and label. Would a new user immediately understand what each one does?\n" +
41
+ " - Vague labels like 'Go', 'Submit', or 'OK' without context are confusing\n" +
42
+ " - Icon-only buttons without text labels: is the icon universally understood (e.g., trash can, magnifying glass) or ambiguous (abstract shapes)?\n" +
43
+ "4. READABILITY: Can all text be comfortably read?\n" +
44
+ " - Check text against its background — is there enough contrast?\n" +
45
+ " - Is any text too small to read comfortably?\n" +
46
+ " - Pay special attention to text placed over images or colored backgrounds\n" +
47
+ "5. TAP ACCURACY: Can you confidently tap each button without accidentally hitting the wrong one?\n" +
48
+ " - Are any two tappable elements so close together that a normal finger might hit the wrong one?\n" +
49
+ " - Are any buttons uncomfortably close to the screen edge where system gestures occur?\n\n" +
50
+ "SCORING ATTITUDE: If any single element would make a new user pause and think 'what does this mean?' or 'where did that text go?', that is a usability problem. One confusing element caps the score at 7. Multiple issues = score 5 or below.",
51
+ scoringGuide: "0-2: The screen is unusable — purpose unclear, critical content hidden or overlapping, major elements unreadable or unreachable.\n" +
52
+ "3-4: The screen's purpose is guessable but requires effort. Multiple labels are confusing, or significant content is blocked/truncated.\n" +
53
+ "5-6: Generally usable but with noticeable issues — one or two confusing labels, some truncated text, or a few elements that feel too close together.\n" +
54
+ "7-8: Smooth experience with only one very minor issue (e.g., one slightly ambiguous icon that most users would still figure out).\n" +
55
+ "9-10: Perfectly clear — every element is visible, readable, self-explanatory, and easy to interact with. A user of any background would have zero confusion.",
40
56
  },
41
57
  {
42
- id: "info_clarity",
43
- name: "Information Clarity & Readability",
44
- description: "You are evaluating this screen as a FIRST-TIME user who has never seen this app before.\n" +
45
- "Your goal is to determine how quickly and easily a new user could understand this screen.\n\n" +
46
- "CHECK EACH ASPECT:\n" +
47
- "1. FIRST IMPRESSION: Within 2 seconds of looking at this screen, can you tell what it does and what the main action is? If it takes effort to figure out the screen's purpose, that is a problem.\n" +
48
- "2. BUTTON & LABEL CLARITY: Read every button, link, and label. Would a first-time user immediately understand what each one does? Vague labels like 'Go' or unlabeled icon-only buttons are problematic.\n" +
49
- "3. VISUAL HIERARCHY: Is the most important element (page title, primary action button) visually the most prominent? Or do secondary elements compete for the user's attention?\n" +
50
- "4. TEXT COMPLETENESS: Is any text cut off with '...' (truncation), overflowing its container, or so small that a user would struggle to read it? Even one truncated label that hides meaningful information = deduction.\n" +
51
- "5. READABILITY & CONTRAST: Can all text be comfortably read against its background? Pay special attention to small text, light-colored text on light backgrounds, or text placed over images.\n" +
52
- "6. ICON MEANING: Are there any icons without text labels whose meaning might be ambiguous to a new user? A trash can icon is universally understood; a custom abstract icon is not.\n\n" +
53
- "SCORING ATTITUDE: If any single element would make a first-time user pause and wonder 'what does this do?', cap the score at 7. Multiple confusing elements = score 5 or below.\n" +
54
- "NOTE: Do NOT evaluate spacing (layout), visual collision (overlap), or color harmony (style).",
55
- scoringGuide: "0-2: The screen's purpose is unclear, the primary action is missing or hidden, or labels are severely misleading.\n" +
56
- "3-4: The core purpose is guessable but requires effort. Multiple labels or icons are ambiguous.\n" +
57
- "5-6: Generally understandable but 1-2 elements are confusing (ambiguous icon, truncated text hiding important info, weak visual hierarchy).\n" +
58
- "7-8: Clear and well-organized with only one very minor issue. A first-time user would understand the screen immediately.\n" +
59
- "9-10: Crystal clear — perfect hierarchy, every label self-explanatory, no truncation, excellent contrast. A user of any background would instantly understand this screen.",
60
- },
61
- {
62
- id: "style",
63
- name: "Visual Style & Consistency",
64
- description: "You are evaluating this screen as a user who expects a polished, professionally designed app.\n" +
65
- "Your job is to find visual inconsistencies and rendering defects that make the app look unfinished.\n\n" +
66
- "CHECK EACH ASPECT:\n" +
67
- "1. COLOR PALETTE: Count the number of distinct colors used. Do they feel intentional and harmonious, or random and clashing?\n" +
68
- "2. TYPOGRAPHY: Are font sizes used consistently? Are there mixed font weights that don't follow a clear hierarchy? More than 2-3 font sizes without a clear reason is a problem.\n" +
69
- "3. COMPONENT CONSISTENCY: Compare all elements of the same type. Do all buttons have the same shape, size, and styling? Do all cards have the same shadows, borders, and corner radius? Any difference between similar components = deduction.\n" +
70
- "4. ICONOGRAPHY: Are all icons visually consistent (all outline style OR all filled style, consistent stroke width and sizing)? Mixing styles within the same screen = deduction.\n" +
71
- "5. POLISH & FINISH: Does the app look professionally designed, or does it feel like an unfinished prototype? Placeholder content (e.g., 'Lorem ipsum', default system icons used inappropriately) = major deduction.\n" +
72
- "6. RENDERING INTEGRITY: Inspect each UI element individually for visual defects. Look for: a visible ring, halo, or unintended border of a different color around an element (e.g., a white circle visible behind a colored circular button — this means the element was not built cleanly), mismatched shapes within a single component, clipping artifacts that cut off part of an icon or image, or any visual glitch that makes a component look broken. Even 1-2 pixels of visible defect = deduction.\n\n" +
73
- "SCORING ATTITUDE: Consistency is binary — if two buttons of the same type look different, they are inconsistent, period. A single rendering defect on any element caps the score at 7.\n" +
74
- "NOTE: Do NOT evaluate text meaning (info_clarity), spatial arrangement (layout), or element collision (overlap).",
75
- scoringGuide: "0-2: No design system — clashing colors, random fonts, inconsistent components everywhere, or multiple rendering defects.\n" +
76
- "3-4: Some design intent visible but multiple clear inconsistencies (different button styles, mixed icon sets, visible rendering glitches).\n" +
77
- "5-6: Basic consistency exists but with noticeable deviations or at least one obvious rendering defect.\n" +
78
- "7-8: Strong consistency with only 1 minor deviation or subtle defect requiring careful inspection.\n" +
79
- "9-10: Pixel-perfect design system adherence. Every element cohesive and cleanly rendered. Extremely rare.",
80
- },
81
- {
82
- id: "action_result",
83
- name: "Action Result Verification",
84
- description: "You are evaluating whether the action the user just performed actually produced the expected result.\n" +
85
- "The step's expectedOutcome describes what SHOULD have happened. Compare the current screenshot against it.\n\n" +
86
- "CHECK EACH ASPECT:\n" +
87
- "1. CORRECT DESTINATION: If the user tapped a navigation element, did the correct screen appear? Landing on the wrong screen = score 0.\n" +
88
- "2. VISIBLE STATE CHANGE: If the action should have added, removed, toggled, or updated something, is that change clearly visible in the screenshot? The user should be able to confirm the action worked.\n" +
89
- "3. ERROR INDICATORS: Are there any error dialogs, crash screens, blank white screens, or infinite loading spinners? These all indicate the action failed.\n" +
90
- "4. NO VISIBLE RESPONSE: Does the screen look identical to before the action was performed? If nothing changed, the action likely failed or missed its target.\n\n" +
91
- "SCORING ATTITUDE: If the expectedOutcome is not clearly achieved in the screenshot, score LOW. Partial success is still a problem — the user expects the action to fully work.\n" +
92
- "If this step is a pure observation with no expectedOutcome specified, score 10.",
93
- scoringGuide: "0-2: Action clearly failed — wrong screen, error dialog, crash, blank screen, or no visible response at all.\n" +
94
- "3-4: Action had some effect but the result does not match the expectedOutcome (e.g., navigated to the wrong tab, wrong data displayed).\n" +
95
- "5-6: Action partially achieved the goal but with unexpected side effects, missing expected elements, or incomplete state changes.\n" +
96
- "7-8: Action succeeded — the expected screen or state is present but with minor discrepancies from the expectedOutcome.\n" +
97
- "9-10: Action fully succeeded — the screenshot clearly and completely matches the expectedOutcome. Or this is a pure observation step with no expectedOutcome.",
58
+ id: "aesthetics",
59
+ name: "Aesthetics",
60
+ description: "You are a user who has seen many well-designed apps. You have basic aesthetic expectations.\n" +
61
+ "Look at this screen and ask yourself: Does this look like a professionally designed, finished product?\n\n" +
62
+ "CHECK:\n" +
63
+ "1. LAYOUT BALANCE: Does the screen feel well-organized?\n" +
64
+ " - Is there a large empty area that serves no purpose, making the app feel unfinished?\n" +
65
+ " - Is content crammed into one part of the screen while the rest is wasted?\n" +
66
+ " - Does the layout feel natural for this phone screen, or does it look like it was designed for a different device?\n" +
67
+ "2. ALIGNMENT & SPACING: Are elements neatly arranged?\n" +
68
+ " - Pick any two similar elements (e.g., two cards, two list items). Are their edges aligned? Are the gaps between them equal?\n" +
69
+ " - If the gap between element A and B looks visibly different from the gap between B and C, that is a problem.\n" +
70
+ "3. COLOR & TYPOGRAPHY: Do colors and fonts feel intentional and harmonious?\n" +
71
+ " - Do the colors look like they belong together, or do some feel random/clashing?\n" +
72
+ " - Are font sizes and weights used consistently? (e.g., all section titles same size, all body text same size)\n" +
73
+ "4. COMPONENT CONSISTENCY: Do similar elements look the same?\n" +
74
+ " - Compare all buttons — same shape, size, and style?\n" +
75
+ " - Compare all cards — same shadows, borders, and corner radius?\n" +
76
+ " - Compare all icons — same visual style (all outline or all filled, similar sizing)?\n" +
77
+ " - Any visible difference between elements that should be identical is a consistency failure.\n" +
78
+ "5. OVERALL POLISH: Does the app feel 'finished'?\n" +
79
+ " - Does it look like a real product someone would download from the App Store?\n" +
80
+ " - Or does it feel like an unfinished prototype with placeholder content, mismatched pieces, or rough edges?\n\n" +
81
+ "SCORING ATTITUDE: Trust your gut. If your first impression is 'this looks a bit rough', it is. A screen that passes a quick glance but has visible imperfections on closer look is a 6, not an 8. Consistency is binary — if two cards of the same type look different, they are inconsistent, period.",
82
+ scoringGuide: "0-2: No visual coherence — clashing colors, random fonts, messy layout, clearly unfinished or broken appearance.\n" +
83
+ "3-4: Some design effort visible but multiple obvious issues — uneven spacing, inconsistent components, large wasted areas, or mixed visual styles.\n" +
84
+ "5-6: Decent overall but with noticeable imperfections — some uneven spacing, a few inconsistent elements, or one area that feels out of balance.\n" +
85
+ "7-8: Polished and professional with only very minor imperfections that require careful inspection to notice.\n" +
86
+ "9-10: Flawless — perfectly balanced layout, fully consistent components, harmonious colors and typography. Feels like a top-tier app. Extremely rare.",
98
87
  },
99
88
  ];
100
- export const REQUIRED_DIMS = DIMENSIONS.map(d => d.id);
89
+ export const ALL_DIMS = DIMENSIONS.map(d => d.id);
90
+ /** Get the applicable dimension IDs for a given step. */
91
+ export function getRequiredDimsForStep(hasExpectedOutcome) {
92
+ if (hasExpectedOutcome) {
93
+ return ALL_DIMS;
94
+ }
95
+ return ALL_DIMS.filter(d => d !== "outcome");
96
+ }
101
97
  export const PASSING_SCORE = 8;
102
98
  //# sourceMappingURL=checklist.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"checklist.js","sourceRoot":"","sources":["../../../src/evaluation/checklist.ts"],"names":[],"mappings":"AAEA,+DAA+D;AAC/D,MAAM,CAAC,MAAM,UAAU,GAAoB;IACvC;QACI,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,8BAA8B;QACpC,WAAW,EACP,qGAAqG;YACrG,wFAAwF;YACxF,oDAAoD;YACpD,iaAAia;YACja,kQAAkQ;YAClQ,2OAA2O;YAC3O,2LAA2L;YAC3L,2KAA2K;YAC3K,4GAA4G;QAChH,YAAY,EACR,6MAA6M;YAC7M,iKAAiK;YACjK,6JAA6J;YAC7J,gJAAgJ;YAChJ,kJAAkJ;KACzJ;IACD;QACI,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,0BAA0B;QAChC,WAAW,EACP,4GAA4G;YAC5G,8DAA8D;YAC9D,sBAAsB;YACtB,0IAA0I;YAC1I,4NAA4N;YAC5N,0KAA0K;YAC1K,6LAA6L;YAC7L,kJAAkJ;YAClJ,4VAA4V;YAC5V,uJAAuJ;YACvJ,mIAAmI;QACvI,YAAY,EACR,4JAA4J;YAC5J,yIAAyI;YACzI,oJAAoJ;YACpJ,6GAA6G;YAC7G,qHAAqH;KAC5H;IACD;QACI,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,mCAAmC;QACzC,WAAW,EACP,2FAA2F;YAC3F,+FAA+F;YAC/F,sBAAsB;YACtB,qMAAqM;YACrM,4MAA4M;YAC5M,kLAAkL;YAClL,4NAA4N;YAC5N,iMAAiM;YACjM,yLAAyL;YACzL,mLAAmL;YACnL,+FAA+F;QACnG,YAAY,EACR,qHAAqH;YACrH,mGAAmG;YACnG,+IAA+I;YAC/I,4HAA4H;YAC5H,4KAA4K;KACnL;IACD;QACI,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,4BAA4B;QAClC,WAAW,EACP,iGAAiG;YACjG,yGAAyG;YACzG,sBAAsB;YACtB,gIAAgI;YAChI,oLAAoL;YACpL,kPAAkP;YAClP,oLAAoL;YACpL,wNAAwN;YACxN,ifAAif;YACjf,0LAA0L;YAC1L,kHAAkH;QACtH,YAAY,EACR,6HAA6H;YAC7H,8IAA8I;YAC9I,0GAA0G;YAC1G,sGAAsG;YACtG,2GAA2G;KAClH;IACD;QACI,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,4BAA4B;QAClC,WAAW,EACP,wGAAwG;YACxG,gHAAgH;YAChH,sBAAsB;YACtB,0IAA0I;YAC1I,6MAA6M;YAC7M,6JAA6J;YAC7J,mKAAmK;YACnK,kLAAkL;YAClL,iFAAiF;QACrF,YAAY,EACR,gHAAgH;YAChH,2IAA2I;YAC3I,qIAAqI;YACrI,0HAA0H;YAC1H,+JAA+J;KACtK;CACJ,CAAC;AAEF,MAAM,CAAC,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACvD,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC"}
1
+ {"version":3,"file":"checklist.js","sourceRoot":"","sources":["../../../src/evaluation/checklist.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,UAAU,GAAoB;IACvC;QACI,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,eAAe;QACrB,WAAW,EACP,oGAAoG;YACpG,qDAAqD;YACrD,UAAU;YACV,+LAA+L;YAC/L,+QAA+Q;YAC/Q,sJAAsJ;YACtJ,8LAA8L;YAC9L,yNAAyN;QAC7N,YAAY,EACR,mGAAmG;YACnG,sGAAsG;YACtG,+IAA+I;YAC/I,uGAAuG;YACvG,kFAAkF;KACzF;IACD;QACI,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,WAAW;QACjB,WAAW,EACP,sFAAsF;YACtF,iGAAiG;YACjG,UAAU;YACV,sFAAsF;YACtF,mEAAmE;YACnE,uHAAuH;YACvH,uFAAuF;YACvF,oGAAoG;YACpG,yGAAyG;YACzG,sEAAsE;YACtE,yHAAyH;YACzH,qHAAqH;YACrH,gFAAgF;YAChF,sJAAsJ;YACtJ,qDAAqD;YACrD,sEAAsE;YACtE,mDAAmD;YACnD,gFAAgF;YAChF,oGAAoG;YACpG,sGAAsG;YACtG,8FAA8F;YAC9F,gPAAgP;QACpP,YAAY,EACR,oIAAoI;YACpI,2IAA2I;YAC3I,wJAAwJ;YACxJ,qIAAqI;YACrI,8JAA8J;KACrK;IACD;QACI,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,YAAY;QAClB,WAAW,EACP,+FAA+F;YAC/F,4GAA4G;YAC5G,UAAU;YACV,2DAA2D;YAC3D,4FAA4F;YAC5F,iFAAiF;YACjF,yHAAyH;YACzH,yDAAyD;YACzD,mIAAmI;YACnI,oHAAoH;YACpH,+EAA+E;YAC/E,uFAAuF;YACvF,oHAAoH;YACpH,gEAAgE;YAChE,2DAA2D;YAC3D,sEAAsE;YACtE,2FAA2F;YAC3F,mGAAmG;YACnG,oDAAoD;YACpD,oFAAoF;YACpF,oHAAoH;YACpH,wSAAwS;QAC5S,YAAY,EACR,oHAAoH;YACpH,sJAAsJ;YACtJ,oJAAoJ;YACpJ,gHAAgH;YAChH,uJAAuJ;KAC9J;CACJ,CAAC;AAEF,MAAM,CAAC,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAElD,yDAAyD;AACzD,MAAM,UAAU,sBAAsB,CAAC,kBAA2B;IAC9D,IAAI,kBAAkB,EAAE,CAAC;QACrB,OAAO,QAAQ,CAAC;IACpB,CAAC;IACD,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC"}
@@ -7,6 +7,7 @@ export declare function getPendingTaskState(caseName: string, stepIndex: number)
7
7
  scoringGuide?: undefined;
8
8
  token?: undefined;
9
9
  stepIndex?: undefined;
10
+ progress?: undefined;
10
11
  } | {
11
12
  completed: boolean;
12
13
  dimensionId: string;
@@ -15,6 +16,7 @@ export declare function getPendingTaskState(caseName: string, stepIndex: number)
15
16
  scoringGuide: string;
16
17
  token: string;
17
18
  stepIndex: number;
19
+ progress: string;
18
20
  message?: undefined;
19
21
  };
20
22
  export declare function validateAndAdvanceState(caseName: string, stepIndex: number, token: string, score: number, reason: string): {
@@ -29,6 +31,7 @@ export declare function validateAndAdvanceState(caseName: string, stepIndex: num
29
31
  scoringGuide?: undefined;
30
32
  token?: undefined;
31
33
  stepIndex?: undefined;
34
+ progress?: undefined;
32
35
  } | {
33
36
  completed: boolean;
34
37
  dimensionId: string;
@@ -37,6 +40,7 @@ export declare function validateAndAdvanceState(caseName: string, stepIndex: num
37
40
  scoringGuide: string;
38
41
  token: string;
39
42
  stepIndex: number;
43
+ progress: string;
40
44
  message?: undefined;
41
45
  };
42
46
  };
@@ -1 +1 @@
1
- {"version":3,"file":"state-machine.d.ts","sourceRoot":"","sources":["../../../src/evaluation/state-machine.ts"],"names":[],"mappings":"AAQA,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;;;;;;;;;;;;;;;;;;EAwCtE;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;;;EA4CxH"}
1
+ {"version":3,"file":"state-machine.d.ts","sourceRoot":"","sources":["../../../src/evaluation/state-machine.ts"],"names":[],"mappings":"AAQA,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;EA0CtE;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;;;;;EA2CxH"}
@@ -1,6 +1,6 @@
1
1
  import crypto from "crypto";
2
2
  import { readLog, writeLog } from "../logger/audit-log.js";
3
- import { DIMENSIONS, REQUIRED_DIMS } from "./checklist.js";
3
+ import { DIMENSIONS, getRequiredDimsForStep } from "./checklist.js";
4
4
  function generateToken() {
5
5
  return "tok_" + crypto.randomBytes(4).toString("hex");
6
6
  }
@@ -13,18 +13,18 @@ export function getPendingTaskState(caseName, stepIndex) {
13
13
  if (!step) {
14
14
  throw new Error(`Step ${stepIndex} not found in case ${caseName}`);
15
15
  }
16
- if (step.currentDimIndex >= REQUIRED_DIMS.length) {
16
+ const requiredDims = getRequiredDimsForStep(!!step.expectedOutcome);
17
+ if (step.currentDimIndex >= requiredDims.length) {
17
18
  return {
18
19
  completed: true,
19
- message: `All ${REQUIRED_DIMS.length} dimensions for Step ${stepIndex} are fully evaluated! You may now proceed to interact with the device further (e.g. tap, swipe) or call get_audit_status.`
20
+ message: `All ${requiredDims.length} dimensions for Step ${stepIndex} are fully evaluated! You may now proceed to interact with the device further (e.g. tap, swipe) or call get_audit_status.`
20
21
  };
21
22
  }
22
- // Generate a token if there isn't one
23
23
  if (!step.evaluationToken) {
24
24
  step.evaluationToken = generateToken();
25
25
  writeLog(log);
26
26
  }
27
- const currentDimId = REQUIRED_DIMS[step.currentDimIndex];
27
+ const currentDimId = requiredDims[step.currentDimIndex];
28
28
  const criteria = DIMENSIONS.find(d => d.id === currentDimId);
29
29
  if (!criteria) {
30
30
  throw new Error(`Dimension '${currentDimId}' not found.`);
@@ -36,7 +36,8 @@ export function getPendingTaskState(caseName, stepIndex) {
36
36
  description: criteria.description,
37
37
  scoringGuide: criteria.scoringGuide,
38
38
  token: step.evaluationToken,
39
- stepIndex: stepIndex
39
+ stepIndex: stepIndex,
40
+ progress: `${step.currentDimIndex + 1} / ${requiredDims.length}`,
40
41
  };
41
42
  }
42
43
  export function validateAndAdvanceState(caseName, stepIndex, token, score, reason) {
@@ -48,22 +49,20 @@ export function validateAndAdvanceState(caseName, stepIndex, token, score, reaso
48
49
  if (!step) {
49
50
  throw new Error(`Step ${stepIndex} not found in case ${caseName}`);
50
51
  }
51
- if (step.currentDimIndex >= REQUIRED_DIMS.length) {
52
+ const requiredDims = getRequiredDimsForStep(!!step.expectedOutcome);
53
+ if (step.currentDimIndex >= requiredDims.length) {
52
54
  throw new Error(`Step ${stepIndex} is already fully evaluated.`);
53
55
  }
54
56
  if (!step.evaluationToken || step.evaluationToken !== token) {
55
57
  throw new Error("Invalid token. Do not guess tokens or run multiple submissions at once.");
56
58
  }
57
- const currentDimId = REQUIRED_DIMS[step.currentDimIndex];
58
- // Record the score
59
+ const currentDimId = requiredDims[step.currentDimIndex];
59
60
  step.evaluations[currentDimId] = {
60
61
  score,
61
62
  reason,
62
63
  };
63
- // Advance the state
64
64
  step.currentDimIndex += 1;
65
- // Clear the old token and generate a new one if not completed
66
- if (step.currentDimIndex < REQUIRED_DIMS.length) {
65
+ if (step.currentDimIndex < requiredDims.length) {
67
66
  step.evaluationToken = generateToken();
68
67
  }
69
68
  else {
@@ -1 +1 @@
1
- {"version":3,"file":"state-machine.js","sourceRoot":"","sources":["../../../src/evaluation/state-machine.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE3D,SAAS,aAAa;IAClB,OAAO,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,SAAiB;IACnE,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,IAAI,CAAC,eAAe,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC;QAC/C,OAAO;YACH,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,OAAO,aAAa,CAAC,MAAM,wBAAwB,SAAS,2HAA2H;SACnM,CAAC;IACN,CAAC;IAED,sCAAsC;IACtC,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;QACxB,IAAI,CAAC,eAAe,GAAG,aAAa,EAAE,CAAC;QACvC,QAAQ,CAAC,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IACzD,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,YAAY,CAAC,CAAC;IAE7D,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,cAAc,YAAY,cAAc,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO;QACH,SAAS,EAAE,KAAK;QAChB,WAAW,EAAE,YAAY;QACzB,aAAa,EAAE,QAAQ,CAAC,IAAI;QAC5B,WAAW,EAAE,QAAQ,CAAC,WAAW;QACjC,YAAY,EAAE,QAAQ,CAAC,YAAY;QACnC,KAAK,EAAE,IAAI,CAAC,eAAe;QAC3B,SAAS,EAAE,SAAS;KACvB,CAAC;AACN,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,QAAgB,EAAE,SAAiB,EAAE,KAAa,EAAE,KAAa,EAAE,MAAc;IACrH,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,IAAI,CAAC,eAAe,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC;QAC/C,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,8BAA8B,CAAC,CAAC;IACrE,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,eAAe
,IAAI,IAAI,CAAC,eAAe,KAAK,KAAK,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,yEAAyE,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAEzD,mBAAmB;IACnB,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,GAAG;QAC7B,KAAK;QACL,MAAM;KACT,CAAC;IAEF,oBAAoB;IACpB,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC;IAE1B,8DAA8D;IAC9D,IAAI,IAAI,CAAC,eAAe,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC;QAC9C,IAAI,CAAC,eAAe,GAAG,aAAa,EAAE,CAAC;IAC3C,CAAC;SAAM,CAAC;QACJ,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACrC,CAAC;IAED,QAAQ,CAAC,GAAG,CAAC,CAAC;IAEd,OAAO;QACH,cAAc,EAAE,YAAY;QAC5B,KAAK;QACL,SAAS,EAAE,mBAAmB,CAAC,QAAQ,EAAE,SAAS,CAAC;KACtD,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"state-machine.js","sourceRoot":"","sources":["../../../src/evaluation/state-machine.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAEpE,SAAS,aAAa;IAClB,OAAO,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,SAAiB;IACnE,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,MAAM,YAAY,GAAG,sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAEpE,IAAI,IAAI,CAAC,eAAe,IAAI,YAAY,CAAC,MAAM,EAAE,CAAC;QAC9C,OAAO;YACH,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,OAAO,YAAY,CAAC,MAAM,wBAAwB,SAAS,2HAA2H;SAClM,CAAC;IACN,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;QACxB,IAAI,CAAC,eAAe,GAAG,aAAa,EAAE,CAAC;QACvC,QAAQ,CAAC,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,YAAY,CAAC,CAAC;IAE7D,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,cAAc,YAAY,cAAc,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO;QACH,SAAS,EAAE,KAAK;QAChB,WAAW,EAAE,YAAY;QACzB,aAAa,EAAE,QAAQ,CAAC,IAAI;QAC5B,WAAW,EAAE,QAAQ,CAAC,WAAW;QACjC,YAAY,EAAE,QAAQ,CAAC,YAAY;QACnC,KAAK,EAAE,IAAI,CAAC,eAAe;QAC3B,SAAS,EAAE,SAAS;QACpB,QAAQ,EAAE,GAAG,IAAI,CAAC,eAAe,GAAG,CAAC,MAAM,YAAY,CAAC,MAAM,EAAE;KACnE,CAAC;AACN,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,QAAgB,EAAE,SAAiB,EAAE,KAAa,EAAE,KAAa,EAAE,MAAc;IACrH,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,EAAE,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,sBAAsB,QAAQ,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,MAAM,YAAY,GAAG,
sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAEpE,IAAI,IAAI,CAAC,eAAe,IAAI,YAAY,CAAC,MAAM,EAAE,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,8BAA8B,CAAC,CAAC;IACrE,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,eAAe,KAAK,KAAK,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,yEAAyE,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAExD,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,GAAG;QAC7B,KAAK;QACL,MAAM;KACT,CAAC;IAEF,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC;IAE1B,IAAI,IAAI,CAAC,eAAe,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAC7C,IAAI,CAAC,eAAe,GAAG,aAAa,EAAE,CAAC;IAC3C,CAAC;SAAM,CAAC;QACJ,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACrC,CAAC;IAED,QAAQ,CAAC,GAAG,CAAC,CAAC;IAEd,OAAO;QACH,cAAc,EAAE,YAAY;QAC5B,KAAK;QACL,SAAS,EAAE,mBAAmB,CAAC,QAAQ,EAAE,SAAS,CAAC;KACtD,CAAC;AACN,CAAC"}
package/dist/src/index.js CHANGED
@@ -38,7 +38,7 @@ server.tool("swipe", "Swipe on screen (ratio 0-1). Automatically captures a scre
38
38
  server.tool("evaluate", "Unified evaluation tool. Two modes:\n" +
39
39
  "1) INITIAL: Call with only caseName + stepIndex (omit score/reason/token) → returns the first pending dimension's prompt, scoring guide, and evaluationToken.\n" +
40
40
  "2) SUBMIT: Call with caseName + stepIndex + evaluationToken + score + reason → records the score, advances to the next dimension, and returns its prompt + new token (or completion message).\n" +
41
- "Required dimensions per step: overlap, layout, info_clarity, style, action_result.", evaluateSchema.shape, async (args) => evaluate(args));
41
+ "Dimensions (easy → hard): outcome → usability → aesthetics. For observation steps (no expectedOutcome), outcome is skipped automatically.", evaluateSchema.shape, async (args) => evaluate(args));
42
42
  server.tool("get_audit_status", "Get the current dashboard or test case report. " +
43
43
  "Use this to see which dimensions are missing (Pending) or to get the final markdown report if all steps are fully evaluated.", getAuditStatusSchema.shape, async (args) => getAuditStatus(args));
44
44
  import { preflight } from "./device/adapter.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAClF,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAEnF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IACzB,IAAI,EAAE,cAAc;IACpB,OAAO,EAAE,OAAO;CACnB,CAAC,CAAC;AAEH,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,YAAY,EACZ,sDAAsD,EACtD,eAAe,CAAC,KAAK,EACrB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAClC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,iBAAiB,EACjB,qFAAqF;IACrF,2GAA2G,EAC3G,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,KAAK,EACL,6GAA6G;IAC7G,2GAA2G,EAC3G,SAAS,CAAC,KAAK,EACf,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAC5B,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,OAAO,EACP,iGAAiG;IACjG,2GAA2G,EAC3G,WAAW,CAAC,KAAK,EACjB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAC9B,CAAC;AAEF,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,UAAU,EACV,uCAAuC;IACvC,iKAAiK;IACjK,iMAAiM;IACjM,oFAAoF,EACpF,cAAc,CAAC,KAAK,EACpB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CACjC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,kBAAkB,EAClB,iDAAiD;IACjD,8HAA8H,EAC9H,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAEhD,8DAA8D;AAE9D,KAAK,UAAU,IAAI;IACf,0DAA0D;IAC1D,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC1D,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,C
AAC;IAChC,OAAO,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;AACvE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;IAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAClF,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAEnF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IACzB,IAAI,EAAE,cAAc;IACpB,OAAO,EAAE,OAAO;CACnB,CAAC,CAAC;AAEH,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,YAAY,EACZ,sDAAsD,EACtD,eAAe,CAAC,KAAK,EACrB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAClC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,iBAAiB,EACjB,qFAAqF;IACrF,2GAA2G,EAC3G,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,KAAK,EACL,6GAA6G;IAC7G,2GAA2G,EAC3G,SAAS,CAAC,KAAK,EACf,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAC5B,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,OAAO,EACP,iGAAiG;IACjG,2GAA2G,EAC3G,WAAW,CAAC,KAAK,EACjB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAC9B,CAAC;AAEF,8DAA8D;AAE9D,MAAM,CAAC,IAAI,CACP,UAAU,EACV,uCAAuC;IACvC,iKAAiK;IACjK,iMAAiM;IACjM,2IAA2I,EAC3I,cAAc,CAAC,KAAK,EACpB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CACjC,CAAC;AAEF,MAAM,CAAC,IAAI,CACP,kBAAkB,EAClB,iDAAiD;IACjD,8HAA8H,EAC9H,oBAAoB,CAAC,KAAK,EAC1B,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CACvC,CAAC;AAEF,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAEhD,8DAA8D;AAE9D,KAAK,UAAU,IAAI;IACf,0DAA0D;IAC1D,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC1D,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,C
AAC;IAChC,OAAO,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;AACvE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;IAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"evaluate.d.ts","sourceRoot":"","sources":["../../../src/tools/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;EAazB,CAAC;AAqBH,wBAAsB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC;;;;;GAmEnE"}
1
+ {"version":3,"file":"evaluate.d.ts","sourceRoot":"","sources":["../../../src/tools/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;EAYzB,CAAC;AAqBH,wBAAsB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC;;;;;GAmEnE"}
@@ -17,7 +17,6 @@ import { getPendingTaskState, validateAndAdvanceState } from "../evaluation/stat
17
17
  export const evaluateSchema = z.object({
18
18
  caseName: z.string().describe("Name of the test case"),
19
19
  stepIndex: z.number().int().positive().describe("Step index to evaluate"),
20
- // Optional: omit all three for initial query, provide all three to submit
21
20
  evaluationToken: z.string().optional().describe("The exact token from the previous evaluate call. Omit on the first call to get the initial dimension."),
22
21
  score: z.number().int().min(0).max(10).optional().describe("Score from 0 (worst) to 10 (perfect). Required when submitting an evaluation."),
23
22
  reason: z.string().optional().describe("Analysis and reason for this score. Required when submitting an evaluation."),
@@ -1 +1 @@
1
- {"version":3,"file":"evaluate.js","sourceRoot":"","sources":["../../../src/tools/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,MAAM,gCAAgC,CAAC;AAE9F,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC;IACzE,0EAA0E;IAC1E,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAC3C,uGAAuG,CAC1G;IACD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CACtD,+EAA+E,CAClF;IACD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAClC,6EAA6E,CAChF;CACJ,CAAC,CAAC;AAEH,SAAS,qBAAqB,CAAC,KAM9B,EAAE,MAAc;IACb,OAAO,GAAG,MAAM,MAAM;QAClB,oBAAoB,KAAK,CAAC,aAAa,MAAM;QAC7C,WAAW,KAAK,CAAC,WAAW,MAAM;QAClC,oBAAoB,KAAK,CAAC,YAAY,MAAM;QAC5C,uBAAuB;QACvB,wFAAwF;QACxF,yBAAyB,KAAK,CAAC,KAAK,KAAK;QACzC,gCAAgC;QAChC,mCAAmC;QACnC,6DAA6D,CAAC;AACtE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAqC;IAChE,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,KAAK,SAAS,CAAC;IAC3C,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC;IAC7C,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,KAAK,SAAS,CAAC;IAErD,MAAM,oBAAoB,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAEpF,oDAAoD;IACpD,IAAI,oBAAoB,GAAG,CAAC,IAAI,oBAAoB,GAAG,CAAC,EAAE,CAAC;QACvD,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,uGAAuG;wBACzG,wDAAwD;wBACxD,0DAA0D;iBACjE,CAAC;SACL,CAAC;IACN,CAAC;IAED,sDAAsD;IACtD,IAAI,oBAAoB,KAAK,CAAC,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;QAEnE,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YAClB,OAAO;gBACH,OAAO,EAAE,CAAC;wBACN,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,KAAK,CAAC,OAAO,IAAI,qDAAqD;qBAC/E,CAAC;aACL,CAAC;QACN,CAAC;QAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,mCAAmC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;QACrG,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,
CAAC;SACvD,CAAC;IACN,CAAC;IAED,yCAAyC;IACzC,MAAM,MAAM,GAAG,uBAAuB,CAClC,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,eAAgB,EACtB,KAAK,CAAC,KAAM,EACZ,KAAK,CAAC,MAAO,CAChB,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;IAEnC,IAAI,SAAS,CAAC,SAAS,EAAE,CAAC;QACtB,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,WAAW,MAAM,CAAC,KAAK,kBAAkB,MAAM,CAAC,cAAc,QAAQ;wBACxE,8BAA8B,KAAK,CAAC,SAAS,yBAAyB;wBACtE,wFAAwF;iBAC/F,CAAC;SACL,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,SAAS,EAC5C,WAAW,MAAM,CAAC,KAAK,kBAAkB,MAAM,CAAC,cAAc,0BAA0B,CAC3F,CAAC;IAEF,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;KACvD,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"evaluate.js","sourceRoot":"","sources":["../../../src/tools/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,MAAM,gCAAgC,CAAC;AAE9F,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACtD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC;IACzE,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAC3C,uGAAuG,CAC1G;IACD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CACtD,+EAA+E,CAClF;IACD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAClC,6EAA6E,CAChF;CACJ,CAAC,CAAC;AAEH,SAAS,qBAAqB,CAAC,KAM9B,EAAE,MAAc;IACb,OAAO,GAAG,MAAM,MAAM;QAClB,oBAAoB,KAAK,CAAC,aAAa,MAAM;QAC7C,WAAW,KAAK,CAAC,WAAW,MAAM;QAClC,oBAAoB,KAAK,CAAC,YAAY,MAAM;QAC5C,uBAAuB;QACvB,wFAAwF;QACxF,yBAAyB,KAAK,CAAC,KAAK,KAAK;QACzC,gCAAgC;QAChC,mCAAmC;QACnC,6DAA6D,CAAC;AACtE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAqC;IAChE,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,KAAK,SAAS,CAAC;IAC3C,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC;IAC7C,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,KAAK,SAAS,CAAC;IAErD,MAAM,oBAAoB,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAEpF,oDAAoD;IACpD,IAAI,oBAAoB,GAAG,CAAC,IAAI,oBAAoB,GAAG,CAAC,EAAE,CAAC;QACvD,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,uGAAuG;wBACzG,wDAAwD;wBACxD,0DAA0D;iBACjE,CAAC;SACL,CAAC;IACN,CAAC;IAED,sDAAsD;IACtD,IAAI,oBAAoB,KAAK,CAAC,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;QAEnE,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YAClB,OAAO;gBACH,OAAO,EAAE,CAAC;wBACN,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,KAAK,CAAC,OAAO,IAAI,qDAAqD;qBAC/E,CAAC;aACL,CAAC;QACN,CAAC;QAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,mCAAmC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;QACrG,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;SACvD,CA
AC;IACN,CAAC;IAED,yCAAyC;IACzC,MAAM,MAAM,GAAG,uBAAuB,CAClC,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,eAAgB,EACtB,KAAK,CAAC,KAAM,EACZ,KAAK,CAAC,MAAO,CAChB,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;IAEnC,IAAI,SAAS,CAAC,SAAS,EAAE,CAAC;QACtB,OAAO;YACH,OAAO,EAAE,CAAC;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,WAAW,MAAM,CAAC,KAAK,kBAAkB,MAAM,CAAC,cAAc,QAAQ;wBACxE,8BAA8B,KAAK,CAAC,SAAS,yBAAyB;wBACtE,wFAAwF;iBAC/F,CAAC;SACL,CAAC;IACN,CAAC;IAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,SAAS,EAC5C,WAAW,MAAM,CAAC,KAAK,kBAAkB,MAAM,CAAC,cAAc,0BAA0B,CAC3F,CAAC;IAEF,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;KACvD,CAAC;AACN,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"get-audit-status.d.ts","sourceRoot":"","sources":["../../../src/tools/get-audit-status.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAKxB,eAAO,MAAM,oBAAoB;;;;;;EAE/B,CAAC;AAuEH,wBAAsB,cAAc,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC;;;;;GAwD/E"}
1
+ {"version":3,"file":"get-audit-status.d.ts","sourceRoot":"","sources":["../../../src/tools/get-audit-status.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAKxB,eAAO,MAAM,oBAAoB;;;;;;EAE/B,CAAC;AAwEH,wBAAsB,cAAc,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC;;;;;GA4D/E"}
@@ -1,6 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import { readLog, readAllLogs } from "../logger/audit-log.js";
3
- import { REQUIRED_DIMS, PASSING_SCORE } from "../evaluation/checklist.js";
3
+ import { getRequiredDimsForStep, PASSING_SCORE } from "../evaluation/checklist.js";
4
4
  export const getAuditStatusSchema = z.object({
5
5
  caseNames: z.array(z.string()).optional().describe("Optional list of caseNames to explicitly query. Omit to view global dashboard."),
6
6
  });
@@ -50,7 +50,8 @@ function renderStepDetails(log) {
50
50
  if (step.expectedOutcome) {
51
51
  md += `- **Expected Outcome**: ${step.expectedOutcome}\n`;
52
52
  }
53
- for (const dim of REQUIRED_DIMS) {
53
+ const requiredDims = getRequiredDimsForStep(!!step.expectedOutcome);
54
+ for (const dim of requiredDims) {
54
55
  const ev = step.evaluations ? step.evaluations[dim] : undefined;
55
56
  if (ev) {
56
57
  const tag = ev.score >= PASSING_SCORE ? "Pass ✅" : "Fail ❌";
@@ -89,17 +90,21 @@ export async function getAuditStatus(input) {
89
90
  let failedItems = [];
90
91
  let missingItems = [];
91
92
  for (const step of Object.values(log.steps)) {
93
+ const requiredDims = getRequiredDimsForStep(!!step.expectedOutcome);
92
94
  const evaluated = Object.keys(step.evaluations || {});
93
- const missing = REQUIRED_DIMS.filter(d => !evaluated.includes(d));
95
+ const missing = requiredDims.filter(d => !evaluated.includes(d));
94
96
  if (missing.length > 0) {
95
97
  allComplete = false;
96
98
  missingItems.push({ stepIndex: step.stepIndex, missing });
97
99
  }
98
- for (const [dim, ev] of Object.entries(step.evaluations || {})) {
99
- totalScore += ev.score;
100
- totalCount += 1;
101
- if (ev.score < PASSING_SCORE) {
102
- failedItems.push({ stepIndex: step.stepIndex, dim, score: ev.score, reason: ev.reason });
100
+ for (const dim of requiredDims) {
101
+ const ev = step.evaluations?.[dim];
102
+ if (ev) {
103
+ totalScore += ev.score;
104
+ totalCount += 1;
105
+ if (ev.score < PASSING_SCORE) {
106
+ failedItems.push({ stepIndex: step.stepIndex, dim, score: ev.score, reason: ev.reason });
107
+ }
103
108
  }
104
109
  }
105
110
  }
@@ -1 +1 @@
1
- {"version":3,"file":"get-audit-status.js","sourceRoot":"","sources":["../../../src/tools/get-audit-status.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG1E,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gFAAgF,CAAC;CACvI,CAAC,CAAC;AAEH,2CAA2C;AAC3C,SAAS,wBAAwB,CAAC,GAAa,EAAE,YAAwD;IACrG,IAAI,EAAE,GAAG,+BAA+B,GAAG,CAAC,QAAQ,IAAI,CAAC;IACzD,EAAE,IAAI,0CAA0C,CAAC;IAEjD,kBAAkB;IAClB,EAAE,IAAI,6BAA6B,CAAC;IACpC,EAAE,IAAI,0HAA0H,CAAC;IACjI,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAC9B,EAAE,IAAI,UAAU,IAAI,CAAC,SAAS,cAAc,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;IAC7E,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,eAAe;IACf,EAAE,IAAI,2BAA2B,CAAC;IAClC,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,yCAAyC;AACzC,SAAS,sBAAsB,CAAC,GAAa,EAAE,GAAW,EAAE,MAAe,EAAE,WAAkB;IAC3F,IAAI,EAAE,GAAG,4BAA4B,GAAG,CAAC,QAAQ,IAAI,CAAC;IACtD,EAAE,IAAI,uCAAuC,CAAC;IAE9C,EAAE,IAAI,wBAAwB,CAAC;IAC/B,EAAE,IAAI,wBAAwB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IACtD,EAAE,IAAI,iBAAiB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC;IAExD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,EAAE,IAAI,uBAAuB,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC1B,EAAE,IAAI,YAAY,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,IAAI,CAAC;QACjF,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,EAAE,IAAI,4BAA4B,CAAC;IACvC,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,EAAE,IAAI,wBAAwB,CAAC;IAC/B,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAa;IACpC,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IACjF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,EAAE,IAAI,YAAY,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,WAAW,IAAI,CAAC;QAC1D,EAA
E,IAAI,iBAAiB,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,CAAC,QAAQ,IAAI,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QAC7H,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,EAAE,IAAI,2BAA2B,IAAI,CAAC,eAAe,IAAI,CAAC;QAC9D,CAAC;QAED,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;YAC9B,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAChE,IAAI,EAAE,EAAE,CAAC;gBACL,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,IAAI,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC5D,EAAE,IAAI,OAAO,GAAG,aAAa,EAAE,CAAC,KAAK,IAAI,GAAG,MAAM,EAAE,CAAC,MAAM,IAAI,CAAC;YACpE,CAAC;iBAAM,CAAC;gBACJ,EAAE,IAAI,OAAO,GAAG,6BAA6B,CAAC;YAClD,CAAC;QACL,CAAC;QACD,EAAE,IAAI,IAAI,CAAC;IACf,CAAC;IACD,OAAO,EAAE,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA2C;IAC5E,IAAI,IAAI,GAAe,EAAE,CAAC;IAC1B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1B,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG,WAAW,EAAE,CAAC;IACzB,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,gFAAgF,EAAE,CAAC;SAC/H,CAAC;IACN,CAAC;IAED,IAAI,cAAc,GAAG,EAAE,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,WAAW,GAAG,IAAI,CAAC;QACvB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,WAAW,GAAU,EAAE,CAAC;QAC5B,IAAI,YAAY,GAA+C,EAAE,CAAC;QAElE,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YAElE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,WAAW,GAAG,KAAK,CAAC;gBACpB,YAAY,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,KAAK,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,W
AAW,IAAI,EAAE,CAAC,EAAE,CAAC;gBAC7D,UAAU,IAAI,EAAE,CAAC,KAAK,CAAC;gBACvB,UAAU,IAAI,CAAC,CAAC;gBAChB,IAAI,EAAE,CAAC,KAAK,GAAG,aAAa,EAAE,CAAC;oBAC3B,WAAW,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC7F,CAAC;YACL,CAAC;QACL,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACf,cAAc,IAAI,wBAAwB,CAAC,GAAG,EAAE,YAAY,CAAC,GAAG,SAAS,CAAC;QAC9E,CAAC;aAAM,CAAC;YACJ,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YACzD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC;YACxC,cAAc,IAAI,sBAAsB,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,CAAC,GAAG,SAAS,CAAC;QACxF,CAAC;IACL,CAAC;IAED,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;KACpE,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"get-audit-status.js","sourceRoot":"","sources":["../../../src/tools/get-audit-status.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAGnF,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gFAAgF,CAAC;CACvI,CAAC,CAAC;AAEH,2CAA2C;AAC3C,SAAS,wBAAwB,CAAC,GAAa,EAAE,YAAwD;IACrG,IAAI,EAAE,GAAG,+BAA+B,GAAG,CAAC,QAAQ,IAAI,CAAC;IACzD,EAAE,IAAI,0CAA0C,CAAC;IAEjD,kBAAkB;IAClB,EAAE,IAAI,6BAA6B,CAAC;IACpC,EAAE,IAAI,0HAA0H,CAAC;IACjI,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAC9B,EAAE,IAAI,UAAU,IAAI,CAAC,SAAS,cAAc,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;IAC7E,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,eAAe;IACf,EAAE,IAAI,2BAA2B,CAAC;IAClC,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,yCAAyC;AACzC,SAAS,sBAAsB,CAAC,GAAa,EAAE,GAAW,EAAE,MAAe,EAAE,WAAkB;IAC3F,IAAI,EAAE,GAAG,4BAA4B,GAAG,CAAC,QAAQ,IAAI,CAAC;IACtD,EAAE,IAAI,uCAAuC,CAAC;IAE9C,EAAE,IAAI,wBAAwB,CAAC;IAC/B,EAAE,IAAI,wBAAwB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IACtD,EAAE,IAAI,iBAAiB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC;IAExD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,EAAE,IAAI,uBAAuB,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC1B,EAAE,IAAI,YAAY,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,IAAI,CAAC;QACjF,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,EAAE,IAAI,4BAA4B,CAAC;IACvC,CAAC;IACD,EAAE,IAAI,IAAI,CAAC;IAEX,EAAE,IAAI,wBAAwB,CAAC;IAC/B,EAAE,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAE7B,OAAO,EAAE,CAAC;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAa;IACpC,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IACjF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,EAAE,IAAI,YAAY,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,WAAW,IAAI,CAAC;QAC1D,E
AAE,IAAI,iBAAiB,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,CAAC,QAAQ,IAAI,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QAC7H,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,EAAE,IAAI,2BAA2B,IAAI,CAAC,eAAe,IAAI,CAAC;QAC9D,CAAC;QAED,MAAM,YAAY,GAAG,sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACpE,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;YAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAChE,IAAI,EAAE,EAAE,CAAC;gBACL,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,IAAI,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC5D,EAAE,IAAI,OAAO,GAAG,aAAa,EAAE,CAAC,KAAK,IAAI,GAAG,MAAM,EAAE,CAAC,MAAM,IAAI,CAAC;YACpE,CAAC;iBAAM,CAAC;gBACJ,EAAE,IAAI,OAAO,GAAG,6BAA6B,CAAC;YAClD,CAAC;QACL,CAAC;QACD,EAAE,IAAI,IAAI,CAAC;IACf,CAAC;IACD,OAAO,EAAE,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA2C;IAC5E,IAAI,IAAI,GAAe,EAAE,CAAC;IAC1B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1B,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG,WAAW,EAAE,CAAC;IACzB,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpB,OAAO;YACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,gFAAgF,EAAE,CAAC;SAC/H,CAAC;IACN,CAAC;IAED,IAAI,cAAc,GAAG,EAAE,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,WAAW,GAAG,IAAI,CAAC;QACvB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,WAAW,GAAU,EAAE,CAAC;QAC5B,IAAI,YAAY,GAA+C,EAAE,CAAC;QAElE,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,YAAY,GAAG,sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACpE,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YAEjE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,WAAW,GAAG,KAAK,CAAC;gBACpB,YAAY,CAAC,IAAI,CAAC,EAAE
,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;gBAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,GAAG,CAAC,CAAC;gBACnC,IAAI,EAAE,EAAE,CAAC;oBACL,UAAU,IAAI,EAAE,CAAC,KAAK,CAAC;oBACvB,UAAU,IAAI,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,GAAG,aAAa,EAAE,CAAC;wBAC3B,WAAW,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;oBAC7F,CAAC;gBACL,CAAC;YACL,CAAC;QACL,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACf,cAAc,IAAI,wBAAwB,CAAC,GAAG,EAAE,YAAY,CAAC,GAAG,SAAS,CAAC;QAC9E,CAAC;aAAM,CAAC;YACJ,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YACzD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC;YACxC,cAAc,IAAI,sBAAsB,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,CAAC,GAAG,SAAS,CAAC;QACxF,CAAC;IACL,CAAC;IAED,OAAO;QACH,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;KACpE,CAAC;AACN,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wa008/ui-audit-mcp",
3
- "version": "2.3.2",
3
+ "version": "2.4.1",
4
4
  "description": "MCP server for iOS app UI evaluation and testing, powered by idb + xcrun simctl",
5
5
  "type": "module",
6
6
  "main": "dist/src/index.js",
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=agent-demo.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"agent-demo.d.ts","sourceRoot":"","sources":["../../examples/agent-demo.ts"],"names":[],"mappings":""}
@@ -1,79 +0,0 @@
1
- import { Client } from "@modelcontextprotocol/sdk/client/index.js";
2
- import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
3
- import * as path from "path";
4
- // Connect to the local MCP server
5
- async function run() {
6
- console.log("Starting MCP Client...");
7
- const transport = new StdioClientTransport({
8
- command: "node",
9
- args: [path.join(process.cwd(), "dist/src/index.js")],
10
- });
11
- const client = new Client({
12
- name: "test-agent",
13
- version: "1.0.0",
14
- }, {
15
- capabilities: {},
16
- });
17
- try {
18
- await client.connect(transport);
19
- console.log("Connected to MCP server.");
20
- // 1. Launch App
21
- console.log("Launching Preferences app...");
22
- await client.callTool({
23
- name: "launch_app",
24
- arguments: { appId: "com.apple.Preferences" },
25
- });
26
- // 2. Take Screenshot
27
- console.log("Taking screenshot...");
28
- const screenshotResult = await client.callTool({
29
- name: "take_screenshot",
30
- arguments: {},
31
- });
32
- if (screenshotResult.imageBase64) {
33
- console.log(`Screenshot captured (${screenshotResult.width}x${screenshotResult.height})`);
34
- }
35
- // 3. Get Checklist (Screen)
36
- console.log("Getting checklist for 'SettingsScreen'...");
37
- const checklistResult = await client.callTool({
38
- name: "get_checklist",
39
- arguments: { type: "screen", screenName: "SettingsScreen" },
40
- });
41
- const checklistData = JSON.parse(checklistResult.content[0].text);
42
- const sessionId = checklistData.sessionId;
43
- const items = checklistData.checklist;
44
- console.log(`Session ID: ${sessionId}`);
45
- console.log(`Checklist items: ${items.map((i) => i.id).join(", ")}`);
46
- // 4. Submit Evaluation (Simulate passing)
47
- console.log("Submitting passing evaluation...");
48
- const scores = items.map((item) => ({
49
- id: item.id,
50
- score: 5,
51
- }));
52
- const evalResult = await client.callTool({
53
- name: "submit_evaluation",
54
- arguments: {
55
- sessionId: sessionId,
56
- scores: scores,
57
- },
58
- });
59
- console.log("Evaluation result:", JSON.parse(evalResult.content[0].text));
60
- // 5. Get Logs
61
- console.log("Fetching logs...");
62
- const logResult = await client.callTool({
63
- name: "get_evaluation_log",
64
- arguments: { limit: 1 },
65
- });
66
- console.log("Logs retrieved.");
67
- }
68
- catch (error) {
69
- console.error("Error running test agent:", error);
70
- }
71
- finally {
72
- // transport.close() is not exposed directly on StdioClientTransport in some versions,
73
- // but client.close() should handle it?
74
- // Actually, just exit process.
75
- process.exit(0);
76
- }
77
- }
78
- run();
79
- //# sourceMappingURL=agent-demo.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"agent-demo.js","sourceRoot":"","sources":["../../examples/agent-demo.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAG7B,kCAAkC;AAClC,KAAK,UAAU,GAAG;IACd,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,oBAAoB,CAAC;QACvC,OAAO,EAAE,MAAM;QACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,mBAAmB,CAAC,CAAC;KACxD,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,IAAI,MAAM,CACrB;QACI,IAAI,EAAE,YAAY;QAClB,OAAO,EAAE,OAAO;KACnB,EACD;QACI,YAAY,EAAE,EAAE;KACnB,CACJ,CAAC;IAEF,IAAI,CAAC;QACD,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAChC,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QAExC,gBAAgB;QAChB,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;QAC5C,MAAM,MAAM,CAAC,QAAQ,CAAC;YAClB,IAAI,EAAE,YAAY;YAClB,SAAS,EAAE,EAAE,KAAK,EAAE,uBAAuB,EAAE;SAChD,CAAC,CAAC;QAEH,qBAAqB;QACrB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;QACpC,MAAM,gBAAgB,GAAQ,MAAM,MAAM,CAAC,QAAQ,CAAC;YAChD,IAAI,EAAE,iBAAiB;YACvB,SAAS,EAAE,EAAE;SAChB,CAAC,CAAC;QAEH,IAAI,gBAAgB,CAAC,WAAW,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,wBAAwB,gBAAgB,CAAC,KAAK,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9F,CAAC;QAED,4BAA4B;QAC5B,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QACzD,MAAM,eAAe,GAAQ,MAAM,MAAM,CAAC,QAAQ,CAAC;YAC/C,IAAI,EAAE,eAAe;YACrB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE;SAC9D,CAAC,CAAC;QAEH,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAClE,MAAM,SAAS,GAAG,aAAa,CAAC,SAAS,CAAC;QAC1C,MAAM,KAAK,GAAG,aAAa,CAAC,SAAS,CAAC;QAEtC,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,EAAE,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,oBAAoB,KAAK,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAE1E,0CAA0C;QAC1C,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,CAAC;YACrC,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,KAAK,EAAE,CAAC;SACX,CAAC,CAAC,CAAC;QAEJ,MAAM,UAAU,GAAQ,MAAM,MAAM,CAAC,QAAQ,CAAC;YAC1C,IAAI,EAAE,mBAAmB;YACz
B,SAAS,EAAE;gBACP,SAAS,EAAE,SAAS;gBACpB,MAAM,EAAE,MAAM;aACjB;SACJ,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE1E,cAAc;QACd,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;QAChC,MAAM,SAAS,GAAQ,MAAM,MAAM,CAAC,QAAQ,CAAC;YACzC,IAAI,EAAE,oBAAoB;YAC1B,SAAS,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE;SAC1B,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;IAEnC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC,CAAC;IACtD,CAAC;YAAS,CAAC;QACP,sFAAsF;QACtF,uCAAuC;QACvC,+BAA+B;QAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC;AAED,GAAG,EAAE,CAAC"}