@athenaflow/plugin-agent-web-interface 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/.claude-plugin/plugin.json +19 -0
  2. package/.codex-plugin/plugin.json +16 -0
  3. package/.mcp.json +8 -0
  4. package/dist/1.0.4/.agents/plugins/marketplace.json +14 -0
  5. package/dist/1.0.4/claude/plugin/.claude-plugin/plugin.json +19 -0
  6. package/dist/1.0.4/claude/plugin/.mcp.json +8 -0
  7. package/dist/1.0.4/claude/plugin/package.json +9 -0
  8. package/dist/1.0.4/claude/plugin/skills/agent-web-interface-guide/SKILL.md +302 -0
  9. package/dist/1.0.4/claude/plugin/skills/agent-web-interface-guide/agents/claude.yaml +3 -0
  10. package/dist/1.0.4/claude/plugin/skills/agent-web-interface-guide/agents/openai.yaml +10 -0
  11. package/dist/1.0.4/codex/plugin/.codex-plugin/plugin.json +16 -0
  12. package/dist/1.0.4/codex/plugin/.mcp.json +8 -0
  13. package/dist/1.0.4/codex/plugin/package.json +9 -0
  14. package/dist/1.0.4/codex/plugin/skills/agent-web-interface-guide/SKILL.md +302 -0
  15. package/dist/1.0.4/codex/plugin/skills/agent-web-interface-guide/agents/claude.yaml +3 -0
  16. package/dist/1.0.4/codex/plugin/skills/agent-web-interface-guide/agents/openai.yaml +10 -0
  17. package/dist/1.0.4/release.json +18 -0
  18. package/dist/1.0.6/.agents/plugins/marketplace.json +14 -0
  19. package/dist/1.0.6/claude/plugin/.claude-plugin/plugin.json +19 -0
  20. package/dist/1.0.6/claude/plugin/.mcp.json +8 -0
  21. package/dist/1.0.6/claude/plugin/package.json +9 -0
  22. package/dist/1.0.6/claude/plugin/skills/agent-web-interface-guide/SKILL.md +303 -0
  23. package/dist/1.0.6/claude/plugin/skills/agent-web-interface-guide/agents/claude.yaml +3 -0
  24. package/dist/1.0.6/claude/plugin/skills/agent-web-interface-guide/agents/openai.yaml +10 -0
  25. package/dist/1.0.6/codex/plugin/.codex-plugin/plugin.json +16 -0
  26. package/dist/1.0.6/codex/plugin/.mcp.json +8 -0
  27. package/dist/1.0.6/codex/plugin/package.json +9 -0
  28. package/dist/1.0.6/codex/plugin/skills/agent-web-interface-guide/SKILL.md +303 -0
  29. package/dist/1.0.6/codex/plugin/skills/agent-web-interface-guide/agents/claude.yaml +3 -0
  30. package/dist/1.0.6/codex/plugin/skills/agent-web-interface-guide/agents/openai.yaml +10 -0
  31. package/dist/1.0.6/release.json +18 -0
  32. package/package.json +13 -0
  33. package/skills/agent-web-interface-guide/SKILL.md +303 -0
  34. package/skills/agent-web-interface-guide/agents/claude.yaml +3 -0
  35. package/skills/agent-web-interface-guide/agents/openai.yaml +10 -0
@@ -0,0 +1,18 @@
1
+ {
2
+ "schemaVersion": 1,
3
+ "pluginRef": "agent-web-interface@athena-workflow-marketplace",
4
+ "pluginName": "agent-web-interface",
5
+ "marketplaceName": "athena-workflow-marketplace",
6
+ "version": "1.0.6",
7
+ "artifacts": {
8
+ "claude": {
9
+ "type": "directory",
10
+ "path": "./claude/plugin"
11
+ },
12
+ "codex": {
13
+ "type": "marketplace",
14
+ "marketplacePath": "./.agents/plugins/marketplace.json",
15
+ "pluginPath": "./codex/plugin"
16
+ }
17
+ }
18
+ }
package/package.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "@athenaflow/plugin-agent-web-interface",
3
+ "version": "1.0.6",
4
+ "description": "Open live web pages, click through real flows, fill forms, add items to cart, and inspect page state or selectors",
5
+ "license": "MIT",
6
+ "publishConfig": {
7
+ "access": "public"
8
+ },
9
+ "scripts": {
10
+ "build:artifacts": "node ../../scripts/build-plugin-artifacts.mjs .",
11
+ "prepack": "npm run build:artifacts"
12
+ }
13
+ }
@@ -0,0 +1,303 @@
1
+ ---
2
+ name: agent-web-interface-guide
3
+ description: >
4
+ Use this skill to act on live web pages in a browser. It can open a page, click through flows,
5
+ type into fields, submit forms, add products to cart, review page state, and capture Playwright
6
+ selectors for important elements. Use it whenever the task includes a URL or page reference and
7
+ you need to check, verify, inspect, extract selectors from, or actively interact with that page.
8
+ allowed-tools: mcp__plugin_agent-web-interface_browser__navigate mcp__plugin_agent-web-interface_browser__snapshot mcp__plugin_agent-web-interface_browser__find mcp__plugin_agent-web-interface_browser__get_element mcp__plugin_agent-web-interface_browser__scroll_to mcp__plugin_agent-web-interface_browser__scroll mcp__plugin_agent-web-interface_browser__click mcp__plugin_agent-web-interface_browser__type mcp__plugin_agent-web-interface_browser__press mcp__plugin_agent-web-interface_browser__select mcp__plugin_agent-web-interface_browser__hover mcp__plugin_agent-web-interface_browser__get_form mcp__plugin_agent-web-interface_browser__get_field mcp__plugin_agent-web-interface_browser__list_pages mcp__plugin_agent-web-interface_browser__drag
9
+ ---
10
+
11
+ # Agent Web Interface Guide
12
+
13
+ Use this skill to open live web pages, carry out actions, move through multi-step flows, validate page state, and capture selectors for automation.
14
+
15
+ Common uses:
16
+ - Review a live page or multi-step flow
17
+ - Click through navigation, buttons, dialogs, and other actions
18
+ - Fill, submit, or inspect forms and validation states
19
+ - Add products to cart or complete other in-page actions
20
+ - Capture reliable Playwright selectors for key elements
21
+
22
+ ## Input
23
+
24
+ Parse the target URL and exploration goal from: $ARGUMENTS
25
+
26
+ ## Workflow
27
+
28
+ 1. **Navigate or recover the right page** — use `list_pages` and explicit `page_id` when session state may be ambiguous
29
+ 2. **Orient first** — read the current state, active region, and visible controls before acting
30
+ 3. **Choose the lightest useful tool**
31
+ - Use page state or `snapshot` output for quick orientation
32
+ - Use `find` with `label`, `kind`, and `region` to narrow targets
33
+ - Use `get_form` when the task is clearly form-driven
34
+ - Use `get_element` for a chosen target, offsets, or selector extraction
35
+ 4. **Act one step at a time** — click, type, select, scroll, or drag only as needed to advance the task
36
+ 5. **Reacquire state after meaningful changes** — after navigation, overlays, search expansion, dialog opening, or large DOM updates, refresh your understanding before reusing old `eid`s
37
+ 6. **Inspect forms or extract selectors only when relevant** — do this when the user asks for them or when they materially help complete the task
38
+ 7. **Report** what you did, what happened, and any selectors or form details that matter
39
+
40
+ ## Output Format
41
+
42
+ Always include:
43
+ 1. **What you accomplished** — the result, finding, or outcome
44
+ 2. **Steps taken** — pages visited, buttons clicked, forms filled
45
+ 3. **Observations** — notable page states, messages, and behaviors
46
+ 4. **Selectors** (when relevant) — Playwright-compatible selectors for key elements
47
+ 5. **Form details** (when relevant) — only include when they helped drive the task
48
+
49
+ ## Operating Heuristics
50
+
51
+ - Prefer `find` over manual scanning when snapshots are trimmed or the page is dense
52
+ - Filter `find` aggressively with `kind`, `label`, and `region` before broad exploration
53
+ - Expect search UIs to appear as buttons or comboboxes before they expose a text field
54
+ - Expect overlays, drawers, and dialogs to mutate the page in place without changing the URL
55
+ - Treat `eid`s as short-lived after large mutations; reacquire targets instead of assuming old ids still work
56
+ - Treat `get_form` as a helper, not as ground truth; busy pages may contain multiple unrelated forms
57
+ - Use `observations`, `baseline`, and `diff` to confirm whether an action actually changed the page
58
+ - Prefer sequential progress on gated flows; if a control is disabled, look for the prerequisite choice above it
59
+
60
+ ## State Snapshot Structure
61
+
62
+ Every navigation or action returns a `<state>` snapshot:
63
+
64
+ ```xml
65
+ <state step="N" title="Page Title" url="https://...">
66
+ <meta view="1521x752" scroll="0,0" layer="main" />
67
+ <baseline reason="first|navigation" />
68
+ <diff type="mutation" added="N" removed="N" />
69
+ <observations>...</observations>
70
+ <region name="main">...</region>
71
+ </state>
72
+ ```
73
+
74
+ ### Key Elements
75
+
76
+ | Element | Purpose |
77
+ |---------|---------|
78
+ | `<meta>` | Viewport size, scroll position, active layer |
79
+ | `<baseline reason="...">` | Fresh snapshot - `"first"` (initial load) or `"navigation"` (URL change) |
80
+ | `<diff type="mutation">` | Incremental update with `added`/`removed` counts |
81
+ | `<observations>` | What appeared/disappeared after the action |
82
+ | `<region>` | Semantic page areas with interactive elements |
83
+
84
+ ## Observations
85
+
86
+ After actions (click, type, select), watch for changes:
87
+
88
+ ```xml
89
+ <observations>
90
+ <appeared when="action">Your Bag is empty</appeared>
91
+ <appeared when="action" role="status"></appeared>
92
+ <disappeared when="action" role="status"></disappeared>
93
+ </observations>
94
+ ```
95
+
96
+ - `<appeared>`: New content visible after action
97
+ - `<disappeared>`: Content removed after action
98
+ - `role` attribute: Semantic role (status, alert, dialog)
99
+
100
+ ## Regions
101
+
102
+ Page content is organized into semantic regions:
103
+
104
+ ```xml
105
+ <region name="main">
106
+ <link id="..." href="...">Link text</link>
107
+ <btn id="...">Button text</btn>
108
+ <!-- trimmed 50 items. Use find with region=main to see all -->
109
+ </region>
110
+ <region name="nav" unchanged="true" count="90" />
111
+ ```
112
+
113
+ ### Region Types
114
+ - `main` - Primary content area
115
+ - `nav` - Navigation menus
116
+ - `header` - Page header
117
+ - `footer` - Page footer
118
+ - `form` - Form containers
119
+ - `aside` - Sidebars
120
+ - `search` - Search areas
121
+
122
+ ### Optimization Hints
123
+ - `unchanged="true" count="N"` - Region didn't change; only its element count is shown instead of its contents
124
+ - `<!-- trimmed N items -->` - Use `find` with `region` filter to see all
125
+
126
+ ## Element Types in Snapshots
127
+
128
+ | Tag | Element | Key Attributes |
129
+ |-----|---------|----------------|
130
+ | `<link>` | Hyperlink | `id`, `href` |
131
+ | `<btn>` | Button | `id`, `val`, `enabled` |
132
+ | `<rad>` | Radio button | `id`, `val`, `checked`, `focused` |
133
+ | `<sel>` | Dropdown/select | `id`, `expanded`, `focused` |
134
+ | `<elt>` | Input/generic | `id`, `type`, `val`, `focused`, `enabled`, `selected` |
135
+
136
+ ### Common Attributes
137
+
138
+ | Attribute | Meaning |
139
+ |-----------|---------|
140
+ | `id` | Element ID (eid) - use this to target the element |
141
+ | `enabled="false"` | Element is disabled (common in sequential forms) |
142
+ | `checked="true"` | Radio/checkbox is selected |
143
+ | `focused="true"` | Element has keyboard focus |
144
+ | `expanded="true"` | Dropdown is open |
145
+ | `selected="true"` | Option/tab is selected |
146
+ | `val` | Element value |
147
+
148
+ ## Progressive Enablement Pattern
149
+
150
+ Many sites use progressive enablement: later options stay disabled until earlier choices are made.
151
+
152
+ ```xml
153
+ <!-- Step 1: Model selection enabled -->
154
+ <rad id="model1" val="pro">iPhone 17 Pro</rad>
155
+ <rad id="color1" enabled="false" val="silver">Silver</rad> <!-- disabled -->
156
+
157
+ <!-- After selecting model, colors become enabled -->
158
+ <rad id="model1" checked="true" val="pro">iPhone 17 Pro</rad>
159
+ <rad id="color1" val="silver">Silver</rad> <!-- now enabled -->
160
+ ```
161
+
162
+ Common places this appears:
163
+ - Ecommerce product configuration
164
+ - Checkout and payment flows
165
+ - Onboarding wizards
166
+ - Settings pages with dependent options
167
+
168
+ **Strategy**: If you see `enabled="false"`, work upward to identify and complete the prerequisite step before continuing.
169
+
170
+ ## find Response
171
+
172
+ ```xml
173
+ <result type="find" page_id="..." snapshot_id="..." count="N">
174
+ <match eid="abc123"
175
+ kind="button|link|radio|checkbox|textbox|combobox|heading|image"
176
+ label="Button text"
177
+ region="main|nav|header|footer"
178
+ selector="role=button[name=&quot;...&quot;]"
179
+ visible="true"
180
+ enabled="true"
181
+ href="..." />
182
+ </result>
183
+ ```
184
+
185
+ ### Filter Parameters
186
+ - `kind`: Element type filter
187
+ - `label`: Case-insensitive substring match
188
+ - `region`: Restrict to semantic area
189
+ - `limit`: Max results (default 10)
190
+ - `include_readable`: Include text content (default true)
191
+
192
+ ## get_element Response
193
+
194
+ ```xml
195
+ <node eid="abc123" kind="link" region="main" group="tbody-28"
196
+ x="147.875" y="11.5" w="97.97" h="16.5"
197
+ display="inline" zone="top-left">
198
+ Element label text
199
+ <selector primary='role=link[name="..."]' />
200
+ <attrs href="..." />
201
+ </node>
202
+ ```
203
+
204
+ - `primary`: Best Playwright selector
205
+ - Position info: `x`, `y`, `w`, `h`, `zone`
206
+ - `group`: Logical grouping (for tables, lists)
207
+
208
+ ## get_form Response
209
+
210
+ ```xml
211
+ <forms page="page-id">
212
+ <form id="form-xxx" intent="search|login|signup|checkout" completion="100%">
213
+ <input eid="748" purpose="search">Search Wikipedia</input>
214
+ <combobox eid="750" purpose="selection" filled="true">EN</combobox>
215
+ <button eid="820" type="submit" primary="true">Search</button>
216
+ <next eid="748" reason="Optional field" />
217
+ </form>
218
+ </forms>
219
+ ```
220
+
221
+ - `intent`: Form purpose (search, login, checkout, etc.)
222
+ - `completion`: Percentage filled
223
+ - `next`: Suggested next field to fill with reason
224
+
225
+ ## list_pages Response
226
+
227
+ ```xml
228
+ <result type="list_pages" status="success">
229
+ <pages count="N">
230
+ <page page_id="page-xxx" url="https://..." title="Page Title" />
231
+ </pages>
232
+ </result>
233
+ ```
234
+
235
+ Use `page_id` to target specific browser tabs.
236
+
237
+ ## Session Recovery
238
+
239
+ The browser persists across conversation sessions — tabs from prior sessions remain open. In a new session, there is no "current" page; actions without `page_id` may target an arbitrary tab.
240
+
241
+ When encountering a "no page/session" error or resuming from a prior session:
242
+
243
+ 1. Call `list_pages` to see all open tabs with `page_id`, URL, and title
244
+ 2. Identify the target page by URL or title
245
+ 3. Pass `page_id` explicitly to all subsequent calls (`snapshot`, `find`, `click`, etc.)
246
+ 4. If the page is not found, navigate fresh — the tab may have been closed
247
+
248
+ **Caveats:**
249
+ - **Stale tab URLs**: `list_pages` shows the URL each tab had when it was opened, which may be out of date. For SPAs, use `snapshot` with `page_id` to see the actual current state.
250
+ - **Tab accumulation**: The browser accumulates tabs across sessions. Always use `page_id` to target the correct one.
251
+ - **Assume a single active work tab**: Do not assume that multiple useful tabs are open. Check `list_pages` instead of relying on memory from earlier turns.
252
+
253
+ ## Error Responses
254
+
255
+ ```xml
256
+ <error>Field not found in any form: abc123</error>
257
+ ```
258
+
259
+ Common errors:
260
+ - Element ID not found (page may have changed)
261
+ - Element not visible/enabled
262
+ - Form field not in any form context
263
+ - No page/session (see Session Recovery above)
264
+
265
+ When this happens:
266
+ 1. Re-check the current page state
267
+ 2. Re-run `find` or `get_form` from the latest state
268
+ 3. Continue only with fresh `eid`s
269
+
270
+ ## Canvas Interactions
271
+
272
+ `<canvas>` elements render pixels, not DOM nodes — standard selectors don't work inside them. Use these tools for canvas-based UIs (drawing apps, games, visualizations):
273
+
274
+ - **`inspect_canvas`** — the key tool. Pass a canvas `eid` and it auto-detects the rendering library (Fabric.js, Konva, PixiJS, Phaser, Three.js, EaselJS, or raw canvas), queries the scene graph for objects with positions/sizes/labels, and returns an annotated screenshot with coordinate grid overlay and bounding boxes. Supports configurable `grid_spacing` (use 10px for precise handle targeting).
275
+ - **`click`** with `eid` + `x`/`y` — click at offset relative to canvas top-left (e.g., select a shape)
276
+ - **`drag`** with `eid` + source/target coordinates — drag within canvas (e.g., move objects, scale/rotate handles)
277
+ - **`screenshot`** with `eid` — capture just the canvas to visually verify state
278
+
279
+ **Workflow:** `find` → `get_element` (position) → `inspect_canvas` (discover objects) → `click`/`drag` (interact) → re-inspect to verify.
280
+
281
+ ## Best Practices
282
+
283
+ 1. **Use `find`** when snapshot shows `<!-- trimmed -->`
284
+ 2. **Track `<baseline>` vs `<diff>`** to know if you have full or partial state
285
+ 3. **Always pass `page_id`** when working across sessions or with multiple tabs
286
+ 4. **Reacquire targets after large mutations** instead of reusing stale `eid`s
287
+ 5. **Keep selector extraction optional** unless the task asks for it or automation handoff is part of the outcome
288
+
289
+ ## Example Usage
290
+
291
+ ```
292
+ Claude Code: /agent-web-interface-guide https://airbnb.com Walk through the search and booking flow for stays in Tokyo
293
+ Codex: $agent-web-interface-guide https://airbnb.com Walk through the search and booking flow for stays in Tokyo
294
+
295
+ Claude Code: /agent-web-interface-guide https://apple.com/store Configure an iPhone and add it to the bag, then summarize the steps
296
+ Codex: $agent-web-interface-guide https://apple.com/store Configure an iPhone and add it to the bag, then summarize the steps
297
+
298
+ Claude Code: /agent-web-interface-guide https://developer.mozilla.org Find the Fetch API docs and note how the search flow behaves
299
+ Codex: $agent-web-interface-guide https://developer.mozilla.org Find the Fetch API docs and note how the search flow behaves
300
+
301
+ Claude Code: /agent-web-interface-guide https://example.com/login Extract the login form selectors and field purposes
302
+ Codex: $agent-web-interface-guide https://example.com/login Extract the login form selectors and field purposes
303
+ ```
@@ -0,0 +1,3 @@
1
+ frontmatter:
2
+ argument-hint: "<url> <what to explore or do>"
3
+ user-invocable: true
@@ -0,0 +1,10 @@
1
+ interface:
2
+ display_name: "Act On Live Web Page"
3
+ short_description: "Open a live page, complete web actions efficiently, and inspect state when needed"
4
+ default_prompt: "Open this site, carry out the requested flow efficiently, and report the important observations, state changes, and any relevant selectors or form details."
5
+
6
+ dependencies:
7
+ tools:
8
+ - type: "mcp"
9
+ value: "agent-web-interface"
10
+ description: "Browser automation tools for carrying out live page actions and inspecting the resulting state"