argus-testing 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- argus_testing-0.1.0/.gitignore +13 -0
- argus_testing-0.1.0/CLAUDE.md +72 -0
- argus_testing-0.1.0/LICENSE +21 -0
- argus_testing-0.1.0/PKG-INFO +145 -0
- argus_testing-0.1.0/README.md +119 -0
- argus_testing-0.1.0/argus/__init__.py +3 -0
- argus_testing-0.1.0/argus/browser.py +286 -0
- argus_testing-0.1.0/argus/cli.py +58 -0
- argus_testing-0.1.0/argus/config.py +68 -0
- argus_testing-0.1.0/argus/detector.py +271 -0
- argus_testing-0.1.0/argus/explorer.py +281 -0
- argus_testing-0.1.0/argus/mcp_server.py +547 -0
- argus_testing-0.1.0/argus/models.py +114 -0
- argus_testing-0.1.0/argus/planner.py +215 -0
- argus_testing-0.1.0/argus/reporter.py +187 -0
- argus_testing-0.1.0/docs/ROADMAP.md +52 -0
- argus_testing-0.1.0/docs/STATUS.md +75 -0
- argus_testing-0.1.0/docs/TODO.md +30 -0
- argus_testing-0.1.0/examples/focus.yaml +39 -0
- argus_testing-0.1.0/pyproject.toml +38 -0
- argus_testing-0.1.0/test-site/app.py +696 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Argus — AI-Powered Exploratory QA Agent
|
|
2
|
+
|
|
3
|
+
## What is this
|
|
4
|
+
|
|
5
|
+
Argus is an open-source tool that tests web applications the way a real user would. Give it a URL and it explores your app — clicking buttons, filling forms, trying edge cases — then generates an HTML report of every bug it finds.
|
|
6
|
+
|
|
7
|
+
**The key insight:** Existing testing tools (Playwright, Cypress) only test what you script. Argus discovers bugs you didn't think to test for.
|
|
8
|
+
|
|
9
|
+
## Two usage modes
|
|
10
|
+
|
|
11
|
+
1. **CLI mode** (`argus <url>`) — Standalone with built-in LLM planner via LiteLLM. Needs an API key (OpenAI, Anthropic, DeepSeek, Gemini, Ollama, etc.).
|
|
12
|
+
2. **MCP mode** (`argus-mcp`) — Claude Code becomes the AI brain. Argus provides browser tools. No API key needed. Configured via `claude mcp add argus -- argus-mcp`.
|
|
13
|
+
|
|
14
|
+
## Project goals
|
|
15
|
+
|
|
16
|
+
- **Primary:** Get GitHub stars. This means: solve a real pain point, one-command setup, great README with GIFs/screenshots, polished UX.
|
|
17
|
+
- **Secondary:** Portfolio piece for Big Tech AI/ML Engineer interviews. Demonstrate AI system engineering (not prompt wrappers) — async workers, state management, cost control, scaling.
|
|
18
|
+
- **Tertiary:** Eventually commercializable as a SaaS.
|
|
19
|
+
|
|
20
|
+
## Architecture
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
argus/
|
|
24
|
+
├── cli.py # CLI entry point (click)
|
|
25
|
+
├── config.py # YAML config + focus areas
|
|
26
|
+
├── browser.py # Playwright driver + DOM extraction + page content extraction + error capture
|
|
27
|
+
├── planner.py # LLM exploration strategy (LiteLLM) — CLI mode brain
|
|
28
|
+
├── detector.py # Bug detection: console/network errors + smart detection (text anomalies,
|
|
29
|
+
│ # count consistency, CSS state, toast cross-check, state verification)
|
|
30
|
+
├── explorer.py # Main loop: observe → plan → act → detect → verify → screenshot
|
|
31
|
+
├── reporter.py # Self-contained HTML report with embedded base64 screenshots
|
|
32
|
+
├── mcp_server.py # MCP server exposing browser tools for Claude Code (12 tools incl. verify_action)
|
|
33
|
+
└── models.py # Data models (Bug, Action, PageState, Screenshot, etc.)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Tech stack
|
|
37
|
+
|
|
38
|
+
Python 3.10+ | Playwright | LiteLLM | MCP SDK | Click | Rich | PyYAML
|
|
39
|
+
|
|
40
|
+
## Development
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
cd /Users/yichen/projects/argus
|
|
44
|
+
source .venv/bin/activate
|
|
45
|
+
pip install -e .
|
|
46
|
+
playwright install chromium
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Test site
|
|
50
|
+
|
|
51
|
+
`test-site/app.py` is BuggyTasks — a realistic task management app (Starlette) with **22 intentional bugs** across 4 difficulty tiers:
|
|
52
|
+
|
|
53
|
+
- **Tier 1 (surface):** console errors, dead links, API 500s
|
|
54
|
+
- **Tier 2 (form):** auth bypass, password mismatch, XSS, form data loss
|
|
55
|
+
- **Tier 3 (logic):** fake delete/save, off-by-one count, pagination dupes, race conditions
|
|
56
|
+
- **Tier 4 (UX):** case-sensitive search, broken dates, eternal "Loading...", misleading success toasts
|
|
57
|
+
|
|
58
|
+
The full bug catalog is documented at the top of `test-site/app.py`. Run with:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
cd test-site && python app.py # runs on http://127.0.0.1:5555
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## GitHub
|
|
65
|
+
|
|
66
|
+
Repo: https://github.com/chriswu727/argus (owner: chriswu727)
|
|
67
|
+
|
|
68
|
+
## Key files for context
|
|
69
|
+
|
|
70
|
+
- `docs/STATUS.md` — Current completion status and what's done
|
|
71
|
+
- `docs/ROADMAP.md` — Future features and priorities
|
|
72
|
+
- `docs/TODO.md` — Immediate next tasks
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yichen Wu
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: argus-testing
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-powered exploratory testing agent that discovers bugs like a real user
|
|
5
|
+
Project-URL: Homepage, https://github.com/chriswu727/argus
|
|
6
|
+
Project-URL: Repository, https://github.com/chriswu727/argus
|
|
7
|
+
Author-email: Yichen Wu <yichenwujob@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agent,ai,claude-code,exploratory-testing,mcp,playwright,qa,testing
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Software Development :: Testing
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: click>=8.0.0
|
|
18
|
+
Requires-Dist: jinja2>=3.0.0
|
|
19
|
+
Requires-Dist: litellm>=1.40.0
|
|
20
|
+
Requires-Dist: mcp[cli]>=1.0.0
|
|
21
|
+
Requires-Dist: playwright>=1.40.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0.0
|
|
23
|
+
Requires-Dist: pyyaml>=6.0
|
|
24
|
+
Requires-Dist: rich>=13.0.0
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# Argus
|
|
28
|
+
|
|
29
|
+
**AI-powered exploratory QA agent.** Give it a URL and it explores your app like a real user — clicking buttons, filling forms, trying edge cases — and finds bugs that scripted tests miss.
|
|
30
|
+
|
|
31
|
+
Unlike Playwright or Cypress, you don't write test scripts. Argus **discovers bugs you didn't think to test for.**
|
|
32
|
+
|
|
33
|
+
## What It Catches
|
|
34
|
+
|
|
35
|
+
Traditional test tools only catch what you explicitly script. Argus catches:
|
|
36
|
+
|
|
37
|
+
- **Console errors & crashes** — JS exceptions, unhandled promises
|
|
38
|
+
- **Broken API calls** — HTTP 4xx/5xx responses
|
|
39
|
+
- **Fake success operations** — "Saved!" toast shown but server returned 500
|
|
40
|
+
- **Silent data loss** — delete says "Deleted!" but item still exists on refresh
|
|
41
|
+
- **Broken date formatting** — "1.52 days ago" instead of "2 days ago"
|
|
42
|
+
- **Count mismatches** — dashboard says "7 tasks" but there are actually 8
|
|
43
|
+
- **Dead links** — navigation links pointing to 404 pages
|
|
44
|
+
- **XSS vulnerabilities** — unsanitized user input reflected in HTML
|
|
45
|
+
|
|
46
|
+
## Quick Start (MCP Server for Claude Code)
|
|
47
|
+
|
|
48
|
+
The recommended way to use Argus. Claude Code becomes the AI brain — no API key needed.
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install argus-testing
|
|
52
|
+
playwright install chromium
|
|
53
|
+
claude mcp add argus -- argus-mcp
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Then in Claude Code:
|
|
57
|
+
|
|
58
|
+
> "Test my app at http://localhost:3000, focus on the checkout flow"
|
|
59
|
+
|
|
60
|
+
Claude Code will explore your app using Argus tools, try edge cases, verify that actions actually persist, and generate an HTML bug report.
|
|
61
|
+
|
|
62
|
+
### MCP Tools
|
|
63
|
+
|
|
64
|
+
| Tool | What it does |
|
|
65
|
+
|------|-------------|
|
|
66
|
+
| `start_session(url)` | Launch browser, navigate to URL |
|
|
67
|
+
| `get_page_state()` | See all interactive elements + page content + counts + toasts |
|
|
68
|
+
| `click(index)` | Click an element |
|
|
69
|
+
| `type_text(index, text)` | Type into an input field |
|
|
70
|
+
| `navigate(url)` | Go to a URL |
|
|
71
|
+
| `screenshot(name)` | Capture the current page |
|
|
72
|
+
| `get_errors()` | Get console/network errors + smart detection (broken dates, count mismatches, misleading toasts) |
|
|
73
|
+
| `verify_action(type, text, url)` | Verify a delete/edit actually persisted by navigating and checking |
|
|
74
|
+
| `end_session()` | Close browser, generate HTML report |
|
|
75
|
+
|
|
76
|
+
## Alternative: Standalone CLI
|
|
77
|
+
|
|
78
|
+
Bring your own LLM API key. Argus has a built-in AI planner that decides what to explore.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install argus-testing
|
|
82
|
+
playwright install chromium
|
|
83
|
+
|
|
84
|
+
export DEEPSEEK_API_KEY=sk-... # or OPENAI_API_KEY, ANTHROPIC_API_KEY
|
|
85
|
+
|
|
86
|
+
argus http://localhost:3000 --model deepseek/deepseek-chat -n 50
|
|
87
|
+
argus http://localhost:3000 -f "test login with edge cases" --headed
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Supports 100+ models via [LiteLLM](https://github.com/BerriAI/litellm): OpenAI, Anthropic, DeepSeek, Gemini, Ollama (free/local), etc.
|
|
91
|
+
|
|
92
|
+
## Smart Detection
|
|
93
|
+
|
|
94
|
+
Argus goes beyond console errors. It extracts page content and analyzes it:
|
|
95
|
+
|
|
96
|
+
| Detection | How it works | Example bug found |
|
|
97
|
+
|-----------|-------------|-------------------|
|
|
98
|
+
| **State verification** | After delete/edit, navigates back and checks if the change persisted | "Deleted!" shown but item reappears on refresh |
|
|
99
|
+
| **Toast + error cross-check** | Correlates success toasts with HTTP 500 responses | Settings shows "Saved!" but server crashed |
|
|
100
|
+
| **Text anomaly scanning** | Regex patterns on visible page text | "NaN", "1.52 days ago", eternal "Loading..." |
|
|
101
|
+
| **Count consistency** | Compares displayed counts against actual rendered items | Header says "7 tasks" but 8 items visible |
|
|
102
|
+
| **CSS state analysis** | Checks semantic CSS classes for contradictory states | "0 remaining" shown in red (should be success) |
|
|
103
|
+
|
|
104
|
+
## Bug Report
|
|
105
|
+
|
|
106
|
+
Each session generates a self-contained HTML report with:
|
|
107
|
+
|
|
108
|
+
- Bug cards with severity, type, description, and reproduction steps
|
|
109
|
+
- Embedded screenshots (base64 — no external files needed)
|
|
110
|
+
- Testing timeline showing every page visited
|
|
111
|
+
- Console and network error logs
|
|
112
|
+
|
|
113
|
+
## How It Works
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
You give a URL
|
|
117
|
+
→ Argus opens a real browser (Playwright)
|
|
118
|
+
→ AI explores: clicks, types, navigates, tries edge cases
|
|
119
|
+
→ Smart detection analyzes page content after every action
|
|
120
|
+
→ Generates HTML report with all bugs found
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Architecture
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
argus/
|
|
127
|
+
├── browser.py # Playwright driver + DOM extraction + page content analysis
|
|
128
|
+
├── detector.py # Smart bug detection (5 detection methods)
|
|
129
|
+
├── mcp_server.py # MCP server (12 tools) for Claude Code
|
|
130
|
+
├── explorer.py # CLI exploration loop
|
|
131
|
+
├── planner.py # LLM planner for CLI mode (LiteLLM)
|
|
132
|
+
├── reporter.py # Self-contained HTML report generator
|
|
133
|
+
├── models.py # Data models
|
|
134
|
+
├── config.py # YAML config + focus areas
|
|
135
|
+
└── cli.py # CLI entry point
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Requirements
|
|
139
|
+
|
|
140
|
+
- Python 3.10+
|
|
141
|
+
- Chromium (install it once with `playwright install chromium`)
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Argus
|
|
2
|
+
|
|
3
|
+
**AI-powered exploratory QA agent.** Give it a URL and it explores your app like a real user — clicking buttons, filling forms, trying edge cases — and finds bugs that scripted tests miss.
|
|
4
|
+
|
|
5
|
+
Unlike Playwright or Cypress, you don't write test scripts. Argus **discovers bugs you didn't think to test for.**
|
|
6
|
+
|
|
7
|
+
## What It Catches
|
|
8
|
+
|
|
9
|
+
Traditional test tools only catch what you explicitly script. Argus catches:
|
|
10
|
+
|
|
11
|
+
- **Console errors & crashes** — JS exceptions, unhandled promises
|
|
12
|
+
- **Broken API calls** — HTTP 4xx/5xx responses
|
|
13
|
+
- **Fake success operations** — "Saved!" toast shown but server returned 500
|
|
14
|
+
- **Silent data loss** — delete says "Deleted!" but item still exists on refresh
|
|
15
|
+
- **Broken date formatting** — "1.52 days ago" instead of "2 days ago"
|
|
16
|
+
- **Count mismatches** — dashboard says "7 tasks" but there are actually 8
|
|
17
|
+
- **Dead links** — navigation links pointing to 404 pages
|
|
18
|
+
- **XSS vulnerabilities** — unsanitized user input reflected in HTML
|
|
19
|
+
|
|
20
|
+
## Quick Start (MCP Server for Claude Code)
|
|
21
|
+
|
|
22
|
+
The recommended way to use Argus. Claude Code becomes the AI brain — no API key needed.
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install argus-testing
|
|
26
|
+
playwright install chromium
|
|
27
|
+
claude mcp add argus -- argus-mcp
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Then in Claude Code:
|
|
31
|
+
|
|
32
|
+
> "Test my app at http://localhost:3000, focus on the checkout flow"
|
|
33
|
+
|
|
34
|
+
Claude Code will explore your app using Argus tools, try edge cases, verify that actions actually persist, and generate an HTML bug report.
|
|
35
|
+
|
|
36
|
+
### MCP Tools
|
|
37
|
+
|
|
38
|
+
| Tool | What it does |
|
|
39
|
+
|------|-------------|
|
|
40
|
+
| `start_session(url)` | Launch browser, navigate to URL |
|
|
41
|
+
| `get_page_state()` | See all interactive elements + page content + counts + toasts |
|
|
42
|
+
| `click(index)` | Click an element |
|
|
43
|
+
| `type_text(index, text)` | Type into an input field |
|
|
44
|
+
| `navigate(url)` | Go to a URL |
|
|
45
|
+
| `screenshot(name)` | Capture the current page |
|
|
46
|
+
| `get_errors()` | Get console/network errors + smart detection (broken dates, count mismatches, misleading toasts) |
|
|
47
|
+
| `verify_action(type, text, url)` | Verify a delete/edit actually persisted by navigating and checking |
|
|
48
|
+
| `end_session()` | Close browser, generate HTML report |
|
|
49
|
+
|
|
50
|
+
## Alternative: Standalone CLI
|
|
51
|
+
|
|
52
|
+
Bring your own LLM API key. Argus has a built-in AI planner that decides what to explore.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install argus-testing
|
|
56
|
+
playwright install chromium
|
|
57
|
+
|
|
58
|
+
export DEEPSEEK_API_KEY=sk-... # or OPENAI_API_KEY, ANTHROPIC_API_KEY
|
|
59
|
+
|
|
60
|
+
argus http://localhost:3000 --model deepseek/deepseek-chat -n 50
|
|
61
|
+
argus http://localhost:3000 -f "test login with edge cases" --headed
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Supports 100+ models via [LiteLLM](https://github.com/BerriAI/litellm): OpenAI, Anthropic, DeepSeek, Gemini, Ollama (free/local), etc.
|
|
65
|
+
|
|
66
|
+
## Smart Detection
|
|
67
|
+
|
|
68
|
+
Argus goes beyond console errors. It extracts page content and analyzes it:
|
|
69
|
+
|
|
70
|
+
| Detection | How it works | Example bug found |
|
|
71
|
+
|-----------|-------------|-------------------|
|
|
72
|
+
| **State verification** | After delete/edit, navigates back and checks if the change persisted | "Deleted!" shown but item reappears on refresh |
|
|
73
|
+
| **Toast + error cross-check** | Correlates success toasts with HTTP 500 responses | Settings shows "Saved!" but server crashed |
|
|
74
|
+
| **Text anomaly scanning** | Regex patterns on visible page text | "NaN", "1.52 days ago", eternal "Loading..." |
|
|
75
|
+
| **Count consistency** | Compares displayed counts against actual rendered items | Header says "7 tasks" but 8 items visible |
|
|
76
|
+
| **CSS state analysis** | Checks semantic CSS classes for contradictory states | "0 remaining" shown in red (should be success) |
|
|
77
|
+
|
|
78
|
+
## Bug Report
|
|
79
|
+
|
|
80
|
+
Each session generates a self-contained HTML report with:
|
|
81
|
+
|
|
82
|
+
- Bug cards with severity, type, description, and reproduction steps
|
|
83
|
+
- Embedded screenshots (base64 — no external files needed)
|
|
84
|
+
- Testing timeline showing every page visited
|
|
85
|
+
- Console and network error logs
|
|
86
|
+
|
|
87
|
+
## How It Works
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
You give a URL
|
|
91
|
+
→ Argus opens a real browser (Playwright)
|
|
92
|
+
→ AI explores: clicks, types, navigates, tries edge cases
|
|
93
|
+
→ Smart detection analyzes page content after every action
|
|
94
|
+
→ Generates HTML report with all bugs found
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Architecture
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
argus/
|
|
101
|
+
├── browser.py # Playwright driver + DOM extraction + page content analysis
|
|
102
|
+
├── detector.py # Smart bug detection (5 detection methods)
|
|
103
|
+
├── mcp_server.py # MCP server (12 tools) for Claude Code
|
|
104
|
+
├── explorer.py # CLI exploration loop
|
|
105
|
+
├── planner.py # LLM planner for CLI mode (LiteLLM)
|
|
106
|
+
├── reporter.py # Self-contained HTML report generator
|
|
107
|
+
├── models.py # Data models
|
|
108
|
+
├── config.py # YAML config + focus areas
|
|
109
|
+
└── cli.py # CLI entry point
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Requirements
|
|
113
|
+
|
|
114
|
+
- Python 3.10+
|
|
115
|
+
- Chromium (install it once with `playwright install chromium`)
|
|
116
|
+
|
|
117
|
+
## License
|
|
118
|
+
|
|
119
|
+
MIT
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from playwright.async_api import Browser, BrowserContext, Page, async_playwright
|
|
8
|
+
|
|
9
|
+
from typing import Dict, List, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
from .models import InteractiveElement, PageState
|
|
12
|
+
|
|
13
|
+
# JS snippet to extract visible interactive elements from the page.
|
|
14
|
+
_EXTRACT_ELEMENTS_JS = """
|
|
15
|
+
() => {
|
|
16
|
+
const sel = 'a, button, input, select, textarea, [role="button"], [role="link"], [role="tab"], [role="menuitem"], [onclick], [tabindex]:not([tabindex="-1"])';
|
|
17
|
+
const els = document.querySelectorAll(sel);
|
|
18
|
+
return Array.from(els).map((el, i) => {
|
|
19
|
+
const rect = el.getBoundingClientRect();
|
|
20
|
+
const style = window.getComputedStyle(el);
|
|
21
|
+
if (rect.width === 0 || rect.height === 0 || style.display === 'none' || style.visibility === 'hidden') return null;
|
|
22
|
+
return {
|
|
23
|
+
index: i,
|
|
24
|
+
tag: el.tagName.toLowerCase(),
|
|
25
|
+
type: el.type || null,
|
|
26
|
+
text: (el.textContent || '').trim().slice(0, 100) || null,
|
|
27
|
+
placeholder: el.placeholder || null,
|
|
28
|
+
href: el.href || null,
|
|
29
|
+
value: el.value || null,
|
|
30
|
+
disabled: el.disabled || false,
|
|
31
|
+
role: el.getAttribute('role') || null,
|
|
32
|
+
aria_label: el.getAttribute('aria-label') || null,
|
|
33
|
+
name: el.name || null,
|
|
34
|
+
id: el.id || null,
|
|
35
|
+
};
|
|
36
|
+
}).filter(Boolean);
|
|
37
|
+
}
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
# JS snippet to extract full page content for smart detection.
|
|
41
|
+
_EXTRACT_PAGE_CONTENT_JS = """
|
|
42
|
+
() => {
|
|
43
|
+
const result = { pageText: '', toasts: [], counts: {}, cssIndicators: [], itemLists: {} };
|
|
44
|
+
|
|
45
|
+
// 1. Full visible text — simple and robust
|
|
46
|
+
try {
|
|
47
|
+
result.pageText = (document.body.innerText || '').slice(0, 5000);
|
|
48
|
+
} catch(e) {}
|
|
49
|
+
|
|
50
|
+
// 2. Toast/notification messages
|
|
51
|
+
try {
|
|
52
|
+
const sels = '.toast, [role="alert"], .alert-success, .alert-error, .alert-warning, [class*="toast"]';
|
|
53
|
+
document.querySelectorAll(sels).forEach(el => {
|
|
54
|
+
const text = el.textContent.trim();
|
|
55
|
+
if (text) {
|
|
56
|
+
const s = window.getComputedStyle(el);
|
|
57
|
+
result.toasts.push({
|
|
58
|
+
text: text.slice(0, 200),
|
|
59
|
+
visible: s.display !== 'none' && s.visibility !== 'hidden',
|
|
60
|
+
classes: el.className || ''
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
} catch(e) {}
|
|
65
|
+
|
|
66
|
+
// 3. Number + label counts
|
|
67
|
+
try {
|
|
68
|
+
document.querySelectorAll('.stat, .stat-val, .count, .badge, h1, h2, h3, p, span').forEach(el => {
|
|
69
|
+
const text = el.textContent.trim();
|
|
70
|
+
const m = text.match(/^(\\d+)\\s+(.+)$/);
|
|
71
|
+
if (m) result.counts[m[2].trim()] = parseInt(m[1], 10);
|
|
72
|
+
});
|
|
73
|
+
} catch(e) {}
|
|
74
|
+
|
|
75
|
+
// 4. Semantic CSS indicators
|
|
76
|
+
try {
|
|
77
|
+
['remaining-zero','task-done','error','loading','spinner','alert-error','alert-success'].forEach(cls => {
|
|
78
|
+
document.querySelectorAll('.' + cls).forEach(el => {
|
|
79
|
+
result.cssIndicators.push({
|
|
80
|
+
cls: cls,
|
|
81
|
+
text: el.textContent.trim().slice(0, 100),
|
|
82
|
+
tag: el.tagName.toLowerCase()
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
} catch(e) {}
|
|
87
|
+
|
|
88
|
+
// 5. Item lists
|
|
89
|
+
try {
|
|
90
|
+
document.querySelectorAll('.card, .list, ul, ol').forEach(container => {
|
|
91
|
+
const items = container.querySelectorAll('.task-item, .list-item, li, tr');
|
|
92
|
+
if (items.length >= 2) {
|
|
93
|
+
const key = (container.className || container.tagName).slice(0, 50);
|
|
94
|
+
result.itemLists[key] = Array.from(items).map(it => it.textContent.trim().slice(0, 200));
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
} catch(e) {}
|
|
98
|
+
|
|
99
|
+
return result;
|
|
100
|
+
}
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class BrowserDriver:
    """Wraps Playwright to drive a browser and capture errors.

    The driver owns one Chromium browser, one context and one page. Console
    messages of type ``error``/``warning``, uncaught page errors and HTTP
    responses with status >= 400 are collected into ``console_errors`` and
    ``network_errors`` until ``drain_errors`` is called.

    The action methods (``click``, ``type_text``, ``select_option``,
    ``go_back``) report failure by returning ``False`` instead of raising.
    """

    def __init__(
        self,
        headless: bool = True,
        viewport_width: int = 1280,
        viewport_height: int = 720,
    ):
        """Store launch options; no browser is started until ``start``."""
        self.headless = headless
        self.viewport = {"width": viewport_width, "height": viewport_height}
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self._page: Optional[Page] = None
        self.console_errors: List[Dict] = []
        self.network_errors: List[Dict] = []

    # -- lifecycle --

    async def start(self):
        """Launch Chromium, open a page and attach the error listeners.

        If any step fails, whatever was already started is shut down again
        before the exception propagates, so a failed start does not leave a
        browser or Playwright driver running.
        """
        try:
            self._playwright = await async_playwright().start()
            self._browser = await self._playwright.chromium.launch(headless=self.headless)
            self._context = await self._browser.new_context(viewport=self.viewport)
            self._page = await self._context.new_page()
            self._setup_listeners()
        except Exception:
            await self.stop()
            raise

    async def stop(self):
        """Close the browser and stop Playwright.

        Safe to call more than once and before ``start``. Playwright is
        stopped even when closing the browser raises.
        """
        browser, playwright = self._browser, self._playwright
        # Drop the references first so a second call is a no-op.
        self._page = None
        self._context = None
        self._browser = None
        self._playwright = None
        try:
            if browser:
                await browser.close()
        finally:
            if playwright:
                await playwright.stop()

    # -- listeners --

    def _setup_listeners(self):
        """Subscribe to the page events that feed the error buffers."""
        self._page.on("console", self._on_console)
        self._page.on("pageerror", self._on_page_error)
        self._page.on("response", self._on_response)

    def _current_url(self) -> str:
        """Return the page URL, or ``""`` when no page is open."""
        return self._page.url if self._page else ""

    def _on_console(self, msg):
        """Record console messages of type ``error`` or ``warning``."""
        if msg.type in ("error", "warning"):
            self.console_errors.append({
                "type": msg.type,
                "text": msg.text,
                "url": self._current_url(),
                "timestamp": datetime.now().isoformat(),
            })

    def _on_page_error(self, error):
        """Record an uncaught page error as an ``exception`` entry."""
        self.console_errors.append({
            "type": "exception",
            "text": str(error),
            "url": self._current_url(),
            "timestamp": datetime.now().isoformat(),
        })

    async def _on_response(self, response):
        """Record every response whose HTTP status is 400 or higher."""
        if response.status >= 400:
            self.network_errors.append({
                "url": response.url,
                "status": response.status,
                "method": response.request.method,
                "page_url": self._current_url(),
                "timestamp": datetime.now().isoformat(),
            })

    # -- navigation --

    async def goto(self, url: str):
        """Navigate to ``url`` and wait (up to 30 s) for network idle."""
        await self._page.goto(url, wait_until="networkidle", timeout=30_000)

    # -- state extraction --

    async def get_state(self) -> PageState:
        """Snapshot the current page into a ``PageState``.

        Only toasts flagged visible are kept, and each CSS indicator is
        flattened to a ``"<class>:<text>"`` string.
        """
        elements = await self._extract_elements()
        content = await self._extract_page_content()
        return PageState(
            url=self._page.url,
            title=await self._page.title(),
            elements=elements,
            page_text=content.get("pageText", ""),
            toast_messages=[t["text"] for t in content.get("toasts", []) if t.get("visible")],
            counts=content.get("counts", {}),
            css_indicators=[
                f"{ind['cls']}:{ind['text']}"
                for ind in content.get("cssIndicators", [])
            ],
            item_lists=content.get("itemLists", {}),
        )

    async def refresh_and_get_state(self) -> PageState:
        """Reload the current page and return fresh state for verification."""
        await self._page.reload(wait_until="networkidle", timeout=15_000)
        return await self.get_state()

    async def _extract_elements(self) -> List[InteractiveElement]:
        """Run the element-extraction script and wrap each result."""
        raw = await self._page.evaluate(_EXTRACT_ELEMENTS_JS)
        return [InteractiveElement(**el) for el in raw]

    async def _extract_page_content(self) -> Dict:
        """Run the content-extraction script; return ``{}`` if it fails."""
        try:
            return await self._page.evaluate(_EXTRACT_PAGE_CONTENT_JS)
        except Exception:
            # Best effort: get_state falls back to defaults for every key.
            return {}

    # -- actions --

    async def click(self, element_index: int, elements: List[InteractiveElement]) -> bool:
        """Click ``elements[element_index]``.

        Returns ``False`` when the index is not a valid position in
        ``elements`` or the click itself fails. A page that does not reach
        network idle within 10 s after the click still counts as clicked.
        """
        selector = self._selector_at(element_index, elements)
        if selector is None:
            return False
        try:
            await self._page.click(selector, timeout=5_000)
        except Exception:
            return False
        try:
            await self._page.wait_for_load_state("networkidle", timeout=10_000)
        except Exception:
            # The click already happened; a page that keeps the network busy
            # must not turn a successful click into a reported failure.
            pass
        return True

    async def type_text(
        self, element_index: int, text: str, elements: List[InteractiveElement]
    ) -> bool:
        """Fill ``elements[element_index]`` with ``text``; ``False`` on failure."""
        selector = self._selector_at(element_index, elements)
        if selector is None:
            return False
        try:
            await self._page.fill(selector, text, timeout=5_000)
            return True
        except Exception:
            return False

    async def select_option(
        self, element_index: int, value: str, elements: List[InteractiveElement]
    ) -> bool:
        """Select ``value`` in ``elements[element_index]``; ``False`` on failure."""
        selector = self._selector_at(element_index, elements)
        if selector is None:
            return False
        try:
            await self._page.select_option(selector, value, timeout=5_000)
            return True
        except Exception:
            return False

    async def go_back(self) -> bool:
        """Go back one history entry; ``False`` if that raises or times out."""
        try:
            await self._page.go_back(wait_until="networkidle", timeout=10_000)
            return True
        except Exception:
            return False

    async def scroll_down(self):
        """Scroll the window down by 500 px, then pause for half a second."""
        await self._page.evaluate("window.scrollBy(0, 500)")
        await asyncio.sleep(0.5)

    async def screenshot(self, path: str) -> str:
        """Save a viewport screenshot to ``path`` (creating parent dirs) and return ``path``."""
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        await self._page.screenshot(path=path, full_page=False)
        return path

    # -- error draining --

    def drain_errors(self) -> Tuple[List[Dict], List[Dict]]:
        """Return ``(console_errors, network_errors)`` and empty both buffers.

        The buffers are cleared in place, so the returned lists are copies.
        """
        console = list(self.console_errors)
        network = list(self.network_errors)
        self.console_errors.clear()
        self.network_errors.clear()
        return console, network

    # -- selector building --

    @classmethod
    def _selector_at(
        cls, element_index: int, elements: List[InteractiveElement]
    ) -> Optional[str]:
        """Return the selector for ``elements[element_index]``.

        Returns ``None`` when the index is not an integer in
        ``0 <= index < len(elements)``, so the action methods can report
        ``False`` rather than raise ``IndexError``/``TypeError``.
        """
        if not isinstance(element_index, int):
            return None
        if not 0 <= element_index < len(elements):
            return None
        return cls._build_selector(elements[element_index])

    @staticmethod
    def _build_selector(el: InteractiveElement) -> str:
        """Build a selector string for ``el``.

        Preference order:

        1. the id: ``#id`` for plain ASCII identifiers, ``[id="..."]`` for
           anything else (ids such as ``:r1:`` or ``1st`` are not valid after
           ``#``);
        2. for elements with text and no name, ``tag:has-text("...")`` using
           the first 50 characters of the whitespace-collapsed text;
        3. the tag plus ``name`` / ``type`` (inputs only) / ``role``
           attribute filters.

        Backslashes and double quotes inside quoted values are escaped.
        """

        def quoted(value: str) -> str:
            escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
            return f'"{escaped}"'

        if el.id:
            bare = el.id.replace("-", "").replace("_", "")
            starts_ok = el.id[0].isalpha() or el.id[0] == "_"
            if el.id.isascii() and bare.isalnum() and starts_ok:
                return f"#{el.id}"
            return f"[id={quoted(el.id)}]"

        if el.text and not el.name:
            # A raw newline is not allowed inside a quoted selector string,
            # so collapse every whitespace run to a single space.
            snippet = " ".join(el.text.split())[:50].strip()
            if snippet:
                return f"{el.tag}:has-text({quoted(snippet)})"

        parts = [el.tag]
        if el.name:
            parts.append(f"[name={quoted(el.name)}]")
        if el.type and el.tag == "input":
            parts.append(f"[type={quoted(el.type)}]")
        if el.role:
            parts.append(f"[role={quoted(el.role)}]")
        return "".join(parts)
|