@suiflex/suitest-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +77 -0
- package/bin/suitest-mcp.js +123 -0
- package/package.json +50 -0
- package/python/suitest_lifecycle/__init__.py +3 -0
- package/python/suitest_lifecycle/analyzers/__init__.py +1 -0
- package/python/suitest_lifecycle/analyzers/crawl.py +187 -0
- package/python/suitest_lifecycle/analyzers/express.py +226 -0
- package/python/suitest_lifecycle/analyzers/openapi.py +163 -0
- package/python/suitest_lifecycle/analyzers/postman.py +132 -0
- package/python/suitest_lifecycle/analyzers/react.py +107 -0
- package/python/suitest_lifecycle/analyzers/zod_schema.py +131 -0
- package/python/suitest_lifecycle/blackbox/__init__.py +11 -0
- package/python/suitest_lifecycle/blackbox/bootstrap.py +249 -0
- package/python/suitest_lifecycle/blackbox/crawler.py +383 -0
- package/python/suitest_lifecycle/blackbox/detector.py +169 -0
- package/python/suitest_lifecycle/blackbox/generator.py +608 -0
- package/python/suitest_lifecycle/blackbox/graph.py +107 -0
- package/python/suitest_lifecycle/blackbox/mcp.py +546 -0
- package/python/suitest_lifecycle/blackbox/models.py +299 -0
- package/python/suitest_lifecycle/blackbox/prd_ingest.py +108 -0
- package/python/suitest_lifecycle/blackbox/reporter.py +76 -0
- package/python/suitest_lifecycle/blackbox/selector.py +111 -0
- package/python/suitest_lifecycle/cli.py +127 -0
- package/python/suitest_lifecycle/config.py +314 -0
- package/python/suitest_lifecycle/enrich.py +140 -0
- package/python/suitest_lifecycle/exporters/__init__.py +1 -0
- package/python/suitest_lifecycle/exporters/backend.py +345 -0
- package/python/suitest_lifecycle/exporters/frontend.py +459 -0
- package/python/suitest_lifecycle/frontend_runtime.py +77 -0
- package/python/suitest_lifecycle/llm_bridge.py +365 -0
- package/python/suitest_lifecycle/mcp_server.py +187 -0
- package/python/suitest_lifecycle/models.py +166 -0
- package/python/suitest_lifecycle/orchestrator.py +500 -0
- package/python/suitest_lifecycle/paths.py +90 -0
- package/python/suitest_lifecycle/plan.py +366 -0
- package/python/suitest_lifecycle/plan_frontend.py +252 -0
- package/python/suitest_lifecycle/prd.py +92 -0
- package/python/suitest_lifecycle/process.py +111 -0
- package/python/suitest_lifecycle/publish.py +218 -0
- package/python/suitest_lifecycle/readiness.py +83 -0
- package/python/suitest_lifecycle/report.py +179 -0
- package/python/suitest_lifecycle/runner.py +138 -0
- package/python/suitest_lifecycle/serialize.py +131 -0
- package/python/suitest_lifecycle/tcm.py +149 -0
- package/python/suitest_lifecycle/tools.py +217 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""Blackbox crawler — open a real browser, log in, walk the app, capture evidence.
|
|
2
|
+
|
|
3
|
+
No repo, no LLM, no testid requirement. Per page it records: interactive
|
|
4
|
+
elements (rich attributes for the selector strategy), pattern classification,
|
|
5
|
+
screenshot, console errors, network failures, and blank/crash detection.
|
|
6
|
+
SafeMode (default ON) refuses to follow destructive links (logout, delete,
|
|
7
|
+
billing, payment, …).
|
|
8
|
+
|
|
9
|
+
Async Playwright; :func:`discover` is the sync entry every consumer calls.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import contextlib
|
|
16
|
+
import re
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
from suitest_lifecycle.blackbox.detector import detect_login_form, detect_page_pattern
|
|
21
|
+
from suitest_lifecycle.blackbox.models import (
|
|
22
|
+
BlackboxUiConfig,
|
|
23
|
+
DiscoveryResult,
|
|
24
|
+
ElementInfo,
|
|
25
|
+
LoginProbe,
|
|
26
|
+
PageInfo,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from playwright.async_api import Page
|
|
31
|
+
|
|
32
|
+
# Rich per-element capture: every attribute the selector strategy ranks.
|
|
33
|
+
# In-page extractor passed to ``page.evaluate``. It returns one plain object with
# the visible inputs/buttons/links (each described by ``info``), list/table,
# modal, search and pager signals, a text sample and the DOM element count.
_EXTRACT_JS = r"""
() => {
  // An element counts as visible when its bounding box has a non-zero area.
  const vis = (el) => { const r = el.getBoundingClientRect(); return r.width > 0 && r.height > 0; };
  // Text of the <label for=id> targeting the element, else of a wrapping <label>.
  const labelFor = (el) => {
    if (el.id) { const l = document.querySelector(`label[for="${CSS.escape(el.id)}"]`); if (l) return l.innerText.trim(); }
    const wrap = el.closest('label'); return wrap ? wrap.innerText.trim().slice(0, 80) : '';
  };
  // Short CSS path: climbs at most 5 ancestors and stops at the first one that has an id.
  const cssPath = (el) => {
    const parts = [];
    let cur = el;
    for (let i = 0; cur && cur.nodeType === 1 && i < 5; i++) {
      let part = cur.tagName.toLowerCase();
      // Escape the id (as labelFor does) so ids such as ":r1:" or "a.b" still form a valid selector.
      if (cur.id) { parts.unshift(`#${CSS.escape(cur.id)}`); break; }
      const sibs = cur.parentElement ? [...cur.parentElement.children].filter(c => c.tagName === cur.tagName) : [];
      if (sibs.length > 1) part += `:nth-of-type(${sibs.indexOf(cur) + 1})`;
      parts.unshift(part);
      cur = cur.parentElement;
    }
    return parts.join(' > ');
  };
  const info = (el, kind) => ({
    tag: el.tagName.toLowerCase(),
    kind,
    testid: el.getAttribute('data-testid') || el.getAttribute('data-cy') || el.getAttribute('data-test') || '',
    testid_attr: el.hasAttribute('data-testid') ? 'data-testid' : el.hasAttribute('data-cy') ? 'data-cy' : el.hasAttribute('data-test') ? 'data-test' : '',
    role: el.getAttribute('role') || '',
    aria_label: el.getAttribute('aria-label') || '',
    label: labelFor(el),
    placeholder: el.getAttribute('placeholder') || '',
    name: el.getAttribute('name') || '',
    input_type: (el.getAttribute('type') || '').toLowerCase(),
    autocomplete: el.getAttribute('autocomplete') || '',
    text: (el.innerText || el.value || '').trim().slice(0, 80),
    dom_id: el.id || '',
    href: el.getAttribute && el.tagName === 'A' ? (el.getAttribute('href') || '') : '',
    css: cssPath(el),
    required: el.required === true || el.getAttribute('aria-required') === 'true',
  });
  const inputs = [...document.querySelectorAll('input,textarea,select')].filter(vis).map(e => info(e, e.tagName === 'SELECT' ? 'select' : (e.type === 'checkbox' ? 'checkbox' : 'input')));
  const buttons = [...document.querySelectorAll('button,[role=button],input[type=submit],a[role=button]')].filter(vis).map(e => info(e, 'button'));
  // Same-site routes only: keep "/path" hrefs but drop protocol-relative "//host/..." URLs.
  const links = [...document.querySelectorAll('a[href]')].filter(vis).map(e => info(e, 'link')).filter(l => l.href.startsWith('/') && !l.href.startsWith('//'));
  const tables = document.querySelectorAll('table tbody tr, [role=row], [role=grid] [role=row]');
  let rowSelector = '';
  if (document.querySelector('table tbody tr')) rowSelector = 'table tbody tr';
  else if (document.querySelector('[role=grid] [role=row]')) rowSelector = '[role=grid] [role=row]';
  // repeated-testid rows (list rendered as divs)
  const counts = {};
  document.querySelectorAll('[data-testid]').forEach(e => { const t = e.getAttribute('data-testid'); counts[t] = (counts[t] || 0) + 1; });
  const repeated = Object.entries(counts).filter(([, n]) => n >= 3).sort((a, b) => b[1] - a[1]);
  if (!rowSelector && repeated.length) rowSelector = `[data-testid="${repeated[0][0]}"]`;
  const search = [...document.querySelectorAll('input[type=search],input[placeholder]')].filter(vis)
    .find(e => /search|cari|filter/i.test((e.getAttribute('placeholder') || '') + (e.getAttribute('name') || '') + (e.getAttribute('aria-label') || '')));
  const pager = [...document.querySelectorAll('button,a')].filter(vis)
    .find(e => /next|»|older|selanjutnya/i.test((e.innerText || '').trim()) || (e.getAttribute('aria-label') || '').match(/next page/i));
  return {
    title: document.title || '',
    inputs, buttons, links,
    testids: [...new Set([...document.querySelectorAll('[data-testid]')].map(e => e.getAttribute('data-testid')))],
    hasTable: tables.length >= 1 || !!rowSelector,
    rowSelector,
    hasModal: !!document.querySelector('[role=dialog],[aria-modal=true],dialog[open]'),
    searchInfo: search ? info(search, 'input') : null,
    pagerInfo: pager ? info(pager, 'button') : null,
    textSample: (document.body ? document.body.innerText : '').trim().slice(0, 1500),
    elementCount: document.body ? document.body.querySelectorAll('*').length : 0,
  };
}
"""
|
|
101
|
+
|
|
102
|
+
# SafeMode deny-list: a route containing any of these words (case-insensitive)
# is treated as destructive and is not followed by the crawler.
_SAFE_SKIP_HREF = re.compile(
    r"(logout|log-out|sign-?out"
    r"|delete|remove|destroy"
    r"|billing|payment|checkout"
    r"|subscribe|unsubscribe|deactivate)",
    flags=re.IGNORECASE,
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
async def _goto(page: Page, url: str) -> None:
    """Open ``url`` and wait for DOMContentLoaded, then briefly try to settle.

    DOMContentLoaded is the only hard requirement; a navigation failure there
    propagates to the caller. The follow-up ``networkidle`` wait is a short
    best-effort pause: pages with analytics or long-polling may never go idle,
    so any error raised by that wait is ignored.
    """
    nav_timeout_ms = 20000
    settle_timeout_ms = 3000
    await page.goto(url, wait_until="domcontentloaded", timeout=nav_timeout_ms)
    with contextlib.suppress(Exception):
        await page.wait_for_load_state("networkidle", timeout=settle_timeout_ms)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _route_of(url: str, base: str) -> str:
    """Return the route of ``url`` relative to ``base``, without query or fragment.

    ``base`` is stripped only when it ends on a real URL boundary, i.e. the
    remainder is empty or starts with ``/``, ``?`` or ``#`` (or ``base`` itself
    ends with ``/``). A plain prefix match would turn
    ``http://h/application`` into ``lication`` for base ``http://h/app`` and
    would treat ``http://h.evil.io`` as belonging to ``http://h``.

    URLs that do not belong to ``base`` are returned as-is, minus query and
    fragment. An empty result is normalised to ``"/"``.
    """
    tail = url
    if url.startswith(base):
        rest = url[len(base) :]
        if not rest or rest[0] in "/?#" or base.endswith("/"):
            tail = rest
    tail = tail.split("?", 1)[0].split("#", 1)[0]
    return tail or "/"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _excluded(route: str, cfg: BlackboxUiConfig) -> bool:
    """Decide whether the crawler must skip ``route``.

    A route is skipped when it starts with any ``cfg.crawl.exclude`` prefix,
    when an include list is configured and no include prefix matches, or when
    ``cfg.crawl.safe_mode`` is on and the route matches ``_SAFE_SKIP_HREF``.
    """
    crawl = cfg.crawl
    for prefix in crawl.exclude:
        if route.startswith(prefix):
            return True
    allow = crawl.include
    if allow and not any(route.startswith(prefix) for prefix in allow):
        return True
    if not crawl.safe_mode:
        return False
    return _SAFE_SKIP_HREF.search(route) is not None
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
async def _snapshot(
    page: Page,
    route: str,
    depth: int,
    evidence_dir: Path,
    console_errors: list[str],
    network_errors: list[str],
    *,
    shot_name: str,
) -> PageInfo:
    """Capture the page currently loaded in ``page`` as a ``PageInfo``.

    Runs ``_EXTRACT_JS`` in the page, saves ``<shot_name>.png`` under
    ``evidence_dir`` (a screenshot failure only leaves ``screenshot`` empty),
    copies the current contents of both error lists, and then fills in the
    search/pagination locators, the page pattern and ``has_form``.
    """
    data = await page.evaluate(_EXTRACT_JS)

    try:
        evidence_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240 — tiny, one-off, local FS
        target = evidence_dir / f"{shot_name}.png"
        await page.screenshot(path=str(target))
    except Exception:  # screenshot must never sink the crawl
        shot = ""
    else:
        shot = str(target)

    raw_links = data.get("links", [])
    row_selector = data.get("rowSelector")
    text_sample = str(data.get("textSample", ""))
    inputs = [ElementInfo.from_json(item) for item in data.get("inputs", [])]
    buttons = [ElementInfo.from_json(item) for item in data.get("buttons", [])]
    links = [ElementInfo.from_json(item) for item in raw_links]
    hrefs = {str(item.get("href", "")) for item in raw_links if item.get("href")}
    # Fewer than 5 DOM elements or no visible text is reported as a blank page.
    too_small = int(data.get("elementCount", 0)) < 5

    info = PageInfo(
        route=route,
        url=page.url,
        title=str(data.get("title", "")),
        depth=depth,
        inputs=inputs,
        buttons=buttons,
        links=links,
        nav_routes=sorted(hrefs),
        testids=[str(t) for t in data.get("testids", [])],
        has_table=bool(data.get("hasTable")),
        row_locator=f"page.locator('{row_selector}')" if row_selector else "",
        has_modal=bool(data.get("hasModal")),
        console_errors=list(console_errors),
        network_errors=list(network_errors),
        screenshot=shot,
        visible_text_sample=text_sample,
        blank=too_small or not text_sample.strip(),
    )
    from suitest_lifecycle.blackbox.selector import build_locator

    search_payload = data.get("searchInfo")
    if search_payload:
        info.search_locator = build_locator(ElementInfo.from_json(search_payload))
    pager_payload = data.get("pagerInfo")
    if pager_payload:
        info.pagination_locator = build_locator(ElementInfo.from_json(pager_payload))

    info.pattern = detect_page_pattern(info)
    if info.pattern == "form":
        info.has_form = True
    else:
        # Otherwise: at least two fillable inputs plus a button counts as a form.
        fillable = [e for e in info.inputs if e.input_type not in ("checkbox", "radio", "hidden")]
        info.has_form = len(fillable) >= 2 and bool(info.buttons)
    return info
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
async def _discover(cfg: BlackboxUiConfig, evidence_dir: Path) -> DiscoveryResult:
    """Run the full discovery: snapshot the entry page, try to log in, then BFS-crawl.

    Steps:
      1. Open ``cfg.auth.login_url`` (default ``/login``), falling back to ``/``;
         if neither loads, return with a "target unreachable" error.
      2. Detect the login form (config selectors override detection) and, when
         credentials are configured, submit it and record a ``LoginProbe``.
      3. Breadth-first crawl from the landing route, ``/``, the entry page's
         nav links and ``cfg.crawl.include``, bounded by ``max_routes`` and
         ``max_depth`` and filtered through ``_excluded``.

    Navigation and snapshot failures of a single route are recorded in
    ``result.errors`` and the crawl continues. The browser is always closed.
    """
    from collections import deque

    from playwright.async_api import async_playwright

    base = cfg.target_url.rstrip("/")
    result = DiscoveryResult(base_url=base)
    console_errors: list[str] = []
    network_errors: list[str] = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=not cfg.headed)
        try:
            ctx = await browser.new_context(viewport={"width": 1280, "height": 720})
            page = await ctx.new_page()
            page.on(
                "console",
                lambda m: console_errors.append(m.text[:300]) if m.type == "error" else None,
            )
            page.on("pageerror", lambda e: console_errors.append(str(e)[:300]))
            page.on(
                "response",
                lambda r: (
                    network_errors.append(f"{r.status} {r.url[:200]}") if r.status >= 500 else None
                ),
            )
            page.on(
                "requestfailed",
                # ERR_ABORTED = request cancelled by navigation (Next.js RSC
                # prefetch, analytics beacons) — normal behavior, not a bug signal.
                lambda r: (
                    network_errors.append(f"FAILED {r.url[:200]} {r.failure or ''}")
                    if "ERR_ABORTED" not in str(r.failure or "")
                    else None
                ),
            )

            # ---- 1. login page ---------------------------------------------------
            login_route = cfg.auth.login_url or "/login"
            try:
                await _goto(page, base + login_route)
            except Exception:
                # Drop noise from the failed attempt so the fallback page's
                # evidence only contains its own errors.
                console_errors.clear()
                network_errors.clear()
                try:
                    await _goto(page, base + "/")
                except Exception as exc:
                    result.errors.append(f"target unreachable: {exc}")
                    return result
            login_route = _route_of(page.url, base)
            # The error lists are NOT cleared here: whatever the entry page
            # logged while loading belongs in its snapshot.
            login_page = await _snapshot(
                page, login_route, 0, evidence_dir, console_errors, network_errors, shot_name="login"
            )
            result.pages.append(login_page)

            form = detect_login_form(login_page, ignore_testids=cfg.crawl.ignore_testids)
            # manual overrides beat detection (docs: selectors.loginUsername/…)
            if cfg.selectors.login_username:
                form.username = _as_locator(cfg.selectors.login_username)
            if cfg.selectors.login_password:
                form.password = _as_locator(cfg.selectors.login_password)
            if cfg.selectors.login_submit:
                form.submit = _as_locator(cfg.selectors.login_submit)
            result.login = form if form.found() else None

            # ---- 2. perform login -------------------------------------------------
            if result.login and cfg.auth.username and cfg.auth.password:
                probe = LoginProbe(attempted=True)
                try:
                    await _eval_locator(page, form.username).fill(cfg.auth.username)
                    await _eval_locator(page, form.password).fill(cfg.auth.password)
                    await _eval_locator(page, form.submit).click()
                    with contextlib.suppress(Exception):
                        await page.wait_for_url(
                            lambda u: _route_of(u, base) != login_route, timeout=10000
                        )
                    with contextlib.suppress(Exception):
                        await page.wait_for_load_state("networkidle", timeout=5000)
                    landed = _route_of(page.url, base)
                    probe.landed_route = landed
                    probe.success = landed != login_route
                    if not probe.success:
                        # still on login — look for an error region to report
                        err = await page.evaluate(
                            "() => { const e = document.querySelector('[role=alert],"
                            "[class*=error],[data-testid*=error]');"
                            " return e ? (e.innerText || '').slice(0, 120) : ''; }"
                        )
                        probe.detail = f"stayed on {landed}; error: {err or 'none shown'}"
                except Exception as exc:
                    probe.detail = f"login interaction failed: {exc}"[:300]
                result.login_probe = probe
            elif result.login:
                result.login_probe = LoginProbe(attempted=False, detail="no credentials configured")

            # ---- 3. BFS crawl ------------------------------------------------------
            # Seed from everywhere we already know: where we landed, the root, the
            # nav links captured on the entry page, and config includes. Without
            # this, an app with no /login (entry 404s) would dead-end immediately.
            start_route = _route_of(page.url, base)
            seeds = [start_route, "/", *login_page.nav_routes, *cfg.crawl.include]
            queue: deque[tuple[str, int]] = deque((r, 0) for r in dict.fromkeys(seeds))
            # Every route ever enqueued: stops duplicate hrefs from eating the
            # queue cap and from being reported twice in ``skipped_routes``.
            queued: set[str] = {r for r, _ in queue}
            visited: set[str] = {login_route}
            max_queue = cfg.crawl.max_routes * 3
            while queue and len(result.pages) < cfg.crawl.max_routes:
                route, depth = queue.popleft()
                if route in visited or depth > cfg.crawl.max_depth:
                    continue
                # The landing route is always crawled, even if a filter matches it.
                if _excluded(route, cfg) and route != start_route:
                    result.skipped_routes.append(route)
                    continue
                visited.add(route)
                console_errors.clear()
                network_errors.clear()
                try:
                    await _goto(page, base + route)
                except Exception as exc:
                    result.errors.append(f"{route}: navigation failed: {exc}"[:200])
                    continue
                landed = _route_of(page.url, base)
                try:
                    info = await _snapshot(
                        page,
                        route,
                        depth,
                        evidence_dir,
                        console_errors,
                        network_errors,
                        shot_name=_shot_name(route),
                    )
                except Exception as exc:
                    # One page that cannot be inspected must not abort the crawl.
                    result.errors.append(f"{route}: snapshot failed: {exc}"[:200])
                    continue
                # Redirected to a route containing "login" => the page needs auth.
                info.protected = landed != route and "login" in landed.lower()
                result.pages.append(info)
                for href in info.nav_routes:
                    if href in visited or href in queued or len(queue) >= max_queue:
                        continue
                    queued.add(href)
                    queue.append((href, depth + 1))

            return result
        finally:
            await browser.close()
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _shot_name(route: str) -> str:
    """Build the screenshot file stem for ``route``.

    The alphanumeric runs of the route are joined with ``_`` (``root`` when the
    route has none), prefixed with ``page_`` and cut to 80 characters.
    """
    words = re.findall(r"[a-zA-Z0-9]+", route)
    slug = "_".join(words) if words else "root"
    stem = f"page_{slug}"
    return stem[:80]
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _as_locator(expr: str) -> str:
    """Accept either a full ``page.…`` expression or a raw CSS selector.

    A value starting with ``page.`` is returned unchanged (after stripping).
    Anything else is wrapped as ``page.locator("<selector>")``. The selector is
    written as an escaped, double-quoted string literal, so selectors that
    contain quotes or backslashes (e.g. ``input[name="email"]``) still yield a
    parseable expression. Plain selectors produce exactly the same text as
    simple double-quoting would.
    """
    import json

    selector = expr.strip()
    if selector.startswith("page."):
        return selector
    return f"page.locator({json.dumps(selector, ensure_ascii=False)})"
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def _eval_locator(page: Page, expr: str):
    """Resolve a stored locator EXPRESSION on a live page.

    The expression grammar is our own output (``build_locator``/``_as_locator``),
    which keeps one single source of truth between discovery-time interaction
    and generated code.

    NOTE(security): the expression text can still carry external data — config
    overrides starting with ``page.`` are passed through verbatim, and detected
    locators are presumably built from attributes of the crawled page. The
    expression is therefore validated before it is evaluated: it must be a
    chain of attribute accesses and calls rooted at ``page`` whose arguments
    are literals (or nested ``page`` chains), with no underscore-prefixed
    attributes. Evaluation runs without builtins.

    Raises:
        SyntaxError: if ``expr`` is not a valid Python expression.
        ValueError: if ``expr`` uses anything outside the locator grammar.
    """
    import ast

    tree = ast.parse(expr.strip(), mode="eval")
    allowed_nodes = (
        ast.Expression,
        ast.Call,
        ast.Attribute,
        ast.Name,
        ast.Constant,
        ast.keyword,
        ast.Load,
        ast.UnaryOp,
        ast.USub,
        ast.UAdd,
        ast.List,
        ast.Tuple,
        ast.Dict,
    )
    for node in ast.walk(tree):
        if not isinstance(node, allowed_nodes):
            raise ValueError(
                f"unsupported syntax in locator expression: {type(node).__name__}"
            )
        if isinstance(node, ast.Name) and node.id != "page":
            raise ValueError(f"unknown name in locator expression: {node.id!r}")
        if isinstance(node, ast.Attribute) and node.attr.startswith("_"):
            raise ValueError(f"private attribute in locator expression: {node.attr!r}")
    code = compile(tree, "<locator>", "eval")
    return eval(code, {"__builtins__": {}, "page": page})
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def discover(cfg: BlackboxUiConfig, evidence_dir: str | Path) -> DiscoveryResult:
    """Sync entry: full blackbox discovery (login + crawl + evidence).

    Converts ``evidence_dir`` to a ``Path`` and drives the async ``_discover``
    coroutine to completion with ``asyncio.run``.
    """
    evidence_path = Path(evidence_dir)
    crawl = _discover(cfg, evidence_path)
    return asyncio.run(crawl)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
async def _analyze_one(cfg: BlackboxUiConfig, url: str, evidence_dir: Path) -> PageInfo:
    """Open a single page and return its ``PageInfo`` (no login, no crawl).

    ``url`` may be absolute (``http://`` / ``https://``) or a route that is
    appended to ``cfg.target_url``. Console errors, page errors, 5xx responses
    and failed requests seen while loading are recorded in the snapshot. The
    browser is closed even when navigation or the snapshot raises; those
    errors propagate to the caller.
    """
    from playwright.async_api import async_playwright

    base = cfg.target_url.rstrip("/") or url
    console_errors: list[str] = []
    network_errors: list[str] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=not cfg.headed)
        try:
            ctx = await browser.new_context(viewport={"width": 1280, "height": 720})
            page = await ctx.new_page()
            page.on(
                "console",
                lambda m: console_errors.append(m.text[:300]) if m.type == "error" else None,
            )
            page.on("pageerror", lambda e: console_errors.append(str(e)[:300]))
            # Without these two listeners ``network_errors`` would always be empty.
            page.on(
                "response",
                lambda r: (
                    network_errors.append(f"{r.status} {r.url[:200]}") if r.status >= 500 else None
                ),
            )
            page.on(
                "requestfailed",
                # ERR_ABORTED = request cancelled by navigation — not a bug signal.
                lambda r: (
                    network_errors.append(f"FAILED {r.url[:200]} {r.failure or ''}")
                    if "ERR_ABORTED" not in str(r.failure or "")
                    else None
                ),
            )
            # Only a real scheme marks an absolute URL; a route that merely
            # starts with "http" is still joined to the base.
            is_absolute = url.startswith(("http://", "https://"))
            target = url if is_absolute else base + url
            await _goto(page, target)
            route = _route_of(page.url, base)
            return await _snapshot(
                page,
                route,
                0,
                evidence_dir,
                console_errors,
                network_errors,
                shot_name=_shot_name(route),
            )
        finally:
            await browser.close()
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def analyze_single_page(cfg: BlackboxUiConfig, url: str, evidence_dir: str | Path) -> PageInfo:
    """Sync entry: analyze ONE page (pattern + elements + evidence), no crawl.

    Converts ``evidence_dir`` to a ``Path`` and drives the async
    ``_analyze_one`` coroutine to completion with ``asyncio.run``.
    """
    evidence_path = Path(evidence_dir)
    job = _analyze_one(cfg, url, evidence_path)
    return asyncio.run(job)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
__all__ = ["analyze_single_page", "discover"]
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Heuristic detectors: login form + page pattern. Deterministic, no LLM.
|
|
2
|
+
|
|
3
|
+
Nothing here references any app-specific ``data-testid`` — the old
|
|
4
|
+
suitest-example convention is only an input signal (via the selector strategy's
|
|
5
|
+
tier 1), never a requirement.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
from suitest_lifecycle.blackbox.models import ElementInfo, LoginForm, PageInfo
|
|
13
|
+
from suitest_lifecycle.blackbox.selector import build_locator
|
|
14
|
+
|
|
15
|
+
# Substrings (matched against the lower-cased element blob) that mark an input
# as the username field, a button as the login submit, or a checkbox as
# "remember me".
_USERNAME_HINTS = ("email", "e-mail", "user", "login", "account", "identifier", "phone")
_SUBMIT_HINTS = ("sign in", "log in", "login", "masuk", "submit", "continue", "next")
_REMEMBER_HINTS = ("remember", "ingat", "keep me")
# Substrings that mark an element as destructive for the SafeMode gate.
_DESTRUCTIVE_HINTS = (
    "delete",
    "remove",
    "destroy",
    "logout",
    "log out",
    "sign out",
    "keluar",
    "hapus",
    "cancel subscription",
    "unsubscribe",
    "payment",
    "pay ",
    "checkout",
    "billing",
    "publish",
    "send",
    "approve",
    "reject",
    "submit final",
    "deactivate",
)

# Page-text classifiers used by ``detect_page_pattern``.
_ERROR_TEXT_RE = re.compile(
    r"(something went wrong|internal server error|unexpected error|exception|traceback"
    r"|terjadi kesalahan)",
    re.I,
)
# Status codes are matched as whole numbers only: a bare "403"/"404" would also
# hit inside larger numbers such as "14035" or "555-4040" and misclassify
# ordinary pages.
_FORBIDDEN_RE = re.compile(
    r"(forbidden|unauthorized|access denied|\b403\b|\b401\b|tidak berhak)", re.I
)
_NOT_FOUND_RE = re.compile(
    r"(not found|\b404\b|page (doesn.t|does not) exist|halaman tidak)", re.I
)
_EMPTY_RE = re.compile(r"(no \w+ yet|nothing here|empty|no results|no data|tidak ada \w+)", re.I)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _blob(el: ElementInfo) -> str:
    """Return the element's text-bearing attributes as one lower-cased string.

    The fields are joined with single spaces in a fixed order: testid, name,
    dom_id, placeholder, label, aria_label, autocomplete, text.
    """
    field_names = (
        "testid",
        "name",
        "dom_id",
        "placeholder",
        "label",
        "aria_label",
        "autocomplete",
        "text",
    )
    joined = " ".join(getattr(el, field) for field in field_names)
    return joined.lower()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def is_destructive(el: ElementInfo) -> bool:
    """SafeMode gate — never click/submit these during crawl or generated tests.

    True when any ``_DESTRUCTIVE_HINTS`` entry occurs as a substring of the
    element's lower-cased attribute blob.
    """
    haystack = _blob(el)
    for hint in _DESTRUCTIVE_HINTS:
        if hint in haystack:
            return True
    return False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def detect_login_form(page: PageInfo, *, ignore_testids: bool = False) -> LoginForm:
    """Find username/password/submit (+ remember, error) among a page's elements.

    Works on ANY attribute the DOM offers: type, name, autocomplete, label,
    placeholder, aria-label, visible text.

    Args:
        page: The crawled page whose ``inputs`` and ``buttons`` are inspected.
        ignore_testids: Forwarded to ``build_locator`` for every locator built.

    Returns:
        A ``LoginForm`` for ``page.route``. Its locators are left unset when no
        password field, no username candidate or no non-destructive button is
        found (``.found()`` is then expected to be False).
    """
    form = LoginForm(route=page.route)
    non_text_types = ("checkbox", "radio", "hidden", "submit")

    # Prefer a real password input. Only fall back to a field that merely
    # mentions "password", and never to a checkbox/radio/hidden/submit control
    # (e.g. a "show password" toggle).
    password = next((e for e in page.inputs if e.input_type == "password"), None)
    if password is None:
        password = next(
            (
                e
                for e in page.inputs
                if e.input_type not in non_text_types and "password" in _blob(e)
            ),
            None,
        )
    if password is None:
        return form

    username = None
    for e in page.inputs:
        if e is password or e.input_type in non_text_types:
            continue
        if e.input_type == "email" or e.autocomplete in ("username", "email"):
            username = e
            break
        if any(h in _blob(e) for h in _USERNAME_HINTS):
            username = e
            break
    if username is None:
        # Fall back to the text input right before the password; if none
        # precedes it, take the first one after it.
        text_types = ("", "text", "email", "tel")
        pw_index = next(i for i, e in enumerate(page.inputs) if e is password)
        before = [e for e in page.inputs[:pw_index] if e.input_type in text_types]
        after = [e for e in page.inputs[pw_index + 1 :] if e.input_type in text_types]
        if before:
            username = before[-1]
        elif after:
            username = after[0]
    if username is None:
        return form

    # Destructive buttons are never candidates — not even as the last-resort
    # fallback, which would otherwise click e.g. a "Delete account" button.
    safe_buttons = [b for b in page.buttons if not is_destructive(b)]
    submit = next(
        (
            b
            for b in safe_buttons
            if b.input_type == "submit" or any(h in _blob(b) for h in _SUBMIT_HINTS)
        ),
        None,
    )
    if submit is None and safe_buttons:
        submit = safe_buttons[0]
    if submit is None:
        return form

    form.username = build_locator(username, ignore_testids=ignore_testids)
    form.password = build_locator(password, ignore_testids=ignore_testids)
    form.submit = build_locator(submit, ignore_testids=ignore_testids)

    remember = next(
        (
            e
            for e in page.inputs
            if e.input_type == "checkbox" and any(h in _blob(e) for h in _REMEMBER_HINTS)
        ),
        None,
    )
    if remember is not None:
        form.remember = build_locator(remember, ignore_testids=ignore_testids)

    error = next((e for e in page.inputs + page.buttons if "error" in _blob(e)), None)
    if error is not None:
        form.error = build_locator(error, ignore_testids=ignore_testids)
    return form
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def detect_page_pattern(page: PageInfo) -> str:
    """Classify a crawled page into one of ``PAGE_PATTERNS``.

    The checks run in a fixed priority order and the first hit wins: blank,
    not_found, forbidden, error, login, modal, list, empty, form, dashboard,
    detail; ``unknown`` when nothing matches.
    """
    sample = page.visible_text_sample
    if page.blank:
        return "blank"

    def looks_like_form() -> bool:
        fillable = [e for e in page.inputs if e.input_type not in ("checkbox", "radio", "hidden")]
        return len(fillable) >= 2 and bool(page.buttons)

    def looks_like_dashboard() -> bool:
        lowered_route = page.route.lower()
        in_route = any(word in lowered_route for word in ("dashboard", "home", "overview"))
        return in_route or "dashboard" in sample.lower()

    # (label, lazy predicate) pairs — evaluated top to bottom, first truthy wins.
    checks = (
        ("not_found", lambda: _NOT_FOUND_RE.search(sample) or _NOT_FOUND_RE.search(page.title)),
        ("forbidden", lambda: _FORBIDDEN_RE.search(sample)),
        ("error", lambda: _ERROR_TEXT_RE.search(sample)),
        ("login", lambda: any(e.input_type == "password" for e in page.inputs)),
        ("modal", lambda: page.has_modal),
        ("list", lambda: page.has_table),
        ("empty", lambda: _EMPTY_RE.search(sample)),
        ("form", looks_like_form),
        ("dashboard", looks_like_dashboard),
        # A route ending in a numeric id or a long hex/dash token is a detail page.
        ("detail", lambda: re.search(r"/\d+$|/[0-9a-f-]{8,}$", page.route)),
    )
    for label, matches in checks:
        if matches():
            return label
    return "unknown"
|