@suiflex/suitest-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +77 -0
  3. package/bin/suitest-mcp.js +123 -0
  4. package/package.json +50 -0
  5. package/python/suitest_lifecycle/__init__.py +3 -0
  6. package/python/suitest_lifecycle/analyzers/__init__.py +1 -0
  7. package/python/suitest_lifecycle/analyzers/crawl.py +187 -0
  8. package/python/suitest_lifecycle/analyzers/express.py +226 -0
  9. package/python/suitest_lifecycle/analyzers/openapi.py +163 -0
  10. package/python/suitest_lifecycle/analyzers/postman.py +132 -0
  11. package/python/suitest_lifecycle/analyzers/react.py +107 -0
  12. package/python/suitest_lifecycle/analyzers/zod_schema.py +131 -0
  13. package/python/suitest_lifecycle/blackbox/__init__.py +11 -0
  14. package/python/suitest_lifecycle/blackbox/bootstrap.py +249 -0
  15. package/python/suitest_lifecycle/blackbox/crawler.py +383 -0
  16. package/python/suitest_lifecycle/blackbox/detector.py +169 -0
  17. package/python/suitest_lifecycle/blackbox/generator.py +608 -0
  18. package/python/suitest_lifecycle/blackbox/graph.py +107 -0
  19. package/python/suitest_lifecycle/blackbox/mcp.py +546 -0
  20. package/python/suitest_lifecycle/blackbox/models.py +299 -0
  21. package/python/suitest_lifecycle/blackbox/prd_ingest.py +108 -0
  22. package/python/suitest_lifecycle/blackbox/reporter.py +76 -0
  23. package/python/suitest_lifecycle/blackbox/selector.py +111 -0
  24. package/python/suitest_lifecycle/cli.py +127 -0
  25. package/python/suitest_lifecycle/config.py +314 -0
  26. package/python/suitest_lifecycle/enrich.py +140 -0
  27. package/python/suitest_lifecycle/exporters/__init__.py +1 -0
  28. package/python/suitest_lifecycle/exporters/backend.py +345 -0
  29. package/python/suitest_lifecycle/exporters/frontend.py +459 -0
  30. package/python/suitest_lifecycle/frontend_runtime.py +77 -0
  31. package/python/suitest_lifecycle/llm_bridge.py +365 -0
  32. package/python/suitest_lifecycle/mcp_server.py +187 -0
  33. package/python/suitest_lifecycle/models.py +166 -0
  34. package/python/suitest_lifecycle/orchestrator.py +500 -0
  35. package/python/suitest_lifecycle/paths.py +90 -0
  36. package/python/suitest_lifecycle/plan.py +366 -0
  37. package/python/suitest_lifecycle/plan_frontend.py +252 -0
  38. package/python/suitest_lifecycle/prd.py +92 -0
  39. package/python/suitest_lifecycle/process.py +111 -0
  40. package/python/suitest_lifecycle/publish.py +218 -0
  41. package/python/suitest_lifecycle/readiness.py +83 -0
  42. package/python/suitest_lifecycle/report.py +179 -0
  43. package/python/suitest_lifecycle/runner.py +138 -0
  44. package/python/suitest_lifecycle/serialize.py +131 -0
  45. package/python/suitest_lifecycle/tcm.py +149 -0
  46. package/python/suitest_lifecycle/tools.py +217 -0
@@ -0,0 +1,383 @@
1
+ """Blackbox crawler — open a real browser, log in, walk the app, capture evidence.
2
+
3
+ No repo, no LLM, no testid requirement. Per page it records: interactive
4
+ elements (rich attributes for the selector strategy), pattern classification,
5
+ screenshot, console errors, network failures, and blank/crash detection.
6
+ SafeMode (default ON) refuses to follow destructive links (logout, delete,
7
+ billing, payment, …).
8
+
9
+ Async Playwright; :func:`discover` is the sync entry every consumer calls.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import contextlib
16
+ import re
17
+ from pathlib import Path
18
+ from typing import TYPE_CHECKING
19
+
20
+ from suitest_lifecycle.blackbox.detector import detect_login_form, detect_page_pattern
21
+ from suitest_lifecycle.blackbox.models import (
22
+ BlackboxUiConfig,
23
+ DiscoveryResult,
24
+ ElementInfo,
25
+ LoginProbe,
26
+ PageInfo,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from playwright.async_api import Page
31
+
32
+ # Rich per-element capture: every attribute the selector strategy ranks.
33
+ _EXTRACT_JS = r"""
34
+ () => {
35
+ const vis = (el) => { const r = el.getBoundingClientRect(); return r.width > 0 && r.height > 0; };
36
+ const labelFor = (el) => {
37
+ if (el.id) { const l = document.querySelector(`label[for="${CSS.escape(el.id)}"]`); if (l) return l.innerText.trim(); }
38
+ const wrap = el.closest('label'); return wrap ? wrap.innerText.trim().slice(0, 80) : '';
39
+ };
40
+ const cssPath = (el) => {
41
+ const parts = [];
42
+ let cur = el;
43
+ for (let i = 0; cur && cur.nodeType === 1 && i < 5; i++) {
44
+ let part = cur.tagName.toLowerCase();
45
+ if (cur.id) { parts.unshift(`#${cur.id}`); break; }
46
+ const sibs = cur.parentElement ? [...cur.parentElement.children].filter(c => c.tagName === cur.tagName) : [];
47
+ if (sibs.length > 1) part += `:nth-of-type(${sibs.indexOf(cur) + 1})`;
48
+ parts.unshift(part);
49
+ cur = cur.parentElement;
50
+ }
51
+ return parts.join(' > ');
52
+ };
53
+ const info = (el, kind) => ({
54
+ tag: el.tagName.toLowerCase(),
55
+ kind,
56
+ testid: el.getAttribute('data-testid') || el.getAttribute('data-cy') || el.getAttribute('data-test') || '',
57
+ testid_attr: el.hasAttribute('data-testid') ? 'data-testid' : el.hasAttribute('data-cy') ? 'data-cy' : el.hasAttribute('data-test') ? 'data-test' : '',
58
+ role: el.getAttribute('role') || '',
59
+ aria_label: el.getAttribute('aria-label') || '',
60
+ label: labelFor(el),
61
+ placeholder: el.getAttribute('placeholder') || '',
62
+ name: el.getAttribute('name') || '',
63
+ input_type: (el.getAttribute('type') || '').toLowerCase(),
64
+ autocomplete: el.getAttribute('autocomplete') || '',
65
+ text: (el.innerText || el.value || '').trim().slice(0, 80),
66
+ dom_id: el.id || '',
67
+ href: el.getAttribute && el.tagName === 'A' ? (el.getAttribute('href') || '') : '',
68
+ css: cssPath(el),
69
+ required: el.required === true || el.getAttribute('aria-required') === 'true',
70
+ });
71
+ const inputs = [...document.querySelectorAll('input,textarea,select')].filter(vis).map(e => info(e, e.tagName === 'SELECT' ? 'select' : (e.type === 'checkbox' ? 'checkbox' : 'input')));
72
+ const buttons = [...document.querySelectorAll('button,[role=button],input[type=submit],a[role=button]')].filter(vis).map(e => info(e, 'button'));
73
+ const links = [...document.querySelectorAll('a[href]')].filter(vis).map(e => info(e, 'link')).filter(l => l.href.startsWith('/'));
74
+ const tables = document.querySelectorAll('table tbody tr, [role=row], [role=grid] [role=row]');
75
+ let rowSelector = '';
76
+ if (document.querySelector('table tbody tr')) rowSelector = 'table tbody tr';
77
+ else if (document.querySelector('[role=grid] [role=row]')) rowSelector = '[role=grid] [role=row]';
78
+ // repeated-testid rows (list rendered as divs)
79
+ const counts = {};
80
+ document.querySelectorAll('[data-testid]').forEach(e => { const t = e.getAttribute('data-testid'); counts[t] = (counts[t] || 0) + 1; });
81
+ const repeated = Object.entries(counts).filter(([, n]) => n >= 3).sort((a, b) => b[1] - a[1]);
82
+ if (!rowSelector && repeated.length) rowSelector = `[data-testid="${repeated[0][0]}"]`;
83
+ const search = [...document.querySelectorAll('input[type=search],input[placeholder]')].filter(vis)
84
+ .find(e => /search|cari|filter/i.test((e.getAttribute('placeholder') || '') + (e.getAttribute('name') || '') + (e.getAttribute('aria-label') || '')));
85
+ const pager = [...document.querySelectorAll('button,a')].filter(vis)
86
+ .find(e => /next|»|older|selanjutnya/i.test((e.innerText || '').trim()) || (e.getAttribute('aria-label') || '').match(/next page/i));
87
+ return {
88
+ title: document.title || '',
89
+ inputs, buttons, links,
90
+ testids: [...new Set([...document.querySelectorAll('[data-testid]')].map(e => e.getAttribute('data-testid')))],
91
+ hasTable: tables.length >= 1 || !!rowSelector,
92
+ rowSelector,
93
+ hasModal: !!document.querySelector('[role=dialog],[aria-modal=true],dialog[open]'),
94
+ searchInfo: search ? info(search, 'input') : null,
95
+ pagerInfo: pager ? info(pager, 'button') : null,
96
+ textSample: (document.body ? document.body.innerText : '').trim().slice(0, 1500),
97
+ elementCount: document.body ? document.body.querySelectorAll('*').length : 0,
98
+ };
99
+ }
100
+ """
101
+
102
# SafeMode deny-list for routes. ``_excluded`` searches this pattern
# (case-insensitively, anywhere in the route) when ``cfg.crawl.safe_mode`` is
# on, and a match keeps the crawler from navigating to that route.
_SAFE_SKIP_HREF = re.compile(
    r"(logout|log-out|sign-?out|delete|remove|destroy|billing|payment|checkout|subscribe"
    r"|unsubscribe|deactivate)",
    re.I,
)
107
+
108
+
109
async def _goto(page: Page, url: str) -> None:
    """Open *url* on *page* and return once the DOM has been parsed.

    ``domcontentloaded`` is the only hard requirement (20 s budget). After
    that a short, best-effort wait for ``networkidle`` lets late requests
    settle; it is allowed to fail silently because analytics or long-polling
    can keep the network busy indefinitely.
    """
    load_timeout_ms = 20000
    settle_timeout_ms = 3000
    await page.goto(url, wait_until="domcontentloaded", timeout=load_timeout_ms)
    # Best-effort settle: a timeout (or any other failure) here is not an error.
    with contextlib.suppress(Exception):
        await page.wait_for_load_state("networkidle", timeout=settle_timeout_ms)
116
+
117
+
118
+ def _route_of(url: str, base: str) -> str:
119
+ tail = url[len(base) :] if url.startswith(base) else url
120
+ tail = tail.split("?", 1)[0].split("#", 1)[0]
121
+ return tail or "/"
122
+
123
+
124
+ def _excluded(route: str, cfg: BlackboxUiConfig) -> bool:
125
+ if any(route.startswith(x) for x in cfg.crawl.exclude):
126
+ return True
127
+ if cfg.crawl.include and not any(route.startswith(x) for x in cfg.crawl.include):
128
+ return True
129
+ return bool(cfg.crawl.safe_mode and _SAFE_SKIP_HREF.search(route))
130
+
131
+
132
async def _snapshot(
    page: Page,
    route: str,
    depth: int,
    evidence_dir: Path,
    console_errors: list[str],
    network_errors: list[str],
    *,
    shot_name: str,
) -> PageInfo:
    """Capture the evidence for the page currently loaded in *page*.

    Runs the in-page extraction script, takes a screenshot, and folds both
    into a ``PageInfo`` together with the pattern classification.

    Args:
        page: Live Playwright page, already navigated.
        route: Route to record for this page.
        depth: Crawl depth to record for this page.
        evidence_dir: Directory for the screenshot (created if missing).
        console_errors: Console/page errors collected so far; copied.
        network_errors: Network failures collected so far; copied.
        shot_name: Screenshot file name without the ``.png`` extension.

    Returns:
        The populated ``PageInfo``. ``screenshot`` is ``""`` when the
        screenshot could not be written.
    """
    data = await page.evaluate(_EXTRACT_JS)
    shot = ""
    try:
        evidence_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240 — tiny, one-off, local FS
        shot_path = evidence_dir / f"{shot_name}.png"
        await page.screenshot(path=str(shot_path))
        shot = str(shot_path)
    except Exception:  # screenshot must never sink the crawl
        shot = ""

    links = data.get("links", [])
    text_sample = str(data.get("textSample", ""))
    row_selector = data.get("rowSelector")
    # The selector may embed a page-controlled data-testid value, so quote it
    # with repr() instead of pasting it between hard-coded quotes: a quote
    # character in the value can then neither break nor extend the expression.
    row_locator = f"page.locator({str(row_selector)!r})" if row_selector else ""

    info = PageInfo(
        route=route,
        url=page.url,
        title=str(data.get("title", "")),
        depth=depth,
        inputs=[ElementInfo.from_json(e) for e in data.get("inputs", [])],
        buttons=[ElementInfo.from_json(e) for e in data.get("buttons", [])],
        links=[ElementInfo.from_json(e) for e in links],
        nav_routes=sorted({str(e.get("href", "")) for e in links if e.get("href")}),
        testids=[str(t) for t in data.get("testids", [])],
        has_table=bool(data.get("hasTable")),
        row_locator=row_locator,
        has_modal=bool(data.get("hasModal")),
        console_errors=list(console_errors),
        network_errors=list(network_errors),
        screenshot=shot,
        visible_text_sample=text_sample,
        # Blank/crash heuristic: almost no DOM nodes, or no visible text.
        blank=int(data.get("elementCount") or 0) < 5 or not text_sample.strip(),
    )
    from suitest_lifecycle.blackbox.selector import build_locator

    if data.get("searchInfo"):
        info.search_locator = build_locator(ElementInfo.from_json(data["searchInfo"]))
    if data.get("pagerInfo"):
        info.pagination_locator = build_locator(ElementInfo.from_json(data["pagerInfo"]))
    info.pattern = detect_page_pattern(info)
    # A page counts as a form either by classification or by shape:
    # at least two fillable inputs plus at least one button.
    info.has_form = info.pattern == "form" or (
        len([e for e in info.inputs if e.input_type not in ("checkbox", "radio", "hidden")]) >= 2
        and bool(info.buttons)
    )
    return info
184
+
185
+
186
async def _discover(cfg: BlackboxUiConfig, evidence_dir: Path) -> DiscoveryResult:
    """Run the full blackbox discovery against ``cfg.target_url``.

    Steps:
      1. Open the login route (falling back to ``/``) and snapshot it.
      2. Detect the login form, apply manual selector overrides, and attempt
         a login when credentials are configured.
      3. Breadth-first crawl from the landing route, the root, the entry
         page's links and the configured includes, snapshotting each page.

    Args:
        cfg: Blackbox UI configuration (target, auth, crawl limits, selectors).
        evidence_dir: Directory that receives the screenshots.

    Returns:
        The ``DiscoveryResult``. An unreachable target yields a result with
        no pages and an entry in ``errors`` rather than an exception.
    """
    from collections import deque

    from playwright.async_api import async_playwright

    base = cfg.target_url.rstrip("/")
    result = DiscoveryResult(base_url=base)
    # Shared buffers: the page event handlers append to them, every snapshot
    # copies them, and they are cleared before each navigation.
    console_errors: list[str] = []
    network_errors: list[str] = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=not cfg.headed)
        try:
            ctx = await browser.new_context(viewport={"width": 1280, "height": 720})
            page = await ctx.new_page()
            page.on(
                "console",
                lambda m: console_errors.append(m.text[:300]) if m.type == "error" else None,
            )
            page.on("pageerror", lambda e: console_errors.append(str(e)[:300]))
            page.on(
                "response",
                lambda r: (
                    network_errors.append(f"{r.status} {r.url[:200]}") if r.status >= 500 else None
                ),
            )
            page.on(
                "requestfailed",
                # ERR_ABORTED = request cancelled by navigation (Next.js RSC
                # prefetch, analytics beacons) — normal behavior, not a bug signal.
                lambda r: (
                    network_errors.append(f"FAILED {r.url[:200]} {r.failure or ''}")
                    if "ERR_ABORTED" not in str(r.failure or "")
                    else None
                ),
            )

            # ---- 1. login page ---------------------------------------------------
            login_route = cfg.auth.login_url or "/login"
            try:
                await _goto(page, base + login_route)
            except Exception:
                try:
                    await _goto(page, base + "/")
                except Exception as exc:
                    result.errors.append(f"target unreachable: {exc}")
                    return result  # the finally below closes the browser
            # Record where we actually ended up (redirects, fallback to "/").
            login_route = _route_of(page.url, base)
            console_errors.clear()
            network_errors.clear()
            login_page = await _snapshot(
                page, login_route, 0, evidence_dir, console_errors, network_errors, shot_name="login"
            )
            result.pages.append(login_page)

            form = detect_login_form(login_page, ignore_testids=cfg.crawl.ignore_testids)
            # manual overrides beat detection (docs: selectors.loginUsername/…)
            if cfg.selectors.login_username:
                form.username = _as_locator(cfg.selectors.login_username)
            if cfg.selectors.login_password:
                form.password = _as_locator(cfg.selectors.login_password)
            if cfg.selectors.login_submit:
                form.submit = _as_locator(cfg.selectors.login_submit)
            result.login = form if form.found() else None

            # ---- 2. perform login -------------------------------------------------
            if result.login and cfg.auth.username and cfg.auth.password:
                probe = LoginProbe(attempted=True)
                try:
                    await _eval_locator(page, form.username).fill(cfg.auth.username)
                    await _eval_locator(page, form.password).fill(cfg.auth.password)
                    await _eval_locator(page, form.submit).click()
                    # Both waits are best-effort: success is judged from the
                    # route we end up on, not from the waits themselves.
                    with contextlib.suppress(Exception):
                        await page.wait_for_url(
                            lambda u: _route_of(u, base) != login_route, timeout=10000
                        )
                    with contextlib.suppress(Exception):
                        await page.wait_for_load_state("networkidle", timeout=5000)
                    landed = _route_of(page.url, base)
                    probe.landed_route = landed
                    probe.success = landed != login_route
                    if not probe.success:
                        # still on login — look for an error region to report
                        err = await page.evaluate(
                            "() => { const e = document.querySelector('[role=alert],"
                            "[class*=error],[data-testid*=error]');"
                            " return e ? (e.innerText || '').slice(0, 120) : ''; }"
                        )
                        probe.detail = f"stayed on {landed}; error: {err or 'none shown'}"
                except Exception as exc:
                    probe.detail = f"login interaction failed: {exc}"[:300]
                result.login_probe = probe
            elif result.login:
                result.login_probe = LoginProbe(attempted=False, detail="no credentials configured")

            # ---- 3. BFS crawl ------------------------------------------------------
            # Seed from everywhere we already know: where we landed, the root, the
            # nav links captured on the entry page, and config includes. Without
            # this, an app with no /login (entry 404s) would dead-end immediately.
            start_route = _route_of(page.url, base)
            seeds = [start_route, "/", *login_page.nav_routes, *cfg.crawl.include]
            # dict.fromkeys de-duplicates while keeping seed order.
            queue: deque[tuple[str, int]] = deque((r, 0) for r in dict.fromkeys(seeds))
            visited: set[str] = {login_route}
            while queue and len(result.pages) < cfg.crawl.max_routes:
                route, depth = queue.popleft()
                if route in visited or depth > cfg.crawl.max_depth:
                    continue
                # The landing route is always crawled, even if a rule would skip it.
                if _excluded(route, cfg) and route != start_route:
                    result.skipped_routes.append(route)
                    continue
                visited.add(route)
                console_errors.clear()
                network_errors.clear()
                try:
                    await _goto(page, base + route)
                except Exception as exc:
                    result.errors.append(f"{route}: navigation failed: {exc}"[:200])
                    continue
                landed = _route_of(page.url, base)
                try:
                    info = await _snapshot(
                        page,
                        route,
                        depth,
                        evidence_dir,
                        console_errors,
                        network_errors,
                        shot_name=_shot_name(route),
                    )
                except Exception as exc:
                    # One page failing to snapshot (e.g. the execution context
                    # was destroyed by a client-side redirect) must not throw
                    # away every page gathered so far.
                    result.errors.append(f"{route}: snapshot failed: {exc}"[:200])
                    continue
                # Redirected to something that looks like a login page → protected.
                info.protected = landed != route and "login" in landed.lower()
                result.pages.append(info)
                for href in info.nav_routes:
                    # Bound the queue so link-heavy pages cannot grow it without limit.
                    if href not in visited and len(queue) < cfg.crawl.max_routes * 3:
                        queue.append((href, depth + 1))
        finally:
            # Always release the browser, including on unexpected errors; a
            # failing close must not mask the original exception or the result.
            with contextlib.suppress(Exception):
                await browser.close()
    return result
320
+
321
+
322
+ def _shot_name(route: str) -> str:
323
+ slug = re.sub(r"[^a-zA-Z0-9]+", "_", route).strip("_") or "root"
324
+ return f"page_{slug}"[:80]
325
+
326
+
327
+ def _as_locator(expr: str) -> str:
328
+ """Accept either a full ``page.…`` expression or a raw CSS selector."""
329
+ e = expr.strip()
330
+ return e if e.startswith("page.") else f'page.locator("{e}")'
331
+
332
+
333
+ def _eval_locator(page: Page, expr: str):
334
+ """Resolve a stored locator EXPRESSION on a live page.
335
+
336
+ The expression grammar is our own output (``build_locator``/`_as_locator``),
337
+ so evaluating it against the page object is safe and keeps one single
338
+ source of truth between discovery-time interaction and generated code.
339
+ """
340
+ return eval(expr, {"page": page})
341
+
342
+
343
def discover(cfg: BlackboxUiConfig, evidence_dir: str | Path) -> DiscoveryResult:
    """Synchronous entry point for the full discovery run.

    Normalises *evidence_dir* to a ``Path`` and drives the async
    ``_discover`` coroutine (login + crawl + evidence) to completion.
    """
    evidence_path = Path(evidence_dir)
    crawl = _discover(cfg, evidence_path)
    return asyncio.run(crawl)
346
+
347
+
348
async def _analyze_one(cfg: BlackboxUiConfig, url: str, evidence_dir: Path) -> PageInfo:
    """Open a single page in a fresh browser and snapshot it (no crawl).

    Args:
        cfg: Blackbox UI configuration; ``target_url`` is the base for
            relative *url* values and for computing the recorded route.
        url: Absolute URL (starting with ``http``) or a route relative to
            the configured target.
        evidence_dir: Directory that receives the screenshot.

    Returns:
        The ``PageInfo`` for the page that was actually loaded.

    Raises:
        Exception: Navigation and snapshot errors propagate to the caller;
            the browser is closed first.
    """
    from playwright.async_api import async_playwright

    base = cfg.target_url.rstrip("/") or url
    console_errors: list[str] = []
    network_errors: list[str] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=not cfg.headed)
        try:
            ctx = await browser.new_context(viewport={"width": 1280, "height": 720})
            page = await ctx.new_page()
            page.on(
                "console",
                lambda m: console_errors.append(m.text[:300]) if m.type == "error" else None,
            )
            page.on("pageerror", lambda e: console_errors.append(str(e)[:300]))
            # Same network capture as the full crawl; without these handlers
            # the network_errors list handed to the snapshot is always empty.
            page.on(
                "response",
                lambda r: (
                    network_errors.append(f"{r.status} {r.url[:200]}") if r.status >= 500 else None
                ),
            )
            page.on(
                "requestfailed",
                # ERR_ABORTED = request cancelled by navigation — not a bug signal.
                lambda r: (
                    network_errors.append(f"FAILED {r.url[:200]} {r.failure or ''}")
                    if "ERR_ABORTED" not in str(r.failure or "")
                    else None
                ),
            )
            target = url if url.startswith("http") else base + url
            await _goto(page, target)
            route = _route_of(page.url, base)
            return await _snapshot(
                page,
                route,
                0,
                evidence_dir,
                console_errors,
                network_errors,
                shot_name=_shot_name(route),
            )
        finally:
            # Release the browser even when navigation or the snapshot raised;
            # a failing close must not mask the original exception.
            with contextlib.suppress(Exception):
                await browser.close()
376
+
377
+
378
def analyze_single_page(cfg: BlackboxUiConfig, url: str, evidence_dir: str | Path) -> PageInfo:
    """Synchronous entry point for analysing exactly one page.

    Normalises *evidence_dir* to a ``Path`` and drives the async
    ``_analyze_one`` coroutine (pattern + elements + evidence, no crawl).
    """
    evidence_path = Path(evidence_dir)
    analysis = _analyze_one(cfg, url, evidence_path)
    return asyncio.run(analysis)
381
+
382
+
383
+ __all__ = ["analyze_single_page", "discover"]
@@ -0,0 +1,169 @@
1
+ """Heuristic detectors: login form + page pattern. Deterministic, no LLM.
2
+
3
+ Nothing here references any app-specific ``data-testid`` — the old
4
+ suitest-example convention is only an input signal (via the selector strategy's
5
+ tier 1), never a requirement.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+
12
+ from suitest_lifecycle.blackbox.models import ElementInfo, LoginForm, PageInfo
13
+ from suitest_lifecycle.blackbox.selector import build_locator
14
+
15
# All hint tuples below are matched as plain substrings of the lower-cased
# element blob built by ``_blob``. Several entries (``masuk``, ``ingat``,
# ``keluar``, ``hapus``) appear to be Indonesian UI terms.

# Substrings that mark an input as the username/identifier field.
_USERNAME_HINTS = ("email", "e-mail", "user", "login", "account", "identifier", "phone")
# Substrings that mark a button as the login submit button.
_SUBMIT_HINTS = ("sign in", "log in", "login", "masuk", "submit", "continue", "next")
# Substrings that mark a checkbox as the "remember me" option.
_REMEMBER_HINTS = ("remember", "ingat", "keep me")
# Substrings that make ``is_destructive`` flag an element.
# NOTE(review): "pay " carries a trailing space, presumably so it does not
# match longer words that merely start with "pay" — confirm.
_DESTRUCTIVE_HINTS = (
    "delete",
    "remove",
    "destroy",
    "logout",
    "log out",
    "sign out",
    "keluar",
    "hapus",
    "cancel subscription",
    "unsubscribe",
    "payment",
    "pay ",
    "checkout",
    "billing",
    "publish",
    "send",
    "approve",
    "reject",
    "submit final",
    "deactivate",
)

# Case-insensitive text patterns used by ``detect_page_pattern`` on the
# page's visible text sample (and, for not-found, also on the title).
_ERROR_TEXT_RE = re.compile(
    r"(something went wrong|internal server error|unexpected error|exception|traceback"
    r"|terjadi kesalahan)",
    re.I,
)
_FORBIDDEN_RE = re.compile(r"(forbidden|unauthorized|access denied|403|401|tidak berhak)", re.I)
_NOT_FOUND_RE = re.compile(r"(not found|404|page (doesn.t|does not) exist|halaman tidak)", re.I)
_EMPTY_RE = re.compile(r"(no \w+ yet|nothing here|empty|no results|no data|tidak ada \w+)", re.I)
49
+
50
+
51
+ def _blob(el: ElementInfo) -> str:
52
+ return " ".join(
53
+ (
54
+ el.testid,
55
+ el.name,
56
+ el.dom_id,
57
+ el.placeholder,
58
+ el.label,
59
+ el.aria_label,
60
+ el.autocomplete,
61
+ el.text,
62
+ )
63
+ ).lower()
64
+
65
+
66
def is_destructive(el: ElementInfo) -> bool:
    """SafeMode gate — never click/submit these during crawl or generated tests.

    True when any destructive hint occurs as a substring of the element's
    lower-cased attribute blob.
    """
    haystack = _blob(el)
    for hint in _DESTRUCTIVE_HINTS:
        if hint in haystack:
            return True
    return False
69
+
70
+
71
def detect_login_form(page: PageInfo, *, ignore_testids: bool = False) -> LoginForm:
    """Find username/password/submit (+ remember) among a page's elements.

    Works on ANY attribute the DOM offers: type, name, autocomplete, label,
    placeholder, aria-label, visible text.

    Args:
        page: Snapshot of the page to inspect.
        ignore_testids: Forwarded to ``build_locator`` for every locator built.

    Returns:
        A ``LoginForm`` for ``page.route``. It is returned without locators
        (so ``.found()`` is False) when no password field, no username
        candidate, or no non-destructive submit button exists.
    """
    form = LoginForm(route=page.route)

    password = next(
        (e for e in page.inputs if e.input_type == "password" or "password" in _blob(e)),
        None,
    )
    if password is None:
        return form

    # Pass 1: an input that positively identifies itself as the username.
    username = None
    for e in page.inputs:
        if e is password or e.input_type in ("checkbox", "radio", "hidden", "submit"):
            continue
        if e.input_type == "email" or e.autocomplete in ("username", "email"):
            username = e
            break
        if any(h in _blob(e) for h in _USERNAME_HINTS):
            username = e
            break
    if username is None:
        # Pass 2: fall back to the text input right before the password (an
        # unrelated field earlier on the page, e.g. a header search box, must
        # not win); only if none precedes it, take the first one after it.
        text_types = ("", "text", "email", "tel")
        pw_index = next(i for i, e in enumerate(page.inputs) if e is password)
        preceding = [e for e in page.inputs[:pw_index] if e.input_type in text_types]
        following = [e for e in page.inputs[pw_index + 1 :] if e.input_type in text_types]
        if preceding:
            username = preceding[-1]
        elif following:
            username = following[0]
    if username is None:
        return form

    # SafeMode applies to the fallback too: a destructive button is never
    # chosen as the submit control, not even as a last resort.
    safe_buttons = [b for b in page.buttons if not is_destructive(b)]
    submit = next(
        (
            b
            for b in safe_buttons
            if b.input_type == "submit" or any(h in _blob(b) for h in _SUBMIT_HINTS)
        ),
        None,
    )
    if submit is None and safe_buttons:
        submit = safe_buttons[0]
    if submit is None:
        return form

    form.username = build_locator(username, ignore_testids=ignore_testids)
    form.password = build_locator(password, ignore_testids=ignore_testids)
    form.submit = build_locator(submit, ignore_testids=ignore_testids)

    remember = next(
        (
            e
            for e in page.inputs
            if e.input_type == "checkbox" and any(h in _blob(e) for h in _REMEMBER_HINTS)
        ),
        None,
    )
    if remember is not None:
        form.remember = build_locator(remember, ignore_testids=ignore_testids)

    # Only inputs and buttons are searched for an "error"-tagged element.
    error = next((e for e in page.inputs + page.buttons if "error" in _blob(e)), None)
    if error is not None:
        form.error = build_locator(error, ignore_testids=ignore_testids)
    return form
140
+
141
+
142
def detect_page_pattern(page: PageInfo) -> str:
    """Classify a crawled page into one of ``PAGE_PATTERNS``.

    Checks run in a fixed priority order and the first match wins: blank,
    not_found, forbidden, error, login, modal, list, empty, form, dashboard,
    detail — otherwise ``unknown``.
    """
    sample = page.visible_text_sample
    if page.blank:
        return "blank"
    if _NOT_FOUND_RE.search(sample) or _NOT_FOUND_RE.search(page.title):
        return "not_found"
    # Text-only failure states, highest priority first.
    for pattern, label in ((_FORBIDDEN_RE, "forbidden"), (_ERROR_TEXT_RE, "error")):
        if pattern.search(sample):
            return label

    for field in page.inputs:
        if field.input_type == "password":
            return "login"
    if page.has_modal:
        return "modal"
    if page.has_table:
        return "list"
    if _EMPTY_RE.search(sample):
        return "empty"

    fillable = [f for f in page.inputs if f.input_type not in ("checkbox", "radio", "hidden")]
    if len(fillable) >= 2 and page.buttons:
        return "form"

    lowered_route = page.route.lower()
    route_hit = any(word in lowered_route for word in ("dashboard", "home", "overview"))
    if route_hit or "dashboard" in sample.lower():
        return "dashboard"
    # A trailing numeric id or long hex/uuid-like segment suggests a detail page.
    if re.search(r"/\d+$|/[0-9a-f-]{8,}$", page.route):
        return "detail"
    return "unknown"