web-tester-for-claude 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +651 -0
- package/bin/web-tester.js +35 -0
- package/package.json +64 -0
- package/src/browser/attrs.ts +79 -0
- package/src/browser/session.ts +139 -0
- package/src/cli.ts +1488 -0
- package/src/impact.ts +165 -0
- package/src/init.ts +260 -0
- package/src/inspector/capture.ts +293 -0
- package/src/inspector/deep.ts +147 -0
- package/src/inspector/packs.ts +98 -0
- package/src/inspector/report.ts +667 -0
- package/src/inspector/run.ts +544 -0
- package/src/inspector/steps.ts +380 -0
- package/src/inspector/summarise.ts +178 -0
- package/src/inspector/verdict.ts +275 -0
- package/src/journeys.ts +78 -0
- package/src/kb.ts +84 -0
- package/src/map/classify.ts +149 -0
- package/src/map/crawl.ts +394 -0
- package/src/map/generate.ts +253 -0
- package/src/map/report.ts +112 -0
- package/src/map/run.ts +219 -0
- package/src/sitemap.ts +75 -0
- package/src/sweep.ts +476 -0
- package/src/templates/agent-section.md +77 -0
- package/src/templates/dot-web-tester/impact-rules.json +36 -0
- package/src/templates/dot-web-tester/instructions/getting-started.md +62 -0
- package/src/templates/dot-web-tester/instructions/recipes.md +105 -0
- package/src/templates/dot-web-tester/journeys/example-signup.json +17 -0
- package/src/templates/dot-web-tester/urls-smoke.txt +19 -0
- package/src/templates/skill.md +59 -0
- package/src/util/log.ts +26 -0
- package/src/util/paths.ts +141 -0
- package/src/util/prompt.ts +50 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
import type { Page } from "playwright";
|
|
2
|
+
import type {
|
|
3
|
+
ConsoleEntry,
|
|
4
|
+
NetworkEntry,
|
|
5
|
+
PageErrorEntry
|
|
6
|
+
} from "./capture";
|
|
7
|
+
|
|
8
|
+
/**
 * Canonical names of the optional failure signals a run can be told to
 * fail on. `parseFailOn` produces values of this type and `computeVerdict`
 * consults them when deciding the verdict.
 */
export type FailOnKind =
  | "page-errors" // uncaught page errors
  | "console-errors" // console entries of type "error"
  | "http-4xx" // responses with status 400-499
  | "http-5xx"; // responses with status 500 and above
|
|
13
|
+
|
|
14
|
+
/**
 * Accepted spellings for `--fail-on` entries, mapped to their canonical
 * kind. The HTTP kinds also accept the short forms `4xx` and `5xx`.
 */
const FAIL_ON_ALIASES: Record<string, FailOnKind> = {
  "page-errors": "page-errors",
  "console-errors": "console-errors",
  "http-4xx": "http-4xx",
  "http-5xx": "http-5xx",
  "4xx": "http-4xx",
  "5xx": "http-5xx"
};

/**
 * Parse a comma-separated `--fail-on` value into canonical kinds.
 *
 * Entries are trimmed, empty entries are ignored, aliases are resolved and
 * duplicates are dropped while keeping first-seen order.
 *
 * @param raw - Raw flag value, e.g. `"page-errors, 5xx"`.
 * @returns The distinct kinds in the order they first appear.
 * @throws Error when an entry is not a known kind or alias.
 */
export function parseFailOn(raw: string): FailOnKind[] {
  const out: FailOnKind[] = [];
  for (const piece of raw.split(",")) {
    const p = piece.trim();
    if (!p) continue;
    // Own-property check: a bare `FAIL_ON_ALIASES[p]` lookup also finds
    // inherited members ("constructor", "toString", "__proto__"), which
    // would be accepted as if they were valid kinds.
    const mapped = Object.prototype.hasOwnProperty.call(FAIL_ON_ALIASES, p)
      ? FAIL_ON_ALIASES[p]
      : undefined;
    if (!mapped)
      throw new Error(
        `--fail-on unknown kind "${p}". Known: page-errors, console-errors, 4xx, 5xx`
      );
    if (!out.includes(mapped)) out.push(mapped);
  }
  return out;
}
|
|
39
|
+
|
|
40
|
+
/**
 * One assertion about the final page, discriminated by `kind`:
 * text / no-text check whether a piece of text is visible, selector /
 * no-selector check whether an element is visible, and attr compares the
 * selected value or label of an element tagged with `data-attr-name`.
 */
export type Expectation =
  | { kind: "text"; text: string }
  | { kind: "no-text"; text: string }
  | { kind: "selector"; selector: string }
  | { kind: "no-selector"; selector: string }
  | { kind: "attr"; name: string; value: string };
|
|
46
|
+
|
|
47
|
+
/** Outcome of evaluating a single expectation against the page. */
export type ExpectationResult = {
  /** The expectation that was evaluated. */
  expectation: Expectation;
  /** Whether the expectation held. */
  ok: boolean;
  /** Human-readable reason, set when the expectation did not hold. */
  detail?: string;
};
|
|
52
|
+
|
|
53
|
+
/**
 * Parse `--expect <kind>=<value>` shorthand.
 *
 *   text=Welcome                     → page must contain text "Welcome"
 *   no-text=Error                    → page must NOT contain text "Error"
 *   selector=button[type=submit]     → element must be visible
 *   no-selector=.error-banner        → element must not be visible
 *   attr=Quantity:1000               → data-attr-name="Quantity" must have value or label "1000"
 *
 * The input is trimmed, then split at the first `=`; for `attr` the operand
 * is split again at the first `:`, so the expected value may itself contain
 * `=` or `:`. An empty attr value (`attr=Quantity:`) is allowed.
 *
 * @param raw - The raw flag value.
 * @returns The parsed expectation.
 * @throws Error when the input is empty, has no `=`, names an unknown kind,
 *   or is missing its operand (including an empty attr name).
 */
export function parseExpectation(raw: string): Expectation {
  const trimmed = raw.trim();
  if (!trimmed) throw new Error("empty --expect");
  const eq = trimmed.indexOf("=");
  if (eq === -1)
    throw new Error(
      `--expect needs <kind>=<value> (got "${raw}"). Kinds: text, no-text, selector, no-selector, attr`
    );
  const kind = trimmed.slice(0, eq);
  const value = trimmed.slice(eq + 1);
  switch (kind) {
    case "text":
      if (!value) throw new Error("--expect text= needs text");
      return { kind: "text", text: value };
    case "no-text":
      if (!value) throw new Error("--expect no-text= needs text");
      return { kind: "no-text", text: value };
    case "selector":
      if (!value) throw new Error("--expect selector= needs a selector");
      return { kind: "selector", selector: value };
    case "no-selector":
      if (!value) throw new Error("--expect no-selector= needs a selector");
      return { kind: "no-selector", selector: value };
    case "attr": {
      const colon = value.indexOf(":");
      if (colon === -1)
        throw new Error(
          "--expect attr= needs <name>:<value> (e.g. attr=Quantity:1000)"
        );
      const name = value.slice(0, colon);
      // Every other kind rejects an empty operand; an empty attribute name
      // can never identify an element, so reject it here as well.
      if (!name)
        throw new Error(
          '--expect attr= needs a non-empty <name> before ":" (e.g. attr=Quantity:1000)'
        );
      return { kind: "attr", name, value: value.slice(colon + 1) };
    }
    default:
      throw new Error(
        `--expect unknown kind "${kind}" in "${raw}". Kinds: text, no-text, selector, no-selector, attr`
      );
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * How long to wait for a positive visibility assertion (text or selector
 * visible) before deciding it failed. 5s leaves room for an element that
 * renders a few seconds after a client-side route transition + hydration;
 * tighter values flake on prod-like latency.
 *
 * NOTE(review): the attr assertion in `evaluateOne` reads the DOM once and
 * does not wait for this timeout — confirm whether that is intended.
 */
const EXPECT_TIMEOUT_MS = 5_000;
|
|
111
|
+
|
|
112
|
+
/**
 * Evaluate every expectation against the page, strictly one after another,
 * and return the outcomes in the same order as the input.
 *
 * @param page - Page to inspect.
 * @param expectations - Expectations to check, in order.
 * @returns One result per expectation.
 */
export async function evaluateExpectations(
  page: Page,
  expectations: Expectation[]
): Promise<ExpectationResult[]> {
  const outcomes: ExpectationResult[] = [];
  for (const expectation of expectations.values()) {
    const outcome = await evaluateOne(page, expectation);
    outcomes.push(outcome);
  }
  return outcomes;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Wait budget for negative assertions (no-text / no-selector). Shorter than
 * the positive budget because the target is expected NOT to appear.
 */
const ABSENCE_TIMEOUT_MS = 1_500;

/**
 * Resolve to true when the locator's first match becomes visible within
 * `timeoutMs`, false when the wait times out.
 *
 * Only a timeout means "not visible". Any other failure (invalid selector,
 * closed page, ...) is rethrown so the caller reports it as a failure
 * instead of treating it as evidence of absence.
 */
async function becomesVisible(
  target: ReturnType<Page["locator"]>,
  timeoutMs: number
): Promise<boolean> {
  try {
    await target.first().waitFor({ state: "visible", timeout: timeoutMs });
    return true;
  } catch (err) {
    const timedOut =
      err instanceof Error &&
      (err.name === "TimeoutError" || /Timeout \d+ms exceeded/.test(err.message));
    if (timedOut) return false;
    throw err;
  }
}

/**
 * Evaluate a single expectation against the page.
 *
 * - text / selector: pass when the target becomes visible within
 *   `EXPECT_TIMEOUT_MS`.
 * - no-text / no-selector: pass when the target does not become visible
 *   within `ABSENCE_TIMEOUT_MS`.
 * - attr: reads the first element whose `data-attr-name` equals the name
 *   and passes when its `data-attr-selected-value` or
 *   `data-attr-selected-label` equals the expected value (missing
 *   attributes are read as "").
 *
 * Never throws: any error raised while evaluating is reported as a failed
 * result whose `detail` is the error message.
 */
async function evaluateOne(
  page: Page,
  e: Expectation
): Promise<ExpectationResult> {
  try {
    if (e.kind === "text") {
      const found = await becomesVisible(page.getByText(e.text), EXPECT_TIMEOUT_MS);
      return found
        ? { expectation: e, ok: true }
        : {
            expectation: e,
            ok: false,
            detail: `text "${e.text}" not visible on final page`
          };
    }
    if (e.kind === "no-text") {
      const found = await becomesVisible(page.getByText(e.text), ABSENCE_TIMEOUT_MS);
      return found
        ? {
            expectation: e,
            ok: false,
            detail: `text "${e.text}" was visible (should not be)`
          }
        : { expectation: e, ok: true };
    }
    if (e.kind === "selector") {
      const found = await becomesVisible(page.locator(e.selector), EXPECT_TIMEOUT_MS);
      return found
        ? { expectation: e, ok: true }
        : {
            expectation: e,
            ok: false,
            detail: `selector "${e.selector}" not visible`
          };
    }
    if (e.kind === "no-selector") {
      const found = await becomesVisible(page.locator(e.selector), ABSENCE_TIMEOUT_MS);
      return found
        ? {
            expectation: e,
            ok: false,
            detail: `selector "${e.selector}" was visible (should not be)`
          }
        : { expectation: e, ok: true };
    }
    // attr
    const snapshot = await page.evaluate((name) => {
      // Compare attribute values directly instead of interpolating the name
      // into a selector string, which breaks on names containing `"` or `\`.
      const el = Array.from(document.querySelectorAll("[data-attr-name]")).find(
        (candidate) => candidate.getAttribute("data-attr-name") === name
      );
      if (!el) return null;
      return {
        value: el.getAttribute("data-attr-selected-value") ?? "",
        label: el.getAttribute("data-attr-selected-label") ?? ""
      };
    }, e.name);
    if (snapshot === null) {
      return {
        expectation: e,
        ok: false,
        detail: `no data-attr-name="${e.name}" found — page may need instrumentation`
      };
    }
    const matches = snapshot.value === e.value || snapshot.label === e.value;
    return matches
      ? { expectation: e, ok: true }
      : {
          expectation: e,
          ok: false,
          detail: `attr "${e.name}" was value="${snapshot.value}" label="${snapshot.label}", expected "${e.value}"`
        };
  } catch (err) {
    return {
      expectation: e,
      ok: false,
      detail: err instanceof Error ? err.message : String(err)
    };
  }
}
|
|
219
|
+
|
|
220
|
+
/** Everything `computeVerdict` looks at when deciding pass/fail. */
export type VerdictInputs = {
  /** Number of steps that failed. */
  failedSteps: number;
  /** Captured page errors. */
  pageErrors: PageErrorEntry[];
  /** Captured console entries; only those of type "error" can trip the verdict. */
  consoleEntries: ConsoleEntry[];
  /** Captured network entries; their `status` is checked for 4xx/5xx. */
  networkEntries: NetworkEntry[];
  /** Results of the evaluated expectations. */
  expectations: ExpectationResult[];
  /** Optional signals that are allowed to fail the run. */
  failOn: FailOnKind[];
};
|
|
228
|
+
|
|
229
|
+
/** Final pass/fail outcome of a run. */
export type Verdict = {
  /** True when nothing tripped the verdict. */
  ok: boolean;
  /** One-line reasons the verdict failed; empty when `ok` is true. */
  triggers: string[];
};
|
|
233
|
+
|
|
234
|
+
/**
 * Decide the final pass/fail verdict for a run.
 *
 * The run passes only when no step failed, every expectation held, and none
 * of the signals listed in `failOn` fired. Signals that are not listed in
 * `failOn` are ignored even if present in the captured data.
 *
 * Each failure contributes one line to `triggers`, in a fixed order (steps,
 * page errors, console errors, 4xx, 5xx, expectations), for display in the
 * CLI summary and HTML report. `triggers` is empty when the verdict is ok.
 */
export function computeVerdict(inputs: VerdictInputs): Verdict {
  const {
    failedSteps,
    pageErrors,
    consoleEntries,
    networkEntries,
    expectations,
    failOn
  } = inputs;

  const triggers: string[] = [];
  // Record a trigger line only for a non-zero count.
  const note = (count: number, label: string): void => {
    if (count > 0) triggers.push(`${count} ${label}`);
  };
  // Count responses whose status lies in [low, high).
  const statusCount = (low: number, high: number): number =>
    networkEntries.filter(
      (entry) => entry.status !== null && entry.status >= low && entry.status < high
    ).length;

  note(failedSteps, "step(s) failed");

  if (failOn.includes("page-errors")) {
    note(pageErrors.length, "uncaught page error(s)");
  }
  if (failOn.includes("console-errors")) {
    note(
      consoleEntries.filter((entry) => entry.type === "error").length,
      "console error(s)"
    );
  }
  if (failOn.includes("http-4xx")) {
    note(statusCount(400, 500), "4xx response(s)");
  }
  if (failOn.includes("http-5xx")) {
    // No upper bound: anything from 500 upwards counts.
    note(statusCount(500, Number.POSITIVE_INFINITY), "5xx response(s)");
  }

  note(expectations.filter((result) => !result.ok).length, "expectation(s) failed");

  return { ok: triggers.length === 0, triggers };
}
|
package/src/journeys.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { userJourneysDir } from "./util/paths";
|
|
4
|
+
|
|
5
|
+
/**
 * A named, persisted recipe: a URL, a chain of steps and optional
 * assertions — the same ingredients as a manual `inspect` invocation, saved
 * so it can be run by name (and by the `impact` command). Journeys live in
 * `.web-tester/journeys/<name>.json` at the project root.
 */
export type Journey = {
  /** Optional human-readable description shown in the CLI listing. */
  description?: string;
  /** Path or absolute URL; relative values resolve against WEB_TESTER_BASE_URL. */
  url: string;
  /** Steps, written in the `--step` grammar (parsed with `parseStep`). */
  steps: string[];
  /** Expectations, written in `--expect` syntax (parsed with `parseExpectation`). */
  expectations?: string[];
  /** Fail-on signals (`page-errors,4xx,5xx,console-errors`). Default 5xx only. */
  failOn?: string;
  /** Persist-check window in ms; 0 = single check. */
  persistMs?: number;
};
|
|
25
|
+
|
|
26
|
+
/**
 * Load the journey stored as `<name>.json` in the user's journeys directory.
 *
 * The file content is validated rather than trusted: it must be a JSON
 * object with a non-empty string `url` and a `steps` array of strings.
 *
 * @param name - Journey name, i.e. the file name without `.json`.
 * @returns The parsed journey.
 * @throws Error when the file does not exist, is not valid JSON, or lacks a
 *   valid `url` / `steps`.
 */
export function loadJourney(name: string): Journey {
  const dir = userJourneysDir();
  const path = resolve(dir, `${name}.json`);
  if (!existsSync(path))
    throw new Error(
      `unknown journey "${name}". Looked for ${path}. Known: ${listJourneyNames().join(", ") || "(none — add .json files to .web-tester/journeys/)"}`
    );
  const raw = readFileSync(path, "utf-8");

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    // Name the journey and file; a bare SyntaxError gives neither.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`journey "${name}" is not valid JSON (${path}): ${reason}`);
  }

  // `null`, arrays and primitives are valid JSON but not journeys; reject
  // them here rather than crashing on a property access.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed))
    throw new Error(`journey "${name}" is missing required "url" or "steps"`);

  const candidate = parsed as Partial<Record<keyof Journey, unknown>>;
  if (
    typeof candidate.url !== "string" ||
    !candidate.url ||
    !Array.isArray(candidate.steps)
  )
    throw new Error(`journey "${name}" is missing required "url" or "steps"`);
  if (!candidate.steps.every((step) => typeof step === "string"))
    throw new Error(`journey "${name}" has a non-string entry in "steps"`);

  return parsed as Journey;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Write a journey to `.web-tester/journeys/<slug>.json` — the plain-text
 * recipe a rerun replays. Only the journey object passed in is written.
 *
 * The file name is a slug of `name`: a trailing `.json` is dropped, every
 * run of characters outside `a-z 0-9 _ -` becomes a single `-`, leading and
 * trailing dashes are removed, and the result is lower-cased. The journeys
 * directory is created if needed.
 *
 * @param name - Desired journey name.
 * @param journey - Journey to persist.
 * @returns The absolute path that was written.
 * @throws Error when the name slugifies to an empty string.
 */
export function saveJourney(name: string, journey: Journey): string {
  const withoutExtension = name.replace(/\.json$/i, "");
  const dashed = withoutExtension.replace(/[^a-z0-9_-]+/gi, "-");
  const slug = dashed.replace(/^-+|-+$/g, "").toLowerCase();
  if (slug === "") {
    throw new Error(`invalid journey name: "${name}"`);
  }

  const journeysDir = userJourneysDir();
  mkdirSync(journeysDir, { recursive: true });

  const target = resolve(journeysDir, `${slug}.json`);
  const serialised = `${JSON.stringify(journey, null, 2)}\n`;
  writeFileSync(target, serialised);
  return target;
}
|
|
59
|
+
|
|
60
|
+
/**
 * List the names of all journeys on disk: every `*.json` file in the
 * journeys directory with its extension removed, sorted. Returns an empty
 * list when the directory does not exist.
 */
export function listJourneyNames(): string[] {
  const journeysDir = userJourneysDir();
  if (!existsSync(journeysDir)) {
    return [];
  }
  const suffix = ".json";
  const names: string[] = [];
  for (const entry of readdirSync(journeysDir)) {
    if (entry.endsWith(suffix)) {
      names.push(entry.slice(0, -suffix.length));
    }
  }
  return names.sort();
}
|
|
68
|
+
|
|
69
|
+
/**
 * List every journey with its description, for the CLI listing.
 *
 * A journey without a description gets an empty string; one that cannot be
 * loaded is still listed, with the description "(failed to load)".
 */
export function listJourneys(): Array<{ name: string; description: string }> {
  const rows: Array<{ name: string; description: string }> = [];
  for (const name of listJourneyNames()) {
    let description: string;
    try {
      description = loadJourney(name).description ?? "";
    } catch {
      description = "(failed to load)";
    }
    rows.push({ name, description });
  }
  return rows;
}
|
package/src/kb.ts
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { existsSync, readFileSync, readdirSync } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { userKnowledgeDirs } from "./util/paths";
|
|
4
|
+
|
|
5
|
+
/** One markdown file from the user's knowledge directory. */
export type KnowledgeFile = {
  /** File name with the `.md` extension removed. */
  topic: string;
  /** Frontmatter `title` if present, else the first markdown H1, else the file name. */
  title: string;
  /** Absolute path on disk. */
  path: string;
  /** Key/value pairs parsed from the YAML-ish frontmatter block, if any. */
  meta: Record<string, string | string[] | boolean>;
};
|
|
15
|
+
|
|
16
|
+
/**
 * Split a markdown document into its YAML-ish frontmatter and body.
 *
 * Frontmatter is a block delimited by `---` lines at the very start of the
 * file. Only flat `key: value` lines are understood:
 *   - `true` / `false` become booleans,
 *   - `[a, b]` becomes a string array (empty items dropped),
 *   - anything else is kept as a string,
 *   - matching surrounding quotes on strings and array items are removed,
 *   - keys with an empty value and lines that are not `key: value` are skipped.
 *
 * Both LF and CRLF line endings are accepted, a leading byte-order mark is
 * ignored, and the closing `---` may be the last line of the file.
 *
 * @param raw - Full file contents.
 * @returns The parsed `meta` and the remaining `body`. When no frontmatter
 *   block is found, `meta` is empty and `body` is `raw` unchanged.
 */
function parseSimpleFrontmatter(
  raw: string
): { meta: Record<string, string | string[] | boolean>; body: string } {
  // A BOM would stop the `^---` anchor from matching.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const match = text.match(
    /^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/
  );
  if (!match || match[1] === undefined) {
    return { meta: {}, body: raw };
  }

  // Remove one pair of matching surrounding quotes, if present.
  const unquote = (s: string): string => {
    const quoted = s.match(/^(["'])(.*)\1$/);
    return quoted ? (quoted[2] ?? "") : s;
  };

  const meta: Record<string, string | string[] | boolean> = {};
  for (const line of match[1].split(/\r?\n/)) {
    const kv = line.match(/^([a-zA-Z][a-zA-Z0-9_]*):\s*(.*)$/);
    if (!kv) continue;
    const key = kv[1] ?? "";
    const value = (kv[2] ?? "").trim();
    if (value === "") continue;
    if (value === "true") meta[key] = true;
    else if (value === "false") meta[key] = false;
    else if (/^\[.*\]$/.test(value)) {
      meta[key] = value
        .slice(1, -1)
        .split(",")
        .map((s) => unquote(s.trim()))
        .filter(Boolean);
    } else meta[key] = unquote(value);
  }
  // The body group is absent when the closing fence ends the file.
  return { meta, body: match[2] ?? "" };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Return the text of the first line that starts with `# ` (a markdown H1),
 * with the marker and surrounding whitespace removed, or null when no such
 * line exists.
 */
function firstH1(body: string): string | null {
  for (const candidate of body.split("\n")) {
    if (candidate.startsWith("# ")) {
      return candidate.replace(/^#\s+/, "").trim();
    }
  }
  return null;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Pick the directory holding the user's KB markdown: the first candidate
 * from `userKnowledgeDirs()` that exists on disk, checked in the order
 * given. Returns null when none of them exists.
 */
function resolveKnowledgeDir(): string | null {
  let chosen: string | null = null;
  for (const candidate of userKnowledgeDirs()) {
    if (!existsSync(candidate)) continue;
    chosen = candidate;
    break;
  }
  return chosen;
}
|
|
59
|
+
|
|
60
|
+
/**
 * List every knowledge file in the resolved knowledge directory.
 *
 * Considers `*.md` files except `README.md`, sorted by file name. Each
 * file is read and its frontmatter parsed; the title is the frontmatter
 * `title` when it is a string, otherwise the first H1 of the body,
 * otherwise the file name. Returns an empty list when no knowledge
 * directory exists.
 */
export function listKnowledge(): KnowledgeFile[] {
  const dir = resolveKnowledgeDir();
  if (!dir) return [];

  const markdownFiles = readdirSync(dir)
    .filter((entry) => entry.endsWith(".md") && entry !== "README.md")
    .sort();

  const knowledge: KnowledgeFile[] = [];
  for (const file of markdownFiles) {
    const path = resolve(dir, file);
    const { meta, body } = parseSimpleFrontmatter(readFileSync(path, "utf-8"));
    let title: string;
    if (typeof meta.title === "string") {
      title = meta.title;
    } else {
      title = firstH1(body) ?? file;
    }
    knowledge.push({ topic: file.replace(/\.md$/, ""), title, path, meta });
  }
  return knowledge;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Look up a knowledge file by topic and return it together with its full
 * file contents.
 *
 * @param topic - Topic slug (file name without `.md`).
 * @returns The matching entry plus `contents`, read from disk.
 * @throws Error listing the known topics when the topic does not exist.
 */
export function readKnowledge(topic: string): KnowledgeFile & { contents: string } {
  const entries = listKnowledge();
  for (const entry of entries) {
    if (entry.topic === topic) {
      return { ...entry, contents: readFileSync(entry.path, "utf-8") };
    }
  }
  const topics = entries.map((entry) => entry.topic).join(", ");
  const known = topics || "(no .md files found in .web-tester/)";
  throw new Error(`knowledge topic "${topic}" not found. Known: ${known}`);
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import type { PageFacts } from "./crawl";
|
|
2
|
+
|
|
3
|
+
/** Coarse category assigned to a crawled page by `classify`. */
export type PageType =
  | "home"
  | "auth"
  | "search"
  | "form"
  | "list"
  | "detail"
  | "content"
  | "error";
|
|
12
|
+
|
|
13
|
+
/** Crawl facts for a page, extended with its classification. */
export type ClassifiedPage = PageFacts & {
  /** Category assigned to the page. */
  type: PageType;
  /** Route template for the page's path, with dynamic segments as `:id`. */
  template: string;
};
|
|
17
|
+
|
|
18
|
+
/** All pages that share one route template. */
export type RouteGroup = {
  /** The shared route template. */
  template: string;
  /** Type of the group, taken from its representative. */
  type: PageType;
  /** Number of member pages. */
  count: number;
  /** Best representative page (prefers a healthy page). */
  representative: ClassifiedPage;
  /** Every page in the group. */
  members: ClassifiedPage[];
};
|
|
26
|
+
|
|
27
|
+
/** A canonical 8-4-4-4-12 hex UUID. */
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
/** A segment made only of hex digits, at least 12 long (hash-like ids). */
const HEX_RE = /^[0-9a-f]{12,}$/i;

/**
 * Does a path segment look like a dynamic id/slug rather than a fixed route?
 *
 * True for: an all-digit segment, a UUID, a long hex string, a slug ending
 * in `-` plus three or more digits (e.g. "blue-widget-12345"), or any
 * segment containing a run of four or more digits (e.g. "sku12345",
 * "order2024001").
 *
 * Deliberately conservative: version-y segments like "v2beta" or
 * "apiv2docs" have no 4+ digit run, so they stay fixed routes —
 * over-collapsing distinct routes into one template is worse than the
 * reverse for a site map.
 */
function isDynamicSegment(seg: string): boolean {
  if (!seg) return false;
  return (
    /^\d+$/.test(seg) || // numeric id
    UUID_RE.test(seg) ||
    HEX_RE.test(seg) ||
    /-\d{3,}$/.test(seg) || // slug ending in a numeric id
    /\d{4,}/.test(seg) // long digit run anywhere in the segment
  );
}

/**
 * Collapse a concrete path into a route template by replacing dynamic
 * segments with `:id`. The query string and the fragment are dropped, as
 * are empty segments (so a trailing slash disappears).
 * `/products/12345?ref=x`, `/products/67890` and `/products/67890#reviews`
 * all become `/products/:id`. A path with no segments yields `/`.
 *
 * @param path - Path, optionally followed by `?query` and/or `#fragment`.
 * @returns The route template, always starting with `/`.
 */
export function routeTemplate(path: string): string {
  // Cut at whichever of "?" or "#" comes first; previously only "?" was
  // handled, so a fragment leaked into the last segment of the template.
  const cut = path.search(/[?#]/);
  const pathname = cut === -1 ? path : path.slice(0, cut);
  const segments = pathname.split("/").filter(Boolean);
  if (segments.length === 0) return "/";
  const mapped = segments.map((s) => (isDynamicSegment(s) ? ":id" : s));
  return `/${mapped.join("/")}`;
}
|
|
59
|
+
|
|
60
|
+
/** Matches a whole path segment that names an authentication route. */
const AUTH_RE =
  /(^|\/)(login|log-in|signin|sign-in|signup|sign-up|register|auth|forgot|password|reset)(\/|$)/i;

/**
 * Assign a coarse page type from crawl facts. Rules are tried in order and
 * the first match wins:
 *
 *  1. error   — the page did not load ok
 *  2. home    — the path or its route template is `/`
 *  3. auth    — has a password field, or a path segment names an auth route
 *  4. search  — has a search input, a `search` path segment, or a
 *               `q` / `query` / `s` / `search` query parameter
 *  5. list    — at least three catalog-style card links
 *  6. form    — a form with two or more non-hidden fields
 *  7. detail  — the route template contains a dynamic `:id` segment
 *  8. content — everything else
 *
 * The final path is preferred over the requested path when present.
 */
export function classify(facts: PageFacts): PageType {
  if (!facts.ok) return "error";

  const path = facts.finalPath || facts.path;
  const template = routeTemplate(path);
  // Segment-based patterns must only see the pathname. Run against the full
  // path they miss `/forgot?email=x` (the segment is followed by "?", not
  // "/" or end) and misfire on `/welcome?next=/login`.
  const cut = path.search(/[?#]/);
  const pathname = cut === -1 ? path : path.slice(0, cut);

  if (path === "/" || template === "/") return "home";

  if (facts.passwordFields > 0 || AUTH_RE.test(pathname)) return "auth";

  if (
    facts.searchInputs > 0 ||
    /(^|\/)search(\/|$)/i.test(pathname) ||
    // Query-parameter check intentionally looks at the full path.
    /[?&](q|query|s|search)=/.test(path)
  )
    return "search";

  // A list/index page: several catalog-style card links.
  if (facts.cardLinkCount >= 3) return "list";

  // A meaningful form (more than a newsletter single-field) that isn't auth.
  const richForm = facts.forms.some(
    (f) => f.fields.filter((x) => x.type !== "hidden").length >= 2
  );
  if (richForm) return "form";

  // Dynamic route → a detail/show page.
  if (template !== path && template.includes(":id")) return "detail";

  return "content";
}
|
|
95
|
+
|
|
96
|
+
/**
 * Classify every page, returning each page's facts extended with its type
 * and route template (computed from the final path when present, otherwise
 * the requested path). Input order is preserved.
 */
export function classifyAll(pages: PageFacts[]): ClassifiedPage[] {
  const classified: ClassifiedPage[] = [];
  for (const page of pages) {
    const type = classify(page);
    const template = routeTemplate(page.finalPath || page.path);
    classified.push({ ...page, type, template });
  }
  return classified;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Group classified pages by route template.
 *
 * Each group's representative is the member that is ok and has the
 * shortest path; a failed page is chosen only when no member is ok, and
 * ties go to the member that appears first in the input. The group's type
 * is the representative's type.
 *
 * Groups are returned in a stable order: by type (home, list, detail, form,
 * auth, search, content, error) and then alphabetically by template.
 */
export function groupByTemplate(pages: ClassifiedPage[]): RouteGroup[] {
  // Bucket pages by template, keeping input order within each bucket.
  const buckets = new Map<string, ClassifiedPage[]>();
  for (const page of pages) {
    const bucket = buckets.get(page.template);
    if (bucket === undefined) buckets.set(page.template, [page]);
    else bucket.push(page);
  }

  // True when `candidate` should replace `current` as representative.
  const isBetter = (candidate: ClassifiedPage, current: ClassifiedPage): boolean =>
    candidate.ok !== current.ok
      ? candidate.ok
      : candidate.path.length < current.path.length;

  const groups: RouteGroup[] = [];
  buckets.forEach((members, template) => {
    // Buckets are never empty, so reduce without a seed is safe. Replacing
    // only on a strictly better candidate keeps the earliest page on ties.
    const representative = members.reduce((best, candidate) =>
      isBetter(candidate, best) ? candidate : best
    );
    groups.push({
      template,
      type: representative.type,
      count: members.length,
      representative,
      members
    });
  });

  // Display order of the page types.
  const typeRank: Record<PageType, number> = {
    home: 0,
    list: 1,
    detail: 2,
    form: 3,
    auth: 4,
    search: 5,
    content: 6,
    error: 7
  };
  groups.sort(
    (a, b) =>
      typeRank[a.type] - typeRank[b.type] || a.template.localeCompare(b.template)
  );
  return groups;
}
|