ax-audit 3.1.0 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/README.md +61 -225
- package/dist/checks/agent-access.d.ts +16 -0
- package/dist/checks/agent-access.d.ts.map +1 -0
- package/dist/checks/agent-access.js +110 -0
- package/dist/checks/agent-access.js.map +1 -0
- package/dist/checks/crawl-efficiency.d.ts +4 -0
- package/dist/checks/crawl-efficiency.d.ts.map +1 -0
- package/dist/checks/crawl-efficiency.js +122 -0
- package/dist/checks/crawl-efficiency.js.map +1 -0
- package/dist/checks/index.d.ts.map +1 -1
- package/dist/checks/index.js +6 -0
- package/dist/checks/index.js.map +1 -1
- package/dist/checks/robots-txt.d.ts +20 -0
- package/dist/checks/robots-txt.d.ts.map +1 -1
- package/dist/checks/robots-txt.js +111 -3
- package/dist/checks/robots-txt.js.map +1 -1
- package/dist/checks/rsl.d.ts +6 -0
- package/dist/checks/rsl.d.ts.map +1 -0
- package/dist/checks/rsl.js +252 -0
- package/dist/checks/rsl.js.map +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +20 -2
- package/dist/cli.js.map +1 -1
- package/dist/constants.d.ts +17 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +39 -1
- package/dist/constants.js.map +1 -1
- package/dist/fetcher.d.ts +5 -1
- package/dist/fetcher.d.ts.map +1 -1
- package/dist/fetcher.js +32 -27
- package/dist/fetcher.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/orchestrator.d.ts +2 -2
- package/dist/orchestrator.d.ts.map +1 -1
- package/dist/orchestrator.js +13 -6
- package/dist/orchestrator.js.map +1 -1
- package/dist/reporter/index.d.ts.map +1 -1
- package/dist/reporter/index.js +7 -0
- package/dist/reporter/index.js.map +1 -1
- package/dist/reporter/markdown.d.ts +8 -0
- package/dist/reporter/markdown.d.ts.map +1 -0
- package/dist/reporter/markdown.js +76 -0
- package/dist/reporter/markdown.js.map +1 -0
- package/dist/types.d.ts +7 -1
- package/dist/types.d.ts.map +1 -1
- package/docs/api.md +200 -0
- package/docs/architecture.md +88 -0
- package/docs/checks.md +322 -0
- package/docs/ci.md +89 -0
- package/docs/cli.md +67 -0
- package/docs/concepts.md +87 -0
- package/docs/faq.md +77 -0
- package/docs/getting-started.md +101 -0
- package/package.json +2 -1
package/dist/types.d.ts
CHANGED
|
@@ -65,6 +65,12 @@ export interface AuditOptions {
|
|
|
65
65
|
checks?: string[];
|
|
66
66
|
timeout?: number;
|
|
67
67
|
verbose?: boolean;
|
|
68
|
+
/** Retry attempts for transient fetch failures (timeouts, 5xx, network errors). Default 2. */
|
|
69
|
+
retries?: number;
|
|
70
|
+
}
|
|
71
|
+
export interface BatchOptions extends Omit<AuditOptions, 'url'> {
|
|
72
|
+
/** Maximum number of URLs audited in parallel. Default 1 (sequential). */
|
|
73
|
+
concurrency?: number;
|
|
68
74
|
}
|
|
69
75
|
export interface BatchAuditReport {
|
|
70
76
|
reports: AuditReport[];
|
|
@@ -82,7 +88,7 @@ export interface SecurityHeader {
|
|
|
82
88
|
label: string;
|
|
83
89
|
critical: boolean;
|
|
84
90
|
}
|
|
85
|
-
export type OutputFormat = 'terminal' | 'json' | 'html';
|
|
91
|
+
export type OutputFormat = 'terminal' | 'json' | 'html' | 'markdown';
|
|
86
92
|
/** Minimal snapshot of an audit run stored as the baseline file. */
|
|
87
93
|
export interface BaselineData {
|
|
88
94
|
url: string;
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC;AAErD,MAAM,WAAW,OAAO;IACtB,MAAM,EAAE,aAAa,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,OAAO,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,iDAAiD;AACjD,MAAM,WAAW,YAAY;IAC3B;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,KAAK,OAAO,CAAC,aAAa,CAAC,CAAC;IACvE,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,CAAC,GAAG,EAAE,YAAY,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;IACjD,IAAI,EAAE,SAAS,CAAC;CACjB;AAED,MAAM,WAAW,KAAK;IACpB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC;AAErD,MAAM,WAAW,OAAO;IACtB,MAAM,EAAE,aAAa,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,OAAO,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,iDAAiD;AACjD,MAAM,WAAW,YAAY;IAC3B;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,KAAK,OAAO,CAAC,aAAa,CAAC,CAAC;IACvE,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,CAAC,GAAG,EAAE,YAAY,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;IACjD,IAAI,EAAE,SAAS,CAAC;CACjB;AAED,MAAM,WAAW,KAAK;IACpB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8FAA8F;IAC9F,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAa,SAAQ,IAAI,CAAC,YAAY,EAAE,KAAK,CAAC;IAC7D,0EAA0E;IAC1E,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,
MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,KAAK,CAAC;KACd,CAAC;IACF,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,CAAC;AAIrE,oEAAoE;AACpE,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAChC;AAED,6BAA6B;AAC7B,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,iEAAiE;AACjE,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,WAAW,EAAE,SAAS,EAAE,CAAC;IACzB,YAAY,EAAE,SAAS,EAAE,CAAC;CAC3B"}
|
package/docs/api.md
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# Programmatic API
|
|
2
|
+
|
|
3
|
+
Full TypeScript support; every public type is exported from the package root.
|
|
4
|
+
|
|
5
|
+
```typescript
|
|
6
|
+
import { audit, batchAudit } from 'ax-audit';
|
|
7
|
+
import type { AuditReport, BatchAuditReport } from 'ax-audit';
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
The package is ESM-only (`"type": "module"`), targets Node 18+ (built-in `fetch`), and ships type declarations at `dist/index.d.ts`.
|
|
11
|
+
|
|
12
|
+
## `audit(options): Promise<AuditReport>`
|
|
13
|
+
|
|
14
|
+
Runs all (or selected) checks against one URL.
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
function audit(options: AuditOptions): Promise<AuditReport>;
|
|
18
|
+
|
|
19
|
+
interface AuditOptions {
|
|
20
|
+
url: string; // required, fully qualified (scheme included)
|
|
21
|
+
checks?: string[]; // default: all. Unknown IDs are silently ignored here
|
|
22
|
+
// (the CLI validates them; the API does not)
|
|
23
|
+
timeout?: number; // ms per request, default 10000
|
|
24
|
+
retries?: number; // transient-failure retries, default 2
|
|
25
|
+
verbose?: boolean; // log to stderr, default false
|
|
26
|
+
}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Behavior:
|
|
30
|
+
|
|
31
|
+
- Checks execute in parallel via `Promise.allSettled`. A check that **throws** becomes a score-0 `CheckResult` whose findings contain the error — `audit` itself never rejects for a check failure.
|
|
32
|
+
- All HTTP requests in a run share an in-memory cache keyed on URL + normalized request headers (mirroring HTTP `Vary` semantics, so requests that differ only in headers such as `Accept` are cached separately).
|
|
33
|
+
- The homepage is fetched once and passed to every check as `ctx.html` / `ctx.headers`.
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
const report = await audit({ url: 'https://example.com' });
|
|
37
|
+
report.overallScore; // number 0–100, weighted
|
|
38
|
+
report.grade; // { min, label, color }
|
|
39
|
+
report.results; // CheckResult[]
|
|
40
|
+
report.duration; // ms
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
`audit` **rejects** only for an unrecoverable setup error (e.g. an invalid `url` that can't be parsed). Network failure on the homepage fetch does not reject — it surfaces as failing checks.
|
|
44
|
+
|
|
45
|
+
## `batchAudit(urls, options?): Promise<BatchAuditReport>`
|
|
46
|
+
|
|
47
|
+
```typescript
|
|
48
|
+
function batchAudit(urls: string[], options?: BatchOptions): Promise<BatchAuditReport>;
|
|
49
|
+
|
|
50
|
+
interface BatchOptions extends Omit<AuditOptions, 'url'> {
|
|
51
|
+
concurrency?: number; // max parallel audits, default 1 (sequential)
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Report order always matches input order, regardless of `concurrency`. `concurrency < 1` is treated as 1.
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
const batch = await batchAudit(urls, { concurrency: 4, retries: 2 });
|
|
59
|
+
batch.reports; // AuditReport[], input order
|
|
60
|
+
batch.summary.total; // number
|
|
61
|
+
batch.summary.passed; // count scoring >= 70
|
|
62
|
+
batch.summary.failed; // count scoring < 70
|
|
63
|
+
batch.summary.averageScore;
|
|
64
|
+
batch.summary.grade; // Grade for the average
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Result shapes
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
interface AuditReport {
|
|
71
|
+
url: string;
|
|
72
|
+
timestamp: string; // ISO 8601
|
|
73
|
+
overallScore: number; // 0–100
|
|
74
|
+
grade: Grade;
|
|
75
|
+
results: CheckResult[];
|
|
76
|
+
duration: number; // ms
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
interface CheckResult {
|
|
80
|
+
id: string; // e.g. 'llms-txt'
|
|
81
|
+
name: string; // e.g. 'LLMs.txt'
|
|
82
|
+
description: string;
|
|
83
|
+
score: number; // 0–100, clamped
|
|
84
|
+
findings: Finding[];
|
|
85
|
+
duration: number; // ms
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
interface Finding {
|
|
89
|
+
status: 'pass' | 'warn' | 'fail';
|
|
90
|
+
message: string;
|
|
91
|
+
detail?: string;
|
|
92
|
+
hint?: string; // remediation advice (warn/fail)
|
|
93
|
+
learnMoreUrl?: string; // link to the matching remediation guide
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
interface Grade { min: number; label: string; color: string; }
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Scoring
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
function calculateOverallScore(results: CheckResult[], metas: CheckMeta[]): number;
|
|
103
|
+
function getGrade(score: number): Grade;
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
`calculateOverallScore` computes the weighted average; it falls back to a plain average when all selected checks have weight 0, and returns 0 for empty input. `getGrade` maps a score to its `Grade`.
|
|
107
|
+
|
|
108
|
+
## Baselines
|
|
109
|
+
|
|
110
|
+
```typescript
|
|
111
|
+
function toBaselineData(report: AuditReport): BaselineData;
|
|
112
|
+
function saveBaseline(path: string, report: AuditReport): void; // writes JSON
|
|
113
|
+
function loadBaseline(path: string): BaselineData; // throws on missing/invalid file
|
|
114
|
+
function diffBaseline(baseline: BaselineData, report: AuditReport): BaselineDiff;
|
|
115
|
+
|
|
116
|
+
interface BaselineData {
|
|
117
|
+
url: string;
|
|
118
|
+
timestamp: string;
|
|
119
|
+
overallScore: number;
|
|
120
|
+
checks: Record<string, number>; // checkId → score
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
interface BaselineDiff {
|
|
124
|
+
url: string;
|
|
125
|
+
baselineTimestamp: string;
|
|
126
|
+
currentTimestamp: string;
|
|
127
|
+
overallPrevious: number;
|
|
128
|
+
overallCurrent: number;
|
|
129
|
+
overallDelta: number;
|
|
130
|
+
checks: CheckDiff[];
|
|
131
|
+
regressions: CheckDiff[]; // delta < 0
|
|
132
|
+
improvements: CheckDiff[]; // delta > 0
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
`loadBaseline` throws (does not return null) on a missing file or invalid JSON — wrap it in try/catch. Checks that are present in the current report but absent from the baseline appear as new (no delta); checks that exist in the baseline but are no longer in the report are ignored.
|
|
137
|
+
|
|
138
|
+
## Reporters
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
function renderMarkdown(report: AuditReport, diff?: BaselineDiff): string;
|
|
142
|
+
function renderBatchMarkdown(batch: BatchAuditReport): string;
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Both return a Markdown string (summary table + findings with status emoji; `renderMarkdown` additionally includes baseline deltas when a `diff` is passed). Terminal and HTML rendering are CLI-internal; for other formats, consume the `AuditReport` JSON directly.
|
|
146
|
+
|
|
147
|
+
## Checks registry
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
import { checks } from 'ax-audit';
|
|
151
|
+
|
|
152
|
+
interface CheckModule {
|
|
153
|
+
run: (ctx: CheckContext) => Promise<CheckResult>;
|
|
154
|
+
meta: CheckMeta; // { id, name, description, weight }
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Run an individual check with a custom context:
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
const llms = checks.find((c) => c.meta.id === 'llms-txt')!;
|
|
162
|
+
const result = await llms.run({
|
|
163
|
+
url: 'https://example.com', // no trailing slash
|
|
164
|
+
html: homepageHtml,
|
|
165
|
+
headers: homepageHeaders, // lowercased keys
|
|
166
|
+
fetch: myFetchImpl,
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
interface CheckContext {
|
|
170
|
+
url: string;
|
|
171
|
+
html: string;
|
|
172
|
+
headers: Record<string, string>;
|
|
173
|
+
fetch: (url: string, options?: FetchOptions) => Promise<FetchResponse>;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
interface FetchOptions { headers?: Record<string, string>; }
|
|
177
|
+
|
|
178
|
+
interface FetchResponse {
|
|
179
|
+
status: number; // 0 on network error
|
|
180
|
+
headers: Record<string, string>; // lowercased keys
|
|
181
|
+
body: string;
|
|
182
|
+
ok: boolean;
|
|
183
|
+
url: string; // final URL after redirects
|
|
184
|
+
error?: string; // set when status === 0
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Custom headers passed to `ctx.fetch` are merged case-insensitively over the defaults (a custom `Accept` or `User-Agent` replaces the default), and responses are cached per URL + normalized headers — mirroring HTTP `Vary`. If you supply your own `fetch` implementation in a custom context, it should never throw; model failures as `{ status: 0, ok: false, error }`.
|
|
189
|
+
|
|
190
|
+
## API stability
|
|
191
|
+
|
|
192
|
+
Within a major version, the exported function signatures and the `AuditReport` JSON shape are stable. Specifically:
|
|
193
|
+
|
|
194
|
+
- **Stable:** `audit`, `batchAudit`, `calculateOverallScore`, `getGrade`, the baseline functions, the reporter functions, and all exported type *shapes*.
|
|
195
|
+
- **May change in minor versions:** the *set* of checks (new checks are added) and individual check `score`/`findings` content. The `weight` values of existing checks do not change within a major version: new checks start at weight 0, and reweighting is reserved for majors. Treat `results` as a list to iterate, not a fixed-length tuple.
|
|
196
|
+
- **Internal:** anything not exported from `src/index.ts`, including terminal/HTML reporters and individual check modules' internals.
|
|
197
|
+
|
|
198
|
+
## Exported types
|
|
199
|
+
|
|
200
|
+
`AuditOptions`, `BatchOptions`, `AuditReport`, `BatchAuditReport`, `CheckResult`, `CheckMeta`, `CheckContext`, `CheckModule`, `Finding`, `FindingStatus`, `FetchOptions`, `FetchResponse`, `Grade`, `OutputFormat`, `BaselineData`, `BaselineDiff`, `CheckDiff`.
|
package/docs/architecture.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
ax-audit is a dependency-light TypeScript codebase: two runtime dependencies (`chalk`, `commander`), Node 18+ built-in `fetch`, no HTTP libraries, no XML/HTML parser dependencies (regex-based primitives), and the built-in `node:test` runner.
|
|
4
|
+
|
|
5
|
+
## Pipeline
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
cli.ts ──► orchestrator.ts ──► checks/* (Promise.allSettled, parallel)
|
|
9
|
+
│ │
|
|
10
|
+
▼ ▼
|
|
11
|
+
fetcher.ts scorer.ts ──► reporter/{terminal,json,html,markdown}
|
|
12
|
+
(cache + retries) │
|
|
13
|
+
▲ baseline.ts (save / load / diff)
|
|
14
|
+
└── shared by every check via CheckContext.fetch
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
1. **cli.ts** parses and validates flags, loads the baseline if requested, and dispatches to single or batch mode.
|
|
18
|
+
2. **orchestrator.ts** (`audit`) creates one fetcher per run, fetches the homepage once, builds the `CheckContext` (`url`, `html`, `headers`, `fetch`), and runs all selected checks in parallel. A check that throws is converted into a score-0 result with the error as a finding — one bad check never kills the audit. `batchAudit` runs `audit` per URL through an order-preserving work queue with configurable `concurrency`.
|
|
19
|
+
3. **fetcher.ts** wraps `fetch` with: per-run in-memory caching keyed on URL + normalized (lowercased, sorted) custom headers — mirroring HTTP `Vary` semantics so a `text/markdown` probe never collides with the HTML fetch; case-insensitive header merging over defaults; timeouts via `AbortController`; and retries with exponential backoff for transient failures (status 0, 408, 425, 429, 5xx). Errors never throw — they become `{ status: 0, ok: false, error }` results, also cached.
|
|
20
|
+
4. **checks/** — one module per check (18). Each exports `default` (async check function) and `meta` (`{ id, name, description, weight }`).
|
|
21
|
+
5. **scorer.ts** computes the weighted average; when all selected checks have weight 0 it falls back to a plain average.
|
|
22
|
+
6. **reporter/** renders to terminal (chalk), JSON, self-contained HTML, or Markdown.
|
|
23
|
+
7. **baseline.ts** persists minimal score snapshots and computes per-check diffs for regression gating.
|
|
24
|
+
|
|
25
|
+
## Anatomy of a check
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
import { guideUrl } from '../guide-urls.js';
|
|
29
|
+
import type { CheckContext, CheckResult, CheckMeta, Finding } from '../types.js';
|
|
30
|
+
import { buildResult } from './utils.js';
|
|
31
|
+
|
|
32
|
+
export const meta: CheckMeta = {
|
|
33
|
+
id: 'my-check',
|
|
34
|
+
name: 'My Check',
|
|
35
|
+
description: 'One-line description shown in reports',
|
|
36
|
+
weight: 0, // new checks start informational in 3.x
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
export default async function check(ctx: CheckContext): Promise<CheckResult> {
|
|
40
|
+
const start = performance.now();
|
|
41
|
+
const findings: Finding[] = [];
|
|
42
|
+
let score = 100;
|
|
43
|
+
|
|
44
|
+
const res = await ctx.fetch(`${ctx.url}/something`, { headers: { Accept: 'application/json' } });
|
|
45
|
+
if (!res.ok) {
|
|
46
|
+
findings.push({
|
|
47
|
+
status: 'fail',
|
|
48
|
+
message: '/something not found',
|
|
49
|
+
hint: 'Actionable, copy-pasteable advice.',
|
|
50
|
+
learnMoreUrl: guideUrl(meta.id, 'not-found'),
|
|
51
|
+
});
|
|
52
|
+
return buildResult(meta, 0, findings, start);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// ... validations, each pushing a pass/warn/fail Finding and adjusting score
|
|
56
|
+
|
|
57
|
+
return buildResult(meta, score, findings, start);
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Conventions:
|
|
62
|
+
|
|
63
|
+
- **Findings are actionable.** Every `warn`/`fail` carries a `hint` with concrete remediation and a `learnMoreUrl` pointing to `lucioduran.com/projects/ax-audit/guides/<check-id>#<anchor>`. Every anchor must have a section in that guide.
|
|
64
|
+
- **Scores are clamped** to [0, 100] by `buildResult`.
|
|
65
|
+
- **Shared HTML primitives** live in `checks/html-utils.ts` (`getMetaContent`, `findLinkTags`, `getAttribute`, `extractVisibleText`, …) — no per-check regex duplication.
|
|
66
|
+
- **Content-Type validation** uses `checkContentType` from `checks/utils.ts` (by convention, a Content-Type mismatch deducts 5 points).
|
|
67
|
+
- **Network goes through `ctx.fetch`** — never raw `fetch` — so caching, retries, timeouts, and `--verbose` logging apply uniformly.
|
|
68
|
+
|
|
69
|
+
## Adding a new check
|
|
70
|
+
|
|
71
|
+
1. Create `src/checks/your-check.ts` exporting `default` + `meta` (weight 0 — see scoring policy below).
|
|
72
|
+
2. Register it in `src/checks/index.ts`.
|
|
73
|
+
3. Add its weight to `CHECK_WEIGHTS` in `src/constants.ts`.
|
|
74
|
+
4. Add a test suite in `test/checks/your-check.test.js` using `mockContext` / `mockResponse` from `test/helpers.js`. Route values can be functions `(url, fetchOptions) => response` when the response must vary by request headers.
|
|
75
|
+
5. Document it in `docs/checks.md` and the README table.
|
|
76
|
+
6. Write the remediation guide covering every `learnMoreUrl` anchor you emit.
|
|
77
|
+
|
|
78
|
+
## Scoring policy (3.x)
|
|
79
|
+
|
|
80
|
+
Any release that changes the score an unchanged site receives is treated as **breaking** (see CHANGELOG 3.0.0). Therefore in 3.x:
|
|
81
|
+
|
|
82
|
+
- New checks ship with **weight 0** (informational): full findings, no effect on the overall score or baselines.
|
|
83
|
+
- New findings inside weighted checks must be informational (no score deduction) — see the Content Signals findings in `robots-txt`.
|
|
84
|
+
- Weight redistribution happens in major versions (v4.0).
|
|
85
|
+
|
|
86
|
+
## Testing
|
|
87
|
+
|
|
88
|
+
`npm test` builds (`tsc`) and runs `node --test`. The suite (301 tests) covers every check, the scorer, baseline logic, and the Markdown reporter, and it includes integration tests that spin up real local HTTP servers for the fetcher (per-header caching, retries) and the batch orchestrator (ordering, concurrency caps). There are no test dependencies beyond Node.
|
package/docs/checks.md
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
# Checks Reference
|
|
2
|
+
|
|
3
|
+
ax-audit runs 18 checks. Fourteen are **weighted** (summing to 100% of the overall score); four are **informational** in 3.x — they run and report findings but carry weight 0 until v4.0, because score-affecting changes are treated as breaking (see [CHANGELOG 3.0.0](../CHANGELOG.md)).
|
|
4
|
+
|
|
5
|
+
This page documents the **exact scoring** of every check: each deduction, bonus, and formula, extracted from the source. Every finding links to a step-by-step remediation guide at `lucioduran.com/projects/ax-audit/guides/<check-id>`.
|
|
6
|
+
|
|
7
|
+
**Reading the tables:** each check starts at 100 unless noted. Deductions stack additively; `buildResult` clamps the final score to [0, 100]. "Hard fail" rows short-circuit the check.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Weighted checks
|
|
12
|
+
|
|
13
|
+
### `llms-txt` — 11%
|
|
14
|
+
|
|
15
|
+
`/llms.txt` presence and [llmstxt.org](https://llmstxt.org) spec compliance.
|
|
16
|
+
|
|
17
|
+
| Condition | Points |
|
|
18
|
+
| --- | --- |
|
|
19
|
+
| `/llms.txt` not found | **hard fail → 0** |
|
|
20
|
+
| Wrong Content-Type (expected `text/plain` or `text/markdown`) | −5 |
|
|
21
|
+
| First line is not an H1 (`# `) | −15 |
|
|
22
|
+
| No blockquote description (`> `) | −10 |
|
|
23
|
+
| No `##` section headings | −10 |
|
|
24
|
+
| No Markdown links | −10 |
|
|
25
|
+
| Content under 100 characters | −10 |
|
|
26
|
+
| `/llms-full.txt` also available | **+10** (capped at 100) |
|
|
27
|
+
|
|
28
|
+
### `robots-txt` — 11%
|
|
29
|
+
|
|
30
|
+
AI-crawler configuration. Core crawlers: GPTBot, ClaudeBot, ChatGPT-User, Claude-SearchBot, Google-Extended, PerplexityBot, OAI-SearchBot, CCBot.
|
|
31
|
+
|
|
32
|
+
| Condition | Points |
|
|
33
|
+
| --- | --- |
|
|
34
|
+
| `/robots.txt` not found | **hard fail → 0** |
|
|
35
|
+
| No core AI crawler explicitly configured | −40 |
|
|
36
|
+
| Some core crawlers missing | −`round(missing/8 × 30)` |
|
|
37
|
+
| Core crawler(s) blocked only via `User-agent: *` + `Disallow: /` | −5 per crawler |
|
|
38
|
+
| Known AI crawler(s) explicitly blocked (`Disallow: /`) | −3 per crawler |
|
|
39
|
+
| No `Sitemap:` directive | −5 |
|
|
40
|
+
| Partial path restrictions on AI crawlers | warn only, 0 |
|
|
41
|
+
| [Content Signals](https://contentsignals.org) findings (declared / malformed / unknown / missing) | informational, 0 in 3.x |
|
|
42
|
+
|
|
43
|
+
### `html-rendering` — 9%
|
|
44
|
+
|
|
45
|
+
Whether the static HTML contains content — most AI crawlers do not execute JavaScript. Thresholds: 500 chars / 80 words of visible text, 5% text-to-markup ratio.
|
|
46
|
+
|
|
47
|
+
| Condition | Points |
|
|
48
|
+
| --- | --- |
|
|
49
|
+
| No HTML body returned | **hard fail → 0** |
|
|
50
|
+
| Zero visible text in static HTML | −50 |
|
|
51
|
+
| Sparse content (< 500 chars or < 80 words) | −25 |
|
|
52
|
+
| Text-to-markup ratio < 5% | −10 |
|
|
53
|
+
| Empty SPA mount point (`#root`, `#__next`, `#__nuxt`, `#app`, `#svelte`, `#gatsby`) | −20 |
|
|
54
|
+
| 0 semantic landmarks (`<main>`, `<article>`, `<header>`, `<footer>`, `<nav>`) | −15 |
|
|
55
|
+
| 1–2 semantic landmarks | −10 |
|
|
56
|
+
| No `<h1>` | −10 |
|
|
57
|
+
| Multiple or empty `<h1>` | −5 |
|
|
58
|
+
| > 15 executable scripts without `<noscript>` fallback | −5 |
|
|
59
|
+
| `<img alt>` coverage < 90% | −5 |
|
|
60
|
+
|
|
61
|
+
### `structured-data` — 9%
|
|
62
|
+
|
|
63
|
+
JSON-LD on the homepage. Key entity types: Person, Organization, WebSite, WebPage, ProfilePage.
|
|
64
|
+
|
|
65
|
+
| Condition | Points |
|
|
66
|
+
| --- | --- |
|
|
67
|
+
| No JSON-LD blocks | **hard fail → 0** |
|
|
68
|
+
| Every JSON-LD block has invalid JSON | **→ 10** |
|
|
69
|
+
| Invalid JSON in a block | −10 per block |
|
|
70
|
+
| No schema.org `@context` | −15 |
|
|
71
|
+
| No key entity types found | −15 |
|
|
72
|
+
| Only one key entity type | −10 |
|
|
73
|
+
| No `@graph` array | −5 |
|
|
74
|
+
| No `BreadcrumbList` | −5 |
|
|
75
|
+
|
|
76
|
+
### `http-headers` — 9%
|
|
77
|
+
|
|
78
|
+
Security headers, AI discovery `Link` headers (RFC 5988-parsed), CORS on `.well-known`.
|
|
79
|
+
|
|
80
|
+
| Condition | Points |
|
|
81
|
+
| --- | --- |
|
|
82
|
+
| No headers retrievable | **hard fail → 0** |
|
|
83
|
+
| Missing critical security header (HSTS, X-Content-Type-Options) | −10 each |
|
|
84
|
+
| Only 1–3 of the 7 tracked security headers present | −5 |
|
|
85
|
+
| `Link` header missing both llms.txt and agent.json references | −15 |
|
|
86
|
+
| `Link` header missing one of the two | −5 |
|
|
87
|
+
| No CORS on `/.well-known/agent.json` | −10 |
|
|
88
|
+
|
|
89
|
+
### `agent-json` — 7%
|
|
90
|
+
|
|
91
|
+
`/.well-known/agent.json` [A2A Agent Card](https://a2a-protocol.org). Required fields: `name`, `description`, `url`, `skills`.
|
|
92
|
+
|
|
93
|
+
| Condition | Points |
|
|
94
|
+
| --- | --- |
|
|
95
|
+
| Not found | **hard fail → 0** |
|
|
96
|
+
| Invalid JSON | **→ 10** |
|
|
97
|
+
| Wrong Content-Type (expected `application/json`) | −5 |
|
|
98
|
+
| Missing required field | −15 per field |
|
|
99
|
+
| `url` on a different origin | −5 |
|
|
100
|
+
| `url` not an absolute URL | −5 |
|
|
101
|
+
| `skills` empty | −10 |
|
|
102
|
+
| `skills` entries missing `id` or `description` | −5 |
|
|
103
|
+
| No `protocolVersion` | −5 |
|
|
104
|
+
| No optional fields (`capabilities`, `authentication`, `documentationUrl`) | −5 |
|
|
105
|
+
|
|
106
|
+
### `mcp` — 7%
|
|
107
|
+
|
|
108
|
+
`/.well-known/mcp.json` [Model Context Protocol](https://modelcontextprotocol.io) server configuration.
|
|
109
|
+
|
|
110
|
+
| Condition | Points |
|
|
111
|
+
| --- | --- |
|
|
112
|
+
| Not found | **hard fail → 0** |
|
|
113
|
+
| Invalid JSON | **→ 10** |
|
|
114
|
+
| Wrong Content-Type | −5 |
|
|
115
|
+
| Missing `name` | −10 |
|
|
116
|
+
| Missing `description` | −5 |
|
|
117
|
+
| No `tools` array, or empty | −15 |
|
|
118
|
+
| No tool has a description | −10 |
|
|
119
|
+
| Some tools missing descriptions | −5 |
|
|
120
|
+
| No `resources` | −5 |
|
|
121
|
+
| No protocol version | −5 |
|
|
122
|
+
| No CORS headers | −10 |
|
|
123
|
+
|
|
124
|
+
### `seo-basics` — 7%
|
|
125
|
+
|
|
126
|
+
Head-tag fundamentals. Bounds: title 20–70 chars, description 70–160.
|
|
127
|
+
|
|
128
|
+
| Condition | Points |
|
|
129
|
+
| --- | --- |
|
|
130
|
+
| Homepage HTML unavailable | **hard fail → 0** |
|
|
131
|
+
| `<title>` missing or empty | −25 |
|
|
132
|
+
| Title too short / too long | −10 / −5 |
|
|
133
|
+
| Meta description missing | −20 |
|
|
134
|
+
| Description too short / too long | −8 / −5 |
|
|
135
|
+
| Description duplicates the title | −5 |
|
|
136
|
+
| No canonical link | −10 |
|
|
137
|
+
| Multiple canonicals / missing href / relative href | −5 each |
|
|
138
|
+
| `<html lang>` missing / invalid BCP 47 | −10 / −5 |
|
|
139
|
+
| No UTF-8 charset | −5 |
|
|
140
|
+
| Missing viewport | −5 |
|
|
141
|
+
| hreflang present without `x-default` | −3 |
|
|
142
|
+
|
|
143
|
+
### `security-txt` — 6%
|
|
144
|
+
|
|
145
|
+
`/.well-known/security.txt` per [RFC 9116](https://www.rfc-editor.org/rfc/rfc9116).
|
|
146
|
+
|
|
147
|
+
| Condition | Points |
|
|
148
|
+
| --- | --- |
|
|
149
|
+
| Not found | **hard fail → 0** |
|
|
150
|
+
| Missing `Contact` or `Expires` | −25 per field |
|
|
151
|
+
| `Expires` in the past | −20 |
|
|
152
|
+
| No optional fields (Canonical, Preferred-Languages, Policy, Encryption, Hiring) | −5 |
|
|
153
|
+
|
|
154
|
+
### `meta-tags` — 6%
|
|
155
|
+
|
|
156
|
+
AI meta tags (`ai:summary`, `ai:content_type`, `ai:author`, `ai:api`, `ai:agent_card`), discovery links, Open Graph, Twitter Card.
|
|
157
|
+
|
|
158
|
+
| Condition | Points |
|
|
159
|
+
| --- | --- |
|
|
160
|
+
| Homepage HTML unavailable | **hard fail → 0** |
|
|
161
|
+
| 0 AI meta tags | −18 |
|
|
162
|
+
| Only 1–2 AI meta tags | −12 |
|
|
163
|
+
| No `rel="alternate"` → llms.txt | −12 |
|
|
164
|
+
| No `rel="alternate"` → agent.json | −8 |
|
|
165
|
+
| No `rel="me"` identity links | −8 |
|
|
166
|
+
| No Open Graph tags at all | −12 |
|
|
167
|
+
| OG required incomplete (`og:title`, `og:description`, `og:url`, `og:type`) | −8 |
|
|
168
|
+
| OG recommended incomplete (`og:image`, `og:site_name`) | −3 |
|
|
169
|
+
| No Twitter Card tags at all | −6 |
|
|
170
|
+
| Twitter required incomplete (`twitter:card`, `twitter:title`, `twitter:description`) | −5 |
|
|
171
|
+
| Twitter recommended incomplete (`twitter:image`) | −2 |
|
|
172
|
+
|
|
173
|
+
### `openapi` — 6%
|
|
174
|
+
|
|
175
|
+
`/.well-known/openapi.json`.
|
|
176
|
+
|
|
177
|
+
| Condition | Points |
|
|
178
|
+
| --- | --- |
|
|
179
|
+
| Not found | **hard fail → 0** |
|
|
180
|
+
| Invalid JSON | **→ 10** |
|
|
181
|
+
| Wrong Content-Type | −5 |
|
|
182
|
+
| No `openapi`/`swagger` version field | −20 |
|
|
183
|
+
| Swagger 2.x instead of OpenAPI 3.x | −10 |
|
|
184
|
+
| Missing `info.title` | −10 |
|
|
185
|
+
| Missing `info.description` | −5 |
|
|
186
|
+
| No `paths` documented | −15 |
|
|
187
|
+
| No `servers` | −5 |
|
|
188
|
+
|
|
189
|
+
### `tls-https` — 5%
|
|
190
|
+
|
|
191
|
+
HTTPS, redirect, HSTS. Thresholds: max-age ≥ 15,768,000s (~6 months), preload ≥ 31,536,000s (1 year).
|
|
192
|
+
|
|
193
|
+
| Condition | Points |
|
|
194
|
+
| --- | --- |
|
|
195
|
+
| Invalid URL | **hard fail → 0** |
|
|
196
|
+
| Served over plain HTTP | −50 |
|
|
197
|
+
| HTTP does not redirect to HTTPS | −15 |
|
|
198
|
+
| Redirect unverifiable | −5 |
|
|
199
|
+
| No HSTS header | −15 |
|
|
200
|
+
| HSTS without `max-age` | −10 |
|
|
201
|
+
| `max-age` < 6 months | −5 |
|
|
202
|
+
| No `includeSubDomains` | −5 |
|
|
203
|
+
| `preload` present but ineligible | −5 |
|
|
204
|
+
| No `preload` directive | −3 |
|
|
205
|
+
|
|
206
|
+
### `sitemap` — 4%
|
|
207
|
+
|
|
208
|
+
Located via robots.txt `Sitemap:` or `/sitemap.xml`. Limits: 50,000 URLs / 50 MB / 365-day freshness.
|
|
209
|
+
|
|
210
|
+
| Condition | Points |
|
|
211
|
+
| --- | --- |
|
|
212
|
+
| No sitemap found | **hard fail → 0** |
|
|
213
|
+
| Response is not XML | **→ 20** |
|
|
214
|
+
| Over 50 MB | −10 |
|
|
215
|
+
| Unexpected Content-Type | −5 |
|
|
216
|
+
| Sitemap index with no `<sitemap>` entries | −20, stop |
|
|
217
|
+
| Some sampled child sitemaps unreachable | −10 |
|
|
218
|
+
| `<urlset>` with no `<url>` entries | −30 |
|
|
219
|
+
| Over 50,000 URLs declared | −10 |
|
|
220
|
+
| `<lastmod>` coverage < 50% | −5 |
|
|
221
|
+
| Newest `<lastmod>` older than 365 days | −5 |
|
|
222
|
+
|
|
223
|
+
### `well-known-ai` — 3%
|
|
224
|
+
|
|
225
|
+
Emerging AI discovery files. **Purely proportional** — no deductions:
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
score = round(present / 5 × 100)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
where `present` is the number of the following five files that are found with valid content: `/.well-known/ai.txt` (Spawning), `/.well-known/genai.txt`, `/ai-plugin.json`, `/agents.json`, `/.well-known/nlweb.json`. Files with invalid content produce warnings and do not count as present.
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Informational checks (weight 0 in 3.x)
|
|
236
|
+
|
|
237
|
+
These run on every audit and report full findings, but do not affect the overall score or baselines. They gain weight in v4.0.
|
|
238
|
+
|
|
239
|
+
### `content-negotiation` — Markdown for Agents
|
|
240
|
+
|
|
241
|
+
Probes the homepage with `Accept: text/markdown` — the pattern served by Cloudflare and Vercel and requested by Claude Code, Cursor, and OpenCode (~80% token reduction vs HTML).
|
|
242
|
+
|
|
243
|
+
| Condition | Points |
|
|
244
|
+
| --- | --- |
|
|
245
|
+
| Probe request fails (network) | **hard fail → 0** |
|
|
246
|
+
| No Markdown served, no fallback | **→ 0** |
|
|
247
|
+
| No Markdown served, but `<link rel="alternate" type="text/markdown">` present | **→ 40** |
|
|
248
|
+
| Markdown served (correct Content-Type, 2xx) | base 100 |
|
|
249
|
+
| Body is empty | −30 |
|
|
250
|
+
| Body is a relabeled HTML document | −25 |
|
|
251
|
+
| `Vary` does not include `Accept` | −15 |
|
|
252
|
+
| Markdown not smaller than HTML | warn only, 0 |
|
|
253
|
+
|
|
254
|
+
### `rsl` — Really Simple Licensing
|
|
255
|
+
|
|
256
|
+
[RSL 1.0](https://rslstandard.org/rsl) discovery (robots.txt `License:`, `Link: rel="license"` header, `<link rel="license" type="application/rsl+xml">`) and document validation. Plain CC-style license links without the RSL media type are ignored.
|
|
257
|
+
|
|
258
|
+
| Condition | Points |
|
|
259
|
+
| --- | --- |
|
|
260
|
+
| No discovery mechanism found | **hard fail → 0** |
|
|
261
|
+
| License document unreachable | **→ 25** (cap) |
|
|
262
|
+
| Root `<rsl>` element missing | −40, stop |
|
|
263
|
+
| No `<content>` elements | −20, stop |
|
|
264
|
+
| Wrong or missing `https://rslstandard.org/rsl` namespace | −15 |
|
|
265
|
+
| `<license>` elements missing | −15 |
|
|
266
|
+
| robots.txt `License:` not an absolute URI | −10 |
|
|
267
|
+
| `<content>` missing required `url` attribute | −10 |
|
|
268
|
+
| Wrong Content-Type (expected `application/rsl+xml`) | −5 |
|
|
269
|
+
| `permits`/`prohibits` with invalid `type` | −5 |
|
|
270
|
+
| Tokens outside the RSL 1.0 vocabulary (incl. pre-1.0 draft tokens) | −5 |
|
|
271
|
+
| Invalid `payment` type | −5 |
|
|
272
|
+
|
|
273
|
+
### `agent-access` — Cloaking detection
|
|
274
|
+
|
|
275
|
+
Probes the homepage with realistic UAs for the 8 core AI crawlers and compares status + visible text against the default-UA baseline. **Credit-ratio formula:**
|
|
276
|
+
|
|
277
|
+
```
|
|
278
|
+
score = round(credit / 8 × 100)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
| Outcome per crawler | Credit |
|
|
282
|
+
| --- | --- |
|
|
283
|
+
| Equivalent response | 1 |
|
|
284
|
+
| Blocked, consistent with robots.txt `Disallow` (explicit or wildcard) | 1 |
|
|
285
|
+
| 200 but < 50% of baseline visible text (baseline ≥ 200 chars) | 0.5 |
|
|
286
|
+
| Blocked while robots.txt allows (or doesn't restrict) it | 0 |
|
|
287
|
+
| Baseline request itself fails | **hard fail → 0** |
|
|
288
|
+
|
|
289
|
+
Caveat: WAFs using Web Bot Auth / IP verification may pass the real crawler while rejecting this unverified probe — confirm against WAF logs before changing rules.
|
|
290
|
+
|
|
291
|
+
### `crawl-efficiency`
|
|
292
|
+
|
|
293
|
+
| Condition | Points |
|
|
294
|
+
| --- | --- |
|
|
295
|
+
| Homepage request fails | **hard fail → 0** |
|
|
296
|
+
| Uncompressed response | −30 |
|
|
297
|
+
| gzip/deflate/zstd instead of Brotli | pass with suggestion, 0 |
|
|
298
|
+
| No `ETag` / `Last-Modified` validator | −30 |
|
|
299
|
+
| Validator present but conditional request not answered with `304` | −15 |
|
|
300
|
+
| Page > 2 MB decompressed | −10 |
|
|
301
|
+
| Page > 500 KB decompressed | −5 |
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## Overall scoring model
|
|
306
|
+
|
|
307
|
+
Each check returns 0–100. The overall score is the weighted average across the checks that ran:
|
|
308
|
+
|
|
309
|
+
```
|
|
310
|
+
overall = round( ( Σ (score_i / 100 × weight_i) / Σ weight_i ) × 100 )
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
When every selected check has weight 0 (e.g. `--checks rsl`), the overall score falls back to a plain average of the check scores.
|
|
314
|
+
|
|
315
|
+
| Grade | Score | Exit code |
|
|
316
|
+
| --- | --- | --- |
|
|
317
|
+
| Excellent | 90–100 | 0 |
|
|
318
|
+
| Good | 70–89 | 0 |
|
|
319
|
+
| Fair | 50–69 | 1 |
|
|
320
|
+
| Poor | 0–49 | 1 |
|
|
321
|
+
|
|
322
|
+
Weights live in `src/constants.ts` (`CHECK_WEIGHTS`); a check's own `meta.weight` takes precedence. The scoring policy for 3.x — why new checks ship at weight 0 — is documented in [architecture.md](./architecture.md).
|