ax-audit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +88 -0
- package/bin/ax-audit.js +4 -0
- package/lib/checks/agent-json.js +77 -0
- package/lib/checks/http-headers.js +87 -0
- package/lib/checks/index.js +19 -0
- package/lib/checks/llms-txt.js +80 -0
- package/lib/checks/meta-tags.js +87 -0
- package/lib/checks/openapi.js +79 -0
- package/lib/checks/robots-txt.js +91 -0
- package/lib/checks/security-txt.js +67 -0
- package/lib/checks/structured-data.js +126 -0
- package/lib/cli.js +50 -0
- package/lib/constants.js +78 -0
- package/lib/fetcher.js +64 -0
- package/lib/orchestrator.js +57 -0
- package/lib/reporter/index.js +14 -0
- package/lib/reporter/json.js +3 -0
- package/lib/reporter/terminal.js +49 -0
- package/lib/scorer.js +34 -0
- package/package.json +52 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Lucio Duran
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# ax-audit
|
|
2
|
+
|
|
3
|
+
Audit websites for **AI Agent Experience (AX)** readiness. Lighthouse for AI Agents.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
npx ax-audit https://example.com
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## What it checks
|
|
10
|
+
|
|
11
|
+
| Check | Description | Weight |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| **LLMs.txt** | `/llms.txt` presence and [spec](https://llmstxt.org) compliance | 15% |
|
|
14
|
+
| **Robots.txt** | AI crawler configuration (GPTBot, ClaudeBot, etc.) | 15% |
|
|
15
|
+
| **Structured Data** | JSON-LD on homepage (schema.org, @graph, entity types) | 15% |
|
|
16
|
+
| **HTTP Headers** | Security headers + AI discovery Link headers + CORS | 15% |
|
|
17
|
+
| **Agent Card** | `/.well-known/agent.json` [A2A protocol](https://a2a-protocol.org) | 10% |
|
|
18
|
+
| **Security.txt** | `/.well-known/security.txt` [RFC 9116](https://www.rfc-editor.org/rfc/rfc9116) | 10% |
|
|
19
|
+
| **Meta Tags** | AI meta tags (`ai:*`), `rel="alternate"`, `rel="me"` | 10% |
|
|
20
|
+
| **OpenAPI** | `/.well-known/openapi.json` presence and validity | 10% |
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install -g ax-audit
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Or run directly:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
npx ax-audit https://your-site.com
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Terminal output (default)
|
|
38
|
+
ax-audit https://example.com
|
|
39
|
+
|
|
40
|
+
# JSON output (for CI pipelines)
|
|
41
|
+
ax-audit https://example.com --json
|
|
42
|
+
|
|
43
|
+
# Run specific checks only
|
|
44
|
+
ax-audit https://example.com --checks llms-txt,robots-txt,agent-json
|
|
45
|
+
|
|
46
|
+
# Custom timeout (default: 10s)
|
|
47
|
+
ax-audit https://example.com --timeout 15000
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Programmatic API
|
|
51
|
+
|
|
52
|
+
```javascript
|
|
53
|
+
import { audit } from 'ax-audit';
|
|
54
|
+
|
|
55
|
+
const report = await audit({ url: 'https://example.com' });
|
|
56
|
+
console.log(report.overallScore); // 0-100
|
|
57
|
+
console.log(report.grade.label); // 'Excellent' | 'Good' | 'Fair' | 'Poor'
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Scoring
|
|
61
|
+
|
|
62
|
+
Each check returns a score from 0 to 100. The overall score is a weighted average.
|
|
63
|
+
|
|
64
|
+
| Grade | Score | Exit Code |
|
|
65
|
+
|---|---|---|
|
|
66
|
+
| Excellent | 90-100 | 0 |
|
|
67
|
+
| Good | 70-89 | 0 |
|
|
68
|
+
| Fair | 50-69 | 1 |
|
|
69
|
+
| Poor | 0-49 | 1 |
|
|
70
|
+
|
|
71
|
+
Exit codes make it easy to gate CI/CD deployments on AX readiness.
|
|
72
|
+
|
|
73
|
+
## CI Integration
|
|
74
|
+
|
|
75
|
+
### GitHub Actions
|
|
76
|
+
|
|
77
|
+
```yaml
|
|
78
|
+
- name: AX Audit
|
|
79
|
+
run: npx ax-audit https://your-site.com --json > ax-report.json
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Requirements
|
|
83
|
+
|
|
84
|
+
- Node.js >= 18.0.0
|
|
85
|
+
|
|
86
|
+
## License
|
|
87
|
+
|
|
88
|
+
MIT
|
package/bin/ax-audit.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { AGENT_JSON_REQUIRED_FIELDS } from '../constants.js';

// Metadata consumed by the orchestrator: check id, display name, and scoring weight.
export const meta = {
  id: 'agent-json',
  name: 'Agent Card (A2A)',
  description: 'Checks /.well-known/agent.json A2A protocol compliance',
  weight: 10,
};

/**
 * Audits the site's A2A agent card at /.well-known/agent.json.
 *
 * Starts at 100 and deducts points for missing required fields (-15 each),
 * an empty skills array (-10), a missing protocolVersion (-5), and no
 * optional fields (-5). A missing file scores 0; unparseable JSON scores 10.
 *
 * @param {{ url: string, fetch: Function }} ctx - Audit context with base URL and fetch helper.
 * @returns {Promise<object>} Check result { id, name, description, score, findings, duration }.
 */
export default async function check(ctx) {
  const startedAt = performance.now();
  const findings = [];
  let score = 100;

  const response = await ctx.fetch(`${ctx.url}/.well-known/agent.json`);
  if (!response.ok) {
    findings.push({
      status: 'fail',
      message: '/.well-known/agent.json not found',
      detail: `HTTP ${response.status || 'network error'}`,
    });
    return toResult(0, findings, startedAt);
  }
  findings.push({ status: 'pass', message: '/.well-known/agent.json exists' });

  // The card must parse as JSON before any field-level checks make sense.
  let card;
  try {
    card = JSON.parse(response.body);
  } catch {
    findings.push({ status: 'fail', message: 'Invalid JSON' });
    return toResult(10, findings, startedAt);
  }
  findings.push({ status: 'pass', message: 'Valid JSON' });

  // Required A2A fields: each missing one costs 15 points.
  for (const field of AGENT_JSON_REQUIRED_FIELDS) {
    if (card[field] !== undefined && card[field] !== null) {
      findings.push({ status: 'pass', message: `Required field "${field}" present` });
    } else {
      findings.push({ status: 'fail', message: `Required field "${field}" missing` });
      score -= 15;
    }
  }

  // Skills array: only an explicit empty array is penalized; a missing or
  // non-array `skills` produces no finding (matching prior behavior).
  if (Array.isArray(card.skills)) {
    if (card.skills.length > 0) {
      findings.push({ status: 'pass', message: `${card.skills.length} skill(s) defined` });
    } else {
      findings.push({ status: 'warn', message: 'Skills array is empty' });
      score -= 10;
    }
  }

  // Protocol version (small deduction when absent).
  if (card.protocolVersion) {
    findings.push({ status: 'pass', message: `Protocol version: ${card.protocolVersion}` });
  } else {
    findings.push({ status: 'warn', message: 'No protocolVersion field' });
    score -= 5;
  }

  // Optional-but-valuable fields.
  const optionalFields = ['capabilities', 'authentication', 'documentationUrl'];
  const presentOptional = optionalFields.filter((name) => card[name] !== undefined);
  if (presentOptional.length === optionalFields.length) {
    findings.push({ status: 'pass', message: 'All optional fields present (capabilities, authentication, documentationUrl)' });
  } else if (presentOptional.length > 0) {
    findings.push({ status: 'pass', message: `${presentOptional.length}/${optionalFields.length} optional fields present` });
  } else {
    findings.push({ status: 'warn', message: 'No optional fields (capabilities, authentication, documentationUrl)' });
    score -= 5;
  }

  return toResult(Math.max(0, score), findings, startedAt);
}

// Packages a score + findings list into the shared check-result shape.
function toResult(score, findings, startedAt) {
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration: Math.round(performance.now() - startedAt),
  };
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { SECURITY_HEADERS } from '../constants.js';

// Metadata consumed by the orchestrator: check id, display name, and scoring weight.
export const meta = {
  id: 'http-headers',
  name: 'HTTP Headers',
  description: 'Checks security headers, AI discovery Link headers, and CORS',
  weight: 15,
};

/**
 * Audits the homepage response headers: security-header coverage, Link
 * headers pointing at AI discovery files (llms.txt / agent.json), CORS on
 * .well-known resources, and X-Robots-Tag on /llms.txt.
 *
 * @param {{ url: string, headers: object, fetch: Function }} ctx - Audit context;
 *   ctx.headers is assumed to hold lowercased homepage response headers.
 * @returns {Promise<object>} Check result { id, name, description, score, findings, duration }.
 */
export default async function check(ctx) {
  const startedAt = performance.now();
  const findings = [];
  let score = 100;

  const headers = ctx.headers;
  if (!headers || Object.keys(headers).length === 0) {
    findings.push({ status: 'fail', message: 'Could not fetch homepage headers' });
    return toResult(0, findings, startedAt);
  }

  // --- Security headers: count what is present, flag missing critical ones.
  let securityCount = 0;
  for (const header of SECURITY_HEADERS) {
    if (headers[header.name]) {
      securityCount += 1;
    } else if (header.critical) {
      findings.push({ status: 'fail', message: `Missing critical header: ${header.label}` });
      score -= 10;
    }
  }

  if (securityCount === SECURITY_HEADERS.length) {
    findings.push({ status: 'pass', message: `All ${SECURITY_HEADERS.length} security headers present` });
  } else if (securityCount >= 4) {
    findings.push({ status: 'pass', message: `${securityCount}/${SECURITY_HEADERS.length} security headers present` });
  } else {
    findings.push({ status: 'warn', message: `Only ${securityCount}/${SECURITY_HEADERS.length} security headers present` });
    score -= 5;
  }

  // --- Link header for AI discovery.
  const linkHeader = headers['link'] || '';
  const mentionsLlms = /llms\.txt/i.test(linkHeader);
  const mentionsAgent = /agent\.json/i.test(linkHeader);

  if (mentionsLlms && mentionsAgent) {
    findings.push({ status: 'pass', message: 'Link header references both llms.txt and agent.json' });
  } else if (mentionsLlms) {
    findings.push({ status: 'pass', message: 'Link header references llms.txt' });
    findings.push({ status: 'warn', message: 'Link header does not reference agent.json' });
    score -= 5;
  } else if (mentionsAgent) {
    findings.push({ status: 'pass', message: 'Link header references agent.json' });
    findings.push({ status: 'warn', message: 'Link header does not reference llms.txt' });
    score -= 5;
  } else if (linkHeader) {
    findings.push({ status: 'warn', message: 'Link header present but does not reference AI discovery files' });
    score -= 15;
  } else {
    findings.push({ status: 'warn', message: 'No Link header for AI discovery (llms.txt, agent.json)' });
    score -= 15;
  }

  // --- CORS on .well-known resources. Any Access-Control-Allow-Origin
  // value (including '*') counts as enabled.
  const wellKnownRes = await ctx.fetch(`${ctx.url}/.well-known/agent.json`);
  if (wellKnownRes.ok) {
    if (wellKnownRes.headers['access-control-allow-origin']) {
      findings.push({ status: 'pass', message: 'CORS enabled on .well-known resources' });
    } else {
      findings.push({ status: 'warn', message: 'No CORS headers on .well-known resources' });
      score -= 10;
    }
  }

  // --- X-Robots-Tag on /llms.txt: bonus-style pass only, no deduction.
  const llmsRes = await ctx.fetch(`${ctx.url}/llms.txt`);
  if (llmsRes.ok && llmsRes.headers['x-robots-tag']?.includes('noindex')) {
    findings.push({ status: 'pass', message: 'X-Robots-Tag: noindex on /llms.txt (prevents search indexing of raw text)' });
  }

  return toResult(Math.max(0, score), findings, startedAt);
}

// Packages a score + findings list into the shared check-result shape.
function toResult(score, findings, startedAt) {
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration: Math.round(performance.now() - startedAt),
  };
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// Registry of all audit checks. Each entry pairs a check module's
// default-exported async run(ctx) function with its exported meta
// (id, name, description, weight).
import llmsTxt, { meta as llmsTxtMeta } from './llms-txt.js';
import robotsTxt, { meta as robotsTxtMeta } from './robots-txt.js';
import agentJson, { meta as agentJsonMeta } from './agent-json.js';
import securityTxt, { meta as securityTxtMeta } from './security-txt.js';
import structuredData, { meta as structuredDataMeta } from './structured-data.js';
import metaTags, { meta as metaTagsMeta } from './meta-tags.js';
import openapi, { meta as openapiMeta } from './openapi.js';
import httpHeaders, { meta as httpHeadersMeta } from './http-headers.js';

// NOTE(review): array order presumably drives execution/reporting order —
// confirm against the orchestrator before reordering.
export const checks = [
  { run: llmsTxt, meta: llmsTxtMeta },
  { run: robotsTxt, meta: robotsTxtMeta },
  { run: agentJson, meta: agentJsonMeta },
  { run: securityTxt, meta: securityTxtMeta },
  { run: structuredData, meta: structuredDataMeta },
  { run: metaTags, meta: metaTagsMeta },
  { run: openapi, meta: openapiMeta },
  { run: httpHeaders, meta: httpHeadersMeta },
];
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// Metadata consumed by the orchestrator: check id, display name, and scoring weight.
export const meta = {
  id: 'llms-txt',
  name: 'LLMs.txt',
  description: 'Checks /llms.txt presence and spec compliance',
  weight: 15,
};

/**
 * Audits /llms.txt against the llmstxt.org structural conventions: an H1
 * title, a blockquote description, "##" section headings, Markdown links,
 * and minimal content length. A missing file scores 0; a companion
 * /llms-full.txt earns a 10-point bonus (capped at 100).
 *
 * @param {{ url: string, fetch: Function }} ctx - Audit context with base URL and fetch helper.
 * @returns {Promise<object>} Check result { id, name, description, score, findings, duration }.
 */
export default async function check(ctx) {
  const startedAt = performance.now();
  const findings = [];
  let score = 100;

  const response = await ctx.fetch(`${ctx.url}/llms.txt`);
  if (!response.ok) {
    findings.push({
      status: 'fail',
      message: '/llms.txt not found',
      detail: `HTTP ${response.status || 'network error'}`,
    });
    return toResult(0, findings, startedAt);
  }

  findings.push({ status: 'pass', message: '/llms.txt exists' });
  const text = response.body;

  // Work on trimmed, non-empty lines so blank lines and indentation
  // don't hide the H1 or blockquote.
  const lines = text.split('\n').map((line) => line.trim()).filter(Boolean);

  // H1 heading — the spec wants the file to open with "# <title>".
  if (lines[0]?.startsWith('# ')) {
    findings.push({ status: 'pass', message: `H1 heading: "${lines[0].slice(2)}"` });
  } else {
    findings.push({ status: 'warn', message: 'Missing H1 heading (first line should start with "# ")' });
    score -= 15;
  }

  // Blockquote summary ("> ...").
  if (lines.some((line) => line.startsWith('> '))) {
    findings.push({ status: 'pass', message: 'Blockquote description present' });
  } else {
    findings.push({ status: 'warn', message: 'No blockquote description found ("> ...")' });
    score -= 10;
  }

  // "##" section headings.
  const sectionCount = lines.filter((line) => line.startsWith('## ')).length;
  if (sectionCount > 0) {
    findings.push({ status: 'pass', message: `${sectionCount} section heading(s) found` });
  } else {
    findings.push({ status: 'warn', message: 'No section headings found (## ...)' });
    score -= 10;
  }

  // Absolute Markdown links: [text](http[s]://...). Relative links are
  // intentionally not counted (matches prior behavior).
  const linkCount = [...text.matchAll(/\[([^\]]+)\]\((https?:\/\/[^)]+)\)/g)].length;
  if (linkCount > 0) {
    findings.push({ status: 'pass', message: `${linkCount} link(s) found` });
  } else {
    findings.push({ status: 'warn', message: 'No Markdown links found' });
    score -= 10;
  }

  // Bare-minimum substance check.
  if (text.length < 100) {
    findings.push({ status: 'warn', message: 'Content appears minimal (< 100 characters)' });
    score -= 10;
  }

  // Bonus: a companion /llms-full.txt adds 10 points, capped at 100.
  const fullResponse = await ctx.fetch(`${ctx.url}/llms-full.txt`);
  if (fullResponse.ok) {
    findings.push({ status: 'pass', message: '/llms-full.txt also available (bonus)' });
    score = Math.min(100, score + 10);
  } else {
    findings.push({ status: 'warn', message: '/llms-full.txt not found (optional but recommended)' });
  }

  return toResult(Math.max(0, score), findings, startedAt);
}

// Packages a score + findings list into the shared check-result shape.
function toResult(score, findings, startedAt) {
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration: Math.round(performance.now() - startedAt),
  };
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// Metadata consumed by the orchestrator: check id, display name, and scoring weight.
export const meta = {
  id: 'meta-tags',
  name: 'Meta Tags',
  description: 'Checks AI meta tags, rel="alternate", and rel="me" links',
  weight: 10,
};

// AI-specific <meta name="..."> tags this check scans for.
const AI_META_NAMES = ['ai:summary', 'ai:content_type', 'ai:author', 'ai:api', 'ai:agent_card'];

/**
 * Audits the homepage HTML for agent-facing markup: ai:* meta tags,
 * rel="alternate" links to llms.txt and agent.json, rel="me" identity
 * links, and OpenGraph tags. Missing HTML scores 0.
 *
 * @param {{ html: string }} ctx - Audit context carrying the homepage HTML.
 * @returns {Promise<object>} Check result { id, name, description, score, findings, duration }.
 */
export default async function check(ctx) {
  const startedAt = performance.now();
  const findings = [];
  let score = 100;

  const html = ctx.html;
  if (!html) {
    findings.push({ status: 'fail', message: 'Could not fetch homepage HTML' });
    return toResult(0, findings, startedAt);
  }

  // --- AI meta tags (ai:*).
  const foundAiMeta = AI_META_NAMES.filter((name) =>
    new RegExp(`<meta\\s+[^>]*name=["']${escapeRegex(name)}["'][^>]*>`, 'i').test(html)
  );
  if (foundAiMeta.length >= 3) {
    findings.push({ status: 'pass', message: `${foundAiMeta.length}/${AI_META_NAMES.length} AI meta tags found`, detail: foundAiMeta.join(', ') });
  } else if (foundAiMeta.length > 0) {
    findings.push({ status: 'warn', message: `${foundAiMeta.length}/${AI_META_NAMES.length} AI meta tags found`, detail: foundAiMeta.join(', ') });
    score -= 15;
  } else {
    findings.push({ status: 'warn', message: 'No AI meta tags (ai:*) found' });
    score -= 25;
  }

  // --- rel="alternate" → llms.txt. Attribute order varies, so test both
  // "rel before href" and "href before rel".
  const hasLlmsAlternate =
    /rel=["']alternate["'][^>]*llms\.txt/i.test(html) ||
    /llms\.txt[^>]*rel=["']alternate["']/i.test(html);
  if (hasLlmsAlternate) {
    findings.push({ status: 'pass', message: 'rel="alternate" link to llms.txt present' });
  } else {
    findings.push({ status: 'warn', message: 'No rel="alternate" link to llms.txt in HTML' });
    score -= 15;
  }

  // --- rel="alternate" → agent.json.
  const hasAgentAlternate =
    /rel=["']alternate["'][^>]*agent\.json/i.test(html) ||
    /agent\.json[^>]*rel=["']alternate["']/i.test(html);
  if (hasAgentAlternate) {
    findings.push({ status: 'pass', message: 'rel="alternate" link to agent.json present' });
  } else {
    findings.push({ status: 'warn', message: 'No rel="alternate" link to agent.json in HTML' });
    score -= 10;
  }

  // --- rel="me" identity links. Any positive count passes; the original
  // ">= 3" and "> 0" branches emitted the identical finding, so they are
  // merged here without a behavior change.
  const relMeCount = (html.match(/rel=["']me["']/gi) || []).length;
  if (relMeCount > 0) {
    findings.push({ status: 'pass', message: `${relMeCount} rel="me" identity link(s) found` });
  } else {
    findings.push({ status: 'warn', message: 'No rel="me" identity links found' });
    score -= 10;
  }

  // --- OpenGraph basics.
  if (/<meta\s+[^>]*property=["']og:/i.test(html)) {
    findings.push({ status: 'pass', message: 'OpenGraph meta tags present' });
  } else {
    findings.push({ status: 'warn', message: 'No OpenGraph meta tags found' });
    score -= 10;
  }

  return toResult(Math.max(0, score), findings, startedAt);
}

// Escapes regex metacharacters so a literal tag name can be embedded in a RegExp.
function escapeRegex(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// Packages a score + findings list into the shared check-result shape.
function toResult(score, findings, startedAt) {
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration: Math.round(performance.now() - startedAt),
  };
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
// Metadata consumed by the orchestrator: check id, display name, and scoring weight.
export const meta = {
  id: 'openapi',
  name: 'OpenAPI Spec',
  description: 'Checks /.well-known/openapi.json presence and validity',
  weight: 10,
};

/**
 * Audits /.well-known/openapi.json: JSON validity, an openapi/swagger
 * version field, info.title / info.description, documented paths, and
 * declared servers. A missing file scores 0; unparseable JSON scores 10.
 *
 * @param {{ url: string, fetch: Function }} ctx - Audit context with base URL and fetch helper.
 * @returns {Promise<object>} Check result { id, name, description, score, findings, duration }.
 */
export default async function check(ctx) {
  const startedAt = performance.now();
  const findings = [];
  let score = 100;

  const response = await ctx.fetch(`${ctx.url}/.well-known/openapi.json`);
  if (!response.ok) {
    findings.push({
      status: 'fail',
      message: '/.well-known/openapi.json not found',
      detail: `HTTP ${response.status || 'network error'}`,
    });
    return toResult(0, findings, startedAt);
  }

  findings.push({ status: 'pass', message: '/.well-known/openapi.json exists' });

  // The spec must parse as JSON before structural checks make sense.
  let spec;
  try {
    spec = JSON.parse(response.body);
  } catch {
    findings.push({ status: 'fail', message: 'Invalid JSON' });
    return toResult(10, findings, startedAt);
  }
  findings.push({ status: 'pass', message: 'Valid JSON' });

  // Version field: prefer OpenAPI 3.x; legacy Swagger 2.x gets a warning.
  if (spec.openapi) {
    findings.push({ status: 'pass', message: `OpenAPI version: ${spec.openapi}` });
  } else if (spec.swagger) {
    findings.push({ status: 'warn', message: `Swagger version: ${spec.swagger} (consider upgrading to OpenAPI 3.x)` });
    score -= 10;
  } else {
    findings.push({ status: 'fail', message: 'No openapi or swagger version field' });
    score -= 20;
  }

  // info.title.
  if (spec.info && spec.info.title) {
    findings.push({ status: 'pass', message: `API title: "${spec.info.title}"` });
  } else {
    findings.push({ status: 'warn', message: 'Missing info.title' });
    score -= 10;
  }

  // info.description.
  if (spec.info?.description) {
    findings.push({ status: 'pass', message: 'API description present' });
  } else {
    findings.push({ status: 'warn', message: 'Missing info.description' });
    score -= 5;
  }

  // Documented paths.
  const pathCount = spec.paths ? Object.keys(spec.paths).length : 0;
  if (pathCount > 0) {
    findings.push({ status: 'pass', message: `${pathCount} path(s) documented` });
  } else {
    findings.push({ status: 'warn', message: 'No paths documented' });
    score -= 15;
  }

  // Declared servers.
  if (Array.isArray(spec.servers) && spec.servers.length > 0) {
    findings.push({ status: 'pass', message: `${spec.servers.length} server(s) defined` });
  } else {
    findings.push({ status: 'warn', message: 'No servers defined' });
    score -= 5;
  }

  return toResult(Math.max(0, score), findings, startedAt);
}

// Packages a score + findings list into the shared check-result shape.
function toResult(score, findings, startedAt) {
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration: Math.round(performance.now() - startedAt),
  };
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { ALL_AI_CRAWLERS, CORE_AI_CRAWLERS } from '../constants.js';

// Metadata consumed by the orchestrator: check id, display name, and scoring weight.
export const meta = {
  id: 'robots-txt',
  name: 'Robots.txt',
  description: 'Checks AI crawler configuration in robots.txt',
  weight: 15,
};

/**
 * Audits /robots.txt for AI-crawler readiness: explicit rules for the
 * core AI crawlers, full blocks of known AI crawlers, a Sitemap
 * directive, and overall AI-crawler coverage. A missing file scores 0.
 *
 * @param {{ url: string, fetch: Function }} ctx - Audit context with base URL and fetch helper.
 * @returns {Promise<object>} Check result { id, name, description, score, findings, duration }.
 */
export default async function check(ctx) {
  const startedAt = performance.now();
  const findings = [];
  let score = 100;

  const response = await ctx.fetch(`${ctx.url}/robots.txt`);
  if (!response.ok) {
    findings.push({ status: 'fail', message: '/robots.txt not found' });
    return result(0, findings, startedAt);
  }

  findings.push({ status: 'pass', message: '/robots.txt exists' });
  const text = response.body;
  const configuredBots = parseUserAgents(text);

  // Case-insensitive set of every user-agent that has an explicit rule block.
  const configuredNames = new Set(configuredBots.map((bot) => bot.name.toLowerCase()));

  // --- Core AI crawlers: deduct proportionally for partial coverage.
  const coreConfigured = CORE_AI_CRAWLERS.filter((bot) => configuredNames.has(bot.toLowerCase()));
  const coreMissing = CORE_AI_CRAWLERS.filter((bot) => !configuredNames.has(bot.toLowerCase()));

  if (coreConfigured.length === CORE_AI_CRAWLERS.length) {
    findings.push({ status: 'pass', message: `All ${CORE_AI_CRAWLERS.length} core AI crawlers explicitly configured` });
  } else if (coreConfigured.length > 0) {
    findings.push({ status: 'warn', message: `${coreConfigured.length}/${CORE_AI_CRAWLERS.length} core AI crawlers configured`, detail: `Missing: ${coreMissing.join(', ')}` });
    score -= Math.round((coreMissing.length / CORE_AI_CRAWLERS.length) * 30);
  } else {
    findings.push({ status: 'fail', message: 'No core AI crawlers explicitly configured', detail: `Expected: ${CORE_AI_CRAWLERS.join(', ')}` });
    score -= 40;
  }

  // --- Known AI crawlers that are fully blocked ("Disallow: /").
  const aiNames = new Set(ALL_AI_CRAWLERS.map((name) => name.toLowerCase()));
  const blockedBots = configuredBots.filter((bot) => bot.disallowed && aiNames.has(bot.name.toLowerCase()));
  if (blockedBots.length > 0) {
    findings.push({ status: 'warn', message: `${blockedBots.length} AI crawler(s) explicitly blocked`, detail: blockedBots.map((bot) => bot.name).join(', ') });
    score -= blockedBots.length * 3;
  }

  // --- Sitemap directive.
  if (/^Sitemap:/mi.test(text)) {
    findings.push({ status: 'pass', message: 'Sitemap directive present' });
  } else {
    findings.push({ status: 'warn', message: 'No Sitemap directive found' });
    score -= 5;
  }

  // --- Overall AI-crawler coverage: informational finding, no deduction.
  const totalConfigured = ALL_AI_CRAWLERS.filter((bot) => configuredNames.has(bot.toLowerCase()));
  findings.push({
    status: totalConfigured.length >= 10 ? 'pass' : 'warn',
    message: `${totalConfigured.length}/${ALL_AI_CRAWLERS.length} known AI crawlers have explicit rules`,
  });

  return result(Math.max(0, Math.min(100, score)), findings, startedAt);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Parses robots.txt into one entry per User-agent line:
 * { name, disallowed }, where `disallowed` is true when the agent's rule
 * group fully blocks the site ("Disallow: /").
 *
 * Per RFC 9309, consecutive User-agent lines share the rule group that
 * follows them, so "User-agent: A\nUser-agent: B\nDisallow: /" blocks
 * BOTH A and B. (The previous implementation attributed the rules to the
 * last User-agent line only, so earlier agents in a shared group were
 * never marked as disallowed.)
 *
 * @param {string} text - Raw robots.txt contents.
 * @returns {Array<{name: string, disallowed: boolean}>} One entry per
 *   User-agent line, in file order.
 */
function parseUserAgents(text) {
  const blocks = [];
  let group = [];            // user-agent entries sharing the upcoming rule lines
  let groupHasRules = false; // true once a rule line has been seen for `group`

  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    const uaMatch = trimmed.match(/^User-agent:\s*(.+)/i);
    if (uaMatch) {
      // A User-agent line after rule lines starts a new group; one that
      // immediately follows another User-agent line joins the current group.
      if (groupHasRules) {
        group = [];
        groupHasRules = false;
      }
      const entry = { name: uaMatch[1].trim(), disallowed: false };
      group.push(entry);
      blocks.push(entry);
    } else if (group.length > 0 && /^(Allow|Disallow|Crawl-delay):/i.test(trimmed)) {
      groupHasRules = true;
      // Only a bare "Disallow: /" counts as a full block of the site;
      // path-specific disallows (e.g. "Disallow: /private") do not.
      if (/^Disallow:\s*\/\s*$/i.test(trimmed)) {
        for (const entry of group) entry.disallowed = true;
      }
    }
  }
  return blocks;
}
|
|
88
|
+
|
|
89
|
+
// Packages a score + findings list into the shared check-result shape,
// stamping the elapsed time since `start` as `duration` (milliseconds).
function result(score, findings, start) {
  const duration = Math.round(performance.now() - start);
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration,
  };
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { SECURITY_TXT_REQUIRED_FIELDS } from '../constants.js';
|
|
2
|
+
|
|
3
|
+
export const meta = {
|
|
4
|
+
id: 'security-txt',
|
|
5
|
+
name: 'Security.txt',
|
|
6
|
+
description: 'Checks /.well-known/security.txt RFC 9116 compliance',
|
|
7
|
+
weight: 10,
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
export default async function check(ctx) {
|
|
11
|
+
const start = performance.now();
|
|
12
|
+
const findings = [];
|
|
13
|
+
let score = 100;
|
|
14
|
+
|
|
15
|
+
const res = await ctx.fetch(`${ctx.url}/.well-known/security.txt`);
|
|
16
|
+
|
|
17
|
+
if (!res.ok) {
|
|
18
|
+
findings.push({ status: 'fail', message: '/.well-known/security.txt not found', detail: `HTTP ${res.status || 'network error'}` });
|
|
19
|
+
return result(0, findings, start);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
findings.push({ status: 'pass', message: '/.well-known/security.txt exists' });
|
|
23
|
+
const text = res.body;
|
|
24
|
+
|
|
25
|
+
// Required fields
|
|
26
|
+
for (const field of SECURITY_TXT_REQUIRED_FIELDS) {
|
|
27
|
+
const regex = new RegExp(`^${field}:`, 'mi');
|
|
28
|
+
if (regex.test(text)) {
|
|
29
|
+
findings.push({ status: 'pass', message: `Required field "${field}" present` });
|
|
30
|
+
} else {
|
|
31
|
+
findings.push({ status: 'fail', message: `Required field "${field}" missing (RFC 9116)` });
|
|
32
|
+
score -= 25;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Expires not in the past
|
|
37
|
+
const expiresMatch = text.match(/^Expires:\s*(.+)/mi);
|
|
38
|
+
if (expiresMatch) {
|
|
39
|
+
const expiresDate = new Date(expiresMatch[1].trim());
|
|
40
|
+
if (!isNaN(expiresDate.getTime())) {
|
|
41
|
+
if (expiresDate > new Date()) {
|
|
42
|
+
findings.push({ status: 'pass', message: `Expires date is in the future (${expiresDate.toISOString().split('T')[0]})` });
|
|
43
|
+
} else {
|
|
44
|
+
findings.push({ status: 'fail', message: 'Expires date is in the past — security.txt is expired' });
|
|
45
|
+
score -= 20;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Optional fields
|
|
51
|
+
const optionalFields = ['Canonical', 'Preferred-Languages', 'Policy', 'Encryption', 'Hiring'];
|
|
52
|
+
const present = optionalFields.filter(f => new RegExp(`^${f}:`, 'mi').test(text));
|
|
53
|
+
if (present.length >= 3) {
|
|
54
|
+
findings.push({ status: 'pass', message: `${present.length}/${optionalFields.length} optional fields present` });
|
|
55
|
+
} else if (present.length > 0) {
|
|
56
|
+
findings.push({ status: 'pass', message: `${present.length}/${optionalFields.length} optional fields present` });
|
|
57
|
+
} else {
|
|
58
|
+
findings.push({ status: 'warn', message: 'No optional fields (Canonical, Preferred-Languages, Policy, etc.)' });
|
|
59
|
+
score -= 5;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return result(Math.max(0, score), findings, start);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
 * Package a check outcome in the shape the orchestrator expects,
 * stamping the elapsed time since `start` (a performance.now() value).
 */
function result(score, findings, start) {
  const { id, name, description } = meta;
  const duration = Math.round(performance.now() - start);
  return { id, name, description, score, findings, duration };
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// Check metadata consumed by the orchestrator and scorer:
// `id` keys into CHECK_WEIGHTS; `weight` is this check's share of the
// overall score (takes precedence over the CHECK_WEIGHTS fallback).
export const meta = {
  id: 'structured-data',
  name: 'Structured Data',
  description: 'Checks JSON-LD structured data on homepage',
  weight: 15,
};
|
|
7
|
+
|
|
8
|
+
/**
 * Audit the homepage's JSON-LD structured data.
 *
 * Scans ctx.html for <script type="application/ld+json"> blocks, parses
 * them (HTML entities are unescaped first, since some SSR frameworks
 * entity-encode the payload), and scores presence of a schema.org
 * @context, an @graph, key entity types, and a BreadcrumbList.
 *
 * @param {{ html: string }} ctx - shared audit context (pre-fetched homepage)
 * @returns {Promise<object>} normalized check result (see result())
 */
export default async function check(ctx) {
  const start = performance.now();
  const findings = [];
  let score = 100;

  const html = ctx.html;
  if (!html) {
    findings.push({ status: 'fail', message: 'Could not fetch homepage HTML' });
    return result(0, findings, start);
  }

  // Extract JSON-LD blocks. `type` need not be the first attribute
  // (e.g. <script id="schema" type="application/ld+json"> must match).
  const jsonLdPattern = /<script\b[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
  const blocks = [...html.matchAll(jsonLdPattern)];

  if (blocks.length === 0) {
    findings.push({ status: 'fail', message: 'No JSON-LD structured data found' });
    return result(0, findings, start);
  }

  findings.push({ status: 'pass', message: `${blocks.length} JSON-LD block(s) found` });

  // Parse all blocks (unescape HTML entities first — common in SSR frameworks)
  const parsed = [];
  for (const block of blocks) {
    const raw = unescapeHtml(block[1]);
    try {
      parsed.push(JSON.parse(raw));
    } catch {
      findings.push({ status: 'warn', message: 'Invalid JSON in a JSON-LD block' });
      score -= 10;
    }
  }

  if (parsed.length === 0) {
    findings.push({ status: 'fail', message: 'All JSON-LD blocks have invalid JSON' });
    return result(10, findings, start);
  }

  // Check for @context referencing schema.org. Per JSON-LD 1.1, @context
  // may be a string OR an array of strings/objects — accept either form.
  // `doc?.` guards against a block whose JSON is the literal `null`.
  const SCHEMA_ORG = /^https?:\/\/schema\.org\/?$/;
  const hasContext = parsed.some((doc) => {
    const context = doc?.['@context'];
    const values = Array.isArray(context) ? context : [context];
    return values.some((v) => typeof v === 'string' && SCHEMA_ORG.test(v));
  });
  if (hasContext) {
    findings.push({ status: 'pass', message: '@context references schema.org' });
  } else {
    findings.push({ status: 'warn', message: 'No @context referencing schema.org' });
    score -= 15;
  }

  // Check for @graph (multi-entity pattern)
  const hasGraph = parsed.some((doc) => Array.isArray(doc?.['@graph']));
  if (hasGraph) {
    findings.push({ status: 'pass', message: '@graph array present (multi-entity structured data)' });
  } else {
    findings.push({ status: 'warn', message: 'No @graph array (single-entity only)' });
    score -= 5;
  }

  // Collect every @type declared in the documents (including inside @graph).
  const allTypes = new Set();
  for (const doc of parsed) {
    collectTypes(doc, allTypes);
  }

  // Key types for AI agents
  const importantTypes = ['Person', 'Organization', 'WebSite', 'WebPage', 'ProfilePage'];
  const foundTypes = importantTypes.filter((t) => allTypes.has(t));

  if (foundTypes.length >= 2) {
    findings.push({ status: 'pass', message: `Key types found: ${foundTypes.join(', ')}` });
  } else if (foundTypes.length === 1) {
    findings.push({ status: 'warn', message: `Only 1 key type found: ${foundTypes[0]}`, detail: `Consider adding: ${importantTypes.filter(t => !allTypes.has(t)).join(', ')}` });
    score -= 10;
  } else {
    findings.push({ status: 'warn', message: 'No key entity types (Person, Organization, WebSite, etc.)' });
    score -= 15;
  }

  // BreadcrumbList
  if (allTypes.has('BreadcrumbList')) {
    findings.push({ status: 'pass', message: 'BreadcrumbList present' });
  } else {
    findings.push({ status: 'warn', message: 'No BreadcrumbList found' });
    score -= 5;
  }

  return result(Math.max(0, score), findings, start);
}
|
|
98
|
+
|
|
99
|
+
/**
 * Decode the HTML entities commonly found in server-rendered JSON-LD
 * payloads so the text can be passed to JSON.parse.
 *
 * Decoding order matters: `&amp;` is decoded LAST so that a
 * double-escaped sequence like `&amp;lt;` becomes `&lt;` rather than `<`.
 *
 * @param {string} str - raw <script> block contents
 * @returns {string} decoded text
 */
function unescapeHtml(str) {
  return str
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&#x27;/g, "'")
    .replace(/&#x2F;/g, '/')
    .replace(/&amp;/g, '&');
}
|
|
109
|
+
|
|
110
|
+
/**
 * Accumulate every `@type` value into `types`.
 *
 * Walks arrays element-by-element and descends into `@graph` arrays;
 * other nested object properties are deliberately NOT traversed
 * (matching how JSON-LD documents on this audit path are scored).
 */
function collectTypes(node, types) {
  if (!node || typeof node !== 'object') return;

  if (Array.isArray(node)) {
    for (const item of node) collectTypes(item, types);
    return;
  }

  const declared = node['@type'];
  if (declared) {
    for (const typeName of [].concat(declared)) {
      types.add(typeName);
    }
  }

  const graph = node['@graph'];
  if (Array.isArray(graph)) {
    for (const entry of graph) collectTypes(entry, types);
  }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Build the normalized result object returned by this check, with the
 * duration measured from `start` (a performance.now() timestamp).
 */
function result(score, findings, start) {
  return {
    id: meta.id,
    name: meta.name,
    description: meta.description,
    score,
    findings,
    duration: Math.round(performance.now() - start),
  };
}
|
package/lib/cli.js
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { audit } from './orchestrator.js';
|
|
3
|
+
import { report } from './reporter/index.js';
|
|
4
|
+
import { VERSION } from './constants.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Define and run the ax-audit command-line interface.
 *
 * Exit codes: 0 = score >= 70 (Good or better), 1 = lower score or
 * invalid arguments, 2 = fatal error during the audit.
 *
 * @param {string[]} argv - full process argv (passed to commander)
 */
export function cli(argv) {
  const program = new Command();

  program
    .name('ax-audit')
    .description('Audit websites for AI Agent Experience (AX) readiness. Lighthouse for AI Agents.')
    .version(VERSION, '-v, --version')
    .argument('<url>', 'URL to audit (e.g., https://example.com)')
    .option('--json', 'Output results as JSON')
    .option('--output <format>', 'Output format: terminal, json', 'terminal')
    .option('--checks <list>', 'Comma-separated list of checks to run')
    .option('--timeout <ms>', 'Per-request timeout in milliseconds', '10000')
    .action(async (url, options) => {
      // Validate URL early so we fail fast with a clear message.
      try {
        new URL(url);
      } catch {
        console.error(`Error: Invalid URL "${url}". Provide a full URL like https://example.com`);
        process.exit(1);
      }

      // --json is shorthand for --output json.
      const format = options.json ? 'json' : options.output;
      // Drop empty entries so a trailing comma in --checks is harmless.
      const checks = options.checks
        ? options.checks.split(',').map(s => s.trim()).filter(Boolean)
        : undefined;

      // Guard against a non-numeric --timeout: passing NaN downstream
      // would make setTimeout fire immediately and abort every request.
      const timeout = Number.parseInt(options.timeout, 10);
      if (!Number.isFinite(timeout) || timeout <= 0) {
        console.error(`Error: Invalid --timeout "${options.timeout}". Provide a positive number of milliseconds.`);
        process.exit(1);
      }

      try {
        const result = await audit({ url, checks, timeout });

        report(result, format);

        // Exit code: 0 for Good+, 1 for Fair/Poor
        process.exit(result.overallScore >= 70 ? 0 : 1);
      } catch (err) {
        console.error(`Fatal: ${err.message}`);
        process.exit(2);
      }
    });

  program.parse(argv);
}
|
package/lib/constants.js
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';

// Resolve this package's own package.json (one directory up from lib/)
// so VERSION and USER_AGENT always match the published version.
const __dirname = dirname(fileURLToPath(import.meta.url));
const pkg = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf-8'));

export const VERSION = pkg.version;
// Sent as the User-Agent header on every audit request (see fetcher.js).
export const USER_AGENT = `ax-audit/${pkg.version} (https://github.com/lucioduran/ax-audit)`;

// AI crawlers categorized by function:
//   training — collect content for model training
//   search   — fetch content for AI search / answer engines
//   fetching — on-demand content fetchers
// NOTE(review): the categorization is maintained by hand; verify entries
// against each vendor's published crawler documentation when updating.
export const AI_CRAWLERS = {
  training: [
    'GPTBot', 'ClaudeBot', 'Claude-Web', 'Anthropic-AI',
    'Google-Extended', 'CCBot', 'Bytespider',
    'Meta-ExternalAgent', 'Meta-ExternalFetcher',
    'Cohere-AI', 'cohere-training-data-crawler',
    'Applebot-Extended', 'Amazonbot', 'AI2Bot', 'AI2Bot-Dolma',
    'DeepSeek-AI', 'PanguBot', 'Diffbot',
  ],
  search: [
    'OAI-SearchBot', 'ChatGPT-User', 'Claude-SearchBot', 'Claude-User',
    'PerplexityBot', 'Perplexity-User', 'DuckAssistBot', 'YouBot',
    'Petalbot', 'Google-CloudVertexBot', 'Gemini',
  ],
  fetching: [
    'FirecrawlAgent', 'Facebookbot',
  ],
};

// Flat list of every known AI crawler user-agent token.
export const ALL_AI_CRAWLERS = [
  ...AI_CRAWLERS.training,
  ...AI_CRAWLERS.search,
  ...AI_CRAWLERS.fetching,
];

// The most important AI crawlers to explicitly configure
export const CORE_AI_CRAWLERS = [
  'GPTBot', 'ClaudeBot', 'ChatGPT-User', 'Claude-SearchBot',
  'Google-Extended', 'PerplexityBot',
];

// Scoring weights per check id (must sum to 100). scorer.js re-normalizes
// when only a subset of checks is run, and falls back to these values
// when a check's meta has no `weight` of its own.
export const CHECK_WEIGHTS = {
  'llms-txt': 15,
  'robots-txt': 15,
  'structured-data': 15,
  'http-headers': 15,
  'agent-json': 10,
  'security-txt': 10,
  'meta-tags': 10,
  'openapi': 10,
};

// Grade thresholds, ordered highest-first: the first entry whose `min`
// the score meets wins (see scorer.getGrade).
export const GRADES = [
  { min: 90, label: 'Excellent', color: 'green' },
  { min: 70, label: 'Good', color: 'yellow' },
  { min: 50, label: 'Fair', color: 'orange' },
  { min: 0, label: 'Poor', color: 'red' },
];

// A2A Agent Card required fields
export const AGENT_JSON_REQUIRED_FIELDS = ['name', 'description', 'url', 'skills'];

// RFC 9116 (security.txt) required fields
export const SECURITY_TXT_REQUIRED_FIELDS = ['Contact', 'Expires'];

// Security headers to check. `critical` presumably drives a heavier
// scoring penalty in checks/http-headers.js — TODO confirm there.
export const SECURITY_HEADERS = [
  { name: 'strict-transport-security', label: 'Strict-Transport-Security', critical: true },
  { name: 'x-content-type-options', label: 'X-Content-Type-Options', critical: true },
  { name: 'x-frame-options', label: 'X-Frame-Options', critical: false },
  { name: 'x-xss-protection', label: 'X-XSS-Protection', critical: false },
  { name: 'referrer-policy', label: 'Referrer-Policy', critical: false },
  { name: 'permissions-policy', label: 'Permissions-Policy', critical: false },
  { name: 'content-security-policy', label: 'Content-Security-Policy', critical: false },
];
|
package/lib/fetcher.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { USER_AGENT } from './constants.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Creates a fetch wrapper with in-memory caching, timeout, and the
 * ax-audit user-agent.
 *
 * @param {{ timeout?: number }} options - per-request timeout in ms
 * @returns {{ fetch: Function, fetchPage: Function }} both names refer
 *          to the same cached fetch function
 */
export function createFetcher({ timeout = 10000 } = {}) {
  const cache = new Map();

  /**
   * Fetch a URL, returning a normalized { status, headers, body, ok, url }
   * object. Never throws: failures come back as { status: 0, ok: false,
   * error }. Results (including failures) are cached per URL.
   */
  async function fetchUrl(url) {
    const cached = cache.get(url);
    if (cached) return cached;

    // Abort-based timeout; the timer is always cleared in `finally`.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);

    let entry;
    try {
      const response = await fetch(url, {
        signal: controller.signal,
        headers: {
          'User-Agent': USER_AGENT,
          'Accept': 'text/html, application/json, text/plain, */*',
        },
        redirect: 'follow',
      });

      const body = await response.text();

      // Normalize header names to lowercase for uniform lookups.
      const headers = {};
      for (const [key, value] of response.headers) {
        headers[key.toLowerCase()] = value;
      }

      entry = {
        status: response.status,
        headers,
        body,
        ok: response.ok,
        url: response.url, // final URL after redirects
      };
    } catch (err) {
      entry = {
        status: 0,
        headers: {},
        body: '',
        ok: false,
        url,
        error: err.name === 'AbortError' ? 'Request timed out' : err.message,
      };
    } finally {
      clearTimeout(timer);
    }

    cache.set(url, entry);
    return entry;
  }

  return { fetch: fetchUrl, fetchPage: fetchUrl };
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { createFetcher } from './fetcher.js';
|
|
2
|
+
import { checks as allChecks } from './checks/index.js';
|
|
3
|
+
import { calculateOverallScore, getGrade } from './scorer.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Run a full AX audit on a URL.
 *
 * Resolves the set of checks to run, pre-fetches the homepage once (its
 * HTML and headers are shared by several checks), runs every check in
 * parallel, and aggregates the weighted overall score.
 *
 * @param {{ url: string, checks?: string[], timeout?: number }} options
 * @returns {Promise<{url: string, timestamp: string, overallScore: number, grade: object, results: object[], duration: number}>}
 * @throws {Error} when `options.checks` matches no known check id
 */
export async function audit(options) {
  const startTime = performance.now();

  // Filter checks if a subset was requested (--checks flag). Done before
  // any network work so a bad filter fails without fetching anything.
  const checksToRun = options.checks
    ? allChecks.filter(c => options.checks.includes(c.meta.id))
    : allChecks;

  // Fail fast: an empty check list would otherwise produce a zero total
  // weight and a NaN overall score downstream.
  if (checksToRun.length === 0) {
    const known = allChecks.map(c => c.meta.id).join(', ');
    throw new Error(`No matching checks to run. Known checks: ${known}`);
  }

  const fetcher = createFetcher({ timeout: options.timeout || 10000 });

  // Pre-fetch homepage (shared across checks that need HTML/headers)
  const homepage = await fetcher.fetchPage(options.url);

  const ctx = {
    url: options.url.replace(/\/$/, ''), // strip trailing slash so checks can append paths
    fetch: fetcher.fetch,
    html: homepage.body,
    headers: homepage.headers,
  };

  // Run all checks in parallel (Promise.allSettled for resilience)
  const settled = await Promise.allSettled(
    checksToRun.map(c => c.run(ctx))
  );

  // Collect results; a crashed check becomes a zero-score failure entry
  // instead of aborting the whole audit.
  const results = settled.map((s, i) => {
    if (s.status === 'fulfilled') return s.value;
    const { id, name, description } = checksToRun[i].meta;
    return {
      id,
      name,
      description,
      score: 0,
      findings: [{ status: 'fail', message: `Check crashed: ${s.reason?.message || 'Unknown error'}` }],
      duration: 0,
    };
  });

  const overallScore = calculateOverallScore(results, checksToRun.map(c => c.meta));
  const grade = getGrade(overallScore);

  return {
    url: options.url,
    timestamp: new Date().toISOString(),
    overallScore,
    grade,
    results,
    duration: Math.round(performance.now() - startTime),
  };
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { reportTerminal } from './terminal.js';
|
|
2
|
+
import { reportJson } from './json.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Dispatch an audit report to the requested output format.
 * Unknown formats fall back to the human-readable terminal reporter.
 */
export function report(auditReport, format) {
  if (format === 'json') {
    reportJson(auditReport);
  } else {
    reportTerminal(auditReport);
  }
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import { GRADES } from '../constants.js';
|
|
3
|
+
|
|
4
|
+
// Fixed-width colored status badges used in findings output.
const STATUS_ICONS = {
  pass: chalk.green(' PASS '),
  warn: chalk.yellow(' WARN '),
  fail: chalk.red(' FAIL '),
};

/**
 * Pick the chalk color function matching a grade label.
 */
function gradeColor(grade) {
  switch (grade.label) {
    case 'Excellent':
      return chalk.green;
    case 'Good':
      return chalk.yellow;
    case 'Fair':
      return chalk.hex('#FFA500');
    default:
      return chalk.red;
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * Render an audit report to stdout: header, colored score bar, then one
 * section per check with its findings, and a footer.
 */
export function reportTerminal(report) {
  // Resolve the grade bucket locally from the numeric score.
  const grade = GRADES.find(g => report.overallScore >= g.min) ?? GRADES[GRADES.length - 1];
  const paint = gradeColor(grade);

  // Header
  console.log();
  console.log(chalk.bold(' AX Audit Report'));
  console.log(chalk.dim(` ${report.url}`));
  console.log(chalk.dim(` ${report.timestamp} (${report.duration}ms)`));
  console.log();

  // Score bar: 40 cells filled proportionally to the overall score.
  const BAR_WIDTH = 40;
  const filled = Math.round((report.overallScore / 100) * BAR_WIDTH);
  const bar = paint('\u2588'.repeat(filled)) + chalk.gray('\u2591'.repeat(BAR_WIDTH - filled));
  console.log(` ${bar} ${paint.bold(report.overallScore + '/100')} ${paint(grade.label)}`);
  console.log();

  // One section per check.
  report.results.forEach(check => {
    console.log(chalk.bold(` ${check.name}`) + chalk.dim(` (${check.score}/100)`));

    check.findings.forEach(finding => {
      console.log(`${STATUS_ICONS[finding.status]} ${finding.message}`);
      if (finding.detail) {
        console.log(chalk.dim(` ${finding.detail}`));
      }
    });
    console.log();
  });

  console.log(chalk.dim(' Powered by ax-audit \u2014 Lighthouse for AI Agents'));
  console.log();
}
|
package/lib/scorer.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { CHECK_WEIGHTS, GRADES } from './constants.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Calculate the weighted overall score (0-100) from individual check
 * results. Each check's 0-100 score is weighted by its meta `weight`
 * (falling back to CHECK_WEIGHTS by id, then 10), and the sum is
 * re-normalized so a subset of checks still yields a 0-100 score.
 *
 * @param {Array<{id: string, score: number}>} results
 * @param {Array<{id: string, weight?: number}>} metas
 * @returns {number} integer 0-100; 0 when no checks ran
 */
export function calculateOverallScore(results, metas) {
  const weightMap = {};
  let totalWeight = 0;

  for (const m of metas) {
    weightMap[m.id] = m.weight ?? CHECK_WEIGHTS[m.id] ?? 10;
    totalWeight += weightMap[m.id];
  }

  // Guard: with no checks (or all-zero weights) the division below
  // would yield NaN — report 0 instead.
  if (totalWeight === 0) return 0;

  let weightedSum = 0;
  for (const r of results) {
    const weight = weightMap[r.id] || 0;
    weightedSum += (r.score / 100) * weight;
  }

  const overall = Math.round((weightedSum / totalWeight) * 100);
  return Math.max(0, Math.min(100, overall));
}
|
|
25
|
+
|
|
26
|
+
/**
 * Map a numeric score to its grade bucket: the first GRADES entry whose
 * `min` threshold the score meets (GRADES is ordered highest-first),
 * falling back to the lowest grade.
 */
export function getGrade(score) {
  const match = GRADES.find(g => score >= g.min);
  return match ?? GRADES[GRADES.length - 1];
}
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ax-audit",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Audit websites for AI Agent Experience (AX) readiness. Lighthouse for AI Agents.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": "Lucio Duran <email@lucioduran.com> (https://lucioduran.com)",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/lucioduran/ax-audit.git"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://github.com/lucioduran/ax-audit#readme",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/lucioduran/ax-audit/issues"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"ax",
|
|
18
|
+
"ai",
|
|
19
|
+
"agent-experience",
|
|
20
|
+
"audit",
|
|
21
|
+
"lighthouse",
|
|
22
|
+
"llms-txt",
|
|
23
|
+
"a2a",
|
|
24
|
+
"seo",
|
|
25
|
+
"cli",
|
|
26
|
+
"devtools"
|
|
27
|
+
],
|
|
28
|
+
"bin": {
|
|
29
|
+
"ax-audit": "./bin/ax-audit.js"
|
|
30
|
+
},
|
|
31
|
+
"exports": {
|
|
32
|
+
".": "./lib/orchestrator.js"
|
|
33
|
+
},
|
|
34
|
+
"files": [
|
|
35
|
+
"bin/",
|
|
36
|
+
"lib/",
|
|
37
|
+
"LICENSE",
|
|
38
|
+
"README.md"
|
|
39
|
+
],
|
|
40
|
+
"engines": {
|
|
41
|
+
"node": ">=18.0.0"
|
|
42
|
+
},
|
|
43
|
+
"scripts": {
|
|
44
|
+
"start": "node bin/ax-audit.js",
|
|
45
|
+
"test": "node --test test/**/*.test.js",
|
|
46
|
+
"prepublishOnly": "npm test"
|
|
47
|
+
},
|
|
48
|
+
"dependencies": {
|
|
49
|
+
"chalk": "^5.4.1",
|
|
50
|
+
"commander": "^13.1.0"
|
|
51
|
+
}
|
|
52
|
+
}
|