@agentimization/core 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -155
- package/dist/index.js +392 -27
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,43 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
<img src="https://img.shields.io/npm/v/agentimization?style=flat-square&color=blue" alt="npm version" />
|
|
3
|
-
<img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" alt="license" />
|
|
4
|
-
<img src="https://img.shields.io/badge/checks-35-purple?style=flat-square" alt="checks" />
|
|
5
|
-
</p>
|
|
1
|
+
# agentimization
|
|
6
2
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
<p align="center">
|
|
10
|
-
GEO audit for agent-ready websites.<br/>
|
|
11
|
-
One command to check if AI agents can discover, parse, and cite your content.
|
|
12
|
-
</p>
|
|
13
|
-
|
|
14
|
-
---
|
|
15
|
-
|
|
16
|
-
## Why
|
|
17
|
-
|
|
18
|
-
AI agents (Claude, ChatGPT, Perplexity, Gemini) are becoming a major source of traffic and citations. But most websites are invisible to them — no `llms.txt`, no markdown endpoints, no structured data, client-rendered content that crawlers can't read.
|
|
19
|
-
|
|
20
|
-
**Agentimization** runs checks across 8 categories and gives you a GEO score from 0–100, with specific fixes you can hand off to an AI coding agent.
|
|
21
|
-
|
|
22
|
-
## Install
|
|
23
|
-
|
|
24
|
-
```bash
|
|
25
|
-
npx agentimization https://your-site.com
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
Or install globally:
|
|
29
|
-
|
|
30
|
-
```bash
|
|
31
|
-
npm install -g agentimization
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
## Usage
|
|
35
|
-
|
|
36
|
-
### Audit a live site
|
|
37
|
-
|
|
38
|
-
```bash
|
|
39
|
-
agentimization https://docs.anthropic.com
|
|
40
|
-
```
|
|
3
|
+
[agentimization on npm](https://www.npmjs.com/package/agentimization)
|
|
41
4
|
|
|
42
5
|
```text
|
|
43
6
|
╭───────────────────────────────────────────────╮
|
|
@@ -45,149 +8,70 @@ agentimization https://docs.anthropic.com
|
|
|
45
8
|
│ ░▓▒░▓░░▒▓▒░▓░░▒▓▓░▒░▓▒░░▓▒░▓░▒░░▓▒░░▓░▒ │
|
|
46
9
|
│ ▓░▒▓░░▒▓▒░░▓░▒▓▒░░▓░░▓▒░▓░▒░░▓▒░▓░░▒▓░ │
|
|
47
10
|
│ ░▒▓░▒░▓▒░░▓░▒▓░░▒▓▒░░▓░▒▓░░▒▓░ agentimization │
|
|
48
|
-
│ │
|
|
49
|
-
│ https://docs.anthropic.com │
|
|
50
|
-
│ │
|
|
51
|
-
│ Crawling the site, one sec… │
|
|
52
11
|
╰───────────────────────────────────────────────╯
|
|
53
12
|
```
|
|
54
13
|
|
|
55
|
-
|
|
14
|
+
geo audit for agent-ready websites and projects.
|
|
56
15
|
|
|
57
|
-
|
|
58
|
-
agentimization .
|
|
59
|
-
agentimization ./docs
|
|
60
|
-
```
|
|
16
|
+
geomaxx your site so ai agents can actually find, parse, and cite it.
|
|
61
17
|
|
|
62
|
-
|
|
18
|
+
## install
|
|
63
19
|
|
|
64
20
|
```bash
|
|
65
|
-
|
|
66
|
-
agentimization https://example.com --json
|
|
67
|
-
|
|
68
|
-
# Markdown report — paste into Claude, ChatGPT, etc.
|
|
69
|
-
agentimization https://example.com --md
|
|
70
|
-
|
|
71
|
-
# Filter by category
|
|
72
|
-
agentimization https://example.com --category content-discoverability
|
|
21
|
+
npx agentimization https://your-site.com
|
|
73
22
|
```
|
|
74
23
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
Agentimization shows an interactive menu when the audit finishes:
|
|
24
|
+
## usage
|
|
78
25
|
|
|
79
|
-
|
|
80
|
-
- **Save JSON report** — full audit data written to `agentimization-report.json`
|
|
81
|
-
- **Run another URL or path** — keep the session open and audit the next site
|
|
82
|
-
- **Exit**
|
|
26
|
+
audit a live site:
|
|
83
27
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
| Category | What it checks |
|
|
89
|
-
|---|---|
|
|
90
|
-
| **Content Discoverability** | `llms.txt` existence, structure, size, coverage, link resolution. Sitemap presence. `robots.txt` AI agent rules. |
|
|
91
|
-
| **Markdown Availability** | `.md` URL support, `Accept: text/markdown` content negotiation, HTML↔markdown parity. |
|
|
92
|
-
| **Content Structure** | Code fence validity, heading hierarchy, tabbed content serialization. |
|
|
93
|
-
| **Page Size & Rendering** | SSR vs CSR detection, HTML/markdown page size, content start position (boilerplate ratio). |
|
|
94
|
-
| **URL Stability** | HTTP status codes, redirect behavior, cache header hygiene. |
|
|
95
|
-
| **Authentication & Access** | Auth gate detection, alternative access paths for gated content. |
|
|
96
|
-
| **GEO Signals** | Structured data (JSON-LD), citation worthiness, topical authority, content freshness, E-E-A-T signals, FAQ schema, canonical URLs. |
|
|
97
|
-
| **Agent Protocols** | AGENTS.md, MCP server card, API catalog (RFC 9727), content signals (AI usage declarations), Link headers (RFC 8288), agent skills index. |
|
|
98
|
-
|
|
99
|
-
## Scoring
|
|
100
|
-
|
|
101
|
-
Each check returns **pass**, **warn**, **fail**, **skip**, or **info**. Checks are weighted by importance, and scores roll up into category scores and an overall grade:
|
|
102
|
-
|
|
103
|
-
| Grade | Score |
|
|
104
|
-
|---|---|
|
|
105
|
-
| A+ | 95–100 |
|
|
106
|
-
| A | 85–94 |
|
|
107
|
-
| B | 70–84 |
|
|
108
|
-
| C | 55–69 |
|
|
109
|
-
| D | 40–54 |
|
|
110
|
-
| F | 0–39 |
|
|
111
|
-
|
|
112
|
-
## Example scores
|
|
113
|
-
|
|
114
|
-
How popular sites score on Agentimization (approximate, scores change as sites update):
|
|
115
|
-
|
|
116
|
-
| Site | Grade | Score | Notes |
|
|
117
|
-
|---|---|---|---|
|
|
118
|
-
| `docs.anthropic.com` | **A** | 88 | Strong `llms.txt`, good markdown, structured data |
|
|
119
|
-
| `docs.stripe.com` | **A** | 91 | Excellent discoverability, markdown endpoints, great structure |
|
|
120
|
-
| `nextjs.org/docs` | **B** | 76 | Good SSR, missing `llms.txt`, decent GEO signals |
|
|
121
|
-
| `react.dev` | **B** | 72 | Good structure, no `llms.txt`, client-heavy rendering |
|
|
122
|
-
| `en.wikipedia.org` | **A** | 86 | Great content structure, strong citations, no `llms.txt` |
|
|
123
|
-
| `medium.com` | **D** | 45 | Auth gates, weak markdown, no `llms.txt` |
|
|
124
|
-
| `substack.com` | **C** | 58 | Mixed access, some content gated |
|
|
125
|
-
|
|
126
|
-
> These are illustrative examples. Run `agentimization <url>` to get real-time scores.
|
|
127
|
-
|
|
128
|
-
## Local mode
|
|
129
|
-
|
|
130
|
-
When you pass a directory path instead of a URL, Agentimization runs in **local mode**:
|
|
28
|
+
```bash
|
|
29
|
+
agentimization https://docs.your-site.com
|
|
30
|
+
```
|
|
131
31
|
|
|
132
|
-
|
|
133
|
-
- Skips network-only checks (content negotiation, auth detection, cache headers, etc.)
|
|
134
|
-
- Perfect as a **CI pre-deploy step** — catch GEO regressions before they ship
|
|
32
|
+
audit a local directory:
|
|
135
33
|
|
|
136
34
|
```bash
|
|
137
|
-
|
|
138
|
-
agentimization . --json
|
|
139
|
-
# Exit code 1 if score < 50
|
|
35
|
+
agentimization .
|
|
140
36
|
```
|
|
141
37
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
```typescript
|
|
145
|
-
import { audit, auditLocal } from "@agentimization/core"
|
|
38
|
+
pipe results to a tool or file:
|
|
146
39
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
40
|
+
```bash
|
|
41
|
+
agentimization https://your-site.com --json > report.json
|
|
42
|
+
agentimization https://your-site.com --md | pbcopy
|
|
43
|
+
```
|
|
150
44
|
|
|
151
|
-
|
|
152
|
-
const local = await auditLocal("./docs")
|
|
153
|
-
console.log(local.grade, local.overall_score)
|
|
45
|
+
## what it checks
|
|
154
46
|
|
|
155
|
-
|
|
156
|
-
const result = await audit("https://example.com", {
|
|
157
|
-
sampleSize: 20,
|
|
158
|
-
categories: ["content-discoverability", "geo-signals"],
|
|
159
|
-
onEvent: (event) => console.log(event),
|
|
160
|
-
})
|
|
161
|
-
```
|
|
47
|
+
36 checks across 8 categories. each one is a thing ai agents need to discover, parse, or cite your content.
|
|
162
48
|
|
|
163
|
-
|
|
49
|
+
- content discoverability: `llms.txt`, sitemap, robots
|
|
50
|
+
- markdown availability: `.md` urls, content negotiation
|
|
51
|
+
- content structure: headings, code fences, hidden tabs
|
|
52
|
+
- page size and rendering: ssr vs csr, boilerplate ratio
|
|
53
|
+
- url stability: status codes, redirects, canonicals
|
|
54
|
+
- authentication and access: gates, alternative paths
|
|
55
|
+
- geo signals: json-ld, citations, freshness, e-e-a-t
|
|
56
|
+
- agent protocols: mcp card, api catalog, agents.md, link headers
|
|
164
57
|
|
|
165
|
-
|
|
58
|
+
## how it works
|
|
166
59
|
|
|
167
|
-
|
|
168
|
-
2. **Parse** it efficiently (markdown availability, clean HTML, SSR)
|
|
169
|
-
3. **Cite** it accurately (structured data, canonical URLs, E-E-A-T signals)
|
|
60
|
+
it samples up to 10 pages of your site, runs 36 checks against the html, headers, and well-known files, then weights them into a 0 to 100 score. failed checks come with a suggestion you can paste into your ai coding agent.
|
|
170
61
|
|
|
171
|
-
|
|
62
|
+
## requirements
|
|
172
63
|
|
|
173
|
-
|
|
64
|
+
node 18 or newer.
|
|
174
65
|
|
|
175
|
-
|
|
176
|
-
git clone https://github.com/antlio/agentimization
|
|
177
|
-
cd agentimization
|
|
178
|
-
bun install
|
|
179
|
-
bun run build
|
|
180
|
-
bun run typecheck
|
|
181
|
-
```
|
|
66
|
+
## programmatic use
|
|
182
67
|
|
|
183
|
-
|
|
68
|
+
```typescript
|
|
69
|
+
import { audit } from "@agentimization/core"
|
|
184
70
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
packages/core — Audit engine + all 36 checks
|
|
188
|
-
apps/cli — CLI (Commander.js + Ink)
|
|
71
|
+
const result = await audit("https://your-site.com")
|
|
72
|
+
console.log(result.grade, result.overall_score)
|
|
189
73
|
```
|
|
190
74
|
|
|
191
|
-
##
|
|
75
|
+
## license
|
|
192
76
|
|
|
193
|
-
|
|
77
|
+
mit
|
package/dist/index.js
CHANGED
|
@@ -769,11 +769,11 @@ var extractMetaTags = (html) => {
|
|
|
769
769
|
const metaRegex = /<meta[^>]+(?:name|property)=["']([^"']+)["'][^>]+content=["']([^"']+)["']/gi;
|
|
770
770
|
let match;
|
|
771
771
|
while ((match = metaRegex.exec(html)) !== null) {
|
|
772
|
-
meta[match[1]] = match[2];
|
|
772
|
+
meta[match[1].toLowerCase()] = match[2];
|
|
773
773
|
}
|
|
774
774
|
const metaRegex2 = /<meta[^>]+content=["']([^"']+)["'][^>]+(?:name|property)=["']([^"']+)["']/gi;
|
|
775
775
|
while ((match = metaRegex2.exec(html)) !== null) {
|
|
776
|
-
meta[match[2]] = match[1];
|
|
776
|
+
meta[match[2].toLowerCase()] = match[1];
|
|
777
777
|
}
|
|
778
778
|
return meta;
|
|
779
779
|
};
|
|
@@ -789,6 +789,24 @@ var extractJsonLd = (html) => {
|
|
|
789
789
|
}
|
|
790
790
|
return results;
|
|
791
791
|
};
|
|
792
|
+
/**
 * Reads one attribute value out of a tag's raw attribute string.
 *
 * The string is tokenized attribute by attribute, so that:
 *  - `data-src` / `data-alt` are never mistaken for `src` / `alt`;
 *  - text inside another attribute's quoted value is never mistaken for an attribute;
 *  - double-quoted, single-quoted and unquoted values are all understood,
 *    with optional whitespace around `=`.
 *
 * @param {string} attrs - Raw text between the tag name and the closing `>`.
 * @param {string} name - Attribute name to look up (compared case-insensitively).
 * @returns {string | undefined} The attribute value (possibly `""`), or `undefined`
 *   when the attribute is absent or only appears without a value.
 */
var readAttr = (attrs, name) => {
  const wanted = name.toLowerCase();
  // Group 1: attribute name. Groups 2-4: double-quoted, single-quoted, unquoted value.
  const attrPattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
  for (const m of attrs.matchAll(attrPattern)) {
    if (m[1].toLowerCase() !== wanted) continue;
    const value = m[2] ?? m[3] ?? m[4];
    // A value-less occurrence keeps the previous "not found" result; keep scanning
    // in case a later occurrence carries a value.
    if (value !== void 0) return value;
  }
  return void 0;
};
|
|
798
|
+
/**
 * Collects every `<img>` tag in an HTML string that has a `src` attribute.
 *
 * @param {string} html - Markup to scan.
 * @returns {{ src: string, alt: (string | undefined) }[]} One entry per image, in
 *   document order. `alt` is `undefined` when the tag has no readable alt attribute
 *   and `""` when it is explicitly empty. Tags without a readable `src` are omitted.
 */
var extractImages = (html) => {
  const found = [];
  // Capture group 1 is the raw attribute text between `<img` and the next `>`.
  for (const [, attrText] of html.matchAll(/<img\b([^>]*)>/gi)) {
    const src = readAttr(attrText, "src");
    if (src !== void 0) {
      found.push({ src, alt: readAttr(attrText, "alt") });
    }
  }
  return found;
};
|
|
792
810
|
var extractHeadings = (html) => {
|
|
793
811
|
const headings = [];
|
|
794
812
|
const regex = /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi;
|
|
@@ -902,6 +920,53 @@ var renderingStrategy = {
|
|
|
902
920
|
};
|
|
903
921
|
}
|
|
904
922
|
};
|
|
923
|
+
/**
 * Audit check: do the sampled pages carry at least 100 words of text each?
 *
 * `run(ctx)` looks at the first 10 entries of `ctx.sampledPages`, strips each
 * page's HTML with `stripHtml`, and counts whitespace-separated words.
 *  - no pages sampled            -> "skip"
 *  - every page has >= 100 words -> "pass"
 *  - some pages do               -> "warn"
 *  - none do                     -> "fail"
 * Non-skip results carry `metadata: { withSubstantialContent, avgWords }`.
 */
var substantialTextContent = {
  id: "substantial-text-content",
  name: "Substantial Text Content",
  category: "page-size",
  description: "Checks for at least 100 words of readable body text",
  weight: 0.8,
  run: async (ctx) => {
    // Fields repeated on every result this check returns.
    const identity = {
      id: "substantial-text-content",
      name: "Substantial Text Content",
      category: "page-size"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }

    const wordCounts = sample.map(
      (page) => stripHtml(page.html).split(/\s+/).filter((token) => token.length > 0).length
    );
    const totalWords = wordCounts.reduce((sum, count) => sum + count, 0);
    const withSubstantialContent = wordCounts.filter((count) => count >= 100).length;
    const avgWords = Math.round(totalWords / sample.length);

    if (withSubstantialContent === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have \u2265100 words of body text (avg ${avgWords})`,
        metadata: { withSubstantialContent, avgWords }
      };
    }

    const status = withSubstantialContent > 0 ? "warn" : "fail";
    return {
      ...identity,
      status,
      message: `${withSubstantialContent}/${sample.length} pages have \u2265100 words of body text (avg ${avgWords})`,
      suggestion: "Generative engines can't cite pages that are mostly images or short copy. Add at least 100 words of substantive text content per page.",
      metadata: { withSubstantialContent, avgWords }
    };
  }
};
|
|
905
970
|
var pageSizeHtml = {
|
|
906
971
|
id: "page-size-html",
|
|
907
972
|
name: "Page Size (HTML)",
|
|
@@ -993,7 +1058,7 @@ var contentStartPosition = {
|
|
|
993
1058
|
id: "content-start-position",
|
|
994
1059
|
name: "Content Start Position",
|
|
995
1060
|
category: "page-size",
|
|
996
|
-
description: "Checks
|
|
1061
|
+
description: "Checks how soon main content starts relative to total HTML",
|
|
997
1062
|
weight: 0.5,
|
|
998
1063
|
run: async (ctx) => {
|
|
999
1064
|
const pages = ctx.sampledPages.slice(0, 10);
|
|
@@ -1010,17 +1075,16 @@ var contentStartPosition = {
|
|
|
1010
1075
|
url: p.url,
|
|
1011
1076
|
position: findContentStartPosition(p.html)
|
|
1012
1077
|
}));
|
|
1013
|
-
const earlyStart = positions.filter((p) => p.position <= 0.1);
|
|
1014
1078
|
const medianPct = Math.round(
|
|
1015
1079
|
positions.map((p) => p.position).sort((a, b) => a - b)[Math.floor(positions.length / 2)] * 100
|
|
1016
1080
|
);
|
|
1017
|
-
if (
|
|
1081
|
+
if (medianPct <= 30) {
|
|
1018
1082
|
return {
|
|
1019
1083
|
id: "content-start-position",
|
|
1020
1084
|
name: "Content Start Position",
|
|
1021
1085
|
category: "page-size",
|
|
1022
1086
|
status: "pass",
|
|
1023
|
-
message: `
|
|
1087
|
+
message: `content starts at ${medianPct}% of html (median over ${pages.length} pages)`,
|
|
1024
1088
|
metadata: { medianPct }
|
|
1025
1089
|
};
|
|
1026
1090
|
}
|
|
@@ -1028,15 +1092,16 @@ var contentStartPosition = {
|
|
|
1028
1092
|
id: "content-start-position",
|
|
1029
1093
|
name: "Content Start Position",
|
|
1030
1094
|
category: "page-size",
|
|
1031
|
-
status: "warn",
|
|
1032
|
-
message: `
|
|
1033
|
-
suggestion: "
|
|
1034
|
-
metadata: { medianPct
|
|
1095
|
+
status: medianPct <= 50 ? "warn" : "fail",
|
|
1096
|
+
message: `content starts at ${medianPct}% of html (median over ${pages.length} pages)`,
|
|
1097
|
+
suggestion: "trim head metadata or move main content higher in the html so ai agents do not waste context tokens on boilerplate before reaching real content.",
|
|
1098
|
+
metadata: { medianPct }
|
|
1035
1099
|
};
|
|
1036
1100
|
}
|
|
1037
1101
|
};
|
|
1038
1102
|
var pageSizeChecks = [
|
|
1039
1103
|
renderingStrategy,
|
|
1104
|
+
substantialTextContent,
|
|
1040
1105
|
pageSizeHtml,
|
|
1041
1106
|
pageSizeMarkdown,
|
|
1042
1107
|
contentStartPosition
|
|
@@ -1210,13 +1275,105 @@ var tabbedContentSerialization = {
|
|
|
1210
1275
|
};
|
|
1211
1276
|
}
|
|
1212
1277
|
};
|
|
1278
|
+
/**
 * Audit check: share of content images that carry descriptive alt text.
 *
 * `run(ctx)` gathers images (via `extractImages`) from the first 10 entries of
 * `ctx.sampledPages`. An image whose alt is present but blank is treated as
 * decorative and left out of the ratio; an image with no alt at all counts
 * as a content image lacking alt text.
 *  - no pages sampled         -> "skip"
 *  - no images / all decorative -> "info"
 *  - ratio >= 0.5             -> "pass"
 *  - ratio >= 0.25            -> "warn"
 *  - otherwise                -> "fail"
 */
var imageAltText = {
  id: "image-alt-text",
  name: "Image Alt Text Coverage",
  category: "content-structure",
  description: "Checks that at least 50% of images have descriptive alt text",
  weight: 0.5,
  run: async (ctx) => {
    // Fields repeated on every result this check returns.
    const identity = {
      id: "image-alt-text",
      name: "Image Alt Text Coverage",
      category: "content-structure"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }

    const allImages = [];
    for (const page of sample) {
      for (const image of extractImages(page.html)) allImages.push(image);
    }

    const hasDescriptiveAlt = (img) => img.alt !== void 0 && img.alt.trim().length > 0;
    // Content images: alt missing entirely, or alt with real text (i.e. not alt="").
    const contentImages = allImages.filter((img) => img.alt === void 0 || hasDescriptiveAlt(img));
    const decorativeImages = allImages.length - contentImages.length;
    const withAlt = contentImages.filter(hasDescriptiveAlt).length;

    if (allImages.length === 0) {
      return {
        ...identity,
        status: "info",
        message: `No images found across ${sample.length} sampled pages`
      };
    }
    if (contentImages.length === 0) {
      return {
        ...identity,
        status: "info",
        message: `All ${allImages.length} sampled images are decorative (alt="")`,
        metadata: { decorativeImages, totalImages: allImages.length }
      };
    }

    const ratio = withAlt / contentImages.length;
    const pct = Math.round(ratio * 100);
    const decorativeNote = decorativeImages > 0 ? `; ${decorativeImages} decorative skipped` : "";
    const summary = `${withAlt}/${contentImages.length} content images have descriptive alt text (${pct}%)${decorativeNote}`;

    if (ratio >= 0.5) {
      return {
        ...identity,
        status: "pass",
        message: summary,
        metadata: { withAlt, contentImages: contentImages.length, decorativeImages, pct }
      };
    }

    const status = ratio >= 0.25 ? "warn" : "fail";
    return {
      ...identity,
      status,
      message: summary,
      suggestion: `Add descriptive alt text to at least 50% of content images. AI agents and screen readers rely on alt text to understand visual content. Mark purely decorative images with alt="" so they don't dilute the ratio.`,
      metadata: { withAlt, contentImages: contentImages.length, decorativeImages, pct }
    };
  }
};
|
|
1213
1342
|
var contentStructureChecks = [
|
|
1214
1343
|
markdownCodeFenceValidity,
|
|
1215
1344
|
sectionHeaderQuality,
|
|
1216
|
-
tabbedContentSerialization
|
|
1345
|
+
tabbedContentSerialization,
|
|
1346
|
+
imageAltText
|
|
1217
1347
|
];
|
|
1218
1348
|
|
|
1219
1349
|
// src/checks/url-stability.ts
|
|
1350
|
+
/**
 * Audit check: is the audited base URL an `https:` URL?
 *
 * `run(ctx)` only inspects `ctx.baseUrl.protocol`; it performs no request itself.
 * Returns "pass" for `https:` and "fail" (with a suggestion) for anything else.
 * NOTE(review): `requiresNetwork` is consumed outside this block — presumably to
 * leave the check out of local-directory audits; confirm against the runner.
 */
var httpsEnabled = {
  id: "https-enabled",
  name: "HTTPS Enabled",
  category: "url-stability",
  description: "Checks if the site is served over HTTPS",
  weight: 0.7,
  requiresNetwork: true,
  run: async (ctx) => {
    // Fields repeated on every result this check returns.
    const identity = {
      id: "https-enabled",
      name: "HTTPS Enabled",
      category: "url-stability"
    };
    const isSecure = ctx.baseUrl.protocol === "https:";
    if (!isSecure) {
      // Drop the colon from the protocol ("http:" -> "http") for display.
      const scheme = ctx.baseUrl.protocol.replace(":", "");
      return {
        ...identity,
        status: "fail",
        message: `Site is served over ${scheme} \u2014 AI crawlers de-prioritize non-HTTPS sources`,
        suggestion: "Serve your site over HTTPS. AI crawlers like GPTBot, ClaudeBot, and PerplexityBot strongly prefer HTTPS and may skip plain HTTP entirely."
      };
    }
    return {
      ...identity,
      status: "pass",
      message: "Site is served over HTTPS"
    };
  }
};
|
|
1220
1377
|
var httpStatusCodes = {
|
|
1221
1378
|
id: "http-status-codes",
|
|
1222
1379
|
name: "HTTP Status Codes",
|
|
@@ -1328,6 +1485,7 @@ var cacheHeaderHygiene = {
|
|
|
1328
1485
|
}
|
|
1329
1486
|
};
|
|
1330
1487
|
var urlStabilityChecks = [
|
|
1488
|
+
httpsEnabled,
|
|
1331
1489
|
httpStatusCodes,
|
|
1332
1490
|
redirectBehavior,
|
|
1333
1491
|
cacheHeaderHygiene
|
|
@@ -1573,9 +1731,10 @@ var topicalAuthoritySignals = {
|
|
|
1573
1731
|
const pages = ctx.sampledPages.slice(0, 10);
|
|
1574
1732
|
let totalInternalLinks = 0;
|
|
1575
1733
|
let pagesWithGoodLinking = 0;
|
|
1734
|
+
const resolveBase = ctx.mode === "local" ? ctx.baseUrl.href : ctx.baseUrl.origin;
|
|
1576
1735
|
for (const page of pages) {
|
|
1577
|
-
const links = extractLinks(page.html,
|
|
1578
|
-
const internalLinks = ctx.mode === "local" ? links.filter((l) =>
|
|
1736
|
+
const links = extractLinks(page.html, resolveBase);
|
|
1737
|
+
const internalLinks = ctx.mode === "local" ? links.filter((l) => l.startsWith("file:")) : links.filter((l) => {
|
|
1579
1738
|
try {
|
|
1580
1739
|
return new URL(l).origin === ctx.baseUrl.origin;
|
|
1581
1740
|
} catch {
|
|
@@ -1671,7 +1830,8 @@ var eeatSignals = {
|
|
|
1671
1830
|
const hasAuthorHtml = /class=["'][^"']*author[^"']*["']|rel=["']author["']/i.test(page.html);
|
|
1672
1831
|
if (hasAuthorMeta || hasAuthorJsonLd || hasAuthorHtml) withAuthor++;
|
|
1673
1832
|
const hasCredentials = /Ph\.?D|M\.?D|CPA|certified|licensed|expert|specialist/i.test(page.html);
|
|
1674
|
-
const
|
|
1833
|
+
const linkBase = ctx.mode === "local" ? ctx.baseUrl.href : ctx.baseUrl.origin;
|
|
1834
|
+
const hasAboutPage = extractLinks(page.html, linkBase).some((l) => /about|team|author/i.test(l));
|
|
1675
1835
|
if (hasCredentials || hasAboutPage) withExpertise++;
|
|
1676
1836
|
}
|
|
1677
1837
|
const score = (withAuthor + withExpertise) / (pages.length * 2);
|
|
@@ -1735,6 +1895,185 @@ var faqSchema = {
|
|
|
1735
1895
|
};
|
|
1736
1896
|
}
|
|
1737
1897
|
};
|
|
1898
|
+
/**
 * Audit check: does each sampled page have a meta description of 50-160 characters?
 *
 * `run(ctx)` reads the `description` entry returned by `extractMetaTags` for the
 * first 10 entries of `ctx.sampledPages` and sorts each page into one of:
 * good (50-160 chars after trimming), missing/blank, too short, too long.
 *  - no pages sampled                 -> "skip"
 *  - every page good                  -> "pass"
 *  - every page missing               -> "fail"
 *  - at least half the pages missing  -> "fail"
 *  - otherwise                        -> "warn"
 */
var metaDescription = {
  id: "meta-description",
  name: "Meta Description",
  category: "geo-signals",
  description: "Checks for a meta description between 50 and 160 characters",
  weight: 0.5,
  run: async (ctx) => {
    // Fields repeated on every result this check returns.
    const identity = {
      id: "meta-description",
      name: "Meta Description",
      category: "geo-signals"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }

    const tally = { good: 0, missing: 0, tooShort: 0, tooLong: 0 };
    for (const page of sample) {
      const text = extractMetaTags(page.html)["description"]?.trim();
      if (!text) {
        tally.missing += 1;
      } else if (text.length < 50) {
        tally.tooShort += 1;
      } else if (text.length > 160) {
        tally.tooLong += 1;
      } else {
        tally.good += 1;
      }
    }
    const { good: withGoodDescription, missing, tooShort, tooLong } = tally;

    if (withGoodDescription === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have a meta description between 50\u2013160 characters`,
        metadata: { withGoodDescription }
      };
    }
    if (missing === sample.length) {
      return {
        ...identity,
        status: "fail",
        message: "No meta description found on any sampled page",
        suggestion: 'Add a <meta name="description"> between 50 and 160 characters to every page. Generative engines quote meta descriptions when summarizing your content.'
      };
    }

    // Human-readable breakdown of the problem buckets that are non-empty.
    const breakdown = [];
    if (missing > 0) breakdown.push(`${missing} missing`);
    if (tooShort > 0) breakdown.push(`${tooShort} too short`);
    if (tooLong > 0) breakdown.push(`${tooLong} too long`);
    const detail = breakdown.join(" \xB7 ");
    const detailSuffix = detail ? ` \xB7 ${detail}` : "";

    let suggestion;
    if (missing > 0) {
      suggestion = 'Add a <meta name="description"> between 50 and 160 characters to every page. Some pages are missing it entirely.';
    } else {
      suggestion = "Aim for 50\u2013160 characters. Shorter descriptions lack context for AI; longer ones get truncated.";
    }

    return {
      ...identity,
      status: missing >= sample.length / 2 ? "fail" : "warn",
      message: `${withGoodDescription}/${sample.length} pages have meta descriptions in the 50\u2013160 char range${detailSuffix}`,
      suggestion,
      metadata: { withGoodDescription, missing, tooShort, tooLong }
    };
  }
};
|
|
1967
|
+
/**
 * Audit check: do sampled pages declare og:title, og:description, og:image and og:url?
 *
 * `run(ctx)` reads the meta map returned by `extractMetaTags` for the first 10
 * entries of `ctx.sampledPages` and classifies each page as fully covered,
 * partially covered, or having none of the four tags.
 *  - no pages sampled                -> "skip"
 *  - every page fully covered        -> "pass"
 *  - no page has any of the tags     -> "fail"
 *  - otherwise                       -> "warn"
 */
var openGraphTags = {
  id: "open-graph-tags",
  name: "Open Graph Tags",
  category: "geo-signals",
  description: "Checks for og:title, og:description, og:image, and og:url",
  weight: 0.5,
  run: async (ctx) => {
    // Fields repeated on every result this check returns.
    const identity = {
      id: "open-graph-tags",
      name: "Open Graph Tags",
      category: "geo-signals"
    };
    const sample = ctx.sampledPages.slice(0, 10);
    if (sample.length === 0) {
      return { ...identity, status: "skip", message: "No pages sampled" };
    }

    const required = ["og:title", "og:description", "og:image", "og:url"];
    const missingCounts = { "og:title": 0, "og:description": 0, "og:image": 0, "og:url": 0 };
    let fullCoverage = 0;
    let partialCoverage = 0;
    for (const page of sample) {
      const tags = extractMetaTags(page.html);
      const absent = required.filter((tag) => !tags[tag]);
      absent.forEach((tag) => {
        missingCounts[tag] = (missingCounts[tag] ?? 0) + 1;
      });
      if (absent.length === 0) {
        fullCoverage += 1;
      } else if (absent.length < required.length) {
        partialCoverage += 1;
      }
    }

    if (fullCoverage === sample.length) {
      return {
        ...identity,
        status: "pass",
        message: `All ${sample.length} pages have complete Open Graph tags`
      };
    }

    // Tags that were missing at least once, most frequently missing first.
    const mostMissing = Object.entries(missingCounts)
      .filter(([, count]) => count > 0)
      .sort(([, left], [, right]) => right - left)
      .map(([tag]) => tag);
    const noneCovered = sample.length - fullCoverage - partialCoverage;

    const partialNote = partialCoverage > 0 ? ` \xB7 ${partialCoverage} partial` : "";
    const noneNote = noneCovered > 0 ? ` \xB7 ${noneCovered} with none` : "";
    const missingNote = mostMissing.length > 0 ? ` \xB7 most often missing: ${mostMissing.slice(0, 2).join(", ")}` : "";

    return {
      ...identity,
      status: fullCoverage + partialCoverage === 0 ? "fail" : "warn",
      message: `${fullCoverage}/${sample.length} pages have complete Open Graph tags${partialNote}${noneNote}${missingNote}`,
      suggestion: "Add og:title, og:description, og:image, and og:url to every page. AI engines and link previews use these to render rich citations of your content.",
      metadata: { fullCoverage, partialCoverage, noneCovered, missingCounts }
    };
  }
};
|
|
2017
|
+
/**
 * Audit check: do sampled pages link out to at least 2 external http(s) sources?
 *
 * `run(ctx)`:
 *  - returns "info" immediately when `ctx.mode === "local"`;
 *  - returns "skip" when no pages were sampled;
 *  - otherwise, for the first 10 entries of `ctx.sampledPages`, counts links
 *    (from `extractLinks(page.html, origin)`) whose protocol starts with "http"
 *    and whose origin differs from `ctx.baseUrl.origin`.
 * A page "has citations" when it has >= 2 such links.
 *  - >= 70% of pages have citations -> "pass"
 *  - at least one page does         -> "warn"
 *  - none do                        -> "fail"
 *
 * Links that cannot be parsed as absolute URLs are ignored rather than allowed to
 * throw, so a single malformed href cannot abort the whole check.
 */
var externalCitations = {
  id: "external-citations",
  name: "External Citations",
  category: "geo-signals",
  description: "Checks for at least 2 outbound links to external sources per page",
  weight: 0.5,
  run: async (ctx) => {
    if (ctx.mode === "local") {
      return {
        id: "external-citations",
        name: "External Citations",
        category: "geo-signals",
        status: "info",
        message: "External link detection requires a live origin to compare against"
      };
    }
    const pages = ctx.sampledPages.slice(0, 10);
    if (pages.length === 0) {
      return {
        id: "external-citations",
        name: "External Citations",
        category: "geo-signals",
        status: "skip",
        message: "No pages sampled"
      };
    }
    const origin = ctx.baseUrl.origin;
    // True for a parseable http(s) link that points at a different origin.
    const isExternal = (link) => {
      try {
        const u = new URL(link);
        return u.protocol.startsWith("http") && u.origin !== origin;
      } catch {
        // `new URL` throws on relative or malformed links; those are not citations.
        return false;
      }
    };
    let pagesWithCitations = 0;
    let totalExternal = 0;
    for (const page of pages) {
      const external = extractLinks(page.html, origin).filter(isExternal);
      totalExternal += external.length;
      if (external.length >= 2) pagesWithCitations++;
    }
    const avgExternal = Math.round(totalExternal / pages.length);
    if (pagesWithCitations >= pages.length * 0.7) {
      return {
        id: "external-citations",
        name: "External Citations",
        category: "geo-signals",
        status: "pass",
        message: `${pagesWithCitations}/${pages.length} pages have \u22652 outbound links (avg ${avgExternal}/page)`,
        metadata: { pagesWithCitations, avgExternal }
      };
    }
    return {
      id: "external-citations",
      name: "External Citations",
      category: "geo-signals",
      status: pagesWithCitations > 0 ? "warn" : "fail",
      message: `Only ${pagesWithCitations}/${pages.length} pages have \u22652 outbound links (avg ${avgExternal}/page)`,
      suggestion: "Add at least 2 outbound links to authoritative external sources per page. Citing sources signals credibility to generative engines, which weigh outbound links when deciding what to cite.",
      metadata: { pagesWithCitations, avgExternal }
    };
  }
};
|
|
1738
2077
|
var canonicalUrlConsistency = {
|
|
1739
2078
|
id: "canonical-url-consistency",
|
|
1740
2079
|
name: "Canonical URL Consistency",
|
|
@@ -1742,6 +2081,15 @@ var canonicalUrlConsistency = {
|
|
|
1742
2081
|
description: "Checks if pages have consistent canonical URLs",
|
|
1743
2082
|
weight: 0.5,
|
|
1744
2083
|
run: async (ctx) => {
|
|
2084
|
+
if (ctx.mode === "local") {
|
|
2085
|
+
return {
|
|
2086
|
+
id: "canonical-url-consistency",
|
|
2087
|
+
name: "Canonical URL Consistency",
|
|
2088
|
+
category: "geo-signals",
|
|
2089
|
+
status: "info",
|
|
2090
|
+
message: "only meaningful for live urls. re-run against a deployed site to verify"
|
|
2091
|
+
};
|
|
2092
|
+
}
|
|
1745
2093
|
const pages = ctx.sampledPages.slice(0, 10);
|
|
1746
2094
|
let withCanonical = 0;
|
|
1747
2095
|
let selfReferencing = 0;
|
|
@@ -1796,6 +2144,9 @@ var geoSignalChecks = [
|
|
|
1796
2144
|
contentFreshness,
|
|
1797
2145
|
eeatSignals,
|
|
1798
2146
|
faqSchema,
|
|
2147
|
+
metaDescription,
|
|
2148
|
+
openGraphTags,
|
|
2149
|
+
externalCitations,
|
|
1799
2150
|
canonicalUrlConsistency
|
|
1800
2151
|
];
|
|
1801
2152
|
|
|
@@ -2185,7 +2536,30 @@ var ALL_CHECKS = [
|
|
|
2185
2536
|
|
|
2186
2537
|
// src/utils/local.ts
|
|
2187
2538
|
import { readFileSync, readdirSync, existsSync } from "fs";
|
|
2188
|
-
import { join, relative, extname } from "path";
|
|
2539
|
+
import { dirname, join, relative, extname } from "path";
|
|
2540
|
+
/**
 * Best-effort file read.
 *
 * @param {string} path - Filesystem path to read.
 * @returns {string | undefined} The file's contents decoded as UTF-8, or `undefined`
 *   when the path does not exist or reading it fails for any reason.
 */
var readIfExists = (path) => {
  let contents = void 0;
  try {
    if (existsSync(path)) {
      contents = readFileSync(path, "utf-8");
    }
  } catch {
    // Deliberately ignored: an unreadable path is reported the same as a missing one.
    contents = void 0;
  }
  return contents;
};
|
|
2549
|
+
/**
 * Looks for the first readable file among `names`, starting in `start` and
 * walking up through parent directories.
 *
 * Within each directory the names are tried in the order given; the walk stops
 * after `maxDepth` directories or when the filesystem root is reached.
 *
 * @param {string} start - Directory to begin the search in.
 * @param {string[]} names - Candidate file names, in priority order.
 * @param {number} [maxDepth=6] - Maximum number of directories to inspect,
 *   counting `start` itself.
 * @returns {string | undefined} Contents of the first file found (as returned by
 *   `readIfExists`), or `undefined` when none is found.
 */
var findUpward = (start, names, maxDepth = 6) => {
  let dir = start;
  let remaining = maxDepth;
  while (remaining > 0) {
    for (const fileName of names) {
      const contents = readIfExists(join(dir, fileName));
      if (contents !== void 0) return contents;
    }
    const parentDir = dirname(dir);
    // dirname() of the root is the root itself: nothing further to climb.
    if (parentDir === dir) return void 0;
    dir = parentDir;
    remaining -= 1;
  }
  return void 0;
};
|
|
2189
2563
|
var walkDir = (dir, extensions, maxDepth = 10) => {
|
|
2190
2564
|
if (maxDepth <= 0) return [];
|
|
2191
2565
|
const results = [];
|
|
@@ -2206,15 +2580,6 @@ var walkDir = (dir, extensions, maxDepth = 10) => {
|
|
|
2206
2580
|
}
|
|
2207
2581
|
return results;
|
|
2208
2582
|
};
|
|
2209
|
-
var readIfExists = (path) => {
|
|
2210
|
-
try {
|
|
2211
|
-
if (existsSync(path)) {
|
|
2212
|
-
return readFileSync(path, "utf-8");
|
|
2213
|
-
}
|
|
2214
|
-
} catch {
|
|
2215
|
-
}
|
|
2216
|
-
return void 0;
|
|
2217
|
-
};
|
|
2218
2583
|
var buildLocalContext = (dirPath, config) => {
|
|
2219
2584
|
const baseUrl = new URL(`file://${dirPath}`);
|
|
2220
2585
|
const robotsTxt = readIfExists(join(dirPath, "robots.txt"));
|
|
@@ -2224,7 +2589,7 @@ var buildLocalContext = (dirPath, config) => {
|
|
|
2224
2589
|
const mcpServerCard2 = readIfExists(join(dirPath, ".well-known", "mcp", "server-card.json"));
|
|
2225
2590
|
const apiCatalog2 = readIfExists(join(dirPath, ".well-known", "api-catalog"));
|
|
2226
2591
|
const agentSkillsIndex2 = readIfExists(join(dirPath, ".well-known", "agent-skills", "index.json"));
|
|
2227
|
-
const agentsMd2 =
|
|
2592
|
+
const agentsMd2 = findUpward(dirPath, ["AGENTS.md", "AGENT.md"]);
|
|
2228
2593
|
const sitemapUrls = sitemapXml ? parseSitemapUrls(sitemapXml) : [];
|
|
2229
2594
|
if (!sitemapXml && robotsTxt) {
|
|
2230
2595
|
const sitemapMatch = robotsTxt.match(/Sitemap:\s*(.+)/i);
|
|
@@ -2241,8 +2606,8 @@ var buildLocalContext = (dirPath, config) => {
|
|
|
2241
2606
|
}
|
|
2242
2607
|
const htmlFiles = walkDir(dirPath, /* @__PURE__ */ new Set([".html", ".htm"]));
|
|
2243
2608
|
const mdFiles = walkDir(dirPath, /* @__PURE__ */ new Set([".md", ".mdx"]));
|
|
2244
|
-
const
|
|
2245
|
-
const sampled =
|
|
2609
|
+
const sampleSource = htmlFiles.length > 0 ? htmlFiles : mdFiles;
|
|
2610
|
+
const sampled = sampleSource.slice(0, config.sampleSize);
|
|
2246
2611
|
const sampledPages = sampled.map((filePath) => {
|
|
2247
2612
|
const content = readFileSync(filePath, "utf-8");
|
|
2248
2613
|
const relPath = relative(dirPath, filePath);
|