@houseofmvps/claude-rank 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -59
- package/package.json +1 -1
- package/tools/aeo-scanner.mjs +14 -1
- package/tools/geo-scanner.mjs +32 -9
- package/tools/lib/html-parser.mjs +28 -2
- package/tools/seo-scanner.mjs +13 -1
package/README.md
CHANGED
|
@@ -1,91 +1,213 @@
|
|
|
1
|
-
|
|
1
|
+
<div align="center">
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
# claude-rank
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
### The most comprehensive SEO/GEO/AEO plugin for Claude Code. 74+ rules. Auto-fix everything. Dominate search — traditional and AI.
|
|
6
6
|
|
|
7
|
-
[](https://www.npmjs.com/package/@houseofmvps/claude-rank)
|
|
8
|
+
[](https://www.npmjs.com/package/@houseofmvps/claude-rank)
|
|
9
|
+
[](https://www.npmjs.com/package/@houseofmvps/claude-rank)
|
|
10
|
+
[](https://github.com/Houseofmvps/claude-rank/stargazers)
|
|
11
|
+
[](https://github.com/Houseofmvps/claude-rank/watchers)
|
|
12
|
+
[](LICENSE)
|
|
13
|
+
[](https://github.com/sponsors/Houseofmvps)
|
|
10
14
|
|
|
11
15
|
---
|
|
12
16
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
-
|
|
16
|
-
- **Auto-fix generators** for robots.txt, sitemap.xml, llms.txt, and JSON-LD structured data — findings become fixes in one command
|
|
17
|
-
- **Score tracking with trend history** — see whether your site is improving or declining over time
|
|
17
|
+
[](https://x.com/kaileskkhumar)
|
|
18
|
+
[](https://www.linkedin.com/in/kailesk-khumar)
|
|
19
|
+
[](https://houseofmvps.com)
|
|
18
20
|
|
|
19
|
-
|
|
21
|
+
**Built by [Kailesk Khumar](https://www.linkedin.com/in/kailesk-khumar), solo founder of [houseofmvps.com](https://houseofmvps.com)**
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
*One indie hacker. One plugin. Every search engine covered.*
|
|
22
24
|
|
|
23
|
-
|
|
24
|
-
- **AEO (Answer Engine Optimization)** — 12 rules for featured snippets, People Also Ask boxes, and voice search. Zero of these are in claude-seo.
|
|
25
|
-
- **Auto-fix** — claude-seo tells you what's wrong. claude-rank fixes it.
|
|
25
|
+
</div>
|
|
26
26
|
|
|
27
27
|
---
|
|
28
28
|
|
|
29
|
-
## Quick
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
### Install as a Claude Code plugin (recommended)
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
claude plugin add @houseofmvps/claude-rank
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
That's it. Restart Claude Code and all 6 skills + 4 agents are active.
|
|
30
38
|
|
|
31
|
-
|
|
39
|
+
### Or use standalone — no plugin install needed
|
|
32
40
|
|
|
33
41
|
```bash
|
|
34
42
|
npx @houseofmvps/claude-rank scan ./my-project
|
|
43
|
+
npx @houseofmvps/claude-rank geo ./my-project
|
|
44
|
+
npx @houseofmvps/claude-rank aeo ./my-project
|
|
45
|
+
npx @houseofmvps/claude-rank schema ./my-project
|
|
35
46
|
```
|
|
36
47
|
|
|
37
|
-
Or install globally
|
|
48
|
+
### Or install globally
|
|
38
49
|
|
|
39
50
|
```bash
|
|
40
51
|
npm install -g @houseofmvps/claude-rank
|
|
52
|
+
claude-rank scan ./my-project
|
|
41
53
|
```
|
|
42
54
|
|
|
43
|
-
|
|
55
|
+
### Use in Claude Code
|
|
56
|
+
|
|
57
|
+
Once installed, use slash commands inside any Claude Code session:
|
|
44
58
|
|
|
45
59
|
```
|
|
46
|
-
/rank
|
|
60
|
+
/rank # Smart routing — detects what your project needs
|
|
61
|
+
/rank audit # Full 8-phase SEO/GEO/AEO audit with auto-fix
|
|
62
|
+
/rank geo # Deep AI search optimization audit
|
|
63
|
+
/rank aeo # Answer engine optimization audit
|
|
64
|
+
/rank fix # Auto-fix all findings in one command
|
|
65
|
+
/rank schema # Detect, validate, generate, inject JSON-LD
|
|
47
66
|
```
|
|
48
67
|
|
|
68
|
+
**Zero configuration.** claude-rank reads your project structure and self-configures.
|
|
69
|
+
|
|
49
70
|
---
|
|
50
71
|
|
|
51
|
-
##
|
|
72
|
+
## The Problem
|
|
52
73
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
74
|
+
You shipped your SaaS. Traffic is flat. You Google your product name — page 3. You ask ChatGPT about your niche — your site isn't mentioned. Perplexity doesn't cite you. Google AI Overviews skips you entirely.
|
|
75
|
+
|
|
76
|
+
Most SEO tools check title tags and call it a day. They don't know that:
|
|
77
|
+
|
|
78
|
+
- **AI search engines are replacing traditional search** — and your content isn't optimized for them
|
|
79
|
+
- **Featured snippets and voice search** have completely different optimization rules than regular SEO
|
|
80
|
+
- **Your robots.txt is blocking GPTBot, PerplexityBot, and ClaudeBot** — AI can't cite what it can't crawl
|
|
81
|
+
- **You don't have an llms.txt** — the file AI assistants look for to understand your project
|
|
82
|
+
- **Your structured data is missing or broken** — you're invisible to rich results
|
|
83
|
+
|
|
84
|
+
That's not an SEO problem. That's a visibility problem across every search surface that exists in 2026.
|
|
85
|
+
|
|
86
|
+
## The Solution
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
/rank audit
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
One command. Three scanners run in parallel — SEO, GEO, and AEO. 74+ rules checked. Every finding gets an automated fix. Score tracked over time.
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
SEO Score: 87/100 ████████████░░ (37 rules)
|
|
96
|
+
GEO Score: 92/100 █████████████░ (25 rules)
|
|
97
|
+
AEO Score: 78/100 ██████████░░░░ (12 rules)
|
|
98
|
+
Overall: 86/100 READY TO RANK
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Score below 80?** Run `/rank fix` and it auto-generates what's missing — robots.txt, sitemap.xml, llms.txt, JSON-LD schema — then re-scans to show your improvement.
|
|
59
102
|
|
|
60
103
|
---
|
|
61
104
|
|
|
62
|
-
##
|
|
105
|
+
## What It Does
|
|
63
106
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
|
69
|
-
|
|
70
|
-
|
|
|
71
|
-
|
|
|
107
|
+
### SEO Scanner — 37 Rules
|
|
108
|
+
|
|
109
|
+
Traditional search optimization. The foundation.
|
|
110
|
+
|
|
111
|
+
| Category | What it checks |
|
|
112
|
+
|---|---|
|
|
113
|
+
| **Meta** | Title (length, uniqueness), meta description, viewport, charset, canonical URL, lang attribute |
|
|
114
|
+
| **Content** | H1 presence, heading hierarchy, word count, image alt text, thin content detection |
|
|
115
|
+
| **Technical** | robots.txt, sitemap.xml, HTTPS, mobile-friendly viewport, analytics detection |
|
|
116
|
+
| **Structured Data** | JSON-LD presence, schema validation, required fields |
|
|
117
|
+
| **Cross-Page** | Duplicate titles across pages, duplicate descriptions, canonical conflicts, orphan pages |
|
|
118
|
+
|
|
119
|
+
### GEO Scanner — 25 Rules
|
|
120
|
+
|
|
121
|
+
Generative Engine Optimization. For AI search engines: ChatGPT, Perplexity, Gemini, Google AI Overviews.
|
|
122
|
+
|
|
123
|
+
| Category | What it checks |
|
|
124
|
+
|---|---|
|
|
125
|
+
| **AI Crawlers** | robots.txt rules for 9 bots (GPTBot, PerplexityBot, ClaudeBot, Claude-Web, Google-Extended, CCBot, anthropic-ai, Googlebot, Bingbot) |
|
|
126
|
+
| **AI Discoverability** | llms.txt presence, sitemap.xml, structured data quality |
|
|
127
|
+
| **Content Structure** | Question-format H2s, definition patterns, statistics, data tables, lists |
|
|
128
|
+
| **Citation Readiness** | Passage length (134-167 word sweet spot), direct answers in first 40-60 words |
|
|
129
|
+
| **Authority Signals** | Author attribution, organization schema, source citations |
|
|
130
|
+
|
|
131
|
+
### AEO Scanner — 12 Rules
|
|
132
|
+
|
|
133
|
+
Answer Engine Optimization. Featured snippets, People Also Ask, voice search.
|
|
134
|
+
|
|
135
|
+
| Category | What it checks |
|
|
136
|
+
|---|---|
|
|
137
|
+
| **Schema** | FAQPage, HowTo, speakable, Article structured data |
|
|
138
|
+
| **Snippet Fitness** | Answer paragraph length (40-60 words optimal), numbered steps, definition patterns |
|
|
139
|
+
| **Voice Search** | Concise answers under 29 words (Google voice search average), conversational phrasing |
|
|
140
|
+
| **Rich Results** | Featured image, breadcrumb markup, review schema |
|
|
141
|
+
|
|
142
|
+
### Auto-Fix Generators
|
|
143
|
+
|
|
144
|
+
Every finding has a fix. Not "consider adding" — actual file generation:
|
|
145
|
+
|
|
146
|
+
| Generator | What it creates |
|
|
147
|
+
|---|---|
|
|
148
|
+
| **robots.txt** | AI-friendly rules allowing all 9 AI crawlers + sitemap directive |
|
|
149
|
+
| **sitemap.xml** | Auto-detected routes (Next.js App/Pages Router, static HTML) |
|
|
150
|
+
| **llms.txt** | AI discoverability file from your package.json |
|
|
151
|
+
| **JSON-LD Schema** | 12 types: Organization, Article, Product, FAQPage, HowTo, LocalBusiness, Person, WebSite, BreadcrumbList, SoftwareApplication, VideoObject, ItemList |
|
|
152
|
+
|
|
153
|
+
### Schema Engine — Full CRUD
|
|
154
|
+
|
|
155
|
+
Not just detection. Full lifecycle management:
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
Detect → Find all JSON-LD in your HTML files
|
|
159
|
+
Validate → Check against Google's requirements
|
|
160
|
+
Generate → Create missing schema from your project data
|
|
161
|
+
Inject → Add generated schema into your HTML <head>
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Score Tracking
|
|
165
|
+
|
|
166
|
+
Every audit saves your scores. See trends over time:
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
2026-03-25 SEO: 62 GEO: 45 AEO: 38
|
|
170
|
+
2026-03-26 SEO: 78 GEO: 72 AEO: 65 (+16, +27, +27)
|
|
171
|
+
2026-03-28 SEO: 87 GEO: 92 AEO: 78 (+9, +20, +13)
|
|
172
|
+
```
|
|
72
173
|
|
|
73
174
|
---
|
|
74
175
|
|
|
75
|
-
## Scoring
|
|
176
|
+
## Scoring System
|
|
177
|
+
|
|
178
|
+
All scores run from 0 to 100. Higher is better. Findings are weighted by severity:
|
|
76
179
|
|
|
77
|
-
|
|
180
|
+
| Severity | Deduction | Example |
|
|
181
|
+
|----------|-----------|---------|
|
|
182
|
+
| Critical | -20 points | No title tag, robots.txt blocking all crawlers |
|
|
183
|
+
| High | -10 points | Missing meta description, no JSON-LD, AI bots blocked |
|
|
184
|
+
| Medium | -5 points | Title too long, missing OG tags, no llms.txt |
|
|
185
|
+
| Low | -2 points | Missing lang attribute, no analytics detected |
|
|
78
186
|
|
|
79
|
-
|
|
187
|
+
Each audit produces separate SEO, GEO, and AEO scores plus a composite. A rule that fails on multiple pages counts as one deduction, not one deduction per page.
|
|
80
188
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
|
86
|
-
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## CLI Commands
|
|
192
|
+
|
|
193
|
+
| Command | Description |
|
|
194
|
+
|---------|-------------|
|
|
195
|
+
| `claude-rank scan ./project` | Full SEO scan (37 rules) |
|
|
196
|
+
| `claude-rank geo ./project` | GEO scan — AI search optimization (25 rules) |
|
|
197
|
+
| `claude-rank aeo ./project` | AEO scan — answer engine optimization (12 rules) |
|
|
198
|
+
| `claude-rank schema ./project` | Detect structured data across all HTML |
|
|
199
|
+
| `claude-rank help` | Show available commands |
|
|
87
200
|
|
|
88
|
-
|
|
201
|
+
## Slash Commands (Claude Code)
|
|
202
|
+
|
|
203
|
+
| Command | Description |
|
|
204
|
+
|---------|-------------|
|
|
205
|
+
| `/rank` | Smart routing — detects what your project needs |
|
|
206
|
+
| `/rank audit` | Full 8-phase SEO/GEO/AEO audit with auto-fix |
|
|
207
|
+
| `/rank geo` | Deep GEO audit targeting AI search visibility |
|
|
208
|
+
| `/rank aeo` | Answer engine optimization audit |
|
|
209
|
+
| `/rank fix` | Auto-fix all findings in one command |
|
|
210
|
+
| `/rank schema` | Detect, validate, generate, inject JSON-LD |
|
|
89
211
|
|
|
90
212
|
---
|
|
91
213
|
|
|
@@ -97,11 +219,15 @@ Each audit produces separate SEO, GEO, and AEO scores, plus a composite score. S
|
|
|
97
219
|
| GEO — AI search (Perplexity, ChatGPT, Gemini) | 25 rules | Basic |
|
|
98
220
|
| AEO — featured snippets, voice search | 12 rules | None |
|
|
99
221
|
| Auto-fix generators | Yes | No |
|
|
100
|
-
| Schema management (detect / validate / generate / inject) | Full | Detect only |
|
|
222
|
+
| Schema management (detect / validate / generate / inject) | Full CRUD | Detect only |
|
|
101
223
|
| Score tracking with history and trends | Yes | None |
|
|
102
|
-
| Cross-page analysis | Yes | No |
|
|
224
|
+
| Cross-page analysis (duplicates, orphans, canonicals) | Yes | No |
|
|
103
225
|
| AI bot detection | 9 bots | Basic |
|
|
104
226
|
| llms.txt generation | Yes | No |
|
|
227
|
+
| robots.txt generation | Yes | No |
|
|
228
|
+
| sitemap.xml generation | Yes | No |
|
|
229
|
+
|
|
230
|
+
**claude-seo tells you what's wrong. claude-rank fixes it.**
|
|
105
231
|
|
|
106
232
|
---
|
|
107
233
|
|
|
@@ -109,37 +235,92 @@ Each audit produces separate SEO, GEO, and AEO scores, plus a composite score. S
|
|
|
109
235
|
|
|
110
236
|
Two terms that matter and are often confused:
|
|
111
237
|
|
|
112
|
-
- **GEO (Generative Engine Optimization)** — optimization for AI-powered search engines that generate answers (Perplexity, ChatGPT Search, Gemini). This is NOT geographic targeting.
|
|
238
|
+
- **GEO (Generative Engine Optimization)** — optimization for AI-powered search engines that generate answers (Perplexity, ChatGPT Search, Gemini, Google AI Overviews). This is NOT geographic targeting.
|
|
113
239
|
- **AEO (Answer Engine Optimization)** — optimization for direct answer features: featured snippets, People Also Ask, voice assistants.
|
|
114
240
|
|
|
115
241
|
---
|
|
116
242
|
|
|
243
|
+
## Security
|
|
244
|
+
|
|
245
|
+
| Protection | How |
|
|
246
|
+
|---|---|
|
|
247
|
+
| **No shell injection** | `execFileSync` with array args everywhere — zero shell interpolation |
|
|
248
|
+
| **SSRF protection** | All HTTP tools block private IPs, cloud metadata, non-HTTP schemes |
|
|
249
|
+
| **No telemetry** | Zero data collection. No phone-home. Ever. |
|
|
250
|
+
| **1 dependency** | `htmlparser2` only (30KB). No native bindings. No `node-gyp`. |
|
|
251
|
+
| **85 tests** | Security module, all scanners, CLI, integration tests |
|
|
252
|
+
| **File safety** | 10MB read cap. 5MB response cap. Restrictive write permissions. |
|
|
253
|
+
|
|
254
|
+
See [SECURITY.md](SECURITY.md) for the full vulnerability disclosure policy.
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## What's Inside
|
|
259
|
+
|
|
260
|
+
| Category | Count | Highlights |
|
|
261
|
+
|---|---|---|
|
|
262
|
+
| **Tools** | 8 | SEO scanner (37 rules), GEO scanner (25 rules), AEO scanner (12 rules), schema engine, robots analyzer, sitemap analyzer, llms.txt generator, audit history |
|
|
263
|
+
| **Skills** | 6 | /rank, /rank audit, /rank geo, /rank aeo, /rank fix, /rank schema |
|
|
264
|
+
| **Agents** | 4 | SEO auditor, GEO auditor, AEO auditor, Schema auditor |
|
|
265
|
+
| **Commands** | 6 | All slash commands above |
|
|
266
|
+
| **Research** | 3 | SEO benchmarks, GEO research, schema catalog |
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
117
270
|
## Requirements
|
|
118
271
|
|
|
119
272
|
- Node.js >= 18
|
|
120
273
|
- ESM environment (no CommonJS)
|
|
121
274
|
- No build step required
|
|
122
275
|
|
|
123
|
-
Single runtime dependency: `htmlparser2`
|
|
276
|
+
Single runtime dependency: `htmlparser2` (30KB)
|
|
124
277
|
|
|
125
278
|
---
|
|
126
279
|
|
|
127
|
-
##
|
|
280
|
+
## Sponsor This Project
|
|
128
281
|
|
|
129
|
-
|
|
282
|
+
I built claude-rank alone — nights and weekends, between building my own SaaS products. No VC funding. No team. Just one person who got tired of being invisible to AI search and decided to fix it for everyone.
|
|
130
283
|
|
|
131
|
-
|
|
284
|
+
This plugin is **free forever.** No pro tier. No paywalls. No "upgrade to unlock." Every feature you just read about — all 8 tools, 6 skills, 4 agents — is yours, completely free.
|
|
285
|
+
|
|
286
|
+
But building and maintaining something this comprehensive takes real time. Every scanner rule I add, every false positive I fix, every new AI crawler I track — that's time I'm not spending on my own products.
|
|
287
|
+
|
|
288
|
+
**If claude-rank helped your site rank higher** — one AI citation it earned you, one missing schema it generated, one robots.txt fix that unblocked GPTBot — I'd be grateful if you considered sponsoring.
|
|
289
|
+
|
|
290
|
+
[](https://github.com/sponsors/Houseofmvps)
|
|
291
|
+
|
|
292
|
+
Thanks for using claude-rank. Now go dominate every search engine — traditional and AI.
|
|
293
|
+
|
|
294
|
+
*— [Kailesk Khumar](https://www.linkedin.com/in/kailesk-khumar), solo founder of [houseofmvps.com](https://houseofmvps.com)*
|
|
132
295
|
|
|
133
|
-
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Contributing
|
|
134
299
|
|
|
135
|
-
|
|
300
|
+
Found a bug? Want a new scanner rule? [Open an issue](https://github.com/Houseofmvps/claude-rank/issues) or PR.
|
|
301
|
+
|
|
302
|
+
```bash
|
|
303
|
+
git clone https://github.com/Houseofmvps/claude-rank.git
|
|
304
|
+
cd claude-rank
|
|
305
|
+
npm install
|
|
306
|
+
npm test # 85 tests, node:test
|
|
307
|
+
node tools/<tool>.mjs # No build step
|
|
308
|
+
```
|
|
136
309
|
|
|
137
|
-
See [
|
|
310
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
311
|
+
|
|
312
|
+
---
|
|
138
313
|
|
|
139
314
|
## License
|
|
140
315
|
|
|
141
|
-
MIT —
|
|
316
|
+
MIT — [LICENSE](LICENSE). **Free forever.** No pro tier. No paywalls.
|
|
142
317
|
|
|
143
318
|
---
|
|
144
319
|
|
|
145
|
-
|
|
320
|
+
<div align="center">
|
|
321
|
+
|
|
322
|
+
**If claude-rank helped you rank higher, [star the repo](https://github.com/Houseofmvps/claude-rank) and tell a friend.**
|
|
323
|
+
|
|
324
|
+
[](https://github.com/Houseofmvps/claude-rank/stargazers)
|
|
325
|
+
|
|
326
|
+
</div>
|
package/package.json
CHANGED
package/tools/aeo-scanner.mjs
CHANGED
|
@@ -208,7 +208,20 @@ function analyzePage(filePath) {
|
|
|
208
208
|
* @returns {{ files_scanned, findings, scores: { aeo }, summary }}
|
|
209
209
|
*/
|
|
210
210
|
export function scanDirectory(rootDir) {
|
|
211
|
-
|
|
211
|
+
let htmlFiles = findHtmlFiles(rootDir);
|
|
212
|
+
|
|
213
|
+
// If dist/build/out has HTML, exclude root index.html (Vite/webpack source template)
|
|
214
|
+
const hasBuildDir = htmlFiles.some(f => {
|
|
215
|
+
const rel = path.relative(rootDir, f);
|
|
216
|
+
return rel.startsWith('dist' + path.sep) || rel.startsWith('build' + path.sep) || rel.startsWith('out' + path.sep);
|
|
217
|
+
});
|
|
218
|
+
if (hasBuildDir) {
|
|
219
|
+
htmlFiles = htmlFiles.filter(f => {
|
|
220
|
+
const rel = path.relative(rootDir, f);
|
|
221
|
+
return rel !== 'index.html' && rel !== 'index.htm';
|
|
222
|
+
});
|
|
223
|
+
}
|
|
224
|
+
|
|
212
225
|
const findings = [];
|
|
213
226
|
|
|
214
227
|
// Per-file analyses
|
package/tools/geo-scanner.mjs
CHANGED
|
@@ -155,16 +155,26 @@ function parseRobotsTxt(content) {
|
|
|
155
155
|
*/
|
|
156
156
|
function extractSchemaTypes(jsonLdContent) {
|
|
157
157
|
const types = new Set();
|
|
158
|
+
|
|
159
|
+
function walkSchema(obj) {
|
|
160
|
+
if (!obj || typeof obj !== 'object') return;
|
|
161
|
+
if (Array.isArray(obj)) {
|
|
162
|
+
for (const item of obj) walkSchema(item);
|
|
163
|
+
return;
|
|
164
|
+
}
|
|
165
|
+
if (obj['@type']) {
|
|
166
|
+
const t = Array.isArray(obj['@type']) ? obj['@type'] : [obj['@type']];
|
|
167
|
+
for (const type of t) types.add(type);
|
|
168
|
+
}
|
|
169
|
+
// Walk all nested objects to find embedded schemas (e.g., author: { @type: "Person" })
|
|
170
|
+
for (const val of Object.values(obj)) {
|
|
171
|
+
if (val && typeof val === 'object') walkSchema(val);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
|
|
158
175
|
for (const raw of jsonLdContent) {
|
|
159
176
|
try {
|
|
160
|
-
|
|
161
|
-
const items = Array.isArray(parsed) ? parsed : [parsed];
|
|
162
|
-
for (const item of items) {
|
|
163
|
-
if (item && item['@type']) {
|
|
164
|
-
const t = Array.isArray(item['@type']) ? item['@type'] : [item['@type']];
|
|
165
|
-
for (const type of t) types.add(type);
|
|
166
|
-
}
|
|
167
|
-
}
|
|
177
|
+
walkSchema(JSON.parse(raw));
|
|
168
178
|
} catch {
|
|
169
179
|
// Non-parseable JSON-LD — skip
|
|
170
180
|
}
|
|
@@ -307,7 +317,20 @@ export function scanDirectory(rootDir) {
|
|
|
307
317
|
// 3. Scan HTML files
|
|
308
318
|
// -------------------------------------------------------------------------
|
|
309
319
|
|
|
310
|
-
|
|
320
|
+
let htmlFiles = findHtmlFiles(rootDir);
|
|
321
|
+
|
|
322
|
+
// If dist/build/out has HTML, exclude root index.html (Vite/webpack source template)
|
|
323
|
+
const hasBuildDir = htmlFiles.some(f => {
|
|
324
|
+
const rel = path.relative(rootDir, f);
|
|
325
|
+
return rel.startsWith('dist' + path.sep) || rel.startsWith('build' + path.sep) || rel.startsWith('out' + path.sep);
|
|
326
|
+
});
|
|
327
|
+
if (hasBuildDir) {
|
|
328
|
+
htmlFiles = htmlFiles.filter(f => {
|
|
329
|
+
const rel = path.relative(rootDir, f);
|
|
330
|
+
return rel !== 'index.html' && rel !== 'index.htm';
|
|
331
|
+
});
|
|
332
|
+
}
|
|
333
|
+
|
|
311
334
|
let filesScanned = 0;
|
|
312
335
|
|
|
313
336
|
// Aggregate data across all pages
|
|
package/tools/lib/html-parser.mjs
CHANGED
|
@@ -127,6 +127,8 @@ export function parseHtml(htmlString) {
|
|
|
127
127
|
let currentHeadingLevel = 0;
|
|
128
128
|
let isJsonLd = false;
|
|
129
129
|
let currentHeadingText = '';
|
|
130
|
+
let currentScriptSrc = '';
|
|
131
|
+
let inlineScriptBuffer = '';
|
|
130
132
|
let bodyTextBuffer = '';
|
|
131
133
|
|
|
132
134
|
const parser = new Parser(
|
|
@@ -252,8 +254,9 @@ export function parseHtml(htmlString) {
|
|
|
252
254
|
}
|
|
253
255
|
|
|
254
256
|
// Count total and deferred scripts
|
|
257
|
+
// type="module" is deferred by default per HTML spec
|
|
255
258
|
state.totalScripts++;
|
|
256
|
-
if (attribs.async !== undefined || attribs.defer !== undefined) {
|
|
259
|
+
if (attribs.async !== undefined || attribs.defer !== undefined || scriptType === 'module') {
|
|
257
260
|
state.deferredScripts++;
|
|
258
261
|
}
|
|
259
262
|
|
|
@@ -269,6 +272,7 @@ export function parseHtml(htmlString) {
|
|
|
269
272
|
}
|
|
270
273
|
|
|
271
274
|
inScript = true;
|
|
275
|
+
currentScriptSrc = src;
|
|
272
276
|
return;
|
|
273
277
|
}
|
|
274
278
|
|
|
@@ -349,6 +353,12 @@ export function parseHtml(htmlString) {
|
|
|
349
353
|
return;
|
|
350
354
|
}
|
|
351
355
|
|
|
356
|
+
// Inline script content — accumulate for analytics detection
|
|
357
|
+
if (inScript && !isJsonLd) {
|
|
358
|
+
inlineScriptBuffer += text;
|
|
359
|
+
return;
|
|
360
|
+
}
|
|
361
|
+
|
|
352
362
|
// Body text (skip script/style)
|
|
353
363
|
if (inBody && !inScript && !inStyle) {
|
|
354
364
|
bodyTextBuffer += text + ' ';
|
|
@@ -372,7 +382,19 @@ export function parseHtml(htmlString) {
|
|
|
372
382
|
state.jsonLdScripts++;
|
|
373
383
|
isJsonLd = false;
|
|
374
384
|
}
|
|
385
|
+
// Check inline script content for analytics patterns (catches lazy-loaded GA etc.)
|
|
386
|
+
if (!state.hasAnalytics && !currentScriptSrc && inlineScriptBuffer) {
|
|
387
|
+
for (const { pattern, provider } of ANALYTICS_PATTERNS) {
|
|
388
|
+
if (inlineScriptBuffer.includes(pattern)) {
|
|
389
|
+
state.hasAnalytics = true;
|
|
390
|
+
state.analyticsProvider = provider;
|
|
391
|
+
break;
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
}
|
|
375
395
|
inScript = false;
|
|
396
|
+
currentScriptSrc = '';
|
|
397
|
+
inlineScriptBuffer = '';
|
|
376
398
|
return;
|
|
377
399
|
}
|
|
378
400
|
|
|
@@ -451,7 +473,9 @@ export async function parseHtmlFile(filePath) {
|
|
|
451
473
|
// findHtmlFiles — recursively find .html/.htm files
|
|
452
474
|
// ---------------------------------------------------------------------------
|
|
453
475
|
|
|
454
|
-
const SKIP_DIRS = new Set(['node_modules', '.git', '.next', '.nuxt', '.svelte-kit', '.cache', '.turbo']);
|
|
476
|
+
const SKIP_DIRS = new Set(['node_modules', '.git', '.next', '.nuxt', '.svelte-kit', '.cache', '.turbo', 'public']);
|
|
477
|
+
// Files that look like HTML but aren't real pages (e.g., Google/Bing site verification)
|
|
478
|
+
const SKIP_FILE_PATTERNS = [/^google[a-f0-9]+\.html$/, /^bing[a-f0-9]+\.html$/, /^yandex_[a-f0-9]+\.html$/];
|
|
455
479
|
|
|
456
480
|
/**
|
|
457
481
|
* Recursively find all .html/.htm files under a directory.
|
|
@@ -479,6 +503,8 @@ export function findHtmlFiles(dir) {
|
|
|
479
503
|
} else if (entry.isFile()) {
|
|
480
504
|
const ext = path.extname(entry.name).toLowerCase();
|
|
481
505
|
if (ext === '.html' || ext === '.htm') {
|
|
506
|
+
// Skip search engine verification files
|
|
507
|
+
if (SKIP_FILE_PATTERNS.some(p => p.test(entry.name))) continue;
|
|
482
508
|
results.push(fullPath);
|
|
483
509
|
}
|
|
484
510
|
}
|
package/tools/seo-scanner.mjs
CHANGED
|
@@ -436,7 +436,19 @@ function calculateScore(findings) {
|
|
|
436
436
|
*/
|
|
437
437
|
export function scanDirectory(rootDir) {
|
|
438
438
|
const absRoot = path.resolve(rootDir);
|
|
439
|
-
|
|
439
|
+
let htmlFiles = findHtmlFiles(absRoot);
|
|
440
|
+
|
|
441
|
+
// If dist/, build/, or out/ has HTML, exclude root index.html (Vite/webpack source template)
|
|
442
|
+
const hasBuildDir = htmlFiles.some(f => {
|
|
443
|
+
const rel = path.relative(absRoot, f);
|
|
444
|
+
return rel.startsWith('dist' + path.sep) || rel.startsWith('build' + path.sep) || rel.startsWith('out' + path.sep);
|
|
445
|
+
});
|
|
446
|
+
if (hasBuildDir) {
|
|
447
|
+
htmlFiles = htmlFiles.filter(f => {
|
|
448
|
+
const rel = path.relative(absRoot, f);
|
|
449
|
+
return rel !== 'index.html' && rel !== 'index.htm';
|
|
450
|
+
});
|
|
451
|
+
}
|
|
440
452
|
|
|
441
453
|
// Backend-only detection
|
|
442
454
|
if (isBackendOnlyProject(absRoot, htmlFiles)) {
|