@houseofmvps/claude-rank 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -9
- package/agents/aeo-auditor.md +73 -10
- package/agents/geo-auditor.md +71 -11
- package/agents/schema-auditor.md +79 -5
- package/agents/seo-auditor.md +63 -8
- package/bin/claude-rank.mjs +32 -0
- package/package.json +1 -1
- package/skills/rank-aeo/SKILL.md +28 -0
- package/skills/rank-audit/SKILL.md +91 -3
- package/skills/rank-geo/SKILL.md +18 -5
- package/tools/lib/crawler.mjs +1 -0
- package/tools/lib/url-fetcher.mjs +57 -22
- package/tools/lighthouse-scanner.mjs +280 -0
- package/tools/seo-scanner.mjs +17 -0
- package/tools/url-scanner.mjs +47 -0
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
<img src="assets/hero-banner.png" alt="claude-rank — SEO/GEO/AEO Plugin for Claude Code" width="100%"/>
|
|
4
4
|
|
|
5
|
-
### The most comprehensive SEO/GEO/AEO plugin for Claude Code.
|
|
5
|
+
### The most comprehensive SEO/GEO/AEO plugin for Claude Code. 80+ rules. Auto-fix everything. Dominate search — traditional and AI.
|
|
6
6
|
|
|
7
7
|
[](https://www.npmjs.com/package/@houseofmvps/claude-rank)
|
|
8
8
|
[](https://www.npmjs.com/package/@houseofmvps/claude-rank)
|
|
@@ -143,7 +143,7 @@ That's not an SEO problem. That's a visibility problem across every search surfa
|
|
|
143
143
|
/rank audit
|
|
144
144
|
```
|
|
145
145
|
|
|
146
|
-
One command. Three scanners run in parallel — SEO, GEO, and AEO.
|
|
146
|
+
One command. Three scanners run in parallel — SEO, GEO, and AEO. 80+ rules checked. Every finding gets an automated fix. Score tracked over time.
|
|
147
147
|
|
|
148
148
|
```
|
|
149
149
|
SEO Score: 87/100 ████████████░░ (39 rules)
|
|
@@ -158,7 +158,7 @@ Overall: 86/100 READY TO RANK
|
|
|
158
158
|
|
|
159
159
|
## What It Does
|
|
160
160
|
|
|
161
|
-
### SEO Scanner —
|
|
161
|
+
### SEO Scanner — 39 Rules
|
|
162
162
|
|
|
163
163
|
Traditional search optimization. The foundation.
|
|
164
164
|
|
|
@@ -166,8 +166,8 @@ Traditional search optimization. The foundation.
|
|
|
166
166
|
|---|---|
|
|
167
167
|
| **Meta** | Title (length, uniqueness), meta description, viewport, charset, canonical URL, lang attribute |
|
|
168
168
|
| **Content** | H1 presence, heading hierarchy, word count, image alt text, thin content detection |
|
|
169
|
-
| **Technical** | robots.txt, sitemap.xml, HTTPS, mobile-friendly viewport, analytics detection |
|
|
170
|
-
| **Structured Data** | JSON-LD presence, schema validation
|
|
169
|
+
| **Technical** | robots.txt, sitemap.xml, HTTPS, mobile-friendly viewport, analytics detection, redirect chain detection |
|
|
170
|
+
| **Structured Data** | JSON-LD presence, schema validation against Google's required fields (14 schema types) |
|
|
171
171
|
| **Cross-Page** | Duplicate titles across pages, duplicate descriptions, canonical conflicts, orphan pages |
|
|
172
172
|
|
|
173
173
|
### GEO Scanner — 25 Rules
|
|
@@ -204,6 +204,33 @@ Every finding has a fix. Not "consider adding" — actual file generation:
|
|
|
204
204
|
| **llms.txt** | AI discoverability file from your package.json |
|
|
205
205
|
| **JSON-LD Schema** | 12 types: Organization, Article, Product, FAQPage, HowTo, LocalBusiness, Person, WebSite, BreadcrumbList, SoftwareApplication, VideoObject, ItemList |
|
|
206
206
|
|
|
207
|
+
### Core Web Vitals (Lighthouse)
|
|
208
|
+
|
|
209
|
+
Optional performance scoring powered by Lighthouse. Requires `lighthouse` and `chrome-launcher`:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
npm install -g lighthouse chrome-launcher
|
|
213
|
+
claude-rank cwv https://example.com
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
| Metric | What it measures | Good | Poor |
|
|
217
|
+
|---|---|---|---|
|
|
218
|
+
| **LCP** | Largest Contentful Paint | < 2.5s | > 4.0s |
|
|
219
|
+
| **CLS** | Cumulative Layout Shift | < 0.1 | > 0.25 |
|
|
220
|
+
| **FCP** | First Contentful Paint | < 1.8s | > 3.0s |
|
|
221
|
+
| **TBT** | Total Blocking Time (proxy for INP) | < 200ms | > 600ms |
|
|
222
|
+
| **SI** | Speed Index | < 3.4s | > 5.8s |
|
|
223
|
+
|
|
224
|
+
Graceful fallback: if Lighthouse isn't installed, the `cwv` command prints the install instructions and exits instead of failing.
|
|
225
|
+
|
|
226
|
+
### Post-Audit Action Plans
|
|
227
|
+
|
|
228
|
+
Every audit now ends with exact step-by-step instructions for:
|
|
229
|
+
|
|
230
|
+
- **Google Search Console** — submit sitemap, request indexing for money pages, check coverage, validate rich results, monitor Core Web Vitals
|
|
231
|
+
- **Bing Webmaster Tools** — submit URLs, enable IndexNow, verify robots.txt for Copilot/ChatGPT visibility
|
|
232
|
+
- **AI Search Verification** — test visibility in ChatGPT, Perplexity, Gemini, Google AI Overviews
|
|
233
|
+
|
|
207
234
|
### Schema Engine — Full CRUD
|
|
208
235
|
|
|
209
236
|
Not just detection. Full lifecycle management:
|
|
@@ -250,6 +277,7 @@ Each audit produces separate SEO, GEO, and AEO scores plus a composite. Same rul
|
|
|
250
277
|
| `claude-rank geo ./project` | GEO scan — AI search optimization (25 rules) |
|
|
251
278
|
| `claude-rank aeo ./project` | AEO scan — answer engine optimization (12 rules) |
|
|
252
279
|
| `claude-rank schema ./project` | Detect structured data across all HTML |
|
|
280
|
+
| `claude-rank cwv https://example.com` | Core Web Vitals via Lighthouse (optional) |
|
|
253
281
|
| `claude-rank help` | Show available commands |
|
|
254
282
|
|
|
255
283
|
## Slash Commands (Claude Code)
|
|
@@ -269,13 +297,20 @@ Each audit produces separate SEO, GEO, and AEO scores plus a composite. Same rul
|
|
|
269
297
|
|
|
270
298
|
| Feature | claude-rank | claude-seo |
|
|
271
299
|
|---------|:-----------:|:----------:|
|
|
272
|
-
| SEO rules |
|
|
300
|
+
| SEO rules | 39 | ~20 |
|
|
273
301
|
| GEO — AI search (Perplexity, ChatGPT, Gemini) | 25 rules | Basic |
|
|
274
302
|
| AEO — featured snippets, voice search | 12 rules | None |
|
|
275
303
|
| Auto-fix generators | Yes | No |
|
|
276
304
|
| Schema management (detect / validate / generate / inject) | Full CRUD | Detect only |
|
|
305
|
+
| Core Web Vitals / Lighthouse | Yes (optional) | No |
|
|
306
|
+
| Redirect chain detection | Yes | No |
|
|
307
|
+
| Schema validation (Google required fields) | 14 types | No |
|
|
308
|
+
| Post-audit GSC/Bing action plans | Yes | No |
|
|
277
309
|
| Score tracking with history and trends | Yes | None |
|
|
278
310
|
| Cross-page analysis (duplicates, orphans, canonicals) | Yes | No |
|
|
311
|
+
| Multi-page URL crawling (up to 50 pages) | Yes | No |
|
|
312
|
+
| HTML report export (agency-ready) | Yes | No |
|
|
313
|
+
| CI/CD threshold mode | Yes | No |
|
|
279
314
|
| AI bot detection | 9 bots | Basic |
|
|
280
315
|
| llms.txt generation | Yes | No |
|
|
281
316
|
| robots.txt generation | Yes | No |
|
|
@@ -302,7 +337,7 @@ Two terms that matter and are often confused:
|
|
|
302
337
|
| **SSRF protection** | All HTTP tools block private IPs, cloud metadata, non-HTTP schemes |
|
|
303
338
|
| **No telemetry** | Zero data collection. No phone-home. Ever. |
|
|
304
339
|
| **1 dependency** | `htmlparser2` only (30KB). No native bindings. No `node-gyp`. |
|
|
305
|
-
| **
|
|
340
|
+
| **200 tests** | Security module, all scanners, CLI, integration tests |
|
|
306
341
|
| **File safety** | 10MB read cap. 5MB response cap. Restrictive write permissions. |
|
|
307
342
|
|
|
308
343
|
See [SECURITY.md](SECURITY.md) for the full vulnerability disclosure policy.
|
|
@@ -313,7 +348,7 @@ See [SECURITY.md](SECURITY.md) for the full vulnerability disclosure policy.
|
|
|
313
348
|
|
|
314
349
|
| Category | Count | Highlights |
|
|
315
350
|
|---|---|---|
|
|
316
|
-
| **Tools** |
|
|
351
|
+
| **Tools** | 9 | SEO scanner (39 rules), GEO scanner (25 rules), AEO scanner (12 rules), Lighthouse/CWV scanner, schema engine, robots analyzer, sitemap analyzer, llms.txt generator, audit history |
|
|
317
352
|
| **Skills** | 6 | /rank, /rank audit, /rank geo, /rank aeo, /rank fix, /rank schema |
|
|
318
353
|
| **Agents** | 4 | SEO auditor, GEO auditor, AEO auditor, Schema auditor |
|
|
319
354
|
| **Commands** | 6 | All slash commands above |
|
|
@@ -357,7 +392,7 @@ Found a bug? Want a new scanner rule? [Open an issue](https://github.com/Houseof
|
|
|
357
392
|
git clone https://github.com/Houseofmvps/claude-rank.git
|
|
358
393
|
cd claude-rank
|
|
359
394
|
npm install
|
|
360
|
-
npm test #
|
|
395
|
+
npm test # 200 tests, node:test
|
|
361
396
|
node tools/<tool>.mjs # No build step
|
|
362
397
|
```
|
|
363
398
|
|
package/agents/aeo-auditor.md
CHANGED
|
@@ -1,26 +1,89 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: aeo-auditor
|
|
3
|
-
description: Runs AEO audit
|
|
3
|
+
description: Runs AEO audit for featured snippets, voice search, and People Also Ask optimization with rich result submission guidance.
|
|
4
4
|
model: inherit
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
You are the AEO Auditor agent for claude-rank.
|
|
7
|
+
You are the AEO Auditor agent for claude-rank. Audit a site's readiness for featured snippets, People Also Ask boxes, voice search results, and other direct answer features.
|
|
8
8
|
|
|
9
|
-
##
|
|
9
|
+
## Step 1: Identify Snippet Opportunities
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
Before scanning, assess the site's answer engine potential:
|
|
12
|
+
- **Blog/content sites**: High snippet opportunity — look for how-to, what-is, comparison content
|
|
13
|
+
- **SaaS**: Medium opportunity — pricing FAQs, feature comparisons, "how does [product] work?"
|
|
14
|
+
- **E-commerce**: High opportunity — product FAQs, buying guides, "best [category]" content
|
|
15
|
+
- **Local business**: High opportunity — service FAQs, "near me" patterns, operating hours
|
|
14
16
|
|
|
15
|
-
##
|
|
17
|
+
## Step 2: Run Scanner
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
node ${CLAUDE_PLUGIN_ROOT}/tools/aeo-scanner.mjs <project-directory>
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Parse the JSON output.
|
|
24
|
+
|
|
25
|
+
## Step 3: Schema Gap Analysis
|
|
26
|
+
|
|
27
|
+
Check which answer-engine schemas are present vs missing:
|
|
28
|
+
|
|
29
|
+
| Schema | Purpose | Priority |
|
|
30
|
+
|--------|---------|----------|
|
|
31
|
+
| **FAQPage** | Powers FAQ rich results and People Also Ask | Critical for any site with Q&A content |
|
|
32
|
+
| **HowTo** | Powers how-to rich results with steps | Critical for tutorial/guide content |
|
|
33
|
+
| **speakable** | Tells voice assistants which content to read aloud | High for voice search optimization |
|
|
34
|
+
| **Article/BlogPosting** | Enables article rich results with author, date | High for content sites |
|
|
35
|
+
| **BreadcrumbList** | Shows page hierarchy in search results | Medium — improves CTR |
|
|
36
|
+
|
|
37
|
+
Don't just flag "missing FAQPage" — explain: "Your /pricing page has 6 questions with answers but no FAQPage schema. Adding it would make these eligible for FAQ rich results in Google, which typically increases CTR by 20-30%."
|
|
16
38
|
|
|
17
|
-
|
|
39
|
+
## Step 4: Snippet Fitness Analysis
|
|
40
|
+
|
|
41
|
+
Evaluate content readiness for featured snippets:
|
|
42
|
+
|
|
43
|
+
- **Paragraph snippets** (most common): Need a direct, concise answer in 40-60 words immediately after a question H2. Check if the site's answers are too long, too vague, or buried in paragraphs.
|
|
44
|
+
- **List snippets**: Need numbered/bulleted lists under "how to" or "best" H2s. Check for procedural content that isn't using ordered lists.
|
|
45
|
+
- **Table snippets**: Need HTML tables for comparison content. Check for comparison pages without proper table markup.
|
|
46
|
+
- **Voice search**: Google voice answers average 29 words. Check if any answers are concise enough.
|
|
47
|
+
|
|
48
|
+
## Step 5: Prioritized Recommendations
|
|
49
|
+
|
|
50
|
+
1. **Add FAQPage schema** to pages with Q&A patterns (biggest immediate win)
|
|
51
|
+
2. **Restructure answers** to 40-60 words after question H2s (snippet eligibility)
|
|
52
|
+
3. **Add HowTo schema** to tutorial/guide pages with steps
|
|
53
|
+
4. **Add speakable** to key content sections for voice search
|
|
54
|
+
5. **Convert procedural content** to numbered lists (list snippet eligibility)
|
|
55
|
+
|
|
56
|
+
## Step 6: GSC Rich Results Verification
|
|
57
|
+
|
|
58
|
+
After deploying fixes, guide the user:
|
|
59
|
+
1. **Test before deploying**: Use [Rich Results Test](https://search.google.com/test/rich-results) on each page with new schema
|
|
60
|
+
2. **Request indexing** in GSC for pages with new FAQ/HowTo schema
|
|
61
|
+
3. **Monitor Enhancements**: GSC → Enhancements → check FAQPage, HowTo, Breadcrumbs for errors
|
|
62
|
+
4. **Track snippet wins**: GSC → Performance → Search Appearance → filter by "Featured snippets" and "FAQ rich results"
|
|
63
|
+
5. **Bing submission**: Submit pages with new schema via Bing URL Submission for Copilot visibility
|
|
64
|
+
|
|
65
|
+
## Output Format
|
|
18
66
|
|
|
19
67
|
```json
|
|
20
68
|
{
|
|
21
69
|
"category": "aeo",
|
|
22
|
-
"scores": { "aeo":
|
|
70
|
+
"scores": { "aeo": 58 },
|
|
23
71
|
"findings": [...],
|
|
24
|
-
"
|
|
72
|
+
"snippet_opportunities": [
|
|
73
|
+
"/pricing — 6 Q&A patterns detected, no FAQPage schema (add schema for FAQ rich results)",
|
|
74
|
+
"/blog/how-to-cancel — step-by-step content with no HowTo schema (add for how-to rich results)",
|
|
75
|
+
"/features — comparison content with no HTML table (add table for table snippets)"
|
|
76
|
+
],
|
|
77
|
+
"quick_wins": [
|
|
78
|
+
"Add FAQPage schema to /pricing — 6 questions already structured as Q&A",
|
|
79
|
+
"Restructure /blog answers to 40-60 words for snippet eligibility",
|
|
80
|
+
"Add speakable to homepage hero section for voice search"
|
|
81
|
+
],
|
|
82
|
+
"fixes_available": 3,
|
|
83
|
+
"gsc_actions": [
|
|
84
|
+
"Test new schema at search.google.com/test/rich-results before deploying",
|
|
85
|
+
"Request indexing for pages with new schema in GSC URL Inspection",
|
|
86
|
+
"Monitor GSC → Enhancements → FAQPage for validation status"
|
|
87
|
+
]
|
|
25
88
|
}
|
|
26
89
|
```
|
package/agents/geo-auditor.md
CHANGED
|
@@ -1,27 +1,87 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: geo-auditor
|
|
3
|
-
description: Runs GEO audit
|
|
3
|
+
description: Runs GEO audit for AI search visibility, checks AI bot access, analyzes citation readiness, and guides AI search submission.
|
|
4
4
|
model: inherit
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
You are the GEO Auditor agent for claude-rank.
|
|
7
|
+
You are the GEO Auditor agent for claude-rank. Audit a site's visibility to AI search engines (ChatGPT, Perplexity, Google AI Overviews, Gemini) and provide actionable fixes.
|
|
8
8
|
|
|
9
|
-
##
|
|
9
|
+
## Step 1: Detect AI Readiness Level
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
Before scanning, quickly assess the site's AI search maturity:
|
|
12
|
+
- **Level 0 (Invisible)**: No llms.txt, AI bots blocked, no structured data
|
|
13
|
+
- **Level 1 (Basic)**: AI bots allowed but no content optimization
|
|
14
|
+
- **Level 2 (Optimized)**: llms.txt present, question headers, citation-ready passages
|
|
15
|
+
- **Level 3 (Dominant)**: All of the above, plus comparison tables, statistics, and author authority signals
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
This framing helps users understand where they are and where they need to be.
|
|
18
|
+
|
|
19
|
+
## Step 2: Run Scanner
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
node ${CLAUDE_PLUGIN_ROOT}/tools/geo-scanner.mjs <project-directory>
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Parse the JSON output.
|
|
26
|
+
|
|
27
|
+
## Step 3: AI Bot Access Analysis
|
|
28
|
+
|
|
29
|
+
This is the most critical GEO finding. Check robots.txt for each bot:
|
|
30
|
+
- **GPTBot** (OpenAI/ChatGPT) — blocked = invisible to ChatGPT search
|
|
31
|
+
- **PerplexityBot** — blocked = invisible to Perplexity
|
|
32
|
+
- **ClaudeBot / Claude-Web** — blocked = invisible to Claude search
|
|
33
|
+
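- **Google-Extended** — blocked = content excluded from Gemini model training and grounding (this token does not affect Google Search ranking or inclusion in AI Overviews, which are governed by Googlebot)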
|
|
34
|
+
- **CCBot** (Common Crawl) — blocked = excluded from many AI training datasets
|
|
35
|
+
- **Bingbot** — blocked = invisible to Microsoft Copilot and ChatGPT Browse
|
|
36
|
+
|
|
37
|
+
If ANY AI bot is blocked, this is the #1 priority fix. Explain exactly which bots are blocked and what AI products they power.
|
|
17
38
|
|
|
18
|
-
|
|
39
|
+
## Step 4: Content Citation Readiness
|
|
40
|
+
|
|
41
|
+
Analyze content structure for AI citation probability:
|
|
42
|
+
- **Question H2s**: AI engines prefer to cite content organized as questions ("What is X?", "How does Y work?")
|
|
43
|
+
- **Direct definitions**: Opening paragraphs should contain "[Product] is [clear definition]" — this is what AI engines quote
|
|
44
|
+
- **Citation-ready passages**: 134-167 words, factual, self-contained — the ideal length for AI to extract and cite
|
|
45
|
+
- **Statistics and data**: Pages with numbers, percentages, and data tables are 156% more likely to be cited by AI
|
|
46
|
+
- **Author attribution**: AI engines prefer citing content with clear authorship (Person schema, author bios)
|
|
47
|
+
|
|
48
|
+
## Step 5: Prioritized Recommendations
|
|
49
|
+
|
|
50
|
+
Order fixes by impact on AI visibility:
|
|
51
|
+
1. **Unblock AI bots** in robots.txt (immediate — AI can't cite what it can't crawl)
|
|
52
|
+
2. **Add llms.txt** (tells AI assistants what your site is about)
|
|
53
|
+
3. **Add Organization schema** (establishes entity identity for AI)
|
|
54
|
+
4. **Restructure top 5 pages** with question H2s and citation-ready passages
|
|
55
|
+
5. **Add comparison tables** to competitive keyword pages
|
|
56
|
+
|
|
57
|
+
## Step 6: AI Search Verification Guide
|
|
58
|
+
|
|
59
|
+
Tell the user exactly how to verify their AI visibility:
|
|
60
|
+
1. Deploy fixes and wait 2-4 weeks for AI re-crawling
|
|
61
|
+
2. Search brand name + top keywords in ChatGPT, Perplexity, Gemini
|
|
62
|
+
3. Check if your content is cited — if not, content structure needs more work
|
|
63
|
+
4. Submit updated sitemap to GSC and Bing (AI crawlers follow sitemap signals)
|
|
64
|
+
5. Use Bing IndexNow for faster re-indexing (feeds into Copilot/ChatGPT)
|
|
65
|
+
|
|
66
|
+
## Output Format
|
|
19
67
|
|
|
20
68
|
```json
|
|
21
69
|
{
|
|
22
70
|
"category": "geo",
|
|
23
|
-
"
|
|
71
|
+
"ai_readiness_level": 1,
|
|
72
|
+
"scores": { "geo": 65 },
|
|
24
73
|
"findings": [...],
|
|
25
|
-
"
|
|
74
|
+
"blocked_bots": ["GPTBot", "ClaudeBot"],
|
|
75
|
+
"quick_wins": [
|
|
76
|
+
"Unblock GPTBot and ClaudeBot in robots.txt — you're invisible to ChatGPT and Claude search",
|
|
77
|
+
"Add llms.txt — AI assistants will discover your product",
|
|
78
|
+
"Add question H2s to your top 3 pages — increases AI citation probability"
|
|
79
|
+
],
|
|
80
|
+
"fixes_available": 4,
|
|
81
|
+
"verification_steps": [
|
|
82
|
+
"After deploying: search '[your product]' in Perplexity — check if cited",
|
|
83
|
+
"Submit updated sitemap to GSC and Bing Webmaster Tools",
|
|
84
|
+
"Enable IndexNow for faster Bing/Copilot re-indexing"
|
|
85
|
+
]
|
|
26
86
|
}
|
|
27
87
|
```
|
package/agents/schema-auditor.md
CHANGED
|
@@ -1,17 +1,91 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: schema-auditor
|
|
3
|
-
description: Detects, validates, and
|
|
3
|
+
description: Detects, validates, and recommends structured data based on project type. Provides schema gap analysis with Google requirements.
|
|
4
4
|
model: inherit
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
You are the Schema Auditor agent for claude-rank. Detect existing structured data, validate it against Google's requirements, and recommend missing schemas based on the project type.
|
|
8
|
+
|
|
9
|
+
## Step 1: Detect Existing Schema
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
node ${CLAUDE_PLUGIN_ROOT}/tools/schema-engine.mjs detect <project-directory>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Parse the output to identify all JSON-LD schema types found across the site.
|
|
16
|
+
|
|
17
|
+
## Step 2: Identify Project Type
|
|
18
|
+
|
|
19
|
+
Determine the site type to know which schemas are critical vs optional:
|
|
20
|
+
|
|
21
|
+
| Project Type | Required Schema | Recommended Schema |
|
|
22
|
+
|---|---|---|
|
|
23
|
+
| **SaaS** | Organization, WebSite | SoftwareApplication, FAQPage, BreadcrumbList, Article |
|
|
24
|
+
| **E-commerce** | Organization, Product+Offer | BreadcrumbList, FAQPage, ItemList, Review |
|
|
25
|
+
| **Blog/Publisher** | Organization, Article/BlogPosting | Person (author), BreadcrumbList, FAQPage |
|
|
26
|
+
| **Local Business** | LocalBusiness, Organization | FAQPage, BreadcrumbList, Service |
|
|
27
|
+
| **Agency** | Organization, WebSite | FAQPage, BreadcrumbList, Person (team), Service |
|
|
28
|
+
|
|
29
|
+
## Step 3: Validate Against Google Requirements
|
|
30
|
+
|
|
31
|
+
For each detected schema type, validate required fields per Google's spec:
|
|
32
|
+
|
|
33
|
+
**Organization**: Must have `name`, `url`. Should have `logo`, `contactPoint`, `sameAs`.
|
|
34
|
+
**Article/BlogPosting**: Must have `headline`, `image`, `datePublished`, `author`. Missing `image` is the most common error.
|
|
35
|
+
**Product**: Must have `name`, `image`. If offers present: `price`, `priceCurrency`, `availability` required.
|
|
36
|
+
**FAQPage**: Must have at least one `mainEntity` with `Question` type. Each question needs `acceptedAnswer` with `text`.
|
|
37
|
+
**HowTo**: Must have `name`, `step[]`. Each step needs `text` or `name`.
|
|
38
|
+
**LocalBusiness**: Must have `name`, `address`, `telephone`. Should have `openingHours`, `geo`.
|
|
39
|
+
**BreadcrumbList**: Must have `itemListElement[]` with `position`, `name`, `item` (URL).
|
|
40
|
+
**SoftwareApplication**: Must have `name` and at least one of `operatingSystem` or `applicationCategory`. Should have `offers`, `aggregateRating`.
|
|
41
|
+
|
|
42
|
+
Flag missing required fields as errors. Flag missing recommended fields as warnings.
|
|
43
|
+
|
|
44
|
+
## Step 4: Schema Gap Analysis
|
|
45
|
+
|
|
46
|
+
Compare detected schemas against the project type requirements:
|
|
47
|
+
- **Missing required**: "Your SaaS site has no Organization schema — Google can't identify your brand entity"
|
|
48
|
+
- **Missing recommended**: "Adding FAQPage schema to your pricing page would enable FAQ rich results"
|
|
49
|
+
- **Incomplete schema**: "Your Article schema is missing the `image` field — this prevents article rich results in Google"
|
|
50
|
+
|
|
51
|
+
## Step 5: Generate Recommendations
|
|
52
|
+
|
|
53
|
+
For each missing schema, provide:
|
|
54
|
+
1. Which schema type to add
|
|
55
|
+
2. Which page(s) it should go on
|
|
56
|
+
3. What data to populate it with (infer from existing page content)
|
|
57
|
+
4. The generation command: `node ${CLAUDE_PLUGIN_ROOT}/tools/schema-engine.mjs generate <type> --name="..." --url="..."`
|
|
58
|
+
|
|
59
|
+
## Step 6: Validation Guide
|
|
60
|
+
|
|
61
|
+
After generating and injecting schema:
|
|
62
|
+
1. Test each page with [Rich Results Test](https://search.google.com/test/rich-results)
|
|
63
|
+
2. Test with [Schema.org Validator](https://validator.schema.org/) for general correctness
|
|
64
|
+
3. Request indexing in GSC for pages with new schema
|
|
65
|
+
4. Monitor GSC → Enhancements for each schema type (errors appear within days)
|
|
66
|
+
5. Submit to Bing Webmaster Tools for Copilot/ChatGPT visibility
|
|
67
|
+
|
|
68
|
+
## Output Format
|
|
8
69
|
|
|
9
|
-
Return JSON:
|
|
10
70
|
```json
|
|
11
71
|
{
|
|
12
72
|
"category": "schema",
|
|
73
|
+
"project_type": "saas",
|
|
13
74
|
"schemas_found": ["Organization", "FAQPage"],
|
|
14
|
-
"validation_issues": [
|
|
15
|
-
|
|
75
|
+
"validation_issues": [
|
|
76
|
+
{ "type": "Organization", "issue": "Missing 'logo' field (recommended)", "severity": "warning" }
|
|
77
|
+
],
|
|
78
|
+
"missing_required": ["WebSite"],
|
|
79
|
+
"missing_recommended": ["SoftwareApplication", "BreadcrumbList", "Article"],
|
|
80
|
+
"recommendations": [
|
|
81
|
+
"Add WebSite schema with SearchAction to homepage — enables sitelinks search box",
|
|
82
|
+
"Add SoftwareApplication schema to pricing page — enables software rich results",
|
|
83
|
+
"Add BreadcrumbList to all pages — improves search result appearance"
|
|
84
|
+
],
|
|
85
|
+
"gsc_actions": [
|
|
86
|
+
"Test new schema at search.google.com/test/rich-results",
|
|
87
|
+
"Monitor GSC → Enhancements for validation status",
|
|
88
|
+
"Request indexing for pages with new schema"
|
|
89
|
+
]
|
|
16
90
|
}
|
|
17
91
|
```
|
package/agents/seo-auditor.md
CHANGED
|
@@ -1,16 +1,60 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: seo-auditor
|
|
3
|
-
description: Runs core SEO audit
|
|
3
|
+
description: Runs core SEO audit, analyzes findings, identifies quick wins, and provides actionable fix priorities with GSC submission guidance.
|
|
4
4
|
model: inherit
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
You are the SEO Auditor agent for claude-rank. Run a comprehensive SEO audit.
|
|
7
|
+
You are the SEO Auditor agent for claude-rank. Run a comprehensive SEO audit, analyze the results intelligently, and provide actionable recommendations.
|
|
8
8
|
|
|
9
|
-
##
|
|
9
|
+
## Step 1: Detect Project Type
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
Before scanning, identify what kind of site this is by checking for signals:
|
|
12
|
+
- **SaaS**: Look for pricing pages, /dashboard, /signup, free trial CTAs
|
|
13
|
+
- **E-commerce**: Look for /product, /cart, /checkout, Product schema
|
|
14
|
+
- **Blog/Publisher**: Look for /blog, /posts, article schema, RSS feeds, author pages
|
|
15
|
+
- **Local Business**: Look for address, phone number, Google Maps embed, service area pages
|
|
16
|
+
- **Agency/Portfolio**: Look for /case-studies, /clients, /services, testimonials
|
|
17
|
+
|
|
18
|
+
This determines which findings matter most (e.g., missing Product schema is critical for e-commerce but irrelevant for a blog).
|
|
19
|
+
|
|
20
|
+
## Step 2: Run Scanner
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
node ${CLAUDE_PLUGIN_ROOT}/tools/seo-scanner.mjs <project-directory>
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Parse the JSON output for findings and scores.
|
|
27
|
+
|
|
28
|
+
## Step 3: Analyze and Prioritize
|
|
29
|
+
|
|
30
|
+
Don't just list findings. Analyze them:
|
|
31
|
+
|
|
32
|
+
1. **Identify the top 3 quick wins** — findings that are easy to fix and have the highest impact:
|
|
33
|
+
- Missing title/meta description (critical for CTR)
|
|
34
|
+
- Missing sitemap.xml (critical for indexing)
|
|
35
|
+
- Blocked crawlers in robots.txt (critical for visibility)
|
|
36
|
+
|
|
37
|
+
2. **Flag revenue-impacting issues** — findings on money pages (pricing, product, checkout) are higher priority than blog posts or legal pages.
|
|
38
|
+
|
|
39
|
+
3. **Identify cross-page patterns** — if 15 pages are missing meta descriptions, that's a template issue, not 15 individual fixes. Say: "Your page template is missing the meta description tag — fixing the template fixes all 15 pages at once."
|
|
40
|
+
|
|
41
|
+
4. **Skip noise** — don't alarm users about low-severity findings on non-critical pages (e.g., missing analytics on a privacy policy page).
|
|
42
|
+
|
|
43
|
+
## Step 4: Recommend Fix Order
|
|
44
|
+
|
|
45
|
+
Prioritize fixes by impact:
|
|
46
|
+
1. **Blocking issues first** — noindex on important pages, robots.txt blocking crawlers, missing sitemap
|
|
47
|
+
2. **Indexing issues** — missing titles, missing canonical URLs, duplicate content
|
|
48
|
+
3. **Ranking issues** — thin content, missing schema, poor heading hierarchy
|
|
49
|
+
4. **Enhancement** — OG tags, Twitter cards, analytics, favicon
|
|
50
|
+
|
|
51
|
+
## Step 5: GSC/Bing Next Steps
|
|
52
|
+
|
|
53
|
+
After presenting findings, tell the user exactly what to do in search consoles:
|
|
54
|
+
- Which pages to request indexing for (the ones with fixes applied)
|
|
55
|
+
- Whether to resubmit sitemap (if sitemap was generated/updated)
|
|
56
|
+
- Which GSC reports to check (Coverage for indexing issues, Enhancements for schema)
|
|
57
|
+
- Bing URL Submission for fast re-indexing
|
|
14
58
|
|
|
15
59
|
## Output Format
|
|
16
60
|
|
|
@@ -19,10 +63,21 @@ Return results as a JSON code block:
|
|
|
19
63
|
```json
|
|
20
64
|
{
|
|
21
65
|
"category": "seo",
|
|
66
|
+
"project_type": "saas",
|
|
22
67
|
"scores": { "seo": 72 },
|
|
23
68
|
"findings": [
|
|
24
|
-
{ "severity": "high", "
|
|
69
|
+
{ "severity": "high", "rule": "missing-meta-description", "file": "index.html", "message": "No meta description found" }
|
|
70
|
+
],
|
|
71
|
+
"quick_wins": [
|
|
72
|
+
"Add meta descriptions to your page template — fixes 15 pages at once",
|
|
73
|
+
"Generate sitemap.xml — critical for Google indexing",
|
|
74
|
+
"Add canonical URLs to prevent duplicate content issues"
|
|
25
75
|
],
|
|
26
|
-
"fixes_available": 5
|
|
76
|
+
"fixes_available": 5,
|
|
77
|
+
"gsc_actions": [
|
|
78
|
+
"Submit sitemap.xml in GSC → Sitemaps",
|
|
79
|
+
"Request indexing for homepage and pricing page in URL Inspection",
|
|
80
|
+
"Check Coverage report for 'Crawled - currently not indexed' pages"
|
|
81
|
+
]
|
|
27
82
|
}
|
|
28
83
|
```
|
package/bin/claude-rank.mjs
CHANGED
|
@@ -42,6 +42,7 @@ Commands:
|
|
|
42
42
|
scan Run core SEO scanner (default)
|
|
43
43
|
geo Run GEO (AI search) scanner
|
|
44
44
|
aeo Run AEO (answer engine) scanner
|
|
45
|
+
cwv Run Core Web Vitals / Lighthouse audit (requires: npm i -g lighthouse chrome-launcher)
|
|
45
46
|
schema Detect and validate structured data
|
|
46
47
|
help Show this help message
|
|
47
48
|
|
|
@@ -72,6 +73,37 @@ Examples:
|
|
|
72
73
|
process.exit(0);
|
|
73
74
|
}
|
|
74
75
|
|
|
76
|
+
// Handle CWV command separately (requires URL, optional dependency)
|
|
77
|
+
if (command === 'cwv') {
|
|
78
|
+
const url = dir.startsWith('http://') || dir.startsWith('https://') ? dir : null;
|
|
79
|
+
if (!url) {
|
|
80
|
+
console.error('The cwv command requires a URL. Usage: claude-rank cwv https://example.com');
|
|
81
|
+
process.exit(1);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Clear argv before importing
|
|
85
|
+
process.argv = process.argv.slice(0, 2);
|
|
86
|
+
|
|
87
|
+
const { runLighthouse, isAvailable } = await import(new URL('../tools/lighthouse-scanner.mjs', import.meta.url));
|
|
88
|
+
const check = isAvailable();
|
|
89
|
+
if (!check.available) {
|
|
90
|
+
console.log(`\n Core Web Vitals scanner requires Lighthouse.\n`);
|
|
91
|
+
console.log(` Install: npm install -g lighthouse chrome-launcher\n`);
|
|
92
|
+
console.log(` Then run: claude-rank cwv ${url}\n`);
|
|
93
|
+
process.exit(0);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const result = await runLighthouse(url);
|
|
97
|
+
if (jsonFlag) {
|
|
98
|
+
console.log(JSON.stringify(result, null, 2));
|
|
99
|
+
} else {
|
|
100
|
+
// Pretty output is handled inside lighthouse-scanner.mjs CLI
|
|
101
|
+
// For programmatic use, just output JSON
|
|
102
|
+
console.log(JSON.stringify(result, null, 2));
|
|
103
|
+
}
|
|
104
|
+
process.exit(0);
|
|
105
|
+
}
|
|
106
|
+
|
|
75
107
|
const toolPath = commands[command];
|
|
76
108
|
if (!toolPath) {
|
|
77
109
|
console.error(`Unknown command: ${command}. Run "claude-rank help" for usage.`);
|
package/package.json
CHANGED
package/skills/rank-aeo/SKILL.md
CHANGED
|
@@ -35,3 +35,31 @@ Re-run aeo-scanner. Show before/after AEO score.
|
|
|
35
35
|
- Target conversational long-tail queries ("how do I...", "what is the best...")
|
|
36
36
|
- Keep primary answers under 29 words (Google voice search average)
|
|
37
37
|
- Add People Also Ask patterns as H2/H3 questions throughout content
|
|
38
|
+
|
|
39
|
+
## Phase 6: Search Console Submission
|
|
40
|
+
|
|
41
|
+
After deploying AEO fixes, submit to search engines to trigger rich result processing:
|
|
42
|
+
|
|
43
|
+
### Google Search Console
|
|
44
|
+
1. **Request indexing** for pages with new FAQ/HowTo/speakable schema — URL Inspection → Request Indexing
|
|
45
|
+
2. **Check Rich Results** — Enhancements → FAQPage / HowTo / Breadcrumbs / Article
|
|
46
|
+
- Verify new schema is detected and valid (no errors)
|
|
47
|
+
- Common issues: missing `image` in Article, missing `acceptedAnswer` in FAQ
|
|
48
|
+
3. **Test individual pages** — Use [Rich Results Test](https://search.google.com/test/rich-results) before and after fixes
|
|
49
|
+
4. **Monitor Featured Snippets** — Performance → Search Appearance → filter by "Featured snippets"
|
|
50
|
+
- Track which pages win snippets after AEO optimization
|
|
51
|
+
- If pages lose snippets, check if answer length changed (40-60 words optimal)
|
|
52
|
+
|
|
53
|
+
### Bing Webmaster Tools
|
|
54
|
+
1. **Submit URLs** — URL Submission → submit all pages with new schema
|
|
55
|
+
2. **Verify schema** — Bing supports FAQPage, HowTo, and speakable in its rich results
|
|
56
|
+
3. **Enable IndexNow** — instant re-indexing after schema changes
|
|
57
|
+
|
|
58
|
+
### Track Featured Snippet Wins
|
|
59
|
+
1. In GSC → Performance → Search Appearance → "Featured snippets"
|
|
60
|
+
2. Export the list of queries where your pages appear as featured snippets
|
|
61
|
+
3. For queries where competitors hold the snippet, optimize those pages:
|
|
62
|
+
- Add a direct answer in the first 40-60 words after the question H2
|
|
63
|
+
- Use numbered lists for "how to" queries
|
|
64
|
+
- Use definition format ("X is...") for "what is" queries
|
|
65
|
+
4. Recheck weekly — featured snippet ownership changes frequently
|
|
@@ -73,6 +73,94 @@ Advise on content optimizations the scanner cannot automate:
|
|
|
73
73
|
|
|
74
74
|
Guide link building: create link-worthy assets, guest posting, broken link building, HARO.
|
|
75
75
|
|
|
76
|
-
## Phase 8:
|
|
77
|
-
|
|
78
|
-
|
|
76
|
+
## Phase 8: Search Console Action Plan
|
|
77
|
+
|
|
78
|
+
After fixing issues, guide the user through submitting their improved site to search engines. This is the critical bridge between "audit complete" and "actually ranking."
|
|
79
|
+
|
|
80
|
+
### Google Search Console (GSC)
|
|
81
|
+
|
|
82
|
+
1. **Submit Sitemap**
|
|
83
|
+
- Go to [Google Search Console](https://search.google.com/search-console)
|
|
84
|
+
- Select your property → Sitemaps → Enter `sitemap.xml` → Submit
|
|
85
|
+
- If sitemap was just generated by `/rank fix`, confirm the file is deployed first
|
|
86
|
+
|
|
87
|
+
2. **Request Indexing for Money Pages**
|
|
88
|
+
- Go to URL Inspection → Paste each high-priority page URL
|
|
89
|
+
- Click "Request Indexing" for pages that were fixed (new title, meta description, schema added)
|
|
90
|
+
- Priority order for indexing requests:
|
|
91
|
+
- Homepage
|
|
92
|
+
- Pricing / signup page
|
|
93
|
+
- Top landing pages (highest revenue/conversion)
|
|
94
|
+
- Blog posts targeting competitive keywords
|
|
95
|
+
- Google allows ~10-12 indexing requests per day — prioritize your money pages
|
|
96
|
+
|
|
97
|
+
3. **Check Index Coverage**
|
|
98
|
+
- Go to Pages → review "Not indexed" list
|
|
99
|
+
- "Crawled - currently not indexed" → page needs content improvements or more internal links
|
|
100
|
+
- "Discovered - currently not indexed" → page needs stronger internal links pointing to it
|
|
101
|
+
- Verify any `noindex` pages found by scanner are intentional
|
|
102
|
+
|
|
103
|
+
4. **Validate Robots.txt**
|
|
104
|
+
- Go to Settings → Crawling → Open robots.txt report
|
|
105
|
+
- Verify updated robots.txt (AI bots unblocked) is live and valid
|
|
106
|
+
- Test specific URLs to confirm they're crawlable
|
|
107
|
+
|
|
108
|
+
5. **Check Rich Results**
|
|
109
|
+
- Go to Enhancements → review each schema type (FAQ, HowTo, Product, etc.)
|
|
110
|
+
- If `/rank fix` generated new JSON-LD, check for validation errors here
|
|
111
|
+
- Use [Rich Results Test](https://search.google.com/test/rich-results) to test individual pages
|
|
112
|
+
- Common issues: missing `image` field in Article, missing `price` in Product
|
|
113
|
+
|
|
114
|
+
6. **Monitor Core Web Vitals**
|
|
115
|
+
- Go to Experience → Core Web Vitals
|
|
116
|
+
- Note any "Poor" or "Needs Improvement" URLs — these directly affect rankings
|
|
117
|
+
- Focus on: LCP (Largest Contentful Paint), CLS (Cumulative Layout Shift), INP (Interaction to Next Paint)
|
|
118
|
+
|
|
119
|
+
### Bing Webmaster Tools
|
|
120
|
+
|
|
121
|
+
1. **Submit Sitemap**
|
|
122
|
+
- Go to [Bing Webmaster Tools](https://www.bing.com/webmasters)
|
|
123
|
+
- Configure Sitemaps → Submit your sitemap.xml URL
|
|
124
|
+
- Bing also reads the `Sitemap:` directive in robots.txt (already added by `/rank fix`)
|
|
125
|
+
|
|
126
|
+
2. **Submit URLs for Fast Indexing**
|
|
127
|
+
- Go to URL Submission → submit your top 10 money pages
|
|
128
|
+
- Bing allows up to 10,000 URL submissions per day (far more generous than Google)
|
|
129
|
+
- Submit ALL pages that had SEO fixes applied
|
|
130
|
+
|
|
131
|
+
3. **Enable IndexNow** (instant indexing)
|
|
132
|
+
- Bing supports [IndexNow](https://www.indexnow.org/) for near-instant indexing
|
|
133
|
+
- Generate an API key at indexnow.org → place key file at your domain root
|
|
134
|
+
- If using Next.js or WordPress, install an IndexNow plugin to ping automatically on publish
|
|
135
|
+
- This feeds into Bing, Yandex, and Seznam simultaneously
|
|
136
|
+
|
|
137
|
+
4. **Verify Robots.txt**
|
|
138
|
+
- Go to Configure My Site → Block URLs → Robots.txt Tester
|
|
139
|
+
- Important: Bingbot feeds into Microsoft Copilot and ChatGPT Browse — keeping it unblocked is critical for AI visibility
|
|
140
|
+
|
|
141
|
+
### AI Search Verification
|
|
142
|
+
|
|
143
|
+
After deploying fixes, verify your site is visible to AI search engines:
|
|
144
|
+
|
|
145
|
+
1. **Test AI Visibility** (wait 2-4 weeks after robots.txt changes)
|
|
146
|
+
- Search your brand name + top 3 keywords in:
|
|
147
|
+
- ChatGPT (chatgpt.com)
|
|
148
|
+
- Perplexity (perplexity.ai)
|
|
149
|
+
- Google Gemini (gemini.google.com)
|
|
150
|
+
- Google AI Overviews (google.com — check the AI summary box)
|
|
151
|
+
- Screenshot results as baseline for tracking improvement
|
|
152
|
+
|
|
153
|
+
2. **Verify llms.txt**
|
|
154
|
+
- Visit `https://yourdomain.com/llms.txt` — confirm it returns content
|
|
155
|
+
- Check that it accurately describes your product and links to key pages
|
|
156
|
+
|
|
157
|
+
3. **Monitor AI Citations Weekly**
|
|
158
|
+
- Search your niche keywords in Perplexity and ChatGPT every week
|
|
159
|
+
- Track which pages get cited vs competitors
|
|
160
|
+
- Focus content improvements on topics where competitors are cited but you're not
|
|
161
|
+
- Add comparison tables, statistics, and direct definitions to boost citation probability
|
|
162
|
+
|
|
163
|
+
## Phase 9: Next Steps
|
|
164
|
+
|
|
165
|
+
Recommend which `/rank` sub-commands to run next based on lowest scores.
|
|
166
|
+
Present the user with a prioritized action checklist they can track.
|
package/skills/rank-geo/SKILL.md
CHANGED
|
@@ -33,10 +33,23 @@ Present GEO findings grouped by:
|
|
|
33
33
|
|
|
34
34
|
Re-run geo-scanner. Show before/after GEO score.
|
|
35
35
|
|
|
36
|
-
## Phase 5:
|
|
36
|
+
## Phase 5: Search Console Submission
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
After deploying GEO fixes, submit to search engines so AI crawlers pick up the changes:
|
|
39
|
+
|
|
40
|
+
### Google Search Console
|
|
41
|
+
1. **Resubmit sitemap** — Sitemaps → Resubmit `sitemap.xml` (triggers recrawl)
|
|
42
|
+
2. **Request indexing** for pages where you unblocked AI bots or added schema — URL Inspection → Request Indexing
|
|
43
|
+
3. **Check robots.txt** — Settings → Crawling → verify your updated robots.txt is live (AI bots unblocked)
|
|
44
|
+
|
|
45
|
+
### Bing Webmaster Tools
|
|
46
|
+
1. **Submit URLs** — URL Submission → submit pages with new schema/content structure
|
|
47
|
+
2. **Verify robots.txt** — Bingbot feeds Microsoft Copilot and ChatGPT Browse — keeping it unblocked is essential
|
|
48
|
+
3. **Enable IndexNow** — Near-instant indexing for Bing, Yandex, Seznam. Generate key at indexnow.org
|
|
49
|
+
|
|
50
|
+
### AI Search Verification (wait 2-4 weeks)
|
|
51
|
+
1. Search your brand name + top 3 keywords in ChatGPT, Perplexity, Google AI Overviews, Gemini
|
|
52
|
+
2. Screenshot results as a baseline
|
|
53
|
+
3. Note which competitors are cited — create citation-ready content for each gap (134-167 word passages)
|
|
42
54
|
4. Add comparison tables and statistics (156% higher AI selection with multimedia)
|
|
55
|
+
5. Set up weekly monitoring — track your AI citation rate vs competitors
|
package/tools/lib/crawler.mjs
CHANGED
|
@@ -5,8 +5,9 @@
|
|
|
5
5
|
|
|
6
6
|
import { validateUrl, createResponseAccumulator } from './security.mjs';
|
|
7
7
|
|
|
8
|
-
const USER_AGENT = 'claude-rank/1.1
|
|
8
|
+
const USER_AGENT = 'claude-rank/1.3.1 (https://github.com/Houseofmvps/claude-rank)';
|
|
9
9
|
const TIMEOUT_MS = 15_000;
|
|
10
|
+
const MAX_REDIRECTS = 10;
|
|
10
11
|
|
|
11
12
|
/**
|
|
12
13
|
* Fetch a page by URL with SSRF protection and response size limits.
|
|
@@ -20,34 +21,67 @@ export async function fetchPage(url) {
|
|
|
20
21
|
throw new Error(`URL blocked: ${validation.reason}`);
|
|
21
22
|
}
|
|
22
23
|
|
|
23
|
-
// 2.
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
// 2. Follow redirects manually to detect chains
|
|
25
|
+
const redirectChain = [];
|
|
26
|
+
let currentUrl = url;
|
|
27
27
|
let response;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
28
|
+
|
|
29
|
+
for (let i = 0; i <= MAX_REDIRECTS; i++) {
|
|
30
|
+
const controller = new AbortController();
|
|
31
|
+
const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_MS);
|
|
32
|
+
|
|
33
|
+
try {
|
|
34
|
+
response = await fetch(currentUrl, {
|
|
35
|
+
signal: controller.signal,
|
|
36
|
+
headers: {
|
|
37
|
+
'User-Agent': USER_AGENT,
|
|
38
|
+
'Accept': 'text/html,application/xhtml+xml,*/*',
|
|
39
|
+
},
|
|
40
|
+
redirect: 'manual',
|
|
41
|
+
});
|
|
42
|
+
} catch (err) {
|
|
43
|
+
clearTimeout(timeoutId);
|
|
44
|
+
if (err.name === 'AbortError') {
|
|
45
|
+
throw new Error(`Request timed out after ${TIMEOUT_MS / 1000}s: ${currentUrl}`);
|
|
46
|
+
}
|
|
47
|
+
throw new Error(`Fetch failed for ${currentUrl}: ${err.message}`);
|
|
48
|
+
}
|
|
49
|
+
|
|
38
50
|
clearTimeout(timeoutId);
|
|
39
|
-
|
|
40
|
-
|
|
51
|
+
|
|
52
|
+
// Check for redirect (3xx status)
|
|
53
|
+
if (response.status >= 300 && response.status < 400) {
|
|
54
|
+
const location = response.headers.get('location');
|
|
55
|
+
if (!location) break;
|
|
56
|
+
|
|
57
|
+
// Resolve relative redirects
|
|
58
|
+
const nextUrl = new URL(location, currentUrl).href;
|
|
59
|
+
redirectChain.push({ from: currentUrl, to: nextUrl, statusCode: response.status });
|
|
60
|
+
|
|
61
|
+
// SSRF check the redirect target
|
|
62
|
+
const nextValidation = validateUrl(nextUrl);
|
|
63
|
+
if (!nextValidation.valid) {
|
|
64
|
+
throw new Error(`Redirect target blocked: ${nextValidation.reason}`);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
currentUrl = nextUrl;
|
|
68
|
+
|
|
69
|
+
if (i === MAX_REDIRECTS) {
|
|
70
|
+
throw new Error(`Too many redirects (>${MAX_REDIRECTS}): ${url}`);
|
|
71
|
+
}
|
|
72
|
+
continue;
|
|
41
73
|
}
|
|
42
|
-
|
|
74
|
+
|
|
75
|
+
break;
|
|
43
76
|
}
|
|
44
77
|
|
|
45
|
-
|
|
78
|
+
const finalUrl = currentUrl;
|
|
79
|
+
const redirected = redirectChain.length > 0;
|
|
46
80
|
|
|
47
81
|
// 3. Check Content-Type — only scan HTML responses
|
|
48
82
|
const contentType = response.headers.get('content-type') || '';
|
|
49
83
|
if (!contentType.includes('text/html') && !contentType.includes('application/xhtml+xml')) {
|
|
50
|
-
throw new Error(`Not an HTML page (Content-Type: ${contentType}): ${
|
|
84
|
+
throw new Error(`Not an HTML page (Content-Type: ${contentType}): ${finalUrl}`);
|
|
51
85
|
}
|
|
52
86
|
|
|
53
87
|
// 4. Read body with size limits using response accumulator
|
|
@@ -73,7 +107,8 @@ export async function fetchPage(url) {
|
|
|
73
107
|
html,
|
|
74
108
|
url,
|
|
75
109
|
statusCode: response.status,
|
|
76
|
-
redirected
|
|
77
|
-
finalUrl
|
|
110
|
+
redirected,
|
|
111
|
+
finalUrl,
|
|
112
|
+
redirectChain,
|
|
78
113
|
};
|
|
79
114
|
}
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lighthouse-scanner.mjs — Core Web Vitals scanner using Lighthouse.
|
|
3
|
+
* Optional dependency: works when `lighthouse` and Chrome are available.
|
|
4
|
+
* Gracefully returns unavailable status when not installed.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* node tools/lighthouse-scanner.mjs <url>
|
|
8
|
+
* node tools/lighthouse-scanner.mjs <url> --json
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Try to import lighthouse (optional dependency)
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
// Both packages are optional. The bindings start out as "not loaded" and are
// only filled in when the dynamic imports resolve.
let lighthouse = null;
let chromeLauncher = null;

try {
  const lighthouseModule = await import('lighthouse');
  lighthouse = lighthouseModule.default;
  chromeLauncher = await import('chrome-launcher');
} catch {
  // Import failed (typically: package not installed). The bindings keep
  // whatever value they had, so isAvailable() reports the scanner as unavailable.
}
|
|
24
|
+
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
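// Metric thresholds (Google's current thresholds as of 2026): LCP, CLS and INP are the Core Web Vitals; FCP, TBT and SI are supporting Lighthouse lab metrics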
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
// Per-metric cut-offs: a value at or below `good` rates "good", a value at or
// below `poor` rates "needs-improvement", anything larger rates "poor".
const CWV_THRESHOLDS = {
  // Largest Contentful Paint (ms)
  LCP: { good: 2500, poor: 4000 },
  // Cumulative Layout Shift
  CLS: { good: 0.1, poor: 0.25 },
  // Interaction to Next Paint (ms)
  INP: { good: 200, poor: 500 },
  // First Contentful Paint (ms)
  FCP: { good: 1800, poor: 3000 },
  // Total Blocking Time (ms)
  TBT: { good: 200, poor: 600 },
  // Speed Index (ms)
  SI: { good: 3400, poor: 5800 },
};
|
|
37
|
+
|
|
38
|
+
/**
 * Classify a metric value against a `{ good, poor }` threshold pair.
 * Both bounds are inclusive.
 *
 * @param {number} value - measured metric value
 * @param {{ good: number, poor: number }} thresholds - upper bounds of the "good" and "needs-improvement" bands
 * @returns {'good' | 'needs-improvement' | 'poor'}
 */
function rateMetric(value, thresholds) {
  const withinGood = value <= thresholds.good;
  if (withinGood) {
    return 'good';
  }
  return value <= thresholds.poor ? 'needs-improvement' : 'poor';
}
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Rule definitions
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
// Finding catalogue, keyed by rule id. Each row is
// [rule id, severity label, points deducted when the rule fires].
const RULES = Object.fromEntries(
  [
    ['cwv-lcp-poor', 'high', 10],
    ['cwv-lcp-needs-work', 'medium', 5],
    ['cwv-cls-poor', 'high', 10],
    ['cwv-cls-needs-work', 'medium', 5],
    ['cwv-fcp-poor', 'medium', 5],
    ['cwv-fcp-needs-work', 'low', 2],
    ['cwv-tbt-poor', 'high', 10],
    ['cwv-tbt-needs-work', 'medium', 5],
    ['cwv-si-poor', 'medium', 5],
    ['cwv-si-needs-work', 'low', 2],
    ['perf-score-poor', 'high', 10],
    ['perf-score-needs-work', 'medium', 5],
  ].map(([ruleId, severity, deduction]) => [ruleId, { severity, deduction }]),
);
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// isAvailable — check if Lighthouse can run
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
/**
 * Report whether the optional Lighthouse dependencies were loaded.
 *
 * This only checks that both the `lighthouse` and `chrome-launcher` module
 * bindings are set; it does not probe for a Chrome binary.
 *
 * @returns {{ available: boolean, reason?: string }} `reason` is present only when unavailable
 */
export function isAvailable() {
  const dependenciesLoaded = Boolean(lighthouse) && Boolean(chromeLauncher);
  if (dependenciesLoaded) {
    return { available: true };
  }
  return {
    available: false,
    reason: 'Lighthouse not installed. Run: npm install -g lighthouse chrome-launcher',
  };
}
|
|
80
|
+
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// runLighthouse — run audit and extract CWV metrics
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
// Metrics rated by runLighthouse(), in report order.
//   key   - property name on `metrics` and lookup key into CWV_THRESHOLDS
//   audit - Lighthouse audit id the value is read from
//   rule  - prefix of the RULES ids (`<rule>-poor` / `<rule>-needs-work`)
//   unit  - how the value and its target are rendered in finding messages
const METRIC_CHECKS = [
  { key: 'LCP', audit: 'largest-contentful-paint', label: 'LCP', rule: 'cwv-lcp', unit: 's' },
  { key: 'CLS', audit: 'cumulative-layout-shift', label: 'CLS', rule: 'cwv-cls', unit: '' },
  { key: 'FCP', audit: 'first-contentful-paint', label: 'FCP', rule: 'cwv-fcp', unit: 's' },
  // TBT (proxy for INP in lab data)
  { key: 'TBT', audit: 'total-blocking-time', label: 'TBT', rule: 'cwv-tbt', unit: 'ms' },
  { key: 'SI', audit: 'speed-index', label: 'Speed Index', rule: 'cwv-si', unit: 's' },
];

/**
 * Read an audit's numeric value. Returns null when the audit is missing or its
 * value is not a finite number. A value of 0 is a real measurement (perfect
 * CLS, zero blocking time) and is returned as 0, not null.
 */
function readNumericValue(audits, auditId) {
  const value = audits?.[auditId]?.numericValue;
  return Number.isFinite(value) ? value : null;
}

/** Render a measured value and its "good" target in the unit used by finding messages. */
function describeMetric(unit, value, goodThreshold) {
  if (unit === 's') {
    return { measured: `${(value / 1000).toFixed(1)}s`, target: `${goodThreshold / 1000}s` };
  }
  if (unit === 'ms') {
    return { measured: `${Math.round(value)}ms`, target: `${goodThreshold}ms` };
  }
  return { measured: value.toFixed(3), target: `${goodThreshold}` };
}

/**
 * Run a Lighthouse performance audit on a URL and return its lab metrics.
 *
 * When the optional dependencies are not loaded, resolves to an
 * `available: false` result instead of throwing.
 *
 * @param {string} url — the URL to audit
 * @returns {Promise<object>} { url, available, metrics, findings, scores, summary }
 *   (plus `reason` when unavailable). Each measured metric `X` also gets an
 *   `X_rating` entry in `metrics`.
 * @throws {Error} when Chrome cannot be launched, the audit rejects, or
 *   Lighthouse returns no report.
 */
export async function runLighthouse(url) {
  const check = isAvailable();
  if (!check.available) {
    return {
      url,
      available: false,
      reason: check.reason,
      metrics: null,
      findings: [],
      scores: { performance: null },
      summary: { critical: 0, high: 0, medium: 0, low: 0 },
    };
  }

  // Launch Chrome headless.
  // NOTE(review): --no-sandbox turns off Chrome's sandbox; presumably kept for
  // container/CI environments — confirm it is still wanted for local runs.
  const chrome = await chromeLauncher.launch({
    chromeFlags: ['--headless', '--no-sandbox', '--disable-gpu'],
  });

  try {
    const result = await lighthouse(url, {
      port: chrome.port,
      output: 'json',
      onlyCategories: ['performance'],
      formFactor: 'mobile',
      screenEmulation: {
        mobile: true,
        width: 412,
        height: 823,
        deviceScaleFactor: 1.75,
      },
    });

    if (!result?.lhr) {
      throw new Error(`Lighthouse returned no report for ${url}`);
    }
    const lhr = result.lhr;
    const audits = lhr.audits;

    // Extract metrics (key order: performanceScore, then METRIC_CHECKS order).
    const metrics = {
      performanceScore: Math.round((lhr.categories.performance?.score ?? 0) * 100),
    };
    for (const { key, audit } of METRIC_CHECKS) {
      metrics[key] = readNumericValue(audits, audit);
    }

    // Generate findings. Unknown rule ids are ignored.
    const findings = [];
    const addFinding = (rule, message) => {
      const def = RULES[rule];
      if (def) {
        findings.push({ rule, severity: def.severity, file: url, message });
      }
    };

    // Performance score
    if (metrics.performanceScore < 50) {
      addFinding('perf-score-poor', `Lighthouse performance score is ${metrics.performanceScore}/100 (poor, target: 90+)`);
    } else if (metrics.performanceScore < 90) {
      addFinding('perf-score-needs-work', `Lighthouse performance score is ${metrics.performanceScore}/100 (target: 90+)`);
    }

    // Rate every metric that was actually measured.
    for (const { key, label, rule, unit } of METRIC_CHECKS) {
      const value = metrics[key];
      if (value === null) continue;

      const thresholds = CWV_THRESHOLDS[key];
      const rating = rateMetric(value, thresholds);
      const { measured, target } = describeMetric(unit, value, thresholds.good);

      if (rating === 'poor') {
        addFinding(`${rule}-poor`, `${label} is ${measured} (poor — should be under ${target})`);
      } else if (rating === 'needs-improvement') {
        addFinding(`${rule}-needs-work`, `${label} is ${measured} (needs improvement — target: under ${target})`);
      }
      metrics[`${key}_rating`] = rating;
    }

    // Calculate CWV score: each distinct triggered rule deducts once.
    const triggeredRules = new Set(findings.map((f) => f.rule));
    let score = 100;
    for (const rule of triggeredRules) {
      score -= RULES[rule]?.deduction ?? 0;
    }
    score = Math.max(0, score);

    const summary = { critical: 0, high: 0, medium: 0, low: 0 };
    for (const f of findings) {
      if (summary[f.severity] !== undefined) summary[f.severity]++;
    }

    return {
      url,
      available: true,
      metrics,
      findings,
      scores: { performance: score, lighthouseScore: metrics.performanceScore },
      summary,
    };
  } finally {
    // Always shut Chrome down, even when the audit throws.
    await chrome.kill();
  }
}
|
|
237
|
+
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
// CLI entry point
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
/**
 * Render one metric line of the human-readable report. A metric that was not
 * measured (null/undefined) is shown as "n/a" instead of being formatted.
 */
function formatMetricLine(label, value, rating, render) {
  if (value === null || value === undefined) {
    return ` ${label}: n/a [not measured]`;
  }
  return ` ${label}: ${render(value)} [${rating}]`;
}

/**
 * Build the lines of the human-readable report for an `available: true`
 * result of runLighthouse().
 *
 * @param {{ metrics: object, findings: Array<{ severity: string, message: string }> }} result
 * @returns {string[]} one entry per printed line
 */
function formatCwvReport(result) {
  const m = result.metrics;
  const seconds = (ms) => `${(ms / 1000).toFixed(1)}s`;
  const millis = (ms) => `${Math.round(ms)}ms`;
  const unitless = (v) => v.toFixed(3);

  const lines = [
    '',
    ' Core Web Vitals Report',
    ' ═══════════════════════',
    ` Performance Score: ${m.performanceScore}/100`,
    '',
    formatMetricLine('LCP (Largest Contentful Paint)', m.LCP, m.LCP_rating, seconds),
    formatMetricLine('CLS (Cumulative Layout Shift)', m.CLS, m.CLS_rating, unitless),
    formatMetricLine('FCP (First Contentful Paint)', m.FCP, m.FCP_rating, seconds),
    formatMetricLine('TBT (Total Blocking Time)', m.TBT, m.TBT_rating, millis),
    formatMetricLine('SI (Speed Index)', m.SI, m.SI_rating, seconds),
    '',
  ];

  if (result.findings.length > 0) {
    lines.push(' Findings:');
    for (const f of result.findings) {
      lines.push(` ${f.severity.toUpperCase().padEnd(8)} ${f.message}`);
    }
  } else {
    lines.push(' All Core Web Vitals are good!');
  }
  lines.push('');
  return lines;
}

// CLI: runs only when positional arguments are present at load time.
// NOTE(review): the 'detect' / 'generate' exclusions are kept from the original
// guard; they look copied from another tool's sub-commands — confirm they are needed.
const args = process.argv.slice(2);
if (args.length > 0 && args[0] !== 'detect' && args[0] !== 'generate') {
  const jsonFlag = args.includes('--json');

  // The URL is the first non-flag argument, so `--json <url>` works as well as `<url> --json`.
  const url = args.find((arg) => !arg.startsWith('--'));
  if (url === undefined) {
    console.error('Usage: node tools/lighthouse-scanner.mjs <url> [--json]');
    process.exit(1);
  }

  // Chrome launch or audit failures become a one-line error and exit code 1.
  let result;
  try {
    result = await runLighthouse(url);
  } catch (err) {
    console.error(`Lighthouse audit failed for ${url}: ${err?.message ?? err}`);
    process.exit(1);
  }

  if (jsonFlag || !process.stdout.isTTY) {
    // Machine-readable output when asked for, or when stdout is not a terminal.
    console.log(JSON.stringify(result, null, 2));
  } else if (!result.available) {
    console.log(`\n Lighthouse not available: ${result.reason}\n`);
    process.exit(0);
  } else {
    for (const line of formatCwvReport(result)) {
      console.log(line);
    }
  }
}
|
package/tools/seo-scanner.mjs
CHANGED
|
@@ -7,6 +7,7 @@ import fs from 'node:fs';
|
|
|
7
7
|
import path from 'node:path';
|
|
8
8
|
import { parseHtml, findHtmlFiles, detectPageType } from './lib/html-parser.mjs';
|
|
9
9
|
import { checkFileSize } from './lib/security.mjs';
|
|
10
|
+
import { validateSchema } from './schema-engine.mjs';
|
|
10
11
|
|
|
11
12
|
// ---------------------------------------------------------------------------
|
|
12
13
|
// Backend framework detection
|
|
@@ -80,6 +81,7 @@ const RULES = {
|
|
|
80
81
|
'images-missing-dimensions': { severity: 'medium', deduction: 5 },
|
|
81
82
|
'missing-main-landmark': { severity: 'medium', deduction: 5 },
|
|
82
83
|
'missing-json-ld': { severity: 'medium', deduction: 5 },
|
|
84
|
+
'schema-invalid': { severity: 'medium', deduction: 5 },
|
|
83
85
|
'missing-favicon': { severity: 'medium', deduction: 5 },
|
|
84
86
|
'no-analytics': { severity: 'medium', deduction: 5 },
|
|
85
87
|
|
|
@@ -243,6 +245,21 @@ function checkFile(state, filePath, rootDir, opts = {}) {
|
|
|
243
245
|
add('missing-json-ld', 'Page has no JSON-LD structured data');
|
|
244
246
|
}
|
|
245
247
|
|
|
248
|
+
// Validate JSON-LD schema against Google's required fields
|
|
249
|
+
if (state.jsonLdContent && state.jsonLdContent.length > 0) {
|
|
250
|
+
for (const raw of state.jsonLdContent) {
|
|
251
|
+
try {
|
|
252
|
+
const data = JSON.parse(raw);
|
|
253
|
+
const issues = validateSchema(data);
|
|
254
|
+
if (issues.length > 0) {
|
|
255
|
+
add('schema-invalid', `JSON-LD ${data['@type'] || 'unknown'} schema has issues: ${issues.join('; ')}`);
|
|
256
|
+
}
|
|
257
|
+
} catch {
|
|
258
|
+
// Malformed JSON-LD — already handled by missing-json-ld if count is 0
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
|
|
246
263
|
if (!state.hasFavicon) {
|
|
247
264
|
add('missing-favicon', 'Page is missing a favicon link');
|
|
248
265
|
}
|
package/tools/url-scanner.mjs
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import { parseHtml, detectPageType } from './lib/html-parser.mjs';
|
|
9
9
|
import { fetchPage } from './lib/url-fetcher.mjs';
|
|
10
10
|
import { crawlSite } from './lib/crawler.mjs';
|
|
11
|
+
import { validateSchema } from './schema-engine.mjs';
|
|
11
12
|
|
|
12
13
|
// ---------------------------------------------------------------------------
|
|
13
14
|
// Rule definitions (same as seo-scanner, minus cross-page-only rules)
|
|
@@ -42,6 +43,7 @@ const RULES = {
|
|
|
42
43
|
'images-missing-dimensions': { severity: 'medium', deduction: 5 },
|
|
43
44
|
'missing-main-landmark': { severity: 'medium', deduction: 5 },
|
|
44
45
|
'missing-json-ld': { severity: 'medium', deduction: 5 },
|
|
46
|
+
'schema-invalid': { severity: 'medium', deduction: 5 },
|
|
45
47
|
'missing-favicon': { severity: 'medium', deduction: 5 },
|
|
46
48
|
'no-analytics': { severity: 'medium', deduction: 5 },
|
|
47
49
|
|
|
@@ -62,6 +64,7 @@ const RULES = {
|
|
|
62
64
|
// HTTP-level rules (URL-scan only)
|
|
63
65
|
'http-error': { severity: 'critical', deduction: 20 },
|
|
64
66
|
'redirect-detected': { severity: 'low', deduction: 2 },
|
|
67
|
+
'redirect-chain': { severity: 'medium', deduction: 5 },
|
|
65
68
|
};
|
|
66
69
|
|
|
67
70
|
// ---------------------------------------------------------------------------
|
|
@@ -203,6 +206,21 @@ function checkPage(state, pageUrl) {
|
|
|
203
206
|
add('missing-json-ld', 'Page has no JSON-LD structured data');
|
|
204
207
|
}
|
|
205
208
|
|
|
209
|
+
// Validate JSON-LD schema against Google's required fields
|
|
210
|
+
if (state.jsonLdContent && state.jsonLdContent.length > 0) {
|
|
211
|
+
for (const raw of state.jsonLdContent) {
|
|
212
|
+
try {
|
|
213
|
+
const data = JSON.parse(raw);
|
|
214
|
+
const issues = validateSchema(data);
|
|
215
|
+
if (issues.length > 0) {
|
|
216
|
+
add('schema-invalid', `JSON-LD ${data['@type'] || 'unknown'} schema has issues: ${issues.join('; ')}`);
|
|
217
|
+
}
|
|
218
|
+
} catch {
|
|
219
|
+
// Malformed JSON-LD
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
206
224
|
if (!state.hasFavicon) {
|
|
207
225
|
add('missing-favicon', 'Page is missing a favicon link');
|
|
208
226
|
}
|
|
@@ -325,6 +343,20 @@ export async function scanUrl(url) {
|
|
|
325
343
|
file: url,
|
|
326
344
|
message: `URL redirected: ${url} → ${page.finalUrl}`,
|
|
327
345
|
});
|
|
346
|
+
|
|
347
|
+
// Redirect chain detection: flag chains with 2+ hops
|
|
348
|
+
if (page.redirectChain && page.redirectChain.length > 1) {
|
|
349
|
+
const chainDef = RULES['redirect-chain'];
|
|
350
|
+
const hops = page.redirectChain.map(r => `${r.from} (${r.statusCode})`).join(' → ');
|
|
351
|
+
findings.push({
|
|
352
|
+
rule: 'redirect-chain',
|
|
353
|
+
severity: chainDef.severity,
|
|
354
|
+
file: url,
|
|
355
|
+
message: `Redirect chain with ${page.redirectChain.length} hops: ${hops} → ${page.finalUrl}`,
|
|
356
|
+
chainLength: page.redirectChain.length,
|
|
357
|
+
chain: page.redirectChain,
|
|
358
|
+
});
|
|
359
|
+
}
|
|
328
360
|
}
|
|
329
361
|
|
|
330
362
|
const seoScore = calculateScore(findings);
|
|
@@ -345,6 +377,7 @@ export async function scanUrl(url) {
|
|
|
345
377
|
statusCode: page.statusCode,
|
|
346
378
|
redirected: page.redirected,
|
|
347
379
|
finalUrl: page.finalUrl,
|
|
380
|
+
redirectChain: page.redirectChain || [],
|
|
348
381
|
},
|
|
349
382
|
};
|
|
350
383
|
}
|
|
@@ -463,6 +496,20 @@ export async function scanSite(startUrl, options = {}) {
|
|
|
463
496
|
});
|
|
464
497
|
}
|
|
465
498
|
|
|
499
|
+
// Redirect chain detection
|
|
500
|
+
if (page.redirectChain && page.redirectChain.length > 1) {
|
|
501
|
+
const chainDef = RULES['redirect-chain'];
|
|
502
|
+
const hops = page.redirectChain.map(r => `${r.from} (${r.statusCode})`).join(' → ');
|
|
503
|
+
pageFindings.push({
|
|
504
|
+
rule: 'redirect-chain',
|
|
505
|
+
severity: chainDef.severity,
|
|
506
|
+
file: page.url,
|
|
507
|
+
message: `Redirect chain with ${page.redirectChain.length} hops: ${hops} → ${page.url}`,
|
|
508
|
+
chainLength: page.redirectChain.length,
|
|
509
|
+
chain: page.redirectChain,
|
|
510
|
+
});
|
|
511
|
+
}
|
|
512
|
+
|
|
466
513
|
perPageFindings.push(...pageFindings);
|
|
467
514
|
}
|
|
468
515
|
|