vercel-seo-audit 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +150 -50
  3. package/dist/audit/crawl.d.ts +3 -0
  4. package/dist/audit/crawl.d.ts.map +1 -0
  5. package/dist/audit/crawl.js +162 -0
  6. package/dist/audit/crawl.js.map +1 -0
  7. package/dist/audit/i18n.d.ts +3 -0
  8. package/dist/audit/i18n.d.ts.map +1 -0
  9. package/dist/audit/i18n.js +136 -0
  10. package/dist/audit/i18n.js.map +1 -0
  11. package/dist/audit/images.d.ts +3 -0
  12. package/dist/audit/images.d.ts.map +1 -0
  13. package/dist/audit/images.js +134 -0
  14. package/dist/audit/images.js.map +1 -0
  15. package/dist/audit/index.d.ts +4 -0
  16. package/dist/audit/index.d.ts.map +1 -1
  17. package/dist/audit/index.js +4 -0
  18. package/dist/audit/index.js.map +1 -1
  19. package/dist/audit/sitemap.d.ts.map +1 -1
  20. package/dist/audit/sitemap.js +19 -0
  21. package/dist/audit/sitemap.js.map +1 -1
  22. package/dist/audit/structuredData.d.ts +3 -0
  23. package/dist/audit/structuredData.d.ts.map +1 -0
  24. package/dist/audit/structuredData.js +131 -0
  25. package/dist/audit/structuredData.js.map +1 -0
  26. package/dist/cli.js +92 -21
  27. package/dist/cli.js.map +1 -1
  28. package/dist/constants.d.ts +2 -0
  29. package/dist/constants.d.ts.map +1 -1
  30. package/dist/constants.js +2 -0
  31. package/dist/constants.js.map +1 -1
  32. package/dist/runner.d.ts +1 -0
  33. package/dist/runner.d.ts.map +1 -1
  34. package/dist/runner.js +10 -1
  35. package/dist/runner.js.map +1 -1
  36. package/dist/types.d.ts +18 -2
  37. package/dist/types.d.ts.map +1 -1
  38. package/dist/utils/config.d.ts +4 -0
  39. package/dist/utils/config.d.ts.map +1 -0
  40. package/dist/utils/config.js +90 -0
  41. package/dist/utils/config.js.map +1 -0
  42. package/dist/utils/html-parser.d.ts +15 -0
  43. package/dist/utils/html-parser.d.ts.map +1 -1
  44. package/dist/utils/html-parser.js +33 -0
  45. package/dist/utils/html-parser.js.map +1 -1
  46. package/dist/utils/output.d.ts +3 -1
  47. package/dist/utils/output.d.ts.map +1 -1
  48. package/dist/utils/output.js +29 -0
  49. package/dist/utils/output.js.map +1 -1
  50. package/package.json +8 -3
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yusufhan Sacak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,40 +1,28 @@
1
1
  # vercel-seo-audit
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/vercel-seo-audit.svg)](https://www.npmjs.com/package/vercel-seo-audit)
4
+ [![npm downloads](https://img.shields.io/npm/dm/vercel-seo-audit.svg)](https://www.npmjs.com/package/vercel-seo-audit)
5
+ [![CI](https://github.com/JosephDoUrden/vercel-seo-audit/actions/workflows/ci.yml/badge.svg)](https://github.com/JosephDoUrden/vercel-seo-audit/actions/workflows/ci.yml)
6
+ [![licence](https://img.shields.io/npm/l/vercel-seo-audit.svg)](./LICENSE)
7
+
3
8
  > **If you're using Vercel and Google hates your site, this is for you.**
4
9
 
5
- A fast, developer-friendly CLI that explains **why Google isnt indexing your Next.js site** — beyond the vague stuff in Search Console.
10
+ A fast, developer-friendly CLI that explains **why Google isn't indexing your Next.js site** — beyond the vague stuff in Search Console.
6
11
  It detects the misconfigs that silently kill crawling and indexing: **redirect traps (308), missing robots/sitemap, noindex headers, canonical mismatches, and Vercel/Next.js quirks**.
7
12
 
8
13
  ---
9
14
 
10
- ## Why this exists
11
-
12
- Google Search Console often reports symptoms like:
13
- - *“Page with redirect”*
14
- - *“Discovered – currently not indexed”*
15
- - *“Alternate page with proper canonical”*
16
-
17
- …but doesn’t tell you **what’s actually wrong** or **what to change**.
18
-
19
- `vercel-seo-audit` turns those symptoms into **actionable fixes**.
20
-
21
- For a deeper dive into why Next.js sites often struggle with indexing, see:
22
- - [Why Google Refuses to Index Your Next.js Site](https://dev.to/yusufhansck/why-google-refuses-to-index-your-nextjs-site-173a) (dev.to)
23
- - [Why Google Refuses to Index Your Next.js Site](https://yusufhansacak.medium.com/why-google-refuses-to-index-your-next-js-site-04a924948859) (Medium)
24
-
25
- ---
26
-
27
15
  ## Quick start
28
16
 
29
17
  ```bash
30
- npx vercel-seo-audit https://yusufhan.dev
18
+ npx vercel-seo-audit https://your-site.com
31
19
  ```
32
20
 
33
21
  Install globally (optional):
34
22
 
35
23
  ```bash
36
24
  npm i -g vercel-seo-audit
37
- vercel-seo-audit https://yusufhan.dev
25
+ vercel-seo-audit https://your-site.com
38
26
  ```
39
27
 
40
28
  ---
@@ -42,13 +30,19 @@ vercel-seo-audit https://yusufhan.dev
42
30
  ## Example output
43
31
 
44
32
  ```txt
45
- SEO Audit Report for https://yusufhan.dev/
33
+ SEO Audit Report for https://example.com/
46
34
  Completed in 1118ms at 2026-01-31T12:30:54.448Z
47
35
 
48
36
  Summary:
49
- 2 warnings
50
- 1 info
51
- 1 passed
37
+ 1 error
38
+ 3 warnings
39
+ 2 info
40
+ ✔ 4 passed
41
+
42
+ REDIRECTS
43
+ ────────────────────────────────────────
44
+ ✖ [ERROR] Redirect chain detected (3 hops)
45
+ → Reduce to a single redirect: http://example.com → https://example.com/
52
46
 
53
47
  ROBOTS
54
48
  ────────────────────────────────────────
@@ -59,40 +53,71 @@ SEO Audit Report for https://yusufhan.dev/
59
53
  ────────────────────────────────────────
60
54
  ⚠ [WARNING] sitemap.xml not found
61
55
  → Add app/sitemap.ts in Next.js App Router
56
+
57
+ METADATA
58
+ ────────────────────────────────────────
59
+ ⚠ [WARNING] Canonical URL mismatch
60
+ → Canonical points to https://www.example.com/ but page is https://example.com/
61
+ ✔ [PASS] Title tag present
62
+ ✔ [PASS] Meta description present
62
63
  ```
63
64
 
64
65
  ---
65
66
 
67
+ ## Common pitfalls we catch
68
+
69
+ These are the issues that silently tank your rankings on Next.js/Vercel:
70
+
71
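+ - **Next.js 308 trailing-slash traps** — the `trailingSlash` setting makes Next.js answer the other URL form with a 308 redirect; links, canonicals, or sitemap entries that use the wrong form add redirect hops (or loops, when another rule redirects back) that waste crawl budget and confuse Google.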
72
+ - **www / non-www domain redirects** — serving the same content on both domains without a redirect splits link equity and causes duplicate-content issues.
73
+ - **Middleware rewrites & redirects affecting crawlers** — Next.js middleware can rewrite or redirect in ways that only affect bots, breaking indexing without any visible symptoms in a browser.
74
+ - **Canonical mismatch** — the `<link rel="canonical">` points to a different URL than the page actually lives at, telling Google to ignore the page.
75
+ - **Sitemap / robots issues** — missing `sitemap.xml`, empty sitemaps, robots.txt blocking Googlebot, or `Sitemap:` directive pointing to the wrong URL.
76
+
77
+ ---
78
+
66
79
  ## Usage
67
80
 
68
81
  ```bash
69
82
  # Basic audit
70
- vercel-seo-audit https://yusufhan.dev
83
+ vercel-seo-audit https://your-site.com
71
84
 
72
85
  # JSON output (pipe to jq, save to file, feed to CI)
73
- vercel-seo-audit https://yusufhan.dev --json
86
+ vercel-seo-audit https://your-site.com --json
74
87
 
75
88
  # Verbose mode — raw HTTP details for each finding
76
- vercel-seo-audit https://yusufhan.dev --verbose
89
+ vercel-seo-audit https://your-site.com --verbose
77
90
 
78
91
  # Custom timeout (default: 10s)
79
- vercel-seo-audit https://yusufhan.dev --timeout 15000
92
+ vercel-seo-audit https://your-site.com --timeout 15000
80
93
 
81
94
  # Check specific pages for redirect issues
82
- vercel-seo-audit https://yusufhan.dev --pages /docs,/team,/careers
95
+ vercel-seo-audit https://your-site.com --pages /docs,/team,/careers
83
96
 
84
97
  # Audit as Googlebot
85
- vercel-seo-audit https://yusufhan.dev --user-agent googlebot
98
+ vercel-seo-audit https://your-site.com --user-agent googlebot
86
99
 
87
100
  # Audit as Bingbot
88
- vercel-seo-audit https://yusufhan.dev --user-agent bingbot
101
+ vercel-seo-audit https://your-site.com --user-agent bingbot
89
102
 
90
103
  # Custom crawler user-agent
91
- vercel-seo-audit https://yusufhan.dev --user-agent "Googlebot-Image/1.0"
104
+ vercel-seo-audit https://your-site.com --user-agent "Googlebot-Image/1.0"
92
105
 
93
106
  # Write report to file (json or md)
94
- vercel-seo-audit https://yusufhan.dev --report json
95
- vercel-seo-audit https://yusufhan.dev --report md
107
+ vercel-seo-audit https://your-site.com --report json
108
+ vercel-seo-audit https://your-site.com --report md
109
+
110
+ # Compare against a previous report to detect regressions
111
+ vercel-seo-audit https://your-site.com --diff previous-report.json
112
+
113
+ # Diff with JSON output
114
+ vercel-seo-audit https://your-site.com --diff previous-report.json --json
115
+
116
+ # Crawl the URLs listed in the sitemap and audit each page (limit defaults to 50 pages)
117
+ vercel-seo-audit https://your-site.com --crawl
118
+
119
+ # Crawl with a custom limit
120
+ vercel-seo-audit https://your-site.com --crawl 100
96
121
  ```
97
122
 
98
123
  ---
@@ -105,7 +130,7 @@ vercel-seo-audit https://yusufhan.dev --report md
105
130
  * HTTP → HTTPS redirect
106
131
  * Trailing slash consistency (catches Next.js **308 traps**)
107
132
  * Meta refresh redirects (`<meta http-equiv="refresh">`)
108
- * Samples common routes: `/about`, `/contact`, `/blog`, `/pricing` (customizable with `--pages`)
133
+ * Samples common routes: `/about`, `/contact`, `/blog`, `/pricing` (customisable with `--pages`)
109
134
 
110
135
  ### robots.txt
111
136
 
@@ -117,7 +142,7 @@ vercel-seo-audit https://yusufhan.dev --report md
117
142
  ### Sitemap
118
143
 
119
144
  * Missing or malformed `sitemap.xml`
120
- * Sitemap redirects (some crawlers wont follow)
145
+ * Sitemap redirects (some crawlers won't follow)
121
146
  * Empty sitemap / broken URLs (samples up to 10)
122
147
  * Sitemap index support
123
148
  * Cross-check with robots.txt `Sitemap:` directive
@@ -135,17 +160,55 @@ vercel-seo-audit https://yusufhan.dev --report md
135
160
  * `/favicon.ico` exists but no `<link>` tag
136
161
  * Conflicting favicon declarations (multiple icons)
137
162
 
163
+ ### Structured Data
164
+
165
+ * Missing JSON-LD blocks entirely
166
+ * Invalid JSON syntax in `<script type="application/ld+json">`
167
+ * Missing `@context` or `@type` properties
168
+ * Missing required fields for known types (Article, FAQPage, Product, Organization, etc.)
169
+
170
+ ### Image SEO
171
+
172
+ * Missing `alt` attributes (accessibility + SEO)
173
+ * Empty `alt` on potentially non-decorative images
174
+ * Not using `next/image` component on Next.js sites
175
+ * Missing `loading="lazy"` on below-fold images
176
+ * Oversized images (> 200 KB via HEAD request)
177
+ * Missing `width`/`height` attributes (causes layout shift)
178
+
179
+ ### Crawl Mode (`--crawl`)
180
+
181
+ When `--crawl` is enabled, each URL from the sitemap (up to the page limit, 50 by default) is fetched and audited for:
182
+
183
+ * Non-2xx status codes (broken pages)
184
+ * `noindex` directives (meta tag or `X-Robots-Tag` header)
185
+ * Missing `<title>` tag
186
+ * Missing meta description
187
+ * Missing or mismatched canonical URL
188
+ * Missing JSON-LD structured data
189
+
190
+ Progress is printed to stderr as each page is crawled.
191
+
192
+ ### Internationalisation (hreflang)
193
+
194
+ * Missing hreflang tags entirely (informational for single-language sites)
195
+ * Invalid language/region codes
196
+ * Missing self-referencing hreflang entry
197
+ * Missing `x-default` fallback
198
+ * Duplicate hreflang values
199
+ * Missing reciprocal links (page A links to B, but B doesn't link back to A)
200
+
138
201
  ### Next.js / Vercel
139
202
 
140
203
  * Detect Vercel deployment
141
- * Detect Next.js trailing slash redirect behavior
204
+ * Detect Next.js trailing slash redirect behaviour
142
205
  * Middleware rewrite/redirect headers (best-effort)
143
206
 
144
207
  ---
145
208
 
146
209
  ## Severity & exit codes
147
210
 
148
- Findings are categorized by severity:
211
+ Findings are categorised by severity:
149
212
 
150
213
  | Icon | Severity | Meaning |
151
214
  | ---- | ----------- | ---------------------------------- |
@@ -158,7 +221,7 @@ Exit codes:
158
221
 
159
222
  | Code | Meaning |
160
223
  | ---- | ------------------------------------------ |
161
- | `0` | No errors found (warnings/info dont fail) |
224
+ | `0` | No errors found (warnings/info don't fail) |
162
225
  | `1` | One or more errors found |
163
226
  | `2` | Crash / invalid input |
164
227
 
@@ -180,7 +243,7 @@ jobs:
180
243
  steps:
181
244
  - uses: JosephDoUrden/vercel-seo-audit@v1
182
245
  with:
183
- url: https://yusufhan.dev
246
+ url: https://your-site.com
184
247
  strict: true
185
248
  report: json
186
249
  ```
@@ -194,6 +257,7 @@ All inputs:
194
257
  | `user-agent` | no | — | `googlebot`, `bingbot`, or custom string |
195
258
  | `pages` | no | — | Comma-separated page paths |
196
259
  | `report` | no | — | Write report file: `json` or `md` |
260
+ | `crawl` | no | — | Crawl sitemap URLs (number = page limit, default 50) |
197
261
  | `timeout` | no | `10000` | Request timeout in ms |
198
262
  | `verbose` | no | `false` | Show detailed output |
199
263
 
@@ -211,25 +275,45 @@ jobs:
211
275
  audit:
212
276
  runs-on: ubuntu-latest
213
277
  steps:
214
- - uses: actions/checkout@v4
215
278
  - uses: actions/setup-node@v4
216
279
  with:
217
280
  node-version: 20
218
- - run: npx vercel-seo-audit https://yusufhan.dev --json
281
+ - run: npx vercel-seo-audit https://your-site.com --strict --report json
282
+ ```
283
+
284
+ ### Strict mode & reports in CI
285
+
286
+ Use `--strict` (`-S`) to fail CI on warnings as well as errors — useful for enforcing SEO standards on every deploy:
287
+
288
+ ```bash
289
+ # Fail on any warning or error
290
+ npx vercel-seo-audit https://your-site.com --strict
291
+
292
+ # Generate a JSON report and fail strictly
293
+ npx vercel-seo-audit https://your-site.com --strict --report json
294
+
295
+ # Generate a Markdown report for PR comments
296
+ npx vercel-seo-audit https://your-site.com --report md
219
297
  ```
220
298
 
221
- >[!TIP]
222
- >If you want warnings to fail CI too, add a `--strict` or `-S` flag.
299
+ > [!TIP]
300
+ > If you want warnings to fail CI too, add a `--strict` or `-S` flag.
223
301
 
224
302
  ---
225
303
 
226
304
  ## Roadmap
227
305
 
228
306
  * [x] ~~`--strict` (warnings fail with exit code 1)~~
229
- * [x] ~~`--pages` to customize sampled paths (`/about,/pricing`)~~
307
+ * [x] ~~`--pages` to customise sampled paths (`/about,/pricing`)~~
230
308
  * [x] ~~`--user-agent` presets (`googlebot`, `bingbot`)~~
231
309
  * [x] ~~`--report` to write `report.json` / `report.md`~~
232
310
  * [x] ~~GitHub Action marketplace wrapper~~
311
+ * [x] ~~`--diff` to compare two audit runs and detect regressions~~
312
+ * [x] ~~Structured data / JSON-LD validation~~
313
+ * [x] ~~`--crawl` mode to audit all pages from sitemap~~
314
+ * [x] ~~i18n / `hreflang` validation~~
315
+ * [x] ~~Image SEO checks (missing `alt`, `next/image`, lazy loading)~~
316
+ * [ ] Config file (`.seoauditrc.json`) for project-level defaults
233
317
 
234
318
  ---
235
319
 
@@ -239,7 +323,7 @@ jobs:
239
323
  No — it explains & verifies the things Search Console often reports vaguely.
240
324
 
241
325
  **Will it scan my entire site?**
242
- No. It checks critical endpoints + samples common pages to stay fast.
326
+ No. It checks critical endpoints + samples common pages to stay fast. Use `--crawl` to audit the pages listed in your sitemap (up to the page limit, 50 by default).
243
327
 
244
328
  **Does it work on non-Next.js sites?**
245
329
  Yes for most checks (redirects/robots/sitemap/metadata). Some checks are Next.js/Vercel-specific.
@@ -248,14 +332,30 @@ Yes for most checks (redirects/robots/sitemap/metadata). Some checks are Next.js
248
332
 
249
333
  ## Contributing
250
334
 
251
- PRs welcome. If you’re fixing a false positive, include:
335
+ Contributions are welcome! Please read the [Contributing Guide](./CONTRIBUTING.md) before opening a PR.
336
+
337
+ If you're looking for a place to start, check out issues labelled [`good first issue`](https://github.com/JosephDoUrden/vercel-seo-audit/labels/good%20first%20issue).
338
+
339
+ If you're reporting a false positive, include:
252
340
 
253
341
  * the URL (or a reproducible HTML sample)
254
- * expected behavior
342
+ * expected behaviour
255
343
  * actual output
256
344
 
257
345
  ---
258
346
 
259
- ## License
347
+ ## Releasing
348
+
349
+ Releases are automated with [release-please](https://github.com/googleapis/release-please).
350
+ Merge conventional commits to `main` and release-please opens a release PR that
351
+ bumps the version and updates `CHANGELOG.md`. Merging that PR creates a GitHub
352
+ Release, which triggers npm publish automatically.
353
+
354
+ See [docs/RELEASING.md](./docs/RELEASING.md) for the full process, commit
355
+ message format, and required secrets.
356
+
357
+ ---
358
+
359
+ ## Licence
260
360
 
261
- MIT
361
+ [MIT](./LICENSE)
@@ -0,0 +1,3 @@
1
+ import type { AuditContext, AuditFinding } from '../types.js';
2
/**
 * Crawl-mode audit: fetches the URLs in `ctx.sitemapUrls` (capped by the crawl
 * limit) and resolves with the findings collected for those pages. Resolves
 * with an empty list when the context carries no sitemap URLs.
 */
+ export declare function auditCrawl(ctx: AuditContext): Promise<AuditFinding[]>;
3
+ //# sourceMappingURL=crawl.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawl.d.ts","sourceRoot":"","sources":["../../src/audit/crawl.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAW9D,wBAAsB,UAAU,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CA8B3E"}
@@ -0,0 +1,162 @@
1
+ import { DEFAULT_CRAWL_LIMIT, CRAWL_CONCURRENCY } from '../constants.js';
2
+ import { fetchPage } from '../utils/http.js';
3
+ import { getNoindexDirective, getTitle, getMetaTag, getCanonicalUrl, } from '../utils/html-parser.js';
4
+ import * as cheerio from 'cheerio';
5
/**
 * Crawl-mode audit: fetches sitemap URLs and runs the per-page checks on each.
 *
 * URLs are taken from `ctx.sitemapUrls` and capped at `ctx.crawlLimit`
 * (DEFAULT_CRAWL_LIMIT when unset). Pages are audited in batches of
 * CRAWL_CONCURRENCY, and a progress line is written to stderr for every page.
 *
 * @param ctx Audit context; reads `sitemapUrls` and `crawlLimit`, and is passed on to `auditPage`.
 * @returns Findings for every crawled page, in sitemap order. Empty when there are no sitemap URLs.
 */
export async function auditCrawl(ctx) {
    const findings = [];
    if (!ctx.sitemapUrls || ctx.sitemapUrls.length === 0) {
        return findings;
    }
    // A negative limit would make slice() drop URLs from the END of the list and
    // NaN would silently select nothing, so fall back to the default for both.
    const requestedLimit = ctx.crawlLimit ?? DEFAULT_CRAWL_LIMIT;
    const limit = Number(requestedLimit) >= 0 ? requestedLimit : DEFAULT_CRAWL_LIMIT;
    const urls = ctx.sitemapUrls.slice(0, limit);
    const total = urls.length;
    // Process in batches of CRAWL_CONCURRENCY
    for (let i = 0; i < total; i += CRAWL_CONCURRENCY) {
        const batch = urls.slice(i, i + CRAWL_CONCURRENCY);
        const results = await Promise.allSettled(batch.map(async (pageUrl, batchIdx) => {
            process.stderr.write(`Crawling [${i + batchIdx + 1}/${total}] ${pageUrl}\n`);
            return auditPage(pageUrl, ctx);
        }));
        results.forEach((result, batchIdx) => {
            if (result.status === 'fulfilled') {
                findings.push(...result.value);
                return;
            }
            // Do not drop a page silently when its audit throws: surface it so the
            // report shows that this URL was not fully checked.
            const pageUrl = batch[batchIdx];
            const reason = result.reason;
            findings.push({
                code: 'CRAWL_PAGE_ERROR',
                severity: 'error',
                category: 'crawl',
                message: `Failed to audit page: ${pageUrl}`,
                explanation: 'An unexpected error interrupted the checks for this page, so its results are incomplete.',
                suggestion: 'Verify that the page serves well-formed HTML, then re-run the audit.',
                details: { error: reason instanceof Error ? reason.message : String(reason) },
                url: pageUrl,
            });
        });
    }
    return findings;
}
29
/**
 * Fetches a single sitemap URL and runs the crawl-mode checks on the response.
 *
 * Checks, in order: fetch failure, non-2xx status (both end the audit of this
 * page early), noindex via meta tag, noindex via X-Robots-Tag header, missing
 * <title>, missing meta description, missing/invalid/mismatched canonical and
 * missing JSON-LD.
 *
 * @param pageUrl Absolute URL of the page to audit.
 * @param ctx Audit context; `ctx.fetchOptions` is forwarded to `fetchPage`.
 * @returns The findings for this page (possibly empty).
 */
async function auditPage(pageUrl, ctx) {
    const findings = [];
    // Builds a crawl finding for this page. Key order matches the object
    // literals used elsewhere so serialised output stays stable.
    const report = (code, severity, message, explanation, suggestion, details) => {
        findings.push({
            code,
            severity,
            category: 'crawl',
            message,
            explanation,
            suggestion,
            ...(details ? { details } : {}),
            url: pageUrl,
        });
    };
    let body;
    let status;
    let headers;
    try {
        const res = await fetchPage(pageUrl, ctx.fetchOptions);
        body = res.body;
        status = res.status;
        headers = res.headers;
    }
    catch {
        report('CRAWL_PAGE_ERROR', 'error', `Failed to fetch page: ${pageUrl}`, 'The page could not be reached, which means search engines cannot crawl it either.', 'Ensure the page is accessible and not timing out.');
        return findings;
    }
    // Non-2xx status: the body is not meaningful, so skip the content checks.
    if (status < 200 || status >= 300) {
        report('CRAWL_PAGE_ERROR', 'error', `Page returned HTTP ${status}: ${pageUrl}`, 'Pages in the sitemap should return a 200 status. Non-2xx pages waste crawl budget.', 'Fix the page or remove it from the sitemap.', { status });
        return findings;
    }
    // noindex via meta tag
    if (getNoindexDirective(body)) {
        report('CRAWL_PAGE_NOINDEX', 'warning', `Page has noindex directive: ${pageUrl}`, 'A page in the sitemap should not have a noindex directive — it sends conflicting signals to search engines.', 'Remove the noindex tag or remove the page from the sitemap.');
    }
    // noindex via X-Robots-Tag header. The "none" directive is shorthand for
    // "noindex, nofollow", so it blocks indexing as well.
    const xRobotsTag = (headers.get('x-robots-tag') ?? '').toLowerCase();
    if (/\b(?:noindex|none)\b/.test(xRobotsTag)) {
        report('CRAWL_PAGE_NOINDEX', 'warning', `Page has X-Robots-Tag noindex header: ${pageUrl}`, 'The X-Robots-Tag header tells search engines not to index this page, conflicting with its presence in the sitemap.', 'Remove the X-Robots-Tag noindex header or remove the page from the sitemap.');
    }
    // Missing title
    if (!getTitle(body)) {
        report('CRAWL_PAGE_TITLE_MISSING', 'warning', `Page is missing <title>: ${pageUrl}`, 'The title tag is a critical ranking signal and is displayed in search results.', 'Add a unique, descriptive <title> tag to this page.');
    }
    // Missing meta description
    if (!getMetaTag(body, 'description')) {
        report('CRAWL_PAGE_DESCRIPTION_MISSING', 'info', `Page is missing meta description: ${pageUrl}`, 'Meta descriptions are shown in search result snippets and can improve click-through rates.', 'Add a <meta name="description"> tag with a concise summary of the page.');
    }
    // Canonical: missing, unparseable, or pointing somewhere else
    const canonical = getCanonicalUrl(body);
    if (!canonical) {
        report('CRAWL_PAGE_CANONICAL_MISSING', 'warning', `Page is missing canonical tag: ${pageUrl}`, 'Without a canonical tag, search engines may treat URL variations as duplicate content.', 'Add a <link rel="canonical"> tag pointing to the preferred URL.');
    }
    else {
        let resolvedCanonical;
        let resolvedPage;
        try {
            resolvedCanonical = new URL(canonical, pageUrl).href;
            // Run the page URL through the same parser so that host case, default
            // ports and a missing root slash do not register as a mismatch.
            resolvedPage = new URL(pageUrl).href;
        }
        catch {
            // new URL() throws on malformed input; report it instead of letting
            // the exception discard every finding for this page.
            resolvedCanonical = undefined;
        }
        if (resolvedCanonical === undefined) {
            report('CRAWL_PAGE_CANONICAL_MISMATCH', 'warning', `Canonical URL is not a valid URL: ${pageUrl}`, 'The canonical tag contains a value that cannot be parsed as a URL, so search engines cannot use it.', 'Set the canonical tag to the absolute URL of this page.', { canonical, pageUrl });
        }
        else {
            // Ignore a single trailing slash on either side.
            const normalizedPage = resolvedPage.replace(/\/$/, '');
            const normalizedCanonical = resolvedCanonical.replace(/\/$/, '');
            if (normalizedCanonical !== normalizedPage) {
                report('CRAWL_PAGE_CANONICAL_MISMATCH', 'warning', `Canonical URL does not match page URL: ${pageUrl}`, 'The canonical tag points to a different URL, which tells search engines this page is a duplicate.', 'Update the canonical tag to match the page URL, or remove this page from the sitemap.', { canonical: resolvedCanonical, pageUrl });
            }
        }
    }
    // Missing JSON-LD
    const $ = cheerio.load(body);
    if ($('script[type="application/ld+json"]').length === 0) {
        report('CRAWL_PAGE_JSONLD_MISSING', 'info', `Page has no structured data: ${pageUrl}`, 'Structured data helps search engines understand page content and can enable rich results.', 'Add JSON-LD structured data relevant to the page content.');
    }
    return findings;
}
162
+ //# sourceMappingURL=crawl.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawl.js","sourceRoot":"","sources":["../../src/audit/crawl.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EACL,mBAAmB,EACnB,QAAQ,EACR,UAAU,EACV,eAAe,GAChB,MAAM,yBAAyB,CAAC;AACjC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAiB;IAChD,MAAM,QAAQ,GAAmB,EAAE,CAAC;IAEpC,IAAI,CAAC,GAAG,CAAC,WAAW,IAAI,GAAG,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,KAAK,GAAG,GAAG,CAAC,UAAU,IAAI,mBAAmB,CAAC;IACpD,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;IAE1B,0CAA0C;IAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,iBAAiB,EAAE,CAAC;QACxD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE;YACpC,MAAM,GAAG,GAAG,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAC;YAC7B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,GAAG,IAAI,KAAK,KAAK,OAAO,IAAI,CAAC,CAAC;YAChE,OAAO,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjC,CAAC,CAAC,CACH,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,QAAQ,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,SAAS,CACtB,OAAe,EACf,GAAiB;IAEjB,MAAM,QAAQ,GAAmB,EAAE,CAAC;IAEpC,IAAI,IAAY,CAAC;IACjB,IAAI,MAAc,CAAC;IACnB,IAAI,OAAgB,CAAC;IACrB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC;QACvD,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;QAChB,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;QACpB,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,kBAAkB;YACxB,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,yBAAyB,OAAO,EAAE;YAC3C,WAAW,EAAE,mFAAmF;YAChG,UAAU,EAAE,mDAAmD;YAC/D,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,iBAAiB;IACjB,IAAI,MAAM,GAAG,
GAAG,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;QAClC,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,kBAAkB;YACxB,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,sBAAsB,MAAM,KAAK,OAAO,EAAE;YACnD,WAAW,EAAE,oFAAoF;YACjG,UAAU,EAAE,6CAA6C;YACzD,OAAO,EAAE,EAAE,MAAM,EAAE;YACnB,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,uBAAuB;IACvB,IAAI,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9B,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,oBAAoB;YAC1B,QAAQ,EAAE,SAAS;YACnB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,+BAA+B,OAAO,EAAE;YACjD,WAAW,EAAE,6GAA6G;YAC1H,UAAU,EAAE,6DAA6D;YACzE,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;IACL,CAAC;IAED,kCAAkC;IAClC,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IACrD,IAAI,UAAU,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACjD,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,oBAAoB;YAC1B,QAAQ,EAAE,SAAS;YACnB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,yCAAyC,OAAO,EAAE;YAC3D,WAAW,EAAE,oHAAoH;YACjI,UAAU,EAAE,6EAA6E;YACzF,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;IACL,CAAC;IAED,gBAAgB;IAChB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACpB,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,0BAA0B;YAChC,QAAQ,EAAE,SAAS;YACnB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,4BAA4B,OAAO,EAAE;YAC9C,WAAW,EAAE,gFAAgF;YAC7F,UAAU,EAAE,qDAAqD;YACjE,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;IACL,CAAC;IAED,2BAA2B;IAC3B,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,aAAa,CAAC,EAAE,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,gCAAgC;YACtC,QAAQ,EAAE,MAAM;YAChB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,qCAAqC,OAAO,EAAE;YACvD,WAAW,EAAE,4FAA4F;YACzG,UAAU,EAAE,yEAAyE;YACrF,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;IACL,CAAC;IAED,oBAAoB;IACpB,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACxC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,8BAA8B;YACpC,QAAQ,EAAE,SAAS;YACnB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,kCAAkC,OAAO,EAAE;YACpD,WAAW,EAAE,wFAAwF;YACrG,UAAU,EAAE,iEAAiE;YAC7E,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,qBAAqB;QACrB,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QAC3D,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAClD,MAAM,mBAAmB,GAAG,iBAAiB,CAAC,OAAO
,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACjE,IAAI,mBAAmB,KAAK,cAAc,EAAE,CAAC;YAC3C,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,+BAA+B;gBACrC,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,0CAA0C,OAAO,EAAE;gBAC5D,WAAW,EAAE,mGAAmG;gBAChH,UAAU,EAAE,uFAAuF;gBACnG,OAAO,EAAE,EAAE,SAAS,EAAE,iBAAiB,EAAE,OAAO,EAAE;gBAClD,GAAG,EAAE,OAAO;aACb,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,aAAa,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC;IAC9D,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,2BAA2B;YACjC,QAAQ,EAAE,MAAM;YAChB,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,gCAAgC,OAAO,EAAE;YAClD,WAAW,EAAE,2FAA2F;YACxG,UAAU,EAAE,2DAA2D;YACvE,GAAG,EAAE,OAAO;SACb,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { AuditContext, AuditFinding } from '../types.js';
2
/**
 * Entry point of the i18n audit: takes the shared audit context and resolves
 * with a list of findings.
 * NOTE(review): the implementation is not visible here; presumably it performs
 * the hreflang validation — confirm against i18n.js.
 */
+ export declare function auditI18n(ctx: AuditContext): Promise<AuditFinding[]>;
3
+ //# sourceMappingURL=i18n.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"i18n.d.ts","sourceRoot":"","sources":["../../src/audit/i18n.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAoB9D,wBAAsB,SAAS,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAiJ1E"}