@jdevalk/astro-seo-graph 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,9 +1,12 @@
1
1
  # @jdevalk/astro-seo-graph
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/@jdevalk/astro-seo-graph)](https://www.npmjs.com/package/@jdevalk/astro-seo-graph)
4
+ [![license](https://img.shields.io/npm/l/@jdevalk/astro-seo-graph)](https://github.com/jdevalk/seo-graph/blob/main/LICENSE)
5
+
3
6
  Astro integration for [`@jdevalk/seo-graph-core`](../seo-graph-core). Ships a
4
7
  `<Seo>` component, route factories for agent-ready schema endpoints, a
5
- content-collection aggregator, breadcrumb helpers, and Zod helpers for content
6
- schemas.
8
+ content-collection aggregator, breadcrumb helpers, a fuzzy 404 redirect
9
+ component, and Zod helpers for content schemas.
7
10
 
8
11
  For detailed usage — including all builder signatures, site-type recipes, and
9
12
  schema.org best practices — see [AGENTS.md](https://github.com/jdevalk/seo-graph/blob/main/AGENTS.md).
@@ -20,6 +23,7 @@ schema.org best practices — see [AGENTS.md](https://github.com/jdevalk/seo-gra
20
23
  | **`buildAstroSeoProps`** | Pure-TS logic that powers `<Seo>` — exported for users who want to feed a different head component. |
21
24
  | **`buildAlternateLinks`** | Pure helper that turns a `{ hreflang, href }` entry list into normalized `<link rel="alternate">` tags plus an `x-default`. Used internally by `<Seo>`'s `alternates` prop, and exported for non-Astro callers (e.g. CMS plugins feeding their own metadata pipelines). |
22
25
  | **`breadcrumbsFromUrl`** | Derives a breadcrumb trail from an Astro URL. Splits path segments, supports custom display names and segment skipping. Returns `BreadcrumbItem[]` ready to pass to `buildBreadcrumbList`. |
26
+ | **`<FuzzyRedirect>`** | Drop-in 404 component. Fetches your sitemap, fuzzy-matches the current URL against known paths, and suggests or auto-redirects to the closest match. |
23
27
 
24
28
  ## Installation
25
29
 
@@ -107,6 +111,57 @@ Segments without a `names` entry are title-cased from their slug
107
111
  (e.g. `https://example.com/docs`) are supported — pass the base path as part
108
112
  of `siteUrl`.
109
113
 
114
+ ## Fuzzy 404 redirect
115
+
116
+ When a visitor hits a 404, `<FuzzyRedirect>` fetches your sitemap, compares
117
+ the mistyped URL against all known paths, and either suggests the closest
118
+ match or auto-redirects. Drop it into your `404.astro` page:
119
+
120
+ ```astro
121
+ ---
122
+ // src/pages/404.astro
123
+ import FuzzyRedirect from '@jdevalk/astro-seo-graph/FuzzyRedirect.astro';
124
+ ---
125
+
126
+ <html lang="en">
127
+ <head>
128
+ <meta charset="utf-8" />
129
+ <title>Page not found</title>
130
+ </head>
131
+ <body>
132
+ <h1>Page not found</h1>
133
+ <p>Sorry, the page you're looking for doesn't exist.</p>
134
+ <div style="font-size: 1.25em; font-weight: bold;">
135
+ <FuzzyRedirect />
136
+ </div>
137
+ <p><a href="/">Go to the homepage</a></p>
138
+ </body>
139
+ </html>
140
+ ```
141
+
142
+ When a close match is found, the component renders a message like
143
+ **Did you mean [/seo-graph/](/seo-graph/)?** inside the element where
144
+ you place it. Style the surrounding element to make it prominent.
145
+
146
+ ### How it works
147
+
148
+ 1. Fetches `/sitemap-index.xml` (follows sitemap index → child sitemaps)
149
+ 2. Extracts all paths and computes
150
+ [Levenshtein similarity](https://en.wikipedia.org/wiki/Levenshtein_distance)
151
+ against the current URL
152
+ 3. **0.6–0.85 similarity**: shows "Did you mean /correct-path/?"
153
+ 4. **0.85 or above**: auto-redirects with `window.location.replace`
154
+ 5. **Below 0.6 or exact match**: does nothing
155
+
156
+ ### Props
157
+
158
+ | Prop | Default | Description |
159
+ | ----------------------- | ---------------------- | -------------------------------------------------- |
160
+ | `threshold` | `0.6` | Minimum similarity for a suggestion to appear |
161
+ | `autoRedirectThreshold` | `0.85` | Similarity at or above which the user is auto-redirected |
162
+ | `sitemapUrl` | `'/sitemap-index.xml'` | URL of the sitemap index or sitemap file |
163
+ | `suggestionText` | `'Did you mean'` | Text shown before the suggested link |
164
+
110
165
  ## hreflang alternates
111
166
 
112
167
  For multilingual sites, pass an `alternates` prop with one entry per locale.
@@ -0,0 +1,151 @@
1
+ ---
2
+ interface Props {
3
+ /**
4
+ * Minimum similarity score (0–1) a match must reach for a suggestion to appear.
5
+ * Defaults to 0.6.
6
+ */
7
+ threshold?: number;
8
+ /**
9
+ * Similarity score (0–1) at or above which the user is automatically
10
+ * redirected instead of being shown a suggestion. Set to 1 to disable
11
+ * auto-redirect. Defaults to 0.85.
12
+ */
13
+ autoRedirectThreshold?: number;
14
+ /**
15
+ * URL of the sitemap index or sitemap file to fetch.
16
+ * Defaults to '/sitemap-index.xml'.
17
+ */
18
+ sitemapUrl?: string;
19
+ /**
20
+ * Text shown before the suggestion link. Defaults to 'Did you mean'.
21
+ */
22
+ suggestionText?: string;
23
+ }
24
+
25
// These defaults are repeated as fallbacks where the client script's run() reads the data-* attributes; keep both in sync.
+ const {
26
+ threshold = 0.6,
27
+ autoRedirectThreshold = 0.85,
28
+ sitemapUrl = '/sitemap-index.xml',
29
+ suggestionText = 'Did you mean',
30
+ } = Astro.props;
31
+ ---
32
+
33
+ <div
34
+ id="fuzzy-redirect"
35
+ data-threshold={threshold}
36
+ data-auto-redirect-threshold={autoRedirectThreshold}
37
+ data-sitemap-url={sitemapUrl}
38
+ data-suggestion-text={suggestionText}
39
+ ></div>
40
+
41
+ <script>
42
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions, and substitutions needed
 * to turn one into the other. Characters are compared as UTF-16 code units.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance; 0 when the strings are identical.
 */
function levenshtein(a: string, b: string): number {
  if (a === b) return 0;

  // The distance is symmetric, so keep the DP row on the shorter string to
  // bound memory by min(a.length, b.length) + 1.
  const longer = a.length >= b.length ? a : b;
  const shorter = a.length >= b.length ? b : a;
  const m = longer.length;
  const n = shorter.length;
  if (n === 0) return m;

  // Single-row DP: before row i is processed, row[j] holds the cost for (i-1, j).
  const row = Array.from({ length: n + 1 }, (_, j) => j);
  for (let i = 1; i <= m; i++) {
    let diag = i - 1; // cost for (i-1, 0), i.e. the value row[0] held so far
    let left = i; // cost for (i, 0)
    row[0] = i;
    const code = longer.charCodeAt(i - 1);
    for (let j = 1; j <= n; j++) {
      const up = row[j]!; // cost for (i-1, j), not yet overwritten
      const cost =
        code === shorter.charCodeAt(j - 1) ? diag : 1 + Math.min(diag, left, up);
      row[j] = cost;
      diag = up;
      left = cost;
    }
  }
  return row[n]!;
}
64
+
65
/**
 * Normalised similarity of two strings, derived from their Levenshtein
 * distance divided by the length of the longer string.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A score where 1 means identical (two empty strings also score 1)
 *   and lower values mean more edits are needed.
 */
function similarity(a: string, b: string): number {
  const longest = a.length > b.length ? a.length : b.length;
  return longest === 0 ? 1 : 1 - levenshtein(a, b) / longest;
}
70
+
71
/**
 * Fetches a sitemap and returns every page URL it lists. When the document is
 * a sitemap index, each child sitemap is fetched in parallel and the results
 * are concatenated.
 *
 * The lookup is best-effort: an unreachable, non-OK, or already-visited
 * sitemap contributes an empty list instead of failing the whole lookup.
 *
 * @param sitemapUrl - URL of a sitemap or sitemap index. An empty string
 *   yields an empty result.
 * @param visited - Sitemap URLs already requested during this lookup. Used to
 *   stop a sitemap index that references itself (directly or indirectly) from
 *   recursing forever. Callers normally omit it.
 * @returns The trimmed, non-empty `<url><loc>` values found.
 */
async function fetchSitemapUrls(
  sitemapUrl: string,
  visited: Set<string> = new Set(),
): Promise<string[]> {
  // fetch('') would re-request the current (404) page, so skip it outright.
  if (sitemapUrl === '') return [];

  // Resolve to an absolute URL so different spellings of one sitemap share a key.
  let key = sitemapUrl;
  try {
    key = new URL(sitemapUrl, document.baseURI).href;
  } catch {
    // Not resolvable; fall back to the raw string as the key.
  }
  if (visited.has(key)) return [];
  visited.add(key);

  let text: string;
  try {
    const response = await fetch(sitemapUrl);
    if (!response.ok) return [];
    text = await response.text();
  } catch {
    // Network failure: treat like a missing sitemap so sibling sitemaps
    // fetched in parallel still contribute their URLs.
    return [];
  }

  const doc = new DOMParser().parseFromString(text, 'text/xml');
  const readLocs = (selector: string): string[] =>
    Array.from(doc.querySelectorAll(selector))
      .map((loc) => loc.textContent?.trim() ?? '')
      .filter(Boolean);

  // A sitemap index lists child sitemaps rather than pages.
  if (doc.querySelector('sitemap > loc') !== null) {
    const nested = await Promise.all(
      readLocs('sitemap > loc').map((child) => fetchSitemapUrls(child, visited)),
    );
    return nested.flat();
  }

  // Regular sitemap — extract <url><loc> entries.
  return readLocs('url > loc');
}
94
+
95
/**
 * Parses a similarity setting from a `data-*` attribute value.
 * Returns `fallback` when the attribute is missing or not a finite number.
 */
function readScore(raw: string | undefined, fallback: number): number {
  const value = parseFloat(raw ?? '');
  return Number.isFinite(value) ? value : fallback;
}

/**
 * Looks for the sitemap path closest to the current (404) URL and either
 * redirects to it or renders a "did you mean" link into the
 * `#fuzzy-redirect` container.
 *
 * Settings are read from the container's `data-*` attributes. Nothing happens
 * when the container is missing, when no sitemap path scores at least
 * `threshold`, or when the best match is identical to the current path.
 *
 * This is a best-effort enhancement of a 404 page, so any failure is logged
 * as a warning and the returned promise never rejects.
 */
async function run(): Promise<void> {
  try {
    const container = document.getElementById('fuzzy-redirect');
    if (!container) return;

    // A non-numeric attribute would parse to NaN and make every comparison
    // below false, so fall back to the defaults instead.
    const threshold = readScore(container.dataset.threshold, 0.6);
    const autoRedirectThreshold = readScore(container.dataset.autoRedirectThreshold, 0.85);
    const sitemapUrl = container.dataset.sitemapUrl ?? '/sitemap-index.xml';
    const suggestionText = container.dataset.suggestionText ?? 'Did you mean';

    const currentPath = window.location.pathname;
    const urls = await fetchSitemapUrls(sitemapUrl);

    // Extract paths from full URLs; entries that are not absolute URLs are used as-is.
    const paths = urls.map((url) => {
      try {
        return new URL(url).pathname;
      } catch {
        return url;
      }
    });

    // Find the closest match.
    let bestPath = '';
    let bestScore = 0;
    for (const path of paths) {
      const score = similarity(currentPath, path);
      if (score > bestScore) {
        bestScore = score;
        bestPath = path;
      }
    }

    console.log(
      `[FuzzyRedirect] Best match for "${currentPath}": "${bestPath}" (similarity: ${bestScore.toFixed(3)})`,
    );

    // No candidate at all (empty sitemap), so there is nothing to suggest.
    if (bestPath === '') return;

    // Exact match means the 404 is correct — the path exists in the
    // sitemap but returned a 404 (possibly a stale sitemap entry).
    if (bestScore >= 1 || bestScore < threshold) return;

    if (bestScore >= autoRedirectThreshold) {
      // replace() keeps the broken URL out of the session history.
      window.location.replace(bestPath);
      return;
    }

    // Show suggestion.
    const link = document.createElement('a');
    link.href = bestPath;
    link.textContent = bestPath;

    container.textContent = `${suggestionText} `;
    container.appendChild(link);
    container.append('?');
  } catch (error: unknown) {
    console.warn('[FuzzyRedirect] Could not look up a replacement URL:', error);
  }
}

// Fire-and-forget: run() handles its own errors.
void run();
151
+ </script>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jdevalk/astro-seo-graph",
3
- "version": "0.4.2",
3
+ "version": "0.5.1",
4
4
  "description": "Astro integration for @jdevalk/seo-graph-core. Seo component, route factories, content-collection aggregator, Zod content helpers.",
5
5
  "keywords": [
6
6
  "astro",
@@ -22,7 +22,8 @@
22
22
  "types": "./dist/index.d.ts",
23
23
  "import": "./dist/index.js"
24
24
  },
25
- "./Seo.astro": "./dist/components/Seo.astro"
25
+ "./Seo.astro": "./dist/components/Seo.astro",
26
+ "./FuzzyRedirect.astro": "./dist/components/FuzzyRedirect.astro"
26
27
  },
27
28
  "files": [
28
29
  "dist",