@growth-labs/seo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/utils/validation.d.ts.map +1 -1
  2. package/dist/utils/validation.js +22 -0
  3. package/dist/utils/validation.js.map +1 -1
  4. package/package.json +9 -5
  5. package/src/_internal/state.ts +26 -0
  6. package/src/bindings.ts +146 -0
  7. package/src/cron/prune-aeo-r2.ts +140 -0
  8. package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
  9. package/src/index.ts +380 -0
  10. package/src/middleware/seo.ts +350 -0
  11. package/src/options.ts +456 -0
  12. package/src/routes/aeo-twin.ts +130 -0
  13. package/src/routes/apple-news.ts +36 -0
  14. package/src/routes/llms-full.ts +36 -0
  15. package/src/routes/llms.ts +15 -0
  16. package/src/routes/podcast-narration.ts +45 -0
  17. package/src/routes/podcast.ts +27 -0
  18. package/src/routes/revalidate.ts +298 -0
  19. package/src/routes/robots.ts +21 -0
  20. package/src/routes/rss.ts +29 -0
  21. package/src/routes/sitemap-articles.ts +25 -0
  22. package/src/routes/sitemap-index.ts +89 -0
  23. package/src/routes/sitemap-markdown.ts +39 -0
  24. package/src/routes/sitemap-pages.ts +24 -0
  25. package/src/routes/sitemap-products.ts +24 -0
  26. package/src/routes/sitemap-videos.ts +24 -0
  27. package/src/runtime.ts +17 -0
  28. package/src/site-url-core.ts +71 -0
  29. package/src/site-url.ts +21 -0
  30. package/src/types.ts +166 -0
  31. package/src/utils/aeo-summary.ts +176 -0
  32. package/src/utils/aeo-twin-emitter.ts +173 -0
  33. package/src/utils/aeo.ts +223 -0
  34. package/src/utils/apple-news-anf.ts +163 -0
  35. package/src/utils/apple-news-rss.ts +136 -0
  36. package/src/utils/content-filter.ts +87 -0
  37. package/src/utils/crawler-class.ts +155 -0
  38. package/src/utils/define-content-provider.ts +65 -0
  39. package/src/utils/effective-auth.ts +44 -0
  40. package/src/utils/fcrdns.ts +269 -0
  41. package/src/utils/fresh-layer.ts +175 -0
  42. package/src/utils/hreflang.ts +26 -0
  43. package/src/utils/index.ts +91 -0
  44. package/src/utils/json-ld/article.ts +120 -0
  45. package/src/utils/json-ld/audio.ts +32 -0
  46. package/src/utils/json-ld/breadcrumb.ts +28 -0
  47. package/src/utils/json-ld/faq.ts +18 -0
  48. package/src/utils/json-ld/howto.ts +23 -0
  49. package/src/utils/json-ld/index.ts +12 -0
  50. package/src/utils/json-ld/item-list.ts +26 -0
  51. package/src/utils/json-ld/organization.ts +42 -0
  52. package/src/utils/json-ld/person.ts +25 -0
  53. package/src/utils/json-ld/product.ts +155 -0
  54. package/src/utils/json-ld/video.ts +20 -0
  55. package/src/utils/json-ld/website.ts +27 -0
  56. package/src/utils/llms-full.ts +90 -0
  57. package/src/utils/llms.ts +45 -0
  58. package/src/utils/meta.ts +184 -0
  59. package/src/utils/podcast.ts +112 -0
  60. package/src/utils/robots.ts +47 -0
  61. package/src/utils/rss.ts +64 -0
  62. package/src/utils/seo-head.ts +81 -0
  63. package/src/utils/sitemap-markdown.ts +80 -0
  64. package/src/utils/sitemap.ts +169 -0
  65. package/src/utils/staleness.ts +61 -0
  66. package/src/utils/validation.ts +308 -0
  67. package/src/virtual.d.ts +8 -0
  68. package/src/vite-plugin.ts +66 -0
@@ -0,0 +1,308 @@
1
/** Findings produced by a validator in this module, split by severity. */
export interface ValidationResult {
  /** Messages the validator reported as errors. */
  errors: string[]
  /** Messages the validator reported as warnings. */
  warnings: string[]
}
5
+
6
/** Thresholds consumed by `validatePage`. */
export interface PageValidationOptions {
  /** A `<title>` longer than this many characters produces a warning. */
  titleMaxLength: number
  /** A meta description longer than this many characters produces a warning. */
  descriptionMaxLength: number
  /** When set (and non-zero), an `og:image:width` below this value produces a warning. */
  heroMinWidth?: number
}
11
+
12
/**
 * Validate a JSON-LD object for common SEO issues.
 *
 * Checks performed:
 * - `@context` and `@type` must be present (errors).
 * - Article / NewsArticle / BlogPosting: warns when `image` or `author` is
 *   missing, or when a string `headline` is longer than 110 characters.
 * - Product: warns when `offers` is missing, or when the (first) offer lacks
 *   `price` or `availability`. A numeric price of `0` counts as present.
 * - BreadcrumbList: every `itemListElement` entry must be an object with a
 *   numeric `position` that is strictly greater than the previous one (errors).
 *
 * When `@type` is an array, only its first entry selects the type-specific checks.
 *
 * @param jsonLd - Parsed JSON-LD node to inspect.
 * @returns The collected error and warning messages.
 */
export function validateJsonLd(jsonLd: Record<string, unknown>): ValidationResult {
  const errors: string[] = []
  const warnings: string[] = []

  // Must have @context
  if (!jsonLd['@context']) {
    errors.push('JSON-LD missing @context')
  }

  // Must have @type
  const type = jsonLd['@type']
  if (!type) {
    errors.push('JSON-LD missing @type')
  }

  const typeStr = Array.isArray(type) ? type[0] : String(type ?? '')

  // Article checks
  if (typeStr === 'Article' || typeStr === 'NewsArticle' || typeStr === 'BlogPosting') {
    if (!jsonLd.image) {
      warnings.push(`${typeStr} JSON-LD missing recommended "image" property`)
    }
    if (!jsonLd.author) {
      warnings.push(`${typeStr} JSON-LD missing recommended "author" property`)
    }
    const headline = jsonLd.headline
    if (headline && typeof headline === 'string' && headline.length > 110) {
      warnings.push(`${typeStr} JSON-LD "headline" exceeds 110 characters (${headline.length})`)
    }
  }

  // Product checks
  if (typeStr === 'Product') {
    const offers = jsonLd.offers
    if (!offers) {
      warnings.push('Product JSON-LD missing recommended "offers" property')
    } else {
      // Only the first offer of an array is inspected.
      const offersObj = Array.isArray(offers) ? offers[0] : offers
      if (offersObj && typeof offersObj === 'object') {
        const o = offersObj as Record<string, unknown>
        if (!o.price && o.price !== 0) {
          warnings.push('Product JSON-LD offers missing "price"')
        }
        if (!o.availability) {
          warnings.push('Product JSON-LD offers missing "availability"')
        }
      }
    }
  }

  // BreadcrumbList position ordering
  if (typeStr === 'BreadcrumbList') {
    const itemListElement = jsonLd.itemListElement
    if (Array.isArray(itemListElement)) {
      let lastPosition = 0
      for (let i = 0; i < itemListElement.length; i++) {
        const item: unknown = itemListElement[i]
        // A null/primitive entry has no position to read; report it instead of throwing.
        if (typeof item !== 'object' || item === null) {
          errors.push(`BreadcrumbList itemListElement[${i}] is not an object`)
          continue
        }
        const position = Number((item as Record<string, unknown>).position)
        // NaN compares false against everything, so letting it become
        // `lastPosition` would silently disable every later ordering check.
        if (Number.isNaN(position)) {
          errors.push(
            `BreadcrumbList itemListElement[${i}] has a missing or non-numeric "position"`,
          )
          continue
        }
        if (position <= lastPosition) {
          errors.push(
            `BreadcrumbList itemListElement[${i}] position ${position} is not in ascending order`,
          )
        }
        lastPosition = position
      }
    }
  }

  return { errors, warnings }
}
85
+
86
/**
 * Validate an HTML string for common on-page SEO issues.
 *
 * Pages that contain both a `<meta http-equiv="refresh">` tag and a robots
 * meta tag listing `noindex` are returned with no findings and are not
 * checked further.
 *
 * Otherwise a missing `<title>` is an error, and the following are warnings:
 * over-long title or meta description, missing meta description, missing
 * canonical link, missing `og:image`, `og:image:width` below
 * `options.heroMinWidth`, zero or multiple `<h1>` tags, and a missing
 * `application/ld+json` script.
 *
 * Detection is regex-based; it is not a full HTML parse.
 *
 * @param html - Rendered page markup.
 * @param options - Length limits and optional minimum hero width.
 * @returns The collected error and warning messages.
 */
export function validatePage(html: string, options: PageValidationOptions): ValidationResult {
  const errors: string[] = []
  const warnings: string[] = []

  if (isNoindexMetaRefreshRedirect(html)) {
    return { errors, warnings }
  }

  const { titleMaxLength, descriptionMaxLength, heroMinWidth } = options

  // Title checks
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i)
  if (!titleMatch) {
    errors.push('Missing <title> tag')
  } else {
    const title = (titleMatch[1] ?? '').trim()
    if (title.length > titleMaxLength) {
      warnings.push(`<title> exceeds ${titleMaxLength} characters (${title.length})`)
    }
  }

  // Meta description checks
  const descMatch =
    html.match(/<meta\s[^>]*name=["']description["'][^>]*>/i) ??
    html.match(/<meta\s[^>]*name=description[^>]*>/i)
  if (!descMatch) {
    warnings.push('Missing meta description')
  } else {
    // Read the attribute with the quote-aware scanner so apostrophes inside a
    // double-quoted value (or a preceding data-content attribute) do not
    // truncate or replace the measured text.
    const desc = getHtmlAttr(descMatch[0], 'content')?.trim()
    if (desc !== undefined && desc.length > descriptionMaxLength) {
      warnings.push(`Meta description exceeds ${descriptionMaxLength} characters (${desc.length})`)
    }
  }

  // Canonical check
  const canonicalMatch =
    html.match(/<link\s[^>]*rel=["']canonical["'][^>]*>/i) ??
    html.match(/<link\s[^>]*rel=canonical[^>]*>/i)
  if (!canonicalMatch) {
    warnings.push('Missing canonical link')
  }

  // og:image check
  const ogImageMatch =
    html.match(/<meta\s[^>]*property=["']og:image["'][^>]*>/i) ??
    html.match(/<meta\s[^>]*property=og:image[^>]*>/i)
  if (!ogImageMatch) {
    warnings.push('Missing og:image meta tag')
  }

  // og:image:width check against heroMinWidth
  if (heroMinWidth) {
    const ogWidthMatch =
      html.match(/<meta\s[^>]*property=["']og:image:width["'][^>]*>/i) ??
      html.match(/<meta\s[^>]*property=og:image:width[^>]*>/i)
    const rawWidth = ogWidthMatch ? getHtmlAttr(ogWidthMatch[0], 'content') : undefined
    if (rawWidth !== undefined) {
      const width = Number(rawWidth)
      if (!Number.isNaN(width) && width < heroMinWidth) {
        warnings.push(`Hero image width ${width}px is below minimum ${heroMinWidth}px`)
      }
    }
  }

  // H1 checks
  const h1Matches = html.match(/<h1[\s>]/gi) ?? []
  if (h1Matches.length === 0) {
    warnings.push('Missing H1 tag')
  } else if (h1Matches.length > 1) {
    warnings.push(`Multiple H1 tags found (${h1Matches.length})`)
  }

  // JSON-LD presence check
  const jsonLdMatch = html.match(/<script\s[^>]*type=["']application\/ld\+json["'][^>]*>/i)
  if (!jsonLdMatch) {
    warnings.push('Missing JSON-LD structured data')
  }

  return { errors, warnings }
}

/**
 * True when the markup has a meta tag with `http-equiv="refresh"` and a
 * `name="robots"` meta tag whose comma-separated `content` includes `noindex`.
 * Attribute names and values are compared case-insensitively.
 */
function isNoindexMetaRefreshRedirect(html: string): boolean {
  const metaTags = html.match(/<meta\s+[^>]*>/gi) ?? []
  const hasRefresh = metaTags.some(
    (tag) => getHtmlAttr(tag, 'http-equiv')?.toLowerCase() === 'refresh',
  )
  const hasNoindex = metaTags.some((tag) => {
    if (getHtmlAttr(tag, 'name')?.toLowerCase() !== 'robots') return false
    const content = getHtmlAttr(tag, 'content')?.toLowerCase() ?? ''
    return content
      .split(',')
      .map((part) => part.trim())
      .includes('noindex')
  })
  return hasRefresh && hasNoindex
}

/**
 * Return the value of attribute `name` from a single opening tag, or
 * `undefined` when the attribute is absent or has no value.
 *
 * Attributes are scanned left to right as whole `name=value` units, so a
 * quoted value is consumed before the next name is read. This keeps
 * `content` from matching `data-content`, and keeps text inside an earlier
 * attribute's value from being mistaken for an attribute. Name matching is
 * case-insensitive; double-quoted, single-quoted and unquoted values are
 * supported.
 */
function getHtmlAttr(tag: string, name: string): string | undefined {
  const wanted = name.toLowerCase()
  // Drop the leading "<tagname" so the element name is not read as an attribute.
  const attrText = tag.replace(/^<\s*[^\s>/]+/, '')
  const attrPattern = /([^\s"'<>/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g
  for (const m of attrText.matchAll(attrPattern)) {
    if (m[1]?.toLowerCase() !== wanted) continue
    const value = m[2] ?? m[3] ?? m[4]
    if (value !== undefined) return value
  }
  return undefined
}
203
+
204
+ // ─── Hreflang reciprocity ───
205
+
206
+ import type { ContentItem } from '../types.js'
207
+
208
/** One hreflang alternate that is not referenced back by its target. */
export interface HreflangReciprocityIssue {
  /** URL of the item that declares the alternate. */
  url: string
  missingReciprocal: {
    /** URL of the alternate that should link back. */
    from: string
    /** Language code declared for that alternate. */
    lang: string
    /** URL the alternate was expected to list (the declaring item's URL). */
    expectedBackReference: string
  }
}
216
+
217
/**
 * Check that hreflang alternates point at each other.
 *
 * For every `alternateLocales` entry on every item, the target item is looked
 * up by URL and must itself list the declaring item's URL among its own
 * `alternateLocales`. An issue is recorded both when the target lists no such
 * back-reference and when the target URL is not present in `items` at all.
 *
 * Items are indexed by URL once up front; when several items share a URL the
 * last one is the one consulted.
 *
 * @param items - Content items to cross-check.
 * @returns One issue per non-reciprocated alternate, in item order and then
 *   alternate order. Empty when everything is reciprocal.
 */
export function validateHreflangReciprocity(items: ContentItem[]): HreflangReciprocityIssue[] {
  const catalog = new Map<string, ContentItem>()
  items.forEach((entry) => {
    catalog.set(entry.url, entry)
  })

  return items.flatMap((source) =>
    (source.alternateLocales ?? [])
      // A missing target and a target without the back-reference are both failures.
      .filter(
        (alt) =>
          !catalog.get(alt.url)?.alternateLocales?.some((back) => back.url === source.url),
      )
      .map(
        (alt): HreflangReciprocityIssue => ({
          url: source.url,
          missingReciprocal: {
            from: alt.url,
            lang: alt.lang,
            expectedBackReference: source.url,
          },
        }),
      ),
  )
}
263
+
264
+ // ─── Prerender-gated-content guard ───
265
+
266
/** A members-gated item whose route is in the prerendered set. */
export interface PrerenderGuardIssue {
  /** Pathname of the offending route. */
  route: string
  /** Access level of the item; only `'members'` items are reported. */
  access: 'members'
  /** Human-readable explanation and remediation hint. */
  message: string
}
271
+
272
/**
 * Fails the build when a route whose ContentItem has `access: 'members'` is
 * configured as `prerender: true`. This is the load-bearing check that makes
 * Flexible Sampling + static mode the only known-bad combo. The consumer's
 * Astro integration iterates the build manifest and calls this with the
 * prerendered-route URL set crossed against contentProvider output.
 *
 * Spec lines 1389-1405.
 *
 * Matching is trailing-slash-insensitive in both directions: the item's
 * pathname is compared with and without a trailing slash, so `/post/` on the
 * item matches `/post` in the prerendered set and vice versa.
 *
 * `item.url` is parsed with `new URL(item.url)`, which throws for a URL that
 * is not absolute.
 *
 * @param prerenderedUrls - Pathnames of the routes that are prerendered.
 * @param items - Content items to check; only `access === 'members'` items are considered.
 * @returns An empty array on success; non-empty means the build MUST fail.
 */
export function validatePrerenderedGatedRoutes({
  prerenderedUrls,
  items,
}: {
  prerenderedUrls: Set<string>
  items: ContentItem[]
}): PrerenderGuardIssue[] {
  const issues: PrerenderGuardIssue[] = []
  for (const item of items) {
    if (item.access !== 'members') continue
    const path = new URL(item.url).pathname
    // Slash-stripped form of the pathname; the root path stays "/".
    const bare = path.replace(/\/+$/, '') || '/'
    const candidates = [path, `${path}/`, bare, `${bare}/`]
    if (candidates.some((candidate) => prerenderedUrls.has(candidate))) {
      issues.push({
        route: path,
        access: 'members',
        message:
          `Route ${path} is prerendered but serves a members-gated item. ` +
          `Prerendered HTML is the same bytes for every requester — there is no ` +
          `way to serve a teaser to anonymous users and the full body to verified ` +
          `Googlebot from the same static file. Set export const prerender = false ` +
          `on this route, or mark this item access: 'public'.`,
      })
    }
  }
  return issues
}
@@ -0,0 +1,8 @@
1
/** Ambient typings for the `virtual:growth-labs/seo/config` module. */
declare module 'virtual:growth-labs/seo/config' {
  import type { ContentProvider, ResolvedSeoOptions } from '@growth-labs/seo'

  /** The SEO configuration object exported by the virtual module. */
  export const config: ResolvedSeoOptions
  /** Accessor returning the SEO configuration. */
  export function getConfig(): ResolvedSeoOptions
  /** Accessor returning the content provider, or `undefined` when there is none. */
  export function getContentProvider(): ContentProvider | undefined
}
@@ -0,0 +1,66 @@
1
+ import type { Plugin } from 'vite'
2
+ import type { ResolvedSeoOptions } from './options.js'
3
+
4
/** Public specifier that routes and middleware import. */
const VIRTUAL_MODULE_ID = 'virtual:growth-labs/seo/config'
/** Internal id for the virtual module: the public specifier prefixed with `\0`. */
const RESOLVED_VIRTUAL_MODULE_ID = `\0${VIRTUAL_MODULE_ID}`

export interface SeoVitePluginOptions {
  /** Resolved SEO options to embed in the generated module. */
  config: ResolvedSeoOptions
  /**
   * Optional module specifier whose default export is the ContentProvider.
   * When set, the generated virtual module imports it and registers the
   * provider via `_setContentProvider` at load time. This is how state
   * gets seeded in Cloudflare's prerender-worker context, where
   * `seo(userOptions)` from astro.config.mjs never runs.
   *
   * The specifier is passed directly to Vite's resolver. Project-root-relative
   * paths (e.g. '/src/lib/content-provider.ts') are the usual form.
   */
  contentProviderModule?: string
}

/**
 * Create the Vite plugin that serves `virtual:growth-labs/seo/config`.
 *
 * The generated module calls `_setConfig` with the JSON-serialized options
 * (the `contentProvider` key is stripped first, since a function cannot be
 * serialized) and, when `contentProviderModule` is given, imports that module's
 * default export and passes it to `_setContentProvider`. It then exports
 * `config` and re-exports `getConfig` / `getContentProvider`.
 *
 * @param opts - Either `{ config, contentProviderModule? }` or, for callers
 *   from 0.2.x, a bare `ResolvedSeoOptions`. The wrapped form is recognised by
 *   having a `config` key whose value has a `site` key.
 * @returns A Vite plugin with `resolveId` and `load` hooks for the virtual module.
 */
export function growthLabsSeoPlugin(opts: SeoVitePluginOptions | ResolvedSeoOptions): Plugin {
  // Back-compat: callers from 0.2.x passed ResolvedSeoOptions directly.
  const isWrapped = 'config' in opts && 'site' in (opts as SeoVitePluginOptions).config
  const normalized: SeoVitePluginOptions = isWrapped
    ? (opts as SeoVitePluginOptions)
    : { config: opts as ResolvedSeoOptions }

  const providerSpecifier = normalized.contentProviderModule
  // Drop the non-serializable provider; the module-path form seeds it separately.
  const { contentProvider: _omitted, ...staticConfig } = normalized.config as Record<
    string,
    unknown
  >

  return {
    name: 'growth-labs-seo-config',
    resolveId(id) {
      return id === VIRTUAL_MODULE_ID ? RESOLVED_VIRTUAL_MODULE_ID : undefined
    },
    load(id) {
      if (id !== RESOLVED_VIRTUAL_MODULE_ID) return

      // State is seeded as an import-time side effect, so any environment that
      // imports the virtual module (main Worker or Cloudflare prerender worker)
      // has it populated before getConfig() is called.
      const source = [
        `import { _setConfig, _setContentProvider } from '@growth-labs/seo/_internal/state';`,
        ...(providerSpecifier ? [`import _cp from ${JSON.stringify(providerSpecifier)};`] : []),
        `const config = ${JSON.stringify(staticConfig)};`,
        `_setConfig(config);`,
        ...(providerSpecifier ? [`_setContentProvider(_cp);`] : []),
        `export { config };`,
        `export { getConfig, getContentProvider } from '@growth-labs/seo/_internal/state';`,
      ]
      return source.join('\n')
    },
  }
}