@growth-labs/seo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/utils/validation.d.ts.map +1 -1
  2. package/dist/utils/validation.js +22 -0
  3. package/dist/utils/validation.js.map +1 -1
  4. package/package.json +9 -5
  5. package/src/_internal/state.ts +26 -0
  6. package/src/bindings.ts +146 -0
  7. package/src/cron/prune-aeo-r2.ts +140 -0
  8. package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
  9. package/src/index.ts +380 -0
  10. package/src/middleware/seo.ts +350 -0
  11. package/src/options.ts +456 -0
  12. package/src/routes/aeo-twin.ts +130 -0
  13. package/src/routes/apple-news.ts +36 -0
  14. package/src/routes/llms-full.ts +36 -0
  15. package/src/routes/llms.ts +15 -0
  16. package/src/routes/podcast-narration.ts +45 -0
  17. package/src/routes/podcast.ts +27 -0
  18. package/src/routes/revalidate.ts +298 -0
  19. package/src/routes/robots.ts +21 -0
  20. package/src/routes/rss.ts +29 -0
  21. package/src/routes/sitemap-articles.ts +25 -0
  22. package/src/routes/sitemap-index.ts +89 -0
  23. package/src/routes/sitemap-markdown.ts +39 -0
  24. package/src/routes/sitemap-pages.ts +24 -0
  25. package/src/routes/sitemap-products.ts +24 -0
  26. package/src/routes/sitemap-videos.ts +24 -0
  27. package/src/runtime.ts +17 -0
  28. package/src/site-url-core.ts +71 -0
  29. package/src/site-url.ts +21 -0
  30. package/src/types.ts +166 -0
  31. package/src/utils/aeo-summary.ts +176 -0
  32. package/src/utils/aeo-twin-emitter.ts +173 -0
  33. package/src/utils/aeo.ts +223 -0
  34. package/src/utils/apple-news-anf.ts +163 -0
  35. package/src/utils/apple-news-rss.ts +136 -0
  36. package/src/utils/content-filter.ts +87 -0
  37. package/src/utils/crawler-class.ts +155 -0
  38. package/src/utils/define-content-provider.ts +65 -0
  39. package/src/utils/effective-auth.ts +44 -0
  40. package/src/utils/fcrdns.ts +269 -0
  41. package/src/utils/fresh-layer.ts +175 -0
  42. package/src/utils/hreflang.ts +26 -0
  43. package/src/utils/index.ts +91 -0
  44. package/src/utils/json-ld/article.ts +120 -0
  45. package/src/utils/json-ld/audio.ts +32 -0
  46. package/src/utils/json-ld/breadcrumb.ts +28 -0
  47. package/src/utils/json-ld/faq.ts +18 -0
  48. package/src/utils/json-ld/howto.ts +23 -0
  49. package/src/utils/json-ld/index.ts +12 -0
  50. package/src/utils/json-ld/item-list.ts +26 -0
  51. package/src/utils/json-ld/organization.ts +42 -0
  52. package/src/utils/json-ld/person.ts +25 -0
  53. package/src/utils/json-ld/product.ts +155 -0
  54. package/src/utils/json-ld/video.ts +20 -0
  55. package/src/utils/json-ld/website.ts +27 -0
  56. package/src/utils/llms-full.ts +90 -0
  57. package/src/utils/llms.ts +45 -0
  58. package/src/utils/meta.ts +184 -0
  59. package/src/utils/podcast.ts +112 -0
  60. package/src/utils/robots.ts +47 -0
  61. package/src/utils/rss.ts +64 -0
  62. package/src/utils/seo-head.ts +81 -0
  63. package/src/utils/sitemap-markdown.ts +80 -0
  64. package/src/utils/sitemap.ts +169 -0
  65. package/src/utils/staleness.ts +61 -0
  66. package/src/utils/validation.ts +308 -0
  67. package/src/virtual.d.ts +8 -0
  68. package/src/vite-plugin.ts +66 -0
package/src/index.ts ADDED
@@ -0,0 +1,380 @@
1
+ import { readdirSync, readFileSync } from 'node:fs'
2
+ import { join } from 'node:path'
3
+ import { fileURLToPath } from 'node:url'
4
+ import type { AstroIntegration } from 'astro'
5
+ import { _setConfig, _setContentProvider } from './_internal/state.js'
6
+ import { resolveAeoTwins, type SeoOptions, validatedSeoOptionsSchema } from './options.js'
7
+ import { SITEMAP_INDEX_PATH } from './utils/sitemap.js'
8
+ import { validateJsonLd, validatePage, validatePrerenderedGatedRoutes } from './utils/validation.js'
9
+ import { growthLabsSeoPlugin } from './vite-plugin.js'
10
+
11
/**
 * Turn a package-relative, extension-less module path into an absolute
 * filesystem path that can be handed to Astro as an entrypoint.
 *
 * The extension mirrors this module's own URL: `.ts` when this file is being
 * loaded from a `.ts` URL, `.js` in every other case.
 *
 * @param path - Module path relative to this file, without an extension
 *   (e.g. `./routes/robots`).
 * @returns The absolute filesystem path of the resolved module.
 */
function resolveEntrypoint(path: string): string {
  const selfUrl = import.meta.url
  let extension = '.js'
  if (typeof selfUrl === 'string' && selfUrl.endsWith('.ts')) {
    extension = '.ts'
  }
  const target = new URL(path + extension, selfUrl)
  return fileURLToPath(target)
}
15
+
16
/**
 * Astro integration factory for `@growth-labs/seo`.
 *
 * At call time the user options are parsed through `validatedSeoOptionsSchema`
 * and stored in the package's internal state (together with the inline
 * `contentProvider`, when one is passed). The returned integration registers
 * two hooks:
 *
 * - `astro:config:setup` — adds the Vite plugin and the SEO middleware, then
 *   injects the crawler-visible routes (sitemaps, robots.txt, llms.txt, feeds,
 *   revalidation endpoint, AEO twins) that the options enable, and logs a
 *   one-line summary of what was injected and what was skipped.
 * - `astro:build:done` — runs the best-effort prerender-gated-content guard
 *   and, when `validation.enabled` is set, validates every emitted HTML file
 *   and its JSON-LD blocks.
 *
 * @param userOptions - Raw integration options as written in the Astro config.
 * @returns The Astro integration object.
 * @throws Error from `astro:config:setup` when a feature that needs a content
 *   source is enabled but neither `contentProvider` nor `contentProviderModule`
 *   is set.
 * @throws Error from `astro:build:done` when the gated-content guard finds
 *   prerendered routes that serve members-gated items.
 */
export default function seo(userOptions: SeoOptions): AstroIntegration {
  const options = validatedSeoOptionsSchema.parse(userOptions)
  const aeo = resolveAeoTwins(options.aeoTwins)

  _setConfig(options)
  if (userOptions.contentProvider) {
    _setContentProvider(userOptions.contentProvider)
  }

  // ─── Provider-wired detection ───
  // A "content source" is either the inline `contentProvider` function (main
  // Worker only — deprecated) or a `contentProviderModule` specifier (canonical,
  // survives the Cloudflare prerender Worker). When neither is set, we must not
  // inject crawler-visible routes, because they would silently serve empty bodies
  // and robots.txt would advertise the empty sitemap index to crawlers — the
  // exact footgun 0.3.0 closes.
  const providerWired =
    typeof userOptions.contentProvider === 'function' ||
    (typeof options.contentProviderModule === 'string' && options.contentProviderModule.length > 0)

  return {
    name: '@growth-labs/seo',
    hooks: {
      'astro:config:setup': ({ addMiddleware, injectRoute, updateConfig, logger }) => {
        updateConfig({
          vite: {
            plugins: [
              growthLabsSeoPlugin({
                config: options,
                contentProviderModule: options.contentProviderModule,
              }),
            ],
          },
        })

        addMiddleware({ entrypoint: resolveEntrypoint('./middleware/seo'), order: 'post' })

        // ─── Deprecation warning for inline contentProvider ───
        if (typeof userOptions.contentProvider === 'function') {
          logger.warn(
            '`contentProvider` (inline function) is deprecated and does not survive the ' +
              'Cloudflare prerender Worker. Move your provider into its own module and ' +
              'set `contentProviderModule: "/src/lib/content-provider.mjs"` instead. ' +
              'See packages-docs/seo-contentprovidermodule.md. The inline form will be ' +
              'removed in the next breaking release.',
          )
        }

        // ─── Internally-inconsistent config throws ───
        // Feature flags that only make sense with a content source. Failing
        // here is louder than silently skipping the route injection below.
        if (!providerWired) {
          const needsProvider: string[] = []
          if (options.aeoTwins !== false) needsProvider.push('aeoTwins')
          if (options.rss) needsProvider.push('rss: true')
          if (options.llmsTxt) needsProvider.push('llmsTxt: true')
          if (options.llmsFullTxt) needsProvider.push('llmsFullTxt: true')
          if (options.flexibleSampling.enabled) needsProvider.push('flexibleSampling.enabled: true')
          if (options.appleNews?.enabled) needsProvider.push('appleNews.enabled: true')
          if (options.podcast?.enabled) needsProvider.push('podcast.enabled: true')
          if (options.audioNarration?.asPodcastFeed)
            needsProvider.push('audioNarration.asPodcastFeed: true')
          if (options.commerce?.enabled) needsProvider.push('commerce.enabled: true')
          if (needsProvider.length > 0) {
            throw new Error(
              `@growth-labs/seo: ${needsProvider.join(', ')} require a content source, but ` +
                'neither `contentProvider` nor `contentProviderModule` is set.\n' +
                '  Set `contentProviderModule: "/src/lib/content-provider.mjs"` whose ' +
                'default export is the ContentProvider function. See ' +
                'packages-docs/seo-contentprovidermodule.md for a worked D1-backed example.',
            )
          }
        }

        // Bookkeeping for the summary line logged at the end of this hook.
        const injected: string[] = []
        const skipped: Array<{ pattern: string; reason: string }> = []

        // ─── Sitemaps (provider-wired gated) ───
        if (providerWired) {
          injectRoute({
            pattern: SITEMAP_INDEX_PATH,
            entrypoint: resolveEntrypoint('./routes/sitemap-index'),
            prerender: false,
          })
          injected.push(SITEMAP_INDEX_PATH)
          injectRoute({
            pattern: '/sitemap-articles.xml',
            entrypoint: resolveEntrypoint('./routes/sitemap-articles'),
            prerender: false,
          })
          injected.push('/sitemap-articles.xml')
          injectRoute({
            pattern: '/sitemap-pages.xml',
            entrypoint: resolveEntrypoint('./routes/sitemap-pages'),
            prerender: false,
          })
          injected.push('/sitemap-pages.xml')
          injectRoute({
            pattern: '/sitemap-videos.xml',
            entrypoint: resolveEntrypoint('./routes/sitemap-videos'),
            prerender: false,
          })
          injected.push('/sitemap-videos.xml')
          if (options.commerce?.enabled) {
            injectRoute({
              pattern: '/sitemap-products.xml',
              entrypoint: resolveEntrypoint('./routes/sitemap-products'),
              prerender: false,
            })
            injected.push('/sitemap-products.xml')
          } else {
            skipped.push({
              pattern: '/sitemap-products.xml',
              reason: 'commerce.enabled is false',
            })
          }
          if (options.markdownSitemap && aeo && aeo.mode !== 'middleware') {
            injectRoute({
              pattern: '/sitemap-markdown.xml',
              entrypoint: resolveEntrypoint('./routes/sitemap-markdown'),
              prerender: false,
            })
            injected.push('/sitemap-markdown.xml')
          } else if (options.markdownSitemap) {
            skipped.push({
              pattern: '/sitemap-markdown.xml',
              reason: aeo ? `aeoTwins.mode='${aeo.mode}'` : 'aeoTwins disabled',
            })
          }
        } else {
          for (const p of [
            SITEMAP_INDEX_PATH,
            '/sitemap-articles.xml',
            '/sitemap-pages.xml',
            '/sitemap-videos.xml',
            '/sitemap-products.xml',
            '/sitemap-markdown.xml',
          ]) {
            skipped.push({ pattern: p, reason: 'no contentProviderModule wired' })
          }
        }

        // ─── robots.txt ───
        // Inject only when we have something meaningful to emit: either a provider
        // (so a Sitemap: line is worth advertising) or additionalRules the consumer
        // configured. Otherwise the consumer can own /robots.txt themselves; we
        // would only be shipping a no-op `User-agent: *\nAllow: /` anyway.
        const robotsHasCustomRules = (options.robots.additionalRules?.length ?? 0) > 0
        if (providerWired || robotsHasCustomRules) {
          injectRoute({
            pattern: '/robots.txt',
            entrypoint: resolveEntrypoint('./routes/robots'),
            prerender: false,
          })
          injected.push('/robots.txt')
        } else {
          skipped.push({
            pattern: '/robots.txt',
            reason:
              'no contentProviderModule wired and no robots.additionalRules — consumer owns it',
          })
        }

        // ─── llms.txt / llms-full.txt ───
        if (options.llmsTxt && providerWired) {
          injectRoute({
            pattern: '/llms.txt',
            entrypoint: resolveEntrypoint('./routes/llms'),
            prerender: false,
          })
          injected.push('/llms.txt')
        }
        if (options.llmsFullTxt && providerWired) {
          injectRoute({
            pattern: '/llms-full.txt',
            entrypoint: resolveEntrypoint('./routes/llms-full'),
            prerender: false,
          })
          injected.push('/llms-full.txt')
        }

        // ─── RSS ───
        if (options.rss && providerWired) {
          injectRoute({
            pattern: '/feed.xml',
            entrypoint: resolveEntrypoint('./routes/rss'),
            prerender: false,
          })
          injected.push('/feed.xml')
        }

        // ─── Apple News Publisher RSS ───
        if (options.appleNews?.enabled && providerWired) {
          injectRoute({
            pattern: options.appleNews.feedPath,
            entrypoint: resolveEntrypoint('./routes/apple-news'),
            prerender: false,
          })
          injected.push(options.appleNews.feedPath)
        }

        // ─── Podcast ───
        if (options.podcast?.enabled && providerWired) {
          injectRoute({
            pattern: options.podcast.feedPath,
            entrypoint: resolveEntrypoint('./routes/podcast'),
            prerender: false,
          })
          injected.push(options.podcast.feedPath)
        }

        // ─── Narrated articles podcast feed ───
        if (options.audioNarration?.asPodcastFeed && providerWired) {
          injectRoute({
            pattern: options.audioNarration.podcastFeedPath,
            entrypoint: resolveEntrypoint('./routes/podcast-narration'),
            prerender: false,
          })
          injected.push(options.audioNarration.podcastFeedPath)
        }

        // ─── Revalidation endpoint ───
        // Only when onDemandRevalidation is enabled; the inconsistency throw above
        // ensures that requires a provider when aeoTwins is enabled.
        if (aeo?.onDemandRevalidation) {
          injectRoute({
            pattern: '/_seo/revalidate',
            entrypoint: resolveEntrypoint('./routes/revalidate'),
            prerender: false,
          })
          injected.push('/_seo/revalidate')
        }

        // ─── AEO twin emission (static + both modes) ───
        // Injected as a prerender route so contentProvider runs inside Astro's
        // build pipeline. Raw-Node astro:build:done couldn't resolve Vite-virtual
        // imports like `astro:content` that consumers commonly use in their
        // contentProvider implementations.
        if (aeo && aeo.mode !== 'middleware' && providerWired) {
          injectRoute({
            pattern: '/[...aeoPath].md',
            entrypoint: resolveEntrypoint('./routes/aeo-twin'),
          })
          injected.push('/[...aeoPath].md')
        }

        // ─── Injection summary ───
        // One line covering injected + skipped routes. This is the crawler-visible
        // contract consumers inherit; making it loud at build time prevents silent
        // SEO regressions on bump-only adoption.
        const skippedStr =
          skipped.length > 0
            ? ` skipped ${skipped.length}: ${skipped
                .map((s) => `${s.pattern} (${s.reason})`)
                .join(', ')}`
            : ''
        logger.info(
          `SEO routes: injected ${injected.length} — ${injected.join(', ') || '(none)'}.${skippedStr}`,
        )
      },

      'astro:build:done': async ({ dir, pages, logger }) => {
        const outDir = fileURLToPath(dir)
        const contentProvider = userOptions.contentProvider

        // ─── Build-time prerender-gated guard (test 32) ───
        // Note: we try to load items here to check the guard, but this is
        // best-effort. If contentProvider uses astro:content or other Vite
        // virtuals, it will fail at raw-Node import time and we skip silently.
        // The guard is a defence-in-depth check; the primary guarantee comes
        // from the consumer-set `export const prerender = false` on gated routes.
        if (contentProvider && options.flexibleSampling?.enabled) {
          // Counted inside the try, acted on outside it: throwing from within
          // the try would be swallowed by the catch below and reported as a
          // "skipped" guard, so a real violation could never fail the build.
          let gatedIssueCount = 0
          try {
            const items = await contentProvider({ type: 'articles' }, {} as never)
            const prerenderedUrls = new Set(pages.map((p) => `/${p.pathname}`.replace(/\/+/g, '/')))
            const issues = validatePrerenderedGatedRoutes({ prerenderedUrls, items })
            for (const issue of issues) logger.error(issue.message)
            gatedIssueCount = issues.length
          } catch (err) {
            // Don't fail the build if the provider can't run from Node context —
            // the injected prerender route handles the primary emission path.
            logger.info(
              `Skipped prerender-gated-content guard (contentProvider not Node-ESM-safe): ${err instanceof Error ? err.message : String(err)}`,
            )
          }
          if (gatedIssueCount > 0) {
            throw new Error(
              `[@growth-labs/seo] ${gatedIssueCount} prerendered route(s) serve members-gated items. See errors above.`,
            )
          }
        }

        // ─── Legacy per-page validation ───
        if (!options.validation.enabled) return

        const htmlFiles = findHtmlFiles(outDir)
        let errorCount = 0
        let warningCount = 0

        for (const file of htmlFiles) {
          const html = readFileSync(file, 'utf-8')
          const result = validatePage(html, {
            titleMaxLength: options.validation.titleMaxLength,
            descriptionMaxLength: options.validation.descriptionMaxLength,
            heroMinWidth: options.validation.heroMinWidth,
          })
          // Path shown in log lines: the file path with the output dir stripped.
          const relPath = file.replace(outDir, '')
          for (const error of result.errors) {
            logger.error(`${relPath}: ${error}`)
            errorCount++
          }
          for (const warning of result.warnings) {
            logger.warn(`${relPath}: ${warning}`)
            warningCount++
          }

          // Validate every inline JSON-LD block; unparseable JSON counts as an error.
          const jsonLdMatches = html.matchAll(
            /<script type="application\/ld\+json">([\s\S]*?)<\/script>/gi,
          )
          for (const match of jsonLdMatches) {
            try {
              const parsed = JSON.parse(match[1]!)
              const ldResult = validateJsonLd(parsed)
              for (const e of ldResult.errors) {
                logger.error(`${relPath} [JSON-LD]: ${e}`)
                errorCount++
              }
              for (const w of ldResult.warnings) {
                logger.warn(`${relPath} [JSON-LD]: ${w}`)
                warningCount++
              }
            } catch {
              logger.error(`${relPath}: Invalid JSON-LD`)
              errorCount++
            }
          }
        }

        if (errorCount || warningCount) {
          logger.info(`SEO validation: ${errorCount} errors, ${warningCount} warnings`)
        } else {
          logger.info('SEO validation: all checks passed')
        }
      },
    },
  }
}
362
+
363
/**
 * Recursively collect the paths of all `.html` files under a directory.
 *
 * Entries are visited in the order `readdirSync` returns them, descending
 * into each subdirectory at the point where it is encountered. A directory
 * that cannot be read contributes no results instead of raising.
 *
 * @param dir - Directory to scan.
 * @returns Paths (each built by joining `dir` with the entry names) of every
 *   file whose name ends in `.html`.
 */
function findHtmlFiles(dir: string): string[] {
  let entries
  try {
    entries = readdirSync(dir, { withFileTypes: true })
  } catch {
    // Best-effort: a missing or unreadable directory simply yields nothing.
    return []
  }
  return entries.flatMap((entry) => {
    const candidate = join(dir, entry.name)
    if (entry.isDirectory()) return findHtmlFiles(candidate)
    return entry.name.endsWith('.html') ? [candidate] : []
  })
}
375
+
376
+ export { getConfig, getContentProvider } from './_internal/state.js'
377
+ export type { ResolvedSeoOptions, SeoOptions } from './options.js'
378
+ export { resolveAeoTwins, seoOptionsSchema, validatedSeoOptionsSchema } from './options.js'
379
+ export { resolveSeoConfig, resolveSiteUrl } from './site-url-core.js'
380
+ export type * from './types.js'