@growth-labs/seo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/utils/validation.d.ts.map +1 -1
  2. package/dist/utils/validation.js +22 -0
  3. package/dist/utils/validation.js.map +1 -1
  4. package/package.json +9 -5
  5. package/src/_internal/state.ts +26 -0
  6. package/src/bindings.ts +146 -0
  7. package/src/cron/prune-aeo-r2.ts +140 -0
  8. package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
  9. package/src/index.ts +380 -0
  10. package/src/middleware/seo.ts +350 -0
  11. package/src/options.ts +456 -0
  12. package/src/routes/aeo-twin.ts +130 -0
  13. package/src/routes/apple-news.ts +36 -0
  14. package/src/routes/llms-full.ts +36 -0
  15. package/src/routes/llms.ts +15 -0
  16. package/src/routes/podcast-narration.ts +45 -0
  17. package/src/routes/podcast.ts +27 -0
  18. package/src/routes/revalidate.ts +298 -0
  19. package/src/routes/robots.ts +21 -0
  20. package/src/routes/rss.ts +29 -0
  21. package/src/routes/sitemap-articles.ts +25 -0
  22. package/src/routes/sitemap-index.ts +89 -0
  23. package/src/routes/sitemap-markdown.ts +39 -0
  24. package/src/routes/sitemap-pages.ts +24 -0
  25. package/src/routes/sitemap-products.ts +24 -0
  26. package/src/routes/sitemap-videos.ts +24 -0
  27. package/src/runtime.ts +17 -0
  28. package/src/site-url-core.ts +71 -0
  29. package/src/site-url.ts +21 -0
  30. package/src/types.ts +166 -0
  31. package/src/utils/aeo-summary.ts +176 -0
  32. package/src/utils/aeo-twin-emitter.ts +173 -0
  33. package/src/utils/aeo.ts +223 -0
  34. package/src/utils/apple-news-anf.ts +163 -0
  35. package/src/utils/apple-news-rss.ts +136 -0
  36. package/src/utils/content-filter.ts +87 -0
  37. package/src/utils/crawler-class.ts +155 -0
  38. package/src/utils/define-content-provider.ts +65 -0
  39. package/src/utils/effective-auth.ts +44 -0
  40. package/src/utils/fcrdns.ts +269 -0
  41. package/src/utils/fresh-layer.ts +175 -0
  42. package/src/utils/hreflang.ts +26 -0
  43. package/src/utils/index.ts +91 -0
  44. package/src/utils/json-ld/article.ts +120 -0
  45. package/src/utils/json-ld/audio.ts +32 -0
  46. package/src/utils/json-ld/breadcrumb.ts +28 -0
  47. package/src/utils/json-ld/faq.ts +18 -0
  48. package/src/utils/json-ld/howto.ts +23 -0
  49. package/src/utils/json-ld/index.ts +12 -0
  50. package/src/utils/json-ld/item-list.ts +26 -0
  51. package/src/utils/json-ld/organization.ts +42 -0
  52. package/src/utils/json-ld/person.ts +25 -0
  53. package/src/utils/json-ld/product.ts +155 -0
  54. package/src/utils/json-ld/video.ts +20 -0
  55. package/src/utils/json-ld/website.ts +27 -0
  56. package/src/utils/llms-full.ts +90 -0
  57. package/src/utils/llms.ts +45 -0
  58. package/src/utils/meta.ts +184 -0
  59. package/src/utils/podcast.ts +112 -0
  60. package/src/utils/robots.ts +47 -0
  61. package/src/utils/rss.ts +64 -0
  62. package/src/utils/seo-head.ts +81 -0
  63. package/src/utils/sitemap-markdown.ts +80 -0
  64. package/src/utils/sitemap.ts +169 -0
  65. package/src/utils/staleness.ts +61 -0
  66. package/src/utils/validation.ts +308 -0
  67. package/src/virtual.d.ts +8 -0
  68. package/src/vite-plugin.ts +66 -0
@@ -0,0 +1,64 @@
1
+ import type { SeoOptionsWithResolvedSite } from '../options.js'
2
+ import type { ContentItem } from '../types.js'
3
+
4
/**
 * Escape the five XML-reserved characters so `str` can be embedded in element
 * text or in a quoted attribute value.
 *
 * The ampersand is handled first so that the `&` introduced by the other
 * entity references is not escaped a second time.
 *
 * @param str - Raw text to place into the feed.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by entity references.
 */
function escapeXml(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
}
12
+
13
/**
 * Format a date string as the UTC timestamp used for `<pubDate>`.
 *
 * @param dateStr - Any string accepted by the `Date` constructor.
 * @returns The `Date.prototype.toUTCString()` form (for example
 *   `Tue, 02 Jan 2024 03:04:05 GMT`), or an empty string when `dateStr` cannot
 *   be parsed, so callers never emit the literal text `Invalid Date`.
 */
function toRfc822(dateStr: string): string {
  const parsed = new Date(dateStr)
  // An unparseable input produces a Date whose time value is NaN.
  if (Number.isNaN(parsed.getTime())) return ''
  return parsed.toUTCString()
}
16
+
17
/**
 * Render an RSS 2.0 document for the given content items.
 *
 * The channel title is the organization name, the channel link is the resolved
 * site URL, and the self link points at `${site}/feed.xml`. Each item always
 * carries title, link and guid; `pubDate`, `description`, one `dc:creator` per
 * author and an audio `enclosure` are appended only when the item provides the
 * corresponding data.
 *
 * @param items - Content items to list, in output order.
 * @param options - Resolved SEO options (`site`, `organization`, `defaults`).
 * @param description - Channel description; falls back to the organization name.
 * @returns The feed as an XML string.
 */
export function generateRssFeed(
  items: ContentItem[],
  options: SeoOptionsWithResolvedSite,
  description?: string,
): string {
  const { site, organization, defaults } = options
  const channelDescription = description ?? organization.name
  // Only the first underscore is swapped, e.g. `en_US` becomes `en-us`.
  const channelLanguage = defaults.locale.replace('_', '-').toLowerCase()
  const builtAt = new Date().toUTCString()

  // Serialize a single <item>, collecting the optional child elements first.
  const renderItem = (entry: ContentItem): string => {
    let optionalTags = ''

    if (entry.datePublished) {
      const stamp = toRfc822(entry.datePublished)
      if (stamp) {
        optionalTags += `\n <pubDate>${escapeXml(stamp)}</pubDate>`
      }
    }
    if (entry.description) {
      optionalTags += `\n <description>${escapeXml(entry.description)}</description>`
    }
    for (const author of entry.authors ?? []) {
      optionalTags += `\n <dc:creator>${escapeXml(author.name)}</dc:creator>`
    }
    if (entry.audio) {
      optionalTags += `\n <enclosure url="${escapeXml(entry.audio.url)}" type="audio/mpeg" length="0"/>`
    }

    return [
      ' <item>',
      `<title>${escapeXml(entry.title)}</title>`,
      `<link>${escapeXml(entry.url)}</link>`,
      `<guid isPermaLink="true">${escapeXml(entry.url)}</guid>${optionalTags}`,
      '</item>',
    ].join('\n')
  }

  const renderedItems: string[] = []
  for (const entry of items) {
    renderedItems.push(renderItem(entry))
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<rss version="2.0"',
    'xmlns:atom="http://www.w3.org/2005/Atom"',
    'xmlns:dc="http://purl.org/dc/elements/1.1/"',
    'xmlns:content="http://purl.org/rss/1.0/modules/content/">',
    '<channel>',
    `<title>${escapeXml(organization.name)}</title>`,
    `<description>${escapeXml(channelDescription)}</description>`,
    `<link>${escapeXml(site)}</link>`,
    `<language>${escapeXml(channelLanguage)}</language>`,
    `<atom:link href="${escapeXml(`${site}/feed.xml`)}" rel="self" type="application/rss+xml"/>`,
    `<lastBuildDate>${builtAt}</lastBuildDate>`,
    renderedItems.join('\n'),
    '</channel>',
    '</rss>',
  ].join('\n')
}
@@ -0,0 +1,81 @@
1
+ import type { SeoOptionsWithResolvedSite } from '../options.js'
2
+ import type { ContentItem, JsonLdObject } from '../types.js'
3
+ import {
4
+ generateArticleJsonLd,
5
+ generateAudioJsonLd,
6
+ generateOrganizationJsonLd,
7
+ generateProductJsonLd,
8
+ generateVideoJsonLd,
9
+ generateWebSiteJsonLd,
10
+ } from './json-ld/index.js'
11
+ import type { OgVariant } from './meta.js'
12
+
13
/** Inputs accepted by `buildSeoHeadTitleDescription`. */
interface BuildSeoHeadTitleDescriptionOptions {
  /** Resolved SEO options; `organization.name` and `defaults.titleSuffix` are read. */
  options: SeoOptionsWithResolvedSite
  /** Content item whose `title` / `description` are used when no explicit value is passed. */
  item?: ContentItem
  /** Explicit title; takes precedence over `item.title`. */
  title?: string
  /** Explicit description; takes precedence over `item.description`. */
  description?: string
}
19
+
20
/** Inputs accepted by `buildSeoHeadJsonLd`. */
interface BuildSeoHeadJsonLdOptions {
  /** Resolved SEO options forwarded to the individual JSON-LD generators. */
  options: SeoOptionsWithResolvedSite
  /** Open Graph variant; selects the primary schema emitted for `item`. */
  variant: OgVariant
  /** Content item being rendered; when absent, site-level schemas are emitted. */
  item?: ContentItem
  /** Set to `false` to suppress all JSON-LD output. Treated as `true` when omitted. */
  emitJsonLd?: boolean
}
26
+
27
/**
 * Resolve the `<title>` text and meta description for a page.
 *
 * Title precedence: explicit `title`, then `item.title`, then the organization
 * name. The configured `defaults.titleSuffix` is appended unless the title
 * already ends with it. Description precedence: explicit `description`, then
 * `item.description`; it stays `undefined` when neither is present.
 *
 * @returns The resolved title and (possibly undefined) description.
 */
export function buildSeoHeadTitleDescription(
  input: BuildSeoHeadTitleDescriptionOptions,
): { title: string; description?: string } {
  const { item, options } = input

  let fullTitle = input.title ?? item?.title ?? options.organization.name
  const titleSuffix = options.defaults.titleSuffix
  if (titleSuffix && !fullTitle.endsWith(titleSuffix)) {
    fullTitle = `${fullTitle}${titleSuffix}`
  }

  return {
    title: fullTitle,
    description: input.description ?? item?.description,
  }
}
43
+
44
/**
 * Assemble the JSON-LD objects to embed in a page head.
 *
 * - `emitJsonLd === false`: nothing is emitted.
 * - No `item`: the WebSite and Organization schemas are emitted.
 * - Otherwise one primary schema is chosen by `variant` (`article`, `product`
 *   when the item carries product data, or `website`), followed by a video
 *   schema when `item.video` is set and an audio schema when `item.audio` is set.
 *
 * @returns The JSON-LD objects in emission order; may be empty.
 */
export function buildSeoHeadJsonLd(input: BuildSeoHeadJsonLdOptions): JsonLdObject[] {
  const { item, options, variant, emitJsonLd = true } = input

  if (!emitJsonLd) {
    return []
  }
  if (!item) {
    return [generateWebSiteJsonLd(options), generateOrganizationJsonLd(options)]
  }

  const schemas: JsonLdObject[] = []

  switch (variant) {
    case 'article':
      schemas.push(generateArticleJsonLd(item, options))
      break
    case 'product':
      // A product variant without product data contributes no primary schema.
      if (item.product) {
        schemas.push(generateProductJsonLd(item.product, item.url, options))
      }
      break
    case 'website':
      schemas.push(generateWebSiteJsonLd(options))
      break
  }

  if (item.video) {
    schemas.push(generateVideoJsonLd(item))
  }

  if (item.audio) {
    const audioContext = {
      title: item.title,
      datePublished: item.datePublished,
      articleUrl: item.url,
    }
    schemas.push(generateAudioJsonLd(item.audio, audioContext))
  }

  return schemas
}
@@ -0,0 +1,80 @@
1
+ import type { ContentItem } from '../types.js'
2
+ import { forMarkdownSitemap } from './content-filter.js'
3
+
4
/**
 * Replace the five XML-reserved characters (`&`, `<`, `>`, `"`, `'`) with their
 * predefined entity references.
 *
 * @param str - Raw text to embed in the sitemap.
 * @returns The escaped text.
 */
function escapeXml(str: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  }
  // A single pass over the input is equivalent to replacing `&` first and the
  // remaining characters afterwards.
  return str.replace(/[&<>"']/g, (ch) => entities[ch] ?? ch)
}
12
+
13
/**
 * Derive the primary markdown twin URL for an article URL: strip every trailing
 * slash, then append `.md`.
 *
 * Consumers can override via `aeoTwins.twinUrl` in config; when overridden, the
 * same function drives both twin emission and sitemap discovery URL.
 *
 * @param articleUrl - URL of the HTML article.
 * @returns The article URL without trailing slashes, suffixed with `.md`.
 */
function defaultTwinUrl(articleUrl: string): string {
  let end = articleUrl.length
  while (end > 0 && articleUrl.charAt(end - 1) === '/') {
    end -= 1
  }
  return articleUrl.slice(0, end) + '.md'
}
24
+
25
/**
 * One markdown-twin sitemap entry.
 * NOTE(review): not referenced elsewhere in this module; confirm external consumers.
 */
export interface MarkdownSitemapEntry {
  /** URL of the primary markdown twin. */
  primaryTwinUrl: string
  /** Last-modified value for the entry, when known. */
  lastmod?: string
  /**
   * Optional freshLayer-sourced override, for when R2 has a later lastModified
   * than the item's dateModified at build time.
   */
  freshLayerLastModified?: string
}
32
+
33
/** Inputs accepted by `generateMarkdownSitemap`. */
export interface GenerateMarkdownSitemapOptions {
  /** Candidate content items; filtering happens inside the generator. */
  items: ContentItem[]
  /** Maps an article URL to its markdown twin URL; a `.md`-suffix mapping is used when omitted. */
  twinUrl?: (articleUrl: string) => string
  /**
   * Optional per-slug R2 lastmod overrides, keyed by the article URL. When
   * present, lastmod reflects max(item.dateModified, freshLayer.lastModified).
   */
  freshLayerLastmod?: Map<string, string>
}
40
+
41
/**
 * Generate `/sitemap-markdown.xml` — the AEO twin URL sitemap.
 *
 * Google discovers URLs via crawlable links and sitemaps, not MIME-probing. This
 * sitemap lists every emitted `.md` twin URL so the markdown corpus is
 * discoverable the same way HTML pages are.
 *
 * Filtering rules (centralized in content-filter.forMarkdownSitemap):
 * - Members items excluded unconditionally (no .md twin exists for them).
 * - Public items with `includeInSitemap: false` excluded.
 *
 * For each remaining item the `<lastmod>` is the later of the build-time date
 * (`dateModified`, else `datePublished`) and the fresh-layer override for that
 * article URL; the element is omitted when neither is available.
 *
 * @returns The sitemap as an XML string.
 */
export function generateMarkdownSitemap({
  items,
  twinUrl = defaultTwinUrl,
  freshLayerLastmod,
}: GenerateMarkdownSitemapOptions): string {
  const urlElements: string[] = []

  for (const entry of forMarkdownSitemap(items)) {
    const twinLocation = twinUrl(entry.url)
    const freshDate = freshLayerLastmod?.get(entry.url)
    const buildDate = entry.dateModified ?? entry.datePublished ?? ''
    // Whichever of the two timestamps is more recent wins.
    const newest = pickLatest(freshDate, buildDate)

    let element = ` <url>\n <loc>${escapeXml(twinLocation)}</loc>`
    if (newest) {
      element += `\n <lastmod>${escapeXml(newest)}</lastmod>`
    }
    element += '\n </url>'
    urlElements.push(element)
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    urlElements.join('\n'),
    '</urlset>',
  ].join('\n')
}
75
+
76
/**
 * Return whichever of two date strings denotes the later instant.
 *
 * - A missing or empty value loses to the other one.
 * - An unparseable value loses to a parseable one, so a malformed timestamp
 *   never displaces a valid one.
 * - On a tie, `a` is returned.
 *
 * @param a - First candidate (may be undefined or empty).
 * @param b - Second candidate (may be undefined or empty).
 * @returns The winning original string, or `undefined`/empty when neither is usable.
 */
function pickLatest(a: string | undefined, b: string | undefined): string | undefined {
  if (!a) return b
  if (!b) return a

  const aTime = new Date(a).getTime()
  const bTime = new Date(b).getTime()
  // Any comparison against NaN is false, so unparseable inputs are handled explicitly.
  if (Number.isNaN(aTime)) return b
  if (Number.isNaN(bTime)) return a

  return aTime >= bTime ? a : b
}
@@ -0,0 +1,169 @@
1
+ import type { ResolvedSeoOptions } from '../options.js'
2
+ import type { ContentItem } from '../types.js'
3
+
4
/**
 * Path of the sitemap index document.
 * NOTE(review): presumably the route that serves `generateSitemapIndex` output — confirm against the route handler.
 */
+ export const SITEMAP_INDEX_PATH = '/sitemap-index.xml'
5
+
6
/** One child sitemap listed in the sitemap index. */
export interface SitemapEntry {
  /** URL of the child sitemap; written to `<loc>`. */
  loc: string
  /** Last-modified value; written to `<lastmod>` when non-empty. */
  lastmod?: string
}
10
+
11
/**
 * Escape `&`, `<`, `>`, `"` and `'` with their predefined XML entity references.
 *
 * @param str - Raw text to embed in a sitemap document.
 * @returns The escaped text.
 */
function escapeXml(str: string): string {
  // The ampersand must be first so entities produced by later steps stay intact.
  const substitutions: ReadonlyArray<readonly [string, string]> = [
    ['&', '&amp;'],
    ['<', '&lt;'],
    ['>', '&gt;'],
    ['"', '&quot;'],
    ["'", '&apos;'],
  ]

  let escaped = str
  for (const [raw, entity] of substitutions) {
    escaped = escaped.split(raw).join(entity)
  }
  return escaped
}
19
+
20
/**
 * Render a `<sitemapindex>` document listing the given child sitemaps.
 *
 * @param sitemaps - Child sitemaps in output order; `lastmod` is emitted only when set.
 * @returns The sitemap index as an XML string.
 */
export function generateSitemapIndex(sitemaps: SitemapEntry[]): string {
  const sitemapElements: string[] = []

  for (const child of sitemaps) {
    let element = ` <sitemap>\n <loc>${escapeXml(child.loc)}</loc>`
    if (child.lastmod) {
      element += `\n <lastmod>${escapeXml(child.lastmod)}</lastmod>`
    }
    element += '\n </sitemap>'
    sitemapElements.push(element)
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    sitemapElements.join('\n'),
    '</sitemapindex>',
  ].join('\n')
}
33
+
34
/**
 * Tell whether `dateStr` falls inside the 48 hours leading up to `now`.
 *
 * Dates in the future and unparseable dates are rejected: a future timestamp
 * gives a negative age, which would otherwise satisfy the upper bound and mark
 * a not-yet-published item as recent.
 *
 * @param dateStr - Publication date, in any form accepted by the `Date` constructor.
 * @param now - Reference instant; defaults to the current time.
 * @returns `true` when `0 <= now - date <= 48h`, otherwise `false`.
 */
function isWithin48Hours(dateStr: string, now: Date = new Date()): boolean {
  const windowMs = 48 * 60 * 60 * 1000
  const ageMs = now.getTime() - new Date(dateStr).getTime()
  // NaN (invalid date) fails both comparisons and therefore yields false.
  return ageMs >= 0 && ageMs <= windowMs
}
40
+
41
/**
 * Render the article sitemap (`<urlset>` with image, news and xhtml namespaces).
 *
 * Per article, after `<loc>`:
 * - `<lastmod>` from `dateModified`, else `datePublished`, when either is set;
 * - one `<image:image>` when the article has an image;
 * - a `<news:news>` block when `options.googleNews` is enabled and the article
 *   has a `datePublished` that passes `isWithin48Hours`;
 * - one `<xhtml:link rel="alternate">` per entry in `alternateLocales`.
 *
 * @param articles - Articles to list, in output order.
 * @param options - Resolved SEO options (`googleNews`, `organization`).
 * @returns The sitemap as an XML string.
 */
export function generateArticleSitemap(
  articles: ContentItem[],
  options: ResolvedSeoOptions,
): string {
  const { googleNews, organization } = options

  // Serialize one <url> element by appending fragments in document order.
  const renderUrl = (article: ContentItem): string => {
    const parts: string[] = [` <url>\n <loc>${escapeXml(article.url)}</loc>`]

    const modified = article.dateModified ?? article.datePublished ?? ''
    if (modified) {
      parts.push(`\n <lastmod>${escapeXml(modified)}</lastmod>`)
    }

    if (article.image) {
      parts.push(
        `\n <image:image>\n <image:loc>${escapeXml(article.image)}</image:loc>\n </image:image>`,
      )
    }

    // News metadata is attached only to recent articles when Google News is enabled.
    if (googleNews && article.datePublished && isWithin48Hours(article.datePublished)) {
      parts.push(
        [
          '\n <news:news>',
          '<news:publication>',
          `<news:name>${escapeXml(organization.name)}</news:name>`,
          `<news:language>${escapeXml('en')}</news:language>`,
          '</news:publication>',
          `<news:publication_date>${escapeXml(article.datePublished)}</news:publication_date>`,
          `<news:title>${escapeXml(article.title)}</news:title>`,
          '</news:news>',
        ].join('\n'),
      )
    }

    for (const alt of article.alternateLocales ?? []) {
      parts.push(
        `\n <xhtml:link rel="alternate" hreflang="${escapeXml(alt.lang)}" href="${escapeXml(alt.url)}"/>`,
      )
    }

    parts.push('\n </url>')
    return parts.join('')
  }

  const urlElements: string[] = []
  for (const article of articles) {
    urlElements.push(renderUrl(article))
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"',
    'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"',
    'xmlns:xhtml="http://www.w3.org/1999/xhtml">',
    urlElements.join('\n'),
    '</urlset>',
  ].join('\n')
}
90
+
91
/**
 * Render the plain pages sitemap: one `<url>` per page with `<loc>` and, when
 * `dateModified` or `datePublished` is set, a `<lastmod>`.
 *
 * @param pages - Pages to list, in output order.
 * @returns The sitemap as an XML string.
 */
export function generatePagesSitemap(pages: ContentItem[]): string {
  const urlElements: string[] = []

  for (const page of pages) {
    const modified = page.dateModified ?? page.datePublished ?? ''
    let element = ` <url>\n <loc>${escapeXml(page.url)}</loc>`
    if (modified) {
      element += `\n <lastmod>${escapeXml(modified)}</lastmod>`
    }
    element += '\n </url>'
    urlElements.push(element)
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    urlElements.join('\n'),
    '</urlset>',
  ].join('\n')
}
105
+
106
/**
 * Render the video sitemap. Items without `video` data are skipped.
 *
 * Each `<video:video>` always carries the thumbnail and title; description,
 * content location, player location, duration and publication date follow in
 * that order, each only when the corresponding value is set.
 *
 * @param items - Content items; only those with a `video` are listed.
 * @returns The sitemap as an XML string.
 */
export function generateVideoSitemap(items: ContentItem[]): string {
  const urlElements = items.flatMap((entry) => {
    const video = entry.video
    if (!video) return []

    let optionalTags = ''
    if (entry.description) {
      optionalTags += `\n <video:description>${escapeXml(entry.description)}</video:description>`
    }
    if (video.contentUrl) {
      optionalTags += `\n <video:content_loc>${escapeXml(video.contentUrl)}</video:content_loc>`
    }
    if (video.embedUrl) {
      optionalTags += `\n <video:player_loc>${escapeXml(video.embedUrl)}</video:player_loc>`
    }
    if (video.duration) {
      optionalTags += `\n <video:duration>${escapeXml(video.duration)}</video:duration>`
    }
    if (entry.datePublished) {
      optionalTags += `\n <video:publication_date>${escapeXml(entry.datePublished)}</video:publication_date>`
    }

    return [
      [
        ' <url>',
        `<loc>${escapeXml(entry.url)}</loc>`,
        '<video:video>',
        `<video:thumbnail_loc>${escapeXml(video.thumbnailUrl)}</video:thumbnail_loc>`,
        `<video:title>${escapeXml(entry.title)}</video:title>${optionalTags}`,
        '</video:video>',
        '</url>',
      ].join('\n'),
    ]
  })

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">',
    urlElements.join('\n'),
    '</urlset>',
  ].join('\n')
}
143
+
144
/**
 * Render the product sitemap. Items without `product` data are skipped.
 *
 * Each `<url>` lists `<loc>`, an optional `<lastmod>` (`dateModified`, else
 * `datePublished`), and one `<image:image>` per product image. When the product
 * has no images, the item's own `image` is used if present.
 *
 * @param items - Content items; only those with a `product` are listed.
 * @returns The sitemap as an XML string.
 */
export function generateProductSitemap(items: ContentItem[]): string {
  const urlElements = items.flatMap((entry) => {
    const product = entry.product
    if (!product) return []

    // Product images take priority; fall back to the item's single image.
    let imageUrls: string[]
    if (product.images.length > 0) {
      imageUrls = product.images
    } else if (entry.image) {
      imageUrls = [entry.image]
    } else {
      imageUrls = []
    }

    let imageTags = ''
    for (const imageUrl of imageUrls) {
      imageTags += `\n <image:image>\n <image:loc>${escapeXml(imageUrl)}</image:loc>\n </image:image>`
    }

    const modified = entry.dateModified ?? entry.datePublished ?? ''
    const lastmodTag = modified ? `\n <lastmod>${escapeXml(modified)}</lastmod>` : ''

    return [` <url>\n <loc>${escapeXml(entry.url)}</loc>${lastmodTag}${imageTags}\n </url>`]
  })

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">',
    urlElements.join('\n'),
    '</urlset>',
  ].join('\n')
}
@@ -0,0 +1,61 @@
1
+ import type { ContentItem } from '../types.js'
2
+
3
/**
 * Compute a SHA-256 content hash for staleness validation. Uses `crypto.subtle`
 * so the same implementation runs in Node (build-time) and Workers (runtime).
 * Never node:crypto.
 *
 * The hashed input is the item's title, its description (empty string when
 * absent) and the rendered body, joined in that order with a U+0001 separator.
 * A change to any of the three produces a different hash. Whitespace is
 * significant unless the consumer normalizes before calling.
 *
 * @param item - Supplies `title` and optional `description`.
 * @param renderedBody - Rendered body text of the item.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function computeContentHash(
  item: Pick<ContentItem, 'title' | 'description'>,
  renderedBody: string,
): Promise<string> {
  const fields = [item.title, item.description ?? '', renderedBody]
  const payload = new TextEncoder().encode(fields.join('\u0001'))
  const hashBuffer = await crypto.subtle.digest('SHA-256', payload)
  const hashBytes = new Uint8Array(hashBuffer)
  return toHex(hashBytes)
}
21
+
22
/**
 * Encode bytes as lowercase hexadecimal, two digits per byte.
 *
 * @param bytes - Bytes to encode.
 * @returns A string of length `2 * bytes.length`.
 */
function toHex(bytes: Uint8Array): string {
  return Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0')).join('')
}
29
+
30
/** One item whose recomputed content hash differs from the expected hash. */
export interface StalenessDriftRecord {
  /** URL of the drifted item. */
  url: string
  /** Hash that was expected for the item. */
  expectedHash: string
  /** Hash computed from the item's current title, description and rendered body. */
  actualHash: string
}
35
+
36
/**
 * Compare expected-vs-actual content hashes across a batch of items. Used by the
 * build-time validator to detect drift between two `contentProvider` calls made
 * during the same build (rare but possible when provider pulls from a mutating
 * source).
 *
 * Items with no (or an empty) entry in `expectedHashes` are skipped without
 * being rendered. Items are processed one at a time, in input order.
 *
 * Returns the list of items whose hash differs. Callers log these as warnings;
 * the build does NOT fail on drift (spec 1403: "does not fail the build").
 *
 * @param items - Items to verify.
 * @param expectedHashes - Expected hash per item, keyed by item URL.
 * @param renderBody - Produces the rendered body for an item (sync or async).
 * @returns One record per item whose recomputed hash differs from the expected one.
 */
export async function checkStaleness(
  items: ContentItem[],
  expectedHashes: Map<string, string>,
  renderBody: (item: ContentItem) => string | Promise<string>,
): Promise<StalenessDriftRecord[]> {
  const mismatches: StalenessDriftRecord[] = []

  for (const entry of items) {
    const expectedHash = expectedHashes.get(entry.url)
    if (expectedHash) {
      const renderedBody = await renderBody(entry)
      const actualHash = await computeContentHash(entry, renderedBody)
      if (expectedHash !== actualHash) {
        mismatches.push({ url: entry.url, expectedHash, actualHash })
      }
    }
  }

  return mismatches
}