@growth-labs/seo 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +22 -0
- package/dist/utils/validation.js.map +1 -1
- package/package.json +9 -5
- package/src/_internal/state.ts +26 -0
- package/src/bindings.ts +146 -0
- package/src/cron/prune-aeo-r2.ts +140 -0
- package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
- package/src/index.ts +380 -0
- package/src/middleware/seo.ts +350 -0
- package/src/options.ts +456 -0
- package/src/routes/aeo-twin.ts +130 -0
- package/src/routes/apple-news.ts +36 -0
- package/src/routes/llms-full.ts +36 -0
- package/src/routes/llms.ts +15 -0
- package/src/routes/podcast-narration.ts +45 -0
- package/src/routes/podcast.ts +27 -0
- package/src/routes/revalidate.ts +298 -0
- package/src/routes/robots.ts +21 -0
- package/src/routes/rss.ts +29 -0
- package/src/routes/sitemap-articles.ts +25 -0
- package/src/routes/sitemap-index.ts +89 -0
- package/src/routes/sitemap-markdown.ts +39 -0
- package/src/routes/sitemap-pages.ts +24 -0
- package/src/routes/sitemap-products.ts +24 -0
- package/src/routes/sitemap-videos.ts +24 -0
- package/src/runtime.ts +17 -0
- package/src/site-url-core.ts +71 -0
- package/src/site-url.ts +21 -0
- package/src/types.ts +166 -0
- package/src/utils/aeo-summary.ts +176 -0
- package/src/utils/aeo-twin-emitter.ts +173 -0
- package/src/utils/aeo.ts +223 -0
- package/src/utils/apple-news-anf.ts +163 -0
- package/src/utils/apple-news-rss.ts +136 -0
- package/src/utils/content-filter.ts +87 -0
- package/src/utils/crawler-class.ts +155 -0
- package/src/utils/define-content-provider.ts +65 -0
- package/src/utils/effective-auth.ts +44 -0
- package/src/utils/fcrdns.ts +269 -0
- package/src/utils/fresh-layer.ts +175 -0
- package/src/utils/hreflang.ts +26 -0
- package/src/utils/index.ts +91 -0
- package/src/utils/json-ld/article.ts +120 -0
- package/src/utils/json-ld/audio.ts +32 -0
- package/src/utils/json-ld/breadcrumb.ts +28 -0
- package/src/utils/json-ld/faq.ts +18 -0
- package/src/utils/json-ld/howto.ts +23 -0
- package/src/utils/json-ld/index.ts +12 -0
- package/src/utils/json-ld/item-list.ts +26 -0
- package/src/utils/json-ld/organization.ts +42 -0
- package/src/utils/json-ld/person.ts +25 -0
- package/src/utils/json-ld/product.ts +155 -0
- package/src/utils/json-ld/video.ts +20 -0
- package/src/utils/json-ld/website.ts +27 -0
- package/src/utils/llms-full.ts +90 -0
- package/src/utils/llms.ts +45 -0
- package/src/utils/meta.ts +184 -0
- package/src/utils/podcast.ts +112 -0
- package/src/utils/robots.ts +47 -0
- package/src/utils/rss.ts +64 -0
- package/src/utils/seo-head.ts +81 -0
- package/src/utils/sitemap-markdown.ts +80 -0
- package/src/utils/sitemap.ts +169 -0
- package/src/utils/staleness.ts +61 -0
- package/src/utils/validation.ts +308 -0
- package/src/virtual.d.ts +8 -0
- package/src/vite-plugin.ts +66 -0
package/src/utils/rss.ts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { SeoOptionsWithResolvedSite } from '../options.js'
|
|
2
|
+
import type { ContentItem } from '../types.js'
|
|
3
|
+
|
|
4
|
+
/**
 * Escape the five XML-reserved characters so `str` can be embedded safely in
 * element text or in a quoted attribute value.
 *
 * The ampersand is replaced first; otherwise the `&` introduced by the other
 * entity references would itself be escaped a second time.
 *
 * @param str - Raw text to embed in XML.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by entity references.
 */
function escapeXml(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
}
|
|
12
|
+
|
|
13
|
+
/**
 * Format a date string as the UTC timestamp used for RSS `<pubDate>`
 * (for example `Tue, 02 Jan 2024 03:04:05 GMT`).
 *
 * @param dateStr - Any string accepted by the `Date` constructor.
 * @returns The `Date#toUTCString()` rendering, or an empty string when
 *   `dateStr` cannot be parsed. The empty string lets the caller's existing
 *   truthiness check drop the element instead of emitting `Invalid Date`.
 */
function toRfc822(dateStr: string): string {
  const parsed = new Date(dateStr)
  if (Number.isNaN(parsed.getTime())) return ''
  return parsed.toUTCString()
}
|
|
16
|
+
|
|
17
|
+
/**
 * Render an RSS 2.0 document for the given content items.
 *
 * Channel metadata comes from `options`: the organization name is the channel
 * title, `site` is the channel link and the base of the `atom:link` self
 * reference (`<site>/feed.xml`), and `defaults.locale` (first `_` replaced by
 * `-`, lower-cased) is the channel language. `lastBuildDate` is the time of
 * this call.
 *
 * Per item, `<pubDate>`, `<description>`, `<dc:creator>` (one per author) and
 * `<enclosure>` are emitted only when the corresponding data is present.
 *
 * @param items - Content items, rendered in the order given.
 * @param options - SEO options with a resolved site URL.
 * @param description - Channel description; falls back to the organization name.
 * @returns The complete feed as an XML string.
 */
export function generateRssFeed(
  items: ContentItem[],
  options: SeoOptionsWithResolvedSite,
  description?: string,
): string {
  const { site, organization, defaults } = options
  const channelDescription = description ?? organization.name
  const channelLanguage = defaults.locale.replace('_', '-').toLowerCase()
  const builtAt = new Date().toUTCString()

  const renderItem = (entry: ContentItem): string => {
    // Optional child elements, collected in output order.
    const optional: string[] = []

    const published = entry.datePublished ? toRfc822(entry.datePublished) : ''
    if (published) {
      optional.push(`\n      <pubDate>${escapeXml(published)}</pubDate>`)
    }
    if (entry.description) {
      optional.push(`\n      <description>${escapeXml(entry.description)}</description>`)
    }
    for (const author of entry.authors ?? []) {
      optional.push(`\n      <dc:creator>${escapeXml(author.name)}</dc:creator>`)
    }
    if (entry.audio) {
      // Enclosure type and length are fixed values, not derived from the audio.
      optional.push(
        `\n      <enclosure url="${escapeXml(entry.audio.url)}" type="audio/mpeg" length="0"/>`,
      )
    }

    return [
      '    <item>',
      `      <title>${escapeXml(entry.title)}</title>`,
      `      <link>${escapeXml(entry.url)}</link>`,
      `      <guid isPermaLink="true">${escapeXml(entry.url)}</guid>${optional.join('')}`,
      '    </item>',
    ].join('\n')
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<rss version="2.0"',
    '  xmlns:atom="http://www.w3.org/2005/Atom"',
    '  xmlns:dc="http://purl.org/dc/elements/1.1/"',
    '  xmlns:content="http://purl.org/rss/1.0/modules/content/">',
    '  <channel>',
    `    <title>${escapeXml(organization.name)}</title>`,
    `    <description>${escapeXml(channelDescription)}</description>`,
    `    <link>${escapeXml(site)}</link>`,
    `    <language>${escapeXml(channelLanguage)}</language>`,
    `    <atom:link href="${escapeXml(`${site}/feed.xml`)}" rel="self" type="application/rss+xml"/>`,
    `    <lastBuildDate>${builtAt}</lastBuildDate>`,
    items.map(renderItem).join('\n'),
    '  </channel>',
    '</rss>',
  ].join('\n')
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import type { SeoOptionsWithResolvedSite } from '../options.js'
|
|
2
|
+
import type { ContentItem, JsonLdObject } from '../types.js'
|
|
3
|
+
import {
|
|
4
|
+
generateArticleJsonLd,
|
|
5
|
+
generateAudioJsonLd,
|
|
6
|
+
generateOrganizationJsonLd,
|
|
7
|
+
generateProductJsonLd,
|
|
8
|
+
generateVideoJsonLd,
|
|
9
|
+
generateWebSiteJsonLd,
|
|
10
|
+
} from './json-ld/index.js'
|
|
11
|
+
import type { OgVariant } from './meta.js'
|
|
12
|
+
|
|
13
|
+
/** Inputs accepted by `buildSeoHeadTitleDescription`. */
interface BuildSeoHeadTitleDescriptionOptions {
  /** Content item whose `title` / `description` serve as fallbacks. */
  item?: ContentItem
  /** Resolved SEO options; `organization.name` and `defaults.titleSuffix` are read. */
  options: SeoOptionsWithResolvedSite
  /** Explicit title; takes precedence over `item.title`. */
  title?: string
  /** Explicit description; takes precedence over `item.description`. */
  description?: string
}
|
|
19
|
+
|
|
20
|
+
/** Inputs accepted by `buildSeoHeadJsonLd`. */
interface BuildSeoHeadJsonLdOptions {
  /** Content item to describe; when absent, site-level JSON-LD is produced. */
  item?: ContentItem
  /** Resolved SEO options forwarded to the JSON-LD generators. */
  options: SeoOptionsWithResolvedSite
  /** Page variant; `'article'`, `'product'` and `'website'` select a primary schema. */
  variant: OgVariant
  /** Set to `false` to suppress all JSON-LD output. Defaults to `true`. */
  emitJsonLd?: boolean
}
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve the `<title>` and meta description for a page head.
 *
 * Title precedence: explicit `title`, then `item.title`, then the organization
 * name. When `options.defaults.titleSuffix` is set and the chosen title does
 * not already end with it, the suffix is appended.
 *
 * Description precedence: explicit `description`, then `item.description`;
 * it stays `undefined` when neither is provided.
 *
 * @returns The resolved title and (possibly undefined) description.
 */
export function buildSeoHeadTitleDescription({
  item,
  options,
  title,
  description,
}: BuildSeoHeadTitleDescriptionOptions): { title: string; description?: string } {
  let resolvedTitle = title ?? item?.title ?? options.organization.name

  const { titleSuffix } = options.defaults
  if (titleSuffix && !resolvedTitle.endsWith(titleSuffix)) {
    resolvedTitle = `${resolvedTitle}${titleSuffix}`
  }

  return {
    title: resolvedTitle,
    description: description ?? item?.description,
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Assemble the JSON-LD objects to emit in a page head.
 *
 * - `emitJsonLd === false`: nothing is emitted.
 * - No `item`: WebSite and Organization objects are emitted.
 * - With an `item`: a primary object chosen by `variant` (article; product,
 *   only when `item.product` is set; website), followed by a video object when
 *   `item.video` is set and an audio object when `item.audio` is set. Any other
 *   variant contributes no primary object.
 *
 * @returns The JSON-LD objects in emission order (possibly empty).
 */
export function buildSeoHeadJsonLd({
  item,
  options,
  variant,
  emitJsonLd = true,
}: BuildSeoHeadJsonLdOptions): JsonLdObject[] {
  if (!emitJsonLd) {
    return []
  }
  if (!item) {
    return [generateWebSiteJsonLd(options), generateOrganizationJsonLd(options)]
  }

  const schemas: JsonLdObject[] = []

  // Primary schema, selected by the page variant.
  switch (variant) {
    case 'article':
      schemas.push(generateArticleJsonLd(item, options))
      break
    case 'product':
      if (item.product) {
        schemas.push(generateProductJsonLd(item.product, item.url, options))
      }
      break
    case 'website':
      schemas.push(generateWebSiteJsonLd(options))
      break
    default:
      break
  }

  // Media schemas are appended regardless of variant.
  if (item.video) {
    schemas.push(generateVideoJsonLd(item))
  }
  if (item.audio) {
    const audioContext = {
      title: item.title,
      datePublished: item.datePublished,
      articleUrl: item.url,
    }
    schemas.push(generateAudioJsonLd(item.audio, audioContext))
  }

  return schemas
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js'
|
|
2
|
+
import { forMarkdownSitemap } from './content-filter.js'
|
|
3
|
+
|
|
4
|
+
/**
 * Escape the five XML-reserved characters so `str` can be embedded safely in
 * element text or in a quoted attribute value.
 *
 * The ampersand is replaced first; otherwise the `&` introduced by the other
 * entity references would itself be escaped a second time.
 *
 * @param str - Raw text to embed in XML.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by entity references.
 */
function escapeXml(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
}
|
|
12
|
+
|
|
13
|
+
/**
 * Default mapping from an article URL to its primary markdown twin URL:
 * strip every trailing `/`, then append `.md`.
 *
 * Consumers can override the mapping via `aeoTwins.twinUrl` in config; when
 * overridden, the same function drives both twin emission and the sitemap
 * discovery URL.
 *
 * Note: the input is treated as an opaque string, so an input made up only of
 * slashes (or an empty string) yields `.md`.
 *
 * @param articleUrl - URL of the HTML article.
 * @returns The twin URL ending in `.md`.
 */
function defaultTwinUrl(articleUrl: string): string {
  // Walk back over the run of trailing slashes.
  let end = articleUrl.length
  while (end > 0 && articleUrl[end - 1] === '/') {
    end -= 1
  }
  return articleUrl.slice(0, end) + '.md'
}
|
|
24
|
+
|
|
25
|
+
/** One markdown-twin entry: a twin URL plus optional modification timestamps. */
export interface MarkdownSitemapEntry {
  /** URL of the primary markdown twin. */
  primaryTwinUrl: string
  /** Build-time last-modified value, when known. */
  lastmod?: string
  /**
   * Optional freshLayer-sourced override, for when R2 has a later lastModified
   * than the item's dateModified at build time.
   */
  freshLayerLastModified?: string
}
|
|
32
|
+
|
|
33
|
+
/** Options accepted by `generateMarkdownSitemap`. */
export interface GenerateMarkdownSitemapOptions {
  /** Candidate content items; they are filtered before rendering. */
  items: ContentItem[]
  /** Maps an article URL to its twin URL. Defaults to appending `.md`. */
  twinUrl?: (articleUrl: string) => string
  /**
   * Optional per-slug R2 lastmod overrides, keyed by the article URL. When
   * present, lastmod reflects max(item.dateModified, freshLayer.lastModified).
   */
  freshLayerLastmod?: Map<string, string>
}
|
|
40
|
+
|
|
41
|
+
/**
 * Build `/sitemap-markdown.xml`, the sitemap of AEO markdown twin URLs.
 *
 * Google discovers URLs through crawlable links and sitemaps rather than by
 * probing MIME types, so every emitted `.md` twin is listed here to make the
 * markdown corpus discoverable the same way HTML pages are.
 *
 * Filtering is delegated to `forMarkdownSitemap` in content-filter, whose
 * documented rules are:
 * - Members items are excluded unconditionally (no .md twin exists for them).
 * - Public items with `includeInSitemap: false` are excluded.
 *
 * For each remaining item, `<lastmod>` is the later of the fresh-layer value
 * for the item's URL and the item's `dateModified` (falling back to
 * `datePublished`); the element is omitted when neither is available.
 *
 * @returns The sitemap XML document as a string.
 */
export function generateMarkdownSitemap({
  items,
  twinUrl = defaultTwinUrl,
  freshLayerLastmod,
}: GenerateMarkdownSitemapOptions): string {
  const urlNodes: string[] = []

  for (const item of forMarkdownSitemap(items)) {
    const loc = twinUrl(item.url)
    // Pick the most recent of the fresh-layer and build-time timestamps.
    const lastmod = pickLatest(
      freshLayerLastmod?.get(item.url),
      item.dateModified ?? item.datePublished ?? '',
    )

    const lines = ['  <url>', `    <loc>${escapeXml(loc)}</loc>`]
    if (lastmod) {
      lines.push(`    <lastmod>${escapeXml(lastmod)}</lastmod>`)
    }
    lines.push('  </url>')
    urlNodes.push(lines.join('\n'))
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    urlNodes.join('\n'),
    '</urlset>',
  ].join('\n')
}
|
|
75
|
+
|
|
76
|
+
/**
 * Return whichever of two date strings denotes the later instant.
 *
 * - A missing or empty argument yields the other argument unchanged.
 * - When exactly one argument cannot be parsed by `Date`, the parseable one is
 *   returned, so a malformed timestamp never displaces a valid one.
 * - When both are unparseable, `b` is returned.
 * - On a tie, `a` is returned.
 *
 * @param a - First candidate (for example the fresh-layer lastmod).
 * @param b - Second candidate (for example the build-time lastmod).
 * @returns The original string of the later candidate, or `undefined` when
 *   `a` is missing and `b` is `undefined`.
 */
function pickLatest(a: string | undefined, b: string | undefined): string | undefined {
  if (!a) return b
  if (!b) return a

  const aTime = new Date(a).getTime()
  const bTime = new Date(b).getTime()

  // NaN compares false against everything, so handle unparseable input explicitly.
  if (Number.isNaN(aTime)) return b
  if (Number.isNaN(bTime)) return a

  return aTime >= bTime ? a : b
}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import type { ResolvedSeoOptions } from '../options.js'
|
|
2
|
+
import type { ContentItem } from '../types.js'
|
|
3
|
+
|
|
4
|
+
/**
 * Path string of the sitemap index document.
 * NOTE(review): presumably the route the index is served at — confirm against
 * the sitemap-index route handler.
 */
export const SITEMAP_INDEX_PATH = '/sitemap-index.xml'
|
|
5
|
+
|
|
6
|
+
/** One child sitemap listed in the sitemap index. */
export interface SitemapEntry {
  /** URL of the child sitemap; written to `<loc>`. */
  loc: string
  /** Last-modified value; written to `<lastmod>` when present. */
  lastmod?: string
}
|
|
10
|
+
|
|
11
|
+
/**
 * Escape the five XML-reserved characters so `str` can be embedded safely in
 * element text or in a quoted attribute value.
 *
 * The ampersand is replaced first; otherwise the `&` introduced by the other
 * entity references would itself be escaped a second time.
 *
 * @param str - Raw text to embed in XML.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by entity references.
 */
function escapeXml(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
}
|
|
19
|
+
|
|
20
|
+
/**
 * Render a `<sitemapindex>` document listing the given child sitemaps.
 *
 * Each entry becomes a `<sitemap>` element with an escaped `<loc>` and, when
 * `lastmod` is set, an escaped `<lastmod>`.
 *
 * @param sitemaps - Child sitemaps, rendered in the order given.
 * @returns The sitemap index XML document as a string.
 */
export function generateSitemapIndex(sitemaps: SitemapEntry[]): string {
  const nodes: string[] = []

  for (const { loc, lastmod } of sitemaps) {
    const lines = ['  <sitemap>', `    <loc>${escapeXml(loc)}</loc>`]
    if (lastmod) {
      lines.push(`    <lastmod>${escapeXml(lastmod)}</lastmod>`)
    }
    lines.push('  </sitemap>')
    nodes.push(lines.join('\n'))
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    nodes.join('\n'),
    '</sitemapindex>',
  ].join('\n')
}
|
|
33
|
+
|
|
34
|
+
/**
 * Report whether `dateStr` is no more than 48 hours before the current time.
 *
 * Only the upper bound is checked, so a date in the future also returns
 * `true`. An unparseable date returns `false` (its age is `NaN`).
 *
 * @param dateStr - Any string accepted by the `Date` constructor.
 */
function isWithin48Hours(dateStr: string): boolean {
  const windowMs = 48 * 60 * 60 * 1000
  const publishedAt = new Date(dateStr).getTime()
  const ageMs = Date.now() - publishedAt
  return ageMs <= windowMs
}
|
|
40
|
+
|
|
41
|
+
/**
 * Render the article sitemap, including image, Google News and hreflang
 * extensions.
 *
 * Per article:
 * - `<lastmod>` uses `dateModified`, falling back to `datePublished`.
 * - `<image:image>` is emitted when the article has an image.
 * - `<news:news>` is emitted only when `options.googleNews` is truthy and the
 *   article's `datePublished` passes `isWithin48Hours`.
 * - One `<xhtml:link rel="alternate">` is emitted per alternate locale.
 *
 * @param articles - Articles, rendered in the order given.
 * @param options - Resolved SEO options; `googleNews` and `organization` are read.
 * @returns The sitemap XML document as a string.
 */
export function generateArticleSitemap(
  articles: ContentItem[],
  options: ResolvedSeoOptions,
): string {
  const { googleNews, organization } = options
  // NOTE(review): the news language is a fixed 'en' rather than derived from
  // configuration — confirm this is intended for non-English sites.
  const newsLanguage = 'en'

  const renderUrl = (article: ContentItem): string => {
    const parts: string[] = [`  <url>\n    <loc>${escapeXml(article.url)}</loc>`]

    const lastmod = article.dateModified ?? article.datePublished ?? ''
    if (lastmod) {
      parts.push(`\n    <lastmod>${escapeXml(lastmod)}</lastmod>`)
    }

    // image:image
    if (article.image) {
      parts.push(
        `\n    <image:image>\n      <image:loc>${escapeXml(article.image)}</image:loc>\n    </image:image>`,
      )
    }

    // news:news (only for recent articles when googleNews is enabled)
    if (googleNews && article.datePublished && isWithin48Hours(article.datePublished)) {
      parts.push(
        [
          '\n    <news:news>',
          '      <news:publication>',
          `        <news:name>${escapeXml(organization.name)}</news:name>`,
          `        <news:language>${escapeXml(newsLanguage)}</news:language>`,
          '      </news:publication>',
          `      <news:publication_date>${escapeXml(article.datePublished)}</news:publication_date>`,
          `      <news:title>${escapeXml(article.title)}</news:title>`,
          '    </news:news>',
        ].join('\n'),
      )
    }

    // xhtml:link hreflang
    for (const alt of article.alternateLocales ?? []) {
      parts.push(
        `\n    <xhtml:link rel="alternate" hreflang="${escapeXml(alt.lang)}" href="${escapeXml(alt.url)}"/>`,
      )
    }

    parts.push('\n  </url>')
    return parts.join('')
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    '  xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"',
    '  xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"',
    '  xmlns:xhtml="http://www.w3.org/1999/xhtml">',
    articles.map(renderUrl).join('\n'),
    '</urlset>',
  ].join('\n')
}
|
|
90
|
+
|
|
91
|
+
/**
 * Render the sitemap for static pages.
 *
 * Each page becomes a `<url>` with an escaped `<loc>` and, when available, a
 * `<lastmod>` taken from `dateModified` or else `datePublished`.
 *
 * @param pages - Pages, rendered in the order given.
 * @returns The sitemap XML document as a string.
 */
export function generatePagesSitemap(pages: ContentItem[]): string {
  const urlNodes: string[] = []

  for (const page of pages) {
    const lines = ['  <url>', `    <loc>${escapeXml(page.url)}</loc>`]
    const lastmod = page.dateModified ?? page.datePublished ?? ''
    if (lastmod) {
      lines.push(`    <lastmod>${escapeXml(lastmod)}</lastmod>`)
    }
    lines.push('  </url>')
    urlNodes.push(lines.join('\n'))
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    urlNodes.join('\n'),
    '</urlset>',
  ].join('\n')
}
|
|
105
|
+
|
|
106
|
+
/**
 * Render the video sitemap for every item that carries a `video`.
 *
 * Inside `<video:video>` the thumbnail and title are always written, followed
 * by description, content URL, player (embed) URL, duration and publication
 * date, each only when present. Items without a `video` are skipped.
 *
 * @param items - Content items; order is preserved among those with video.
 * @returns The sitemap XML document as a string.
 */
export function generateVideoSitemap(items: ContentItem[]): string {
  const urlNodes: string[] = []

  for (const item of items) {
    const video = item.video
    if (!video) continue

    // Optional children, already in their output order.
    const optional = [
      item.description
        ? `\n      <video:description>${escapeXml(item.description)}</video:description>`
        : '',
      video.contentUrl
        ? `\n      <video:content_loc>${escapeXml(video.contentUrl)}</video:content_loc>`
        : '',
      video.embedUrl
        ? `\n      <video:player_loc>${escapeXml(video.embedUrl)}</video:player_loc>`
        : '',
      video.duration
        ? `\n      <video:duration>${escapeXml(video.duration)}</video:duration>`
        : '',
      item.datePublished
        ? `\n      <video:publication_date>${escapeXml(item.datePublished)}</video:publication_date>`
        : '',
    ].join('')

    urlNodes.push(
      [
        '  <url>',
        `    <loc>${escapeXml(item.url)}</loc>`,
        '    <video:video>',
        `      <video:thumbnail_loc>${escapeXml(video.thumbnailUrl)}</video:thumbnail_loc>`,
        `      <video:title>${escapeXml(item.title)}</video:title>${optional}`,
        '    </video:video>',
        '  </url>',
      ].join('\n'),
    )
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    '  xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">',
    urlNodes.join('\n'),
    '</urlset>',
  ].join('\n')
}
|
|
143
|
+
|
|
144
|
+
/**
 * Render the product sitemap for every item that carries a `product`.
 *
 * Images come from `product.images`; when that list is empty the item's own
 * `image` is used if set. `<lastmod>` uses `dateModified`, falling back to
 * `datePublished`. Items without a `product` are skipped.
 *
 * @param items - Content items; order is preserved among those with a product.
 * @returns The sitemap XML document as a string.
 */
export function generateProductSitemap(items: ContentItem[]): string {
  const urlNodes: string[] = []

  for (const item of items) {
    const product = item.product
    if (!product) continue

    const fallbackImages = item.image ? [item.image] : []
    const imageUrls = product.images.length > 0 ? product.images : fallbackImages

    let node = `  <url>\n    <loc>${escapeXml(item.url)}</loc>`

    const lastmod = item.dateModified ?? item.datePublished ?? ''
    if (lastmod) {
      node += `\n    <lastmod>${escapeXml(lastmod)}</lastmod>`
    }
    for (const imageUrl of imageUrls) {
      node += `\n    <image:image>\n      <image:loc>${escapeXml(imageUrl)}</image:loc>\n    </image:image>`
    }
    node += '\n  </url>'

    urlNodes.push(node)
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    '  xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">',
    urlNodes.join('\n'),
    '</urlset>',
  ].join('\n')
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js'
|
|
2
|
+
|
|
3
|
+
/**
 * Compute the SHA-256 content hash used for staleness validation.
 *
 * Hashing goes through `crypto.subtle`, never `node:crypto`, so one
 * implementation serves both Node (build time) and Workers (runtime).
 *
 * The hashed input is the item's title, its description (empty string when
 * absent) and the rendered body, in that order, joined with a U+0001
 * separator and encoded with `TextEncoder`. A change to any of the three
 * fields produces a different hash. No whitespace normalization is done here;
 * callers wanting whitespace-insensitive hashes must normalize beforehand.
 *
 * @param item - Source of `title` and `description`.
 * @param renderedBody - Rendered body text for the item.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function computeContentHash(
  item: Pick<ContentItem, 'title' | 'description'>,
  renderedBody: string,
): Promise<string> {
  const fields = [item.title, item.description ?? '', renderedBody]
  const encoded = new TextEncoder().encode(fields.join('\u0001'))
  const hashBuffer = await crypto.subtle.digest('SHA-256', encoded)
  return toHex(new Uint8Array(hashBuffer))
}
|
|
21
|
+
|
|
22
|
+
/**
 * Encode bytes as a lowercase hexadecimal string, two digits per byte.
 *
 * @param bytes - Bytes to encode.
 * @returns The hex string; empty when `bytes` is empty.
 */
function toHex(bytes: Uint8Array): string {
  const pairs = Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0'))
  return pairs.join('')
}
|
|
29
|
+
|
|
30
|
+
/** One item whose freshly computed content hash differs from the expected one. */
export interface StalenessDriftRecord {
  /** URL of the drifted item. */
  url: string
  /** Hash that was expected for the item. */
  expectedHash: string
  /** Hash computed from the item's current content. */
  actualHash: string
}
|
|
35
|
+
|
|
36
|
+
/**
 * Compare expected and freshly computed content hashes for a batch of items.
 *
 * Used by the build-time validator to detect drift between two
 * `contentProvider` calls made during the same build (rare, but possible when
 * the provider pulls from a mutating source). Callers log the returned records
 * as warnings; drift does NOT fail the build (spec 1403: "does not fail the
 * build").
 *
 * Items with no (or an empty) expected hash for their URL are skipped without
 * rendering. The remaining items are rendered and hashed one at a time, in
 * input order.
 *
 * @param items - Items to check.
 * @param expectedHashes - Expected hash per item URL.
 * @param renderBody - Produces the rendered body for an item; may be async.
 * @returns One record per item whose hash differs, in input order.
 */
export async function checkStaleness(
  items: ContentItem[],
  expectedHashes: Map<string, string>,
  renderBody: (item: ContentItem) => string | Promise<string>,
): Promise<StalenessDriftRecord[]> {
  const mismatches: StalenessDriftRecord[] = []

  for (const item of items) {
    const expectedHash = expectedHashes.get(item.url)
    if (expectedHash) {
      const renderedBody = await renderBody(item)
      const actualHash = await computeContentHash(item, renderedBody)
      if (actualHash !== expectedHash) {
        mismatches.push({ url: item.url, expectedHash, actualHash })
      }
    }
  }

  return mismatches
}
|