@growth-labs/seo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/utils/validation.d.ts.map +1 -1
  2. package/dist/utils/validation.js +22 -0
  3. package/dist/utils/validation.js.map +1 -1
  4. package/package.json +9 -5
  5. package/src/_internal/state.ts +26 -0
  6. package/src/bindings.ts +146 -0
  7. package/src/cron/prune-aeo-r2.ts +140 -0
  8. package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
  9. package/src/index.ts +380 -0
  10. package/src/middleware/seo.ts +350 -0
  11. package/src/options.ts +456 -0
  12. package/src/routes/aeo-twin.ts +130 -0
  13. package/src/routes/apple-news.ts +36 -0
  14. package/src/routes/llms-full.ts +36 -0
  15. package/src/routes/llms.ts +15 -0
  16. package/src/routes/podcast-narration.ts +45 -0
  17. package/src/routes/podcast.ts +27 -0
  18. package/src/routes/revalidate.ts +298 -0
  19. package/src/routes/robots.ts +21 -0
  20. package/src/routes/rss.ts +29 -0
  21. package/src/routes/sitemap-articles.ts +25 -0
  22. package/src/routes/sitemap-index.ts +89 -0
  23. package/src/routes/sitemap-markdown.ts +39 -0
  24. package/src/routes/sitemap-pages.ts +24 -0
  25. package/src/routes/sitemap-products.ts +24 -0
  26. package/src/routes/sitemap-videos.ts +24 -0
  27. package/src/runtime.ts +17 -0
  28. package/src/site-url-core.ts +71 -0
  29. package/src/site-url.ts +21 -0
  30. package/src/types.ts +166 -0
  31. package/src/utils/aeo-summary.ts +176 -0
  32. package/src/utils/aeo-twin-emitter.ts +173 -0
  33. package/src/utils/aeo.ts +223 -0
  34. package/src/utils/apple-news-anf.ts +163 -0
  35. package/src/utils/apple-news-rss.ts +136 -0
  36. package/src/utils/content-filter.ts +87 -0
  37. package/src/utils/crawler-class.ts +155 -0
  38. package/src/utils/define-content-provider.ts +65 -0
  39. package/src/utils/effective-auth.ts +44 -0
  40. package/src/utils/fcrdns.ts +269 -0
  41. package/src/utils/fresh-layer.ts +175 -0
  42. package/src/utils/hreflang.ts +26 -0
  43. package/src/utils/index.ts +91 -0
  44. package/src/utils/json-ld/article.ts +120 -0
  45. package/src/utils/json-ld/audio.ts +32 -0
  46. package/src/utils/json-ld/breadcrumb.ts +28 -0
  47. package/src/utils/json-ld/faq.ts +18 -0
  48. package/src/utils/json-ld/howto.ts +23 -0
  49. package/src/utils/json-ld/index.ts +12 -0
  50. package/src/utils/json-ld/item-list.ts +26 -0
  51. package/src/utils/json-ld/organization.ts +42 -0
  52. package/src/utils/json-ld/person.ts +25 -0
  53. package/src/utils/json-ld/product.ts +155 -0
  54. package/src/utils/json-ld/video.ts +20 -0
  55. package/src/utils/json-ld/website.ts +27 -0
  56. package/src/utils/llms-full.ts +90 -0
  57. package/src/utils/llms.ts +45 -0
  58. package/src/utils/meta.ts +184 -0
  59. package/src/utils/podcast.ts +112 -0
  60. package/src/utils/robots.ts +47 -0
  61. package/src/utils/rss.ts +64 -0
  62. package/src/utils/seo-head.ts +81 -0
  63. package/src/utils/sitemap-markdown.ts +80 -0
  64. package/src/utils/sitemap.ts +169 -0
  65. package/src/utils/staleness.ts +61 -0
  66. package/src/utils/validation.ts +308 -0
  67. package/src/virtual.d.ts +8 -0
  68. package/src/vite-plugin.ts +66 -0
@@ -0,0 +1,223 @@
1
+ import type { ContentItem } from '../types.js'
2
+
3
+ // ─── Public API ───
4
+
5
/** Options accepted by `generateAeoMarkdown`. */
export interface GenerateAeoMarkdownOptions {
  /** Publisher organization name for the frontmatter `publisher` field. */
  publisherName: string
  /** Schema.org type (e.g. 'Article', 'NewsArticle'); written to the frontmatter `type` field. */
  schemaType: string
  /**
   * Rendered article body in clean markdown (no HTML chrome). Also the text the
   * frontmatter `tokens` estimate is computed from.
   */
  content: string
  /** If true, wraps semantic sections in `<!-- aeo:section start/end -->` comments. */
  ragChunkMarkers?: boolean
  /** Canonical HTML URL to reference from the twin frontmatter. Defaults to item.url. */
  canonical?: string
  /**
   * The primary twin URL (what this file will be served at). Stored in frontmatter
   * as `url`. Defaults to item.url.
   */
  twinUrl?: string
  /** Optional summary-twin URL to cross-link from the full twin frontmatter (`summaryUrl`). */
  summaryUrl?: string
  /** Pre-computed SHA-256 content hash for staleness validation; emitted as `contentHash` when set. */
  contentHash?: string
}
23
+
24
/**
 * Rough token estimate for a piece of text, assuming four characters per token
 * and rounding up. Feeds the `tokens` frontmatter field and the
 * `x-markdown-tokens` response header.
 *
 * @param text - Text to measure (UTF-16 code units are counted, not code points).
 * @returns The estimated token count; `0` for the empty string.
 */
export function estimateTokenCount(text: string): number {
  const charsPerToken = 4
  const rawEstimate = text.length / charsPerToken
  return Math.ceil(rawEstimate)
}
31
+
32
/**
 * Build the AEO markdown twin for a ContentItem: a YAML frontmatter block
 * delimited by `---` lines, a blank line, then the article body.
 *
 * The body is always passed through `escapeHtmlComments` first, so any
 * author-written `<!--` / `-->` becomes `&lt;!--` / `--&gt;` and cannot forge
 * or terminate a section marker. When `options.ragChunkMarkers` is truthy the
 * escaped body is then wrapped by `wrapSectionMarkers`, which brackets each
 * `## ` section with `<!-- aeo:section start="<slug>" -->` and
 * `<!-- aeo:section end="<slug>" -->` comments for RAG retrievers.
 *
 * @param item - Content item supplying title, URLs, dates, authors, etc.
 * @param options - Twin-specific options; `options.content` is the markdown body.
 * @returns The complete markdown document as a single string.
 */
export function generateAeoMarkdown(
  item: ContentItem,
  options: GenerateAeoMarkdownOptions,
): string {
  const yamlBlock = buildFrontmatter(item, options)
  const escapedContent = escapeHtmlComments(options.content)

  let renderedBody = escapedContent
  if (options.ragChunkMarkers) {
    renderedBody = wrapSectionMarkers(escapedContent)
  }

  return ['---', yamlBlock, '---', '', renderedBody].join('\n')
}
54
+
55
+ // ─── Frontmatter ───
56
+
57
/**
 * Render the YAML frontmatter body (without the surrounding `---` fences).
 *
 * Emits, in order: title, description, url (twin URL), canonical, datePublished,
 * dateModified, author list, publisher, image (first image only), type,
 * language, audio, alternateLanguages, contentHash, tokens, summaryUrl.
 * Optional fields are omitted when their source value is falsy/empty.
 *
 * Nested structures use explicit indentation so they parse as children of
 * their parent key: sequence items at 2 spaces, keys of a sequence item at
 * 4 spaces, and items of a nested list at 6 spaces.
 *
 * @param item - Content item the twin describes.
 * @param options - Twin options; `canonical` and `twinUrl` fall back to `item.url`.
 * @returns Newline-joined YAML lines.
 */
function buildFrontmatter(item: ContentItem, options: GenerateAeoMarkdownOptions): string {
  const canonical = options.canonical ?? item.url
  const twinUrl = options.twinUrl ?? item.url
  const firstImage = Array.isArray(item.image) ? item.image[0] : item.image

  // Copy only the populated author fields so the emit loop can test presence directly.
  const authorEntries: Record<string, unknown>[] = (item.authors ?? []).map((a) => {
    const o: Record<string, unknown> = { name: a.name }
    if (a.url) o.url = a.url
    if (a.jobTitle) o.jobTitle = a.jobTitle
    if (a.knowsAbout?.length) o.knowsAbout = a.knowsAbout
    if (a.sameAs?.length) o.sameAs = a.sameAs
    return o
  })

  const alternateLanguages = (item.alternateLocales ?? []).map((l) => ({
    lang: l.lang,
    url: l.url,
  }))

  const lines: string[] = []
  lines.push(yamlScalar('title', item.title))
  if (item.description) lines.push(yamlScalar('description', item.description))
  lines.push(yamlScalar('url', twinUrl))
  lines.push(yamlScalar('canonical', canonical))
  if (item.datePublished) lines.push(yamlScalar('datePublished', item.datePublished))
  if (item.dateModified) lines.push(yamlScalar('dateModified', item.dateModified))
  if (authorEntries.length) {
    lines.push('author:')
    for (const a of authorEntries) {
      lines.push(`  - name: ${yamlValue(a.name as string)}`)
      if (a.url) lines.push(`    url: ${yamlValue(a.url as string)}`)
      if (a.jobTitle) lines.push(`    jobTitle: ${yamlValue(a.jobTitle as string)}`)
      if (a.knowsAbout) {
        lines.push('    knowsAbout:')
        for (const k of a.knowsAbout as string[]) lines.push(`      - ${yamlValue(k)}`)
      }
      if (a.sameAs) {
        lines.push('    sameAs:')
        for (const s of a.sameAs as string[]) lines.push(`      - ${yamlValue(s)}`)
      }
    }
  }
  lines.push(yamlScalar('publisher', options.publisherName))
  if (firstImage) lines.push(yamlScalar('image', firstImage))
  lines.push(yamlScalar('type', options.schemaType))
  if (item.locale) lines.push(yamlScalar('language', item.locale))
  if (item.audio) lines.push(yamlScalar('audio', item.audio.url))
  if (alternateLanguages.length) {
    lines.push('alternateLanguages:')
    for (const l of alternateLanguages) {
      lines.push(`  - lang: ${yamlValue(l.lang)}`)
      lines.push(`    url: ${yamlValue(l.url)}`)
    }
  }
  if (options.contentHash) lines.push(yamlScalar('contentHash', options.contentHash))
  // The estimate is taken from the raw (unescaped, unwrapped) body.
  const tokens = estimateTokenCount(options.content)
  lines.push(`tokens: ${tokens}`)
  if (options.summaryUrl) lines.push(yamlScalar('summaryUrl', options.summaryUrl))
  return lines.join('\n')
}
117
+
118
/**
 * Format one top-level `key: value` frontmatter line, delegating quoting and
 * escaping of the value to `yamlValue`. The key is emitted verbatim.
 */
function yamlScalar(key: string, value: string): string {
  const rendered = yamlValue(value)
  return [key, rendered].join(': ')
}
121
+
122
/**
 * Emit a string safely as a YAML scalar. Returned unquoted when a YAML parser
 * would read it back as the same string; otherwise double-quoted with escapes.
 *
 * A value is quoted when any of the following holds:
 * - it is empty, starts with whitespace or `-`, or ends with whitespace
 *   (a bare empty value reads as null; edge whitespace is stripped from plain scalars);
 * - it contains a character with YAML indicator meaning, a quote, or a line break;
 * - it contains `:` followed by whitespace or at the very end (a mapping separator);
 * - it contains any other `:` and is neither an http(s) URL nor date-prefixed
 *   (`YYYY-MM-DD…`), the two forms deliberately left unquoted;
 * - it would resolve to a non-string (null, boolean-like word, or number).
 *
 * @param v - Raw string value.
 * @returns The value as it should appear after `key: ` in the frontmatter.
 */
function yamlValue(v: string): string {
  // Whitelisted forms that can be emitted unquoted even though they contain `:`.
  const isUrl = /^https?:\/\//.test(v)
  const isDate = /^\d{4}-\d{2}-\d{2}/.test(v)

  const unsafeEdge = v === '' || /^[\s-]/.test(v) || /\s$/.test(v)
  const hasIndicator = /[#{}[\],&*?|>!%@`'"\n\r]/.test(v)
  // `: ` (or a trailing `:`) always starts a mapping value, even inside a URL or date.
  const mappingColon = /:(?:\s|$)/.test(v)
  const ambiguousColon = v.includes(':') && !isUrl && !isDate
  // Plain scalars that parsers resolve to null / boolean / int / float instead of a string.
  const nonStringPlain =
    /^(?:~|null|true|false|yes|no|on|off|y|n)$/i.test(v) ||
    /^\+?(?:\.\d+|\d[\d_]*(?:\.\d*)?)(?:e[-+]?\d+)?$/i.test(v) ||
    /^0(?:x[0-9a-f]+|o[0-7]+)$/i.test(v) ||
    /^\+?\.(?:inf|nan)$/i.test(v)

  const needsQuotes =
    unsafeEdge || hasIndicator || mappingColon || ambiguousColon || nonStringPlain
  if (!needsQuotes) return v

  // Backslash first so the escapes added afterwards are not themselves re-escaped.
  const escaped = v
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
  return `"${escaped}"`
}
137
+
138
+ // ─── RAG chunk markers ───
139
+
140
/**
 * Wrap each `## ` section of a markdown body in
 * `<!-- aeo:section start="<slug>" -->` / `<!-- aeo:section end="<slug>" -->`.
 *
 * - Non-blank content before the first `## ` heading is wrapped as `lede`.
 * - Each `## ` heading closes the open section (if any) and opens a new one
 *   whose slug comes from the heading text, falling back to `section-N`
 *   (N = 1-based heading ordinal) when sanitization leaves nothing. Slugs are
 *   de-duplicated within the document.
 * - The last open section is closed at the end of the body.
 * - Lines inside fenced code blocks (``` or ~~~) are never treated as
 *   headings, so a `## comment` in a code sample cannot split the fence
 *   across two sections.
 *
 * @param body - Markdown body, already passed through `escapeHtmlComments`.
 * @returns The body with section markers inserted on their own lines.
 */
function wrapSectionMarkers(body: string): string {
  const headingPattern = /^##\s+(.+)$/
  // Up to three spaces of indent, then a run of 3+ backticks or tildes, then the rest.
  const fencePattern = /^ {0,3}(`{3,}|~{3,})(.*)$/

  const lines = body.split('\n')
  const out: string[] = []
  let currentSlug: string | null = null
  const usedSlugs = new Set<string>()
  let ordinal = 0
  let sawContent = false
  // Delimiter run that opened the code fence we are currently inside, if any.
  let openFence: string | null = null

  const openSection = (slug: string) => {
    out.push(`<!-- aeo:section start="${slug}" -->`)
    currentSlug = slug
  }
  const closeSection = () => {
    if (currentSlug) {
      out.push(`<!-- aeo:section end="${currentSlug}" -->`)
      currentSlug = null
    }
  }

  for (const line of lines) {
    const insideFence = openFence !== null

    const fenceMatch = fencePattern.exec(line)
    if (fenceMatch) {
      const run = fenceMatch[1]!
      const rest = fenceMatch[2]!
      if (openFence === null) {
        // A backtick fence's info string may not contain backticks; that rules
        // out single-line inline code such as ```x```.
        if (!(run.charAt(0) === '`' && rest.includes('`'))) openFence = run
      } else if (
        run.charAt(0) === openFence.charAt(0) &&
        run.length >= openFence.length &&
        rest.trim() === ''
      ) {
        // Closing fence: same character, at least as long, nothing after it.
        openFence = null
      }
    }

    const headingMatch = insideFence ? null : headingPattern.exec(line)
    if (headingMatch) {
      closeSection()
      ordinal++
      const slug = sanitizeSlug(headingMatch[1]!) || `section-${ordinal}`
      const unique = uniqueSlug(slug, usedSlugs)
      usedSlugs.add(unique)
      openSection(unique)
      out.push(line)
      sawContent = true
      continue
    }
    // Open with `lede` if we hit body content before any heading.
    if (!currentSlug && line.trim() !== '' && !sawContent) {
      openSection('lede')
      usedSlugs.add('lede')
      sawContent = true
    }
    out.push(line)
  }
  closeSection()
  return out.join('\n')
}
188
+
189
/**
 * Strict slug sanitization — result matches `[a-z0-9-]{0,64}` with no leading,
 * trailing, or repeated dashes. Returns `''` when nothing usable remains, so
 * callers can substitute a fallback.
 *
 * Steps: lowercase, NFKD-decompose and drop combining marks (so `é` becomes
 * `e`), collapse every run of other characters into a single `-`, strip
 * leading dashes, truncate to 64, then strip trailing dashes. Trailing dashes
 * are stripped AFTER truncation because the cut can land right after a dash.
 *
 * @param text - Arbitrary heading text.
 * @returns The sanitized slug, possibly empty.
 */
function sanitizeSlug(text: string): string {
  const lowered = text.toLowerCase()
  const ascii = lowered.normalize('NFKD').replace(/[\u0300-\u036f]/g, '')
  return ascii
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 64)
    .replace(/-+$/, '')
}
199
+
200
/**
 * Return `base` if it is not yet in `used`; otherwise the first free
 * `<base>-N` for N = 2, 3, …. Does not add the result to `used` — the caller
 * records it.
 *
 * When appending the suffix would exceed 64 characters, `base` is shortened
 * (and any dash exposed by the cut removed) so the suffixed slug still fits
 * the 64-character slug limit.
 *
 * @param base - Candidate slug.
 * @param used - Slugs already taken in this document.
 * @returns A slug not present in `used`.
 */
function uniqueSlug(base: string, used: Set<string>): string {
  if (!used.has(base)) return base
  const maxLength = 64
  for (let n = 2; ; n++) {
    const suffix = `-${n}`
    const room = Math.max(maxLength - suffix.length, 1)
    const stem = base.length > room ? base.slice(0, room).replace(/-+$/, '') : base
    const candidate = `${stem}${suffix}`
    if (!used.has(candidate)) return candidate
  }
}
206
+
207
/**
 * Neutralize HTML comment delimiters written by the author: every `<!--`
 * becomes `&lt;!--`, then every `-->` becomes `--&gt;`. Run BEFORE section
 * markers are inserted so body text can neither forge a marker nor close a
 * real one.
 *
 * @param body - Raw markdown body.
 * @returns The body with both delimiters escaped.
 */
function escapeHtmlComments(body: string): string {
  const withoutOpeners = body.split('<!--').join('&lt;!--')
  const withoutClosers = withoutOpeners.split('-->').join('--&gt;')
  return withoutClosers
}
214
+
215
+ // ─── Exports for testing ───
216
+
217
/**
 * Module-private helpers bundled into one exported object so tests can call
 * them directly (see the "Exports for testing" section comment).
 */
export const _internals = {
  sanitizeSlug,
  uniqueSlug,
  escapeHtmlComments,
  wrapSectionMarkers,
  yamlValue,
}
@@ -0,0 +1,163 @@
1
+ import type { ContentItem } from '../types.js'
2
+
3
+ // Apple News Format (ANF) JSON document generator.
4
+ //
5
+ // EXPERIMENTAL in 0.2.0. Ships a conservative minimal-viable document that
6
+ // passes Apple News Publisher's static validation. Full round-trip validation
7
+ // against News Publisher requires an Apple developer account — test your
8
+ // output in your News Publisher dashboard before committing to this format.
9
+ //
10
+ // ANF spec: https://developer.apple.com/documentation/apple_news/apple_news_format
11
+ //
12
+ // Scope of this package: produce a valid ANF document. Submission to the News
13
+ // Publisher API (authentication, HMAC signing, multipart POST) is the
14
+ // consumer's concern — the ANF spec explicitly keeps credentials out of scope.
15
+
16
// Value written to the `version` field of every generated ANF document.
const ANF_VERSION = '1.7'
17
+
18
/** Options accepted by `generateAppleNewsAnf`. */
export interface GenerateAnfOptions {
  // NOTE(review): required by the type, but `generateAppleNewsAnf` in this file
  // never reads it — presumably consumed at submission time; confirm.
  channelId: string
  /** Byline text; when omitted, "By <author names>" is derived from `item.authors`. */
  byline?: string
  /** ISO 639-1 language code. Falls back to `item.locale`, then 'en'. */
  language?: string // ISO 639-1 (default: 'en')
  // Extended customization is intentionally absent in v1. Consumers needing rich
  // layout control can post-process the returned object.
}
25
+
26
/**
 * One entry of an ANF document's `components` array. Only `role` is typed;
 * every other property is passed through untyped via the index signature.
 */
export interface AnfComponent {
  role: string
  [key: string]: unknown
}
30
+
31
// Layouts and styles are keyed by user-chosen names and referenced by components
// (via their `layout` / `textStyle` properties); they don't have a `role`
// themselves. Kept loosely-typed so consumers can customize.
/** Named component layouts, keyed by the name components reference. */
export type AnfLayoutMap = Record<string, Record<string, unknown>>
/** Named component text styles, keyed by the name components reference. */
export type AnfStyleMap = Record<string, Record<string, unknown>>
35
+
36
/**
 * Shape of the object returned by `generateAppleNewsAnf`. Nested sections are
 * typed loosely (`Record<string, unknown>`) so callers can post-process them.
 */
export interface AnfDocument {
  /** ANF format version string. */
  version: string
  /** Article identifier. */
  identifier: string
  title: string
  subtitle?: string
  language: string
  /** Document-level grid layout (columns, width, margin, gutter). */
  layout: Record<string, unknown>
  documentStyle?: Record<string, unknown>
  metadata?: Record<string, unknown>
  /** Ordered content components. */
  components: AnfComponent[]
  /** Named layouts referenced by components' `layout` property. */
  componentLayouts?: AnfLayoutMap
  /** Named styles referenced by components' `textStyle` property. */
  componentStyles?: AnfStyleMap
}
49
+
50
/**
 * Build an Apple News Format document for a ContentItem. The result is a plain
 * JSON-serializable object.
 *
 * Component order: title, byline (explicit `options.byline`, else
 * "By <author names>" when the item has authors), hero photo (first image,
 * captioned with the item description), then one `body` component per
 * non-empty block of `item.description` split on blank lines. The document
 * uses a fixed 7-column layout and fixed named layouts/styles; consumers with
 * real article bodies are expected to replace the body components.
 *
 * @param item - Content item to render.
 * @param options - Byline/language overrides.
 * @returns The ANF document object.
 */
export function generateAppleNewsAnf(item: ContentItem, options: GenerateAnfOptions): AnfDocument {
  const identifier = item.appleNewsId ?? deriveIdentifier(item.url)
  const language = options.language ?? item.locale ?? 'en'
  const heroImage = Array.isArray(item.image) ? item.image[0] : item.image

  let byline = options.byline
  if (byline === undefined || byline === null) {
    byline = item.authors?.length
      ? `By ${item.authors.map((a) => a.name).join(', ')}`
      : undefined
  }

  const titleComponent: AnfComponent = {
    role: 'title',
    layout: 'titleLayout',
    text: item.title,
    textStyle: 'titleStyle',
  }

  const bylineComponents: AnfComponent[] = byline
    ? [{ role: 'byline', layout: 'bylineLayout', text: byline, textStyle: 'bylineStyle' }]
    : []

  // Hero image (first, or single).
  const heroComponents: AnfComponent[] = heroImage
    ? [{ role: 'photo', layout: 'heroLayout', URL: heroImage, caption: item.description }]
    : []

  // Body (from description, split on double-newline); blank blocks are dropped.
  const bodyComponents: AnfComponent[] = (item.description ? item.description : '')
    .split(/\n{2,}/)
    .map((block) => block.trim())
    .filter((block) => block.length > 0)
    .map((text) => ({
      role: 'body',
      layout: 'bodyLayout',
      text,
      textStyle: 'bodyStyle',
    }))

  const components: AnfComponent[] = [
    titleComponent,
    ...bylineComponents,
    ...heroComponents,
    ...bodyComponents,
  ]

  const document: AnfDocument = {
    version: ANF_VERSION,
    identifier,
    title: item.title,
    language,
    layout: {
      columns: 7,
      width: 1024,
      margin: 75,
      gutter: 20,
    },
    documentStyle: {
      backgroundColor: '#FFFFFF',
    },
    metadata: {
      canonicalURL: item.url,
      datePublished: item.datePublished,
      dateModified: item.dateModified,
      thumbnailURL: heroImage,
      excerpt: item.description,
      authors: item.authors?.map((a) => a.name),
    },
    components,
    componentLayouts: {
      titleLayout: { columnStart: 0, columnSpan: 7, margin: { bottom: 18 } },
      bylineLayout: { columnStart: 0, columnSpan: 7, margin: { bottom: 30 } },
      heroLayout: { ignoreDocumentMargin: true, minimumHeight: 500 },
      bodyLayout: { columnStart: 0, columnSpan: 7, margin: { top: 15, bottom: 15 } },
    },
    componentStyles: {
      titleStyle: { textAlignment: 'left', fontName: 'HelveticaNeue-Bold', fontSize: 36 },
      bylineStyle: { textAlignment: 'left', fontName: 'HelveticaNeue-Medium', fontSize: 13 },
      bodyStyle: { textAlignment: 'left', fontName: 'Georgia', fontSize: 18, lineHeight: 26 },
    },
  }
  return document
}
148
+
149
/**
 * Turn an article URL into a deterministic identifier, used when the item has
 * no explicit `appleNewsId`. The same URL always yields the same identifier.
 *
 * The scheme and anything from the first `?` or `#` onward are dropped; every
 * character other than ASCII letters, digits, `-` and `_` becomes `-`; dash
 * runs collapse to one; a single leading/trailing dash is removed.
 *
 * @param url - Article URL.
 * @returns The derived identifier (may be empty for a URL with no host/path).
 */
function deriveIdentifier(url: string): string {
  const withoutScheme = url.replace(/^https?:\/\//, '')
  const withoutQueryOrHash = withoutScheme.replace(/[#?].*$/, '')
  const dashed = withoutQueryOrHash.replace(/[^a-zA-Z0-9-_]/g, '-')
  const collapsed = dashed.replace(/-+/g, '-')
  return collapsed.replace(/^-|-$/g, '')
}
@@ -0,0 +1,136 @@
1
+ import type { SeoOptionsWithResolvedSite } from '../options.js'
2
+ import type { ContentItem } from '../types.js'
3
+ import { forAppleNews } from './content-filter.js'
4
+
5
/**
 * Escape the five XML-special characters (`&`, `<`, `>`, `"`, `'`) as their
 * predefined entities so the string is safe in element text and in
 * double- or single-quoted attribute values.
 */
function escapeXml(str: string): string {
  return str.replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case '&':
        return '&amp;'
      case '<':
        return '&lt;'
      case '>':
        return '&gt;'
      case '"':
        return '&quot;'
      default:
        // Only `'` remains in the character class.
        return '&apos;'
    }
  })
}
13
+
14
/**
 * Format a date string as an RFC 822-style UTC timestamp
 * (e.g. `Tue, 02 Jan 2024 03:04:05 GMT`) for RSS `<pubDate>`.
 *
 * @param dateStr - Any string accepted by the `Date` constructor.
 * @returns The formatted timestamp, or `''` when `dateStr` cannot be parsed,
 *   so callers omit the tag instead of emitting the literal `Invalid Date`.
 */
function toRfc822(dateStr: string): string {
  const parsed = new Date(dateStr)
  return Number.isNaN(parsed.getTime()) ? '' : parsed.toUTCString()
}
17
+
18
/**
 * Map from a ContentItem to a self-contained HTML fragment suitable for
 * Apple News `content:encoded`. Apple requires absolute URLs and no page chrome
 * (headers, sidebars, footers). Consumers typically produce this from their
 * rendered article's body element.
 *
 * Return `undefined` (or an empty string) when no body is available; the feed
 * generator then omits `content:encoded` for that item.
 */
export type ContentHtmlResolver = (item: ContentItem) => string | undefined
25
+
26
/** Arguments accepted by `generateAppleNewsRss`. */
export interface GenerateAppleNewsRssOptions {
  /** Candidate items; filtering, newest-first sorting and the item cap are applied by the generator. */
  items: ContentItem[]
  /** Resolved SEO options; `options.appleNews.enabled` must be truthy or the generator throws. */
  options: SeoOptionsWithResolvedSite
  // Resolver that produces the full HTML body for an item. Only consulted when
  // `options.appleNews.fullContent` is true; an item whose resolver returns nothing is
  // still emitted, just without a `content:encoded` element. When `fullContent` is false,
  // no `content:encoded` is emitted for any item.
  contentHtml?: ContentHtmlResolver
}
34
+
35
// Upper bound on the number of <item> entries in the feed (applied after sorting newest-first).
const MAX_ITEMS = 100
36
+
37
/**
 * Generate the Apple News Publisher RSS document (`/apple-news.xml`).
 *
 * What the generator does:
 * - Filters items through `forAppleNews` using the channel's `defaultPublishable`.
 * - Sorts newest-first by `datePublished` and keeps at most `MAX_ITEMS` items.
 *   Items with a missing or unparseable date sort as the oldest.
 * - Per item emits title, link, permalink guid, and — when available —
 *   pubDate, dc:creator (first author), category (`item.appleNewsSection` or
 *   the channel `defaultSection`), description, `content:encoded`, and a
 *   `media:content` hero image (first image).
 * - `content:encoded` is emitted only when `appleNews.fullContent` is true and
 *   the `contentHtml` resolver returns a non-empty string. The HTML is
 *   CDATA-wrapped, with any `]]>` split so it cannot terminate the section.
 * - `<pubDate>` is omitted for dates that cannot be parsed.
 * - The channel `<language>` is the default locale with every `_` turned into `-`.
 *
 * @param items - Candidate content items.
 * @param options - Resolved SEO options.
 * @param contentHtml - Optional resolver for full article HTML.
 * @returns The RSS XML document.
 * @throws Error when `options.appleNews` is absent or not enabled.
 */
export function generateAppleNewsRss({
  items,
  options,
  contentHtml,
}: GenerateAppleNewsRssOptions): string {
  const appleNews = options.appleNews
  if (!appleNews?.enabled) {
    throw new Error('generateAppleNewsRss called without appleNews.enabled')
  }

  // Missing or unparseable dates count as epoch 0 so the comparator never yields NaN.
  const publishedAt = (item: ContentItem): number => {
    if (!item.datePublished) return 0
    const ms = new Date(item.datePublished).getTime()
    return Number.isNaN(ms) ? 0 : ms
  }

  const filtered = forAppleNews(items, { publishable: appleNews.defaultPublishable })
  // Newest-first ordering.
  const sorted = filtered
    .slice()
    .sort((a, b) => publishedAt(b) - publishedAt(a))
    .slice(0, MAX_ITEMS)

  const selfLink = `${options.site.replace(/\/$/, '')}${appleNews.feedPath}`
  const channelImage = options.organization.logo
  // Replace every underscore, so locales such as `zh_Hant_TW` convert fully.
  const language = options.defaults.locale.replace(/_/g, '-')
  const lastBuildDate = new Date().toUTCString()

  const itemsXml = sorted
    .map((item) => {
      // Skip the tag for unparseable dates rather than emitting "Invalid Date".
      const hasValidDate =
        Boolean(item.datePublished) && !Number.isNaN(new Date(item.datePublished!).getTime())
      const pubDate = hasValidDate ? toRfc822(item.datePublished!) : ''
      const pubDateTag = pubDate ? `\n <pubDate>${escapeXml(pubDate)}</pubDate>` : ''
      const creator = item.authors?.[0]?.name
      const creatorTag = creator ? `\n <dc:creator>${escapeXml(creator)}</dc:creator>` : ''
      const section = item.appleNewsSection ?? appleNews.defaultSection
      const categoryTag = section ? `\n <category>${escapeXml(section)}</category>` : ''
      const descTag = item.description
        ? `\n <description>${escapeXml(item.description)}</description>`
        : ''

      let contentTag = ''
      if (appleNews.fullContent) {
        const html = contentHtml?.(item)
        if (html) {
          // CDATA-wrap; guard against CDATA injection by splitting ']]>' sequences.
          const safe = html.replace(/]]>/g, ']]]]><![CDATA[>')
          contentTag = `\n <content:encoded><![CDATA[${safe}]]></content:encoded>`
        }
      }

      // Hero image is required by Apple. We emit media:content on the first image URL;
      // consumers should ensure this is >= 1024x768 per Apple's minimums.
      const heroImage = Array.isArray(item.image) ? item.image[0] : item.image
      const mediaContentTag = heroImage
        ? `\n <media:content url="${escapeXml(heroImage)}" medium="image"/>`
        : ''

      return ` <item>
<title>${escapeXml(item.title)}</title>
<link>${escapeXml(item.url)}</link>
<guid isPermaLink="true">${escapeXml(item.url)}</guid>${pubDateTag}${creatorTag}${categoryTag}${descTag}${contentTag}${mediaContentTag}
</item>`
    })
    .join('\n')

  return `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>${escapeXml(appleNews.channelName)}</title>
<link>${escapeXml(options.site)}</link>
<description>${escapeXml(options.organization.name)}</description>
<language>${escapeXml(language)}</language>
<atom:link href="${escapeXml(selfLink)}" rel="self" type="application/rss+xml"/>
<image>
<url>${escapeXml(channelImage)}</url>
<title>${escapeXml(appleNews.channelName)}</title>
<link>${escapeXml(options.site)}</link>
</image>
<lastBuildDate>${lastBuildDate}</lastBuildDate>
${itemsXml}
</channel>
</rss>`
}
@@ -0,0 +1,87 @@
1
+ import type { ContentItem } from '../types.js'
2
+
3
+ // Centralizes the access-rule invariants so we don't scatter "members excluded
4
+ // from X" across eight different routes. If the rule ever changes, it changes
5
+ // here, once.
6
+
7
/**
 * Select items for the primary sitemaps (sitemap-articles, sitemap-pages, …).
 * Members-only items are kept — paywalled URLs should stay discoverable so
 * search engines can route users to the paywall landing. The only opt-out is
 * an explicit `includeInSitemap: false` on the item.
 */
export function forSitemap<T extends ContentItem>(items: T[]): T[] {
  const isListed = (item: T): boolean => item.includeInSitemap !== false
  return items.filter(isListed)
}
16
+
17
/**
 * Select items for the markdown sitemap (/sitemap-markdown.xml).
 * Members-only items are ALWAYS dropped — this sitemap advertises AEO twin
 * URLs, which don't exist for gated content. Public items are also dropped
 * when they set `includeInSitemap: false`.
 */
export function forMarkdownSitemap<T extends ContentItem>(items: T[]): T[] {
  return items.filter((item) => {
    if (item.access === 'members') return false
    if (item.includeInSitemap === false) return false
    return true
  })
}
26
+
27
/**
 * Select items for the RSS feed (/feed.xml). Members-only items are ALWAYS
 * dropped, so `includeInFeed: true` on a members item has no effect. Public
 * items opt out with `includeInFeed: false`.
 */
export function forRss<T extends ContentItem>(items: T[]): T[] {
  return items.filter((item) => !(item.access === 'members' || item.includeInFeed === false))
}
35
+
36
/**
 * Select items for the Apple News Publisher RSS (/apple-news.xml).
 * Members-only items are ALWAYS dropped, as are items with
 * `includeInFeed: false`. Of the rest, an item is kept only when its
 * effective publishable flag — `item.appleNewsPublishable`, falling back to
 * `defaults.publishable` — is 'yes'. An item-level 'no' therefore wins over a
 * channel default of 'yes'.
 *
 * @param items - Candidate items.
 * @param defaults - Channel-level default for the publishable flag.
 */
export function forAppleNews<T extends ContentItem>(
  items: T[],
  defaults: { publishable: 'yes' | 'no' },
): T[] {
  const effectivePublishable = (item: T) => item.appleNewsPublishable ?? defaults.publishable
  return items.filter(
    (item) =>
      item.access !== 'members' &&
      item.includeInFeed !== false &&
      effectivePublishable(item) === 'yes',
  )
}
52
+
53
/**
 * Select items for the narrated-articles podcast feed (/listen.xml).
 * An item must have an `audio` property, must not be members-only
 * (unconditional), and must not set `includeInFeed: false`.
 */
export function forListen<T extends ContentItem>(items: T[]): T[] {
  return items.filter((item) => {
    if (item.audio === undefined) return false
    if (item.access === 'members') return false
    return item.includeInFeed !== false
  })
}
62
+
63
/**
 * Select items for llms.txt (the link index). Members-only items are ALWAYS
 * dropped. `includeInFeed: false` is deliberately ignored here — llms.txt is a
 * discovery file, not a feed, so that opt-out applies to RSS only.
 */
export function forLlms<T extends ContentItem>(items: T[]): T[] {
  const isPublic = (item: T): boolean => item.access !== 'members'
  return items.filter(isPublic)
}
71
+
72
/**
 * Select items for the bulk corpus dump (/llms-full.txt). Same rule as
 * llms.txt: everything except members-only items, which are always excluded.
 */
export function forLlmsFull<T extends ContentItem>(items: T[]): T[] {
  return items.filter((item) => !(item.access === 'members'))
}
79
+
80
/**
 * Select items that may get an AEO markdown twin — emitted at build time in
 * static mode or served on request in middleware mode. Members-only items are
 * ALWAYS excluded: gated content is never exposed as markdown.
 */
export function forMarkdownTwin<T extends ContentItem>(items: T[]): T[] {
  return items.filter((item) => {
    const gated = item.access === 'members'
    return !gated
  })
}