@growth-labs/seo 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +22 -0
- package/dist/utils/validation.js.map +1 -1
- package/package.json +9 -5
- package/src/_internal/state.ts +26 -0
- package/src/bindings.ts +146 -0
- package/src/cron/prune-aeo-r2.ts +140 -0
- package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
- package/src/index.ts +380 -0
- package/src/middleware/seo.ts +350 -0
- package/src/options.ts +456 -0
- package/src/routes/aeo-twin.ts +130 -0
- package/src/routes/apple-news.ts +36 -0
- package/src/routes/llms-full.ts +36 -0
- package/src/routes/llms.ts +15 -0
- package/src/routes/podcast-narration.ts +45 -0
- package/src/routes/podcast.ts +27 -0
- package/src/routes/revalidate.ts +298 -0
- package/src/routes/robots.ts +21 -0
- package/src/routes/rss.ts +29 -0
- package/src/routes/sitemap-articles.ts +25 -0
- package/src/routes/sitemap-index.ts +89 -0
- package/src/routes/sitemap-markdown.ts +39 -0
- package/src/routes/sitemap-pages.ts +24 -0
- package/src/routes/sitemap-products.ts +24 -0
- package/src/routes/sitemap-videos.ts +24 -0
- package/src/runtime.ts +17 -0
- package/src/site-url-core.ts +71 -0
- package/src/site-url.ts +21 -0
- package/src/types.ts +166 -0
- package/src/utils/aeo-summary.ts +176 -0
- package/src/utils/aeo-twin-emitter.ts +173 -0
- package/src/utils/aeo.ts +223 -0
- package/src/utils/apple-news-anf.ts +163 -0
- package/src/utils/apple-news-rss.ts +136 -0
- package/src/utils/content-filter.ts +87 -0
- package/src/utils/crawler-class.ts +155 -0
- package/src/utils/define-content-provider.ts +65 -0
- package/src/utils/effective-auth.ts +44 -0
- package/src/utils/fcrdns.ts +269 -0
- package/src/utils/fresh-layer.ts +175 -0
- package/src/utils/hreflang.ts +26 -0
- package/src/utils/index.ts +91 -0
- package/src/utils/json-ld/article.ts +120 -0
- package/src/utils/json-ld/audio.ts +32 -0
- package/src/utils/json-ld/breadcrumb.ts +28 -0
- package/src/utils/json-ld/faq.ts +18 -0
- package/src/utils/json-ld/howto.ts +23 -0
- package/src/utils/json-ld/index.ts +12 -0
- package/src/utils/json-ld/item-list.ts +26 -0
- package/src/utils/json-ld/organization.ts +42 -0
- package/src/utils/json-ld/person.ts +25 -0
- package/src/utils/json-ld/product.ts +155 -0
- package/src/utils/json-ld/video.ts +20 -0
- package/src/utils/json-ld/website.ts +27 -0
- package/src/utils/llms-full.ts +90 -0
- package/src/utils/llms.ts +45 -0
- package/src/utils/meta.ts +184 -0
- package/src/utils/podcast.ts +112 -0
- package/src/utils/robots.ts +47 -0
- package/src/utils/rss.ts +64 -0
- package/src/utils/seo-head.ts +81 -0
- package/src/utils/sitemap-markdown.ts +80 -0
- package/src/utils/sitemap.ts +169 -0
- package/src/utils/staleness.ts +61 -0
- package/src/utils/validation.ts +308 -0
- package/src/virtual.d.ts +8 -0
- package/src/vite-plugin.ts +66 -0
package/src/utils/aeo.ts
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js'
|
|
2
|
+
|
|
3
|
+
// ─── Public API ───
|
|
4
|
+
|
|
5
|
+
/** Inputs that shape the markdown twin produced by `generateAeoMarkdown`. */
export interface GenerateAeoMarkdownOptions {
  /** Organization name written to the `publisher` frontmatter key. */
  publisherName: string
  /** Schema.org type written to the `type` frontmatter key (e.g. 'Article', 'NewsArticle'). */
  schemaType: string
  /** The article body as clean markdown, without any HTML page chrome. */
  content: string
  /** When true, semantic sections are wrapped in `<!-- aeo:section start/end -->` comments. */
  ragChunkMarkers?: boolean
  /** Canonical HTML URL for the `canonical` frontmatter key. Falls back to `item.url`. */
  canonical?: string
  /** URL the twin itself is served at; written to the `url` frontmatter key. Falls back to `item.url`. */
  twinUrl?: string
  /** URL of the summary twin; written to the `summaryUrl` frontmatter key when present. */
  summaryUrl?: string
  /** Pre-computed SHA-256 content hash; written to the `contentHash` frontmatter key when present. */
  contentHash?: string
}
|
|
23
|
+
|
|
24
|
+
/**
 * Rough token estimate using the rule of thumb "one token per four characters",
 * rounded up. Feeds the `tokens` frontmatter field and, per the original notes,
 * the `x-markdown-tokens` response header.
 *
 * @param text - Text to measure (length is counted in UTF-16 code units).
 * @returns The estimated token count; 0 for an empty string.
 */
export function estimateTokenCount(text: string): number {
  const charsPerToken = 4
  const { length } = text
  return Math.ceil(length / charsPerToken)
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build the AEO markdown twin for a ContentItem: a YAML frontmatter block
 * followed by the article body.
 *
 * Processing order:
 * 1. Frontmatter is rendered from `item` and `options`.
 * 2. Author-supplied HTML comment delimiters in the body are neutralized via
 *    `escapeHtmlComments`, so body text cannot forge or close a section marker.
 * 3. When `options.ragChunkMarkers` is truthy, each `## ` section is wrapped in
 *    `<!-- aeo:section start="<slug>" -->` / `<!-- aeo:section end="<slug>" -->`
 *    pairs so RAG retrievers chunk on boundaries we control.
 *
 * @param item - Content item supplying the frontmatter values.
 * @param options - Body markdown plus frontmatter/marker settings.
 * @returns `---\n<frontmatter>\n---\n\n<body>`.
 */
export function generateAeoMarkdown(
  item: ContentItem,
  options: GenerateAeoMarkdownOptions,
): string {
  const yamlBlock = buildFrontmatter(item, options)

  // Escaping must happen before wrapping: the markers we add are real comments.
  let markdown = escapeHtmlComments(options.content)
  if (options.ragChunkMarkers) {
    markdown = wrapSectionMarkers(markdown)
  }

  return ['---', yamlBlock, '---', '', markdown].join('\n')
}
|
|
54
|
+
|
|
55
|
+
// ─── Frontmatter ───
|
|
56
|
+
|
|
57
|
+
/**
 * Render the YAML frontmatter body (without the surrounding `---` fences).
 *
 * Keys are emitted in a fixed order: title, description, url, canonical,
 * datePublished, dateModified, author, publisher, image, type, language,
 * audio, alternateLanguages, contentHash, tokens, summaryUrl. Optional keys
 * are omitted when their source value is falsy or empty.
 *
 * Nested structures use explicit two-space block indentation so that the
 * properties of each list entry line up under the entry's first key — without
 * that the `author` and `alternateLanguages` blocks do not parse as YAML.
 *
 * @param item - Content item supplying titles, dates, authors, images, locales.
 * @param options - Twin options (publisher, schema type, URLs, hash, body).
 * @returns Newline-joined YAML lines.
 */
function buildFrontmatter(item: ContentItem, options: GenerateAeoMarkdownOptions): string {
  const canonical = options.canonical ?? item.url
  const twinUrl = options.twinUrl ?? item.url
  const firstImage = Array.isArray(item.image) ? item.image[0] : item.image

  const lines: string[] = []
  lines.push(yamlScalar('title', item.title))
  if (item.description) lines.push(yamlScalar('description', item.description))
  lines.push(yamlScalar('url', twinUrl))
  lines.push(yamlScalar('canonical', canonical))
  if (item.datePublished) lines.push(yamlScalar('datePublished', item.datePublished))
  if (item.dateModified) lines.push(yamlScalar('dateModified', item.dateModified))

  const authors = item.authors ?? []
  if (authors.length) {
    lines.push('author:')
    for (const a of authors) {
      // Entry keys sit at column 4, aligned with `name` after the `  - ` marker.
      lines.push(`  - name: ${yamlValue(a.name)}`)
      if (a.url) lines.push(`    url: ${yamlValue(a.url)}`)
      if (a.jobTitle) lines.push(`    jobTitle: ${yamlValue(a.jobTitle)}`)
      if (a.knowsAbout?.length) {
        lines.push('    knowsAbout:')
        for (const k of a.knowsAbout) lines.push(`      - ${yamlValue(k)}`)
      }
      if (a.sameAs?.length) {
        lines.push('    sameAs:')
        for (const s of a.sameAs) lines.push(`      - ${yamlValue(s)}`)
      }
    }
  }

  lines.push(yamlScalar('publisher', options.publisherName))
  if (firstImage) lines.push(yamlScalar('image', firstImage))
  lines.push(yamlScalar('type', options.schemaType))
  if (item.locale) lines.push(yamlScalar('language', item.locale))
  if (item.audio) lines.push(yamlScalar('audio', item.audio.url))

  const alternates = item.alternateLocales ?? []
  if (alternates.length) {
    lines.push('alternateLanguages:')
    for (const l of alternates) {
      lines.push(`  - lang: ${yamlValue(l.lang)}`)
      lines.push(`    url: ${yamlValue(l.url)}`)
    }
  }

  if (options.contentHash) lines.push(yamlScalar('contentHash', options.contentHash))
  // Token estimate is taken from the raw body, before comment escaping or markers.
  lines.push(`tokens: ${estimateTokenCount(options.content)}`)
  if (options.summaryUrl) lines.push(yamlScalar('summaryUrl', options.summaryUrl))
  return lines.join('\n')
}
|
|
117
|
+
|
|
118
|
+
/**
 * Format one top-level `key: value` frontmatter line. The value is passed
 * through `yamlValue` for quoting; the key is written as given.
 */
function yamlScalar(key: string, value: string): string {
  const rendered = yamlValue(value)
  return [key, rendered].join(': ')
}
|
|
121
|
+
|
|
122
|
+
/**
 * Emit a string so that a YAML parser reads it back as the same string.
 * Plain (unquoted) output is used when it is unambiguous; otherwise the value
 * is double-quoted with `\\`, `"`, newline, carriage return and tab escaped.
 *
 * A value is quoted when any of these hold:
 * - it is empty (a bare `key:` parses as null);
 * - it has leading or trailing whitespace, or starts with `-`;
 * - it contains a YAML indicator character or a line break;
 * - it contains `:` and is neither an http(s) URL nor date-prefixed — or the
 *   `:` is followed by whitespace / end of value, which makes it a mapping
 *   indicator even inside a URL or date-prefixed string;
 * - as a plain scalar it would resolve to a boolean, null or number
 *   (`no`, `true`, `~`, `2024`, ...), e.g. the locale `no`.
 *
 * @param v - Raw string value.
 * @returns The YAML-safe rendering of `v`.
 */
function yamlValue(v: string): string {
  // Whitelisted shapes whose embedded `:` is harmless in a plain scalar.
  const isUrl = /^https?:\/\//.test(v)
  const isDate = /^\d{4}-\d{2}-\d{2}/.test(v)

  const colonIsIndicator = /:(\s|$)/.test(v)
  const colonNeedsQuotes = v.includes(':') && (colonIsIndicator || (!isUrl && !isDate))

  // Plain scalars that a YAML 1.1/1.2 parser resolves to bool / null / number.
  const resolvesToNonString =
    /^(~|null|true|false|yes|no|on|off)$/i.test(v) ||
    /^[-+]?(0x[0-9a-f]+|0o[0-7]+|\.?\d[\d_.]*(e[-+]?\d+)?)$/i.test(v)

  const needsQuotes =
    v === '' ||
    /^[\s-]/.test(v) ||
    /\s$/.test(v) ||
    /[#{}\[\],&*?|>!%@`'"\n\r]/.test(v) ||
    colonNeedsQuotes ||
    resolvesToNonString
  if (!needsQuotes) return v

  // Backslash first so the escapes added afterwards are not double-escaped.
  const escaped = v
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
    .replace(/\t/g, '\\t')
  return `"${escaped}"`
}
|
|
137
|
+
|
|
138
|
+
// ─── RAG chunk markers ───
|
|
139
|
+
|
|
140
|
+
/**
 * Wrap each `## ` section of a markdown body in
 * `<!-- aeo:section start="<slug>" -->` / `<!-- aeo:section end="<slug>" -->`.
 *
 * - Non-blank content before the first `## ` heading is wrapped as `lede`.
 * - Each `## ` heading closes the open section and opens a new one whose slug
 *   is derived from the heading text (falling back to `section-N`, where N is
 *   the heading's ordinal) and de-duplicated against slugs already used.
 * - The last open section is closed at the end of the body.
 * - Lines are matched with a trailing `\r` ignored, so CRLF bodies are
 *   sectioned the same way as LF bodies. The original line bytes are kept.
 * - `## ` lines inside fenced code blocks (``` or ~~~) are not headings and
 *   never start a section, so no marker is injected into code samples.
 *
 * @param body - Markdown body; expected to be comment-escaped already.
 * @returns The body with section markers inserted on their own lines.
 */
function wrapSectionMarkers(body: string): string {
  const out: string[] = []
  const usedSlugs = new Set<string>()
  let currentSlug: string | null = null
  let ordinal = 0
  let sawContent = false
  // Opening fence marker of the code block we are inside, or null when outside.
  let fence: string | null = null

  const openSection = (slug: string): void => {
    out.push(`<!-- aeo:section start="${slug}" -->`)
    currentSlug = slug
  }
  const closeSection = (): void => {
    if (currentSlug) {
      out.push(`<!-- aeo:section end="${currentSlug}" -->`)
      currentSlug = null
    }
  }

  for (const line of body.split('\n')) {
    // `.` never matches `\r`, so a CRLF heading would otherwise fail `(.+)$`.
    const text = line.endsWith('\r') ? line.slice(0, -1) : line

    const fenceMatch = /^ {0,3}(`{3,}|~{3,})/.exec(text)
    if (fenceMatch) {
      const marker = fenceMatch[1]!
      if (fence === null) {
        fence = marker
      } else if (marker[0] === fence[0] && marker.length >= fence.length) {
        // A closing fence uses the same character and is at least as long.
        fence = null
      }
    }

    const headingMatch = fence === null ? /^##\s+(.+)$/.exec(text) : null
    if (headingMatch) {
      closeSection()
      ordinal++
      const slug = sanitizeSlug(headingMatch[1]!) || `section-${ordinal}`
      const unique = uniqueSlug(slug, usedSlugs)
      usedSlugs.add(unique)
      openSection(unique)
      out.push(line)
      sawContent = true
      continue
    }

    // Open `lede` on the first non-blank line seen before any heading.
    if (!currentSlug && line.trim() !== '' && !sawContent) {
      openSection('lede')
      usedSlugs.add('lede')
      sawContent = true
    }
    out.push(line)
  }
  closeSection()
  return out.join('\n')
}
|
|
188
|
+
|
|
189
|
+
/**
 * Strict slug sanitization: the result matches `[a-z0-9-]{0,64}` with no
 * leading, trailing or repeated dashes. An empty string is returned when the
 * input has no ASCII letters or digits left after normalization; callers
 * supply their own fallback.
 *
 * Steps: lowercase, NFKD-decompose and drop combining marks (so `é` becomes
 * `e`), collapse every run of other characters to a single `-`, trim dashes,
 * cut to 64 characters, then trim again — the cut can land right after a
 * dash and would otherwise leave it dangling.
 *
 * @param text - Arbitrary heading text.
 * @returns The sanitized slug, possibly empty.
 */
function sanitizeSlug(text: string): string {
  const trimDashes = (s: string): string => s.replace(/^-+|-+$/g, '')
  const ascii = text
    .toLowerCase()
    .normalize('NFKD')
    .replace(/[\u0300-\u036f]/g, '')
  const dashed = trimDashes(ascii.replace(/[^a-z0-9]+/g, '-'))
  return trimDashes(dashed.slice(0, 64))
}
|
|
199
|
+
|
|
200
|
+
/**
 * Return `base` if it is unused, otherwise the first free `base-N` (N = 2, 3, ...).
 *
 * When appending the suffix would push the slug past 64 characters, the base
 * is shortened (and any dash exposed by the cut is dropped) so the result
 * still fits. Short bases are never altered. `used` is not modified; the
 * caller records the returned slug.
 *
 * @param base - Candidate slug.
 * @param used - Slugs already taken.
 * @returns A slug not present in `used`.
 */
function uniqueSlug(base: string, used: Set<string>): string {
  if (!used.has(base)) return base
  const maxLength = 64
  for (let n = 2; ; n++) {
    const suffix = `-${n}`
    const room = maxLength - suffix.length
    const stem = base.length > room ? base.slice(0, room).replace(/-+$/, '') : base
    const candidate = `${stem}${suffix}`
    if (!used.has(candidate)) return candidate
  }
}
|
|
206
|
+
|
|
207
|
+
/**
 * Neutralize author-supplied HTML comment delimiters in the body so forged
 * markers can't close real ones. The opening delimiter's `<` becomes `&lt;`
 * and the closing delimiter's `>` becomes `&gt;`, which keeps the text
 * readable but stops it from being parsed as a comment. Applied BEFORE
 * section markers are added (spec "Marker sanitization").
 *
 * @param body - Raw markdown body.
 * @returns The body with every `<!--` and `-->` entity-escaped.
 */
function escapeHtmlComments(body: string): string {
  return body.replace(/<!--/g, '&lt;!--').replace(/-->/g, '--&gt;')
}
|
|
214
|
+
|
|
215
|
+
// ─── Exports for testing ───
|
|
216
|
+
|
|
217
|
+
/** Module-private helpers, re-exported as a single object for testing. */
export const _internals = {
  sanitizeSlug,
  uniqueSlug,
  escapeHtmlComments,
  wrapSectionMarkers,
  yamlValue,
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js'
|
|
2
|
+
|
|
3
|
+
// Apple News Format (ANF) JSON document generator.
|
|
4
|
+
//
|
|
5
|
+
// EXPERIMENTAL in 0.2.0. Ships a conservative minimal-viable document that
|
|
6
|
+
// passes Apple News Publisher's static validation. Full round-trip validation
|
|
7
|
+
// against News Publisher requires an Apple developer account — test your
|
|
8
|
+
// output in your News Publisher dashboard before committing to this format.
|
|
9
|
+
//
|
|
10
|
+
// ANF spec: https://developer.apple.com/documentation/apple_news/apple_news_format
|
|
11
|
+
//
|
|
12
|
+
// Scope of this package: produce a valid ANF document. Submission to the News
|
|
13
|
+
// Publisher API (authentication, HMAC signing, multipart POST) is the
|
|
14
|
+
// consumer's concern — the ANF spec explicitly keeps credentials out of scope.
|
|
15
|
+
|
|
16
|
+
/** Apple News Format version written to the `version` field of every generated document. */
const ANF_VERSION = '1.7'
|
|
17
|
+
|
|
18
|
+
/**
 * Options for `generateAppleNewsAnf`. Deliberately small in v1: consumers who
 * need richer layout control can post-process the returned document.
 */
export interface GenerateAnfOptions {
  /** Apple News channel identifier. */
  channelId: string
  /** Byline text. When omitted, the generator builds "By <author names>" from the item's authors. */
  byline?: string
  /** Document language, ISO 639-1. Falls back to the item's locale, then to 'en'. */
  language?: string
}
|
|
25
|
+
|
|
26
|
+
/**
 * One entry of an ANF document's `components` array. Only `role` is typed;
 * every other property is left open so any component shape can be expressed.
 */
export interface AnfComponent {
  /** Component role, e.g. 'title', 'byline', 'photo' or 'body'. */
  role: string
  /** Role-specific properties (text, layout name, style name, URL, ...). */
  [key: string]: unknown
}
|
|
30
|
+
|
|
31
|
+
// Named layouts and named styles: components point at an entry by its key.
// Unlike components, the entries carry no `role`. Both maps are loosely typed
// on purpose so consumers can add or override entries freely.

/** Component layouts keyed by a user-chosen name. */
export type AnfLayoutMap = Record<string, Record<string, unknown>>
/** Component text styles keyed by a user-chosen name. */
export type AnfStyleMap = Record<string, Record<string, unknown>>
|
|
35
|
+
|
|
36
|
+
/** Shape of the JSON-serializable Apple News Format document this module produces. */
export interface AnfDocument {
  /** ANF specification version the document targets. */
  version: string
  /** Article identifier; expected to stay stable across republishes. */
  identifier: string
  /** Article title. */
  title: string
  /** Optional subtitle. */
  subtitle?: string
  /** Language code of the article. */
  language: string
  /** Document-level layout settings (columns, width, margin, gutter). */
  layout: Record<string, unknown>
  /** Document-level styling such as the background color. */
  documentStyle?: Record<string, unknown>
  /** Article metadata (canonical URL, dates, thumbnail, excerpt, authors). */
  metadata?: Record<string, unknown>
  /** Ordered content components. */
  components: AnfComponent[]
  /** Named layouts referenced by components through their `layout` property. */
  componentLayouts?: AnfLayoutMap
  /** Named text styles referenced by components through their `textStyle` property. */
  componentStyles?: AnfStyleMap
}
|
|
49
|
+
|
|
50
|
+
/**
 * Build an Apple News Format document for a ContentItem. The result is a plain
 * JSON-serializable object, intended to be sent as the `article.json` part of
 * a News Publisher multipart POST.
 *
 * Component order: title, byline (explicit option, else "By <authors>"), hero
 * photo (first image, captioned with the description), then one `body`
 * component per blank-line-separated paragraph of the item's description.
 * The layout is a conservative 7-column grid with fixed default styles.
 * Consumers publishing real articles should swap the body components for
 * their rendered content before submitting.
 *
 * @param item - Source content item.
 * @param options - Byline / language overrides.
 * @returns The assembled ANF document.
 */
export function generateAppleNewsAnf(item: ContentItem, options: GenerateAnfOptions): AnfDocument {
  // Hero image: first entry when a list is given, otherwise the single value.
  const heroImage = Array.isArray(item.image) ? item.image[0] : item.image

  const components: AnfComponent[] = [
    { role: 'title', layout: 'titleLayout', text: item.title, textStyle: 'titleStyle' },
  ]

  let byline: string | undefined = options.byline
  if (byline == null && item.authors?.length) {
    byline = `By ${item.authors.map((a) => a.name).join(', ')}`
  }
  if (byline) {
    components.push({
      role: 'byline',
      layout: 'bylineLayout',
      text: byline,
      textStyle: 'bylineStyle',
    })
  }

  if (heroImage) {
    components.push({
      role: 'photo',
      layout: 'heroLayout',
      URL: heroImage,
      caption: item.description,
    })
  }

  // Body paragraphs come from the description, split on blank lines.
  if (item.description) {
    const paragraphs = item.description
      .split(/\n{2,}/)
      .map((block) => block.trim())
      .filter((block) => block.length > 0)
    for (const text of paragraphs) {
      components.push({ role: 'body', layout: 'bodyLayout', text, textStyle: 'bodyStyle' })
    }
  }

  const componentLayouts: AnfLayoutMap = {
    titleLayout: { columnStart: 0, columnSpan: 7, margin: { bottom: 18 } },
    bylineLayout: { columnStart: 0, columnSpan: 7, margin: { bottom: 30 } },
    heroLayout: { ignoreDocumentMargin: true, minimumHeight: 500 },
    bodyLayout: { columnStart: 0, columnSpan: 7, margin: { top: 15, bottom: 15 } },
  }
  const componentStyles: AnfStyleMap = {
    titleStyle: { textAlignment: 'left', fontName: 'HelveticaNeue-Bold', fontSize: 36 },
    bylineStyle: { textAlignment: 'left', fontName: 'HelveticaNeue-Medium', fontSize: 13 },
    bodyStyle: { textAlignment: 'left', fontName: 'Georgia', fontSize: 18, lineHeight: 26 },
  }

  return {
    version: ANF_VERSION,
    identifier: item.appleNewsId ?? deriveIdentifier(item.url),
    title: item.title,
    language: options.language ?? item.locale ?? 'en',
    layout: { columns: 7, width: 1024, margin: 75, gutter: 20 },
    documentStyle: { backgroundColor: '#FFFFFF' },
    metadata: {
      canonicalURL: item.url,
      datePublished: item.datePublished,
      dateModified: item.dateModified,
      thumbnailURL: heroImage,
      excerpt: item.description,
      authors: item.authors?.map((a) => a.name),
    },
    components,
    componentLayouts,
    componentStyles,
  }
}
|
|
148
|
+
|
|
149
|
+
/**
 * Derive a stable identifier from the article URL when the consumer hasn't
 * assigned one explicitly. The same URL always yields the same identifier, so
 * Apple News can de-dupe across republishes.
 *
 * The scheme and any query/hash are stripped, every character outside
 * `[a-zA-Z0-9_-]` becomes `-`, runs of `-` are collapsed and edge dashes
 * removed. Apple caps identifiers at 64 characters, so a longer result is
 * shortened to a 55-character prefix plus `-` and an 8-hex-digit FNV-1a hash
 * of the full slug. The hash keeps two long URLs that share a prefix from
 * colliding while remaining fully deterministic.
 *
 * @param url - Absolute article URL.
 * @returns An identifier of at most 64 characters (empty if the URL has no usable characters).
 */
function deriveIdentifier(url: string): string {
  const maxLength = 64
  const slug = url
    .replace(/^https?:\/\//, '')
    .replace(/[#?].*$/, '')
    .replace(/[^a-zA-Z0-9_-]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '')
  if (slug.length <= maxLength) return slug

  // 32-bit FNV-1a over the full slug; Math.imul keeps the multiply in int32 range.
  let hash = 0x811c9dc5
  for (let i = 0; i < slug.length; i++) {
    hash ^= slug.charCodeAt(i)
    hash = Math.imul(hash, 0x01000193)
  }
  const digest = (hash >>> 0).toString(16).padStart(8, '0')
  const head = slug.slice(0, maxLength - digest.length - 1).replace(/-+$/, '')
  return `${head}-${digest}`
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import type { SeoOptionsWithResolvedSite } from '../options.js'
|
|
2
|
+
import type { ContentItem } from '../types.js'
|
|
3
|
+
import { forAppleNews } from './content-filter.js'
|
|
4
|
+
|
|
5
|
+
/**
 * Escape the five XML-reserved characters so `str` can be placed in element
 * text or inside a double- or single-quoted attribute value.
 *
 * `&` is replaced first; doing it later would re-escape the ampersands
 * introduced by the other replacements.
 *
 * @param str - Raw text.
 * @returns Text with `&`, `<`, `>`, `"` and `'` replaced by their entities.
 */
function escapeXml(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
}
|
|
13
|
+
|
|
14
|
+
/**
 * Format a date string as an RFC 822-style UTC timestamp for RSS
 * (`Date.prototype.toUTCString`, e.g. `Mon, 01 Jan 2024 00:00:00 GMT`).
 *
 * @param dateStr - Any string accepted by the `Date` constructor.
 * @returns The formatted timestamp, or an empty string when `dateStr` cannot
 *   be parsed — so callers never emit the literal text "Invalid Date".
 */
function toRfc822(dateStr: string): string {
  const parsed = new Date(dateStr)
  return Number.isNaN(parsed.getTime()) ? '' : parsed.toUTCString()
}
|
|
17
|
+
|
|
18
|
+
/**
 * Callback that turns a ContentItem into the self-contained HTML fragment used
 * for Apple News `content:encoded`, or `undefined` when no body is available.
 *
 * Apple requires absolute URLs and no page chrome (headers, sidebars,
 * footers); consumers typically derive the fragment from the rendered
 * article's body element.
 */
export type ContentHtmlResolver = (item: ContentItem) => string | undefined
|
|
25
|
+
|
|
26
|
+
/** Argument object for `generateAppleNewsRss`. */
export interface GenerateAppleNewsRssOptions {
  /** Candidate items; eligibility filtering, ordering and capping happen inside the generator. */
  items: ContentItem[]
  /** Resolved SEO options; the `appleNews` section must be enabled. */
  options: SeoOptionsWithResolvedSite
  /**
   * Produces the full HTML body for an item. It is consulted only when
   * `options.appleNews.fullContent` is true; an item for which it returns
   * nothing is still emitted, just without a `content:encoded` element. When
   * `fullContent` is false only titles/descriptions are emitted.
   */
  contentHtml?: ContentHtmlResolver
}
|
|
34
|
+
|
|
35
|
+
/** Hard cap on the number of `<item>` entries written to the feed. */
const MAX_ITEMS = 100

/**
 * Generate Apple News Publisher RSS (`/apple-news.xml`). Apple's ingester is
 * stricter than a generic reader — missing required fields cause silent
 * rejection.
 *
 * What is implemented:
 * - Eligibility filtering via `forAppleNews` (members and non-publishable
 *   items are excluded, resolved against the channel default).
 * - Newest-first ordering by `datePublished`, capped at 100 items, no
 *   pagination. Items with a missing or unparseable date sort as oldest, so
 *   the comparator stays consistent.
 * - `guid` in permalink form (the item URL).
 * - `dc:creator` from the first author; `category` from the item's
 *   `appleNewsSection` or the channel `defaultSection`.
 * - `content:encoded` CDATA body when `fullContent` is true and the resolver
 *   returns HTML. An item whose resolver returns nothing is still emitted,
 *   just without `content:encoded`.
 * - Hero image via `<media:content>` (first image URL).
 *
 * @param param0 - Items, resolved SEO options and an optional body resolver.
 * @returns The complete RSS 2.0 document as a string.
 * @throws Error when `options.appleNews` is missing or not enabled.
 */
export function generateAppleNewsRss({
  items,
  options,
  contentHtml,
}: GenerateAppleNewsRssOptions): string {
  const appleNews = options.appleNews
  if (!appleNews?.enabled) {
    throw new Error('generateAppleNewsRss called without appleNews.enabled')
  }

  // Sort key in ms since epoch; NaN would make the comparator inconsistent.
  const publishedAt = (item: ContentItem): number => {
    const ms = item.datePublished ? new Date(item.datePublished).getTime() : 0
    return Number.isNaN(ms) ? 0 : ms
  }

  const sorted = forAppleNews(items, { publishable: appleNews.defaultPublishable })
    .slice()
    .sort((a, b) => publishedAt(b) - publishedAt(a))
    .slice(0, MAX_ITEMS)

  const selfLink = `${options.site.replace(/\/$/, '')}${appleNews.feedPath}`
  const channelImage = options.organization.logo
  const language = options.defaults.locale.replace('_', '-')
  const lastBuildDate = new Date().toUTCString()

  const itemsXml = sorted
    .map((item) => {
      const entry: string[] = [
        '    <item>',
        `      <title>${escapeXml(item.title)}</title>`,
        `      <link>${escapeXml(item.url)}</link>`,
        `      <guid isPermaLink="true">${escapeXml(item.url)}</guid>`,
      ]

      const pubDate = item.datePublished ? toRfc822(item.datePublished) : ''
      if (pubDate) entry.push(`      <pubDate>${escapeXml(pubDate)}</pubDate>`)

      const creator = item.authors?.[0]?.name
      if (creator) entry.push(`      <dc:creator>${escapeXml(creator)}</dc:creator>`)

      const section = item.appleNewsSection ?? appleNews.defaultSection
      if (section) entry.push(`      <category>${escapeXml(section)}</category>`)

      if (item.description) {
        entry.push(`      <description>${escapeXml(item.description)}</description>`)
      }

      if (appleNews.fullContent) {
        const html = contentHtml?.(item)
        if (html) {
          // CDATA-wrap; split any ']]>' in the body so it cannot end the section early.
          const safe = html.replace(/]]>/g, ']]]]><![CDATA[>')
          entry.push(`      <content:encoded><![CDATA[${safe}]]></content:encoded>`)
        }
      }

      // Hero image is required by Apple; consumers should ensure it is
      // >= 1024x768 per Apple's minimums.
      const heroImage = Array.isArray(item.image) ? item.image[0] : item.image
      if (heroImage) {
        entry.push(`      <media:content url="${escapeXml(heroImage)}" medium="image"/>`)
      }

      entry.push('    </item>')
      return entry.join('\n')
    })
    .join('\n')

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<rss version="2.0"',
    '  xmlns:atom="http://www.w3.org/2005/Atom"',
    '  xmlns:dc="http://purl.org/dc/elements/1.1/"',
    '  xmlns:content="http://purl.org/rss/1.0/modules/content/"',
    '  xmlns:media="http://search.yahoo.com/mrss/">',
    '  <channel>',
    `    <title>${escapeXml(appleNews.channelName)}</title>`,
    `    <link>${escapeXml(options.site)}</link>`,
    `    <description>${escapeXml(options.organization.name)}</description>`,
    `    <language>${escapeXml(language)}</language>`,
    `    <atom:link href="${escapeXml(selfLink)}" rel="self" type="application/rss+xml"/>`,
    '    <image>',
    `      <url>${escapeXml(channelImage)}</url>`,
    `      <title>${escapeXml(appleNews.channelName)}</title>`,
    `      <link>${escapeXml(options.site)}</link>`,
    '    </image>',
    `    <lastBuildDate>${lastBuildDate}</lastBuildDate>`,
    itemsXml,
    '  </channel>',
    '</rss>',
  ].join('\n')
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js'
|
|
2
|
+
|
|
3
|
+
// Centralizes the access-rule invariants so we don't scatter "members excluded
|
|
4
|
+
// from X" across eight different routes. If the rule ever changes, it changes
|
|
5
|
+
// here, once.
|
|
6
|
+
|
|
7
|
+
/**
 * Select items for the primary sitemaps (sitemap-articles, sitemap-pages, ...).
 * Members-only items are kept on purpose: paywalled URLs should stay
 * discoverable so search engines can send users to the paywall landing.
 * The only way out is a per-item `includeInSitemap: false`.
 */
export function forSitemap<T extends ContentItem>(items: T[]): T[] {
  return items.filter(({ includeInSitemap }) => includeInSitemap !== false)
}
|
|
16
|
+
|
|
17
|
+
/**
 * Select items for the markdown sitemap (/sitemap-markdown.xml).
 * Members-only items are ALWAYS dropped — that sitemap advertises AEO twin
 * URLs, which don't exist for gated content. A public item with
 * `includeInSitemap: false` is dropped as well.
 */
export function forMarkdownSitemap<T extends ContentItem>(items: T[]): T[] {
  return items.filter(
    ({ access, includeInSitemap }) => !(access === 'members' || includeInSitemap === false),
  )
}
|
|
26
|
+
|
|
27
|
+
/**
 * Select items for the RSS feed (/feed.xml). Members-only items are ALWAYS
 * dropped, so `includeInFeed: true` on one has no effect. Public items leave
 * the feed with `includeInFeed: false`.
 */
export function forRss<T extends ContentItem>(items: T[]): T[] {
  return items.filter(
    ({ access, includeInFeed }) => !(access === 'members' || includeInFeed === false),
  )
}
|
|
35
|
+
|
|
36
|
+
/**
 * Select items for the Apple News Publisher feed (/apple-news.xml).
 *
 * An item is kept only when all of the following hold:
 * - it is not members-only (ALWAYS enforced);
 * - it has not opted out of feeds with `includeInFeed: false`;
 * - its `appleNewsPublishable` — or, when unset, `defaults.publishable` —
 *   is 'yes'. An item-level 'no' therefore wins over a channel default of 'yes'.
 *
 * @param items - Candidate items.
 * @param defaults - Channel-level default for `appleNewsPublishable`.
 */
export function forAppleNews<T extends ContentItem>(
  items: T[],
  defaults: { publishable: 'yes' | 'no' },
): T[] {
  return items.filter(({ access, includeInFeed, appleNewsPublishable }) => {
    const blocked = access === 'members' || includeInFeed === false
    return !blocked && (appleNewsPublishable ?? defaults.publishable) === 'yes'
  })
}
|
|
52
|
+
|
|
53
|
+
/**
 * Select items for the narrated-articles podcast feed (/listen.xml).
 * An item needs an `audio` property, must not be members-only (ALWAYS
 * enforced) and must not have opted out with `includeInFeed: false`.
 */
export function forListen<T extends ContentItem>(items: T[]): T[] {
  return items.filter(
    ({ audio, access, includeInFeed }) =>
      !(audio === undefined || access === 'members' || includeInFeed === false),
  )
}
|
|
62
|
+
|
|
63
|
+
/**
 * Select items for llms.txt (the link index). Members-only items are ALWAYS
 * dropped. `includeInFeed: false` is ignored here: llms.txt is a discovery
 * file rather than a feed, so that opt-out only applies to RSS.
 */
export function forLlms<T extends ContentItem>(items: T[]): T[] {
  return items.filter(({ access }) => access !== 'members')
}
|
|
71
|
+
|
|
72
|
+
/**
 * Select items for the bulk corpus dump (/llms-full.txt). Uses the same rule
 * as llms.txt: everything except members-only items, which are ALWAYS dropped.
 */
export function forLlmsFull<T extends ContentItem>(items: T[]): T[] {
  return items.filter(({ access }) => access !== 'members')
}
|
|
79
|
+
|
|
80
|
+
/**
 * Select items that may get an AEO markdown twin — emitted at build time in
 * static mode, or served on request in middleware mode. Members-only items
 * are ALWAYS dropped: gated content is never exposed as markdown, at rest or
 * via middleware.
 */
export function forMarkdownTwin<T extends ContentItem>(items: T[]): T[] {
  return items.filter(({ access }) => access !== 'members')
}
|