@growth-labs/seo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/utils/validation.d.ts.map +1 -1
  2. package/dist/utils/validation.js +22 -0
  3. package/dist/utils/validation.js.map +1 -1
  4. package/package.json +9 -5
  5. package/src/_internal/state.ts +26 -0
  6. package/src/bindings.ts +146 -0
  7. package/src/cron/prune-aeo-r2.ts +140 -0
  8. package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
  9. package/src/index.ts +380 -0
  10. package/src/middleware/seo.ts +350 -0
  11. package/src/options.ts +456 -0
  12. package/src/routes/aeo-twin.ts +130 -0
  13. package/src/routes/apple-news.ts +36 -0
  14. package/src/routes/llms-full.ts +36 -0
  15. package/src/routes/llms.ts +15 -0
  16. package/src/routes/podcast-narration.ts +45 -0
  17. package/src/routes/podcast.ts +27 -0
  18. package/src/routes/revalidate.ts +298 -0
  19. package/src/routes/robots.ts +21 -0
  20. package/src/routes/rss.ts +29 -0
  21. package/src/routes/sitemap-articles.ts +25 -0
  22. package/src/routes/sitemap-index.ts +89 -0
  23. package/src/routes/sitemap-markdown.ts +39 -0
  24. package/src/routes/sitemap-pages.ts +24 -0
  25. package/src/routes/sitemap-products.ts +24 -0
  26. package/src/routes/sitemap-videos.ts +24 -0
  27. package/src/runtime.ts +17 -0
  28. package/src/site-url-core.ts +71 -0
  29. package/src/site-url.ts +21 -0
  30. package/src/types.ts +166 -0
  31. package/src/utils/aeo-summary.ts +176 -0
  32. package/src/utils/aeo-twin-emitter.ts +173 -0
  33. package/src/utils/aeo.ts +223 -0
  34. package/src/utils/apple-news-anf.ts +163 -0
  35. package/src/utils/apple-news-rss.ts +136 -0
  36. package/src/utils/content-filter.ts +87 -0
  37. package/src/utils/crawler-class.ts +155 -0
  38. package/src/utils/define-content-provider.ts +65 -0
  39. package/src/utils/effective-auth.ts +44 -0
  40. package/src/utils/fcrdns.ts +269 -0
  41. package/src/utils/fresh-layer.ts +175 -0
  42. package/src/utils/hreflang.ts +26 -0
  43. package/src/utils/index.ts +91 -0
  44. package/src/utils/json-ld/article.ts +120 -0
  45. package/src/utils/json-ld/audio.ts +32 -0
  46. package/src/utils/json-ld/breadcrumb.ts +28 -0
  47. package/src/utils/json-ld/faq.ts +18 -0
  48. package/src/utils/json-ld/howto.ts +23 -0
  49. package/src/utils/json-ld/index.ts +12 -0
  50. package/src/utils/json-ld/item-list.ts +26 -0
  51. package/src/utils/json-ld/organization.ts +42 -0
  52. package/src/utils/json-ld/person.ts +25 -0
  53. package/src/utils/json-ld/product.ts +155 -0
  54. package/src/utils/json-ld/video.ts +20 -0
  55. package/src/utils/json-ld/website.ts +27 -0
  56. package/src/utils/llms-full.ts +90 -0
  57. package/src/utils/llms.ts +45 -0
  58. package/src/utils/meta.ts +184 -0
  59. package/src/utils/podcast.ts +112 -0
  60. package/src/utils/robots.ts +47 -0
  61. package/src/utils/rss.ts +64 -0
  62. package/src/utils/seo-head.ts +81 -0
  63. package/src/utils/sitemap-markdown.ts +80 -0
  64. package/src/utils/sitemap.ts +169 -0
  65. package/src/utils/staleness.ts +61 -0
  66. package/src/utils/validation.ts +308 -0
  67. package/src/virtual.d.ts +8 -0
  68. package/src/vite-plugin.ts +66 -0
@@ -0,0 +1,71 @@
1
+ import type { ResolvedSeoOptions, SeoOptionsWithResolvedSite } from './options.js'
2
+
3
+ export interface SiteUrlEnvVarRef { // Indirect site URL: names an environment variable instead of giving the URL inline.
4
+ envVar: string // Key that resolveSiteUrl looks up in the supplied env bindings first, then in process.env.
5
+ }
6
+
7
+ export type SiteUrlSource = string | SiteUrlEnvVarRef
8
+
9
/**
 * Type guard for the `{ envVar }` form of a site URL source.
 *
 * @param value - Any value, typically the `site` option as supplied by the consumer.
 * @returns `true` only for a non-null object that has an `envVar` property holding a string.
 */
export function isSiteUrlEnvVarRef(value: unknown): value is SiteUrlEnvVarRef {
  // Reject primitives and null up front; `typeof null` is 'object', hence the separate check.
  if (typeof value !== 'object' || value === null) return false
  if (!('envVar' in value)) return false
  return typeof value.envVar === 'string'
}
17
+
18
/**
 * Validate that `value` is an absolute http(s) URL and return it unchanged.
 *
 * `new URL()` alone is too permissive for a site origin: it accepts any scheme, so a
 * value such as `localhost:4321` parses successfully (as scheme `localhost:`) and
 * `ftp://…` or `javascript:…` pass as well. Only `http:` and `https:` are accepted here.
 *
 * @param value - Candidate site URL.
 * @param label - Name of the setting, used in the error message. Defaults to `'site'`.
 * @returns The original `value` (not normalized) when it is valid.
 * @throws Error when `value` cannot be parsed as a URL or does not use http/https.
 */
export function validateSiteUrl(value: string, label = 'site'): string {
  const invalidMessage = `@growth-labs/seo: ${label} must resolve to a valid URL.`

  let parsed: URL
  try {
    parsed = new URL(value)
  } catch {
    throw new Error(invalidMessage)
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    throw new Error(
      `${invalidMessage} Expected an http(s) URL but got protocol "${parsed.protocol}".`,
    )
  }

  return value
}
26
+
27
/**
 * Resolve a site URL source to a validated URL string.
 *
 * A plain string is validated directly. An `{ envVar }` reference is looked up in
 * `bindingsEnv` first; if the binding is absent (`undefined`), `process.env` is
 * consulted via `getNodeEnv`.
 *
 * @param site - URL string or `{ envVar }` reference.
 * @param bindingsEnv - Optional env bindings object to read the variable from.
 * @returns The validated URL string.
 * @throws Error when `site` has neither supported shape, when the binding exists but is
 *   not a string, when the resolved value fails validation, or when the variable is unset.
 */
export function resolveSiteUrl(site: SiteUrlSource, bindingsEnv?: Record<string, unknown>): string {
  if (typeof site === 'string') return validateSiteUrl(site)

  if (!isSiteUrlEnvVarRef(site)) {
    throw new Error('@growth-labs/seo: site must be a URL string or { envVar }.')
  }

  const { envVar } = site
  const bound = bindingsEnv?.[envVar]

  switch (typeof bound) {
    case 'string':
      return validateSiteUrl(bound, `site env binding ${envVar}`)
    case 'undefined':
      // Binding not provided — fall through to the process.env lookup below.
      break
    default:
      // Present but not a string (null, number, object, …): never fall back silently.
      throw new Error(`@growth-labs/seo: site env binding ${envVar} must be a string URL.`)
  }

  const nodeValue = getNodeEnv(envVar)
  if (typeof nodeValue === 'string') {
    return validateSiteUrl(nodeValue, `process.env.${envVar}`)
  }

  throw new Error(
    `@growth-labs/seo: site env binding ${envVar} is not set. ` +
      'Provide it as a Cloudflare Worker env binding, or use process.env only in Node tooling/tests.',
  )
}
54
+
55
/**
 * Return a copy of `config` whose `site` has been resolved to a validated URL string.
 *
 * @param config - Options whose `site` may still be an `{ envVar }` reference.
 * @param bindingsEnv - Optional env bindings forwarded to `resolveSiteUrl`.
 * @returns A shallow copy of `config` with `site` replaced by the resolved URL.
 * @throws Error whatever `resolveSiteUrl` throws for an invalid or unset site.
 */
export function resolveSeoConfig(
  config: ResolvedSeoOptions,
  bindingsEnv?: Record<string, unknown>,
): SeoOptionsWithResolvedSite {
  const site = resolveSiteUrl(config.site, bindingsEnv)
  return { ...config, site }
}
64
+
65
/**
 * Read `process.env[name]` without assuming a `process` global exists.
 *
 * @param name - Environment variable name.
 * @returns The variable's value, or `undefined` when `process`, `process.env`, or the
 *   variable itself is missing.
 */
function getNodeEnv(name: string): string | undefined {
  type NodeLikeGlobal = { process?: { env?: Record<string, string | undefined> } }
  const env = (globalThis as NodeLikeGlobal).process?.env
  return env?.[name]
}
@@ -0,0 +1,21 @@
1
+ import { env as cloudflareEnv } from 'cloudflare:workers'
2
+ import type { ResolvedSeoOptions, SeoOptionsWithResolvedSite } from './options.js'
3
+ import {
4
+ resolveSeoConfig as resolveSeoConfigCore,
5
+ resolveSiteUrl as resolveSiteUrlCore,
6
+ type SiteUrlSource,
7
+ } from './site-url-core.js'
8
+
9
/**
 * Worker-facing `resolveSiteUrl`: delegates to the core resolver, defaulting the env
 * bindings to the `env` export of `cloudflare:workers` when the caller passes none.
 *
 * @param site - URL string or `{ envVar }` reference.
 * @param bindingsEnv - Env bindings to read from; defaults to the imported Workers env.
 * @returns The validated site URL.
 */
export function resolveSiteUrl(
  site: SiteUrlSource,
  bindingsEnv: Record<string, unknown> = cloudflareEnv as Record<string, unknown>,
): string {
  const resolved = resolveSiteUrlCore(site, bindingsEnv)
  return resolved
}
15
+
16
/**
 * Worker-facing `resolveSeoConfig`: delegates to the core implementation, substituting
 * the `env` export of `cloudflare:workers` when no bindings are supplied.
 *
 * @param config - Options whose `site` may still be an `{ envVar }` reference.
 * @param bindingsEnv - Optional env bindings; the imported Workers env is used when omitted.
 * @returns `config` with `site` resolved to a validated URL string.
 */
export function resolveSeoConfig(
  config: ResolvedSeoOptions,
  bindingsEnv?: Record<string, unknown>,
): SeoOptionsWithResolvedSite {
  const env = bindingsEnv ?? (cloudflareEnv as Record<string, unknown>)
  return resolveSeoConfigCore(config, env)
}
package/src/types.ts ADDED
@@ -0,0 +1,166 @@
1
+ import type { APIContext } from 'astro'
2
+
3
+ // ─── Content data interfaces ───
4
+
5
+ export interface ContentAuthor {
6
+ name: string
7
+ url?: string
8
+ jobTitle?: string
9
+ knowsAbout?: string[]
10
+ sameAs?: string[]
11
+ }
12
+
13
+ export interface ContentAudio {
14
+ url: string
15
+ duration: string // ISO 8601, e.g. 'PT8M30S'
16
+ narrator?: string
17
+ }
18
+
19
+ export interface ContentLocaleAlternate {
20
+ lang: string
21
+ url: string
22
+ }
23
+
24
+ export interface ProductVariant {
25
+ name: string
26
+ sku?: string
27
+ price: number
28
+ currency?: string
29
+ availability: 'InStock' | 'OutOfStock' | 'PreOrder' | 'Discontinued'
30
+ image?: string
31
+ }
32
+
33
+ export interface ContentProduct {
34
+ name: string
35
+ description: string
36
+ price: number
37
+ currency: string
38
+ availability: 'InStock' | 'OutOfStock' | 'PreOrder' | 'Discontinued'
39
+ brand?: string
40
+ sku?: string
41
+ gtin?: string
42
+ mpn?: string
43
+ images: string[]
44
+ rating?: {
45
+ value: number
46
+ count: number
47
+ bestRating?: number
48
+ }
49
+ reviews?: Array<{
50
+ author: string
51
+ rating: number
52
+ body?: string
53
+ datePublished?: string
54
+ }>
55
+ variants?: ProductVariant[]
56
+ condition?: 'NewCondition' | 'UsedCondition' | 'RefurbishedCondition'
57
+ }
58
+
59
+ export interface PodcastEpisodeMeta {
60
+ episodeNumber?: number
61
+ seasonNumber?: number
62
+ episodeType?: 'full' | 'trailer' | 'bonus'
63
+ }
64
+
65
+ // 'public' — freely available to crawlers, LLMs, AEO twins emitted.
66
+ // 'members' — gated content; no .md twin emitted. Unconditionally excluded from llms.txt,
67
+ // llms-full.txt, /feed.xml, /apple-news.xml, /listen.xml, and sitemap-markdown.xml.
68
+ // Still eligible for the primary sitemap so the paywalled URL remains indexable.
69
+ // `includeInFeed` is a no-op for members.
70
+ export type ContentAccess = 'public' | 'members'
71
+
72
+ export interface ContentItem {
73
+ // ─── Core ───
74
+ url: string
75
+ title: string
76
+ description?: string
77
+ image?: string
78
+ datePublished?: string
79
+ dateModified?: string
80
+ authors?: ContentAuthor[]
81
+
82
+ // ─── Audio narration ───
83
+ audio?: ContentAudio
84
+ podcastEpisode?: PodcastEpisodeMeta
85
+
86
+ // ─── Multilingual ───
87
+ locale?: string
88
+ alternateLocales?: ContentLocaleAlternate[]
89
+
90
+ // ─── Apple News ───
91
+ appleNewsId?: string
92
+ appleNewsPublishable?: 'yes' | 'no'
93
+ appleNewsSection?: string
94
+ newsKeywords?: string[]
95
+
96
+ // ─── Access / paywall ───
97
+ access?: ContentAccess
98
+ includeInSitemap?: boolean // default true
99
+ includeInFeed?: boolean // public-only; no-op for members
100
+ summary?: string // consumer-provided summary for the summary-twin generator
101
+ isAccessibleForFree?: boolean // JSON-LD; derived from `access` if omitted
102
+ paywallCssSelector?: string // e.g. '.premium-content' — used in JSON-LD hasPart
103
+
104
+ // ─── Commerce ───
105
+ product?: ContentProduct
106
+
107
+ // ─── FAQ / HowTo / Video ───
108
+ faq?: Array<{ question: string; answer: string }>
109
+ howToSteps?: Array<{ name: string; text: string; image?: string }>
110
+ video?: {
111
+ thumbnailUrl: string
112
+ duration: string
113
+ contentUrl?: string
114
+ embedUrl?: string
115
+ }
116
+ }
117
+
118
+ // ─── Content provider ───
119
+
120
+ export type ContentType = 'articles' | 'pages' | 'videos' | 'products' | 'authors'
121
+
122
+ export interface ContentProviderParams { // First argument passed to a ContentProvider.
123
+ type: ContentType // Which content collection is being requested.
124
+ slugs?: string[] // NOTE(review): presumably narrows the request to these slugs when set — confirm against callers.
125
+ }
126
+
127
+ export type ContentProvider = (
128
+ params: ContentProviderParams,
129
+ context: APIContext,
130
+ ) => Promise<ContentItem[]>
131
+
132
+ // ─── Crawler-class dispatch ───
133
+
134
+ // Set at request time by the SEO middleware on Astro.locals. Drives body-variant selection,
135
+ // cache-key segmentation, and JSON-LD redaction. See spec "Crawler-class policy".
136
+ export type CrawlerClass =
137
+ | 'verifiedSearchCrawler'
138
+ | 'llmTrainingCrawler'
139
+ | 'userDirectedLlmAgent'
140
+ | 'anonymous'
141
+
142
+ // Derived segment that incorporates the crawler-class override of raw consumer auth.
143
+ // Consumers use this (never raw authSegment) in cache keys to avoid leaking member bodies
144
+ // to user-directed LLM agents bearing member cookies.
145
+ export type EffectiveAuthSegment = 'anon' | 'member' | 'search-full'
146
+
147
+ // ─── Utility return types ───
148
+
149
+ export interface MetaTag {
150
+ [key: string]: string
151
+ }
152
+
153
+ export interface CanonicalLink {
154
+ rel: 'canonical'
155
+ href: string
156
+ }
157
+
158
+ export interface HreflangLink {
159
+ rel: 'alternate'
160
+ hreflang: string
161
+ href: string
162
+ }
163
+
164
+ // ─── JSON-LD base ───
165
+
166
+ export type JsonLdObject = Record<string, unknown>
@@ -0,0 +1,176 @@
1
+ import type { ContentItem } from '../types.js'
2
+ import { estimateTokenCount } from './aeo.js'
3
+
4
+ // ─── Public API ───
5
+
6
+ export interface GenerateSummaryTwinOptions {
7
+ publisherName: string
8
+ schemaType: string
9
+ /** Full article body in markdown (used by tiers 2 and 3 when item.summary is absent). */
10
+ content?: string
11
+ /** URL of the full twin for the `fullUrl` frontmatter cross-link. */
12
+ fullUrl: string
13
+ /** Target cap in tokens. Default 400; hard ceiling. */
14
+ maxTokens?: number
15
+ /** Emits a build-time warning callback when tier-4 fallback fires (spec 1483). */
16
+ onMinimalFallback?: (item: ContentItem) => void
17
+ }
18
+
19
+ export interface GenerateSummaryTwinResult { // Return value of generateSummaryTwin.
20
+ markdown: string // `---`-delimited frontmatter block, a blank line, then the token-capped summary body.
21
+ tier: 1 | 2 | 3 | 4 // Fallback tier that produced the body: 1 item.summary, 2 bullet extraction, 3 narrative, 4 minimal.
22
+ }
23
+
24
+ const DEFAULT_MAX_TOKENS = 400
25
+
26
/**
 * Build the summary twin (frontmatter + short body) for one ContentItem.
 *
 * The body comes from the first tier of the fallback chain that applies
 * (spec "Summary twin"):
 *
 *   1. `item.summary` supplied by the consumer — used as-is (trimmed).
 *   2. Bullet extraction — a lede plus up to five top-level `- ` / `* ` list
 *      items from the article body (needs at least three).
 *   3. Narrative — description, the first sentence of up to five `## ` sections,
 *      and the article's final sentence.
 *   4. Minimal — description (or title) only. No frontmatter flag is written
 *      for this tier (LLMs would propagate it into user-visible citations per
 *      v5 review); it is reported only through `options.onMinimalFallback`.
 *
 * The body is then capped to `options.maxTokens` (default 400) and prefixed with
 * frontmatter: `type: summary`, `title`, `url`, `fullUrl`, `datePublished` /
 * `dateModified` when set, `publisher`, and `schemaType`.
 *
 * @param item - Content item to summarize.
 * @param options - Publisher/schema metadata, optional markdown body, and limits.
 * @returns The rendered markdown and the tier that produced its body.
 */
export function generateSummaryTwin(
  item: ContentItem,
  options: GenerateSummaryTwinOptions,
): GenerateSummaryTwinResult {
  const tokenBudget = options.maxTokens ?? DEFAULT_MAX_TOKENS

  const picked = pickBody(item, options.content, options.onMinimalFallback)
  const summaryBody = capToTokens(picked.body, tokenBudget)
  const frontmatter = buildFrontmatter(item, options)

  return {
    markdown: ['---', frontmatter, '---', '', summaryBody].join('\n'),
    tier: picked.tier,
  }
}
56
+
57
+ // ─── Internals ───
58
+
59
/**
 * Choose the summary body via the 4-tier fallback chain; the first tier that applies wins.
 *
 * An empty or whitespace-only `item.description` is treated as absent in every tier,
 * so the lede (tier 2) and the minimal body (tier 4) fall back to `item.title`
 * instead of producing an empty string.
 *
 * @param item - Content item being summarized.
 * @param content - Article body in markdown; tiers 2 and 3 are skipped when absent or empty.
 * @param onMinimalFallback - Invoked with `item` when tier 4 is used (build telemetry).
 * @returns The selected body text and the tier that produced it.
 */
function pickBody(
  item: ContentItem,
  content: string | undefined,
  onMinimalFallback: ((item: ContentItem) => void) | undefined,
): { body: string; tier: 1 | 2 | 3 | 4 } {
  // Tier 1: explicit, consumer-provided summary.
  const explicitSummary = item.summary?.trim()
  if (explicitSummary) {
    return { body: explicitSummary, tier: 1 }
  }

  // `||` on purpose: '' (blank description) must count as "no description".
  const description = item.description?.trim() || undefined

  if (content) {
    // Tier 2: bullet-list extraction — needs at least 3 bullets, keeps at most 5.
    const bullets = extractTopLevelBullets(content)
    if (bullets.length >= 3) {
      const lede = description ?? item.title
      const list = bullets
        .slice(0, 5)
        .map((b) => `- ${b}`)
        .join('\n')
      return { body: `${lede}\n\n${list}`, tier: 2 }
    }

    // Tier 3: narrative fallback — description, section openers, closing sentence.
    const sectionFirsts = extractFirstSentencePerSection(content).slice(0, 5)
    const lastSentence = extractLastSentence(content)
    const narrativeParts: string[] = []
    if (description) narrativeParts.push(description)
    if (sectionFirsts.length > 0) {
      narrativeParts.push(sectionFirsts.map((s) => `- ${s}`).join('\n'))
    }
    if (lastSentence) narrativeParts.push(lastSentence)
    const narrative = narrativeParts.join('\n\n')
    // Anything shorter than 100 characters is too thin to count as a narrative.
    if (narrative.length >= 100) {
      return { body: narrative, tier: 3 }
    }
  }

  // Tier 4: minimal fallback — report it, then use description or title.
  onMinimalFallback?.(item)
  return { body: description ?? item.title, tier: 4 }
}
103
+
104
/**
 * Collect the text of top-level markdown bullets (`- ` or `* ` at column 0).
 *
 * Deliberately excluded:
 *  - indented (nested) bullets — only column-0 markers count;
 *  - a bare marker with no text on its own line (the match may not run onto the next line);
 *  - thematic breaks written with spaces, e.g. `* * *` or `- - -`.
 *
 * @param md - Markdown source.
 * @returns Trimmed bullet texts in document order; empty array when there are none.
 */
function extractTopLevelBullets(md: string): string[] {
  // `[ \t]+` rather than `\s+` so the separator cannot swallow a newline.
  const bulletRe = /^[-*][ \t]+(.+)$/gm
  // Three or more of the same marker, optionally space-separated, is a horizontal rule.
  const thematicBreakRe = /^([-*])(?:[ \t]*\1){2,}[ \t]*$/

  const out: string[] = []
  for (const match of md.matchAll(bulletRe)) {
    if (thematicBreakRe.test(match[0])) continue
    const text = match[1]?.trim()
    if (text) out.push(text)
  }
  return out
}
116
+
117
/**
 * For each `## ` section, return the first sentence of its first non-heading paragraph.
 *
 * Text before the first `## ` heading is ignored. Sections with no usable
 * paragraph, or whose first sentence comes back empty, contribute nothing.
 *
 * @param md - Markdown source.
 * @returns One sentence per qualifying section, in document order.
 */
function extractFirstSentencePerSection(md: string): string[] {
  // Splitting on the heading lines leaves the section bodies; index 0 is the preamble.
  const [, ...sectionBodies] = md.split(/^## .+$/gm)

  const sentences: string[] = []
  for (const sectionBody of sectionBodies) {
    const opening = sectionBody
      .split(/\n{2,}/)
      .map((paragraph) => paragraph.trim())
      .find((paragraph) => paragraph.length > 0 && !paragraph.startsWith('#'))
    if (!opening) continue

    const sentence = firstSentence(opening)
    if (sentence) sentences.push(sentence)
  }
  return sentences
}
133
+
134
/**
 * Return the last sentence of the last paragraph that is neither blank nor a heading.
 *
 * @param md - Markdown source.
 * @returns The trimmed final sentence, or `undefined` when no such paragraph exists.
 */
function extractLastSentence(md: string): string | undefined {
  const blocks = md.split(/\n{2,}/)

  // Walk backwards to the last paragraph that qualifies.
  for (let i = blocks.length - 1; i >= 0; i--) {
    const paragraph = (blocks[i] ?? '').trim()
    if (paragraph.length === 0 || paragraph.startsWith('#')) continue

    // Break after sentence-ending punctuation, keeping the punctuation with its sentence.
    const pieces = paragraph.split(/(?<=[.!?])\s+/)
    return pieces[pieces.length - 1]?.trim()
  }
  return undefined
}
145
+
146
/**
 * Return the first sentence of `text`.
 *
 * A sentence is the shortest prefix of the first line that ends in `.`, `!` or `?`
 * and is followed by whitespace or the end of the text. When no such prefix exists,
 * the first line is returned instead.
 *
 * @param text - Paragraph text.
 * @returns The trimmed sentence or first line.
 */
function firstSentence(text: string): string | undefined {
  const hit = text.match(/^(.+?[.!?])(?:\s|$)/)
  const sentence = hit?.[1]
  if (sentence !== undefined) return sentence.trim()

  // No terminated sentence on the first line — fall back to that line as a whole.
  const [firstLine] = text.split('\n')
  return firstLine?.trim()
}
150
+
151
/**
 * Truncate `body` to roughly `maxTokens` tokens, appending `…` when truncation happens.
 *
 * The cut point is `maxTokens * 4` UTF-16 code units.
 * NOTE(review): this assumes about four characters per token — confirm it matches
 * what `estimateTokenCount` measures.
 *
 * The cut never lands inside a surrogate pair: if the last kept code unit is a high
 * surrogate (the first half of an astral character such as an emoji), it is dropped
 * so the output contains no lone surrogate.
 *
 * @param body - Summary body text.
 * @param maxTokens - Token ceiling.
 * @returns `body` unchanged when within budget, otherwise the truncated text plus `…`.
 */
function capToTokens(body: string, maxTokens: number): string {
  if (estimateTokenCount(body) <= maxTokens) return body

  const maxChars = maxTokens * 4
  let cut = body.slice(0, maxChars)

  // charCodeAt(-1) on an empty string is NaN, so both comparisons are false there.
  const lastUnit = cut.charCodeAt(cut.length - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut = cut.slice(0, -1)
  }

  return `${cut.trimEnd()}…`
}
156
+
157
/**
 * Render the YAML frontmatter lines (without the `---` fences) for a summary twin.
 *
 * Key order: `type`, `title`, `url`, `fullUrl`, `datePublished` and `dateModified`
 * (each only when set on the item), `publisher`, `schemaType`. Every value except
 * the fixed `type: summary` passes through `yv` for quoting.
 *
 * @param item - Content item supplying the title and dates.
 * @param options - Supplies `fullUrl`, `publisherName` and `schemaType`.
 * @returns The frontmatter lines joined with `\n`.
 */
function buildFrontmatter(item: ContentItem, options: GenerateSummaryTwinOptions): string {
  const entries: Array<[key: string, value: string]> = [
    ['title', item.title],
    // The summary twin's own URL is derived from the full twin's URL.
    ['url', `${options.fullUrl}.summary.md`],
    ['fullUrl', options.fullUrl],
  ]
  if (item.datePublished) entries.push(['datePublished', item.datePublished])
  if (item.dateModified) entries.push(['dateModified', item.dateModified])
  entries.push(['publisher', options.publisherName], ['schemaType', options.schemaType])

  const rendered = entries.map(([key, value]) => `${key}: ${yv(value)}`)
  return [`type: summary`, ...rendered].join('\n')
}
169
+
170
/**
 * Format a string as a YAML scalar value, double-quoting it only when a plain
 * scalar would be unsafe.
 *
 * Quoting is applied when the value:
 *  - is empty, or has leading/trailing whitespace (a plain scalar would lose it or read as null);
 *  - contains a YAML indicator character, a quote, or a newline/CR/tab;
 *  - contains `:` and is not, in its entirety, an http(s) URL or an ISO-8601
 *    date/datetime. A title like `2024-01-01: Recap` or text that merely starts
 *    with a URL must be quoted, because `: ` ends a plain scalar.
 *
 * Inside quotes, backslash, double quote, newline, carriage return and tab are escaped.
 *
 * @param v - Raw value.
 * @returns `v` unchanged, or its double-quoted, escaped form.
 */
function yv(v: string): string {
  // Whole-string matches only: no embedded whitespace for URLs, full shape for dates.
  const isUrl = /^https?:\/\/\S+$/.test(v)
  const isDate =
    /^\d{4}-\d{2}-\d{2}(?:[Tt ]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:Z|[+-]\d{2}(?::?\d{2})?)?)?$/.test(
      v,
    )

  const needsQuotes =
    v === '' ||
    v !== v.trim() ||
    /[#{}[\],&*?|>!%@`'"\n\r\t]/.test(v) ||
    (v.includes(':') && !isUrl && !isDate)
  if (!needsQuotes) return v

  const escaped = v
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
    .replace(/\t/g, '\\t')
  return `"${escaped}"`
}
@@ -0,0 +1,173 @@
1
+ import type { ContentItem } from '../types.js'
2
+ import { type GenerateAeoMarkdownOptions, generateAeoMarkdown } from './aeo.js'
3
+ import { generateSummaryTwin } from './aeo-summary.js'
4
+ import { forMarkdownTwin } from './content-filter.js'
5
+ import { computeContentHash } from './staleness.js'
6
+
7
+ // Public API ─────────────────────────────────────────────────────────────
8
+
9
+ export type RenderBody = (item: ContentItem) => string | Promise<string>
10
+
11
+ export interface EmitAeoTwinsOptions {
12
+ items: ContentItem[]
13
+ publisherName: string
14
+ schemaType: string
15
+ /** Resolver that returns the article body in markdown. Required; the emitter
16
+ * can't synthesize content. Consumers typically produce this from their CMS. */
17
+ renderBody: RenderBody
18
+ /** Map item.url → primary twin URL. Default: append '.md'. */
19
+ twinUrl?: (articleUrl: string) => string
20
+ /** Predicate applied after the default members filter. Default: always true. */
21
+ include?: (item: ContentItem) => boolean
22
+ /** Emit a summary twin alongside the primary. Default true. */
23
+ summaryTwin?: boolean
24
+ /** Wrap semantic sections in <!-- aeo:section --> markers. Default true. */
25
+ ragChunkMarkers?: boolean
26
+ /** Stale-hash metadata mode. */
27
+ stalenessCheck?: 'content-hash' | 'dateModified' | 'none'
28
+ /** Called when the summary generator falls back to tier 4. Used for build telemetry. */
29
+ onSummaryMinimalFallback?: (item: ContentItem) => void
30
+ }
31
+
32
+ export interface EmittedTwin {
33
+ /** URL path this file will be served at (used by the writer to derive the filesystem path). */
34
+ urlPath: string
35
+ /** Full URL (for frontmatter). */
36
+ url: string
37
+ /** File body. */
38
+ content: string
39
+ /** Content-type for HTTP response headers when this file is served. */
40
+ contentType: string
41
+ /** If this emission is the primary twin for an item, the item is populated. */
42
+ item?: ContentItem
43
+ /** If this emission is a summary twin, the corresponding primary URL. */
44
+ primaryUrl?: string
45
+ /** Primary twins carry a stable content hash for staleness validation; absent on summaries. */
46
+ contentHash?: string
47
+ }
48
+
49
+ export interface EmitAeoTwinsResult {
50
+ twins: EmittedTwin[]
51
+ /** Map of item.url → contentHash. Persist to disk for staleness validation. */
52
+ contentHashes: Map<string, string>
53
+ /** Number of items filtered out (either by access rule or consumer predicate). */
54
+ skipped: number
55
+ }
56
+
57
/**
 * Produce every AEO twin file for a list of content items, in memory only.
 * Nothing is written here; the caller writes the returned twins and persists the
 * content hashes for later staleness validation.
 *
 * Selection:
 *   - `forMarkdownTwin` is applied first (drops members items unconditionally).
 *   - The consumer's `include` predicate is applied to what remains.
 *
 * Output per surviving item, in this order:
 *   - the primary twin at `twinUrl(item.url)`;
 *   - a summary twin at `<primary>.summary.md`, unless `summaryTwin` is false.
 *
 * Aliases (spec twinAliases) are NOT emitted as static files in v7 — they're
 * middleware-only redirects. Static-mode twins live at the primary URL only.
 *
 * Items are processed sequentially, so `renderBody` is awaited one item at a time.
 *
 * @param options - Items plus publisher metadata, body renderer and feature switches.
 * @returns The twins, a map of item.url → content hash (populated only in
 *   `'content-hash'` staleness mode), and the number of items filtered out.
 */
export async function emitAeoTwins(options: EmitAeoTwinsOptions): Promise<EmitAeoTwinsResult> {
  const {
    items,
    publisherName,
    schemaType,
    renderBody,
    twinUrl: toTwinUrl = defaultTwinUrl,
    include: keep = () => true,
    summaryTwin: wantSummary = true,
    ragChunkMarkers = true,
    stalenessCheck: stalenessMode = 'content-hash',
    onSummaryMinimalFallback,
  } = options

  const markdownContentType = 'text/markdown; charset=utf-8'

  const eligible = forMarkdownTwin(items).filter(keep)

  const result: EmitAeoTwinsResult = {
    twins: [],
    contentHashes: new Map<string, string>(),
    skipped: items.length - eligible.length,
  }

  for (const item of eligible) {
    const primaryUrl = toTwinUrl(item.url)
    const primaryPath = urlPath(primaryUrl)
    const body = await renderBody(item)

    // Hashing happens only in 'content-hash' mode; other modes leave the hash undefined.
    const contentHash =
      stalenessMode === 'content-hash' ? await computeContentHash(item, body) : undefined
    if (contentHash) result.contentHashes.set(item.url, contentHash)

    // Defined exactly when a summary twin will be emitted for this item.
    const summaryUrl = wantSummary ? `${primaryUrl}.summary.md` : undefined

    const primaryOptions: GenerateAeoMarkdownOptions = {
      publisherName,
      schemaType,
      content: body,
      ragChunkMarkers,
      canonical: item.url,
      twinUrl: primaryUrl,
      summaryUrl,
      contentHash,
    }

    result.twins.push({
      urlPath: primaryPath,
      url: primaryUrl,
      content: generateAeoMarkdown(item, primaryOptions),
      contentType: markdownContentType,
      item,
      contentHash,
    })

    if (summaryUrl === undefined) continue

    const summary = generateSummaryTwin(item, {
      publisherName,
      schemaType,
      content: body,
      fullUrl: primaryUrl,
      onMinimalFallback: onSummaryMinimalFallback,
    })
    result.twins.push({
      urlPath: urlPath(summaryUrl),
      url: summaryUrl,
      content: summary.markdown,
      contentType: markdownContentType,
      primaryUrl,
    })
  }

  return result
}
145
+
146
+ // ─── Defaults ───
147
+
148
/**
 * Default twinUrl: strip trailing slashes and append '.md'.
 *   `/article/midway/` -> `/article/midway.md`
 *   `/article/midway`  -> `/article/midway.md`
 *
 * Root URLs are special-cased. Stripping the only slash from `/` or
 * `https://example.com/` and appending '.md' would give `.md` or
 * `https://example.com.md` (a different host), so the root maps to `index.md`:
 *   `/`                    -> `/index.md`
 *   `https://example.com/` -> `https://example.com/index.md`
 * NOTE(review): `index.md` is a convention chosen here — confirm it matches how the
 * serving side resolves the site root's twin.
 *
 * @param articleUrl - Absolute or root-relative article URL.
 * @returns URL of the primary markdown twin.
 */
function defaultTwinUrl(articleUrl: string): string {
  const trimmed = articleUrl.replace(/\/+$/, '')

  // '' means the input was only slashes; the regex matches a bare origin with no path.
  const isRoot = trimmed === '' || /^[a-z][a-z0-9+.-]*:\/\/[^/?#]+$/i.test(trimmed)
  if (isRoot) return `${trimmed}/index.md`

  return `${trimmed}.md`
}
156
+
157
/**
 * Return the path component of `url`.
 *
 * Absolute URLs yield their `pathname`. Anything `new URL()` rejects is treated as a
 * relative path and returned with a leading `/` (added when missing). Callers use the
 * result to derive the filesystem write path under dist/client/.
 *
 * @param url - Absolute URL or relative path.
 * @returns A path beginning with `/`.
 */
function urlPath(url: string): string {
  let parsed: URL
  try {
    parsed = new URL(url)
  } catch {
    // Not parseable as an absolute URL — treat the input as a path already.
    if (url.startsWith('/')) return url
    return `/${url}`
  }
  return parsed.pathname
}
169
+
170
+ export const _internals = { // Re-exports the module-private helpers; NOTE(review): presumably for unit tests — confirm.
171
+ defaultTwinUrl, // Default article URL → primary twin URL mapping.
172
+ urlPath, // URL → path-component extraction.
173
+ }