@growth-labs/seo 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +22 -0
- package/dist/utils/validation.js.map +1 -1
- package/package.json +9 -5
- package/src/_internal/state.ts +26 -0
- package/src/bindings.ts +146 -0
- package/src/cron/prune-aeo-r2.ts +140 -0
- package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
- package/src/index.ts +380 -0
- package/src/middleware/seo.ts +350 -0
- package/src/options.ts +456 -0
- package/src/routes/aeo-twin.ts +130 -0
- package/src/routes/apple-news.ts +36 -0
- package/src/routes/llms-full.ts +36 -0
- package/src/routes/llms.ts +15 -0
- package/src/routes/podcast-narration.ts +45 -0
- package/src/routes/podcast.ts +27 -0
- package/src/routes/revalidate.ts +298 -0
- package/src/routes/robots.ts +21 -0
- package/src/routes/rss.ts +29 -0
- package/src/routes/sitemap-articles.ts +25 -0
- package/src/routes/sitemap-index.ts +89 -0
- package/src/routes/sitemap-markdown.ts +39 -0
- package/src/routes/sitemap-pages.ts +24 -0
- package/src/routes/sitemap-products.ts +24 -0
- package/src/routes/sitemap-videos.ts +24 -0
- package/src/runtime.ts +17 -0
- package/src/site-url-core.ts +71 -0
- package/src/site-url.ts +21 -0
- package/src/types.ts +166 -0
- package/src/utils/aeo-summary.ts +176 -0
- package/src/utils/aeo-twin-emitter.ts +173 -0
- package/src/utils/aeo.ts +223 -0
- package/src/utils/apple-news-anf.ts +163 -0
- package/src/utils/apple-news-rss.ts +136 -0
- package/src/utils/content-filter.ts +87 -0
- package/src/utils/crawler-class.ts +155 -0
- package/src/utils/define-content-provider.ts +65 -0
- package/src/utils/effective-auth.ts +44 -0
- package/src/utils/fcrdns.ts +269 -0
- package/src/utils/fresh-layer.ts +175 -0
- package/src/utils/hreflang.ts +26 -0
- package/src/utils/index.ts +91 -0
- package/src/utils/json-ld/article.ts +120 -0
- package/src/utils/json-ld/audio.ts +32 -0
- package/src/utils/json-ld/breadcrumb.ts +28 -0
- package/src/utils/json-ld/faq.ts +18 -0
- package/src/utils/json-ld/howto.ts +23 -0
- package/src/utils/json-ld/index.ts +12 -0
- package/src/utils/json-ld/item-list.ts +26 -0
- package/src/utils/json-ld/organization.ts +42 -0
- package/src/utils/json-ld/person.ts +25 -0
- package/src/utils/json-ld/product.ts +155 -0
- package/src/utils/json-ld/video.ts +20 -0
- package/src/utils/json-ld/website.ts +27 -0
- package/src/utils/llms-full.ts +90 -0
- package/src/utils/llms.ts +45 -0
- package/src/utils/meta.ts +184 -0
- package/src/utils/podcast.ts +112 -0
- package/src/utils/robots.ts +47 -0
- package/src/utils/rss.ts +64 -0
- package/src/utils/seo-head.ts +81 -0
- package/src/utils/sitemap-markdown.ts +80 -0
- package/src/utils/sitemap.ts +169 -0
- package/src/utils/staleness.ts +61 -0
- package/src/utils/validation.ts +308 -0
- package/src/virtual.d.ts +8 -0
- package/src/vite-plugin.ts +66 -0
package/src/options.ts
ADDED
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
import { validateSiteUrl } from './site-url-core.js'
|
|
3
|
+
import type { ContentItem, ContentProvider } from './types.js'
|
|
4
|
+
|
|
5
|
+
// ─── Locale ───
|
|
6
|
+
|
|
7
|
+
/**
 * Shape of a single entry in the `locales` array of the main options schema.
 * `lang` and `urlPrefix` are mandatory strings; `region` and `domain` may be omitted.
 */
const localeSchema = z.object({
  lang: z.string(),
  region: z.string().optional(),
  urlPrefix: z.string(),
  domain: z.string().optional(),
})
|
|
13
|
+
|
|
14
|
+
// ─── Podcast ───
|
|
15
|
+
|
|
16
|
+
/**
 * Options for the `podcast` feature. The feature is off unless `enabled` is set.
 *
 * Required whenever the object is supplied: `title`, `description`, `author`,
 * `email` (must be a valid e-mail address), `image` (must be a valid URL) and `category`.
 */
const podcastSchema = z.object({
  enabled: z.boolean().default(false),

  // Channel-level metadata.
  title: z.string(),
  description: z.string(),
  author: z.string(),
  email: z.string().email(),
  image: z.string().url(),
  category: z.string(),
  subcategory: z.string().optional(),

  // Fields with defaults.
  language: z.string().default('en'),
  explicit: z.boolean().default(false),
  feedPath: z.string().default('/podcast.xml'),
  type: z.enum(['episodic', 'serial']).default('episodic'),

  copyright: z.string().optional(),
})
|
|
31
|
+
|
|
32
|
+
// ─── Commerce ───
|
|
33
|
+
|
|
34
|
+
/**
 * Options for the `commerce` feature: an on/off switch, an optional merchant
 * return policy, and the currency code (defaults to `'USD'`).
 */
const commerceSchema = z.object({
  enabled: z.boolean().default(false),

  // Optional return policy. When present, the country and the policy category are
  // required; days, method and fees may each be left out.
  returnPolicy: z
    .object({
      applicableCountry: z.string(),
      returnPolicyCategory: z.enum([
        'MerchantReturnFiniteReturnWindow',
        'MerchantReturnNotPermitted',
        'MerchantReturnUnlimitedWindow',
      ]),
      merchantReturnDays: z.number().optional(),
      returnMethod: z.enum(['ReturnByMail', 'ReturnInStore', 'ReturnAtKiosk']).optional(),
      returnFees: z.enum(['FreeReturn', 'ReturnFeesCustomerResponsibility']).optional(),
    })
    .optional(),

  currency: z.string().default('USD'),
})
|
|
51
|
+
|
|
52
|
+
// ─── Apple News ───
|
|
53
|
+
|
|
54
|
+
/**
 * Options for the Apple News feed. Disabled unless `enabled` is set;
 * `channelName` is the only field without a default or `.optional()`.
 */
const appleNewsSchema = z.object({
  enabled: z.boolean().default(false),
  channelName: z.string(),
  channelId: z.string().optional(),
  feedPath: z.string().default('/apple-news.xml'),

  // Publishable state used for items that do not set their own apple-news-publishable
  // value. The default 'yes' opts every public article in.
  defaultPublishable: z.enum(['yes', 'no']).default('yes'),

  // Apple News section used by default; individual articles can override it.
  defaultSection: z.string().optional(),

  // When true (recommended), the full article body is placed in content:encoded.
  fullContent: z.boolean().default(true),

  // When true, <head> gets a
  // <link rel="alternate" type="application/rss+xml" title="Apple News"> element.
  discoveryLink: z.boolean().default(true),
})
|
|
69
|
+
|
|
70
|
+
// ─── Fresh-twin storage layer ───
|
|
71
|
+
|
|
72
|
+
/**
 * Storage layer for twins written after the build (see `aeoTwins.freshLayer`).
 * Every field has a default, so `{}` is a valid value.
 */
const freshLayerSchema = z.object({
  // Name of the wrangler.toml binding — either an R2 bucket or a KV namespace.
  bindingName: z.string().default('AEO_TWINS'),

  // Storage kind: 'r2' (preferred) or 'kv'. The runtime autodetects it from the
  // binding's shape; stating it explicitly removes any ambiguity.
  type: z.enum(['r2', 'kv']).default('r2'),

  // Binding of the Durable Object class that coordinates revalidation. One DO class
  // covers rate limiting, the per-slug lock and idempotency, with one instance per
  // request hostname.
  coordinatorBindingName: z.string().default('AEO_REVALIDATION_COORD'),

  // How many days old-version R2 prefixes are kept before the scheduled prune Worker
  // removes them. Whole number between 1 and 365.
  retentionDays: z.number().int().min(1).max(365).default(7),
})
|
|
84
|
+
|
|
85
|
+
// ─── AEO twins (object form) ───
|
|
86
|
+
//
|
|
87
|
+
// The boolean form `aeoTwins: true` is equivalent to `{ mode: 'static' }`.
|
|
88
|
+
// The object form exposes full configuration.
|
|
89
|
+
/**
 * Object form of the `aeoTwins` option. Every field is optional or defaulted, so
 * parsing `{}` yields the full default configuration (`mode: 'static'`).
 */
const aeoTwinsObjectSchema = z.object({
  // How twins are produced and served:
  //   'static'     — prerendered .md files emitted at build time (default; fastest serve path)
  //   'middleware' — .md served on the same URL via `Accept: text/markdown` (requires SSR)
  //   'both'       — static twins plus middleware negotiation (freshness on cache-miss
  //                  through on-demand revalidation)
  //
  // STALENESS WARNING: 'static' output is frozen at build time. When the HTML can be
  // edited after publishing (CMS corrections, breaking-news updates) the .md twin
  // drifts, and answer engines that compare the two may downgrade trust.
  //   - Content that never changes after publish -> 'static' is safe.
  //   - Mutable content                          -> 'both' + `onDemandRevalidation` + CMS webhook.
  //   - Gated content with Flexible Sampling     -> 'middleware' (rejected with 'static').
  mode: z.enum(['static', 'middleware', 'both']).default('static'),

  // Maps an article URL to its single PRIMARY markdown twin URL — the canonical twin
  // used by the `Link: rel="alternate"` header, by sitemap-markdown.xml, and for naming
  // the summary twin (<primary>.summary.md).
  // When omitted, '.md' is appended (e.g. /article/midway -> /article/midway.md).
  twinUrl: z.function().args(z.string()).returns(z.string()).optional(),

  // Suffixes that middleware answers with a 301 redirect to the primary twin URL.
  // Only in effect when `mode !== 'static'`: pure-static aliases were dropped in v7
  // because they were a weak fallback when Cloudflare Assets can't serve 301s.
  // With the default ['/index.md'], `/article/midway/index.md` aliases to
  // `/article/midway.md` under middleware-backed modes.
  twinAliases: z.array(z.string()).default(['/index.md']),

  // Also emit a short "summary twin" (~300 tokens) at <primary-twin-url>.summary.md,
  // aimed at context-limited agents (Perplexity, Apple Intelligence, long-convo ChatGPT).
  summaryTwin: z.boolean().default(true),

  // Insert semantic chunk markers (<!-- aeo:section --> comments) into .md bodies so
  // RAG retrievers split on the publisher's boundaries rather than their own.
  ragChunkMarkers: z.boolean().default(true),

  // Headers for every emitted .md file. The defaults canonicalize back to the HTML
  // master so the twin does not become duplicate content.
  responseHeaders: z
    .object({
      // X-Robots-Tag: noindex
      noindex: z.boolean().default(true),
      // Link: <html-url>; rel="canonical"
      canonicalToHtml: z.boolean().default(true),
    })
    .default({}),

  // Item filter applied when twins are emitted. When omitted, the access rule applies
  // (no twin for members-only items). Supply one for custom gating, e.g. to exclude previews.
  include: z.function().args(z.custom<ContentItem>()).returns(z.boolean()).optional(),

  // How a static twin is judged stale relative to the live HTML:
  //   'content-hash' — a SHA-256 of body+description is embedded in the frontmatter and the
  //                    validation hook compares it with current contentProvider output at build time
  //   'dateModified' — frontmatter.dateModified is compared with item.dateModified
  //   'none'         — never checked (only safe for immutable-after-publish content)
  stalenessCheck: z.enum(['content-hash', 'dateModified', 'none']).default('content-hash'),

  // Expose `POST /_seo/revalidate` so CMS webhooks can push fresh twins.
  // Needs `freshLayer` and a `revalidateToken` of at least 32 characters
  // (both enforced at parse time).
  onDemandRevalidation: z.boolean().default(false),

  // Token for the revalidate endpoint, sent as `Authorization: Bearer`.
  // Must be at least 32 characters long; anything shorter is rejected at config parse,
  // because a short token amounts to a public unauthenticated DoS vector.
  revalidateToken: z.string().min(32).optional(),

  // Fresh-twin storage layer. Required when `mode !== 'static'` or
  // `onDemandRevalidation: true`: Cloudflare Workers Assets are immutable at runtime,
  // so twins written after the build are stored here.
  freshLayer: freshLayerSchema.optional(),
})
|
|
157
|
+
|
|
158
|
+
// ─── Flexible Sampling (Google paywall policy) ───
|
|
159
|
+
|
|
160
|
+
/**
 * Flexible Sampling (Google paywall policy) options. The whole object defaults to
 * `{}`, which resolves to a disabled configuration with the field defaults below.
 */
const flexibleSamplingSchema = z
  .object({
    enabled: z.boolean().default(false),

    // Strategy for the free content that anonymous users can see.
    sampleMode: z.enum(['metered', 'lead-in', 'none']).default('lead-in'),

    // Number of lead-in paragraphs shown before the gate (non-negative integer).
    leadInParagraphs: z.number().int().min(0).default(2),

    // 'metered' mode: free articles per rolling 30 days (non-negative integer).
    // The consumer enforces the count; this option only surfaces the signal to the package.
    meteredLimit: z.number().int().min(0).default(3),
  })
  .default({})
|
|
172
|
+
|
|
173
|
+
// ─── Crawler policy ───
|
|
174
|
+
|
|
175
|
+
/**
 * Crawler policy switches. All three default to `true`, and the object itself
 * defaults to `{}`.
 */
const crawlerPolicySchema = z
  .object({
    // Known LLM training crawlers are blocked in robots.txt AND answered with a 403 by
    // the middleware on `access: 'members'` items.
    blockLlmTrainingCrawlers: z.boolean().default(true),

    // User-directed LLM agents (ChatGPT-User, Claude-User, Perplexity-User) receive the
    // anonymous gated body whatever cookies they send — never the full member body.
    userDirectedAgentsSeePublicOnly: z.boolean().default(true),

    // When true, verified Googlebot/Bingbot/Applebot get the full body of
    // `access: 'members'` items together with paywall JSON-LD markup
    // (requires `flexibleSampling.enabled`).
    verifiedSearchCrawlersSeeFullBody: z.boolean().default(true),
  })
  .default({})
|
|
187
|
+
|
|
188
|
+
// ─── Main schema ───
|
|
189
|
+
|
|
190
|
+
/**
 * Accepted forms of the `site` option:
 *
 *   1. a URL string;
 *   2. a zero-argument function returning a string — it is invoked while the options
 *      are parsed and its result is passed through `validateSiteUrl`;
 *   3. an `{ envVar }` object naming a non-empty environment variable (no other keys allowed).
 */
const siteUrlSchema = z.union([
  z.string().url(),
  z
    .function()
    .args()
    .returns(z.string())
    .transform((resolver) => validateSiteUrl(resolver(), 'site resolver return value')),
  z.object({ envVar: z.string().min(1) }).strict(),
])
|
|
199
|
+
|
|
200
|
+
// Publisher identity. `name` and `logo` are required; `url` and `sameAs` are optional.
const organizationSchema = z.object({
  name: z.string(),
  url: z.string().url().optional(),
  logo: z.string().url(),
  sameAs: z.array(z.string().url()).optional(),
})

// Audio narration settings.
const audioNarrationSchema = z.object({
  enabled: z.boolean().default(false),
  speakableSelectors: z.array(z.string()).default(['.article-headline', '.article-body']),
  narratorName: z.string().optional(),
  asPodcastFeed: z.boolean().default(false),
  podcastFeedPath: z.string().default('/listen.xml'),
})

// Content-Signal header values (Cloudflare convention).
const contentSignalSchema = z.object({
  aiTrain: z.enum(['yes', 'no']).default('no'),
  search: z.enum(['yes', 'no']).default('yes'),
  aiInput: z.enum(['yes', 'no']).default('yes'),
})

// Validation thresholds.
const validationSchema = z.object({
  heroMinWidth: z.number().default(1200),
  titleMaxLength: z.number().default(110),
  descriptionMaxLength: z.number().default(160),
  enabled: z.boolean().default(true),
})

// Extra robots.txt rules.
const robotsSchema = z.object({
  additionalRules: z
    .array(
      z.object({
        userAgent: z.string(),
        allow: z.array(z.string()).optional(),
        disallow: z.array(z.string()).optional(),
      }),
    )
    .optional(),
})

// Site-wide default meta values.
const defaultsSchema = z.object({
  titleSuffix: z.string().optional(),
  defaultImage: z.string().url().optional(),
  twitterSite: z.string().optional(),
  twitterCardType: z.enum(['summary', 'summary_large_image']).default('summary_large_image'),
  locale: z.string().default('en_US'),
})

// Content for llms.txt: a description, sections of links, and optional extra links.
const llmsContentSchema = z.object({
  description: z.string(),
  sections: z.array(
    z.object({
      heading: z.string(),
      links: z.array(
        z.object({
          title: z.string(),
          url: z.string(),
          description: z.string(),
        }),
      ),
    }),
  ),
  optionalLinks: z.array(z.object({ title: z.string(), url: z.string() })).optional(),
})

/**
 * Base schema for the integration options, without cross-field checks.
 * Only `site` and `organization` are required; every other key is optional or defaulted.
 */
export const seoOptionsSchema = z.object({
  // ─── Required ───
  site: siteUrlSchema,
  organization: organizationSchema,

  // ─── Schema type for articles ───
  schemaType: z.enum(['Article', 'NewsArticle', 'BlogPosting']).default('Article'),

  // ─── Feature flags ───
  googleNews: z.boolean().default(false),
  llmsTxt: z.boolean().default(false),
  llmsFullTxt: z.boolean().default(false),
  markdownSitemap: z.boolean().default(true),
  rss: z.boolean().default(false),

  // ─── AEO twins ───
  // `true` is shorthand for { mode: 'static' }; `false` (the default) emits no twins.
  aeoTwins: z.union([z.boolean(), aeoTwinsObjectSchema]).default(false),

  // ─── Multilingual ───
  locales: z.array(localeSchema).optional(),
  defaultLocale: z.string().optional(),

  // ─── Podcast ───
  podcast: podcastSchema.optional(),

  // ─── Apple News ───
  appleNews: appleNewsSchema.optional(),

  // ─── Commerce ───
  commerce: commerceSchema.optional(),

  // ─── Audio narration ───
  audioNarration: audioNarrationSchema.optional(),

  // ─── Flexible Sampling ───
  flexibleSampling: flexibleSamplingSchema,

  // ─── Crawler policy ───
  crawlerPolicy: crawlerPolicySchema,

  // ─── Trailing slash policy ───
  trailingSlash: z.enum(['always', 'never', 'ignore']).default('never'),

  // ─── Content-Signal header (Cloudflare convention) ───
  contentSignal: contentSignalSchema.default({}),

  // ─── Validation thresholds ───
  validation: validationSchema.default({}),

  // ─── robots.txt ───
  robots: robotsSchema.default({}),

  // ─── Default meta ───
  defaults: defaultsSchema.default({}),

  // ─── Content data provider (DEPRECATED) ───
  /**
   * @deprecated Use `contentProviderModule` instead. An inline function provider is
   * lost in the Cloudflare prerender Worker: the Astro Cloudflare adapter prerenders
   * static routes in a separate Worker process that does not re-execute
   * `astro.config.mjs`. This form is still accepted and triggers a `logger.warn`
   * at `astro:config:setup`; it will be removed in the next breaking release.
   *
   * See packages-docs/seo-contentprovidermodule.md for the migration.
   */
  contentProvider: z.custom<ContentProvider>().optional(),

  // ─── Content provider module path (for Cloudflare prerender workers) ───
  // The Astro Cloudflare adapter prerenders routes in a separate Worker process that
  // does NOT re-execute astro.config.mjs, so a `contentProvider` function set in the
  // main build-time Node process never reaches it. Injected prerender routes
  // (specifically `/[...aeoPath].md`) would then fail with "integration not initialized".
  //
  // The fix is to name the provider by module specifier here. The vite plugin emits an
  // import into its generated virtual module, so every environment Vite bundles — main
  // Worker AND prerender Worker — seeds state at load.
  //
  // The specifier is any Vite-resolvable import path; project-root-relative
  // (e.g. '/src/lib/content-provider.ts') is the usual case. The module must
  // default-export a function matching the ContentProvider signature.
  contentProviderModule: z.string().optional(),

  // ─── llms.txt content ───
  llmsContent: llmsContentSchema.optional(),
})
|
|
351
|
+
|
|
352
|
+
// ─── Cross-field validation ───
|
|
353
|
+
//
|
|
354
|
+
// Enforces invariants Zod's type system can't express on its own. These are hard
|
|
355
|
+
// errors at parse time — not runtime surprises. Three invariants:
|
|
356
|
+
//
|
|
357
|
+
// 1. `flexibleSampling.enabled: true` + `aeoTwins.mode: 'static'` is rejected
|
|
358
|
+
// (static mode has no Worker to run the verified-Googlebot dispatch).
|
|
359
|
+
// 2. `mode !== 'static'` OR `onDemandRevalidation: true` requires `freshLayer`
|
|
360
|
+
// (the R2/KV binding that stores post-build twin writes).
|
|
361
|
+
//   3. `onDemandRevalidation: true` requires a `revalidateToken` of >= 32 characters
|
|
362
|
+
// (shorter means a public unauthenticated DoS vector; enforced here rather
|
|
363
|
+
// than discovered at runtime).
|
|
364
|
+
//
|
|
365
|
+
// A fourth implicit rejection: `mode: 'static'` + `onDemandRevalidation: true`
|
|
366
|
+
// is incoherent — static mode has no Worker to intercept revalidation writes,
|
|
367
|
+
// so R2 pushes never reach requests.
|
|
368
|
+
/**
 * `seoOptionsSchema` plus the cross-field rules that tie `aeoTwins` to
 * `flexibleSampling`. Violations are reported as custom Zod issues at parse time.
 * Several rules can fire for the same input; they are reported in the order below.
 */
export const validatedSeoOptionsSchema = seoOptionsSchema.superRefine((opts, ctx) => {
  const rawAeo = opts.aeoTwins

  // `aeoTwins: false` (also the default) means there is nothing to cross-check.
  // Flexible Sampling without AEO twins is allowed — there are simply no markdown twins.
  if (rawAeo === false) {
    return
  }

  // `true` is shorthand for static mode without on-demand revalidation.
  const aeo = rawAeo === true ? { mode: 'static' as const, onDemandRevalidation: false } : rawAeo

  const mode = aeo.mode
  const isStatic = mode === 'static'
  const onDemand = 'onDemandRevalidation' in aeo && aeo.onDemandRevalidation === true
  const token = 'revalidateToken' in aeo ? aeo.revalidateToken : undefined
  const freshLayer = 'freshLayer' in aeo ? aeo.freshLayer : undefined

  // Small helper so each rule below reads as "condition -> path + message".
  const report = (path: string[], message: string): void => {
    ctx.addIssue({ code: z.ZodIssueCode.custom, path, message })
  }

  // Rule 1 — Flexible Sampling needs a Worker, which static mode does not have.
  if (opts.flexibleSampling?.enabled && isStatic) {
    report(
      ['flexibleSampling', 'enabled'],
      "flexibleSampling.enabled requires aeoTwins.mode 'middleware' or 'both'. " +
        'Static mode has no Worker to run the crawler-class dispatch, so verified Googlebot ' +
        'never receives the full-body paywall JSON-LD. Set mode to "middleware" or "both".',
    )
  }

  // Rule 4 — on-demand revalidation makes no sense in static mode.
  if (isStatic && onDemand) {
    report(
      ['aeoTwins', 'onDemandRevalidation'],
      "onDemandRevalidation requires aeoTwins.mode 'middleware' or 'both' — static mode " +
        'serves files directly from Cloudflare Assets with no Worker hop, so R2 writes from ' +
        'the revalidate endpoint would never reach requests.',
    )
  }

  // Rule 2 — any non-static mode, or on-demand revalidation, needs a fresh layer.
  const needsFreshLayer = !(mode === undefined || isStatic) || onDemand
  if (needsFreshLayer && !freshLayer) {
    report(
      ['aeoTwins', 'freshLayer'],
      `aeoTwins.freshLayer is required when mode='${mode ?? 'undefined'}' or ` +
        'onDemandRevalidation=true. Configure an R2 or KV binding plus a Durable Object ' +
        'binding in wrangler.toml and reference them via { bindingName, coordinatorBindingName }.',
    )
  }

  // Rule 3 — on-demand revalidation needs a token of at least 32 characters.
  const tokenTooWeak = !token || token.length < 32
  if (onDemand && tokenTooWeak) {
    report(
      ['aeoTwins', 'revalidateToken'],
      'onDemandRevalidation requires a revalidateToken of at least 32 characters. ' +
        'Generate one with `openssl rand -base64 32` and store it in your environment.',
    )
  }
})
|
|
436
|
+
|
|
437
|
+
/** Options as written by the consumer — the schema's input type, before defaults and transforms. */
export type SeoOptions = z.input<typeof validatedSeoOptionsSchema>

/** Options after parsing — the schema's output type, with defaults and transforms applied. */
export type ResolvedSeoOptions = z.output<typeof validatedSeoOptionsSchema>

/** Parsed options whose `site` field has been narrowed to a plain string. */
export type SeoOptionsWithResolvedSite = Omit<ResolvedSeoOptions, 'site'> & { site: string }
|
|
440
|
+
|
|
441
|
+
/**
 * Normalizes the parsed `aeoTwins` option to its object form.
 *
 * @param aeoTwins - The parsed `aeoTwins` value: a boolean or an already-parsed object.
 * @returns `undefined` for `false` (no twins); the schema defaults (obtained by parsing
 *   `{}`, hence `mode: 'static'`) for `true`; the object itself, unchanged, otherwise.
 */
export function resolveAeoTwins(
  aeoTwins: ResolvedSeoOptions['aeoTwins'],
): z.output<typeof aeoTwinsObjectSchema> | undefined {
  // The object form already carries its defaults from the original parse.
  if (typeof aeoTwins !== 'boolean') {
    return aeoTwins
  }
  return aeoTwins ? aeoTwinsObjectSchema.parse({}) : undefined
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import 'virtual:growth-labs/seo/config'
|
|
2
|
+
// Dynamic prerender route that emits AEO markdown twins at build time.
|
|
3
|
+
//
|
|
4
|
+
// This replaces the previous `astro:build:done` filesystem-write approach. The
|
|
5
|
+
// problem with that approach: integration hooks run in raw Node ESM, where
|
|
6
|
+
// Vite-virtual imports like `astro:content` can't resolve. Consumers whose
|
|
7
|
+
// `contentProvider` transitively imported `astro:content` saw zero twin files
|
|
8
|
+
// emitted. Reference: https://github.com/withastro/astro/issues/...
|
|
9
|
+
//
|
|
10
|
+
// getStaticPaths runs INSIDE Astro's build pipeline, so Vite resolves virtual
|
|
11
|
+
// modules correctly. The route produces one prerendered file per twin URL:
|
|
12
|
+
// primary at `/article/midway.md` and, when enabled, summary at
|
|
13
|
+
// `/article/midway.md.summary.md`.
|
|
14
|
+
|
|
15
|
+
import type { APIRoute, GetStaticPaths } from 'astro'
|
|
16
|
+
import { getConfig, getContentProvider } from '../_internal/state.js'
|
|
17
|
+
import { resolveAeoTwins } from '../options.js'
|
|
18
|
+
import type { ContentItem } from '../types.js'
|
|
19
|
+
import { estimateTokenCount, generateAeoMarkdown } from '../utils/aeo.js'
|
|
20
|
+
import { generateSummaryTwin } from '../utils/aeo-summary.js'
|
|
21
|
+
import { forMarkdownTwin } from '../utils/content-filter.js'
|
|
22
|
+
import { computeContentHash } from '../utils/staleness.js'
|
|
23
|
+
|
|
24
|
+
// Prerender these URLs at build time so they're served as static files.
|
|
25
|
+
export const prerender = true
|
|
26
|
+
|
|
27
|
+
/** Props handed from `getStaticPaths` to the `GET` handler for one prerendered twin. */
interface TwinPathProps {
  /** Markdown text served as the response body. */
  body: string
  /** Whether this path is the primary twin or its summary twin. */
  kind: 'primary' | 'summary'
  // Index signature required by Astro's GetStaticPaths Props type.
  [key: string]: unknown
}
|
|
33
|
+
|
|
34
|
+
/**
 * Default mapping from an article URL to its primary twin URL: trailing slashes are
 * dropped and `.md` is appended (`/article/midway/` -> `/article/midway.md`).
 *
 * A URL with no path left after trimming — `''`, `'/'`, or a bare origin such as
 * `https://example.com/` — maps to `<origin>/index.md` instead. Appending `.md`
 * directly would yield `.md` or `https://example.com.md`, and the latter names a
 * different host.
 *
 * @param url - Article URL, absolute or root-relative.
 * @returns The URL of the primary markdown twin.
 */
function defaultTwin(url: string): string {
  const trimmed = url.replace(/\/+$/, '')
  // Nothing left, or only `scheme://authority`: the article is the site root.
  const isSiteRoot = trimmed === '' || /^[a-z][a-z\d+.-]*:\/\/[^/?#]+$/i.test(trimmed)
  return isSiteRoot ? `${trimmed}/index.md` : `${trimmed}.md`
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reduces a URL to a path that starts with `/`.
 *
 * @param url - An absolute URL or a relative path.
 * @returns The `pathname` of `url` when it parses as an absolute URL (query string and
 *   hash are therefore dropped); otherwise `url` itself, prefixed with `/` if it does
 *   not already start with one.
 */
function stripOrigin(url: string): string {
  let parsed: URL | undefined
  try {
    parsed = new URL(url)
  } catch {
    // Not an absolute URL — handled as a plain path below.
    parsed = undefined
  }

  if (parsed !== undefined) {
    return parsed.pathname
  }
  if (url.startsWith('/')) {
    return url
  }
  return `/${url}`
}
|
|
45
|
+
|
|
46
|
+
/**
 * Builds the list of twin paths to prerender.
 *
 * Returns an empty list when twins are disabled, when the mode is `'middleware'`
 * (twins are then served on demand), when no content provider is registered, or
 * when the content provider throws. Otherwise every item that passes
 * `forMarkdownTwin` and the optional `include` predicate contributes a primary
 * path and, unless `summaryTwin` is `false`, a summary path.
 */
export const getStaticPaths: GetStaticPaths = async () => {
  const config = getConfig()
  const contentProvider = getContentProvider()
  const aeo = resolveAeoTwins(config.aeoTwins)

  // Only 'static' and 'both' emit files at build time.
  if (!aeo) return []
  if (aeo.mode === 'middleware') return []
  if (!contentProvider) return []

  let items: ContentItem[]
  try {
    // This runs inside Astro's build pipeline, so a provider that imports
    // `astro:content` or other Vite virtual modules resolves correctly here.
    items = await contentProvider({ type: 'articles' }, {} as never)
  } catch {
    // Deliberately silent and non-fatal: Astro's getStaticPaths swallows console
    // output, and "zero paths emitted" is the consumer's cue to inspect the provider.
    return []
  }

  const keep = aeo.include ?? (() => true)
  const resolveTwinUrl = aeo.twinUrl ?? defaultTwin
  const wantsSummary = aeo.summaryTwin !== false
  const ragChunkMarkers = aeo.ragChunkMarkers !== false
  const hashBodies = (aeo.stalenessCheck ?? 'content-hash') === 'content-hash'

  // Twin URL -> value of the `aeoPath` param: no origin, no leading slash, no trailing `.md`.
  const toRouteParam = (twin: string): string =>
    stripOrigin(twin).replace(/^\//, '').replace(/\.md$/, '')

  // Options shared by the primary and summary generators.
  const shared = {
    publisherName: config.organization.name,
    schemaType: config.schemaType,
  }

  const paths: Array<{ params: { aeoPath: string }; props: TwinPathProps }> = []

  for (const item of forMarkdownTwin(items).filter(keep)) {
    const primaryUrl = resolveTwinUrl(item.url)
    const body = item.description ?? ''
    const contentHash = hashBodies ? await computeContentHash(item, body) : undefined
    const summaryUrl = wantsSummary ? `${primaryUrl}.summary.md` : undefined

    // Primary twin.
    const primaryMarkdown = generateAeoMarkdown(item, {
      ...shared,
      content: body,
      ragChunkMarkers,
      canonical: item.url,
      twinUrl: primaryUrl,
      summaryUrl,
      contentHash,
    })
    paths.push({
      params: { aeoPath: toRouteParam(primaryUrl) },
      props: { body: primaryMarkdown, kind: 'primary' },
    })

    // Summary twin — `summaryUrl` is only set when summaries are enabled.
    if (summaryUrl !== undefined) {
      const summary = generateSummaryTwin(item, {
        ...shared,
        content: body,
        fullUrl: primaryUrl,
      })
      paths.push({
        params: { aeoPath: toRouteParam(summaryUrl) },
        props: { body: summary.markdown, kind: 'summary' },
      })
    }
  }

  return paths
}
|
|
119
|
+
|
|
120
|
+
/**
 * Serves one prerendered twin: the markdown body prepared by `getStaticPaths`,
 * sent as `text/markdown` with `X-Robots-Tag: noindex` and an `x-markdown-tokens`
 * header carrying the estimated token count of the body.
 */
export const GET: APIRoute = async ({ props }) => {
  const markdown = (props as unknown as TwinPathProps).body
  const headers = {
    'Content-Type': 'text/markdown; charset=utf-8',
    'X-Robots-Tag': 'noindex',
    'x-markdown-tokens': String(estimateTokenCount(markdown)),
  }
  return new Response(markdown, { headers })
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import 'virtual:growth-labs/seo/config'
|
|
2
|
+
import type { APIRoute } from 'astro'
|
|
3
|
+
import { getConfig, getContentProvider } from '../_internal/state.js'
|
|
4
|
+
import { resolveSeoConfig } from '../site-url.js'
|
|
5
|
+
import type { ContentItem } from '../types.js'
|
|
6
|
+
import { generateAppleNewsRss } from '../utils/apple-news-rss.js'
|
|
7
|
+
|
|
8
|
+
export const prerender = false
|
|
9
|
+
|
|
10
|
+
/**
 * Serves the Apple News RSS feed.
 *
 * Responds 404 when `appleNews.enabled` is not set. Otherwise the feed is built from
 * the content provider's articles. When no provider is registered, or the provider
 * throws, the feed is generated from an empty article list (best-effort) and the
 * provider failure is logged.
 */
export const GET: APIRoute = async (context) => {
  const config = resolveSeoConfig(getConfig())
  const contentProvider = getContentProvider()

  if (!config.appleNews?.enabled) {
    return new Response('Apple News not enabled', { status: 404 })
  }

  let articles: ContentItem[] = []
  if (contentProvider) {
    try {
      articles = await contentProvider({ type: 'articles' }, context)
    } catch (error: unknown) {
      // Keep serving a (now empty) feed, but leave a trace: a silently empty feed is
      // indistinguishable from "no articles" and hides a broken provider.
      console.error(
        '[@growth-labs/seo] apple-news: contentProvider failed; serving an empty feed',
        error,
      )
    }
  }

  // content-filter.forAppleNews is applied inside generateAppleNewsRss.
  const xml = generateAppleNewsRss({
    items: articles,
    options: config,
    // Consumers can supply an HTML resolver by extending the config surface.
    // For v1, pass through; description-only is acceptable when fullContent: false.
  })

  return new Response(xml, {
    headers: { 'Content-Type': 'application/rss+xml; charset=utf-8' },
  })
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import 'virtual:growth-labs/seo/config'
|
|
2
|
+
import type { APIRoute } from 'astro'
|
|
3
|
+
import { getConfig, getContentProvider } from '../_internal/state.js'
|
|
4
|
+
import type { ContentItem } from '../types.js'
|
|
5
|
+
import { generateLlmsFull } from '../utils/llms-full.js'
|
|
6
|
+
|
|
7
|
+
export const prerender = false
|
|
8
|
+
|
|
9
|
+
/**
 * Serves llms-full.txt.
 *
 * Responds 404 when the `llmsFullTxt` flag is off. Otherwise the document is built
 * from the content provider's articles. When no provider is registered, or the
 * provider throws, it is generated from an empty article list (best-effort) and the
 * provider failure is logged. The response is `text/plain` with `X-Robots-Tag: noindex`.
 */
export const GET: APIRoute = async (context) => {
  const config = getConfig()
  const contentProvider = getContentProvider()

  if (!config.llmsFullTxt) {
    return new Response('llms-full.txt disabled', { status: 404 })
  }

  let articles: ContentItem[] = []
  if (contentProvider) {
    try {
      articles = await contentProvider({ type: 'articles' }, context)
    } catch (error: unknown) {
      // Keep serving a (now empty) document, but leave a trace: silent output with no
      // articles hides a broken provider.
      console.error(
        '[@growth-labs/seo] llms-full: contentProvider failed; serving without articles',
        error,
      )
    }
  }

  // Members items excluded by forLlmsFull inside the generator.
  const text = generateLlmsFull({
    items: articles,
    siteName: config.organization.name,
  })

  return new Response(text, {
    headers: {
      'Content-Type': 'text/plain; charset=utf-8',
      'X-Robots-Tag': 'noindex',
    },
  })
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import 'virtual:growth-labs/seo/config'
|
|
2
|
+
import type { APIRoute } from 'astro'
|
|
3
|
+
import { getConfig } from '../_internal/state.js'
|
|
4
|
+
import { generateLlmsTxt } from '../utils/llms.js'
|
|
5
|
+
|
|
6
|
+
export const prerender = false
|
|
7
|
+
|
|
8
|
+
/**
 * Serves llms.txt: the text generated from the current config, as `text/plain`.
 *
 * NOTE(review): unlike the llms-full route, this handler does not check the `llmsTxt`
 * flag itself — presumably the route is only registered when the flag is on; confirm
 * against the integration setup.
 */
export const GET: APIRoute = async () => {
  const body = generateLlmsTxt(getConfig())
  const headers = { 'Content-Type': 'text/plain; charset=utf-8' }
  return new Response(body, { headers })
}
|