@growth-labs/seo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/utils/validation.d.ts.map +1 -1
  2. package/dist/utils/validation.js +22 -0
  3. package/dist/utils/validation.js.map +1 -1
  4. package/package.json +9 -5
  5. package/src/_internal/state.ts +26 -0
  6. package/src/bindings.ts +146 -0
  7. package/src/cron/prune-aeo-r2.ts +140 -0
  8. package/src/durable-objects/aeo-revalidation-coord.ts +246 -0
  9. package/src/index.ts +380 -0
  10. package/src/middleware/seo.ts +350 -0
  11. package/src/options.ts +456 -0
  12. package/src/routes/aeo-twin.ts +130 -0
  13. package/src/routes/apple-news.ts +36 -0
  14. package/src/routes/llms-full.ts +36 -0
  15. package/src/routes/llms.ts +15 -0
  16. package/src/routes/podcast-narration.ts +45 -0
  17. package/src/routes/podcast.ts +27 -0
  18. package/src/routes/revalidate.ts +298 -0
  19. package/src/routes/robots.ts +21 -0
  20. package/src/routes/rss.ts +29 -0
  21. package/src/routes/sitemap-articles.ts +25 -0
  22. package/src/routes/sitemap-index.ts +89 -0
  23. package/src/routes/sitemap-markdown.ts +39 -0
  24. package/src/routes/sitemap-pages.ts +24 -0
  25. package/src/routes/sitemap-products.ts +24 -0
  26. package/src/routes/sitemap-videos.ts +24 -0
  27. package/src/runtime.ts +17 -0
  28. package/src/site-url-core.ts +71 -0
  29. package/src/site-url.ts +21 -0
  30. package/src/types.ts +166 -0
  31. package/src/utils/aeo-summary.ts +176 -0
  32. package/src/utils/aeo-twin-emitter.ts +173 -0
  33. package/src/utils/aeo.ts +223 -0
  34. package/src/utils/apple-news-anf.ts +163 -0
  35. package/src/utils/apple-news-rss.ts +136 -0
  36. package/src/utils/content-filter.ts +87 -0
  37. package/src/utils/crawler-class.ts +155 -0
  38. package/src/utils/define-content-provider.ts +65 -0
  39. package/src/utils/effective-auth.ts +44 -0
  40. package/src/utils/fcrdns.ts +269 -0
  41. package/src/utils/fresh-layer.ts +175 -0
  42. package/src/utils/hreflang.ts +26 -0
  43. package/src/utils/index.ts +91 -0
  44. package/src/utils/json-ld/article.ts +120 -0
  45. package/src/utils/json-ld/audio.ts +32 -0
  46. package/src/utils/json-ld/breadcrumb.ts +28 -0
  47. package/src/utils/json-ld/faq.ts +18 -0
  48. package/src/utils/json-ld/howto.ts +23 -0
  49. package/src/utils/json-ld/index.ts +12 -0
  50. package/src/utils/json-ld/item-list.ts +26 -0
  51. package/src/utils/json-ld/organization.ts +42 -0
  52. package/src/utils/json-ld/person.ts +25 -0
  53. package/src/utils/json-ld/product.ts +155 -0
  54. package/src/utils/json-ld/video.ts +20 -0
  55. package/src/utils/json-ld/website.ts +27 -0
  56. package/src/utils/llms-full.ts +90 -0
  57. package/src/utils/llms.ts +45 -0
  58. package/src/utils/meta.ts +184 -0
  59. package/src/utils/podcast.ts +112 -0
  60. package/src/utils/robots.ts +47 -0
  61. package/src/utils/rss.ts +64 -0
  62. package/src/utils/seo-head.ts +81 -0
  63. package/src/utils/sitemap-markdown.ts +80 -0
  64. package/src/utils/sitemap.ts +169 -0
  65. package/src/utils/staleness.ts +61 -0
  66. package/src/utils/validation.ts +308 -0
  67. package/src/virtual.d.ts +8 -0
  68. package/src/vite-plugin.ts +66 -0
package/src/options.ts ADDED
@@ -0,0 +1,456 @@
1
+ import { z } from 'zod'
2
+ import { validateSiteUrl } from './site-url-core.js'
3
+ import type { ContentItem, ContentProvider } from './types.js'
4
+
5
+ // ─── Locale ───
6
+
7
/**
 * One locale entry for multilingual sites.
 * `lang` and `urlPrefix` are mandatory; `region` and `domain` may be omitted.
 */
const localeSchema = z.object({
  lang: z.string(),
  region: z.optional(z.string()),
  urlPrefix: z.string(),
  domain: z.optional(z.string()),
})
13
+
14
+ // ─── Podcast ───
15
+
16
/**
 * Podcast feed configuration.
 * Show metadata (title, description, author, email, image, category) is required;
 * everything else either has a default or may be left out.
 */
const podcastSchema = z.object({
  // Feature switch; off unless set.
  enabled: z.boolean().default(false),

  // Required show-level metadata.
  title: z.string(),
  description: z.string(),
  author: z.string(),
  email: z.string().email(),
  image: z.string().url(),
  category: z.string(),
  subcategory: z.optional(z.string()),

  // Feed presentation, with defaults.
  language: z.string().default('en'),
  explicit: z.boolean().default(false),
  feedPath: z.string().default('/podcast.xml'),
  type: z.enum(['episodic', 'serial']).default('episodic'),
  copyright: z.optional(z.string()),
})
31
+
32
+ // ─── Commerce ───
33
+
34
// Merchant return policy; only the country and the policy category are mandatory.
const merchantReturnPolicySchema = z.object({
  applicableCountry: z.string(),
  returnPolicyCategory: z.enum([
    'MerchantReturnFiniteReturnWindow',
    'MerchantReturnNotPermitted',
    'MerchantReturnUnlimitedWindow',
  ]),
  merchantReturnDays: z.optional(z.number()),
  returnMethod: z.optional(z.enum(['ReturnByMail', 'ReturnInStore', 'ReturnAtKiosk'])),
  returnFees: z.optional(z.enum(['FreeReturn', 'ReturnFeesCustomerResponsibility'])),
})

/**
 * Commerce configuration: a feature switch, an optional return policy,
 * and the currency code (defaults to 'USD').
 */
const commerceSchema = z.object({
  enabled: z.boolean().default(false),
  returnPolicy: merchantReturnPolicySchema.optional(),
  currency: z.string().default('USD'),
})
51
+
52
+ // ─── Apple News ───
53
+
54
/**
 * Apple News feed configuration. Only `channelName` is required.
 */
const appleNewsSchema = z.object({
  enabled: z.boolean().default(false),
  channelName: z.string(),
  channelId: z.optional(z.string()),
  feedPath: z.string().default('/apple-news.xml'),

  // Publishable state used for items that do not set their own
  // apple-news-publishable value. 'yes' opts every public article in by default.
  defaultPublishable: z.enum(['yes', 'no']).default('yes'),

  // Fallback Apple News section; individual articles may override it.
  defaultSection: z.optional(z.string()),

  // When true, the full article body goes into content:encoded (recommended).
  fullContent: z.boolean().default(true),

  // When true, emit
  // <link rel="alternate" type="application/rss+xml" title="Apple News"> in <head>.
  discoveryLink: z.boolean().default(true),
})
69
+
70
+ // ─── Fresh-twin storage layer ───
71
+
72
/**
 * Storage layer for twins written after the build. Every field has a default.
 */
const freshLayerSchema = z.object({
  // Name of the wrangler.toml binding (an R2 bucket or a KV namespace).
  bindingName: z.string().default('AEO_TWINS'),

  // Storage flavor: 'r2' (preferred) or 'kv'. The binding shape is autodetected at
  // runtime; stating it explicitly removes any ambiguity.
  type: z.enum(['r2', 'kv']).default('r2'),

  // Durable Object class binding that coordinates revalidation. One DO class covers
  // rate limiting, the per-slug lock, and idempotency; one instance per request hostname.
  coordinatorBindingName: z.string().default('AEO_REVALIDATION_COORD'),

  // How many days old-version R2 prefixes are kept before the scheduled prune Worker
  // deletes them. Whole number between 1 and 365.
  retentionDays: z.number().int().min(1).max(365).default(7),
})
84
+
85
+ // ─── AEO twins (object form) ───
86
+ //
87
+ // The boolean form `aeoTwins: true` is equivalent to `{ mode: 'static' }`.
88
+ // The object form exposes full configuration.
89
// Headers attached to every emitted .md file. Both default to true so twins
// canonicalize back to the HTML master and avoid duplicate-content issues.
const aeoTwinResponseHeadersSchema = z.object({
  noindex: z.boolean().default(true), // X-Robots-Tag: noindex
  canonicalToHtml: z.boolean().default(true), // Link: <html-url>; rel="canonical"
})

const aeoTwinsObjectSchema = z.object({
  // Delivery mode:
  //   'static'     — prerendered .md files emitted at build time (default, fastest serve path)
  //   'middleware' — .md served through Accept: text/markdown on the same URL (requires SSR)
  //   'both'       — static twins plus middleware negotiation (freshness on cache-miss
  //                  through on-demand revalidation)
  //
  // STALENESS WARNING: 'static' output is frozen at build time. When the HTML can be
  // edited after publication (CMS corrections, breaking-news updates) the .md twin
  // drifts, and answer engines comparing the two may downgrade trust.
  //   - Immutable-after-publish content        -> 'static' is safe.
  //   - Mutable content                        -> 'both' + `onDemandRevalidation` + CMS webhook.
  //   - Gated content with Flexible Sampling   -> 'middleware' (rejected with 'static').
  mode: z.enum(['static', 'middleware', 'both']).default('static'),

  // Translates an article URL into its single PRIMARY markdown twin URL. That URL is the
  // canonical twin for the Link: rel="alternate" header, sitemap-markdown.xml, and
  // summary-twin naming (<primary>.summary.md).
  // When omitted, '.md' is appended (e.g. /article/midway -> /article/midway.md).
  twinUrl: z.optional(z.function().args(z.string()).returns(z.string())),

  // Suffixes that 301-redirect to the primary twin URL through middleware.
  // Active only when `mode !== 'static'` (pure-static aliases were dropped in v7;
  // they created a weak fallback when Cloudflare Assets can't serve 301s).
  // The default ['/index.md'] makes `/article/midway/index.md` an alias of
  // `/article/midway.md` under middleware-backed modes.
  twinAliases: z.array(z.string()).default(['/index.md']),

  // Also emit a short "summary twin" (~300 tokens) at <primary-twin-url>.summary.md
  // for context-limited agents (Perplexity, Apple Intelligence, long-convo ChatGPT).
  summaryTwin: z.boolean().default(true),

  // Insert semantic chunk markers (<!-- aeo:section --> comments) into .md bodies so
  // RAG retrievers split on our boundaries, not theirs.
  ragChunkMarkers: z.boolean().default(true),

  // Per-file response headers; see aeoTwinResponseHeadersSchema for the defaults.
  responseHeaders: aeoTwinResponseHeadersSchema.default({}),

  // Item filter applied at twin-emission time. Falls back to the access rule
  // (no twin for members). Override for custom gating (e.g. exclude previews).
  include: z.optional(z.function().args(z.custom<ContentItem>()).returns(z.boolean())),

  // How static twins are judged stale against live HTML:
  //   'content-hash' — SHA-256 of body+description embedded in frontmatter; the validation
  //                    hook compares it with the current contentProvider output at build time.
  //   'dateModified' — compare frontmatter.dateModified against item.dateModified.
  //   'none'         — never check (only safe for immutable-after-publish content).
  stalenessCheck: z.enum(['content-hash', 'dateModified', 'none']).default('content-hash'),

  // Exposes `POST /_seo/revalidate` so CMS webhooks can push fresh twins.
  // Needs `freshLayer` plus a `revalidateToken` of length >= 32 (enforced at parse time).
  onDemandRevalidation: z.boolean().default(false),

  // Token for the revalidate endpoint, sent as `Authorization: Bearer`.
  // Must have length >= 32 when `onDemandRevalidation: true`; anything shorter is rejected
  // at config-parse time (a short token means a public unauthenticated DoS vector).
  revalidateToken: z.optional(z.string().min(32)),

  // Fresh-twin storage layer. Required when `mode !== 'static'` or
  // `onDemandRevalidation: true`. Cloudflare Workers Assets are immutable at runtime,
  // so twins written after the build are stored here.
  freshLayer: freshLayerSchema.optional(),
})
157
+
158
+ // ─── Flexible Sampling (Google paywall policy) ───
159
+
160
// Field definitions for Flexible Sampling; wrapped with an empty-object default below
// so the whole section may be omitted.
const flexibleSamplingFields = z.object({
  enabled: z.boolean().default(false),

  // Strategy for the free content shown to anonymous users.
  sampleMode: z.enum(['metered', 'lead-in', 'none']).default('lead-in'),

  // Number of lead-in paragraphs shown before the gate (non-negative integer).
  leadInParagraphs: z.number().int().min(0).default(2),

  // 'metered' mode: free articles per rolling 30 days. The consumer enforces the
  // count; this option only surfaces the signal to the package.
  meteredLimit: z.number().int().min(0).default(3),
})

const flexibleSamplingSchema = flexibleSamplingFields.default({})
172
+
173
+ // ─── Crawler policy ───
174
+
175
// Field definitions for crawler policy; every flag defaults to true and the whole
// section may be omitted thanks to the empty-object default applied below.
const crawlerPolicyFields = z.object({
  // Known LLM training crawlers are blocked in robots.txt AND answered with 403 by the
  // middleware on `access: 'members'`.
  blockLlmTrainingCrawlers: z.boolean().default(true),

  // User-directed LLM agents (ChatGPT-User, Claude-User, PerplexityBot-User) receive the
  // anonymous gated body regardless of cookies — never the full member body.
  userDirectedAgentsSeePublicOnly: z.boolean().default(true),

  // When true, verified Googlebot/Bingbot/Applebot get the full body of
  // `access: 'members'` items with paywall JSON-LD markup
  // (requires `flexibleSampling.enabled`).
  verifiedSearchCrawlersSeeFullBody: z.boolean().default(true),
})

const crawlerPolicySchema = crawlerPolicyFields.default({})
187
+
188
+ // ─── Main schema ───
189
+
190
// Form 1: a literal absolute URL.
const siteUrlLiteral = z.string().url()

// Form 2: a zero-argument resolver. It is invoked during parsing and its return value
// is passed through validateSiteUrl, so the parsed output is a string, not a function.
const siteUrlResolver = z
  .function()
  .args()
  .returns(z.string())
  .transform((resolve) => validateSiteUrl(resolve(), 'site resolver return value'))

// Form 3: a reference to an environment variable by name; no other keys are allowed.
const siteUrlEnvReference = z.object({ envVar: z.string().min(1) }).strict()

/** Accepted shapes for the `site` option, tried in the order listed. */
const siteUrlSchema = z.union([siteUrlLiteral, siteUrlResolver, siteUrlEnvReference])
199
+
200
// Publishing organization. `name` and `logo` are mandatory.
const organizationSchema = z.object({
  name: z.string(),
  url: z.string().url().optional(),
  logo: z.string().url(),
  sameAs: z.array(z.string().url()).optional(),
})

// Audio narration settings.
const audioNarrationSchema = z.object({
  enabled: z.boolean().default(false),
  speakableSelectors: z.array(z.string()).default(['.article-headline', '.article-body']),
  narratorName: z.string().optional(),
  asPodcastFeed: z.boolean().default(false),
  podcastFeedPath: z.string().default('/listen.xml'),
})

// Content-Signal header values (Cloudflare convention).
const contentSignalSchema = z.object({
  aiTrain: z.enum(['yes', 'no']).default('no'),
  search: z.enum(['yes', 'no']).default('yes'),
  aiInput: z.enum(['yes', 'no']).default('yes'),
})

// Validation thresholds and the switch that turns validation on or off.
const validationThresholdsSchema = z.object({
  heroMinWidth: z.number().default(1200),
  titleMaxLength: z.number().default(110),
  descriptionMaxLength: z.number().default(160),
  enabled: z.boolean().default(true),
})

// A single extra robots.txt rule group.
const robotsRuleSchema = z.object({
  userAgent: z.string(),
  allow: z.array(z.string()).optional(),
  disallow: z.array(z.string()).optional(),
})

// robots.txt customization.
const robotsSchema = z.object({
  additionalRules: z.array(robotsRuleSchema).optional(),
})

// Default meta values.
const defaultMetaSchema = z.object({
  titleSuffix: z.string().optional(),
  defaultImage: z.string().url().optional(),
  twitterSite: z.string().optional(),
  twitterCardType: z.enum(['summary', 'summary_large_image']).default('summary_large_image'),
  locale: z.string().default('en_US'),
})

// One link inside an llms.txt section; all three fields are required.
const llmsSectionLinkSchema = z.object({
  title: z.string(),
  url: z.string(),
  description: z.string(),
})

// One headed section of llms.txt.
const llmsSectionSchema = z.object({
  heading: z.string(),
  links: z.array(llmsSectionLinkSchema),
})

// llms.txt content.
const llmsContentSchema = z.object({
  description: z.string(),
  sections: z.array(llmsSectionSchema),
  optionalLinks: z.array(z.object({ title: z.string(), url: z.string() })).optional(),
})

/**
 * Base options schema, before cross-field validation.
 * Only `site` and `organization` are required; every other key is optional or defaulted.
 */
export const seoOptionsSchema = z.object({
  // ─── Required ───
  site: siteUrlSchema,
  organization: organizationSchema,

  // ─── Schema type for articles ───
  schemaType: z.enum(['Article', 'NewsArticle', 'BlogPosting']).default('Article'),

  // ─── Feature flags ───
  googleNews: z.boolean().default(false),
  llmsTxt: z.boolean().default(false),
  llmsFullTxt: z.boolean().default(false),
  markdownSitemap: z.boolean().default(true),
  rss: z.boolean().default(false),

  // ─── AEO twins ───
  // `true` is shorthand for { mode: 'static' }; `false` emits no twins.
  aeoTwins: z.union([z.boolean(), aeoTwinsObjectSchema]).default(false),

  // ─── Multilingual ───
  locales: z.array(localeSchema).optional(),
  defaultLocale: z.string().optional(),

  // ─── Podcast ───
  podcast: podcastSchema.optional(),

  // ─── Apple News ───
  appleNews: appleNewsSchema.optional(),

  // ─── Commerce ───
  commerce: commerceSchema.optional(),

  // ─── Audio narration ───
  audioNarration: audioNarrationSchema.optional(),

  // ─── Flexible Sampling ───
  flexibleSampling: flexibleSamplingSchema,

  // ─── Crawler policy ───
  crawlerPolicy: crawlerPolicySchema,

  // ─── Trailing slash policy ───
  trailingSlash: z.enum(['always', 'never', 'ignore']).default('never'),

  // ─── Content-Signal header (Cloudflare convention) ───
  contentSignal: contentSignalSchema.default({}),

  // ─── Validation thresholds ───
  validation: validationThresholdsSchema.default({}),

  // ─── robots.txt ───
  robots: robotsSchema.default({}),

  // ─── Default meta ───
  defaults: defaultMetaSchema.default({}),

  // ─── Content data provider (DEPRECATED) ───
  /**
   * @deprecated Use `contentProviderModule` instead. Inline function providers
   * do not survive the Cloudflare prerender Worker because the Astro
   * Cloudflare adapter prerenders static routes in a separate Worker process
   * that does not re-execute `astro.config.mjs`. The integration still accepts
   * this form and emits a `logger.warn` at `astro:config:setup`; it will be
   * removed in the next breaking release.
   *
   * See packages-docs/seo-contentprovidermodule.md for the migration.
   */
  contentProvider: z.custom<ContentProvider>().optional(),

  // ─── Content provider module path (for Cloudflare prerender workers) ───
  // The Astro Cloudflare adapter prerenders routes in a separate Worker process
  // that does NOT re-execute astro.config.mjs, so a `contentProvider` function set
  // in the main build-time Node process never reaches the prerender Worker.
  // Injected prerender routes (specifically `/[...aeoPath].md`) would fail with
  // "integration not initialized".
  //
  // The fix: supply the provider as a module specifier. The vite plugin emits an
  // import into its generated virtual module so every environment Vite bundles —
  // main Worker AND prerender Worker — seeds state at load.
  //
  // Specifier form: a Vite-resolvable import path, usually project-root-relative
  // (e.g. '/src/lib/content-provider.ts'). The module must default-export a
  // function matching the ContentProvider signature.
  contentProviderModule: z.string().optional(),

  // ─── llms.txt content ───
  llmsContent: llmsContentSchema.optional(),
})
351
+
352
+ // ─── Cross-field validation ───
353
+ //
354
+ // Enforces invariants Zod's type system can't express on its own. These are hard
355
+ // errors at parse time — not runtime surprises. Three invariants:
356
+ //
357
+ // 1. `flexibleSampling.enabled: true` + `aeoTwins.mode: 'static'` is rejected
358
+ // (static mode has no Worker to run the verified-Googlebot dispatch).
359
+ // 2. `mode !== 'static'` OR `onDemandRevalidation: true` requires `freshLayer`
360
+ // (the R2/KV binding that stores post-build twin writes).
361
+ // 3. `onDemandRevalidation: true` requires `revalidateToken >= 32` bytes
362
+ // (shorter means a public unauthenticated DoS vector; enforced here rather
363
+ // than discovered at runtime).
364
+ //
365
+ // A fourth implicit rejection: `mode: 'static'` + `onDemandRevalidation: true`
366
+ // is incoherent — static mode has no Worker to intercept revalidation writes,
367
+ // so R2 pushes never reach requests.
368
/**
 * `seoOptionsSchema` plus cross-field checks on `aeoTwins`. Issues are reported in a
 * fixed order: Flexible Sampling on static mode, static mode with on-demand
 * revalidation, missing `freshLayer`, and missing/short `revalidateToken`.
 * Nothing is checked when `aeoTwins` is `false`.
 */
export const validatedSeoOptionsSchema = seoOptionsSchema.superRefine((opts, ctx) => {
  const rawTwins = opts.aeoTwins

  // aeoTwins: false (including the defaulted false) needs no cross-field checks.
  // Flexible Sampling without any AEO mode is acceptable (just no markdown twins).
  if (rawTwins === false) {
    return
  }

  // `true` is shorthand for static mode without on-demand revalidation.
  const aeo =
    rawTwins === true ? { mode: 'static' as const, onDemandRevalidation: false } : rawTwins

  // Single place that records a custom issue at the given path.
  const report = (path: string[], message: string): void => {
    ctx.addIssue({ code: z.ZodIssueCode.custom, path, message })
  }

  const mode = aeo.mode
  const isStatic = mode === 'static'
  const onDemand = 'onDemandRevalidation' in aeo && aeo.onDemandRevalidation === true
  const token = 'revalidateToken' in aeo ? aeo.revalidateToken : undefined
  const freshLayer = 'freshLayer' in aeo ? aeo.freshLayer : undefined

  // Invariant 1: Flexible Sampling cannot run on static mode.
  if (opts.flexibleSampling?.enabled && isStatic) {
    report(
      ['flexibleSampling', 'enabled'],
      [
        "flexibleSampling.enabled requires aeoTwins.mode 'middleware' or 'both'. ",
        'Static mode has no Worker to run the crawler-class dispatch, so verified Googlebot ',
        'never receives the full-body paywall JSON-LD. Set mode to "middleware" or "both".',
      ].join(''),
    )
  }

  // Invariant 4: static mode combined with onDemandRevalidation is incoherent.
  if (isStatic && onDemand) {
    report(
      ['aeoTwins', 'onDemandRevalidation'],
      [
        "onDemandRevalidation requires aeoTwins.mode 'middleware' or 'both' — static mode ",
        'serves files directly from Cloudflare Assets with no Worker hop, so R2 writes from ',
        'the revalidate endpoint would never reach requests.',
      ].join(''),
    )
  }

  // Invariant 2: a freshLayer is mandatory for non-static modes and for onDemandRevalidation.
  const needsFreshLayer = onDemand || (mode !== undefined && !isStatic)
  if (needsFreshLayer && !freshLayer) {
    report(
      ['aeoTwins', 'freshLayer'],
      [
        `aeoTwins.freshLayer is required when mode='${mode ?? 'undefined'}' or `,
        'onDemandRevalidation=true. Configure an R2 or KV binding plus a Durable Object ',
        'binding in wrangler.toml and reference them via { bindingName, coordinatorBindingName }.',
      ].join(''),
    )
  }

  // Invariant 3: onDemandRevalidation needs a token of length 32 or more.
  const tokenTooShort = !token || token.length < 32
  if (onDemand && tokenTooShort) {
    report(
      ['aeoTwins', 'revalidateToken'],
      [
        'onDemandRevalidation requires a revalidateToken of at least 32 characters. ',
        'Generate one with `openssl rand -base64 32` and store it in your environment.',
      ].join(''),
    )
  }
})
436
+
437
/** Options as written by the caller: the input side of the validated schema. */
export type SeoOptions = z.input<typeof validatedSeoOptionsSchema>

/** Options after parsing: the output side of the validated schema. */
export type ResolvedSeoOptions = z.output<typeof validatedSeoOptionsSchema>

/** Parsed options whose `site` field has been narrowed to a plain string. */
export type SeoOptionsWithResolvedSite = Omit<ResolvedSeoOptions, 'site'> & { site: string }
440
+
441
/**
 * Normalizes the `aeoTwins` option to its object form.
 *
 * @param aeoTwins - Parsed `aeoTwins` value (boolean shorthand or object form).
 * @returns `undefined` for `false`; the object schema's defaults (obtained by parsing
 *   an empty object, so `mode` is 'static') for `true`; the given object unchanged otherwise.
 */
export function resolveAeoTwins(
  aeoTwins: ResolvedSeoOptions['aeoTwins'],
): z.output<typeof aeoTwinsObjectSchema> | undefined {
  // Object form has already been through the schema; hand it back untouched.
  if (typeof aeoTwins !== 'boolean') {
    return aeoTwins
  }
  // Boolean shorthand: true expands to the all-defaults object, false means no twins.
  return aeoTwins ? aeoTwinsObjectSchema.parse({}) : undefined
}
@@ -0,0 +1,130 @@
1
+ import 'virtual:growth-labs/seo/config'
2
+ // Dynamic prerender route that emits AEO markdown twins at build time.
3
+ //
4
+ // This replaces the previous `astro:build:done` filesystem-write approach. The
5
+ // problem with that approach: integration hooks run in raw Node ESM, where
6
+ // Vite-virtual imports like `astro:content` can't resolve. Consumers whose
7
+ // `contentProvider` transitively imported `astro:content` saw zero twin files
8
+ // emitted. Reference: https://github.com/withastro/astro/issues/...
9
+ //
10
+ // getStaticPaths runs INSIDE Astro's build pipeline, so Vite resolves virtual
11
+ // modules correctly. The route produces one prerendered file per twin URL:
12
+ // primary at `/article/midway.md` and, when enabled, summary at
13
+ // `/article/midway.md.summary.md`.
14
+
15
+ import type { APIRoute, GetStaticPaths } from 'astro'
16
+ import { getConfig, getContentProvider } from '../_internal/state.js'
17
+ import { resolveAeoTwins } from '../options.js'
18
+ import type { ContentItem } from '../types.js'
19
+ import { estimateTokenCount, generateAeoMarkdown } from '../utils/aeo.js'
20
+ import { generateSummaryTwin } from '../utils/aeo-summary.js'
21
+ import { forMarkdownTwin } from '../utils/content-filter.js'
22
+ import { computeContentHash } from '../utils/staleness.js'
23
+
24
+ // Prerender these URLs at build time so they're served as static files.
25
+ export const prerender = true
26
+
27
/** Props handed from getStaticPaths to the GET handler for one prerendered twin. */
interface TwinPathProps {
  /** Markdown text served as the response body. */
  body: string
  /** Whether this path is the primary twin or its summary twin. */
  kind: 'primary' | 'summary'
  // Index signature required by Astro's GetStaticPaths Props type.
  [key: string]: unknown
}
33
+
34
/**
 * Default article-URL to twin-URL mapping: drop every trailing slash, then append `.md`.
 *
 * @param url - Article URL or path.
 * @returns The twin URL, e.g. `/article/midway/` becomes `/article/midway.md`.
 */
function defaultTwin(url: string): string {
  // Walk back from the end past any run of '/' characters.
  let end = url.length
  while (end > 0 && url[end - 1] === '/') {
    end -= 1
  }
  return url.slice(0, end) + '.md'
}
37
+
38
/**
 * Reduces a URL to its path component.
 *
 * @param url - Absolute URL, or a path with or without a leading slash.
 * @returns The parsed `pathname` when `url` is a valid absolute URL; otherwise `url`
 *   itself, prefixed with `/` if it does not already start with one.
 */
function stripOrigin(url: string): string {
  try {
    const { pathname } = new URL(url)
    return pathname
  } catch {
    // Not an absolute URL — treat the input as a path below.
  }

  if (url.startsWith('/')) {
    return url
  }
  return `/${url}`
}
45
+
46
/**
 * Builds the list of prerendered twin paths.
 *
 * Returns an empty list when twins are disabled, when the mode is 'middleware'
 * (served on demand instead), when no content provider is registered, or when the
 * provider throws. Otherwise each included article yields a primary twin path and,
 * unless `summaryTwin` is false, a summary twin path.
 */
export const getStaticPaths: GetStaticPaths = async () => {
  const config = getConfig()
  const contentProvider = getContentProvider()
  const aeo = resolveAeoTwins(config.aeoTwins)

  // Twins are prerendered for 'static' and 'both'; 'middleware' serves on-demand.
  if (!aeo) return []
  if (aeo.mode === 'middleware') return []
  if (!contentProvider) return []

  let articles: ContentItem[]
  try {
    // This runs inside Astro's build pipeline, so a provider that relies on
    // `astro:content` or other Vite virtual modules resolves correctly here.
    articles = await contentProvider({ type: 'articles' }, {} as never)
  } catch {
    // Deliberately neither failing the build nor logging: astro's getStaticPaths
    // swallows console output. "Zero paths emitted" is the signal for the consumer
    // to inspect their contentProvider.
    return []
  }

  const keepItem = aeo.include ?? (() => true)
  const toTwinUrl = aeo.twinUrl ?? defaultTwin
  const wantSummary = aeo.summaryTwin !== false
  const withRagMarkers = aeo.ragChunkMarkers !== false
  const staleness = aeo.stalenessCheck ?? 'content-hash'
  const publisherName = config.organization.name
  const schemaType = config.schemaType

  const result: Array<{ params: { aeoPath: string }; props: TwinPathProps }> = []

  for (const article of forMarkdownTwin(articles).filter(keepItem)) {
    const primaryUrl = toTwinUrl(article.url)
    const primaryPath = stripOrigin(primaryUrl).replace(/^\//, '')
    const content = article.description ?? ''

    // The hash is only computed when the staleness strategy asks for it.
    let contentHash
    if (staleness === 'content-hash') {
      contentHash = await computeContentHash(article, content)
    }

    let summaryUrl: string | undefined
    if (wantSummary) {
      summaryUrl = `${primaryUrl}.summary.md`
    }

    // Primary twin. The trailing `.md` is removed from the route param.
    const primaryMarkdown = generateAeoMarkdown(article, {
      publisherName,
      schemaType,
      content,
      ragChunkMarkers: withRagMarkers,
      canonical: article.url,
      twinUrl: primaryUrl,
      summaryUrl,
      contentHash,
    })
    result.push({
      params: { aeoPath: primaryPath.replace(/\.md$/, '') },
      props: { body: primaryMarkdown, kind: 'primary' },
    })

    // Summary twin, keyed by the summary URL with its own trailing `.md` removed.
    if (summaryUrl !== undefined) {
      const summaryPath = stripOrigin(summaryUrl).replace(/^\//, '').replace(/\.md$/, '')
      const { markdown } = generateSummaryTwin(article, {
        publisherName,
        schemaType,
        content,
        fullUrl: primaryUrl,
      })
      result.push({
        params: { aeoPath: summaryPath },
        props: { body: markdown, kind: 'summary' },
      })
    }
  }

  return result
}
119
+
120
/**
 * Serves one prerendered twin: the markdown from `props.body`, marked noindex,
 * with the estimated token count exposed in `x-markdown-tokens`.
 */
export const GET: APIRoute = async ({ props }) => {
  const twin = props as unknown as TwinPathProps
  const markdown = twin.body

  const headers = {
    'Content-Type': 'text/markdown; charset=utf-8',
    'X-Robots-Tag': 'noindex',
    'x-markdown-tokens': String(estimateTokenCount(markdown)),
  }

  return new Response(markdown, { headers })
}
@@ -0,0 +1,36 @@
1
+ import 'virtual:growth-labs/seo/config'
2
+ import type { APIRoute } from 'astro'
3
+ import { getConfig, getContentProvider } from '../_internal/state.js'
4
+ import { resolveSeoConfig } from '../site-url.js'
5
+ import type { ContentItem } from '../types.js'
6
+ import { generateAppleNewsRss } from '../utils/apple-news-rss.js'
7
+
8
+ export const prerender = false
9
+
10
/**
 * Serves the Apple News RSS feed.
 *
 * Responds 404 when `appleNews.enabled` is not set. Otherwise loads articles from the
 * registered content provider (if any) and renders them with `generateAppleNewsRss`.
 * A provider failure is logged and the feed is rendered from an empty article list,
 * so the endpoint keeps answering instead of throwing.
 */
export const GET: APIRoute = async (context) => {
  const config = resolveSeoConfig(getConfig())
  const contentProvider = getContentProvider()

  if (!config.appleNews?.enabled) {
    return new Response('Apple News not enabled', { status: 404 })
  }

  let articles: ContentItem[] = []
  if (contentProvider) {
    try {
      articles = await contentProvider({ type: 'articles' }, context)
    } catch (error: unknown) {
      // Best-effort fallback to an empty feed, but never silently: without this log
      // a broken provider is indistinguishable from a site with no articles.
      console.warn(
        '[growth-labs/seo] apple-news: contentProvider failed; serving an empty feed',
        error,
      )
    }
  }

  // content-filter.forAppleNews is applied inside generateAppleNewsRss.
  const xml = generateAppleNewsRss({
    items: articles,
    options: config,
    // Consumers can supply an HTML resolver by extending the config surface.
    // For v1, pass through; description-only is acceptable when fullContent: false.
  })

  return new Response(xml, {
    headers: { 'Content-Type': 'application/rss+xml; charset=utf-8' },
  })
}
@@ -0,0 +1,36 @@
1
+ import 'virtual:growth-labs/seo/config'
2
+ import type { APIRoute } from 'astro'
3
+ import { getConfig, getContentProvider } from '../_internal/state.js'
4
+ import type { ContentItem } from '../types.js'
5
+ import { generateLlmsFull } from '../utils/llms-full.js'
6
+
7
+ export const prerender = false
8
+
9
/**
 * Serves llms-full.txt.
 *
 * Responds 404 when `llmsFullTxt` is off. Otherwise loads articles from the registered
 * content provider (if any) and renders them with `generateLlmsFull`. A provider
 * failure is logged and the document is rendered from an empty article list, so the
 * endpoint keeps answering instead of throwing.
 */
export const GET: APIRoute = async (context) => {
  const config = getConfig()
  const contentProvider = getContentProvider()

  if (!config.llmsFullTxt) {
    return new Response('llms-full.txt disabled', { status: 404 })
  }

  let articles: ContentItem[] = []
  if (contentProvider) {
    try {
      articles = await contentProvider({ type: 'articles' }, context)
    } catch (error: unknown) {
      // Best-effort fallback to an empty document, but never silently: without this
      // log a broken provider is indistinguishable from a site with no articles.
      console.warn(
        '[growth-labs/seo] llms-full: contentProvider failed; serving an empty document',
        error,
      )
    }
  }

  // Members items excluded by forLlmsFull inside the generator.
  const text = generateLlmsFull({
    items: articles,
    siteName: config.organization.name,
  })

  return new Response(text, {
    headers: {
      'Content-Type': 'text/plain; charset=utf-8',
      'X-Robots-Tag': 'noindex',
    },
  })
}
@@ -0,0 +1,15 @@
1
+ import 'virtual:growth-labs/seo/config'
2
+ import type { APIRoute } from 'astro'
3
+ import { getConfig } from '../_internal/state.js'
4
+ import { generateLlmsTxt } from '../utils/llms.js'
5
+
6
+ export const prerender = false
7
+
8
/** Serves llms.txt as UTF-8 plain text, generated from the current config. */
export const GET: APIRoute = async () => {
  const body = generateLlmsTxt(getConfig())
  const headers = { 'Content-Type': 'text/plain; charset=utf-8' }
  return new Response(body, { headers })
}