@growth-labs/seo 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +56 -37
- package/dist/index.js.map +1 -1
- package/dist/middleware/seo.d.ts.map +1 -1
- package/dist/middleware/seo.js +8 -3
- package/dist/middleware/seo.js.map +1 -1
- package/dist/options.d.ts +140 -0
- package/dist/options.d.ts.map +1 -1
- package/dist/options.js +18 -0
- package/dist/options.js.map +1 -1
- package/dist/routes/aeo-twin.d.ts.map +1 -1
- package/dist/routes/aeo-twin.js +33 -2
- package/dist/routes/aeo-twin.js.map +1 -1
- package/dist/routes/sitemap-index.d.ts.map +1 -1
- package/dist/routes/sitemap-index.js +45 -32
- package/dist/routes/sitemap-index.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/define-content-provider.d.ts +1 -0
- package/dist/utils/define-content-provider.d.ts.map +1 -1
- package/dist/utils/define-content-provider.js.map +1 -1
- package/dist/utils/json-ld/video.d.ts.map +1 -1
- package/dist/utils/json-ld/video.js +18 -0
- package/dist/utils/json-ld/video.js.map +1 -1
- package/dist/utils/sitemap.d.ts.map +1 -1
- package/dist/utils/sitemap.js +1 -1
- package/dist/utils/sitemap.js.map +1 -1
- package/dist/utils/validation.d.ts +5 -0
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +78 -4
- package/dist/utils/validation.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +57 -37
- package/src/middleware/seo.ts +8 -3
- package/src/options.ts +21 -0
- package/src/routes/aeo-twin.ts +38 -2
- package/src/routes/sitemap-index.ts +48 -35
- package/src/types.ts +1 -1
- package/src/utils/define-content-provider.ts +1 -0
- package/src/utils/json-ld/video.ts +20 -0
- package/src/utils/sitemap.ts +4 -2
- package/src/utils/validation.ts +95 -4
package/src/index.ts
CHANGED
|
@@ -89,53 +89,65 @@ export default function seo(userOptions: SeoOptions): AstroIntegration {
|
|
|
89
89
|
|
|
90
90
|
const injected: string[] = []
|
|
91
91
|
const skipped: Array<{ pattern: string; reason: string }> = []
|
|
92
|
+
const injectedRoutes = options.injectedRoutes
|
|
92
93
|
|
|
93
|
-
|
|
94
|
-
if (providerWired) {
|
|
95
|
-
injectRoute({
|
|
96
|
-
pattern: SITEMAP_INDEX_PATH,
|
|
97
|
-
entrypoint: resolveEntrypoint('./routes/sitemap-index'),
|
|
98
|
-
prerender: false,
|
|
99
|
-
})
|
|
100
|
-
injected.push(SITEMAP_INDEX_PATH)
|
|
94
|
+
const injectSeoRoute = (pattern: string, entrypoint: string) => {
|
|
101
95
|
injectRoute({
|
|
102
|
-
pattern
|
|
103
|
-
entrypoint: resolveEntrypoint(
|
|
96
|
+
pattern,
|
|
97
|
+
entrypoint: resolveEntrypoint(entrypoint),
|
|
104
98
|
prerender: false,
|
|
105
99
|
})
|
|
106
|
-
injected.push(
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
injectRoute({
|
|
114
|
-
pattern: '/sitemap-videos.xml',
|
|
115
|
-
entrypoint: resolveEntrypoint('./routes/sitemap-videos'),
|
|
116
|
-
prerender: false,
|
|
100
|
+
injected.push(pattern)
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const skipDisabledRoute = (pattern: string, optionName: string) => {
|
|
104
|
+
skipped.push({
|
|
105
|
+
pattern,
|
|
106
|
+
reason: `injectedRoutes.${optionName} is false`,
|
|
117
107
|
})
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// ─── Sitemaps (provider-wired gated) ───
|
|
111
|
+
if (providerWired) {
|
|
112
|
+
if (injectedRoutes.sitemapIndex) {
|
|
113
|
+
injectSeoRoute(SITEMAP_INDEX_PATH, './routes/sitemap-index')
|
|
114
|
+
} else {
|
|
115
|
+
skipDisabledRoute(SITEMAP_INDEX_PATH, 'sitemapIndex')
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (injectedRoutes.sitemapArticles) {
|
|
119
|
+
injectSeoRoute('/sitemap-articles.xml', './routes/sitemap-articles')
|
|
120
|
+
} else {
|
|
121
|
+
skipDisabledRoute('/sitemap-articles.xml', 'sitemapArticles')
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
if (injectedRoutes.sitemapPages) {
|
|
125
|
+
injectSeoRoute('/sitemap-pages.xml', './routes/sitemap-pages')
|
|
126
|
+
} else {
|
|
127
|
+
skipDisabledRoute('/sitemap-pages.xml', 'sitemapPages')
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
if (injectedRoutes.sitemapVideos) {
|
|
131
|
+
injectSeoRoute('/sitemap-videos.xml', './routes/sitemap-videos')
|
|
132
|
+
} else {
|
|
133
|
+
skipDisabledRoute('/sitemap-videos.xml', 'sitemapVideos')
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if (!injectedRoutes.sitemapProducts) {
|
|
137
|
+
skipDisabledRoute('/sitemap-products.xml', 'sitemapProducts')
|
|
138
|
+
} else if (options.commerce?.enabled) {
|
|
139
|
+
injectSeoRoute('/sitemap-products.xml', './routes/sitemap-products')
|
|
126
140
|
} else {
|
|
127
141
|
skipped.push({
|
|
128
142
|
pattern: '/sitemap-products.xml',
|
|
129
143
|
reason: 'commerce.enabled is false',
|
|
130
144
|
})
|
|
131
145
|
}
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
})
|
|
138
|
-
injected.push('/sitemap-markdown.xml')
|
|
146
|
+
|
|
147
|
+
if (!injectedRoutes.sitemapMarkdown) {
|
|
148
|
+
skipDisabledRoute('/sitemap-markdown.xml', 'sitemapMarkdown')
|
|
149
|
+
} else if (options.markdownSitemap && aeo && aeo.mode !== 'middleware') {
|
|
150
|
+
injectSeoRoute('/sitemap-markdown.xml', './routes/sitemap-markdown')
|
|
139
151
|
} else if (options.markdownSitemap) {
|
|
140
152
|
skipped.push({
|
|
141
153
|
pattern: '/sitemap-markdown.xml',
|
|
@@ -313,12 +325,17 @@ export default function seo(userOptions: SeoOptions): AstroIntegration {
|
|
|
313
325
|
|
|
314
326
|
for (const file of htmlFiles) {
|
|
315
327
|
const html = readFileSync(file, 'utf-8')
|
|
328
|
+
const relPath = file.replace(outDir, '')
|
|
316
329
|
const result = validatePage(html, {
|
|
317
330
|
titleMaxLength: options.validation.titleMaxLength,
|
|
318
331
|
descriptionMaxLength: options.validation.descriptionMaxLength,
|
|
319
332
|
heroMinWidth: options.validation.heroMinWidth,
|
|
333
|
+
pagePath: relPath,
|
|
334
|
+
requireH1: options.validation.requireH1,
|
|
335
|
+
requireHeroImage: options.validation.requireHeroImage,
|
|
336
|
+
requireArticleSchema: options.validation.requireArticleSchema,
|
|
337
|
+
requireMaxImagePreviewLarge: options.validation.requireMaxImagePreviewLarge,
|
|
320
338
|
})
|
|
321
|
-
const relPath = file.replace(outDir, '')
|
|
322
339
|
for (const error of result.errors) {
|
|
323
340
|
logger.error(`${relPath}: ${error}`)
|
|
324
341
|
errorCount++
|
|
@@ -352,6 +369,9 @@ export default function seo(userOptions: SeoOptions): AstroIntegration {
|
|
|
352
369
|
|
|
353
370
|
if (errorCount || warningCount) {
|
|
354
371
|
logger.info(`SEO validation: ${errorCount} errors, ${warningCount} warnings`)
|
|
372
|
+
if (errorCount > 0) {
|
|
373
|
+
throw new Error(`[@growth-labs/seo] SEO validation failed with ${errorCount} errors`)
|
|
374
|
+
}
|
|
355
375
|
} else {
|
|
356
376
|
logger.info('SEO validation: all checks passed')
|
|
357
377
|
}
|
package/src/middleware/seo.ts
CHANGED
|
@@ -109,13 +109,18 @@ export function createSeoMiddleware(
|
|
|
109
109
|
// - CF-Connecting-IP: FCrDNS is keyed on it
|
|
110
110
|
appendVaryHeaders(newHeaders)
|
|
111
111
|
|
|
112
|
-
// Link alternate header on HTML responses when aeoTwins enabled
|
|
113
|
-
//
|
|
112
|
+
// Link alternate header on HTML responses when aeoTwins enabled.
|
|
113
|
+
//
|
|
114
|
+
// Emit only when contentProvider returns an item for this URL AND that item
|
|
115
|
+
// is not members-gated. Previously this also emitted Link headers for URLs
|
|
116
|
+
// the contentProvider did not know about (hubs, region pages, the home page,
|
|
117
|
+
// etc.) — those URLs have no twin at rest, so the advertised .md target 404s.
|
|
118
|
+
// Gating to items-only matches actually-emitted twin paths.
|
|
114
119
|
if (aeo && isHtmlResponse(response)) {
|
|
115
120
|
const linkItem = contentProvider
|
|
116
121
|
? await findItemForPath(contentProvider, context, url.pathname)
|
|
117
122
|
: undefined
|
|
118
|
-
if (
|
|
123
|
+
if (linkItem && linkItem.access !== 'members') {
|
|
119
124
|
const target =
|
|
120
125
|
aeo.mode === 'static' || aeo.mode === 'both' ? twinUrlFor(url, aeo) : url.toString()
|
|
121
126
|
const linkValue = `<${target}>; rel="alternate"; type="text/markdown"`
|
package/src/options.ts
CHANGED
|
@@ -185,6 +185,19 @@ const crawlerPolicySchema = z
|
|
|
185
185
|
})
|
|
186
186
|
.default({})
|
|
187
187
|
|
|
188
|
+
// ─── Injected route ownership ───
|
|
189
|
+
|
|
190
|
+
const injectedRoutesSchema = z
|
|
191
|
+
.object({
|
|
192
|
+
sitemapIndex: z.boolean().default(true),
|
|
193
|
+
sitemapArticles: z.boolean().default(true),
|
|
194
|
+
sitemapPages: z.boolean().default(true),
|
|
195
|
+
sitemapVideos: z.boolean().default(true),
|
|
196
|
+
sitemapProducts: z.boolean().default(true),
|
|
197
|
+
sitemapMarkdown: z.boolean().default(true),
|
|
198
|
+
})
|
|
199
|
+
.default({})
|
|
200
|
+
|
|
188
201
|
// ─── Main schema ───
|
|
189
202
|
|
|
190
203
|
const siteUrlSchema = z.union([
|
|
@@ -217,6 +230,10 @@ export const seoOptionsSchema = z.object({
|
|
|
217
230
|
markdownSitemap: z.boolean().default(true),
|
|
218
231
|
rss: z.boolean().default(false),
|
|
219
232
|
|
|
233
|
+
// ─── Injected route ownership ───
|
|
234
|
+
// Set a route false when the consumer application owns that public path.
|
|
235
|
+
injectedRoutes: injectedRoutesSchema,
|
|
236
|
+
|
|
220
237
|
// ─── AEO twins ───
|
|
221
238
|
// Boolean form = { mode: 'static' } when true, no twins emitted when false.
|
|
222
239
|
aeoTwins: z.union([z.boolean(), aeoTwinsObjectSchema]).default(false),
|
|
@@ -269,6 +286,10 @@ export const seoOptionsSchema = z.object({
|
|
|
269
286
|
heroMinWidth: z.number().default(1200),
|
|
270
287
|
titleMaxLength: z.number().default(110),
|
|
271
288
|
descriptionMaxLength: z.number().default(160),
|
|
289
|
+
requireH1: z.boolean().default(true),
|
|
290
|
+
requireHeroImage: z.boolean().default(true),
|
|
291
|
+
requireArticleSchema: z.boolean().default(true),
|
|
292
|
+
requireMaxImagePreviewLarge: z.boolean().default(true),
|
|
272
293
|
enabled: z.boolean().default(true),
|
|
273
294
|
})
|
|
274
295
|
.default({}),
|
package/src/routes/aeo-twin.ts
CHANGED
|
@@ -51,11 +51,35 @@ export const getStaticPaths: GetStaticPaths = async () => {
|
|
|
51
51
|
// Emit twins for static + both modes; middleware mode serves on-demand.
|
|
52
52
|
if (!aeo || aeo.mode === 'middleware' || !contentProvider) return []
|
|
53
53
|
|
|
54
|
+
// Twin generation now covers articles + pages + videos + podcasts. Hubs,
|
|
55
|
+
// region pillars, series indexes, and the home page typically come through
|
|
56
|
+
// `type: 'pages'`. Each consumer's contentProvider decides which surfaces
|
|
57
|
+
// belong to which type; if a type is unknown it returns [] and we skip.
|
|
58
|
+
const TWIN_TYPES = ['articles', 'pages', 'videos', 'podcasts'] as const
|
|
59
|
+
|
|
54
60
|
let items: ContentItem[]
|
|
55
61
|
try {
|
|
56
62
|
// contentProvider resolves here because this function runs inside Astro's
|
|
57
63
|
// build pipeline — `astro:content` and other Vite virtual modules work.
|
|
58
|
-
|
|
64
|
+
const batches = await Promise.all(
|
|
65
|
+
TWIN_TYPES.map((type) =>
|
|
66
|
+
(contentProvider({ type }, {} as never) as Promise<ContentItem[]>).catch(
|
|
67
|
+
() => [] as ContentItem[],
|
|
68
|
+
),
|
|
69
|
+
),
|
|
70
|
+
)
|
|
71
|
+
// Deduplicate by URL — a contentProvider might surface the same item under
|
|
72
|
+
// multiple types (rare, but cheap to defend against).
|
|
73
|
+
const seen = new Set<string>()
|
|
74
|
+
items = []
|
|
75
|
+
for (const batch of batches) {
|
|
76
|
+
for (const it of batch) {
|
|
77
|
+
if (!it || typeof it.url !== 'string') continue
|
|
78
|
+
if (seen.has(it.url)) continue
|
|
79
|
+
seen.add(it.url)
|
|
80
|
+
items.push(it)
|
|
81
|
+
}
|
|
82
|
+
}
|
|
59
83
|
} catch {
|
|
60
84
|
// Don't fail the build; log nothing here because astro's getStaticPaths
|
|
61
85
|
// swallows console output. The consumer sees "zero paths emitted" which
|
|
@@ -74,7 +98,19 @@ export const getStaticPaths: GetStaticPaths = async () => {
|
|
|
74
98
|
for (const item of filtered) {
|
|
75
99
|
const primaryUrl = twinUrl(item.url)
|
|
76
100
|
const primaryPath = stripOrigin(primaryUrl).replace(/^\//, '')
|
|
77
|
-
|
|
101
|
+
// Paywall-aware twin body. For free items the twin echoes the description
|
|
102
|
+
// (consumer-provided summary). For gated items we never emit the full body
|
|
103
|
+
// — twins are public regardless of the HTML paywall — so we keep the same
|
|
104
|
+
// description-only body and append an explicit "Full analysis is gated"
|
|
105
|
+
// footer that AI engines can cite without claiming the paid content as
|
|
106
|
+
// theirs. The HTML page carries the structured-data paywall markup
|
|
107
|
+
// (`isAccessibleForFree: false` + `hasPart`); the twin's textual boundary
|
|
108
|
+
// must agree with that signal.
|
|
109
|
+
const isGated = item.access === 'members' || item.isAccessibleForFree === false
|
|
110
|
+
const sample = item.description ?? ''
|
|
111
|
+
const body = isGated
|
|
112
|
+
? `${sample}${sample ? '\n\n' : ''}---\n\nFull analysis is behind the paywall. Read the gated piece at ${item.url}.`
|
|
113
|
+
: sample
|
|
78
114
|
|
|
79
115
|
const contentHash =
|
|
80
116
|
stalenessMode === 'content-hash' ? await computeContentHash(item, body) : undefined
|
|
@@ -14,6 +14,7 @@ export const GET: APIRoute = async (context) => {
|
|
|
14
14
|
const sitemaps: SitemapEntry[] = []
|
|
15
15
|
|
|
16
16
|
const { site } = config
|
|
17
|
+
const injectedRoutes = config.injectedRoutes
|
|
17
18
|
|
|
18
19
|
// Fetch articles lastmod if possible
|
|
19
20
|
let articlesLastmod: string | undefined
|
|
@@ -22,43 +23,49 @@ export const GET: APIRoute = async (context) => {
|
|
|
22
23
|
let productsLastmod: string | undefined
|
|
23
24
|
|
|
24
25
|
if (contentProvider) {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
if (injectedRoutes.sitemapArticles) {
|
|
27
|
+
try {
|
|
28
|
+
const articles = await contentProvider({ type: 'articles' }, context as any)
|
|
29
|
+
if (articles.length > 0) {
|
|
30
|
+
const dates = articles
|
|
31
|
+
.map((a) => a.dateModified ?? a.datePublished)
|
|
32
|
+
.filter(Boolean) as string[]
|
|
33
|
+
if (dates.length > 0) {
|
|
34
|
+
articlesLastmod = dates.sort().at(-1)
|
|
35
|
+
}
|
|
33
36
|
}
|
|
34
|
-
}
|
|
35
|
-
}
|
|
37
|
+
} catch {}
|
|
38
|
+
}
|
|
36
39
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
if (injectedRoutes.sitemapPages) {
|
|
41
|
+
try {
|
|
42
|
+
const pages = await contentProvider({ type: 'pages' }, context as any)
|
|
43
|
+
if (pages.length > 0) {
|
|
44
|
+
const dates = pages
|
|
45
|
+
.map((p) => p.dateModified ?? p.datePublished)
|
|
46
|
+
.filter(Boolean) as string[]
|
|
47
|
+
if (dates.length > 0) {
|
|
48
|
+
pagesLastmod = dates.sort().at(-1)
|
|
49
|
+
}
|
|
45
50
|
}
|
|
46
|
-
}
|
|
47
|
-
}
|
|
51
|
+
} catch {}
|
|
52
|
+
}
|
|
48
53
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
if (injectedRoutes.sitemapVideos) {
|
|
55
|
+
try {
|
|
56
|
+
const videos = await contentProvider({ type: 'videos' }, context as any)
|
|
57
|
+
if (videos.length > 0) {
|
|
58
|
+
const dates = videos
|
|
59
|
+
.map((v) => v.dateModified ?? v.datePublished)
|
|
60
|
+
.filter(Boolean) as string[]
|
|
61
|
+
if (dates.length > 0) {
|
|
62
|
+
videosLastmod = dates.sort().at(-1)
|
|
63
|
+
}
|
|
57
64
|
}
|
|
58
|
-
}
|
|
59
|
-
}
|
|
65
|
+
} catch {}
|
|
66
|
+
}
|
|
60
67
|
|
|
61
|
-
if (config.commerce?.enabled) {
|
|
68
|
+
if (config.commerce?.enabled && injectedRoutes.sitemapProducts) {
|
|
62
69
|
try {
|
|
63
70
|
const products = await contentProvider({ type: 'products' }, context as any)
|
|
64
71
|
if (products.length > 0) {
|
|
@@ -73,11 +80,17 @@ export const GET: APIRoute = async (context) => {
|
|
|
73
80
|
}
|
|
74
81
|
}
|
|
75
82
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
83
|
+
if (injectedRoutes.sitemapArticles) {
|
|
84
|
+
sitemaps.push({ loc: `${site}/sitemap-articles.xml`, lastmod: articlesLastmod })
|
|
85
|
+
}
|
|
86
|
+
if (injectedRoutes.sitemapPages) {
|
|
87
|
+
sitemaps.push({ loc: `${site}/sitemap-pages.xml`, lastmod: pagesLastmod })
|
|
88
|
+
}
|
|
89
|
+
if (injectedRoutes.sitemapVideos) {
|
|
90
|
+
sitemaps.push({ loc: `${site}/sitemap-videos.xml`, lastmod: videosLastmod })
|
|
91
|
+
}
|
|
79
92
|
|
|
80
|
-
if (config.commerce?.enabled) {
|
|
93
|
+
if (config.commerce?.enabled && injectedRoutes.sitemapProducts) {
|
|
81
94
|
sitemaps.push({ loc: `${site}/sitemap-products.xml`, lastmod: productsLastmod })
|
|
82
95
|
}
|
|
83
96
|
|
package/src/types.ts
CHANGED
|
@@ -117,7 +117,7 @@ export interface ContentItem {
|
|
|
117
117
|
|
|
118
118
|
// ─── Content provider ───
|
|
119
119
|
|
|
120
|
-
export type ContentType = 'articles' | 'pages' | 'videos' | 'products' | 'authors'
|
|
120
|
+
export type ContentType = 'articles' | 'pages' | 'videos' | 'podcasts' | 'products' | 'authors'
|
|
121
121
|
|
|
122
122
|
export interface ContentProviderParams {
|
|
123
123
|
type: ContentType
|
|
@@ -16,6 +16,7 @@ export interface ContentItemByType {
|
|
|
16
16
|
articles: ContentItem
|
|
17
17
|
pages: ContentItem
|
|
18
18
|
videos: ContentItem & { video: NonNullable<ContentItem['video']> }
|
|
19
|
+
podcasts: ContentItem
|
|
19
20
|
products: ContentItem & { product: NonNullable<ContentItem['product']> }
|
|
20
21
|
authors: ContentItem
|
|
21
22
|
}
|
|
@@ -3,12 +3,22 @@ import type { ContentItem, JsonLdObject } from '../../types.js'
|
|
|
3
3
|
export function generateVideoJsonLd(item: ContentItem): JsonLdObject {
|
|
4
4
|
const video = item.video!
|
|
5
5
|
|
|
6
|
+
// Derive isAccessibleForFree: explicit field wins, otherwise derive from access.
|
|
7
|
+
// Matches the article emitter so paywalled videos carry the same Google-recognized
|
|
8
|
+
// markup as paywalled articles. Without this, Googlebot has no way to know the
|
|
9
|
+
// gated playback is intentional and may flag the page as cloaking.
|
|
10
|
+
const isAccessibleForFree = item.isAccessibleForFree ?? item.access !== 'members'
|
|
11
|
+
|
|
6
12
|
const result: JsonLdObject = {
|
|
7
13
|
'@context': 'https://schema.org',
|
|
8
14
|
'@type': 'VideoObject',
|
|
9
15
|
name: item.title,
|
|
10
16
|
thumbnailUrl: video.thumbnailUrl,
|
|
11
17
|
duration: video.duration,
|
|
18
|
+
// Google requires the string form 'True'/'False' for Rich Results when paywall
|
|
19
|
+
// markup is emitted. Same convention as the article schema.
|
|
20
|
+
// https://developers.google.com/search/docs/appearance/structured-data/paywalled-content
|
|
21
|
+
isAccessibleForFree: isAccessibleForFree ? 'True' : 'False',
|
|
12
22
|
}
|
|
13
23
|
|
|
14
24
|
if (item.description) result.description = item.description
|
|
@@ -16,5 +26,15 @@ export function generateVideoJsonLd(item: ContentItem): JsonLdObject {
|
|
|
16
26
|
if (video.embedUrl) result.embedUrl = video.embedUrl
|
|
17
27
|
if (item.datePublished) result.uploadDate = item.datePublished
|
|
18
28
|
|
|
29
|
+
// hasPart paywall marker — emitted whenever the item is gated and a cssSelector
|
|
30
|
+
// is configured. Mirrors the article emitter behaviour.
|
|
31
|
+
if (!isAccessibleForFree && item.paywallCssSelector) {
|
|
32
|
+
result.hasPart = {
|
|
33
|
+
'@type': 'WebPageElement',
|
|
34
|
+
isAccessibleForFree: 'False',
|
|
35
|
+
cssSelector: item.paywallCssSelector,
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
19
39
|
return result
|
|
20
40
|
}
|
package/src/utils/sitemap.ts
CHANGED
|
@@ -105,9 +105,11 @@ ${entries}
|
|
|
105
105
|
|
|
106
106
|
export function generateVideoSitemap(items: ContentItem[]): string {
|
|
107
107
|
const entries = items
|
|
108
|
-
.filter((item)
|
|
108
|
+
.filter((item): item is ContentItem & { video: NonNullable<ContentItem['video']> } =>
|
|
109
|
+
Boolean(item.video && (item.video.contentUrl || item.video.embedUrl)),
|
|
110
|
+
)
|
|
109
111
|
.map((item) => {
|
|
110
|
-
const v = item.video
|
|
112
|
+
const v = item.video
|
|
111
113
|
const contentUrlTag = v.contentUrl
|
|
112
114
|
? `\n <video:content_loc>${escapeXml(v.contentUrl)}</video:content_loc>`
|
|
113
115
|
: ''
|
package/src/utils/validation.ts
CHANGED
|
@@ -7,6 +7,11 @@ export interface PageValidationOptions {
|
|
|
7
7
|
titleMaxLength: number
|
|
8
8
|
descriptionMaxLength: number
|
|
9
9
|
heroMinWidth?: number
|
|
10
|
+
pagePath?: string
|
|
11
|
+
requireH1?: boolean
|
|
12
|
+
requireHeroImage?: boolean
|
|
13
|
+
requireArticleSchema?: boolean
|
|
14
|
+
requireMaxImagePreviewLarge?: boolean
|
|
10
15
|
}
|
|
11
16
|
|
|
12
17
|
/**
|
|
@@ -139,7 +144,7 @@ export function validatePage(html: string, options: PageValidationOptions): Vali
|
|
|
139
144
|
html.match(/<meta\s[^>]*property=["']og:image["'][^>]*>/i) ??
|
|
140
145
|
html.match(/<meta\s[^>]*property=og:image[^>]*>/i)
|
|
141
146
|
if (!ogImageMatch) {
|
|
142
|
-
warnings
|
|
147
|
+
pushIssue(options.requireHeroImage, errors, warnings, 'Missing hero image og:image meta tag')
|
|
143
148
|
}
|
|
144
149
|
|
|
145
150
|
// og:image:width check against heroMinWidth
|
|
@@ -154,18 +159,27 @@ export function validatePage(html: string, options: PageValidationOptions): Vali
|
|
|
154
159
|
if (widthContentMatch) {
|
|
155
160
|
const width = Number(widthContentMatch[1])
|
|
156
161
|
if (!Number.isNaN(width) && width < options.heroMinWidth) {
|
|
157
|
-
|
|
162
|
+
pushIssue(
|
|
163
|
+
options.requireHeroImage,
|
|
164
|
+
errors,
|
|
165
|
+
warnings,
|
|
166
|
+
`Hero image width ${width}px is below minimum ${options.heroMinWidth}px`,
|
|
167
|
+
)
|
|
158
168
|
}
|
|
169
|
+
} else if (options.requireHeroImage) {
|
|
170
|
+
errors.push(`Hero image width is missing; minimum is ${options.heroMinWidth}px`)
|
|
159
171
|
}
|
|
172
|
+
} else if (options.requireHeroImage && ogImageMatch) {
|
|
173
|
+
errors.push(`Hero image width is missing; minimum is ${options.heroMinWidth}px`)
|
|
160
174
|
}
|
|
161
175
|
}
|
|
162
176
|
|
|
163
177
|
// H1 checks
|
|
164
178
|
const h1Matches = html.match(/<h1[\s>]/gi) ?? []
|
|
165
179
|
if (h1Matches.length === 0) {
|
|
166
|
-
warnings
|
|
180
|
+
pushIssue(options.requireH1, errors, warnings, 'Missing H1 tag')
|
|
167
181
|
} else if (h1Matches.length > 1) {
|
|
168
|
-
warnings
|
|
182
|
+
pushIssue(options.requireH1, errors, warnings, `Multiple H1 tags found (${h1Matches.length})`)
|
|
169
183
|
}
|
|
170
184
|
|
|
171
185
|
// JSON-LD presence check
|
|
@@ -174,9 +188,86 @@ export function validatePage(html: string, options: PageValidationOptions): Vali
|
|
|
174
188
|
warnings.push('Missing JSON-LD structured data')
|
|
175
189
|
}
|
|
176
190
|
|
|
191
|
+
if (
|
|
192
|
+
options.requireArticleSchema &&
|
|
193
|
+
isLikelyArticlePath(options.pagePath) &&
|
|
194
|
+
!hasArticleJsonLd(html)
|
|
195
|
+
) {
|
|
196
|
+
errors.push('Missing valid Article JSON-LD for article route')
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
if (options.requireMaxImagePreviewLarge && !hasMaxImagePreviewLarge(html)) {
|
|
200
|
+
errors.push('Missing robots max-image-preview:large directive')
|
|
201
|
+
}
|
|
202
|
+
|
|
177
203
|
return { errors, warnings }
|
|
178
204
|
}
|
|
179
205
|
|
|
206
|
+
/**
 * Records a validation message in exactly one of the two result lists.
 *
 * @param asError - When truthy the message is recorded as an error; when
 *   `false` or `undefined` it is recorded as a warning.
 * @param errors - Error list to append to (mutated in place).
 * @param warnings - Warning list to append to (mutated in place).
 * @param message - Human-readable description of the issue.
 */
function pushIssue(
  asError: boolean | undefined,
  errors: string[],
  warnings: string[],
  message: string,
) {
  // Choose the destination once, then append.
  const destination = asError ? errors : warnings
  destination.push(message)
}
|
|
215
|
+
|
|
216
|
+
/**
 * Heuristically decides whether a built page path belongs to an article route.
 *
 * A path qualifies when one of its directory segments is exactly `article`,
 * `articles`, `news`, `story` or `stories` (case-insensitive). The segment may
 * be delimited by `/` or `\` and may be the first segment of the path, so
 * relative paths without a leading separator and Windows-style paths are
 * recognised too.
 *
 * @param pagePath - Page path as supplied by the caller; may be `undefined`.
 * @returns `true` when the path contains an article-like directory segment,
 *   `false` otherwise (including for `undefined` or an empty string).
 */
function isLikelyArticlePath(pagePath: string | undefined): boolean {
  if (!pagePath) return false
  // `(?:^|[\\/])` anchors the segment start; the trailing `[\\/]` requires the
  // keyword to be a directory, so `/newsletter/` and `/my-articles/` do not match.
  return /(?:^|[\\/])(?:article|articles|news|story|stories)[\\/]/i.test(pagePath)
}
|
|
220
|
+
|
|
221
|
+
/**
 * Reports whether any JSON-LD script block in the page declares an article type.
 *
 * Each block body is parsed independently. A block that fails to parse (or
 * whose inspection throws) is treated as "not an article" so that one
 * malformed block cannot hide a valid one elsewhere in the document.
 *
 * @param html - Full HTML of the page.
 * @returns `true` as soon as one block yields an article type, else `false`.
 */
function hasArticleJsonLd(html: string): boolean {
  return extractJsonLdBodies(html).some((body) => {
    try {
      const parsed = JSON.parse(body) as unknown
      return hasArticleType(parsed)
    } catch {
      // Best-effort: skip unparseable blocks and keep scanning.
      return false
    }
  })
}
|
|
230
|
+
|
|
231
|
+
/**
 * Collects the trimmed text content of every JSON-LD `<script>` element.
 *
 * The `type` attribute is matched case-insensitively and may be double-quoted,
 * single-quoted, or unquoted (as emitted by some HTML minifiers).
 * Whitespace-only bodies are dropped because they can never hold valid JSON.
 *
 * @param html - Full HTML of the page.
 * @returns Script bodies in document order; empty when none are present.
 */
function extractJsonLdBodies(html: string): string[] {
  const bodies: string[] = []
  const pattern =
    /<script\s[^>]*type\s*=\s*["']?application\/ld\+json["']?[^>]*>([\s\S]*?)<\/script>/gi
  for (const match of html.matchAll(pattern)) {
    const body = match[1]?.trim()
    if (body) bodies.push(body)
  }
  return bodies
}
|
|
239
|
+
|
|
240
|
+
/**
 * Recursively checks a parsed JSON-LD value for an article `@type`.
 *
 * Recognised types are `Article`, `NewsArticle` and `BlogPosting`. The search
 * descends into arrays and into an object's `@graph` member; other nested
 * properties are not inspected.
 *
 * @param value - Any value produced by parsing a JSON-LD block.
 * @returns `true` when the value, an array element, or a `@graph` descendant
 *   declares one of the recognised types.
 */
function hasArticleType(value: unknown): boolean {
  if (Array.isArray(value)) {
    for (const entry of value) {
      if (hasArticleType(entry)) return true
    }
    return false
  }
  if (!isRecord(value)) return false

  const isArticleName = (candidate: unknown): boolean =>
    candidate === 'Article' || candidate === 'NewsArticle' || candidate === 'BlogPosting'

  // `@type` may be a single string or a list of strings.
  const declared = value['@type']
  const declaresArticle = Array.isArray(declared)
    ? declared.some(isArticleName)
    : isArticleName(declared)

  return declaresArticle || hasArticleType(value['@graph'])
}
|
|
255
|
+
|
|
256
|
+
/**
 * Reports whether the page carries a robots meta tag that includes the
 * `max-image-preview:large` directive.
 *
 * Only `<meta>` tags whose `name` attribute equals `robots` (compared in
 * lower case) are considered. Their `content` is lower-cased, split on commas,
 * and each trimmed directive is compared for an exact match.
 *
 * @param html - Full HTML of the page.
 * @returns `true` when a matching directive is found, otherwise `false`.
 */
function hasMaxImagePreviewLarge(html: string): boolean {
  const metaTags = html.match(/<meta\s+[^>]*>/gi) ?? []
  return metaTags.some((tag) => {
    const isRobotsTag = getHtmlAttr(tag, 'name')?.toLowerCase() === 'robots'
    if (!isRobotsTag) return false
    const content = getHtmlAttr(tag, 'content')?.toLowerCase() ?? ''
    return content.split(',').some((part) => part.trim() === 'max-image-preview:large')
  })
}
|
|
266
|
+
|
|
267
|
+
/**
 * Type guard for non-null values whose `typeof` is `'object'`.
 *
 * Arrays also satisfy this guard, so callers that need to tell arrays apart
 * must test for them separately.
 *
 * @param value - Value to test.
 * @returns `true` when `value` is a non-null object.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) return false
  return typeof value === 'object'
}
|
|
270
|
+
|
|
180
271
|
function isNoindexMetaRefreshRedirect(html: string): boolean {
|
|
181
272
|
const metaTags = html.match(/<meta\s+[^>]*>/gi) ?? []
|
|
182
273
|
const hasRefresh = metaTags.some(
|