@growth-labs/seo 0.1.5 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -54
- package/dist/bindings.d.ts +127 -0
- package/dist/bindings.d.ts.map +1 -0
- package/dist/bindings.js +11 -0
- package/dist/bindings.js.map +1 -0
- package/dist/cron/prune-aeo-r2.d.ts +36 -0
- package/dist/cron/prune-aeo-r2.d.ts.map +1 -0
- package/dist/cron/prune-aeo-r2.js +94 -0
- package/dist/cron/prune-aeo-r2.js.map +1 -0
- package/dist/durable-objects/aeo-revalidation-coord.d.ts +69 -0
- package/dist/durable-objects/aeo-revalidation-coord.d.ts.map +1 -0
- package/dist/durable-objects/aeo-revalidation-coord.js +177 -0
- package/dist/durable-objects/aeo-revalidation-coord.js.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +79 -12
- package/dist/index.js.map +1 -1
- package/dist/middleware/seo.d.ts +44 -4
- package/dist/middleware/seo.d.ts.map +1 -1
- package/dist/middleware/seo.js +237 -41
- package/dist/middleware/seo.js.map +1 -1
- package/dist/options.d.ts +1293 -6
- package/dist/options.d.ts.map +1 -1
- package/dist/options.js +238 -1
- package/dist/options.js.map +1 -1
- package/dist/routes/aeo-twin.d.ts +5 -0
- package/dist/routes/aeo-twin.d.ts.map +1 -0
- package/dist/routes/aeo-twin.js +108 -0
- package/dist/routes/aeo-twin.js.map +1 -0
- package/dist/routes/apple-news.d.ts +4 -0
- package/dist/routes/apple-news.d.ts.map +1 -0
- package/dist/routes/apple-news.js +28 -0
- package/dist/routes/apple-news.js.map +1 -0
- package/dist/routes/llms-full.d.ts +4 -0
- package/dist/routes/llms-full.d.ts.map +1 -0
- package/dist/routes/llms-full.js +29 -0
- package/dist/routes/llms-full.js.map +1 -0
- package/dist/routes/revalidate.d.ts +16 -0
- package/dist/routes/revalidate.d.ts.map +1 -0
- package/dist/routes/revalidate.js +243 -0
- package/dist/routes/revalidate.js.map +1 -0
- package/dist/routes/rss.d.ts.map +1 -1
- package/dist/routes/rss.js +4 -1
- package/dist/routes/rss.js.map +1 -1
- package/dist/routes/sitemap-markdown.d.ts +4 -0
- package/dist/routes/sitemap-markdown.d.ts.map +1 -0
- package/dist/routes/sitemap-markdown.js +32 -0
- package/dist/routes/sitemap-markdown.js.map +1 -0
- package/dist/types.d.ts +16 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/aeo-summary.d.ts +35 -0
- package/dist/utils/aeo-summary.d.ts.map +1 -0
- package/dist/utils/aeo-summary.js +141 -0
- package/dist/utils/aeo-summary.js.map +1 -0
- package/dist/utils/aeo-twin-emitter.d.ts +79 -0
- package/dist/utils/aeo-twin-emitter.d.ts.map +1 -0
- package/dist/utils/aeo-twin-emitter.js +99 -0
- package/dist/utils/aeo-twin-emitter.js.map +1 -0
- package/dist/utils/aeo.d.ts +62 -12
- package/dist/utils/aeo.d.ts.map +1 -1
- package/dist/utils/aeo.js +187 -26
- package/dist/utils/aeo.js.map +1 -1
- package/dist/utils/apple-news-anf.d.ts +38 -0
- package/dist/utils/apple-news-anf.d.ts.map +1 -0
- package/dist/utils/apple-news-anf.js +120 -0
- package/dist/utils/apple-news-anf.js.map +1 -0
- package/dist/utils/apple-news-rss.d.ts +31 -0
- package/dist/utils/apple-news-rss.d.ts.map +1 -0
- package/dist/utils/apple-news-rss.js +103 -0
- package/dist/utils/apple-news-rss.js.map +1 -0
- package/dist/utils/content-filter.d.ts +52 -0
- package/dist/utils/content-filter.d.ts.map +1 -0
- package/dist/utils/content-filter.js +75 -0
- package/dist/utils/content-filter.js.map +1 -0
- package/dist/utils/crawler-class.d.ts +39 -0
- package/dist/utils/crawler-class.d.ts.map +1 -0
- package/dist/utils/crawler-class.js +127 -0
- package/dist/utils/crawler-class.js.map +1 -0
- package/dist/utils/effective-auth.d.ts +28 -0
- package/dist/utils/effective-auth.d.ts.map +1 -0
- package/dist/utils/effective-auth.js +33 -0
- package/dist/utils/effective-auth.js.map +1 -0
- package/dist/utils/fcrdns.d.ts +73 -0
- package/dist/utils/fcrdns.d.ts.map +1 -0
- package/dist/utils/fcrdns.js +219 -0
- package/dist/utils/fcrdns.js.map +1 -0
- package/dist/utils/fresh-layer.d.ts +53 -0
- package/dist/utils/fresh-layer.d.ts.map +1 -0
- package/dist/utils/fresh-layer.js +147 -0
- package/dist/utils/fresh-layer.js.map +1 -0
- package/dist/utils/index.d.ts +14 -3
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +14 -3
- package/dist/utils/index.js.map +1 -1
- package/dist/utils/json-ld/article.d.ts +13 -1
- package/dist/utils/json-ld/article.d.ts.map +1 -1
- package/dist/utils/json-ld/article.js +37 -8
- package/dist/utils/json-ld/article.js.map +1 -1
- package/dist/utils/llms-full.d.ts +29 -0
- package/dist/utils/llms-full.d.ts.map +1 -0
- package/dist/utils/llms-full.js +67 -0
- package/dist/utils/llms-full.js.map +1 -0
- package/dist/utils/meta.d.ts +4 -1
- package/dist/utils/meta.d.ts.map +1 -1
- package/dist/utils/meta.js +25 -2
- package/dist/utils/meta.js.map +1 -1
- package/dist/utils/sitemap-markdown.d.ts +24 -0
- package/dist/utils/sitemap-markdown.d.ts.map +1 -0
- package/dist/utils/sitemap-markdown.js +57 -0
- package/dist/utils/sitemap-markdown.js.map +1 -0
- package/dist/utils/staleness.d.ts +27 -0
- package/dist/utils/staleness.d.ts.map +1 -0
- package/dist/utils/staleness.js +46 -0
- package/dist/utils/staleness.js.map +1 -0
- package/dist/utils/validation.d.ts +41 -0
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +78 -0
- package/dist/utils/validation.js.map +1 -1
- package/package.json +13 -1
|
@@ -1,4 +1,16 @@
|
|
|
1
1
|
import type { ResolvedSeoOptions } from '../../options.js';
|
|
2
2
|
import type { ContentItem, JsonLdObject } from '../../types.js';
|
|
3
|
-
|
|
3
|
+
/**
|
|
4
|
+
* Redaction mode for gated content.
|
|
5
|
+
* 'full' — no redaction. Used for verified search crawlers under Flexible Sampling
|
|
6
|
+
* and for the member's own request.
|
|
7
|
+
* 'redacted' — members-safe output: description truncated, articleBody absent, FAQ
|
|
8
|
+
* answers truncated. Google Rich Results still passes (description is
|
|
9
|
+
* truncated, not omitted).
|
|
10
|
+
*/
|
|
11
|
+
export type ArticleRenderMode = 'full' | 'redacted';
|
|
12
|
+
export interface GenerateArticleJsonLdOptions {
|
|
13
|
+
mode?: ArticleRenderMode;
|
|
14
|
+
}
|
|
15
|
+
export declare function generateArticleJsonLd(item: ContentItem, options: ResolvedSeoOptions, renderOptions?: GenerateArticleJsonLdOptions): JsonLdObject;
|
|
4
16
|
//# sourceMappingURL=article.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"article.d.ts","sourceRoot":"","sources":["../../../src/utils/json-ld/article.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAC1D,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;
|
|
1
|
+
{"version":3,"file":"article.d.ts","sourceRoot":"","sources":["../../../src/utils/json-ld/article.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAC1D,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAE/D;;;;;;;GAOG;AACH,MAAM,MAAM,iBAAiB,GAAG,MAAM,GAAG,UAAU,CAAA;AAInD,MAAM,WAAW,4BAA4B;IAC5C,IAAI,CAAC,EAAE,iBAAiB,CAAA;CACxB;AAED,wBAAgB,qBAAqB,CACpC,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE,kBAAkB,EAC3B,aAAa,GAAE,4BAAiC,GAC9C,YAAY,CAkFd"}
|
|
@@ -1,16 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
export function generateArticleJsonLd(item, options) {
|
|
1
|
+
const MAX_REDACTED_DESCRIPTION_CHARS = 160;
|
|
2
|
+
export function generateArticleJsonLd(item, options, renderOptions = {}) {
|
|
3
3
|
const { organization, schemaType, audioNarration } = options;
|
|
4
|
+
const mode = renderOptions.mode ?? 'full';
|
|
5
|
+
const isMemberItem = item.access === 'members';
|
|
6
|
+
const applyRedaction = mode === 'redacted' && isMemberItem;
|
|
4
7
|
const authors = (item.authors ?? []).map((author) => {
|
|
5
8
|
const person = { '@type': 'Person', name: author.name };
|
|
6
9
|
if (author.url)
|
|
7
10
|
person.url = author.url;
|
|
8
11
|
if (author.jobTitle)
|
|
9
12
|
person.jobTitle = author.jobTitle;
|
|
13
|
+
if (author.knowsAbout?.length)
|
|
14
|
+
person.knowsAbout = author.knowsAbout;
|
|
10
15
|
if (author.sameAs?.length)
|
|
11
16
|
person.sameAs = author.sameAs;
|
|
12
17
|
return person;
|
|
13
18
|
});
|
|
19
|
+
// Derive isAccessibleForFree: explicit field wins, otherwise derive from access.
|
|
20
|
+
const isAccessibleForFree = item.isAccessibleForFree ?? item.access !== 'members';
|
|
14
21
|
const result = {
|
|
15
22
|
'@context': 'https://schema.org',
|
|
16
23
|
'@type': schemaType,
|
|
@@ -29,21 +36,30 @@ export function generateArticleJsonLd(item, options) {
|
|
|
29
36
|
},
|
|
30
37
|
},
|
|
31
38
|
author: authors,
|
|
32
|
-
|
|
39
|
+
// Google requires the string form 'True'/'False' for Rich Results when paywall markup is emitted.
|
|
40
|
+
// See https://developers.google.com/search/docs/appearance/structured-data/paywalled-content
|
|
41
|
+
isAccessibleForFree: isAccessibleForFree ? 'True' : 'False',
|
|
33
42
|
};
|
|
34
43
|
if (item.image) {
|
|
35
|
-
|
|
44
|
+
// Consumers compose @growth-labs/media if they need multi-aspect-ratio variants;
|
|
45
|
+
// this utility takes whatever URL(s) the consumer supplied in the ContentItem.
|
|
46
|
+
result.image = Array.isArray(item.image) ? item.image : [item.image];
|
|
47
|
+
}
|
|
48
|
+
if (item.description) {
|
|
49
|
+
result.description = applyRedaction
|
|
50
|
+
? truncateDescription(item.description, MAX_REDACTED_DESCRIPTION_CHARS)
|
|
51
|
+
: item.description;
|
|
36
52
|
}
|
|
37
|
-
if (item.description)
|
|
38
|
-
result.description = item.description;
|
|
39
53
|
if (item.datePublished)
|
|
40
54
|
result.datePublished = item.datePublished;
|
|
41
55
|
if (item.dateModified)
|
|
42
56
|
result.dateModified = item.dateModified;
|
|
43
|
-
|
|
57
|
+
// hasPart paywall marker — emitted whenever the item is gated (either via explicit
|
|
58
|
+
// isAccessibleForFree: false or via access: 'members').
|
|
59
|
+
if (!isAccessibleForFree && item.paywallCssSelector) {
|
|
44
60
|
result.hasPart = {
|
|
45
61
|
'@type': 'WebPageElement',
|
|
46
|
-
isAccessibleForFree:
|
|
62
|
+
isAccessibleForFree: 'False',
|
|
47
63
|
cssSelector: item.paywallCssSelector,
|
|
48
64
|
};
|
|
49
65
|
}
|
|
@@ -63,4 +79,17 @@ export function generateArticleJsonLd(item, options) {
|
|
|
63
79
|
}
|
|
64
80
|
return result;
|
|
65
81
|
}
|
|
82
|
+
/**
 * Truncate a description so the result is at most `maxChars` UTF-16 code units
 * (the unit `String.prototype.length` counts), ellipsis included.
 *
 * The cut prefers the last space inside the budget, provided that space lies
 * past 60% of the budget; otherwise the text is cut hard at the budget. The
 * cut never lands between the two halves of a surrogate pair, so the output
 * never ends in a lone surrogate (which would be malformed text in JSON-LD).
 *
 * @param {string} text - Description to shorten.
 * @param {number} maxChars - Maximum length of the result, in UTF-16 code units.
 * @returns {string} `text` unchanged when it already fits, otherwise the
 *   shortened text followed by a single `…`.
 */
function truncateDescription(text, maxChars) {
    if (text.length <= maxChars)
        return text;
    // Reserve one unit for the ellipsis. Clamp at 0 so a tiny `maxChars` cannot
    // produce a negative index, which `slice` would interpret as "from the end".
    let targetLen = Math.max(0, maxChars - 1);
    // If the last kept unit is a high surrogate, its low half would be cut off:
    // drop the whole code point instead of emitting half of it.
    const lastKeptUnit = text.charCodeAt(targetLen - 1);
    if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff)
        targetLen -= 1;
    const sliced = text.slice(0, targetLen);
    const lastSpace = sliced.lastIndexOf(' ');
    // Only honor the word boundary when it does not discard too much text.
    const cutAt = lastSpace > targetLen * 0.6 ? lastSpace : targetLen;
    return `${sliced.slice(0, cutAt).trimEnd()}…`;
}
|
|
66
95
|
//# sourceMappingURL=article.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"article.js","sourceRoot":"","sources":["../../../src/utils/json-ld/article.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"article.js","sourceRoot":"","sources":["../../../src/utils/json-ld/article.ts"],"names":[],"mappings":"AAaA,MAAM,8BAA8B,GAAG,GAAG,CAAA;AAM1C,MAAM,UAAU,qBAAqB,CACpC,IAAiB,EACjB,OAA2B,EAC3B,gBAA8C,EAAE;IAEhD,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,GAAG,OAAO,CAAA;IAC5D,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,IAAI,MAAM,CAAA;IACzC,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,KAAK,SAAS,CAAA;IAC9C,MAAM,cAAc,GAAG,IAAI,KAAK,UAAU,IAAI,YAAY,CAAA;IAE1D,MAAM,OAAO,GAAmB,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;QACnE,MAAM,MAAM,GAAiB,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,CAAA;QACrE,IAAI,MAAM,CAAC,GAAG;YAAE,MAAM,CAAC,GAAG,GAAG,MAAM,CAAC,GAAG,CAAA;QACvC,IAAI,MAAM,CAAC,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAA;QACtD,IAAI,MAAM,CAAC,UAAU,EAAE,MAAM;YAAE,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAA;QACpE,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM;YAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAA;QACxD,OAAO,MAAM,CAAA;IACd,CAAC,CAAC,CAAA;IAEF,iFAAiF;IACjF,MAAM,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,CAAA;IAEjF,MAAM,MAAM,GAAiB;QAC5B,UAAU,EAAE,oBAAoB;QAChC,OAAO,EAAE,UAAU;QACnB,QAAQ,EAAE,IAAI,CAAC,KAAK;QACpB,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,gBAAgB,EAAE;YACjB,OAAO,EAAE,SAAS;YAClB,KAAK,EAAE,IAAI,CAAC,GAAG;SACf;QACD,SAAS,EAAE;YACV,OAAO,EAAE,cAAc;YACvB,IAAI,EAAE,YAAY,CAAC,IAAI;YACvB,IAAI,EAAE;gBACL,OAAO,EAAE,aAAa;gBACtB,GAAG,EAAE,YAAY,CAAC,IAAI;aACtB;SACD;QACD,MAAM,EAAE,OAAO;QACf,kGAAkG;QAClG,6FAA6F;QAC7F,mBAAmB,EAAE,mBAAmB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO;KAC3D,CAAA;IAED,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,iFAAiF;QACjF,+EAA+E;QAC/E,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACrE,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACtB,MAAM,CAAC,WAAW,GAAG,cAAc;YAClC,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,WAAW,EAAE,8BAA8B,CAAC;YACvE,CAAC,CAAC,IAAI,CAAC,WAAW,CAAA;IACpB,CAAC;IACD,IAAI,IAAI,CAAC,aAAa;QAAE,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC,aAAa,CAAA;IACjE,IAAI,IAAI,CAAC,YAAY;QAAE,MAA
M,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,CAAA;IAE9D,mFAAmF;IACnF,wDAAwD;IACxD,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;QACrD,MAAM,CAAC,OAAO,GAAG;YAChB,OAAO,EAAE,gBAAgB;YACzB,mBAAmB,EAAE,OAAO;YAC5B,WAAW,EAAE,IAAI,CAAC,kBAAkB;SACpC,CAAA;IACF,CAAC;IAED,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,MAAM,CAAC,eAAe,GAAG;YACxB,OAAO,EAAE,aAAa;YACtB,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG;YAC1B,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;YAC7B,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC7D,CAAA;IACF,CAAC;IAED,IAAI,cAAc,EAAE,OAAO,EAAE,CAAC;QAC7B,MAAM,CAAC,SAAS,GAAG;YAClB,OAAO,EAAE,wBAAwB;YACjC,WAAW,EAAE,cAAc,CAAC,kBAAkB;SAC9C,CAAA;IACF,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAAC,IAAY,EAAE,QAAgB;IAC1D,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,IAAI,CAAA;IACxC,MAAM,SAAS,GAAG,QAAQ,GAAG,CAAC,CAAA,CAAC,uBAAuB;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAA;IACvC,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,CAAA;IACzC,MAAM,KAAK,GAAG,SAAS,GAAG,SAAS,GAAG,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAA;IACjE,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,OAAO,EAAE,GAAG,CAAA;AAC9C,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js';
|
|
2
|
+
export interface GenerateLlmsFullOptions {
|
|
3
|
+
items: ContentItem[];
|
|
4
|
+
siteName: string;
|
|
5
|
+
contentMarkdown?: (item: ContentItem) => string | undefined;
|
|
6
|
+
maxBytes?: number;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Generate `/llms-full.txt` — a bulk public corpus dump for LLM retrievers.
|
|
10
|
+
*
|
|
11
|
+
* Structure:
|
|
12
|
+
* # <siteName>
|
|
13
|
+
*
|
|
14
|
+
* ## <article title>
|
|
15
|
+
* URL: <canonical>
|
|
16
|
+
*
|
|
17
|
+
* <markdown body>
|
|
18
|
+
*
|
|
19
|
+
* ---
|
|
20
|
+
*
|
|
21
|
+
* [next article...]
|
|
22
|
+
*
|
|
23
|
+
* Rules:
|
|
24
|
+
* - Members items excluded UNCONDITIONALLY.
|
|
25
|
+
* - Size cap honored; on-boundary truncation with a note.
|
|
26
|
+
* - Deterministic order: items passed in, order preserved.
|
|
27
|
+
*/
|
|
28
|
+
export declare function generateLlmsFull({ items, siteName, contentMarkdown, maxBytes, }: GenerateLlmsFullOptions): string;
|
|
29
|
+
//# sourceMappingURL=llms-full.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llms-full.d.ts","sourceRoot":"","sources":["../../src/utils/llms-full.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAG9C,MAAM,WAAW,uBAAuB;IACvC,KAAK,EAAE,WAAW,EAAE,CAAA;IACpB,QAAQ,EAAE,MAAM,CAAA;IAGhB,eAAe,CAAC,EAAE,CAAC,IAAI,EAAE,WAAW,KAAK,MAAM,GAAG,SAAS,CAAA;IAI3D,QAAQ,CAAC,EAAE,MAAM,CAAA;CACjB;AAID;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,gBAAgB,CAAC,EAChC,KAAK,EACL,QAAQ,EACR,eAAe,EACf,QAA4B,GAC5B,EAAE,uBAAuB,GAAG,MAAM,CA4BlC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { forLlmsFull } from './content-filter.js';
|
|
2
|
+
const DEFAULT_MAX_BYTES = 8 * 1024 * 1024;
|
|
3
|
+
/**
 * Generate `/llms-full.txt` — a bulk public corpus dump for LLM retrievers.
 *
 * Structure:
 *   # <siteName>
 *
 *   ## <article title>
 *   URL: <canonical>
 *
 *   <markdown body>
 *
 *   ---
 *
 *   [next article...]
 *
 * Rules:
 *   - Items are first passed through `forLlmsFull` (content-filter), which is
 *     where members items are excluded.
 *   - Size cap honored: sizes are measured in UTF-8 bytes, items are only ever
 *     included whole, and the truncation notice itself counts against the cap.
 *   - Deterministic order: items passed in, order preserved.
 *
 * @param {object} params
 * @param {Array<object>} params.items - Content items to consider.
 * @param {string} params.siteName - Rendered as the top-level `# ` heading.
 * @param {(item: object) => (string|undefined)} [params.contentMarkdown] -
 *   Optional lookup for an item's markdown body; when it yields nothing the
 *   item's description is used by `buildChunk`.
 * @param {number} [params.maxBytes] - Upper bound on the output size in UTF-8
 *   bytes. Defaults to `DEFAULT_MAX_BYTES`.
 * @returns {string} The corpus text. If even the header plus the truncation
 *   notice exceed `maxBytes`, those two are still returned (nothing smaller
 *   would be a meaningful document).
 */
export function generateLlmsFull({ items, siteName, contentMarkdown, maxBytes = DEFAULT_MAX_BYTES, }) {
    const encoder = new TextEncoder();
    const byteLength = (text) => encoder.encode(text).length;
    const header = `# ${siteName}\n\n`;
    const notice = '\n\n---\n\n> Corpus truncated at configured size cap. See /sitemap-markdown.xml for the full twin URL list.\n';
    const chunks = [];
    let byteCount = byteLength(header);
    let truncated = false;
    for (const item of forLlmsFull(items)) {
        const text = buildChunk(item, contentMarkdown?.(item));
        const bytes = byteLength(text);
        if (byteCount + bytes > maxBytes) {
            // Stop at the first item that does not fit so order stays contiguous.
            truncated = true;
            break;
        }
        chunks.push({ text, bytes });
        byteCount += bytes;
    }
    if (truncated) {
        // The notice is part of the payload: give back trailing items until it
        // fits, so the documented cap holds for the final string as well.
        const noticeBytes = byteLength(notice);
        while (chunks.length > 0 && byteCount + noticeBytes > maxBytes) {
            byteCount -= chunks.pop().bytes;
        }
    }
    const corpus = header + chunks.map((chunk) => chunk.text).join('');
    return truncated ? corpus + notice : corpus;
}
|
|
46
|
+
/**
 * Render one content item as a section of the `/llms-full.txt` corpus.
 *
 * Layout: a `## <title>` heading, a `URL:` line, optional `Published:` and
 * `Modified:` lines, the section text, then a `---` separator. The section
 * text is the trimmed `body` when it has visible content, otherwise the
 * trimmed item description; if neither has content no text line is emitted.
 *
 * @param {object} item - Content item; reads `title`, `url`, `datePublished`,
 *   `dateModified` and `description`.
 * @param {string} [body] - Markdown body for the item, if one is available.
 * @returns {string} The section text, ending with a blank line.
 */
function buildChunk(item, body) {
    const lines = [`## ${item.title}`, '', `URL: ${item.url}`];
    if (item.datePublished)
        lines.push(`Published: ${item.datePublished}`);
    if (item.dateModified)
        lines.push(`Modified: ${item.dateModified}`);
    lines.push('');
    // `||` rather than `??` on purpose: a body that is empty or whitespace-only
    // after trimming must fall through to the description instead of producing
    // an empty section.
    const text = body?.trim() || item.description?.trim();
    if (text) {
        lines.push(text);
    }
    lines.push('', '---', '');
    return `${lines.join('\n')}\n`;
}
|
|
67
|
+
//# sourceMappingURL=llms-full.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llms-full.js","sourceRoot":"","sources":["../../src/utils/llms-full.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAcjD,MAAM,iBAAiB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAA;AAEzC;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAChC,KAAK,EACL,QAAQ,EACR,eAAe,EACf,QAAQ,GAAG,iBAAiB,GACH;IACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,CAAA;IACnC,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAA;IAEjC,MAAM,MAAM,GAAG,KAAK,QAAQ,MAAM,CAAA;IAClC,IAAI,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,CAAA;IAC7C,MAAM,KAAK,GAAa,CAAC,MAAM,CAAC,CAAA;IAChC,IAAI,SAAS,GAAG,KAAK,CAAA;IAErB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,eAAe,EAAE,CAAC,IAAI,CAAC,CAAA;QACpC,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACpC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAA;QAC/C,IAAI,SAAS,GAAG,UAAU,GAAG,QAAQ,EAAE,CAAC;YACvC,SAAS,GAAG,IAAI,CAAA;YAChB,MAAK;QACN,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACjB,SAAS,IAAI,UAAU,CAAA;IACxB,CAAC;IAED,IAAI,SAAS,EAAE,CAAC;QACf,KAAK,CAAC,IAAI,CACT,+GAA+G,CAC/G,CAAA;IACF,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;AACtB,CAAC;AAED,SAAS,UAAU,CAAC,IAAiB,EAAE,IAAwB;IAC9D,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,CAAA;IAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;IAC9B,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;IACtE,IAAI,IAAI,CAAC,YAAY;QAAE,KAAK,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,YAAY,EAAE,CAAC,CAAA;IACnE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,IAAI,IAAI,EAAE,CAAC;QACV,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAA;IACxB,CAAC;SAAM,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAA;IACpC,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACjB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAA;AAC/B,CAAC"}
|
package/dist/utils/meta.d.ts
CHANGED
|
@@ -3,5 +3,8 @@ import type { CanonicalLink, ContentItem, MetaTag } from '../types.js';
|
|
|
3
3
|
export type OgVariant = 'article' | 'product' | 'website';
|
|
4
4
|
export declare function applyTrailingSlash(url: string, policy: string): string;
|
|
5
5
|
export declare function generateCanonical(url: string, options: ResolvedSeoOptions): CanonicalLink;
|
|
6
|
-
export
|
|
6
|
+
export interface GenerateMetaOptions {
|
|
7
|
+
flexibleSamplingActive?: boolean;
|
|
8
|
+
}
|
|
9
|
+
export declare function generateMeta(item: ContentItem, variant: OgVariant, options: ResolvedSeoOptions, metaOptions?: GenerateMetaOptions): MetaTag[];
|
|
7
10
|
//# sourceMappingURL=meta.d.ts.map
|
package/dist/utils/meta.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"meta.d.ts","sourceRoot":"","sources":["../../src/utils/meta.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,aAAa,CAAA;AAEtE,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAEzD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAmCtE;AAED,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,kBAAkB,GAAG,aAAa,CAKzF;AAED,wBAAgB,YAAY,CAC3B,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE,SAAS,EAClB,OAAO,EAAE,kBAAkB,
|
|
1
|
+
{"version":3,"file":"meta.d.ts","sourceRoot":"","sources":["../../src/utils/meta.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,aAAa,CAAA;AAEtE,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAEzD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAmCtE;AAED,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,kBAAkB,GAAG,aAAa,CAKzF;AAED,MAAM,WAAW,mBAAmB;IAKnC,sBAAsB,CAAC,EAAE,OAAO,CAAA;CAChC;AAED,wBAAgB,YAAY,CAC3B,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE,SAAS,EAClB,OAAO,EAAE,kBAAkB,EAC3B,WAAW,GAAE,mBAAwB,GACnC,OAAO,EAAE,CAyHX"}
|
package/dist/utils/meta.js
CHANGED
|
@@ -42,11 +42,12 @@ export function generateCanonical(url, options) {
|
|
|
42
42
|
href: applyTrailingSlash(url, options.trailingSlash),
|
|
43
43
|
};
|
|
44
44
|
}
|
|
45
|
-
export function generateMeta(item, variant, options) {
|
|
45
|
+
export function generateMeta(item, variant, options, metaOptions = {}) {
|
|
46
46
|
const tags = [];
|
|
47
47
|
const { defaults, organization } = options;
|
|
48
48
|
const canonicalUrl = applyTrailingSlash(item.url, options.trailingSlash);
|
|
49
|
-
const
|
|
49
|
+
const firstImage = Array.isArray(item.image) ? item.image[0] : item.image;
|
|
50
|
+
const image = firstImage ?? defaults.defaultImage ?? '';
|
|
50
51
|
const ogType = variant === 'article' ? 'article' : variant === 'product' ? 'og:product' : 'website';
|
|
51
52
|
// og:type
|
|
52
53
|
tags.push({ property: 'og:type', content: ogType });
|
|
@@ -93,6 +94,28 @@ export function generateMeta(item, variant, options) {
|
|
|
93
94
|
}
|
|
94
95
|
// robots
|
|
95
96
|
tags.push({ name: 'robots', content: 'max-image-preview:large' });
|
|
97
|
+
// Flexible Sampling: prevent Google from serving the premium body from cache.
|
|
98
|
+
if (metaOptions.flexibleSamplingActive) {
|
|
99
|
+
tags.push({ name: 'googlebot', content: 'noarchive' });
|
|
100
|
+
}
|
|
101
|
+
// Apple News meta tags.
|
|
102
|
+
// `apple-news-publishable` is opt-in for the channel to pick up the article.
|
|
103
|
+
// Resolved at item level: `appleNewsPublishable: 'no'` overrides the channel default.
|
|
104
|
+
// Channel default comes from options.appleNews.defaultPublishable.
|
|
105
|
+
if (options.appleNews?.enabled) {
|
|
106
|
+
const channelDefault = options.appleNews.defaultPublishable;
|
|
107
|
+
const effective = item.appleNewsPublishable ?? channelDefault;
|
|
108
|
+
if (effective === 'yes' && item.access !== 'members') {
|
|
109
|
+
tags.push({ name: 'apple-news-publishable', content: 'yes' });
|
|
110
|
+
}
|
|
111
|
+
if (item.appleNewsId) {
|
|
112
|
+
tags.push({ name: 'apple-news-id', content: item.appleNewsId });
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// news_keywords — read by both Google News and Apple News.
|
|
116
|
+
if (item.newsKeywords?.length) {
|
|
117
|
+
tags.push({ name: 'news_keywords', content: item.newsKeywords.join(', ') });
|
|
118
|
+
}
|
|
96
119
|
// Article-specific
|
|
97
120
|
if (variant === 'article') {
|
|
98
121
|
if (item.datePublished) {
|
package/dist/utils/meta.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"meta.js","sourceRoot":"","sources":["../../src/utils/meta.ts"],"names":[],"mappings":"AAKA,MAAM,UAAU,kBAAkB,CAAC,GAAW,EAAE,MAAc;IAC7D,IAAI,MAAM,KAAK,OAAO,EAAE,CAAC;QACxB,+CAA+C;QAC/C,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;YACtC,IAAI,CAAC;gBACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,EAAE,CAAC;oBAC7B,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;oBACrD,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;gBACzB,CAAC;gBACD,OAAO,GAAG,CAAA;YACX,CAAC;YAAC,MAAM,CAAC;gBACR,OAAO,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;YACpE,CAAC;QACF,CAAC;QACD,OAAO,GAAG,CAAA;IACX,CAAC;IACD,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC;gBACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;gBAC3B,oDAAoD;gBACpD,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;oBACpC,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;oBACtD,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;gBACzB,CAAC;gBACD,OAAO,GAAG,CAAA;YACX,CAAC;YAAC,MAAM,CAAC;gBACR,OAAO,GAAG,GAAG,GAAG,CAAA;YACjB,CAAC;QACF,CAAC;QACD,OAAO,GAAG,CAAA;IACX,CAAC;IACD,SAAS;IACT,OAAO,GAAG,CAAA;AACX,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,GAAW,EAAE,OAA2B;IACzE,OAAO;QACN,GAAG,EAAE,WAAW;QAChB,IAAI,EAAE,kBAAkB,CAAC,GAAG,EAAE,OAAO,CAAC,aAAa,CAAC;KACpD,CAAA;AACF,CAAC;
|
|
1
|
+
{"version":3,"file":"meta.js","sourceRoot":"","sources":["../../src/utils/meta.ts"],"names":[],"mappings":"AAKA,MAAM,UAAU,kBAAkB,CAAC,GAAW,EAAE,MAAc;IAC7D,IAAI,MAAM,KAAK,OAAO,EAAE,CAAC;QACxB,+CAA+C;QAC/C,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;YACtC,IAAI,CAAC;gBACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;gBAC3B,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,EAAE,CAAC;oBAC7B,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;oBACrD,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;gBACzB,CAAC;gBACD,OAAO,GAAG,CAAA;YACX,CAAC;YAAC,MAAM,CAAC;gBACR,OAAO,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;YACpE,CAAC;QACF,CAAC;QACD,OAAO,GAAG,CAAA;IACX,CAAC;IACD,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC;gBACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;gBAC3B,oDAAoD;gBACpD,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;oBACpC,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;oBACtD,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;gBACzB,CAAC;gBACD,OAAO,GAAG,CAAA;YACX,CAAC;YAAC,MAAM,CAAC;gBACR,OAAO,GAAG,GAAG,GAAG,CAAA;YACjB,CAAC;QACF,CAAC;QACD,OAAO,GAAG,CAAA;IACX,CAAC;IACD,SAAS;IACT,OAAO,GAAG,CAAA;AACX,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,GAAW,EAAE,OAA2B;IACzE,OAAO;QACN,GAAG,EAAE,WAAW;QAChB,IAAI,EAAE,kBAAkB,CAAC,GAAG,EAAE,OAAO,CAAC,aAAa,CAAC;KACpD,CAAA;AACF,CAAC;AAUD,MAAM,UAAU,YAAY,CAC3B,IAAiB,EACjB,OAAkB,EAClB,OAA2B,EAC3B,cAAmC,EAAE;IAErC,MAAM,IAAI,GAAc,EAAE,CAAA;IAC1B,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,OAAO,CAAA;IAC1C,MAAM,YAAY,GAAG,kBAAkB,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,aAAa,CAAC,CAAA;IACxE,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAA;IACzE,MAAM,KAAK,GAAG,UAAU,IAAI,QAAQ,CAAC,YAAY,IAAI,EAAE,CAAA;IAEvD,MAAM,MAAM,GACX,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,C
AAC,CAAC,CAAC,SAAS,CAAA;IAErF,UAAU;IACV,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAA;IAEnD,WAAW;IACX,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAA;IAExD,iBAAiB;IACjB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAA;IACrE,CAAC;IAED,SAAS;IACT,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAA;IAExD,WAAW;IACX,IAAI,KAAK,EAAE,CAAC;QACX,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAA;QACnD,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAA;QAC1D,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,iBAAiB,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAA;IAC3D,CAAC;IAED,eAAe;IACf,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,cAAc,EAAE,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,CAAC,CAAA;IAEnE,YAAY;IACZ,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,CAAA;IAC7C,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAA;IAErD,sBAAsB;IACtB,IAAI,IAAI,CAAC,gBAAgB,EAAE,MAAM,EAAE,CAAC;QACnC,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,qBAAqB,EAAE,OAAO,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC,CAAA;QAClE,CAAC;IACF,CAAC;IAED,eAAe;IACf,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,QAAQ,CAAC,eAAe,EAAE,CAAC,CAAA;IAEtE,eAAe;IACf,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC1B,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAA;IACnE,CAAC;IAED,gBAAgB;IAChB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAA;IAEzD,sBAAsB;IACtB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAA;IACtE,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,EAAE,CAAC;QACX,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAA;IACrD,CAAC;IAED,SAAS;IACT,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,yBAAyB,EAAE,CAAC,CAAA;IAEjE,8EAA8E;IAC9E,IAAI,WAAW,CAAC,sBAAsB,EA
AE,CAAC;QACxC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC,CAAA;IACvD,CAAC;IAED,wBAAwB;IACxB,6EAA6E;IAC7E,sFAAsF;IACtF,mEAAmE;IACnE,IAAI,OAAO,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;QAChC,MAAM,cAAc,GAAG,OAAO,CAAC,SAAS,CAAC,kBAAkB,CAAA;QAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,oBAAoB,IAAI,cAAc,CAAA;QAC7D,IAAI,SAAS,KAAK,KAAK,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACtD,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,wBAAwB,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAA;QAC9D,CAAC;QACD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAA;QAChE,CAAC;IACF,CAAC;IAED,2DAA2D;IAC3D,IAAI,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC;QAC/B,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,mBAAmB;IACnB,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;QAC3B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,wBAAwB,EAAE,OAAO,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QAC/E,CAAC;QACD,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,uBAAuB,EAAE,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE,CAAC,CAAA;QAC7E,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;YAC1B,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gBACnC,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,OAAO,EAAE,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAA;YAC9E,CAAC;QACF,CAAC;IACF,CAAC;IAED,mBAAmB;IACnB,IAAI,OAAO,KAAK,SAAS,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,OAAO,CAAA;QACtB,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,sBAAsB,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;QACzE,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,wBAAwB,EAAE,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAA;QACtE,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,sBAAsB,EAAE,OAAO,EAAE,CAAC,CAAC,YAAY,EAAE,CAAC,CAAA;QACxE,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YACb,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,eAAe,EAAE,OAAO,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAA;QAC3D,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js';
|
|
2
|
+
export interface MarkdownSitemapEntry {
|
|
3
|
+
primaryTwinUrl: string;
|
|
4
|
+
lastmod?: string;
|
|
5
|
+
freshLayerLastModified?: string;
|
|
6
|
+
}
|
|
7
|
+
export interface GenerateMarkdownSitemapOptions {
|
|
8
|
+
items: ContentItem[];
|
|
9
|
+
twinUrl?: (articleUrl: string) => string;
|
|
10
|
+
freshLayerLastmod?: Map<string, string>;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Generate `/sitemap-markdown.xml` — the AEO twin URL sitemap.
|
|
14
|
+
*
|
|
15
|
+
* Google discovers URLs via crawlable links and sitemaps, not MIME-probing. This
|
|
16
|
+
* sitemap lists every emitted `.md` twin URL so the markdown corpus is discoverable
|
|
17
|
+
* the same way HTML pages are.
|
|
18
|
+
*
|
|
19
|
+
* Filtering rules (centralized in content-filter.forMarkdownSitemap):
|
|
20
|
+
* - Members items excluded unconditionally (no .md twin exists for them).
|
|
21
|
+
* - Public items with `includeInSitemap: false` excluded.
|
|
22
|
+
*/
|
|
23
|
+
export declare function generateMarkdownSitemap({ items, twinUrl, freshLayerLastmod, }: GenerateMarkdownSitemapOptions): string;
|
|
24
|
+
//# sourceMappingURL=sitemap-markdown.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap-markdown.d.ts","sourceRoot":"","sources":["../../src/utils/sitemap-markdown.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAwB9C,MAAM,WAAW,oBAAoB;IACpC,cAAc,EAAE,MAAM,CAAA;IACtB,OAAO,CAAC,EAAE,MAAM,CAAA;IAGhB,sBAAsB,CAAC,EAAE,MAAM,CAAA;CAC/B;AAED,MAAM,WAAW,8BAA8B;IAC9C,KAAK,EAAE,WAAW,EAAE,CAAA;IACpB,OAAO,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,MAAM,CAAA;IAGxC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACvC;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,EACvC,KAAK,EACL,OAAwB,EACxB,iBAAiB,GACjB,EAAE,8BAA8B,GAAG,MAAM,CAkBzC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { forMarkdownSitemap } from './content-filter.js';
|
|
2
|
+
/**
 * Escape the five characters that are reserved in XML text and attribute
 * values so arbitrary strings can be embedded in sitemap elements.
 *
 * @param {string} str - Raw text (a URL or a date string).
 * @returns {string} `str` with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeXml(str) {
    return str
        // `&` must be handled first; otherwise the ampersands introduced by
        // the later replacements would themselves be escaped again.
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
|
|
10
|
+
/**
 * Default mapping from an article URL to its primary markdown twin URL:
 * drop every trailing `/`, then append `.md`.
 *
 * The suffix is appended to the whole string, so anything after the path
 * (query string, fragment) ends up before `.md`.
 *
 * Per the package notes, consumers can override this via `aeoTwins.twinUrl`
 * in config, and the override then drives both twin emission and the sitemap
 * discovery URL.
 *
 * @param {string} articleUrl - URL of the HTML article.
 * @returns {string} URL of the markdown twin.
 */
function defaultTwinUrl(articleUrl) {
    // Walk back from the end until the last character is not a slash.
    let end = articleUrl.length;
    while (end > 0 && articleUrl[end - 1] === '/') {
        end -= 1;
    }
    return `${articleUrl.slice(0, end)}.md`;
}
|
|
21
|
+
/**
 * Build the XML for `/sitemap-markdown.xml`, the sitemap of AEO markdown twin
 * URLs.
 *
 * Items are first passed through `forMarkdownSitemap` from content-filter.
 * Per the package notes, that helper excludes members items (no `.md` twin
 * exists for them) and public items with `includeInSitemap: false`.
 *
 * Each remaining item becomes one `<url>` element whose `<loc>` is
 * `twinUrl(item.url)`. A `<lastmod>` is emitted only when a non-empty date is
 * available: the later of the fresh-layer value for `item.url` and the item's
 * `dateModified` (falling back to `datePublished`).
 *
 * @param {object} options
 * @param {Array} options.items - Content items to list.
 * @param {(articleUrl: string) => string} [options.twinUrl] - Article URL to
 *   twin URL mapper; defaults to `defaultTwinUrl`.
 * @param {Map<string, string>} [options.freshLayerLastmod] - Optional lastmod
 *   values keyed by article URL.
 * @returns {string} The complete sitemap document.
 */
export function generateMarkdownSitemap({ items, twinUrl = defaultTwinUrl, freshLayerLastmod, }) {
    const urlBlocks = [];
    for (const item of forMarkdownSitemap(items)) {
        const loc = twinUrl(item.url);
        const runtimeStamp = freshLayerLastmod?.get(item.url);
        const buildStamp = item.dateModified ?? item.datePublished ?? '';
        // Use whichever of the two timestamps is more recent.
        const newest = pickLatest(runtimeStamp, buildStamp);
        let block = ` <url>\n <loc>${escapeXml(loc)}</loc>`;
        if (newest) {
            block += `\n <lastmod>${escapeXml(newest)}</lastmod>`;
        }
        block += `\n </url>`;
        urlBlocks.push(block);
    }
    return [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        urlBlocks.join('\n'),
        '</urlset>',
    ].join('\n');
}
|
|
50
|
+
/**
 * Return whichever of two date strings denotes the later instant.
 *
 * - If `a` is empty/undefined, `b` is returned as-is (and vice versa).
 * - If exactly one value cannot be parsed by `Date`, the parseable one wins,
 *   so a malformed date never displaces a valid one.
 * - If both are unparseable, `b` is returned.
 * - On a tie, `a` is returned.
 *
 * @param {string | undefined} a - First candidate.
 * @param {string | undefined} b - Second candidate.
 * @returns {string | undefined} The chosen candidate, unmodified.
 */
function pickLatest(a, b) {
    if (!a)
        return b;
    if (!b)
        return a;
    const aTime = new Date(a).getTime();
    const bTime = new Date(b).getTime();
    // An unparseable date yields NaN, and every comparison against NaN is
    // false, so validity has to be checked explicitly before comparing.
    if (Number.isNaN(aTime))
        return b;
    if (Number.isNaN(bTime))
        return a;
    return aTime >= bTime ? a : b;
}
|
|
57
|
+
//# sourceMappingURL=sitemap-markdown.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap-markdown.js","sourceRoot":"","sources":["../../src/utils/sitemap-markdown.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAA;AAExD,SAAS,SAAS,CAAC,GAAW;IAC7B,OAAO,GAAG;SACR,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AAC1B,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,UAAkB;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;IAC9C,OAAO,GAAG,OAAO,KAAK,CAAA;AACvB,CAAC;AAkBD;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,EACvC,KAAK,EACL,OAAO,GAAG,cAAc,EACxB,iBAAiB,GACe;IAChC,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAA;IAC1C,MAAM,OAAO,GAAG,QAAQ;SACtB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACb,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAC7B,MAAM,QAAQ,GAAG,iBAAiB,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,aAAa,IAAI,EAAE,CAAA;QAC9D,8DAA8D;QAC9D,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;QAC9C,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC,kBAAkB,SAAS,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAClF,OAAO,qBAAqB,SAAS,CAAC,GAAG,CAAC,SAAS,UAAU,YAAY,CAAA;IAC1E,CAAC,CAAC;SACD,IAAI,CAAC,IAAI,CAAC,CAAA;IAEZ,OAAO;;EAEN,OAAO;UACC,CAAA;AACV,CAAC;AAED,SAAS,UAAU,CAAC,CAAqB,EAAE,CAAqB;IAC/D,IAAI,CAAC,CAAC;QAAE,OAAO,CAAC,CAAA;IAChB,IAAI,CAAC,CAAC;QAAE,OAAO,CAAC,CAAA;IAChB,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { ContentItem } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Compute a SHA-256 content hash for staleness validation. Uses `crypto.subtle`
|
|
4
|
+
* so the same implementation runs in Node (build-time) and Workers (runtime).
|
|
5
|
+
* Never node:crypto.
|
|
6
|
+
*
|
|
7
|
+
* The hash input is deterministic: item title + description + rendered body.
|
|
8
|
+
* Order matters: changes to any field produce a new hash. Whitespace-insensitive
|
|
9
|
+
* if the consumer normalizes before calling.
|
|
10
|
+
*/
|
|
11
|
+
export declare function computeContentHash(item: Pick<ContentItem, 'title' | 'description'>, renderedBody: string): Promise<string>;
|
|
12
|
+
export interface StalenessDriftRecord {
|
|
13
|
+
url: string;
|
|
14
|
+
expectedHash: string;
|
|
15
|
+
actualHash: string;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Compare expected-vs-actual content hashes across a batch of items. Used by the
|
|
19
|
+
* build-time validator to detect drift between two `contentProvider` calls made
|
|
20
|
+
* during the same build (rare but possible when provider pulls from a mutating
|
|
21
|
+
* source).
|
|
22
|
+
*
|
|
23
|
+
* Returns the list of items whose hash differs. Callers log these as warnings;
|
|
24
|
+
* the build does NOT fail on drift (spec 1403: "does not fail the build").
|
|
25
|
+
*/
|
|
26
|
+
export declare function checkStaleness(items: ContentItem[], expectedHashes: Map<string, string>, renderBody: (item: ContentItem) => string | Promise<string>): Promise<StalenessDriftRecord[]>;
|
|
27
|
+
//# sourceMappingURL=staleness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"staleness.d.ts","sourceRoot":"","sources":["../../src/utils/staleness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAE9C;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,IAAI,CAAC,WAAW,EAAE,OAAO,GAAG,aAAa,CAAC,EAChD,YAAY,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,CAAC,CAKjB;AAUD,MAAM,WAAW,oBAAoB;IACpC,GAAG,EAAE,MAAM,CAAA;IACX,YAAY,EAAE,MAAM,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;;GAQG;AACH,wBAAsB,cAAc,CACnC,KAAK,EAAE,WAAW,EAAE,EACpB,cAAc,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACnC,UAAU,EAAE,CAAC,IAAI,EAAE,WAAW,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,GACzD,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAYjC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compute a SHA-256 content hash for staleness validation. Uses `crypto.subtle`
|
|
3
|
+
* so the same implementation runs in Node (build-time) and Workers (runtime).
|
|
4
|
+
* Never node:crypto.
|
|
5
|
+
*
|
|
6
|
+
* The hash input is deterministic: item title + description + rendered body.
|
|
7
|
+
* Order matters: changes to any field produce a new hash. Whitespace-insensitive
|
|
8
|
+
* if the consumer normalizes before calling.
|
|
9
|
+
*/
|
|
10
|
+
export async function computeContentHash(item, renderedBody) {
|
|
11
|
+
const input = [item.title, item.description ?? '', renderedBody].join('\u0001');
|
|
12
|
+
const bytes = new TextEncoder().encode(input);
|
|
13
|
+
const digest = await crypto.subtle.digest('SHA-256', bytes);
|
|
14
|
+
return toHex(new Uint8Array(digest));
|
|
15
|
+
}
|
|
16
|
+
/**
 * Encode a byte sequence as a lowercase hex string, two digits per byte.
 *
 * @param {Iterable<number>} bytes - Bytes to encode (e.g. a `Uint8Array`).
 * @returns {string} Concatenated zero-padded hex pairs.
 */
function toHex(bytes) {
    const pairs = Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0'));
    return pairs.join('');
}
|
|
23
|
+
/**
 * Compare expected and freshly computed content hashes for a batch of items.
 *
 * For each item that has a non-empty entry in `expectedHashes` (keyed by
 * `item.url`), the body is rendered with `renderBody`, hashed with
 * `computeContentHash`, and compared against the expected value. Items
 * without an expected hash are skipped. Items are processed one at a time,
 * in order.
 *
 * Per the package notes this backs the build-time validator that detects
 * drift between two `contentProvider` calls in the same build; callers log
 * the result as warnings and the build does NOT fail on drift (spec 1403).
 *
 * @param {Array} items - Content items to check.
 * @param {Map<string, string>} expectedHashes - Expected hash per item URL.
 * @param {(item: object) => string | Promise<string>} renderBody - Renders
 *   the body that is fed into the hash.
 * @returns {Promise<Array<{ url: string, expectedHash: string, actualHash: string }>>}
 *   One record per item whose hash differs; empty when nothing drifted.
 */
export async function checkStaleness(items, expectedHashes, renderBody) {
    const mismatches = [];
    for (const item of items) {
        const expectedHash = expectedHashes.get(item.url);
        if (expectedHash) {
            const renderedBody = await renderBody(item);
            const actualHash = await computeContentHash(item, renderedBody);
            if (expectedHash !== actualHash) {
                mismatches.push({ url: item.url, expectedHash, actualHash });
            }
        }
    }
    return mismatches;
}
|
|
46
|
+
//# sourceMappingURL=staleness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"staleness.js","sourceRoot":"","sources":["../../src/utils/staleness.ts"],"names":[],"mappings":"AAEA;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACvC,IAAgD,EAChD,YAAoB;IAEpB,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE,EAAE,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IAC/E,MAAM,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;IAC7C,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;IAC3D,OAAO,KAAK,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAA;AACrC,CAAC;AAED,SAAS,KAAK,CAAC,KAAiB;IAC/B,IAAI,GAAG,GAAG,EAAE,CAAA;IACZ,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACvB,GAAG,IAAI,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACvC,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAQD;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CACnC,KAAoB,EACpB,cAAmC,EACnC,UAA2D;IAE3D,MAAM,KAAK,GAA2B,EAAE,CAAA;IACxC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAC7C,IAAI,CAAC,QAAQ;YAAE,SAAQ;QACvB,MAAM,IAAI,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAA;QACnC,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACnD,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAA;QAC1E,CAAC;IACF,CAAC;IACD,OAAO,KAAK,CAAA;AACb,CAAC"}
|
|
@@ -15,4 +15,45 @@ export declare function validateJsonLd(jsonLd: Record<string, unknown>): Validat
|
|
|
15
15
|
* Validate an HTML string for common on-page SEO issues.
|
|
16
16
|
*/
|
|
17
17
|
export declare function validatePage(html: string, options: PageValidationOptions): ValidationResult;
|
|
18
|
+
import type { ContentItem } from '../types.js';
|
|
19
|
+
export interface HreflangReciprocityIssue {
|
|
20
|
+
url: string;
|
|
21
|
+
missingReciprocal: {
|
|
22
|
+
from: string;
|
|
23
|
+
lang: string;
|
|
24
|
+
expectedBackReference: string;
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Verify that every `alternateLocales` entry has a reciprocal entry on the
|
|
29
|
+
* target side. For search engines, missing reciprocals are a hard error — Google
|
|
30
|
+
* will ignore the hreflang annotations altogether.
|
|
31
|
+
*
|
|
32
|
+
* Runs in O(N^2) worst-case but N is bounded by the number of translated articles,
|
|
33
|
+
* not the entire catalog, so this is fine at build time.
|
|
34
|
+
*
|
|
35
|
+
* Returns the list of reciprocity violations. Callers log these as errors in the
|
|
36
|
+
* build-time validation hook.
|
|
37
|
+
*/
|
|
38
|
+
export declare function validateHreflangReciprocity(items: ContentItem[]): HreflangReciprocityIssue[];
|
|
39
|
+
export interface PrerenderGuardIssue {
|
|
40
|
+
route: string;
|
|
41
|
+
access: 'members';
|
|
42
|
+
message: string;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Fails the build when a route whose ContentItem has `access: 'members'` is
|
|
46
|
+
* configured as `prerender: true`. This is the load-bearing check that makes
|
|
47
|
+
* Flexible Sampling + static mode the only known-bad combo. The consumer's
|
|
48
|
+
* Astro integration iterates the build manifest and calls this with the
|
|
49
|
+
* prerendered-route URL set crossed against contentProvider output.
|
|
50
|
+
*
|
|
51
|
+
* Spec lines 1389-1405.
|
|
52
|
+
*
|
|
53
|
+
* Returns an empty array on success; non-empty means the build MUST fail.
|
|
54
|
+
*/
|
|
55
|
+
export declare function validatePrerenderedGatedRoutes({ prerenderedUrls, items, }: {
|
|
56
|
+
prerenderedUrls: Set<string>;
|
|
57
|
+
items: ContentItem[];
|
|
58
|
+
}): PrerenderGuardIssue[];
|
|
18
59
|
//# sourceMappingURL=validation.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/utils/validation.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,EAAE,CAAA;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAA;CAClB;AAED,MAAM,WAAW,qBAAqB;IACrC,cAAc,EAAE,MAAM,CAAA;IACtB,oBAAoB,EAAE,MAAM,CAAA;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,gBAAgB,CAqEhF;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB,GAAG,gBAAgB,CAqF3F"}
|
|
1
|
+
{"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/utils/validation.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,EAAE,CAAA;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAA;CAClB;AAED,MAAM,WAAW,qBAAqB;IACrC,cAAc,EAAE,MAAM,CAAA;IACtB,oBAAoB,EAAE,MAAM,CAAA;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,gBAAgB,CAqEhF;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB,GAAG,gBAAgB,CAqF3F;AAID,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAE9C,MAAM,WAAW,wBAAwB;IACxC,GAAG,EAAE,MAAM,CAAA;IACX,iBAAiB,EAAE;QAClB,IAAI,EAAE,MAAM,CAAA;QACZ,IAAI,EAAE,MAAM,CAAA;QACZ,qBAAqB,EAAE,MAAM,CAAA;KAC7B,CAAA;CACD;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,WAAW,EAAE,GAAG,wBAAwB,EAAE,CAkC5F;AAID,MAAM,WAAW,mBAAmB;IACnC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,SAAS,CAAA;IACjB,OAAO,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,8BAA8B,CAAC,EAC9C,eAAe,EACf,KAAK,GACL,EAAE;IACF,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IAC5B,KAAK,EAAE,WAAW,EAAE,CAAA;CACpB,GAAG,mBAAmB,EAAE,CAmBxB"}
|
package/dist/utils/validation.js
CHANGED
|
@@ -138,4 +138,82 @@ export function validatePage(html, options) {
|
|
|
138
138
|
}
|
|
139
139
|
return { errors, warnings };
|
|
140
140
|
}
|
|
141
|
+
/**
 * Check that every `alternateLocales` entry is mirrored by the item it
 * points to.
 *
 * For each alternate `{ url, lang }` on an item, a violation is recorded when
 * either no item in `items` has that URL, or the target item has no
 * `alternateLocales` entry whose `url` equals the source item's URL. URLs are
 * compared as exact strings.
 *
 * Per the package notes, search engines treat a missing reciprocal as a hard
 * error (Google ignores the hreflang annotations), and callers log the
 * returned violations as errors in the build-time validation hook.
 *
 * Cost: one Map lookup per alternate plus a linear scan of the target's own
 * alternates.
 *
 * @param {Array} items - Content items, each with `url` and optionally
 *   `alternateLocales`.
 * @returns {Array<{ url: string, missingReciprocal: { from: string, lang: string, expectedBackReference: string } }>}
 *   One entry per alternate that lacks a back-reference.
 */
export function validateHreflangReciprocity(items) {
    const itemsByUrl = new Map(items.map((entry) => [entry.url, entry]));
    const violations = [];
    for (const source of items) {
        for (const alternate of source.alternateLocales || []) {
            const target = itemsByUrl.get(alternate.url);
            // An unknown target and a target without a back-link are reported
            // identically.
            const linksBack = target
                ? target.alternateLocales?.some((candidate) => candidate.url === source.url)
                : false;
            if (linksBack) {
                continue;
            }
            violations.push({
                url: source.url,
                missingReciprocal: {
                    from: alternate.url,
                    lang: alternate.lang,
                    expectedBackReference: source.url,
                },
            });
        }
    }
    return violations;
}
|
|
188
|
+
/**
 * Report every members-gated item whose route is in the prerendered set.
 *
 * Only items with `access === 'members'` are inspected. The item's pathname
 * (taken from `new URL(item.url)`, so `item.url` must be absolute) is looked
 * up in `prerenderedUrls` in three spellings: as-is, with one `/` appended,
 * and with trailing slashes removed. The last form makes an item URL such as
 * `/post/` match a route recorded as `/post`, so the guard does not depend on
 * which side carries the trailing slash.
 *
 * Per the package notes (spec lines 1389-1405) the consumer's Astro
 * integration calls this with the prerendered-route URL set crossed against
 * contentProvider output, and a non-empty result means the build MUST fail.
 *
 * @param {object} args
 * @param {Set<string>} args.prerenderedUrls - Pathnames of prerendered routes.
 * @param {Array} args.items - Content items with `url` and `access`.
 * @returns {Array<{ route: string, access: 'members', message: string }>}
 *   One issue per offending item; empty on success.
 * @throws {TypeError} If a members item's `url` is not a valid absolute URL.
 */
export function validatePrerenderedGatedRoutes({ prerenderedUrls, items, }) {
    const issues = [];
    for (const item of items) {
        if (item.access !== 'members')
            continue;
        const path = new URL(item.url).pathname;
        // Slash-free spelling of the same route; the root path is left alone.
        const trimmed = path.length > 1 ? path.replace(/\/+$/, '') : path;
        const isPrerendered = prerenderedUrls.has(path) ||
            prerenderedUrls.has(`${path}/`) ||
            (trimmed !== path && trimmed !== '' && prerenderedUrls.has(trimmed));
        if (isPrerendered) {
            issues.push({
                route: path,
                access: 'members',
                message: `Route ${path} is prerendered but serves a members-gated item. ` +
                    `Prerendered HTML is the same bytes for every requester — there is no ` +
                    `way to serve a teaser to anonymous users and the full body to verified ` +
                    `Googlebot from the same static file. Set export const prerender = false ` +
                    `on this route, or mark this item access: 'public'.`,
            });
        }
    }
    return issues;
}
|
|
141
219
|
//# sourceMappingURL=validation.js.map
|