euparliamentmonitor 0.9.26 → 0.9.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/aggregator/article-generator.d.ts +1 -0
- package/scripts/aggregator/article-generator.js +2 -0
- package/scripts/aggregator/generator/render-one.js +3 -0
- package/scripts/aggregator/html/shell.d.ts +8 -0
- package/scripts/aggregator/html/shell.js +99 -10
- package/scripts/aggregator/html/toc.js +7 -2
- package/scripts/aggregator/metadata/fallback-synth.d.ts +39 -0
- package/scripts/aggregator/metadata/fallback-synth.js +89 -0
- package/scripts/aggregator/metadata/per-language-resolver.d.ts +1 -12
- package/scripts/aggregator/metadata/per-language-resolver.js +73 -156
- package/scripts/aggregator/metadata/resolve-helpers.d.ts +7 -4
- package/scripts/aggregator/metadata/resolve-helpers.js +7 -4
- package/scripts/aggregator/metadata/resolve-script-utils.d.ts +152 -0
- package/scripts/aggregator/metadata/resolve-script-utils.js +297 -0
- package/scripts/aggregator/metadata/resolve-utils.js +36 -4
- package/scripts/aggregator/progressive-disclosure.d.ts +28 -0
- package/scripts/aggregator/progressive-disclosure.js +109 -0
- package/scripts/aggregator/reader-friendly-transform.d.ts +8 -0
- package/scripts/aggregator/reader-friendly-transform.js +196 -0
- package/scripts/constants/languages.d.ts +2 -1
- package/scripts/constants/languages.js +1 -1
- package/scripts/constants/ui/index.d.ts +2 -0
- package/scripts/constants/ui/index.js +2 -0
- package/scripts/constants/ui/progressive-disclosure.d.ts +40 -0
- package/scripts/constants/ui/progressive-disclosure.js +150 -0
- package/scripts/discover-untranslated-briefs.js +296 -1
- package/scripts/validate-manifest-seo.js +12 -1
package/package.json
CHANGED
|
@@ -2,6 +2,7 @@ export { parseCliArgs, type CliOptions } from './generator/cli.js';
|
|
|
2
2
|
export { buildArticleSlug, sanitizeRunSuffix, extractDefaultDescription, } from './generator/slug.js';
|
|
3
3
|
export { discoverAnalysisRuns, groupRunsForCollision, type DiscoveredRun, } from './generator/discovery.js';
|
|
4
4
|
export { insertReaderGuideAfterExecutiveBrief } from './generator/reader-guide-insertion.js';
|
|
5
|
+
export { estimateReadingMinutes, buildLayerReadingTimes, splitBodyIntoDisclosureLayers, } from './progressive-disclosure.js';
|
|
5
6
|
export { generateArticle, type GenerateResult } from './generator/render-one.js';
|
|
6
7
|
export { generateAllArticles } from './generator/render-batch.js';
|
|
7
8
|
/**
|
|
@@ -38,6 +38,8 @@ export { buildArticleSlug, sanitizeRunSuffix, extractDefaultDescription, } from
|
|
|
38
38
|
export { discoverAnalysisRuns, groupRunsForCollision, } from './generator/discovery.js';
|
|
39
39
|
// Reader guide insertion
|
|
40
40
|
export { insertReaderGuideAfterExecutiveBrief } from './generator/reader-guide-insertion.js';
|
|
41
|
+
// Progressive disclosure reading-time helpers
|
|
42
|
+
export { estimateReadingMinutes, buildLayerReadingTimes, splitBodyIntoDisclosureLayers, } from './progressive-disclosure.js';
|
|
41
43
|
// Single-run + batch orchestrators
|
|
42
44
|
export { generateArticle } from './generator/render-one.js';
|
|
43
45
|
export { generateAllArticles } from './generator/render-batch.js';
|
|
@@ -27,6 +27,7 @@ import { blobUrl } from '../infra/github-urls.js';
|
|
|
27
27
|
import { buildArticleSlug } from './slug.js';
|
|
28
28
|
import { discoverAnalysisRuns } from './discovery.js';
|
|
29
29
|
import { insertReaderGuideAfterExecutiveBrief } from './reader-guide-insertion.js';
|
|
30
|
+
import { buildLayerReadingTimes, splitBodyIntoDisclosureLayers, } from '../progressive-disclosure.js';
|
|
30
31
|
/**
|
|
31
32
|
* Escape a string for a conservative double-quoted YAML scalar.
|
|
32
33
|
*
|
|
@@ -138,6 +139,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
|
|
|
138
139
|
bodyHtml = localizeArticleBody(bodyHtml, lang);
|
|
139
140
|
bodyHtml = enhanceTradecraftCards(bodyHtml, lang);
|
|
140
141
|
bodyHtml = enhanceAnalysisIndexCards(bodyHtml, lang);
|
|
142
|
+
const readingTimes = buildLayerReadingTimes(splitBodyIntoDisclosureLayers(bodyHtml).wordCounts);
|
|
141
143
|
const entry = getMetadataEntry(chromeOptions.metadata, lang);
|
|
142
144
|
const perLangDescription = lang !== 'en' && metaSource !== aggregated.markdown
|
|
143
145
|
? extractStrongProseLine(metaSource) || entry.description
|
|
@@ -157,6 +159,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
|
|
|
157
159
|
articleCount: chromeOptions.articleCount,
|
|
158
160
|
isBasedOn: aggregated.includedArtifacts.map((a) => blobUrl(a.repoRelPath)),
|
|
159
161
|
mentions: chromeOptions.mentions,
|
|
162
|
+
readingTimes,
|
|
160
163
|
});
|
|
161
164
|
const filename = getArticleFilename(slug, lang);
|
|
162
165
|
fs.writeFileSync(path.join(opts.outDir, filename), html, 'utf8');
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { LanguageCode } from '../../types/index.js';
|
|
2
2
|
import { type ArticleTocEntry } from './toc.js';
|
|
3
|
+
import { type LayerReadingTimes } from '../progressive-disclosure.js';
|
|
3
4
|
export type { ArticleTocEntry } from './toc.js';
|
|
4
5
|
/** Publisher organization name used in JSON-LD, meta tags. */
|
|
5
6
|
export declare const PUBLISHER_NAME = "Hack23 AB";
|
|
@@ -16,6 +17,11 @@ export interface WrapArticleOptions {
|
|
|
16
17
|
readonly articleSlug: string;
|
|
17
18
|
/** Pre-rendered HTML body fragment (from `renderMarkdown`). */
|
|
18
19
|
readonly body: string;
|
|
20
|
+
/**
|
|
21
|
+
* Enable reader-friendly post-processing for rendered HTML body text.
|
|
22
|
+
* Defaults to `true` for public HTML output.
|
|
23
|
+
*/
|
|
24
|
+
readonly readerFriendly?: boolean;
|
|
19
25
|
/** Article title — shown in `<title>`, breadcrumb, OG/Twitter meta. */
|
|
20
26
|
readonly title: string;
|
|
21
27
|
/** Article description — shown in `<meta name="description">` and OG. */
|
|
@@ -69,6 +75,8 @@ export interface WrapArticleOptions {
|
|
|
69
75
|
* are language-independent proper nouns.
|
|
70
76
|
*/
|
|
71
77
|
readonly mentions?: readonly string[];
|
|
78
|
+
/** Optional precomputed reading-time estimates per disclosure layer. */
|
|
79
|
+
readonly readingTimes?: LayerReadingTimes;
|
|
72
80
|
}
|
|
73
81
|
/**
|
|
74
82
|
* Render the full article HTML document with the shared chrome.
|
|
@@ -12,11 +12,10 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { BASE_URL, BUILD_SHORT, MERMAID_VERSION } from '../../constants/config.js';
|
|
14
14
|
import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
|
|
15
|
-
import { ALL_LANGUAGES, PAGE_TITLES, SKIP_LINK_TEXTS, ARTICLE_NAV_LABELS, BACK_TO_NEWS_LABELS, VIEW_SOURCE_MARKDOWN_LABELS, FOOTER_SITEMAP_LABELS, FOOTER_POLITICAL_INTELLIGENCE_LABELS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
|
|
15
|
+
import { ALL_LANGUAGES, PAGE_TITLES, SKIP_LINK_TEXTS, ARTICLE_NAV_LABELS, BACK_TO_NEWS_LABELS, VIEW_SOURCE_MARKDOWN_LABELS, FOOTER_SITEMAP_LABELS, FOOTER_POLITICAL_INTELLIGENCE_LABELS, PROGRESSIVE_DISCLOSURE_LABELS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
|
|
16
16
|
import { buildOgLocaleTags } from '../../constants/og-locales.js';
|
|
17
17
|
import { ORG_SAME_AS, buildTwitterAttributionTags } from '../../constants/social-handles.js';
|
|
18
18
|
import { escapeHTML } from '../../utils/file-utils.js';
|
|
19
|
-
import { stripHtmlTags } from '../../utils/html-sanitize.js';
|
|
20
19
|
import { buildResponsiveIconLinks, buildResponsiveSocialImageMeta, buildSiteFooter, buildSiteHeader, buildPageBanner, } from '../../templates/section-builders.js';
|
|
21
20
|
import { getPoliticalIntelligenceFilename } from '../../generators/political-intelligence.js';
|
|
22
21
|
import { getSitemapFilename } from '../../generators/sitemap/index.js';
|
|
@@ -25,10 +24,85 @@ import { clampForBudget } from '../metadata/seo-budgets.js';
|
|
|
25
24
|
import { getArticleFilename, buildArticleHreflangLinks, buildLanguageSwitcher, } from './hreflang.js';
|
|
26
25
|
import { buildArticleToc } from './toc.js';
|
|
27
26
|
import { blobUrl } from '../infra/github-urls.js';
|
|
27
|
+
import { applyReaderFriendlyTransform } from '../reader-friendly-transform.js';
|
|
28
|
+
import { buildLayerReadingTimes, buildProgressiveDisclosureBody, } from '../progressive-disclosure.js';
|
|
28
29
|
/** Publisher organization name used in JSON-LD, meta tags. */
|
|
29
30
|
export const PUBLISHER_NAME = 'Hack23 AB';
|
|
30
31
|
/** Site name used across meta tags and structured data. */
|
|
31
32
|
export const SITE_NAME = 'EU Parliament Monitor';
|
|
33
|
+
/**
 * Editorial punctuation that may be left dangling at the end of a
 * description once a `Published …` tail has been removed: em dash, en dash,
 * pipe, colon, semicolon, comma and hyphen. Whitespace is handled
 * separately by {@link WHITESPACE_CHAR_RE}.
 */
const TRAILING_SEPARATOR_CHARS = new Set(['\u2014', '\u2013', '|', ':', ';', ',', '-']);
/**
 * Single-character whitespace probe. Hoisted to module scope so the trim
 * loop below does not evaluate a fresh regex literal on every iteration.
 */
const WHITESPACE_CHAR_RE = /\s/u;
/**
 * Remove trailing whitespace and editorial separator punctuation.
 *
 * Walks backwards from the end of the string one character at a time and
 * stops at the first character that is neither whitespace nor a member of
 * {@link TRAILING_SEPARATOR_CHARS}. This is a single backward pass, used in
 * place of a `/[\s…]+$/` character-class quantifier.
 *
 * @param value - Text whose trailing separators should be removed
 * @returns `value` without trailing whitespace/separator characters;
 *   leading characters are never touched
 */
function trimTrailingSeparators(value) {
    let end = value.length;
    while (end > 0) {
        const ch = value.charAt(end - 1);
        const isSeparator = TRAILING_SEPARATOR_CHARS.has(ch) || WHITESPACE_CHAR_RE.test(ch);
        if (!isSeparator) {
            break;
        }
        end -= 1;
    }
    return value.slice(0, end);
}
|
|
59
|
+
/**
 * End-anchored `Published YYYY-MM-DD` tail matcher (case-insensitive, with
 * an optional trailing period and trailing whitespace).
 *
 * The leading `\b` ensures `Published` is matched only as a whole word, so
 * a description ending in e.g. `unpublished 2026-01-02` is left intact
 * instead of being cut down to a dangling `un`. The fixed `Published`
 * literal gives the matcher a deterministic starting point rather than an
 * ambiguous leading-whitespace quantifier.
 */
const PUBLISHED_DATE_TAIL_RE = /\bPublished\s+\d{4}-\d{2}-\d{2}\.?\s*$/iu;
|
|
65
|
+
/**
 * Drop a leaked trailing `Published YYYY-MM-DD` stamp from a description.
 *
 * Once the stamp is removed, separator punctuation and whitespace left
 * dangling at the end are trimmed, and leading/trailing whitespace is
 * stripped from the result.
 *
 * @param value - Raw description candidate
 * @returns The cleaned description, or `''` when `value` is falsy
 */
function stripPublishedDateTail(value) {
    return value
        ? trimTrailingSeparators(value.replace(PUBLISHED_DATE_TAIL_RE, '')).trim()
        : '';
}
|
|
77
|
+
/**
 * Resolve a publish-date-safe description.
 *
 * Prefers the stripped value. The original is returned only when stripping
 * produced nothing *and* no `Published …` tail was present (for example a
 * separator-only string). When the original consisted solely of a
 * publish-date tail, the result is empty so the leaked tail is never
 * re-introduced.
 *
 * @param value - Raw description candidate
 * @returns Stripped description; `''` when the original was tail-only or
 *   when `value` is `null`/`undefined`
 */
function safeDescriptionWithoutPublishedTail(value) {
    // Guard nullish input so callers always receive a string; without this
    // the final fallback below would hand `undefined`/`null` straight back.
    if (value === undefined || value === null)
        return '';
    const stripped = stripPublishedDateTail(value);
    if (stripped)
        return stripped;
    return PUBLISHED_DATE_TAIL_RE.test(value) ? '' : value;
}
|
|
92
|
+
/**
 * Remove leading numeric list/hierarchy markers from a JSON-LD mention
 * label.
 *
 * Steps, applied in order to the trimmed input:
 *  1. every leading `N.` group (e.g. `2.3.`), together with the whitespace
 *     that follows each group;
 *  2. one leading bare number followed by whitespace (e.g. `12 `);
 *  3. one leading period plus following whitespace that may remain.
 *
 * @param name - Raw mention label
 * @returns Mention label without leading numeric hierarchy markers
 */
function sanitizeMentionName(name) {
    const label = name
        .trim()
        .replace(/^(?:\d+\.\s*)+/u, '')
        .replace(/^\d+\s+/u, '')
        .replace(/^\.\s*/u, '');
    return label.trim();
}
|
|
32
106
|
/**
|
|
33
107
|
* Compute the per-surface SEO-budget-clamped variants of the article
|
|
34
108
|
* title and description for a single render. See
|
|
@@ -44,17 +118,19 @@ export const SITE_NAME = 'EU Parliament Monitor';
|
|
|
44
118
|
* @returns One {@link SeoClampedSurfaces} record per article render
|
|
45
119
|
*/
|
|
46
120
|
function computeSeoClamps(options, lang, siteTitle) {
|
|
121
|
+
const safeMetaDescription = safeDescriptionWithoutPublishedTail(options.description);
|
|
47
122
|
const pageTitle = buildPageTitle(options.title, lang, siteTitle);
|
|
48
123
|
const ogTitleClamped = clampForBudget(options.title, lang, 'ogTitle');
|
|
49
124
|
const twitterTitleClamped = clampForBudget(options.title, lang, 'twitterTitle');
|
|
50
|
-
const metaDescriptionClamped = clampForBudget(
|
|
125
|
+
const metaDescriptionClamped = clampForBudget(safeMetaDescription, lang, 'metaDescription');
|
|
51
126
|
// og:description and twitter:description prefer the longer BLUF
|
|
52
127
|
// paragraph (extendedDescription) so social-card previews show the
|
|
53
128
|
// full lede; fall back to the short meta description when the
|
|
54
129
|
// extended one is empty.
|
|
55
|
-
const
|
|
130
|
+
const socialSourceRaw = options.extendedDescription && options.extendedDescription.length > 0
|
|
56
131
|
? options.extendedDescription
|
|
57
|
-
:
|
|
132
|
+
: safeMetaDescription;
|
|
133
|
+
const socialSource = stripPublishedDateTail(socialSourceRaw) || safeMetaDescription;
|
|
58
134
|
const ogDescriptionClamped = clampForBudget(socialSource, lang, 'ogDescription');
|
|
59
135
|
const twitterDescriptionClamped = clampForBudget(socialSource, lang, 'twitterDescription');
|
|
60
136
|
const imageAltClamped = clampForBudget(`${options.title}${getTitleSeparator(lang)}${siteTitle}`, lang, 'imageAlt');
|
|
@@ -110,13 +186,22 @@ export function wrapArticleHtml(options) {
|
|
|
110
186
|
const tocHtml = buildArticleToc(options.toc ?? [], safeLang);
|
|
111
187
|
const articleMainClass = tocHtml.length > 0 ? 'article-main--with-toc' : 'article-main--no-toc';
|
|
112
188
|
const articleSectionLabel = getLocalizedArticleTypePlain(options.articleType, safeLang);
|
|
189
|
+
const disclosureBody = buildProgressiveDisclosureBody(options.body, safeLang);
|
|
190
|
+
const transformedBodyHtml = options.readerFriendly === false
|
|
191
|
+
? disclosureBody.bodyHtml
|
|
192
|
+
: applyReaderFriendlyTransform(disclosureBody.bodyHtml);
|
|
113
193
|
// Count words from the rendered body for the JSON-LD `wordCount`
|
|
114
194
|
// field (Google's NewsArticle structured-data validator emits a
|
|
115
195
|
// warning when this is missing). Done by stripping HTML tags from
|
|
116
196
|
// the rendered body then splitting on whitespace — fast and
|
|
117
197
|
// CodeQL-safe.
|
|
118
|
-
const
|
|
119
|
-
|
|
198
|
+
const wordCount = disclosureBody.wordCounts.quick +
|
|
199
|
+
disclosureBody.wordCounts.analysis +
|
|
200
|
+
disclosureBody.wordCounts.intelligence;
|
|
201
|
+
const readingTimes = options.readingTimes ?? buildLayerReadingTimes(disclosureBody.wordCounts);
|
|
202
|
+
const disclosureLabels = getLocalizedString(PROGRESSIVE_DISCLOSURE_LABELS, safeLang);
|
|
203
|
+
const min = disclosureLabels.minutesAbbr;
|
|
204
|
+
const readingTimeLine = `⏱️ ${disclosureLabels.quickRead}: ${readingTimes.quickRead}${min} · ${disclosureLabels.fullAnalysis}: ${readingTimes.fullAnalysis}${min} · ${disclosureLabels.completeIntelligence}: ${readingTimes.completeIntelligence}${min}`;
|
|
120
205
|
// Pre-compute the per-surface SEO-budget-clamped variants of title
|
|
121
206
|
// and description. Each surface gets its own clamp tuned to the
|
|
122
207
|
// documented platform envelope (Google/Bing SERP, Facebook/LinkedIn
|
|
@@ -152,6 +237,9 @@ export function wrapArticleHtml(options) {
|
|
|
152
237
|
height: 630,
|
|
153
238
|
},
|
|
154
239
|
];
|
|
240
|
+
const sanitizedMentions = (options.mentions ?? [])
|
|
241
|
+
.map((name) => sanitizeMentionName(name))
|
|
242
|
+
.filter(Boolean);
|
|
155
243
|
const jsonLd = {
|
|
156
244
|
'@context': 'https://schema.org',
|
|
157
245
|
'@type': 'NewsArticle',
|
|
@@ -194,9 +282,9 @@ export function wrapArticleHtml(options) {
|
|
|
194
282
|
isBasedOn: options.isBasedOn.map((url) => ({ '@type': 'CreativeWork', url })),
|
|
195
283
|
}
|
|
196
284
|
: {}),
|
|
197
|
-
...(
|
|
285
|
+
...(sanitizedMentions.length > 0
|
|
198
286
|
? {
|
|
199
|
-
mentions:
|
|
287
|
+
mentions: sanitizedMentions.map((name) => ({
|
|
200
288
|
'@type': 'Organization',
|
|
201
289
|
name,
|
|
202
290
|
})),
|
|
@@ -306,10 +394,11 @@ ${tocHtml} <article class="article-body" lang="${safeLang}">
|
|
|
306
394
|
<p class="article-kicker">${escapeHTML(getLocalizedArticleType(options.articleType, safeLang))}</p>
|
|
307
395
|
<h1>${escapeHTML(options.title)}</h1>
|
|
308
396
|
<p class="article-dek">${escapeHTML(options.description)}</p>
|
|
397
|
+
<p class="article-reading-times" aria-label="${escapeHTML(disclosureLabels.readingTimeAria)}">${escapeHTML(readingTimeLine)}</p>
|
|
309
398
|
<p class="article-meta"><time datetime="${options.date}">${options.date}</time> · EU Parliament Monitor</p>
|
|
310
399
|
</header>
|
|
311
400
|
${sourceMdLink}
|
|
312
|
-
${
|
|
401
|
+
${transformedBodyHtml}
|
|
313
402
|
</article>
|
|
314
403
|
</main>
|
|
315
404
|
|
|
@@ -8,12 +8,13 @@
|
|
|
8
8
|
* that mirrors the Reader Intelligence Guide so the two navigation
|
|
9
9
|
* surfaces share a single visual vocabulary.
|
|
10
10
|
*/
|
|
11
|
-
import { TOC_ARIA_LABELS, TRADECRAFT_HEADING_LABELS, ANALYSIS_INDEX_HEADING_LABELS, KEY_TAKEAWAYS_HEADING_LABELS, SUPPLEMENTARY_HEADING_LABELS, SECTION_TITLE_LABELS, getLocalizedString, } from '../../constants/languages.js';
|
|
11
|
+
import { TOC_ARIA_LABELS, TRADECRAFT_HEADING_LABELS, ANALYSIS_INDEX_HEADING_LABELS, KEY_TAKEAWAYS_HEADING_LABELS, SUPPLEMENTARY_HEADING_LABELS, SECTION_TITLE_LABELS, PROGRESSIVE_DISCLOSURE_LABELS, getLocalizedString, } from '../../constants/languages.js';
|
|
12
12
|
import { escapeHTML } from '../../utils/file-utils.js';
|
|
13
13
|
import { READER_GUIDE_SECTION_ID } from '../reader-guide-constants.js';
|
|
14
14
|
import { READER_GUIDE_TITLE_LABELS, getReaderGuideSectionIcon, } from '../reader-intelligence-guide.js';
|
|
15
15
|
import { TRADECRAFT_SECTION_ID, MANIFEST_SECTION_ID, SUPPLEMENTARY_SECTION_ID, } from '../artifact-order.js';
|
|
16
16
|
import { KEY_TAKEAWAYS_SECTION_ID } from '../key-takeaways.js';
|
|
17
|
+
import { resolveDisclosureLayer } from '../progressive-disclosure.js';
|
|
17
18
|
/**
|
|
18
19
|
* Resolve a localized title for a TOC entry based on its section ID.
|
|
19
20
|
* Falls back to the original English title if no translation is available.
|
|
@@ -90,11 +91,15 @@ export function buildArticleToc(entries, lang) {
|
|
|
90
91
|
if (entries.length === 0)
|
|
91
92
|
return '';
|
|
92
93
|
const label = escapeHTML(getLocalizedString(TOC_ARIA_LABELS, lang));
|
|
94
|
+
const layerBadgeWord = getLocalizedString(PROGRESSIVE_DISCLOSURE_LABELS, lang).layerBadge;
|
|
93
95
|
const items = entries
|
|
94
96
|
.map((e) => {
|
|
95
97
|
const displayTitle = getLocalizedTocTitle(e.id, e.title, lang);
|
|
96
98
|
const icon = getTocSectionIcon(e.id);
|
|
97
|
-
|
|
99
|
+
const layer = resolveDisclosureLayer(e.id);
|
|
100
|
+
const layerBadge = layer === 'quick' ? 'L1' : layer === 'analysis' ? 'L2' : 'L3';
|
|
101
|
+
const layerAria = escapeHTML(`${layerBadgeWord} ${layerBadge}`);
|
|
102
|
+
return ` <li data-layer="${layer}"><a href="#${escapeHTML(e.id)}"><span class="article-toc-icon" aria-hidden="true">${icon}</span> <span class="article-toc-text">${escapeHTML(displayTitle)}</span><span class="article-toc-layer article-toc-layer--${layer}" aria-label="${layerAria}">${layerBadge}</span></a></li>`;
|
|
98
103
|
})
|
|
99
104
|
.join('\n');
|
|
100
105
|
return [
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { PerLanguageInputs } from './per-language-resolver.js';
|
|
2
|
+
/**
|
|
3
|
+
* Humanize an article-type slug for fallback metadata synthesis.
|
|
4
|
+
*
|
|
5
|
+
* @param articleType - Canonical article-type slug
|
|
6
|
+
* @returns Title-cased label with spaces instead of hyphens
|
|
7
|
+
*/
|
|
8
|
+
export declare function humanizeArticleTypeLabel(articleType: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Format `YYYY-MM-DD` into `Mon YYYY`; falls back to the raw date when invalid.
|
|
11
|
+
*
|
|
12
|
+
* Formats with the target language (falling back to `en`) so the synthesized
|
|
13
|
+
* fallback title stays locale-appropriate for Latin non-EN locales (e.g.
|
|
14
|
+
* `sv`/`fr`) instead of emitting an English month label on every page.
|
|
15
|
+
*
|
|
16
|
+
* @param date - ISO article date
|
|
17
|
+
* @param lang - Target language code driving the month-label locale
|
|
18
|
+
* @returns Month/year label suitable for fallback titles
|
|
19
|
+
*/
|
|
20
|
+
export declare function formatMonthYear(date: string, lang: string): string;
|
|
21
|
+
/**
|
|
22
|
+
* Hard fallback title synthesizer when all resolved candidates are contaminated.
|
|
23
|
+
* Shape: `EP <Article Type>: <Top Finding> — <Mon YYYY>`.
|
|
24
|
+
*
|
|
25
|
+
* @param input - Per-language resolver inputs
|
|
26
|
+
* @param topFindingSource - Best available finding/summary source text
|
|
27
|
+
* @param contextualFallback - Last-resort contextual fallback title
|
|
28
|
+
* @returns Reader-facing synthesized fallback title
|
|
29
|
+
*/
|
|
30
|
+
export declare function synthesizeFallbackTitle(input: PerLanguageInputs, topFindingSource: string, contextualFallback: string): string;
|
|
31
|
+
/**
|
|
32
|
+
* Hard fallback description synthesizer when the resolved description leaks
|
|
33
|
+
* pipeline jargon.
|
|
34
|
+
*
|
|
35
|
+
* @param input - Per-language resolver inputs
|
|
36
|
+
* @returns Reader-facing synthesized fallback description
|
|
37
|
+
*/
|
|
38
|
+
export declare function synthesizeFallbackDescription(input: PerLanguageInputs): string;
|
|
39
|
+
//# sourceMappingURL=fallback-synth.d.ts.map
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @file Hard fallback metadata synthesizers for the per-language SEO
|
|
5
|
+
* resolver. These run only when every resolved title/description candidate is
|
|
6
|
+
* contaminated with pipeline jargon, producing reader-facing, search-safe copy
|
|
7
|
+
* from article context instead of leaking internal pipeline artifacts.
|
|
8
|
+
*/
|
|
9
|
+
import { budgetFor, classifyScript, clampForBudget } from './seo-budgets.js';
|
|
10
|
+
import { composeContextualDescription, deriveHeadlineFromSummary, ensureDescriptionTerminator, hasLeakySeoToken, padDescriptionToFloor, sanitizeDescriptionCandidate, sanitizeTitleCandidate, } from './resolve-helpers.js';
|
|
11
|
+
import { truncateTitle } from './text-utils.js';
|
|
12
|
+
/**
 * Turn a hyphenated article-type slug into a human-readable label for
 * fallback metadata synthesis.
 *
 * Empty segments (from leading, trailing or doubled hyphens) are dropped;
 * only the first character of each remaining segment is upper-cased and the
 * rest of the segment is kept as written.
 *
 * @param articleType - Canonical article-type slug
 * @returns Title-cased label with spaces instead of hyphens
 */
export function humanizeArticleTypeLabel(articleType) {
    const words = [];
    for (const segment of articleType.split('-')) {
        if (segment.length === 0) {
            continue;
        }
        words.push(segment[0].toUpperCase() + segment.slice(1));
    }
    return words.join(' ');
}
|
|
25
|
+
/**
 * Format `YYYY-MM-DD` into `Mon YYYY`; falls back to the raw date when the
 * value cannot be parsed.
 *
 * Formats with the target language (falling back to `en`) so the synthesized
 * fallback title stays locale-appropriate for Latin non-EN locales (e.g.
 * `sv`/`fr`) instead of emitting an English month label on every page.
 *
 * The date is interpreted as midnight UTC and formatted in UTC so the
 * month label does not shift with the host time zone.
 *
 * @param date - ISO article date
 * @param lang - Target language code driving the month-label locale
 * @returns Month/year label suitable for fallback titles
 */
export function formatMonthYear(date, lang) {
    const parsed = new Date(`${date}T00:00:00Z`);
    if (Number.isNaN(parsed.getTime()))
        return date;
    const buildFormatter = (locales) => new Intl.DateTimeFormat(locales, {
        month: 'short',
        year: 'numeric',
        timeZone: 'UTC',
    });
    try {
        return buildFormatter([lang, 'en']).format(parsed);
    }
    catch {
        // A structurally invalid locale tag (e.g. `en_US`, `''`) makes the
        // constructor throw instead of falling through to the next entry in
        // the list, so honour the documented `en` fallback explicitly.
        return buildFormatter('en').format(parsed);
    }
}
|
|
46
|
+
/**
 * Last-resort title synthesizer, used when every resolved title candidate
 * is contaminated.
 * Shape: `EP <Article Type>: <Top Finding> — <Mon YYYY>`; when no top
 * finding can be derived the shape is `EP <Article Type> — <date>`.
 *
 * @param input - Per-language resolver inputs
 * @param topFindingSource - Best available finding/summary source text
 * @param contextualFallback - Last-resort contextual fallback title
 * @returns Reader-facing synthesized fallback title, or
 *   `contextualFallback` for non-Latin locales and for leaky/empty results
 */
export function synthesizeFallbackTitle(input, topFindingSource, contextualFallback) {
    // The synthesized shape is Latin/English by construction (the
    // `EP <Article Type>` lead-in and the colon punctuation). On a non-Latin
    // locale it would produce a pure-ASCII `<title>`, which breaks the
    // locale-glyph contract (Gate 4a), so those locales get the localized
    // contextual fallback instead.
    const isLatinLocale = classifyScript(input.lang) === 'latin';
    if (!isLatinLocale) {
        return contextualFallback;
    }
    const finding = sanitizeTitleCandidate(deriveHeadlineFromSummary(topFindingSource));
    const typeLabel = humanizeArticleTypeLabel(input.articleType);
    const monthLabel = formatMonthYear(input.date, input.lang);
    let draft;
    if (finding) {
        draft = `EP ${typeLabel}: ${finding} — ${monthLabel}`;
    }
    else {
        draft = `EP ${typeLabel} — ${input.date}`;
    }
    // Keep the untruncated draft when truncation yields nothing.
    const title = truncateTitle(draft) || draft;
    if (!title || hasLeakySeoToken(title)) {
        return contextualFallback;
    }
    return title;
}
|
|
72
|
+
/**
 * Last-resort description synthesizer, used when the resolved description
 * leaks pipeline jargon.
 *
 * Starts from the sanitized template subtitle when it is non-empty and free
 * of leaky tokens, otherwise from a generic `EP <Article Type> update for
 * <date>.` sentence. The seed is then composed, clamped to the
 * `metaDescription` budget, given a terminator and padded to the floor.
 *
 * @param input - Per-language resolver inputs
 * @returns Reader-facing synthesized fallback description
 */
export function synthesizeFallbackDescription(input) {
    const subtitle = sanitizeDescriptionCandidate(input.template.subtitle);
    const typeLabel = humanizeArticleTypeLabel(input.articleType);
    let seed;
    if (subtitle && !hasLeakySeoToken(subtitle)) {
        seed = subtitle;
    }
    else {
        seed = `EP ${typeLabel} update for ${input.date}.`;
    }
    const composed = composeContextualDescription(input.lang, seed, { headline: '', summary: '' }, input.date, '');
    const withinBudget = clampForBudget(composed, input.lang, 'metaDescription');
    const budget = budgetFor(input.lang, 'metaDescription');
    const terminated = ensureDescriptionTerminator(input.lang, withinBudget, budget);
    return padDescriptionToFloor(terminated, input.lang);
}
|
|
89
|
+
//# sourceMappingURL=fallback-synth.js.map
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
*/
|
|
40
40
|
import type { LangTitleSubtitle, LanguageCode } from '../../types/index.js';
|
|
41
41
|
import type { MetadataManifest, ResolvedMetadataEntry } from './types.js';
|
|
42
|
+
export { appendRunNumberSuffix } from './resolve-script-utils.js';
|
|
42
43
|
/**
|
|
43
44
|
* Inputs to {@link resolveOneLanguage}. Extracting this struct keeps the
|
|
44
45
|
* resolver's per-language loop body free of long argument lists.
|
|
@@ -63,18 +64,6 @@ export interface PerLanguageInputs {
|
|
|
63
64
|
extendedSummary: string;
|
|
64
65
|
} | null;
|
|
65
66
|
}
|
|
66
|
-
/**
|
|
67
|
-
* No-op: run numbers must never appear in user-facing article titles.
|
|
68
|
-
* Titles should always be readable article headlines without workflow
|
|
69
|
-
* identifiers. This function is preserved for callsite backward
|
|
70
|
-
* compatibility.
|
|
71
|
-
*
|
|
72
|
-
* @param seoTitle - SEO title (returned unchanged)
|
|
73
|
-
* @param _lang - Language code (ignored)
|
|
74
|
-
* @param _runId - Manifest run identifier (ignored)
|
|
75
|
-
* @returns The unchanged input title
|
|
76
|
-
*/
|
|
77
|
-
export declare function appendRunNumberSuffix(seoTitle: string, _lang: LanguageCode, _runId: string): string;
|
|
78
67
|
/**
|
|
79
68
|
* Build the editorial source object for one language. Prefers a translated
|
|
80
69
|
* `executive-brief_<lang>.md` sibling, falls through to the English brief
|