npm - euparliamentmonitor - Versions diffs - 0.9.12 → 0.9.13 - Mend

euparliamentmonitor 0.9.12 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/package.json +6 -3
package/scripts/aggregator/analysis-aggregator.js +7 -2
package/scripts/aggregator/article-generator.js +2 -1
package/scripts/aggregator/article-metadata.d.ts +90 -18
package/scripts/aggregator/article-metadata.js +1290 -115
package/scripts/aggregator/editorial-brief-resolver.d.ts +67 -0
package/scripts/aggregator/editorial-brief-resolver.js +218 -0
package/scripts/discover-untranslated-briefs.js +329 -0
package/scripts/generators/news-indexes.d.ts +28 -0
package/scripts/generators/news-indexes.js +110 -20
package/scripts/templates/sync-template-frontmatter.js +4 -4
package/scripts/validate-brief-translations.js +517 -0
package/scripts/validate-manifest-seo.js +581 -0

package/scripts/aggregator/article-metadata.js CHANGED Viewed

@@ -9,48 +9,82 @@
  * published article carry a unique, content-reflective headline and
  * description in every language variant.
  *
- * Priority ladder (per language, highest wins):
+ * Priority ladder (per language, highest wins) — matches the editorial
+ * contract documented in
+ * [`.github/prompts/04-article-generation.md`](../../.github/prompts/04-article-generation.md) § 6.2:
  *
  * 1. **Manifest override** — `manifest.title` / `manifest.description` on
  *    the analysis-run manifest, either as a plain string (applied to every
  *    language) or a `LanguageMap<string>` object for explicit per-language
- *    values. Authored by Stage-B agents when they have an editorial
- *    headline for the day.
- * 2. **Artefact editorial H1** — first `# …` heading from the first
+ *    values.
+ * 2. **Localized executive brief** — for non-English `<lang>`, the
+ *    translated sibling `executive-brief_<lang>.md` (or
+ *    `extended/executive-brief_<lang>.md`) under the run directory.
+ *    Resolved via `editorial-brief-resolver.ts`. This is the authoritative
+ *    localized source produced by the `news-translate` workflow.
+ * 3. **English executive brief, verbatim** — the English brief
+ *    (`executive-brief.md` / `extended/executive-brief.md`) used as a
+ *    fall-through when a locale has no translated brief yet. Recorded in
+ *    `metadataFallback[<lang>] = "en"` so editors can audit which locales
+ *    fell through.
+ * 4. **Artefact editorial H1** — first `# …` heading from the first
  *    substantive artefact under the run directory (e.g.
  *    `intelligence/synthesis-summary.md`, `breaking-news-analysis.md`).
  *    Accepted only when the heading is not a generic
  *    `${humanize(articleType)} — ${date}` form.
- * 3. **Aggregated-markdown H1** — the first `# …` heading in the aggregator
- *    output, accepted under the same non-generic rule. In practice this
- *    tier rarely fires because the aggregator itself writes the generic
- *    default, but it covers hand-edited or historic aggregates.
- * 4. **First strong prose paragraph** — the first line of the aggregated
- *    Markdown that survives {@link shouldSkipDescriptionLine}. Used for
- *    `description`; also used for `title` as a last editorial-content
- *    resort when every heading-level source is generic.
- * 5. **Localized template** — the per-article-type `*_TITLES` generator
- *    from `src/constants/language-articles.ts`. Always parameterised by
- *    date (or derived values), so the title changes from run to run even
- *    when this last tier fires — but still the "boring repeated" option.
- *
- * Artifact-derived highlights (tiers 2–4) are used as page-specific
- * context across all 14 variants: English can use them directly, while
- * non-English variants keep the localized article-type template and append
- * the editorial topic/summary. This prevents duplicate metadata across
- * same-type pages while keeping the surrounding snippet language-specific
- * until full per-language body translations are present.
+ * 5. **Aggregated-markdown H1** — the first `# …` heading in the aggregator
+ *    output, accepted under the same non-generic rule.
+ * 6. **First strong prose paragraph** — the first line of the aggregated
+ *    Markdown that survives {@link shouldSkipDescriptionLine}.
+ * 7. **Localized template** — the per-article-type `*_TITLES` generator
+ *    from `src/constants/language-articles.ts`. Last resort.
+ *
+ * Tiers 2–6 produce the same shape ({headline, summary}); the resolver
+ * picks the highest-available tier per language. When a localized brief
+ * (tier 2) is present, the headline replaces the localized template
+ * verbatim — no concatenation. Locales without a translated brief inherit
+ * the English brief content (tier 3) so SEO surfaces never fall back to
+ * boring type-level templates while real editorial content exists.
  */
 import fs from 'fs';
 import path from 'path';
 import { ALL_LANGUAGES, getLocalizedString } from '../constants/language-core.js';
 import { BREAKING_NEWS_TITLES, COMMITTEE_REPORTS_TITLES, ELECTION_CYCLE_TITLES, LOCALIZED_KEYWORDS, MONTH_AHEAD_TITLES, MONTHLY_REVIEW_TITLES, MOTIONS_TITLES, PROPOSITIONS_TITLES, QUARTER_AHEAD_TITLES, QUARTER_IN_REVIEW_TITLES, TERM_OUTLOOK_TITLES, WEEK_AHEAD_TITLES, WEEKLY_REVIEW_TITLES, YEAR_AHEAD_TITLES, YEAR_IN_REVIEW_TITLES, } from '../constants/language-articles.js';
+import { resolveLocalizedBriefHighlight } from './editorial-brief-resolver.js';
 /** Maximum `<meta description>` length we will emit. */
 const DESCRIPTION_MAX_LENGTH = 180;
 /** Target minimum `<meta description>` length before we append context. */
 const DESCRIPTION_MIN_LENGTH = 140;
+/**
+ * Length below which a raw description is considered too short to stand
+ * on its own and gets enriched with date/context. Independent from
+ * {@link DESCRIPTION_MIN_LENGTH} (which controls sentence-boundary
+ * truncation behaviour). Set lower than DESCRIPTION_MIN_LENGTH so a
+ * clean 100-140 char prose lede is preserved verbatim instead of being
+ * padded with date/context boilerplate.
+ */
+const ENRICHMENT_TRIGGER_LENGTH = 100;
 /** Maximum `<title>` length — anything longer is truncated with an ellipsis. */
 const TITLE_MAX_LENGTH = 140;
+/**
+ * Soft target for headline-style titles produced as a fallback from
+ * BLUF/lede prose. When the candidate exceeds `TITLE_MAX_LENGTH`, the
+ * truncator first looks for a natural clause boundary
+ * (`.`, `:`, `—`, `;`) inside the `[HEADLINE_SOFT_MIN, TITLE_MAX_LENGTH]`
+ * window and breaks there instead of mid-clause-with-ellipsis. This
+ * turns a 137-character truncated prose paragraph into a complete
+ * journalistic clause, which scans much better in news cards and SERP
+ * snippets without sacrificing the keyword-rich opening.
+ */
+const HEADLINE_SOFT_MIN = 60;
+/**
+ * Punctuation marks that signal a natural clause boundary inside a
+ * BLUF / lede paragraph. Listed in preferred-break order: a colon or
+ * em-dash that introduces a list of consequences is the best break,
+ * full stops are next, and semicolons last. Single ASCII space is
+ * always a fallback boundary handled separately.
+ */
+const HEADLINE_CLAUSE_BOUNDARIES = [': ', ' — ', ' – ', '. ', '; '];
 /** Localized labels used to enrich short or duplicate-prone meta descriptions. */
 const SEO_CONTEXT_LABELS = {
     en: {
@@ -236,6 +270,8 @@ const ARTIFACT_CATEGORY_PREFIXES = [
     'economic context',
     'executive brief',
     'executive briefing',
+    'executive intelligence brief',
+    'executive intelligence briefing',
     'executive summary',
     'forward indicators',
     'historical baseline',
@@ -393,8 +429,37 @@ export function shouldSkipDescriptionLine(line) {
     }
     if (/^[-*_=~.]{3,}$/.test(line))
         return true;
+    if (isLocalizedBannerRow(line))
+        return true;
     return false;
 }
+/**
+ * Language-agnostic banner-row detector. Stage-B artefacts open with a
+ * metadata banner of the shape
+ *   `**Date:** 2026-05-15 | **Type:** Breaking | **Run:** breaking-run-001`
+ * and its localized siblings — notably Japanese / Chinese / Korean briefs
+ * which place the full-width colon `：` **inside** the bold span
+ * (`**日付：**`) rather than after it. The `METADATA_LINE_PREFIXES` table
+ * only covers the English vocabulary; this helper catches the structural
+ * shape directly: a line that starts with `**`, contains at least one
+ * `|` separator, and carries two-or-more bold key markers that end with
+ * — or are followed by — an ASCII colon `:` or full-width colon `：`.
+ * Banner rows look identical in every language we publish, so detecting
+ * them here keeps localized briefs from leaking their first banner line
+ * into the `<meta description>`.
+ *
+ * @param line - Trimmed source line
+ * @returns `true` when the line is a banner row in any locale
+ */
+function isLocalizedBannerRow(line) {
+    // A banner row must open with a bold marker and contain a pipe separator.
+    const hasBannerShape = line.startsWith('**') && line.includes('|');
+    if (!hasBannerShape) {
+        return false;
+    }
+    const countMatches = (pattern) => (line.match(pattern) ?? []).length;
+    // Bold keys with the colon inside the span (`**Date:**`, `**日付：**`).
+    const colonInside = countMatches(/\*\*[^*]+[:：]\s*\*\*/g);
+    // Bold keys with the colon after the span (`**Date**:`).
+    const colonAfter = countMatches(/\*\*[^*]+\*\*\s*[:：]/g);
+    return colonInside + colonAfter >= 2;
+}
 /**
  * Strip inline Markdown decorations so we can use the remaining text as
  * plain-text meta-tag content. Removes link syntax, emphasis, inline code
@@ -544,12 +609,141 @@ export function truncateDescription(text) {
 export function truncateTitle(text) {
     if (text.length <= TITLE_MAX_LENGTH)
         return text;
+    // Prefer ending at a natural clause boundary inside the
+    // `[HEADLINE_SOFT_MIN, TITLE_MAX_LENGTH]` window so the truncated
+    // title reads as a complete journalistic clause rather than a
+    // mid-sentence prose snippet. Iterate boundaries in priority order;
+    // when a candidate falls in the window, break there and drop the
+    // ellipsis since the result is grammatically complete.
+    const search = text.slice(0, TITLE_MAX_LENGTH);
+    for (const boundary of HEADLINE_CLAUSE_BOUNDARIES) {
+        const idx = search.lastIndexOf(boundary);
+        if (idx >= HEADLINE_SOFT_MIN) {
+            const clean = stripTrailingStopWordsAndPunctuation(text.slice(0, idx));
+            if (clean.length >= HEADLINE_SOFT_MIN)
+                return clean;
+        }
+    }
+    // No clause boundary produced a long-enough title: hard-cut one
+    // character short of the limit (leaving room for the ellipsis), back
+    // up to the last space when that space lies beyond index
+    // `TITLE_MAX_LENGTH - 40`, and mark the cut with `…`.
     const cut = text.slice(0, TITLE_MAX_LENGTH - 1);
     const lastSpace = cut.lastIndexOf(' ');
     let safe = lastSpace > TITLE_MAX_LENGTH - 40 ? cut.slice(0, lastSpace) : cut;
     safe = stripTrailingStopWordsAndPunctuation(safe);
     return `${safe}…`;
 }
+/**
+ * Return the first complete sentence from a prose paragraph, suitable
+ * for use as a fallback editorial title when the artefact H1 is
+ * categorical (e.g. `# EU Parliament Committee Reports`) and the
+ * resolver must derive `<title>` from the BLUF / lede summary instead.
+ *
+ * A "sentence" is the prefix up to the first sentence-terminator
+ * (`. `, `! `, `? `, `; `) inside the `[HEADLINE_SOFT_MIN,
+ * TITLE_MAX_LENGTH]` window. Common abbreviations (`Q1.`, `Q2.`,
+ * `H1.`, `H2.`, `Mr.`, `Mrs.`, `e.g.`, `i.e.`, `vs.`) are skipped
+ * so they don't terminate the sentence prematurely. When no
+ * acceptable terminator exists in the window, returns the entire
+ * input unchanged so {@link truncateTitle} can handle clause-boundary
+ * truncation downstream.
+ *
+ * This produces journalistically clean titles even for the
+ * propositions / committee-reports cases where the BLUF paragraph
+ * opens with a single long sentence that exceeds 140 chars —
+ * `truncateTitle` then breaks on a clause boundary, and the result is
+ * still grammatical because the input was a sentence prefix rather
+ * than an arbitrary paragraph slice.
+ *
+ * @param paragraph - Prose paragraph (post-{@link stripInlineMarkdown})
+ * @returns First sentence, or the original paragraph when none can be
+ *   identified within the soft-min window
+ */
+export function extractFirstSentence(paragraph) {
+    const text = paragraph.trim();
+    // Short inputs already fit a headline; hand them back untouched.
+    if (text.length <= HEADLINE_SOFT_MIN) {
+        return text;
+    }
+    // Only look for a terminator within 1.5× the title limit; anything
+    // longer is left for truncateTitle to clause-truncate downstream.
+    const scope = text.slice(0, Math.floor(TITLE_MAX_LENGTH * 1.5));
+    // First occurrence of `mark` at or after the soft minimum that is not
+    // rejected by isAbbreviationBoundary, or -1 when there is none.
+    const firstAcceptedTerminator = (mark) => {
+        let pos = scope.indexOf(mark, HEADLINE_SOFT_MIN);
+        while (pos !== -1 && isAbbreviationBoundary(scope, pos)) {
+            pos = scope.indexOf(mark, pos + mark.length);
+        }
+        return pos;
+    };
+    const hits = ['. ', '! ', '? ', '; ']
+        .map(firstAcceptedTerminator)
+        .filter((pos) => pos !== -1);
+    if (hits.length === 0) {
+        return text;
+    }
+    // Keep the terminator character itself, drop the following space.
+    return text.slice(0, Math.min(...hits) + 1).trim();
+}
+/**
+ * Abbreviation tokens (lowercase, including the trailing period) that
+ * should NOT count as sentence terminators when {@link extractFirstSentence}
+ * scans for a `.` boundary. Single-letter all-caps initials
+ * (`U.S.`, `E.U.`) are handled by the all-caps-initial check below.
+ */
+const ABBREVIATION_PREFIXES = [
+    'mr.',
+    'mrs.',
+    'ms.',
+    'dr.',
+    'st.',
+    'no.',
+    'vs.',
+    'e.g.',
+    'i.e.',
+    'etc.',
+    'cf.',
+    'al.',
+    // EP fiscal-year and quarter shorthand: Q1., Q2., Q3., Q4., H1., H2., FY.
+    'q1.',
+    'q2.',
+    'q3.',
+    'q4.',
+    'h1.',
+    'h2.',
+    'fy.',
+];
+/**
+ * Decide whether the `.` at `idx` in `text` closes an abbreviation
+ * rather than a sentence. Two rules apply:
+ *
+ * 1. A single capital-letter initial (`U.S.`, `E.U.`) — the character
+ *    before the period is `A`–`Z` and is itself preceded by the start of
+ *    the string, a space, or another period.
+ * 2. The text ending at `idx` matches an {@link ABBREVIATION_PREFIXES}
+ *    entry (case-insensitively) at the start of a word. Entries are
+ *    compared by suffix so tokens containing digits (`Q1.`) or interior
+ *    periods (`e.g.`) are recognised; a letters-only backward scan would
+ *    stop at the digit or inner period and never see the whole token.
+ *
+ * Any character other than `.` at `idx` (`!`, `?`, `;`) is never an
+ * abbreviation boundary.
+ *
+ * @param text - Source text in its original mixed case (comparison
+ *   against the table lower-cases the candidate token internally)
+ * @param idx - Index of the candidate terminator character in `text`
+ * @returns `true` when the period at `idx` is part of an abbreviation
+ */
+function isAbbreviationBoundary(text, idx) {
+    // Only a period can close an abbreviation; callers also probe `!`,
+    // `?` and `;` positions, which must always count as terminators.
+    if (text[idx] !== '.')
+        return false;
+    // All-caps single-letter initial like `U.S.` or `E.U.` — char at
+    // idx-1 is a capital letter, and idx-2 is either start of string,
+    // whitespace, or another single-letter+period pair.
+    if (idx >= 1) {
+        const prev = text.charCodeAt(idx - 1);
+        const isUpperLetter = prev >= 65 && prev <= 90;
+        if (isUpperLetter && (idx === 1 || text[idx - 2] === ' ' || text[idx - 2] === '.')) {
+            return true;
+        }
+    }
+    // Table lookup by suffix: the abbreviation must end exactly at `idx`
+    // and begin at a word start, so `chemist.` does not match `st.`.
+    return ABBREVIATION_PREFIXES.some((abbr) => {
+        const start = idx + 1 - abbr.length;
+        if (start < 0)
+            return false;
+        if (text.slice(start, idx + 1).toLowerCase() !== abbr)
+            return false;
+        return start === 0 || !/[a-zA-Z0-9]/.test(text[start - 1]);
+    });
+}
 /**
  * Return the first Markdown H1 (`# …`) in the supplied text, stripped of
  * the leading `#` and trailing anchor syntax. Returns an empty string when
@@ -573,15 +767,48 @@ export function extractFirstH1(markdown) {
     return '';
 }
 /**
- * Walk every line of the Markdown source and return the first line that
- * survives {@link shouldSkipDescriptionLine}. Inline Markdown decorations
- * are stripped and the result is truncated to fit `<meta description>`.
+ * Process one Markdown line against the in-progress paragraph buffer.
+ * Returns the desired loop control: `'continue'` (skip silently),
+ * `'break'` (paragraph terminated — emit), or `'collected'` (line was
+ * pushed into the buffer; caller checks the cap separately).
+ *
+ * Factored out of the two extractors to reduce cognitive complexity.
+ *
+ * @param line - Trimmed Markdown line
+ * @param buf - In-progress paragraph buffer (mutated on `'collected'`)
+ * @returns Loop control directive
+ */
+function collectProseLine(line, buf) {
+    const paragraphStarted = buf.lines.length !== 0;
+    // A blank or skippable line ends a paragraph in progress, and is
+    // silently ignored while no paragraph has started yet.
+    if (line === '' || shouldSkipDescriptionLine(line)) {
+        return paragraphStarted ? 'break' : 'continue';
+    }
+    const text = stripLeadingProseLabel(stripInlineMarkdown(line));
+    // The opening line of a paragraph must carry at least 40 characters.
+    if (!paragraphStarted && text.length < 40) {
+        return 'continue';
+    }
+    buf.lines.push(text);
+    // +1 accounts for the joining space added when the lines are merged.
+    buf.byteCount += text.length + 1;
+    return 'collected';
+}
+/**
+ * Walk every line of the Markdown source and return the first paragraph
+ * that survives {@link shouldSkipDescriptionLine}. Consecutive non-blank
+ * prose lines are joined with a single space so hard-wrapped ledes
+ * (column-95 conventional wrap) produce a clean 140-180-character
+ * description rather than just the first 60-90-char line.
+ *
+ * Inline Markdown decorations are stripped and the result is truncated
+ * to fit `<meta description>`.
  *
  * @param markdown - Markdown source
  * @returns Prose description, or empty string when nothing qualifies
  */
 export function extractStrongProseLine(markdown) {
     let inFence = false;
+    const buf = { lines: [], byteCount: 0 };
     for (const raw of markdown.split('\n')) {
         const line = raw.trim();
         if (line.startsWith('```') || line.startsWith('~~~')) {
@@ -590,58 +817,93 @@ export function extractStrongProseLine(markdown) {
         }
         if (inFence)
             continue;
-        if (shouldSkipDescriptionLine(line))
-            continue;
-        const plain = stripLeadingProseLabel(stripInlineMarkdown(line));
-        if (plain.length < 40)
+        const directive = collectProseLine(line, buf);
+        if (directive === 'continue')
             continue;
-        return truncateDescription(plain);
+        if (directive === 'break')
+            break;
+        if (buf.byteCount >= DESCRIPTION_MAX_LENGTH)
+            break;
     }
-    return '';
+    if (buf.lines.length === 0)
+        return '';
+    return truncateDescription(buf.lines.join(' '));
 }
 /**
- * Walk the body of an editorial artefact and, when it contains a `## …`
- * heading whose text matches one of `EDITORIAL_LEDE_HEADINGS`,
- * return the first prose paragraph that follows that heading. This is
- * the journalist's lede ("60-Second Read", "TL;DR", "BLUF — …", …) and
- * is exactly the sentence that should power `<meta description>` and
- * the OG/Twitter description fields.
+ * Classify one Markdown line for the {@link extractLedeAfterHeading}
+ * walker. The returned directive is then applied to walker state by
+ * {@link applyLedeDirective}.
  *
- * Returns the empty string when no lede heading is found or no qualifying
- * prose follows it. Inline Markdown is stripped and the result is
- * truncated to fit `<meta description>`.
+ * @param line - Trimmed Markdown line
+ * @param isInFence - True when the previous line opened a fenced block
+ * @param inLede - True when the previous line was inside a lede heading block
+ * @param hasBuffered - True when at least one prose line has been collected
+ * @returns Directive describing how the walker should treat this line
+ */
+function classifyLedeLine(line, isInFence, inLede, hasBuffered) {
+    // Fence delimiters are reported even while inside a fence so the
+    // walker can toggle its fence state back off.
+    if (line.startsWith('```') || line.startsWith('~~~'))
+        return { kind: 'fence' };
+    if (isInFence)
+        return { kind: 'pause' };
+    if (/^#{2,3}\s+/.test(line)) {
+        // A new `##`/`###` heading after prose has been buffered ends the
+        // lede paragraph. Report it as a heading (never as `pause`) so
+        // the walker stops instead of appending the next section's prose
+        // to the lede when no blank line separates them.
+        if (hasBuffered)
+            return { kind: 'heading', inLede: false };
+        const headingText = normaliseHeadingText(line.replace(/^#{2,3}\s+/, ''));
+        const match = EDITORIAL_LEDE_HEADINGS.some((h) => isLedeHeadingMatch(headingText, h));
+        return { kind: 'heading', inLede: match };
+    }
+    // Ordinary line: only collected while inside a lede heading block.
+    return inLede ? { kind: 'collect' } : { kind: 'pause' };
+}
+/**
+ * Apply one directive emitted by {@link classifyLedeLine} to the walk
+ * state. Returns `'break'` to stop the walk, `'continue'` to skip to
+ * the next line, or `'collect'` when the caller should now run
+ * {@link collectProseLine}. Mutates `state` for fence/in-lede toggles.
  *
- * @param markdown - Editorial artefact source
- * @returns Lede paragraph, or empty string when none matched
+ * @param directive - Classification of the current line
+ * @param state - Walk state (mutated in place)
+ * @param state.inFence - True when the current line is inside a fenced block
+ * @param state.inLede - True when the current line is inside a lede heading block
+ * @param hasBuffered - Whether any prose has already been collected
+ * @returns Loop control directive
  */
+function applyLedeDirective(directive, state, hasBuffered) {
+    switch (directive.kind) {
+        case 'fence':
+            // Entering or leaving a fenced block flips the flag.
+            state.inFence = !state.inFence;
+            return 'continue';
+        case 'heading':
+            // A heading after collected prose ends the walk; otherwise it
+            // decides whether the following lines belong to a lede.
+            if (hasBuffered) {
+                return 'break';
+            }
+            state.inLede = directive.inLede;
+            return 'continue';
+        case 'pause':
+            return 'continue';
+        default:
+            return 'collect';
+    }
+}
+/**
+ * Return the editorial lede paragraph that follows a lede heading.
+ *
+ * Each trimmed line is classified by {@link classifyLedeLine} and applied
+ * to the walk state by {@link applyLedeDirective}; lines that should be
+ * collected are handed to {@link collectProseLine}. The walk stops when
+ * either helper signals `'break'` or once the buffered character count
+ * reaches `DESCRIPTION_MAX_LENGTH`. Collected lines are joined with a
+ * single space and passed through {@link truncateDescription}.
+ *
+ * @param markdown - Editorial artefact source
+ * @returns Lede paragraph, or empty string when no prose was collected
+ */
 export function extractLedeAfterHeading(markdown) {
-    const lines = markdown.split('\n');
-    let inLede = false;
-    let inFence = false;
-    for (let i = 0; i < lines.length; i++) {
-        const raw = lines[i] ?? '';
+    const state = { inFence: false, inLede: false };
+    const buf = { lines: [], byteCount: 0 };
+    for (const raw of markdown.split('\n')) {
         const line = raw.trim();
-        if (line.startsWith('```') || line.startsWith('~~~')) {
-            inFence = !inFence;
-            continue;
-        }
-        if (inFence)
-            continue;
-        if (/^#{2,3}\s+/.test(line)) {
-            const headingText = normaliseHeadingText(line.replace(/^#{2,3}\s+/, ''));
-            inLede = EDITORIAL_LEDE_HEADINGS.some((h) => headingText === h || headingText.startsWith(`${h} `) || headingText.startsWith(`${h}:`));
-            continue;
-        }
-        if (!inLede)
+        const directive = classifyLedeLine(line, state.inFence, state.inLede, buf.lines.length > 0);
+        const action = applyLedeDirective(directive, state, buf.lines.length > 0);
+        if (action === 'break')
+            break;
+        if (action === 'continue')
             continue;
-        if (shouldSkipDescriptionLine(line))
+        const collect = collectProseLine(line, buf);
+        if (collect === 'continue')
             continue;
-        const plain = stripLeadingProseLabel(stripInlineMarkdown(line));
-        if (plain.length < 40)
-            continue;
-        return truncateDescription(plain);
+        if (collect === 'break')
+            break;
+        if (buf.byteCount >= DESCRIPTION_MAX_LENGTH)
+            break;
     }
-    return '';
+    if (buf.lines.length === 0)
+        return '';
+    return truncateDescription(buf.lines.join(' '));
 }
 /**
  * Normalise a Markdown heading's text for comparison against the
@@ -660,6 +922,32 @@ function normaliseHeadingText(raw) {
         .trim()
         .toLowerCase();
 }
+/**
+ * Word-boundary match against an editorial-lede whitelist entry. Matches
+ * when the normalised heading equals the whitelist entry exactly, or when
+ * the entry is followed by any non-alphanumeric character — covering
+ * localized parenthetical glosses written with ASCII or full-width
+ * punctuation (e.g. `bluf (bottom line up front)`, `bluf（結論先出し）`,
+ * `bluf — 핵심 결론`, `60-second read — what happened`).
+ *
+ * @param headingText - Normalised heading text (lower-case, decoration-stripped)
+ * @param whitelistEntry - Lower-case whitelist entry from
+ *                        {@link EDITORIAL_LEDE_HEADINGS}
+ * @returns `true` when `headingText` begins with `whitelistEntry` at a
+ *          word boundary
+ */
+function isLedeHeadingMatch(headingText, whitelistEntry) {
+    if (!headingText.startsWith(whitelistEntry)) {
+        return false;
+    }
+    // `charAt` yields '' when the heading ends exactly where the entry
+    // ends, which is the exact-match case.
+    const follower = headingText.charAt(whitelistEntry.length);
+    // Any character that is not an ASCII letter/digit counts as a word
+    // boundary: ASCII or full-width brackets, dashes, `:` and `：`.
+    return follower === '' || !/[a-z0-9]/.test(follower);
+}
 /**
  * Return `true` when an artefact-H1 begins with one of the
  * `ARTIFACT_CATEGORY_PREFIXES` followed by a separator. Such H1s
@@ -830,8 +1118,151 @@ export function isGenericHeading(heading, articleType, date) {
     if (trailingDateOnly.test(normalized)) {
         return true;
     }
+    if (isCategoryNounHeading(normalized, articleType))
+        return true;
+    if (isBareInstitutionalHeading(normalized))
+        return true;
     return false;
 }
+/**
+ * Lower-cased institutional self-references that an executive-brief
+ * authoring template sometimes emits as the H1 when the agent forgot to
+ * substitute a real headline. They identify the publisher / institution
+ * but carry **zero editorial information** — they would produce
+ * pathological `<title>EU Parliament</title>` strings if surfaced.
+ * Matched after whitespace collapse + lowercase, with any trailing
+ * punctuation / single-date qualifier stripped so `EU Parliament ·
+ * 2026-05-15` and `Hack23 AB —` both resolve here. Parenthesised
+ * month/year qualifiers (`(May 2026)`, `(2026)`) are stripped as well;
+ * date *ranges* (`: 19–22 May 2026`) are preserved as editorial
+ * content, matching the {@link isCategoryNounHeading} contract.
+ */
+const BARE_INSTITUTIONAL_HEADINGS = [
+    'eu parliament',
+    'european parliament',
+    'the european parliament',
+    'ep',
+    'ep10',
+    'ep11',
+    'hack23',
+    'hack23 ab',
+    'eu parliament monitor',
+    'european parliament monitor',
+    'executive brief',
+    'briefing',
+    'intelligence brief',
+    'intelligence briefing',
+];
+/**
+ * Tell whether a heading is nothing more than one of the
+ * {@link BARE_INSTITUTIONAL_HEADINGS} entries. The heading is
+ * lower-cased, then a trailing `YYYY-MM-DD` qualifier, a trailing
+ * `(month YYYY)` or `(YYYY)` parenthetical, and any trailing
+ * whitespace / dash / colon / middle-dot / period residue are removed
+ * before the exact lookup. Anything else after the institutional noun
+ * is left in place, so such headings do not match.
+ *
+ * @param normalized - Heading text after whitespace collapse
+ * @returns `true` when the heading is bare institutional boilerplate
+ */
+function isBareInstitutionalHeading(normalized) {
+    // Applied in order; each pattern only strips a trailing qualifier.
+    const trailingQualifiers = [
+        /\s*[·:—–-]\s*\d{4}-\d{2}-\d{2}\s*$/u,
+        /\s*\(\s*[a-z]{3,9}\s+\d{4}\s*\)\s*$/u,
+        /\s*\(\s*\d{4}\s*\)\s*$/u,
+        /[\s\-—–:·.]+$/u,
+    ];
+    const bare = trailingQualifiers
+        .reduce((text, pattern) => text.replace(pattern, ''), normalized.toLowerCase())
+        .trim();
+    return BARE_INSTITUTIONAL_HEADINGS.includes(bare);
+}
+/**
+ * Curated category-noun whitelist per article-type slug. These are the
+ * boring "EU Parliament &lt;Type&gt;" / "EP10 &lt;Type&gt;" headings that the
+ * executive-brief authoring conventions allow as decorative H1s but
+ * which carry **no editorial information** — they merely restate the
+ * article category. When such a heading reaches the metadata resolver
+ * it must be flagged generic so the resolver falls through to the
+ * BLUF / lede summary instead of using the category noun as `<title>`.
+ *
+ * Keys are slugs (`article-type` form). Values are lowercase category
+ * cores, matched after stripping institutional prefixes
+ * (`eu parliament `, `european parliament `, `ep `, `ep10 `, `ep11 `)
+ * and trailing single-date qualifiers (`· 2026-05-15`, `— 2026-05-15`,
+ * `(May 2026)`). Date ranges such as `: 19–22 May 2026` are **kept**
+ * because they carry editorial info — only single-date suffixes are stripped.
+ */
+const CATEGORY_NOUN_CORES = {
+    breaking: ['breaking', 'breaking news'],
+    'week-in-review': ['week in review'],
+    'week-ahead': ['week ahead'],
+    'month-in-review': ['month in review'],
+    'month-ahead': ['month ahead'],
+    'quarter-in-review': ['quarter in review'],
+    'quarter-ahead': ['quarter ahead'],
+    'year-in-review': ['year in review'],
+    'year-ahead': ['year ahead'],
+    'committee-reports': [
+        'committee reports',
+        'committee activity',
+        'committee activity report',
+        'committee activity reports',
+    ],
+    motions: [
+        'motions',
+        'motions and adopted texts',
+        'plenary votes and resolutions',
+        'plenary votes resolutions',
+    ],
+    propositions: ['propositions', 'legislative propositions', 'legislative procedures'],
+    'election-cycle': ['election cycle'],
+    'term-outlook': ['term outlook'],
+};
+/**
+ * Tell whether a heading merely restates the category noun registered
+ * for `articleType` in {@link CATEGORY_NOUN_CORES}. The heading is
+ * lower-cased, the first matching institutional prefix is removed, and
+ * a trailing `YYYY-MM-DD` qualifier, `(month YYYY)` / `(YYYY)`
+ * parenthetical and punctuation residue are stripped before an exact
+ * lookup against the slug's cores. Other trailing text — for example a
+ * date range such as `: 19–22 May 2026` — is not stripped, so those
+ * headings do not match.
+ *
+ * @param normalized - Heading text after whitespace collapse
+ * @param articleType - Article-type slug
+ * @returns `true` when the heading is category-noun boilerplate
+ */
+function isCategoryNounHeading(normalized, articleType) {
+    const knownCores = CATEGORY_NOUN_CORES[articleType];
+    if (!knownCores || knownCores.length === 0) {
+        return false;
+    }
+    const lowered = normalized.toLowerCase();
+    // Ordered so that the first hit wins; only one prefix is removed.
+    const institutionalPrefixes = [
+        "the european parliament's ",
+        'european parliament ',
+        'eu parliament ',
+        'ep11 ',
+        'ep10 ',
+        'ep ',
+    ];
+    const matchedPrefix = institutionalPrefixes.find((prefix) => lowered.startsWith(prefix));
+    const withoutPrefix = matchedPrefix === undefined ? lowered : lowered.slice(matchedPrefix.length);
+    const candidate = withoutPrefix
+        // ` · 2026-05-15`, ` — 2026-05-15`, ` - 2026-05-15`, `: 2026-05-15`
+        .replace(/\s*[·:—–-]\s*\d{4}-\d{2}-\d{2}\s*$/u, '')
+        // ` (may 2026)`
+        .replace(/\s*\(\s*[a-z]{3,9}\s+\d{4}\s*\)\s*$/u, '')
+        // ` (2026)`
+        .replace(/\s*\(\s*\d{4}\s*\)\s*$/u, '')
+        // Trailing punctuation residue.
+        .replace(/[\s\-—–:·]+$/u, '')
+        .trim();
+    return knownCores.includes(candidate);
+}
 /**
  * Escape regex metacharacters so a dynamic string can be embedded safely
  * in a pattern built at runtime.
@@ -858,7 +1289,17 @@ export function extractArtifactHighlight(runDir, articleType, date) {
     const direct = scanCandidatesForHighlight(runDir, EDITORIAL_ARTEFACT_CANDIDATES, articleType, date);
     if (direct.headline)
         return { headline: direct.headline, summary: direct.summary };
-    const topLevel = safeReaddir(runDir).filter((f) => f.endsWith('.md') && f !== 'manifest.json');
+    // Top-level fallback scan — used only when none of the canonical
+    // editorial artefacts produced a non-generic H1. We must NOT pick up
+    // translated sibling briefs (`executive-brief_<lang>.md`,
+    // `synthesis-summary_<lang>.md`, …) here, because their H1s are
+    // legitimate localized headlines that the English-only
+    // {@link isGenericHeading} detector cannot recognise as boilerplate.
+    // Letting them through poisoned the English `<title>` and
+    // `<meta description>` for the 2026-05-15 batch with Arabic content
+    // from `executive-brief_ar.md`. See {@link isTranslatedSiblingBrief}
+    // and the regression test in `test/unit/article-metadata.test.js`.
+    const topLevel = safeReaddir(runDir).filter((f) => f.endsWith('.md') && f !== 'manifest.json' && !isTranslatedSiblingBrief(f));
     const fallback = scanCandidatesForHighlight(runDir, topLevel, articleType, date);
     if (fallback.headline)
         return { headline: fallback.headline, summary: fallback.summary };
@@ -868,6 +1309,28 @@ export function extractArtifactHighlight(runDir, articleType, date) {
     }
     return null;
 }
+/**
+ * Filename suffix pattern that identifies a translated sibling brief
+ * (e.g. `executive-brief_ar.md`, `synthesis-summary_zh.md`). The
+ * `_<lang>` token is matched against {@link ALL_LANGUAGES} so we never
+ * exclude a legitimate English artefact whose name happens to end in
+ * `_<two-letter-suffix>.md`.
+ *
+ * The language codes are interpolated into the alternation unescaped; the
+ * match is case-insensitive (`i` flag) and anchored to the end of the
+ * filename.
+ *
+ * NOTE(review): if {@link ALL_LANGUAGES} also lists the source language
+ * (presumably `en`), `<name>_en.md` is treated as a translated sibling
+ * too — confirm that is intended.
+ */
+const TRANSLATED_SIBLING_SUFFIX_RE = new RegExp(`_(${ALL_LANGUAGES.join('|')})\\.md$`, 'i');
+/**
+ * Return `true` when a top-level `.md` filename looks like a translated
+ * sibling of a canonical editorial artefact (e.g.
+ * `executive-brief_ar.md`). These files must be excluded from the
+ * top-level fallback scan in {@link extractArtifactHighlight} because
+ * their localized H1s evade the English-only generic-heading detector
+ * and would otherwise hijack the English SEO surfaces.
+ *
+ * This is a pure suffix test against
+ * {@link TRANSLATED_SIBLING_SUFFIX_RE}; that regex carries no `g` / `y`
+ * flag, so repeated calls do not share `lastIndex` state.
+ *
+ * @param filename - Run-relative `.md` filename (no path separators)
+ * @returns `true` when the file is a translated sibling brief
+ */
+export function isTranslatedSiblingBrief(filename) {
+    return TRANSLATED_SIBLING_SUFFIX_RE.test(filename);
+}
 /**
  * Walk a list of candidate artefact paths and return the first
  * non-generic headline + summary pair, plus the first usable lede
@@ -925,6 +1388,25 @@ function probeCandidateForHighlight(runDir, rel, articleType, date) {
     if (headline && !isGenericHeading(headline, articleType, date)) {
         return { cleanHighlight: { headline: truncateTitle(headline), summary } };
     }
+    // The artefact H1 is generic boilerplate (`Executive Brief — EU Parliament
+    // Breaking News`). Before falling back to a stripped category-core
+    // headline, try to surface the FIRST NAMED PRIORITY FINDING from the
+    // brief's `## Key Developments` / `## Priority Dossiers` /
+    // `## Top Findings` block. This is the canonical Stage-B authoring
+    // pattern (see `analysis/templates/executive-brief.md`) — every brief
+    // lists its top dossiers as `**Name** (procedure-code, date) — paragraph`
+    // or `### N. Name (committee)`. Surfacing that name produces a
+    // distinctive editorial headline ("Digital Markets Act Enforcement",
+    // "Ukraine War Accountability") instead of a stripped category noun.
+    const priority = extractPriorityFindingHighlight(body);
+    if (priority?.headline) {
+        return {
+            cleanHighlight: {
+                headline: truncateTitle(priority.headline),
+                summary: priority.summary || summary,
+            },
+        };
+    }
     if (headline) {
         const stripped = stripArtifactCategoryAffix(headline);
         if (stripped && !isGenericHeading(stripped, articleType, date)) {
@@ -933,6 +1415,565 @@ function probeCandidateForHighlight(runDir, rel, articleType, date) {
     }
     return { summary };
 }
+/**
+ * Section headings inside the executive brief that introduce the
+ * named-priority-finding block (matched case-insensitively against the
+ * decoration-stripped heading text, see {@link normaliseHeadingText}).
+ *
+ * Entries are lower-case. {@link headingMatchesPriorityProbe} accepts a
+ * heading when an entry equals it or occurs inside it on ASCII word
+ * boundaries, so longer phrases that merely extend a shorter entry
+ * (`priority dossiers under committee scrutiny` vs `priority dossiers`)
+ * are already covered by the shorter one.
+ */
+const PRIORITY_FINDING_SECTION_HEADINGS = [
+    'key developments',
+    'key findings',
+    'key intelligence summary',
+    'key judgements',
+    'key judgments',
+    'headline intelligence',
+    'headline judgements',
+    'headline judgments',
+    'lead story',
+    'policy intelligence alerts',
+    'priority dossiers',
+    'priority dossiers under committee scrutiny',
+    'priority findings',
+    'priority intelligence assessment',
+    'priority items',
+    'top findings',
+    'top developments',
+    'top dossiers',
+    'top trigger events',
+    'top triggers',
+    'trigger events',
+    'top documents',
+    'top procedures',
+    'top 3 triggers',
+    'wep assessment',
+    'high priority',
+    'highest priority',
+];
+/**
+ * Mine the FIRST named priority finding from an executive-brief–style
+ * artefact body. Looks for a section heading from
+ * {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
+ * name + descriptive paragraph found inside it. Supports the three
+ * canonical Stage-B authoring patterns:
+ *
+ *   1. **Bold-in-numbered-list** (breaking briefs):
+ *      `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
+ *      `   Parliament adopted a resolution …`
+ *   2. **Numbered subheading** (committee briefs):
+ *      `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
+ *      `The Clean Industrial Deal framework …`
+ *   3. **Bold-leading paragraph** (synthesis variants):
+ *      `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
+ *      `- Significance: 🟢 HIGH IMPACT …`
+ *
+ * Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
+ * `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
+ * (`Digital Markets Act Enforcement`) rather than boilerplate
+ * (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
+ *
+ * When the priority-section scan yields nothing, the body's `## …`
+ * headings are scanned for story-style titles instead (see
+ * {@link scanH2StoryHeadings}).
+ *
+ * @param body - Editorial artefact body
+ * @returns `{headline, summary}` when a priority finding was identified;
+ *   `null` when the body is empty or neither strategy finds a usable item
+ */
+export function extractPriorityFindingHighlight(body) {
+    if (!body)
+        return null;
+    const lines = body.split('\n');
+    return scanPrioritySection(lines) ?? scanH2StoryHeadings(lines);
+}
+/**
+ * Strategy 1 — scan inside the first recognised priority-finding
+ * section heading for a usable item (Pattern A/B/C/D). Returns `null`
+ * when the section is absent or contains no matchable item.
+ *
+ * Only the FIRST matching section is examined: reaching the next H2
+ * returns `null` even if a later section would also qualify.
+ *
+ * NOTE(review): {@link findPrioritySectionStart} also accepts `###` /
+ * `####` headings as the section start, but the scan here ends only at
+ * an H2, so it can run on past sibling `###` sections — confirm that is
+ * intended.
+ *
+ * @param lines - Body lines (already split on `\n`)
+ * @returns `{headline, summary}` when an item was identified
+ */
+function scanPrioritySection(lines) {
+    const sectionStart = findPrioritySectionStart(lines);
+    if (sectionStart < 0)
+        return null;
+    for (let i = sectionStart + 1; i < lines.length; i++) {
+        const line = (lines[i] ?? '').trim();
+        if (!line)
+            continue;
+        // Stop at the next H2 (sibling section) but allow `### …` and
+        // `#### …` subheadings inside (e.g. `### 🔴 HIGH PRIORITY` between
+        // the section header and the first list item).
+        if (/^##(?!#)/.test(line))
+            return null;
+        const candidate = extractPriorityFindingItem(lines, i);
+        if (candidate)
+            return candidate;
+    }
+    return null;
+}
+/**
+ * Story-keyword tokens used by `## Lead Story:` / `## Story N:` /
+ * `## Trigger N:` H2 heading detection. Kept as a runtime list so the
+ * regex stays bounded and bypasses the unsafe-regex lint by avoiding
+ * deep alternation.
+ *
+ * {@link extractH2StoryHeadline} compares these case-insensitively as a
+ * heading prefix. Every token except `Lead Story` must be followed by a
+ * number (`Story 1`, `Trigger 2`) to count as a match.
+ */
+const H2_STORY_TOKENS = [
+    'Lead Story',
+    'Story',
+    'Trigger',
+    'Alert',
+    'Judgement',
+    'Judgment',
+];
+/**
+ * Strategy 2 — walk every `## …` H2 heading and try to recognise a
+ * story-style heading (`## 📌 Lead Story: Russia Accountability`,
+ * `## Story 1 — DMA Enforcement`). Used as a fallback when no priority
+ * section was found, because motions briefs publish each lead story as
+ * its own H2 without a parent section.
+ *
+ * Only trimmed lines that start with `## ` (two hashes and a space) are
+ * considered. The first heading whose cleaned title survives
+ * {@link buildPriorityResult} wins; its summary is gathered from the
+ * lines that follow (the same-line tail is passed as empty).
+ *
+ * @param lines - Body lines (already split on `\n`)
+ * @returns `{headline, summary}` when a story heading was identified
+ */
+function scanH2StoryHeadings(lines) {
+    for (let i = 0; i < lines.length; i++) {
+        const line = (lines[i] ?? '').trim();
+        if (!line.startsWith('## '))
+            continue;
+        const headingText = line.replace(/^##\s+/u, '');
+        const storyHeadline = extractH2StoryHeadline(headingText);
+        if (!storyHeadline)
+            continue;
+        const result = buildPriorityResult(storyHeadline, '', lines, i);
+        if (result?.headline)
+            return result;
+    }
+    return null;
+}
+/**
+ * Recognise the H2-story shape (`📌 Lead Story: Title`, `Story 1 —
+ * Title`, `Trigger 2: Title`) and return the residual headline portion.
+ * Returns an empty string when the heading does not match a story
+ * keyword. Implemented as discrete string operations (rather than one
+ * dense regex) to keep the function under the unsafe-regex linter and
+ * cognitive-complexity budgets.
+ *
+ * The leading-decoration strip removes at most four characters that are
+ * not ASCII letters or digits, so it also eats leading non-ASCII
+ * letters. The separator must be followed by whitespace and at least
+ * one further character for the heading to match.
+ *
+ * @param headingText - Heading text with the leading `## ` already removed
+ * @returns Residual headline or empty string
+ */
+function extractH2StoryHeadline(headingText) {
+    // Strip a short leading decoration / emoji block (up to 4 non-alphanumerics).
+    const stripped = headingText.replace(/^[^A-Za-z0-9]{0,4}\s*/u, '');
+    for (const token of H2_STORY_TOKENS) {
+        if (!stripped.toLowerCase().startsWith(token.toLowerCase()))
+            continue;
+        let rest = stripped.slice(token.length).trim();
+        // `Story 1` / `Trigger 2` — accept and consume the trailing digit.
+        if (token !== 'Lead Story') {
+            const digit = rest.match(/^\d+\b/u);
+            if (!digit)
+                continue;
+            rest = rest.slice(digit[0].length).trim();
+        }
+        // Require an explicit `:` / `—` / `–` / `-` / `.` separator before
+        // the residual headline so plain prose H2s never match.
+        const sep = rest.match(/^[:—–\-.]\s+(.+)$/u);
+        if (sep?.[1])
+            return sep[1].trim();
+    }
+    return '';
+}
+/**
+ * Locate the line index of the first priority-finding section heading
+ * inside an artefact body. Returns `-1` when no such heading exists.
+ *
+ * Headings of level 2 to 4 (`##`, `###`, `####`) are considered; the
+ * heading text is normalised with {@link normaliseHeadingText} and then
+ * tested by {@link headingMatchesPriorityProbe}.
+ *
+ * @param lines - Body lines (already split on `\n`)
+ * @returns Line index of the first matching `##`–`####` heading, or `-1`
+ */
+function findPrioritySectionStart(lines) {
+    for (let i = 0; i < lines.length; i++) {
+        const line = (lines[i] ?? '').trim();
+        const match = line.match(/^#{2,4}\s+(.+)$/u);
+        if (!match)
+            continue;
+        const text = normaliseHeadingText(match[1] ?? '');
+        if (!text)
+            continue;
+        if (headingMatchesPriorityProbe(text))
+            return i;
+    }
+    return -1;
+}
+/**
+ * Word-boundary substring matcher for the priority-finding section
+ * detector. Extracted from {@link findPrioritySectionStart} to keep its
+ * cognitive complexity within budget.
+ *
+ * A boundary is the start or end of the text, or any character that is
+ * not an ASCII letter or digit. Only the first occurrence of each probe
+ * is boundary-checked.
+ *
+ * @param text - Heading text already normalised by {@link normaliseHeadingText}
+ * @returns `true` when one of {@link PRIORITY_FINDING_SECTION_HEADINGS}
+ *   appears as a word-bounded substring of {@link text}
+ */
+function headingMatchesPriorityProbe(text) {
+    for (const probe of PRIORITY_FINDING_SECTION_HEADINGS) {
+        if (text === probe)
+            return true;
+        const idx = text.indexOf(probe);
+        if (idx < 0)
+            continue;
+        const before = idx === 0 ? ' ' : (text[idx - 1] ?? ' ');
+        const after = text[idx + probe.length] ?? ' ';
+        if (!/[A-Za-z0-9]/.test(before) && !/[A-Za-z0-9]/.test(after))
+            return true;
+    }
+    return false;
+}
+/**
+ * Try to recognise a priority-finding item starting at {@link i}. Returns
+ * the resolved `{headline, summary}` pair when the item matches one of the
+ * authoring patterns below; returns `null` otherwise so the caller can
+ * advance to the next line. Patterns are tried in this order:
+ *
+ *   - A — numbered list item with a bold title
+ *   - B — numbered (`### 1. …`) or tagged (`### KJ-1: …`) subheading
+ *   - D — word-prefixed subheading (`### Alert 1 — …`)
+ *   - C — bold-leading paragraph
+ *
+ * The first pattern that matches decides the outcome: if its cleaned
+ * headline is rejected by {@link buildPriorityResult}, `null` is
+ * returned and the remaining patterns are not tried for that line.
+ *
+ * NOTE(review): the Pattern B numeric regex appears to still match a
+ * dotted label such as `### 2.1 Close to Adoption` (`2`, separator `.`,
+ * capture `1 Close to Adoption`), and its separator class also accepts
+ * plain whitespace — confirm against the unit tests.
+ *
+ * @param lines - Body lines (already split on `\n`)
+ * @param i - Index of the candidate line
+ * @returns Priority-finding pair when matched, `null` otherwise
+ */
+function extractPriorityFindingItem(lines, i) {
+    const line = (lines[i] ?? '').trim();
+    // Pattern A — numbered list item with bold title:
+    //   `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
+    const numberedBold = line.match(/^\d+\.\s+\*\*([^*]+?)\*\*\s*(.*)$/u);
+    if (numberedBold) {
+        return buildPriorityResult(numberedBold[1] ?? '', numberedBold[2] ?? '', lines, i);
+    }
+    // Pattern B — numbered subheading. Requires an explicit separator
+    // (`:` / `.` / `)` / `·` / `–` / `—` / `-`) after the number so
+    // dotted decimal section labels like `### 2.1 Close to Adoption`
+    // do NOT leak into the headline. Examples:
+    //   `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
+    //   `### 1 · Headline Judgements` (middle dot)
+    //   `### KJ-1: Digital Regulation Enforcement …`
+    //   `### KF-3: Banking Union Completion`
+    //   `### T-2: DMA Enforcement Resolution`
+    // Two narrow patterns instead of one wide alternation to keep the
+    // pattern within the unsafe-regex linter's complexity budget.
+    const numericHeading = line.match(/^#{3,4}\s+\d+[:.)·–—\s-]\s*(.+)$/u);
+    if (numericHeading) {
+        return buildPriorityResult(numericHeading[1] ?? '', '', lines, i);
+    }
+    const tagHeading = line.match(/^#{3,4}\s+[A-Z]{1,3}-?\d+[:.)·–—\s-]\s*(.+)$/u);
+    if (tagHeading) {
+        return buildPriorityResult(tagHeading[1] ?? '', '', lines, i);
+    }
+    // Pattern D — word-prefixed subheading (`### Alert 1 — Title 🔴`,
+    // `### Judgement 1 — Title`, `### Trigger 1: DMA Enforcement`):
+    const wordTaggedHeading = line.match(/^#{3,4}\s+(?:Alert|Judgement|Judgment|Finding|Story|Item|Trigger|Highlight|Dossier|Priority|Top)\s+\d+\s*[:.)·–—\s-]+(.+)$/iu);
+    if (wordTaggedHeading) {
+        return buildPriorityResult(wordTaggedHeading[1] ?? '', '', lines, i);
+    }
+    // Pattern C — bold-leading paragraph trigger:
+    //   `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
+    //   `**Digital Markets Act Enforcement**`
+    // Rejected when:
+    //   - the bold body is longer than a plausible headline (>110 chars) —
+    //     that's a bold paragraph lede masquerading as a headline (e.g.
+    //     `**This period captures the April 2026 Strasbourg …**`)
+    //   - the bold body is a metadata key (`**Admiralty Grade: B/2**`,
+    //     `**Reporting Window:** …`, `**Date:** …`) — these are banner
+    //     rows, not editorial headlines
+    const boldOnly = line.match(/^\*\*([^*]+?)\*\*\s*(.*)$/u);
+    if (boldOnly && !line.startsWith('**Confidence') && !isMetadataBoldLine(line)) {
+        const candidate = (boldOnly[1] ?? '').trim();
+        if (candidate.length > 0 && candidate.length <= 110) {
+            return buildPriorityResult(candidate, boldOnly[2] ?? '', lines, i);
+        }
+    }
+    return null;
+}
+/**
+ * Bold prefix tokens that indicate a metadata banner row rather than an
+ * editorial headline. The Stage-B brief template uses these consistently
+ * as the lede block (`**Reporting Window:** 3 Apr – 1 May 2026`,
+ * `**Admiralty Grade:** B/2`, `**Date:** 2026-05-15`); they must never
+ * leak into `<title>`.
+ *
+ * Entries are lower-case: {@link isMetadataBoldLine} lower-cases the bold
+ * text before comparing it against this list.
+ */
+const PRIORITY_METADATA_BOLD_PREFIXES = [
+    'admiralty',
+    'classification',
+    'confidence',
+    'data sources',
+    'data quality',
+    'date',
+    'generated',
+    'lead author',
+    'methodology',
+    'reporting window',
+    'run',
+    'session',
+    'source',
+    'sources',
+    'time horizon',
+    'wep',
+];
+/**
+ * Recognise a metadata-banner bold line (`**Admiralty Grade: B/2**`,
+ * `**Reporting Window:** 3 Apr – 1 May 2026`). The check is
+ * deliberately case-insensitive and tolerant of trailing colons inside
+ * or outside the bold delimiters.
+ *
+ * Only the text inside the first `**…**` pair is inspected. It counts as
+ * metadata when, for some entry of
+ * {@link PRIORITY_METADATA_BOLD_PREFIXES}, it equals the prefix, starts
+ * with `prefix:`, starts with `prefix ` and contains a colon anywhere,
+ * or starts with `prefix—` / `prefix —`.
+ *
+ * @param line - Trimmed source line (already known to start with `**`)
+ * @returns `true` when the line is a metadata banner that must be
+ *   skipped by Pattern C
+ */
+function isMetadataBoldLine(line) {
+    const inner = line
+        .replace(/^\*\*([^*]+?)\*\*.*$/u, '$1')
+        .trim()
+        .toLowerCase();
+    for (const prefix of PRIORITY_METADATA_BOLD_PREFIXES) {
+        if (inner === prefix)
+            return true;
+        if (inner.startsWith(`${prefix}:`))
+            return true;
+        if (inner.startsWith(`${prefix} `) && inner.includes(':'))
+            return true;
+        if (inner.startsWith(`${prefix}—`) || inner.startsWith(`${prefix} —`))
+            return true;
+    }
+    return false;
+}
+/**
+ * Compose the `{headline, summary}` pair for one matched priority-finding
+ * item. Cleans `Trigger N:` / `N.` prefixes off the headline, strips the
+ * trailing `(TA-10-…, …)` / `(ITRE/ENVI)` metadata, and gathers the
+ * following prose lines as the summary.
+ *
+ * @param rawHeadline - Raw bold title or numbered-heading text
+ * @param tail - Same-line trailing text (after the bold close / heading)
+ * @param lines - Body lines (already split on `\n`)
+ * @param i - Index of the matched line
+ * @returns Cleaned `{headline, summary}`, or `null` when cleaning leaves
+ *   a headline shorter than 5 characters, in which case the caller
+ *   falls through
+ */
+function buildPriorityResult(rawHeadline, tail, lines, i) {
+    const cleaned = cleanPriorityHeadline(rawHeadline);
+    if (cleaned.length < 5)
+        return null;
+    const summaryLines = collectPrioritySummaryLines(tail, lines, i);
+    const summary = truncateDescription(summaryLines.join(' '));
+    return { headline: cleaned, summary };
+}
+/**
+ * Decide whether a follow-up line is a hard stop for priority-finding
+ * summary gathering — collapses three boolean checks out of the main
+ * loop in {@link collectPrioritySummaryLines}.
+ *
+ * A line stops the gathering when it starts with a Markdown heading
+ * marker (1–6 `#` followed by whitespace), a numbered list marker
+ * (`1. `), or a `-` / `*` bullet marker.
+ *
+ * @param line - Trimmed follow-up line
+ * @returns `true` when the gathering loop must break
+ */
+function isPrioritySummaryStopper(line) {
+    if (/^#{1,6}\s/.test(line))
+        return true;
+    if (/^\d+\.\s/.test(line))
+        return true;
+    if (/^[-*]\s/.test(line))
+        return true;
+    return false;
+}
+/**
+ * Gather the summary prose for a priority-finding item — the same-line
+ * tail (with leading procedure-code parens stripped) plus subsequent
+ * prose lines until a blank line / new heading / new bullet is hit.
+ *
+ * Blank lines before the first collected segment are skipped; the first
+ * blank line after one ends the summary. `Confidence` rows and lines
+ * rejected by `shouldSkipDescriptionLine` are skipped without ending it.
+ * Gathering also stops once the joined text reaches
+ * `DESCRIPTION_MAX_LENGTH` characters.
+ *
+ * @param tail - Same-line text that trails the bold/heading
+ * @param lines - Full body lines
+ * @param i - Index of the matched headline line
+ * @returns Ordered list of summary segments (already clean)
+ */
+function collectPrioritySummaryLines(tail, lines, i) {
+    const summaryLines = [];
+    // Strip leading parens-metadata (`(TA-10-2026-0160, 2026-04-30)`) and
+    // trailing parens-metadata from the tail so the summary starts with
+    // editorial prose, not a procedure-code citation.
+    let tailText = stripInlineMarkdown(tail).trim();
+    tailText = tailText.replace(/^\([^()]{3,80}\)\s*/u, '');
+    tailText = stripPriorityTailMetadata(tailText).trim();
+    if (tailText)
+        summaryLines.push(tailText);
+    for (let j = i + 1; j < lines.length; j++) {
+        const next = (lines[j] ?? '').trim();
+        if (!next) {
+            if (summaryLines.length > 0)
+                break;
+            continue;
+        }
+        if (isPrioritySummaryStopper(next))
+            break;
+        if (next.startsWith('**Confidence') || next.startsWith('- **Confidence'))
+            continue;
+        if (shouldSkipDescriptionLine(next))
+            continue;
+        summaryLines.push(stripInlineMarkdown(next));
+        if (summaryLines.join(' ').length >= DESCRIPTION_MAX_LENGTH)
+            break;
+    }
+    return summaryLines;
+}
+/**
+ * Documentation for {@link cleanPriorityHeadline}, which is defined
+ * further below, after its token tables and strip helpers.
+ *
+ * Normalise a priority-finding headline: drop the
+ * `Trigger N:` / `Dossier N:` / leading-numeric prefix, strip trailing
+ * parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
+ * `(ITRE/ENVI)`), and trim residual punctuation. The result is a
+ * headline-shaped string suitable for `<title>` use.
+ *
+ * @param raw - Raw bold-title or heading text
+ * @returns Cleaned headline (may be empty after stripping)
+ */
+/**
+ * Leading priority-label tokens stripped by {@link cleanPriorityHeadline}
+ * (`🔴 CRITICAL — Title` → `Title`). Kept as a list to bypass the
+ * unsafe-regex lint by avoiding deep alternation in a single pattern.
+ *
+ * {@link stripPriorityLeadingDecoration} matches these case-insensitively
+ * as a headline prefix and strips a label only when a `:` / dash
+ * separator and further text follow it.
+ */
+const PRIORITY_LABEL_TOKENS = [
+    'CRITICAL',
+    'HIGH PRIORITY',
+    'HIGH',
+    'MEDIUM PRIORITY',
+    'MEDIUM',
+    'LOW PRIORITY',
+    'LOW',
+    'URGENT',
+    'ALERT',
+    'PRIORITY',
+];
+/**
+ * Trailing confidence-marker tokens stripped by
+ * {@link cleanPriorityHeadline}. Same rationale as
+ * {@link PRIORITY_LABEL_TOKENS}.
+ *
+ * {@link stripPriorityTrailingMarker} interpolates each entry unescaped
+ * into a case-insensitive `RegExp`, so entries must stay free of regex
+ * metacharacters.
+ */
+const PRIORITY_TRAILING_TOKENS = [
+    'CRITICAL',
+    'HIGH PRIORITY',
+    'HIGH',
+    'MEDIUM PRIORITY',
+    'MEDIUM',
+    'LOW PRIORITY',
+    'LOW',
+];
+/**
+ * Leading editorial-prefix tokens stripped by
+ * {@link cleanPriorityHeadline} (`Trigger 1: Title` → `Title`).
+ *
+ * {@link stripPriorityLeadingPrefix} matches these case-insensitively and
+ * strips a token only when it is followed by a number and a `:` / dash
+ * separator.
+ */
+const PRIORITY_LEADING_PREFIX_TOKENS = [
+    'Trigger',
+    'Dossier',
+    'Priority',
+    'Finding',
+    'Item',
+    'Highlight',
+    'Top',
+    'Story',
+    'Alert',
+    'Judgement',
+    'Judgment',
+];
+/**
+ * Strip a leading priority decoration (`🔴 `, `CRITICAL — `) from a
+ * candidate headline. Extracted from {@link cleanPriorityHeadline} to
+ * keep cognitive complexity within budget.
+ *
+ * Runs two passes. Each pass first drops every leading character that is
+ * not a Unicode letter or number, then removes at most one
+ * {@link PRIORITY_LABEL_TOKENS} label — but only when the label is
+ * followed by a `:` / dash separator and further text; a bare label is
+ * left in place.
+ *
+ * @param text - Candidate headline (already trimmed)
+ * @returns Headline with the leading decoration removed
+ */
+function stripPriorityLeadingDecoration(text) {
+    let out = text;
+    for (let pass = 0; pass < 2; pass++) {
+        out = out.replace(/^[^\p{L}\p{N}]+/u, '').trim();
+        for (const token of PRIORITY_LABEL_TOKENS) {
+            if (out.toLowerCase().startsWith(token.toLowerCase())) {
+                const rest = out.slice(token.length).trim();
+                const sep = rest.match(/^[:—–-]\s*(.+)$/u);
+                if (sep?.[1]) {
+                    out = sep[1].trim();
+                    break;
+                }
+            }
+        }
+    }
+    return out;
+}
+/**
+ * Strip a leading editorial prefix (`Trigger 1: `, `Dossier 2: `) and a
+ * stray leading ordinal (`1. `, `2) `) from a candidate headline.
+ *
+ * At most one {@link PRIORITY_LEADING_PREFIX_TOKENS} prefix is removed,
+ * and only when it is followed by a number and a `:` / dash separator.
+ * The ordinal strip removes one run of digits plus a single separator
+ * character, so a dotted label such as `2.1 Title` is only partly
+ * stripped (`1 Title` remains).
+ *
+ * @param text - Candidate headline
+ * @returns Headline with the leading editorial decoration removed
+ */
+function stripPriorityLeadingPrefix(text) {
+    let out = text;
+    for (const token of PRIORITY_LEADING_PREFIX_TOKENS) {
+        if (!out.toLowerCase().startsWith(token.toLowerCase()))
+            continue;
+        const rest = out.slice(token.length);
+        const match = rest.match(/^\s+\d+\s*[:–—-]\s*(.+)$/u);
+        if (match?.[1]) {
+            out = match[1];
+            break;
+        }
+    }
+    // Drop a stray leading "1. " / "2) " ordinal.
+    out = out.replace(/^\d+[.):·\s]\s*/u, '');
+    return out;
+}
+/**
+ * Strip a trailing confidence marker (`🔴 CRITICAL`, `🟡 MEDIUM`) from a
+ * candidate headline. Single pass — caller invokes inside a fixed-point
+ * loop.
+ *
+ * At most one {@link PRIORITY_TRAILING_TOKENS} entry is removed per call.
+ * The token must be preceded by whitespace and may carry one symbol
+ * (a single non-letter, non-digit character such as an emoji) in front.
+ * Matching is case-insensitive.
+ *
+ * @param text - Candidate headline
+ * @returns Headline with the trailing confidence marker removed
+ */
+function stripPriorityTrailingMarker(text) {
+    let out = text;
+    for (const token of PRIORITY_TRAILING_TOKENS) {
+        const pattern = new RegExp(`\\s+[^\\p{L}\\p{N}\\s]?\\s*${token}\\s*$`, 'iu');
+        const next = out.replace(pattern, '');
+        if (next !== out) {
+            out = next;
+            break;
+        }
+    }
+    return out;
+}
+function cleanPriorityHeadline(raw) {
+    let text = stripInlineMarkdown(raw).trim();
+    text = stripPriorityLeadingDecoration(text);
+    text = stripPriorityLeadingPrefix(text);
+    // Trailing cleanup runs in a fixed-point loop so combined patterns
+    // like "Title (Confidence, 80%): 🔴" collapse all the way down to
+    // "Title". Every step can only shorten `text`, so the loop ends.
+    let previous = '';
+    while (previous !== text) {
+        previous = text;
+        text = stripPriorityTrailingMarker(text);
+        text = stripPriorityTailMetadata(text);
+        // Drop a trailing run of symbols/emoji (no letters, digits or spaces).
+        text = text.replace(/\s+[^\p{L}\p{N}\s]+\s*$/u, '');
+        // Drop trailing colons / dashes left over.
+        text = text.replace(/[\s:—–-]+$/u, '');
+        text = text.trim();
+    }
+    return text;
+}
+/**
+ * Strip the trailing parenthesised metadata that briefs append to every
+ * priority-finding name — procedure codes, dates, committee tags. The
+ * regex is anchored to the end of the text and its `[^()]` class cannot
+ * cross a parenthesis, so it removes only the LAST parenthesised group
+ * on the line, and only when that group holds 3–80 characters with no
+ * nested parentheses.
+ *
+ * @param text - Headline or paragraph text
+ * @returns Text with the trailing `(…)` stripped
+ */
+function stripPriorityTailMetadata(text) {
+    return text.replace(/\s*\([^()]{3,80}\)\s*$/u, '').trim();
+}
 /**
  * Read an artefact file, skipping any SPDX HTML-comment header rows so the
  * first-H1 / first-prose logic is never derailed by the REUSE preamble.
@@ -1309,49 +2350,73 @@ function resolveEditorialContent(opts) {
     }
     const summary = artefactSummary || aggregatedSummary;
     if (summary) {
-        return { headline: truncateTitle(summary), summary };
+        // The H1 is generic (category-noun, bare-institutional, or
+        // template-style) so we have to derive `<title>` from the BLUF/
+        // lede paragraph. Extract the first complete sentence so the
+        // resulting title is grammatically self-contained — falling back
+        // to clause-boundary truncation downstream when the sentence
+        // itself overruns TITLE_MAX_LENGTH.
+        const firstSentence = extractFirstSentence(summary);
+        return { headline: truncateTitle(firstSentence), summary };
     }
     return { headline: '', summary: '' };
 }
 /**
- * Enrich a localized fallback title with the article-specific editorial
- * headline so translated variants are not reduced to duplicate type/date
- * templates when the source artifacts carry a real story.
+ * Pick the per-language SEO title from the resolved editorial pair and
+ * the localized template fallback. The decision tree mirrors the priority
+ * ladder in the module header:
  *
- * @param lang - Target language code
- * @param fallbackTitle - Localized article-type fallback title
- * @param editorialHeadline - Artifact-derived editorial headline
+ *   - When an editorial headline exists (either translated brief or
+ *     English brief / aggregated source), use it **verbatim** — no
+ *     concatenation with the localized type/date template. Concatenation
+ *     historically produced strings like
+ *     `Senaste Nytt: Betydande Parlamentariska Händelser — 2026-05-15 — Breaking News: EP April 2026 Plenary Outcomes`
+ *     which mix two languages in a single `<title>` and are blocked by
+ *     `scripts/validate-manifest-seo.js`'s `english-fallthrough` gate.
+ *   - When no editorial headline exists at all, fall back to the
+ *     localized type/date template plus a run qualifier so same-type pages
+ *     remain distinguishable.
+ *
+ * @param fallbackTitle - Localized article-type template title
+ * @param editorialHeadline - Editorial headline (localized or English)
  * @param runId - Optional run id used only when no editorial headline exists
  * @returns SEO title candidate
  */
-function composeContextualTitle(lang, fallbackTitle, editorialHeadline, runId) {
-    if (lang === 'en') {
-        return editorialHeadline || withRunQualifier(fallbackTitle, runId);
-    }
-    if (editorialHeadline) {
-        return `${fallbackTitle} — ${editorialHeadline}`;
-    }
+function composeContextualTitle(fallbackTitle, editorialHeadline, runId) {
+    if (editorialHeadline)
+        return editorialHeadline; // used verbatim, never joined with the template
     return withRunQualifier(fallbackTitle, runId);
 }
 /**
- * Add localized article context, date, run id and evidence language to short
- * meta descriptions. This turns generic type-level subtitles into
+ * Add localized article context to short or duplicate-prone meta
+ * descriptions. This turns generic type-level subtitles into
  * page-specific descriptions suitable for search snippets.
  *
+ * Internal artefact identifiers (`runId`) are deliberately NOT included
+ * in the description: they leak into Google snippets as opaque tokens
+ * like `breaking-run255-1778894853` and provide no value to readers.
+ * The verbose `evidence` boilerplate (`with source-linked voting,
+ * committee and legislative intelligence`) is also dropped — it pads
+ * bytes without adding editorial information and was the dominant
+ * source of mid-sentence ellipsis truncation observed in production.
+ *
+ * The reader-hint suffix (`labels.reader`) is preserved because it
+ * supplies a stable localized intent signal even when the lede is
+ * very short.
+ *
  * @param lang - Target language code
  * @param baseDescription - Best description from manifest/editorial/template
  * @param editorial - Artifact-derived headline and summary
  * @param editorial.headline - Artifact-derived headline
  * @param editorial.summary - Artifact-derived summary
  * @param date - ISO article date
- * @param runId - Optional analysis run id
+ * @param _runId - Reserved (formerly emitted; no longer used)
  * @returns Description in the target language context, capped for SEO snippets
  */
-function composeContextualDescription(lang, baseDescription, editorial, date, runId) {
+function composeContextualDescription(lang, baseDescription, editorial, date, _runId) {
     const labels = getLocalizedString(SEO_CONTEXT_LABELS, lang);
     const parts = [baseDescription.trim()];
-    const runPart = runId ? ` · ${labels.run} ${runId}` : '';
-    parts.push(`${labels.date} ${date}${runPart}, ${labels.evidence}`);
+    parts.push(`${labels.date} ${date}.`);
     const context = pickFirstNonEmpty([editorial.summary, editorial.headline]);
     if (context && !containsNormalized(parts[0] ?? '', context)) {
         parts.push(`${labels.context}: ${context}`);
@@ -1360,14 +2425,46 @@ function composeContextualDescription(lang, baseDescription, editorial, date, ru
     return truncateDescription(parts.join(' '));
 }
 /**
- * Append a run qualifier to otherwise duplicate-prone fallback titles.
+ * Append a short run qualifier to otherwise duplicate-prone fallback
+ * titles. Sanitizes the raw `runId` (which is an internal artefact
+ * identifier of the shape `<slug>-run<N>[-<unix-ts>]`) so user-facing
+ * `<title>` strings never expose Unix timestamps or the full opaque
+ * token. Only the short ordinal `N` is retained.
+ *
+ * Examples:
+ * - `breaking-run255-1778894853` → `Run 255`
+ * - `committee-reports-run330-1778735854` → `Run 330`
+ * - `breaking-run-001` → `Run 001`
+ *
+ * When the runId does not match the canonical shape, the qualifier is
+ * omitted entirely rather than leak an unknown-format token into SEO
+ * surfaces.
  *
  * @param title - Base title
- * @param runId - Optional run id
- * @returns Title with run qualifier when available
+ * @param runId - Optional run id (sanitized before use)
+ * @returns Title with short run qualifier, or unchanged when sanitization fails
  */
 function withRunQualifier(title, runId) {
-    return runId ? `${title} — Run ${runId}` : title;
+    if (!runId)
+        return title;
+    // Walk segments backwards so the last `run<N>` token wins (the slug
+    // in `<slug>-run<N>[-<unix-ts>]` may itself contain `run` segments).
+    // Per-segment anchored patterns avoid a single regex with overlapping
+    // `\d+` groups, which the SonarJS unsafe-regex rule flags.
+    const segments = runId.split('-');
+    for (let idx = segments.length - 1; idx >= 0; idx -= 1) {
+        const seg = segments[idx];
+        const m = /^run(\d+)$/u.exec(seg);
+        if (m)
+            return `${title} — Run ${m[1]}`;
+        // Split form `run-<N>` (e.g. `breaking-run-001`): the ordinal is
+        // the segment right after this one. Use the loop index rather
+        // than `indexOf`, which would always resolve to the first `run`.
+        const next = segments[idx + 1];
+        if (seg === 'run' && next && /^\d+$/u.test(next))
+            return `${title} — Run ${next}`;
+    }
+    return title;
 }
 /**
  * Case-insensitive containment check after whitespace normalization.
@@ -1449,35 +2546,23 @@ function dedupeKeywords(candidates) {
  */
 export function resolveArticleMetadata(opts) {
     const manifest = opts.manifest ?? {};
-    const editorial = resolveEditorialContent(opts);
+    const englishEditorial = resolveEditorialContent(opts);
     const template = buildTemplateFallback(opts.articleType, opts.date, manifest.committee);
     const runId = manifest.runId?.trim() ?? '';
     const result = Object.create(null);
     for (const lang of ALL_LANGUAGES) {
-        const manifestTitle = manifestOverrideFor(manifest.title, lang);
-        const manifestDescription = manifestOverrideFor(manifest.description, lang);
-        const fallback = template[lang];
-        const contextualTitle = composeContextualTitle(lang, fallback.title, editorial.headline, runId);
-        const titleCandidates = [manifestTitle, contextualTitle, fallback.title];
-        const descCandidates = [
-            manifestDescription,
-            lang === 'en' ? editorial.summary : '',
-            fallback.subtitle,
-        ];
-        const title = pickFirstNonEmpty(titleCandidates) || fallback.title;
-        const rawDescription = pickFirstNonEmpty(descCandidates) || fallback.subtitle;
-        const description = rawDescription.length >= DESCRIPTION_MIN_LENGTH &&
-            containsNormalized(rawDescription, opts.date)
-            ? rawDescription
-            : composeContextualDescription(lang, rawDescription, editorial, opts.date, runId);
-        const truncatedTitle = truncateTitle(title);
-        const truncatedDescription = truncateDescription(description);
+        const entry = resolveOneLanguage({
+            lang,
+            manifest,
+            englishEditorial,
+            template: template[lang],
+            runDir: opts.runDir,
+            articleType: opts.articleType,
+            date: opts.date,
+            runId,
+        });
         Object.defineProperty(result, lang, {
-            value: {
-                title: truncatedTitle,
-                description: truncatedDescription,
-                keywords: buildSeoKeywords(lang, opts.articleType, opts.date, runId, truncatedTitle, truncatedDescription),
-            },
+            value: entry,
             enumerable: true,
             writable: true,
             configurable: true,
@@ -1485,6 +2570,96 @@ export function resolveArticleMetadata(opts) {
     }
     return result;
 }
+/**
+ * Resolve `{title, description, keywords, source}` for one language. The
+ * priority ladder is:
+ *
+ *   1. manifest override (per-language wins, then string fall-through)
+ *   2. localized executive brief (`executive-brief_<lang>.md`) headline +
+ *      summary — only for non-English `<lang>`
+ *   3. English executive brief / aggregated editorial — verbatim for
+ *      non-English locales that have no translated brief yet, so the
+ *      SEO surfaces never collapse to a boring type/date template while a
+ *      real editorial highlight exists
+ *   4. localized template fallback
+ *
+ * Tiers 2–3 are selected by `resolvePerLanguageEditorial`; this function
+ * layers the manifest override and the template fallback around them.
+ * The returned `source` is `'manifest'` as soon as either the title or
+ * the description is overridden by the manifest; otherwise it is the
+ * tier reported by `resolvePerLanguageEditorial`.
+ *
+ * @param input - Per-language inputs
+ * @param input.lang - Target language code
+ * @param input.manifest - Analysis-run manifest (`title` / `description` overrides are read)
+ * @param input.englishEditorial - English `{headline, summary}` pair
+ * @param input.template - Template fallback for this language (`title`, `subtitle`)
+ * @param input.runDir - Run directory forwarded to the per-language editorial lookup
+ * @param input.articleType - Article type
+ * @param input.date - Article date
+ * @param input.runId - Run id (may be an empty string)
+ * @returns One resolved metadata entry
+ */
+function resolveOneLanguage(input) {
+    const manifestTitle = manifestOverrideFor(input.manifest.title, input.lang);
+    const manifestDescription = manifestOverrideFor(input.manifest.description, input.lang);
+    const perLanguage = resolvePerLanguageEditorial(input);
+    const editorial = perLanguage.editorial;
+    const contextualTitle = composeContextualTitle(input.template.title, editorial.headline, input.runId);
+    // Candidate order encodes the ladder: manifest, then the editorial-driven
+    // contextual title, then the bare template title.
+    const title = pickFirstNonEmpty([manifestTitle, contextualTitle, input.template.title]);
+    const rawDescription = pickFirstNonEmpty([
+        manifestDescription,
+        editorial.summary,
+        input.template.subtitle,
+    ]);
+    // Descriptions at or above ENRICHMENT_TRIGGER_LENGTH are used as-is;
+    // shorter ones are enriched via composeContextualDescription. Either
+    // way the result is still passed through truncateDescription below.
+    const description = rawDescription.length >= ENRICHMENT_TRIGGER_LENGTH
+        ? rawDescription
+        : composeContextualDescription(input.lang, rawDescription, editorial, input.date, input.runId);
+    const truncatedTitle = truncateTitle(title);
+    const truncatedDescription = truncateDescription(description);
+    // A manifest override of either field marks the whole entry as
+    // manifest-sourced, even if the other field came from a lower tier.
+    const source = manifestTitle || manifestDescription ? 'manifest' : perLanguage.source;
+    return {
+        title: truncatedTitle,
+        description: truncatedDescription,
+        // Keywords are derived from the final (truncated) title/description.
+        keywords: buildSeoKeywords(input.lang, input.articleType, input.date, input.runId, truncatedTitle, truncatedDescription),
+        source,
+    };
+}
+/**
+ * Select the editorial `{headline, summary}` pair for one language,
+ * preferring the translated `executive-brief_<lang>.md` over the English
+ * brief. Records which tier provided the content so the caller can wire
+ * up the editorial fallback note and the manifest-SEO validator without
+ * re-scanning the run directory.
+ *
+ * - For `lang === 'en'`: always returns the English `englishEditorial`
+ *   pair (whose source is the canonical English brief / aggregated
+ *   Markdown / artefact ladder in {@link resolveEditorialContent}).
+ * - For non-English `<lang>`: probes `runDir` for
+ *   `executive-brief_<lang>.md` (and the `extended/` sibling) and
+ *   prefers its headline + lede. Falls through to the English editorial
+ *   when no translated brief exists.
+ *
+ * The reported `source` is one of `'localized-brief'`,
+ * `'english-editorial'` (English locale), `'english-brief'` (non-English
+ * locale reusing the English pair) or `'template'` (no editorial content).
+ *
+ * @param input - Per-language inputs
+ * @returns Editorial pair plus the tier that produced it
+ */
+function resolvePerLanguageEditorial(input) {
+    // The localized lookup is skipped for English and when no run
+    // directory was supplied.
+    if (input.lang !== 'en' && input.runDir) {
+        const localized = resolveLocalizedBriefHighlight(input.runDir, input.lang, input.articleType, input.date);
+        if (localized && (localized.headline || localized.summary)) {
+            // Any localized content wins as a pair: a missing localized
+            // headline or summary is NOT back-filled from the English
+            // editorial. NOTE(review): with an empty headline the title is
+            // presumably left to {@link composeContextualTitle}'s
+            // `editorialHeadline || fallbackTitle` path (i.e. the template
+            // title) while the localized summary still feeds the
+            // description — confirm against that helper.
+            return {
+                editorial: {
+                    headline: localized.headline,
+                    summary: localized.summary,
+                },
+                source: 'localized-brief',
+            };
+        }
+    }
+    // No localized brief — fall through to the English editorial pair.
+    if (input.englishEditorial.headline || input.englishEditorial.summary) {
+        return {
+            editorial: input.englishEditorial,
+            // Distinguish native English content from English reused for
+            // another locale.
+            source: input.lang === 'en' ? 'english-editorial' : 'english-brief',
+        };
+    }
+    // Nothing editorial at all → caller will fall back to the localized
+    // template.
+    return {
+        editorial: { headline: '', summary: '' },
+        source: 'template',
+    };
+}
 /**
  * Return the first non-empty, trimmed entry from a candidate list, or
  * the empty string when every entry is blank.